diff options
author | Edward Pilatowicz <Edward.Pilatowicz@Sun.COM> | 2008-11-17 14:55:44 -0800 |
---|---|---|
committer | Edward Pilatowicz <Edward.Pilatowicz@Sun.COM> | 2008-11-17 14:55:44 -0800 |
commit | 7f0b8309074a5d8e9f9d8ffe7aad7bb0b1ee6b1f (patch) | |
tree | d9e4edfa4b2d5b96d61caa2e293f5e58a3a01a71 | |
parent | 53730946491a2da1c44c44e89a41006494591b53 (diff) | |
download | illumos-joyent-7f0b8309074a5d8e9f9d8ffe7aad7bb0b1ee6b1f.tar.gz |
PSARC/2007/664 Paravirtualized Drivers for Fully Virtualized xVM Domains
PSARC/2009/015 ddi_strdup
6796427 add ddi_strdup(9F), strdup(9F), and strfree(9F) to the DDI
6677559 Solaris should provide a PV cdrom driver for xVM HVM environments
6703437 xdb doesn't do anything with xm block-configure requests
6774478 misc xdf bugs
35 files changed, 6306 insertions, 3863 deletions
diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c index eb65f81f8c..a51544a70f 100644 --- a/usr/src/cmd/truss/codes.c +++ b/usr/src/cmd/truss/codes.c @@ -88,6 +88,10 @@ #include <sys/fs/zfs.h> #include <inet/kssl/kssl.h> #include <sys/dkio.h> +#include <sys/fdio.h> +#include <sys/cdio.h> +#include <sys/scsi/impl/uscsi.h> +#include <sys/devinfo_impl.h> #include <sys/dumpadm.h> #include <sys/mntio.h> #include <sys/zcons.h> @@ -1075,34 +1079,168 @@ const struct ioc { { (uint_t)KSSL_DELETE_ENTRY, "KSSL_DELETE_ENTRY", "sockaddr_in"}, - /* dkio ioctls - (0x04 << 8) */ + /* disk ioctls - (0x04 << 8) - dkio.h */ { (uint_t)DKIOCGGEOM, "DKIOCGGEOM", "struct dk_geom"}, - { (uint_t)DKIOCSGEOM, "DKIOCSGEOM", - "struct dk_geom"}, { (uint_t)DKIOCINFO, "DKIOCINFO", "struct dk_info"}, + { (uint_t)DKIOCEJECT, "DKIOCEJECT", + NULL}, { (uint_t)DKIOCGVTOC, "DKIOCGVTOC", "struct vtoc"}, { (uint_t)DKIOCSVTOC, "DKIOCSVTOC", "struct vtoc"}, - { (uint_t)DKIOCADDBAD, "DKIOCADDBAD", + { (uint_t)DKIOCGEXTVTOC, "DKIOCGEXTVTOC", + "struct extvtoc"}, + { (uint_t)DKIOCSEXTVTOC, "DKIOCSEXTVTOC", + "struct extvtoc"}, + { (uint_t)DKIOCFLUSHWRITECACHE, "DKIOCFLUSHWRITECACHE", + NULL}, + { (uint_t)DKIOCGETWCE, "DKIOCGETWCE", + NULL}, + { (uint_t)DKIOCSETWCE, "DKIOCSETWCE", NULL}, + { (uint_t)DKIOCSGEOM, "DKIOCSGEOM", + "struct dk_geom"}, + { (uint_t)DKIOCSAPART, "DKIOCSAPART", + "struct dk_allmap"}, + { (uint_t)DKIOCGAPART, "DKIOCGAPART", + "struct dk_allmap"}, { (uint_t)DKIOCG_PHYGEOM, "DKIOCG_PHYGEOM", "struct dk_geom"}, { (uint_t)DKIOCG_VIRTGEOM, "DKIOCG_VIRTGEOM", "struct dk_geom"}, + { (uint_t)DKIOCLOCK, "DKIOCLOCK", + NULL}, + { (uint_t)DKIOCUNLOCK, "DKIOCUNLOCK", + NULL}, + { (uint_t)DKIOCSTATE, "DKIOCSTATE", + NULL}, + { (uint_t)DKIOCREMOVABLE, "DKIOCREMOVABLE", + NULL}, + { (uint_t)DKIOCHOTPLUGGABLE, "DKIOCHOTPLUGGABLE", + NULL}, + { (uint_t)DKIOCADDBAD, "DKIOCADDBAD", + NULL}, + { (uint_t)DKIOCGETDEF, "DKIOCGETDEF", + NULL}, + { (uint_t)DKIOCPARTINFO, "DKIOCPARTINFO", + "struct part_info"}, + { (uint_t)DKIOCEXTPARTINFO, "DKIOCEXTPARTINFO", + "struct extpart_info"}, { (uint_t)DKIOCGMEDIAINFO, "DKIOCGMEDIAINFO", "struct dk_minfo"}, + { (uint_t)DKIOCGMBOOT, "DKIOCGMBOOT", + NULL}, + { (uint_t)DKIOCSMBOOT, "DKIOCSMBOOT", + NULL}, + { (uint_t)DKIOCSETEFI, "DKIOCSETEFI", + "struct dk_efi"}, + { (uint_t)DKIOCGETEFI, "DKIOCGETEFI", + "struct dk_efi"}, + { (uint_t)DKIOCPARTITION, "DKIOCPARTITION", + "struct partition64"}, + { (uint_t)DKIOCGETVOLCAP, "DKIOCGETVOLCAP", + "struct volcap_t"}, + { (uint_t)DKIOCSETVOLCAP, "DKIOCSETVOLCAP", + "struct volcap_t"}, + { (uint_t)DKIOCDMR, "DKIOCDMR", + "struct vol_directed_rd"}, + { (uint_t)DKIOCDUMPINIT, "DKIOCDUMPINIT", + NULL}, + { (uint_t)DKIOCDUMPFINI, "DKIOCDUMPFINI", + NULL}, + + /* disk ioctls - (0x04 << 8) - fdio.h */ + { (uint_t)FDIOGCHAR, "FDIOGCHAR", + "struct fd_char"}, + { (uint_t)FDIOSCHAR, "FDIOSCHAR", + "struct fd_char"}, + { (uint_t)FDEJECT, "FDEJECT", + NULL}, + { (uint_t)FDGETCHANGE, "FDGETCHANGE", + NULL}, + { (uint_t)FDGETDRIVECHAR, "FDGETDRIVECHAR", + "struct fd_drive"}, + { (uint_t)FDSETDRIVECHAR, "FDSETDRIVECHAR", + "struct fd_drive"}, + { (uint_t)FDGETSEARCH, "FDGETSEARCH", + NULL}, + { (uint_t)FDSETSEARCH, "FDSETSEARCH", + NULL}, + { (uint_t)FDIOCMD, "FDIOCMD", + "struct fd_cmd"}, + { (uint_t)FDRAW, "FDRAW", + "struct fd_raw"}, + { (uint_t)FDDEFGEOCHAR, "FDDEFGEOCHAR", + NULL}, + + /* disk ioctls - (0x04 << 8) - cdio.h */ + { (uint_t)CDROMPAUSE, "CDROMPAUSE", + NULL}, + { (uint_t)CDROMRESUME, "CDROMRESUME", + NULL}, + { (uint_t)CDROMPLAYMSF, 
"CDROMPLAYMSF", + "struct cdrom_msf"}, + { (uint_t)CDROMPLAYTRKIND, "CDROMPLAYTRKIND", + "struct cdrom_ti"}, + { (uint_t)CDROMREADTOCHDR, "CDROMREADTOCHDR", + "struct cdrom_tochdr"}, + { (uint_t)CDROMREADTOCENTRY, "CDROMREADTOCENTRY", + "struct cdrom_tocentry"}, + { (uint_t)CDROMSTOP, "CDROMSTOP", + NULL}, + { (uint_t)CDROMSTART, "CDROMSTART", + NULL}, + { (uint_t)CDROMEJECT, "CDROMEJECT", + NULL}, + { (uint_t)CDROMVOLCTRL, "CDROMVOLCTRL", + "struct cdrom_volctrl"}, + { (uint_t)CDROMSUBCHNL, "CDROMSUBCHNL", + "struct cdrom_subchnl"}, + { (uint_t)CDROMREADMODE2, "CDROMREADMODE2", + "struct cdrom_read"}, + { (uint_t)CDROMREADMODE1, "CDROMREADMODE1", + "struct cdrom_read"}, + { (uint_t)CDROMREADOFFSET, "CDROMREADOFFSET", + NULL}, + { (uint_t)CDROMGBLKMODE, "CDROMGBLKMODE", + NULL}, + { (uint_t)CDROMSBLKMODE, "CDROMSBLKMODE", + NULL}, + { (uint_t)CDROMCDDA, "CDROMCDDA", + "struct cdrom_cdda"}, + { (uint_t)CDROMCDXA, "CDROMCDXA", + "struct cdrom_cdxa"}, + { (uint_t)CDROMSUBCODE, "CDROMSUBCODE", + "struct cdrom_subcode"}, + { (uint_t)CDROMGDRVSPEED, "CDROMGDRVSPEED", + NULL}, + { (uint_t)CDROMSDRVSPEED, "CDROMSDRVSPEED", + NULL}, + { (uint_t)CDROMCLOSETRAY, "CDROMCLOSETRAY", + NULL}, + + /* disk ioctls - (0x04 << 8) - uscsi.h */ + { (uint_t)USCSICMD, "USCSICMD", + "struct uscsi_cmd"}, /* dumpadm ioctls - (0xdd << 8) */ { (uint_t)DIOCGETDEV, "DIOCGETDEV", - "char *"}, + NULL}, /* mntio ioctls - ('m' << 8) */ { (uint_t)MNTIOC_GETMNTENT, "MNTIOC_GETMNTENT", "struct extmnttab"}, + /* devinfo ioctls - ('df' << 8) - devinfo_impl.h */ + { (uint_t)DINFOUSRLD, "DINFOUSRLD", + NULL}, + { (uint_t)DINFOLODRV, "DINFOLODRV", + NULL}, + { (uint_t)DINFOIDENT, "DINFOIDENT", + NULL}, + /* zcons ioctls */ { (uint_t)ZC_HOLDSLAVE, "ZC_HOLDSLAVE", NULL }, { (uint_t)ZC_RELEASESLAVE, "ZC_RELEASESLAVE", NULL }, diff --git a/usr/src/pkgdefs/SUNWxvmpv/prototype_i386 b/usr/src/pkgdefs/SUNWxvmpv/prototype_i386 index a4d8268b92..f1900b0911 100644 --- a/usr/src/pkgdefs/SUNWxvmpv/prototype_i386 +++ b/usr/src/pkgdefs/SUNWxvmpv/prototype_i386 @@ -20,11 +20,9 @@ # # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# ident "%Z%%M% %I% %E% SMI" -# # # This required package information file contains a list of package contents. 
@@ -52,12 +50,14 @@ d none platform/i86hvm/kernel/drv 0755 root sys d none platform/i86hvm/kernel/drv/amd64 0755 root sys f none platform/i86hvm/kernel/drv/amd64/cmdk 0755 root sys f none platform/i86hvm/kernel/drv/amd64/rtls 0755 root sys +f none platform/i86hvm/kernel/drv/amd64/sd 0755 root sys f none platform/i86hvm/kernel/drv/amd64/xdf 0755 root sys f none platform/i86hvm/kernel/drv/amd64/xnf 0755 root sys f none platform/i86hvm/kernel/drv/amd64/xpv 0755 root sys f none platform/i86hvm/kernel/drv/amd64/xpvd 0755 root sys f none platform/i86hvm/kernel/drv/cmdk 0755 root sys f none platform/i86hvm/kernel/drv/rtls 0755 root sys +f none platform/i86hvm/kernel/drv/sd 0755 root sys f none platform/i86hvm/kernel/drv/xdf 0755 root sys f none platform/i86hvm/kernel/drv/xnf 0755 root sys f none platform/i86hvm/kernel/drv/xpv 0755 root sys @@ -67,4 +67,8 @@ f none platform/i86hvm/kernel/drv/xpvd.conf 0644 root sys d none platform/i86hvm/kernel/misc 0755 root sys d none platform/i86hvm/kernel/misc/amd64 0755 root sys f none platform/i86hvm/kernel/misc/amd64/hvm_bootstrap 0755 root sys +f none platform/i86hvm/kernel/misc/amd64/hvm_cmdk 0755 root sys +f none platform/i86hvm/kernel/misc/amd64/hvm_sd 0755 root sys f none platform/i86hvm/kernel/misc/hvm_bootstrap 0755 root sys +f none platform/i86hvm/kernel/misc/hvm_cmdk 0755 root sys +f none platform/i86hvm/kernel/misc/hvm_sd 0755 root sys diff --git a/usr/src/uts/common/brand/lx/io/lx_audio.c b/usr/src/uts/common/brand/lx/io/lx_audio.c index b2c137655d..8f80c61853 100644 --- a/usr/src/uts/common/brand/lx/io/lx_audio.c +++ b/usr/src/uts/common/brand/lx/io/lx_audio.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -226,21 +226,6 @@ getzonename(void) return (curproc->p_zone->zone_name); } -static void -strfree(char *str) -{ - kmem_free(str, strlen(str) + 1); -} - -static char * -strdup(char *str) -{ - int n = strlen(str); - char *ptr = kmem_alloc(n + 1, KM_SLEEP); - bcopy(str, ptr, n + 1); - return (ptr); -} - static char * lxa_devprop_name(char *zname, char *pname) { diff --git a/usr/src/uts/common/fs/nfs/nfs4_client_debug.c b/usr/src/uts/common/fs/nfs/nfs4_client_debug.c index 7872d54c53..0d57e91049 100644 --- a/usr/src/uts/common/fs/nfs/nfs4_client_debug.c +++ b/usr/src/uts/common/fs/nfs/nfs4_client_debug.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,12 +19,10 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/cred.h> #include <sys/kstat.h> #include <sys/list.h> @@ -82,8 +79,6 @@ static rkstat_t rkstat_template = { int nfs4_msg_max = NFS4_MSG_MAX; #define DEFAULT_LEASE 180 -static char *strdup(const char *); - /* * Sets the appropiate fields of "ep", given "id" and various parameters. 
* Assumes that ep's fields have been initialized to zero/null, except for @@ -612,7 +607,7 @@ facts_same(nfs4_debug_msg_t *cur_msg, nfs4_debug_msg_t *new_msg, return (0); len = strlen(cur_msg->msg_srv); if (strncmp(cur_msg->msg_srv, new_msg->msg_srv, - len) != 0) + len) != 0) return (0); } else if (new_msg->msg_srv != NULL) { return (0); @@ -622,7 +617,7 @@ facts_same(nfs4_debug_msg_t *cur_msg, nfs4_debug_msg_t *new_msg, return (0); len = strlen(cur_msg->msg_mntpt); if (strncmp(cur_msg->msg_mntpt, new_msg->msg_mntpt, - len) != 0) + len) != 0) return (0); } else if (new_msg->msg_mntpt != NULL) { return (0); @@ -1520,21 +1515,3 @@ nfs4_mi_kstat_inc_no_grace(mntinfo4_t *mi) rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data; rsp->no_grace.value.ul++; } - -/* - * Allocate and copy a string. XXX There really ought to be a single - * strdup() for the entire kernel. - */ -static char * -strdup(const char *s) -{ - size_t len; - char *new; - - len = strlen(s); - new = kmem_alloc(len + 1, KM_SLEEP); - bcopy(s, new, len); - new[len] = '\0'; - - return (new); -} diff --git a/usr/src/uts/common/io/cmlb.c b/usr/src/uts/common/io/cmlb.c index 2c50a0566b..8dc9af2b39 100644 --- a/usr/src/uts/common/io/cmlb.c +++ b/usr/src/uts/common/io/cmlb.c @@ -202,7 +202,7 @@ static struct modlinkage modlinkage = { /* Local function prototypes */ static dev_t cmlb_make_device(struct cmlb_lun *cl); -static int cmlb_validate_geometry(struct cmlb_lun *cl, int forcerevalid, +static int cmlb_validate_geometry(struct cmlb_lun *cl, boolean_t forcerevalid, int flags, void *tg_cookie); static void cmlb_resync_geom_caches(struct cmlb_lun *cl, diskaddr_t capacity, void *tg_cookie); @@ -227,14 +227,14 @@ static void cmlb_clear_vtoc(struct cmlb_lun *cl, void *tg_cookie); static void cmlb_setup_default_geometry(struct cmlb_lun *cl, void *tg_cookie); static int cmlb_create_minor_nodes(struct cmlb_lun *cl); static int cmlb_check_update_blockcount(struct cmlb_lun *cl, void *tg_cookie); -static int cmlb_check_efi_mbr(uchar_t *buf, int *is_mbr); +static boolean_t cmlb_check_efi_mbr(uchar_t *buf, boolean_t *is_mbr); #if defined(__i386) || defined(__amd64) static int cmlb_update_fdisk_and_vtoc(struct cmlb_lun *cl, void *tg_cookie); #endif #if defined(_FIRMWARE_NEEDS_FDISK) -static int cmlb_has_max_chs_vals(struct ipart *fdp); +static boolean_t cmlb_has_max_chs_vals(struct ipart *fdp); #endif #if defined(_SUNOS_VTOC_16) @@ -487,10 +487,8 @@ cmlb_free_handle(cmlb_handle_t *cmlbhandlep) * scsi/generic/inquiry.h * * is_removable whether or not device is removable. - * 0 non-removable, 1 removable. * * is_hotpluggable whether or not device is hotpluggable. - * 0 non-hotpluggable, 1 hotpluggable. 
* * node_type minor node type (as used by ddi_create_minor_node) * @@ -588,7 +586,7 @@ cmlb_free_handle(cmlb_handle_t *cmlbhandlep) */ int cmlb_attach(dev_info_t *devi, cmlb_tg_ops_t *tgopsp, int device_type, - int is_removable, int is_hotpluggable, char *node_type, + boolean_t is_removable, boolean_t is_hotpluggable, char *node_type, int alter_behavior, cmlb_handle_t cmlbhandle, void *tg_cookie) { @@ -596,6 +594,9 @@ cmlb_attach(dev_info_t *devi, cmlb_tg_ops_t *tgopsp, int device_type, diskaddr_t cap; int status; + ASSERT(VALID_BOOLEAN(is_removable)); + ASSERT(VALID_BOOLEAN(is_hotpluggable)); + if (tgopsp->tg_version < TG_DK_OPS_VERSION_1) return (EINVAL); @@ -608,13 +609,13 @@ cmlb_attach(dev_info_t *devi, cmlb_tg_ops_t *tgopsp, int device_type, cl->cl_is_hotpluggable = is_hotpluggable; cl->cl_node_type = node_type; cl->cl_sys_blocksize = DEV_BSIZE; - cl->cl_f_geometry_is_valid = FALSE; + cl->cl_f_geometry_is_valid = B_FALSE; cl->cl_def_labeltype = CMLB_LABEL_VTOC; cl->cl_alter_behavior = alter_behavior; cl->cl_reserved = -1; cl->cl_msglog_flag |= CMLB_ALLOW_2TB_WARN; - if (is_removable == 0) { + if (!is_removable) { mutex_exit(CMLB_MUTEX(cl)); status = DK_TG_GETCAP(cl, &cap, tg_cookie); mutex_enter(CMLB_MUTEX(cl)); @@ -663,7 +664,7 @@ cmlb_detach(cmlb_handle_t cmlbhandle, void *tg_cookie) mutex_enter(CMLB_MUTEX(cl)); cl->cl_def_labeltype = CMLB_LABEL_UNDEF; - cl->cl_f_geometry_is_valid = FALSE; + cl->cl_f_geometry_is_valid = B_FALSE; ddi_remove_minor_node(CMLB_DEVINFO(cl), NULL); i_ddi_prop_dyn_driver_set(CMLB_DEVINFO(cl), NULL); cl->cl_state = CMLB_INITED; @@ -720,11 +721,11 @@ cmlb_validate(cmlb_handle_t cmlbhandle, int flags, void *tg_cookie) return (ENXIO); } - rval = cmlb_validate_geometry((struct cmlb_lun *)cmlbhandle, 1, + rval = cmlb_validate_geometry((struct cmlb_lun *)cmlbhandle, B_TRUE, flags, tg_cookie); if (rval == ENOTSUP) { - if (cl->cl_f_geometry_is_valid == TRUE) { + if (cl->cl_f_geometry_is_valid) { cl->cl_cur_labeltype = CMLB_LABEL_EFI; ret = 0; } else { @@ -762,7 +763,7 @@ cmlb_invalidate(cmlb_handle_t cmlbhandle, void *tg_cookie) return; mutex_enter(CMLB_MUTEX(cl)); - cl->cl_f_geometry_is_valid = FALSE; + cl->cl_f_geometry_is_valid = B_FALSE; mutex_exit(CMLB_MUTEX(cl)); } @@ -774,19 +775,19 @@ cmlb_invalidate(cmlb_handle_t cmlbhandle, void *tg_cookie) * cmlbhandle cmlb handle associated with device. * * Return values: - * TRUE if incore label/geom data is valid. - * FALSE otherwise. + * B_TRUE if incore label/geom data is valid. + * B_FALSE otherwise. 
* */ -int +boolean_t cmlb_is_valid(cmlb_handle_t cmlbhandle) { struct cmlb_lun *cl = (struct cmlb_lun *)cmlbhandle; if (cmlbhandle == NULL) - return (FALSE); + return (B_FALSE); return (cl->cl_f_geometry_is_valid); @@ -816,7 +817,7 @@ cmlb_close(cmlb_handle_t cmlbhandle, void *tg_cookie) struct cmlb_lun *cl = (struct cmlb_lun *)cmlbhandle; mutex_enter(CMLB_MUTEX(cl)); - cl->cl_f_geometry_is_valid = FALSE; + cl->cl_f_geometry_is_valid = B_FALSE; /* revert to default minor node for this device */ if (ISREMOVABLE(cl)) { @@ -861,7 +862,7 @@ cmlb_get_devid_block(cmlb_handle_t cmlbhandle, diskaddr_t *devidblockp, return (EINVAL); } - if ((cl->cl_f_geometry_is_valid == FALSE) || + if ((!cl->cl_f_geometry_is_valid) || (cl->cl_solaris_size < DK_LABEL_LOC)) { mutex_exit(CMLB_MUTEX(cl)); return (EINVAL); @@ -955,16 +956,16 @@ cmlb_partinfo(cmlb_handle_t cmlbhandle, int part, diskaddr_t *nblocksp, if (part < 0 || part >= MAXPART) { rval = EINVAL; } else { - if (cl->cl_f_geometry_is_valid == FALSE) - (void) cmlb_validate_geometry((struct cmlb_lun *)cl, 0, - 0, tg_cookie); + if (!cl->cl_f_geometry_is_valid) + (void) cmlb_validate_geometry((struct cmlb_lun *)cl, + B_FALSE, 0, tg_cookie); #if defined(_SUNOS_VTOC_16) - if (((cl->cl_f_geometry_is_valid == FALSE) || + if (((!cl->cl_f_geometry_is_valid) || (part < NDKMAP && cl->cl_solaris_size == 0)) && (part != P0_RAW_DISK)) { #else - if ((cl->cl_f_geometry_is_valid == FALSE) || + if ((!cl->cl_f_geometry_is_valid) || (part < NDKMAP && cl->cl_solaris_size == 0)) { #endif rval = EINVAL; @@ -1030,11 +1031,11 @@ cmlb_efi_label_capacity(cmlb_handle_t cmlbhandle, diskaddr_t *capacity, return (EINVAL); } - if (cl->cl_f_geometry_is_valid == FALSE) - (void) cmlb_validate_geometry((struct cmlb_lun *)cl, 0, + if (!cl->cl_f_geometry_is_valid) + (void) cmlb_validate_geometry((struct cmlb_lun *)cl, B_FALSE, 0, tg_cookie); - if ((cl->cl_f_geometry_is_valid == FALSE) || (capacity == NULL) || + if ((!cl->cl_f_geometry_is_valid) || (capacity == NULL) || (cl->cl_cur_labeltype != CMLB_LABEL_EFI)) { rval = EINVAL; } else { @@ -1268,33 +1269,36 @@ cmlb_check_update_blockcount(struct cmlb_lun *cl, void *tg_cookie) ASSERT(mutex_owned(CMLB_MUTEX(cl))); - if (cl->cl_f_geometry_is_valid == FALSE) { - mutex_exit(CMLB_MUTEX(cl)); - status = DK_TG_GETCAP(cl, &capacity, tg_cookie); - if (status != 0) { - mutex_enter(CMLB_MUTEX(cl)); - return (EIO); - } + if (cl->cl_f_geometry_is_valid) + return (0); - status = DK_TG_GETBLOCKSIZE(cl, &lbasize, tg_cookie); + mutex_exit(CMLB_MUTEX(cl)); + status = DK_TG_GETCAP(cl, &capacity, tg_cookie); + if (status != 0) { mutex_enter(CMLB_MUTEX(cl)); - if (status != 0) - return (EIO); + return (EIO); + } - if ((capacity != 0) && (lbasize != 0)) { - cl->cl_blockcount = capacity; - cl->cl_tgt_blocksize = lbasize; - return (0); - } else - return (EIO); - } else + status = DK_TG_GETBLOCKSIZE(cl, &lbasize, tg_cookie); + mutex_enter(CMLB_MUTEX(cl)); + if (status != 0) + return (EIO); + + if ((capacity != 0) && (lbasize != 0)) { + cl->cl_blockcount = capacity; + cl->cl_tgt_blocksize = lbasize; return (0); + } else { + return (EIO); + } } static int cmlb_create_minor(dev_info_t *dip, char *name, int spec_type, minor_t minor_num, char *node_type, int flag, boolean_t internal) { + ASSERT(VALID_BOOLEAN(internal)); + if (internal) return (ddi_create_internal_pathname(dip, name, spec_type, minor_num)); @@ -1331,7 +1335,8 @@ cmlb_create_minor_nodes(struct cmlb_lun *cl) ASSERT(cl != NULL); ASSERT(mutex_owned(CMLB_MUTEX(cl))); - internal = ((cl->cl_alter_behavior & 
(CMLB_INTERNAL_MINOR_NODES)) != 0); + internal = VOID2BOOLEAN( + (cl->cl_alter_behavior & (CMLB_INTERNAL_MINOR_NODES)) != 0); /* check the most common case */ if (cl->cl_cur_labeltype != CMLB_LABEL_UNDEF && @@ -1462,7 +1467,7 @@ cmlb_create_minor_nodes(struct cmlb_lun *cl) * Context: Kernel thread only (can sleep). */ static int -cmlb_validate_geometry(struct cmlb_lun *cl, int forcerevalid, int flags, +cmlb_validate_geometry(struct cmlb_lun *cl, boolean_t forcerevalid, int flags, void *tg_cookie) { int label_error = 0; @@ -1470,8 +1475,9 @@ cmlb_validate_geometry(struct cmlb_lun *cl, int forcerevalid, int flags, int count; ASSERT(mutex_owned(CMLB_MUTEX(cl))); + ASSERT(VALID_BOOLEAN(forcerevalid)); - if ((cl->cl_f_geometry_is_valid == TRUE) && (forcerevalid == 0)) { + if ((cl->cl_f_geometry_is_valid) && (!forcerevalid)) { if (cl->cl_cur_labeltype == CMLB_LABEL_EFI) return (ENOTSUP); return (0); @@ -1580,7 +1586,7 @@ cmlb_validate_geometry(struct cmlb_lun *cl, int forcerevalid, int flags, * a default label. */ label_error = 0; - cl->cl_f_geometry_is_valid = TRUE; + cl->cl_f_geometry_is_valid = B_TRUE; goto no_solaris_partition; } @@ -1638,7 +1644,7 @@ cmlb_validate_geometry(struct cmlb_lun *cl, int forcerevalid, int flags, #elif defined(_SUNOS_VTOC_16) if (label_error != EACCES) { #endif - if (cl->cl_f_geometry_is_valid == FALSE) { + if (!cl->cl_f_geometry_is_valid) { cmlb_build_default_label(cl, tg_cookie); } label_error = 0; @@ -2065,7 +2071,7 @@ done: bzero(&cl->cl_g, sizeof (struct dk_geom)); bzero(&cl->cl_vtoc, sizeof (struct dk_vtoc)); bzero(&cl->cl_map, NDKMAP * (sizeof (struct dk_map))); - cl->cl_f_geometry_is_valid = FALSE; + cl->cl_f_geometry_is_valid = B_FALSE; } cl->cl_solaris_offset = solaris_offset; cl->cl_solaris_size = solaris_size; @@ -2130,8 +2136,8 @@ cmlb_validate_efi(efi_gpt_t *labp) } /* - * This function returns FALSE if there is a valid MBR signature and no - * partition table entries of type EFI_PMBR (0xEE). Otherwise it returns TRUE. + * This function returns B_FALSE if there is a valid MBR signature and no + * partition table entries of type EFI_PMBR (0xEE). Otherwise it returns B_TRUE. * * The EFI spec (1.10 and later) requires having a Protective MBR (PMBR) to * recognize the disk as GPT partitioned. However, some other OS creates an MBR @@ -2139,11 +2145,11 @@ cmlb_validate_efi(efi_gpt_t *labp) * corrupted, currently best attempt to allow data access would be to try to * check for GPT headers. Hence in case of more than one partition entry, but * at least one EFI_PMBR partition type or no valid magic number, the function - * returns TRUE to continue with looking for GPT header. + * returns B_TRUE to continue with looking for GPT header. 
*/ -static int -cmlb_check_efi_mbr(uchar_t *buf, int *is_mbr) +static boolean_t +cmlb_check_efi_mbr(uchar_t *buf, boolean_t *is_mbr) { struct ipart *fdp; struct mboot *mbp = (struct mboot *)buf; @@ -2151,22 +2157,22 @@ cmlb_check_efi_mbr(uchar_t *buf, int *is_mbr) int i; if (is_mbr != NULL) - *is_mbr = TRUE; + *is_mbr = B_TRUE; if (LE_16(mbp->signature) != MBB_MAGIC) { if (is_mbr != NULL) - *is_mbr = FALSE; - return (TRUE); + *is_mbr = B_FALSE; + return (B_TRUE); } bcopy(&mbp->parts[0], fdisk, sizeof (fdisk)); for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) { if (fdp->systid == EFI_PMBR) - return (TRUE); + return (B_TRUE); } - return (FALSE); + return (B_FALSE); } static int @@ -2185,7 +2191,7 @@ cmlb_use_efi(struct cmlb_lun *cl, diskaddr_t capacity, int flags, int iofailed = 0; struct uuid uuid_type_reserved = EFI_RESERVED; #if defined(_FIRMWARE_NEEDS_FDISK) - int is_mbr; + boolean_t is_mbr; #endif ASSERT(mutex_owned(CMLB_MUTEX(cl))); @@ -2215,15 +2221,15 @@ cmlb_use_efi(struct cmlb_lun *cl, diskaddr_t capacity, int flags, } #if defined(_FIRMWARE_NEEDS_FDISK) - if (cmlb_check_efi_mbr(buf, &is_mbr) == FALSE) { - if (is_mbr == TRUE) + if (!cmlb_check_efi_mbr(buf, &is_mbr)) { + if (is_mbr) rval = ESRCH; else rval = EINVAL; goto done_err; } #else - if (cmlb_check_efi_mbr(buf, NULL) == FALSE) { + if (!cmlb_check_efi_mbr(buf, NULL)) { rval = EINVAL; goto done_err; } @@ -2341,7 +2347,7 @@ cmlb_use_efi(struct cmlb_lun *cl, diskaddr_t capacity, int flags, cl->cl_solaris_offset = 0; cl->cl_solaris_size = capacity; cl->cl_label_from_media = CMLB_LABEL_EFI; - cl->cl_f_geometry_is_valid = TRUE; + cl->cl_f_geometry_is_valid = B_TRUE; /* clear the vtoc label */ bzero(&cl->cl_vtoc, sizeof (struct dk_vtoc)); @@ -2362,7 +2368,7 @@ done_err1: * causes things like opens and stats on the partition to fail. */ if ((capacity > CMLB_EXTVTOC_LIMIT) && (rval != ESRCH) && !iofailed) { - cl->cl_f_geometry_is_valid = FALSE; + cl->cl_f_geometry_is_valid = B_FALSE; } return (rval); } @@ -2546,7 +2552,7 @@ cmlb_uselabel(struct cmlb_lun *cl, struct dk_label *labp, int flags) } /* Mark the geometry as valid. 
*/ - cl->cl_f_geometry_is_valid = TRUE; + cl->cl_f_geometry_is_valid = B_TRUE; /* * if we got invalidated when mutex exit and entered again, @@ -2584,7 +2590,7 @@ cmlb_uselabel(struct cmlb_lun *cl, struct dk_label *labp, int flags) if ((labp->dkl_map[i].dkl_nblk) && (part_end > cl->cl_blockcount)) { - cl->cl_f_geometry_is_valid = FALSE; + cl->cl_f_geometry_is_valid = B_FALSE; break; } } @@ -2595,7 +2601,7 @@ cmlb_uselabel(struct cmlb_lun *cl, struct dk_label *labp, int flags) part_end = vpartp->p_start + vpartp->p_size; if ((vpartp->p_size > 0) && (part_end > cl->cl_blockcount)) { - cl->cl_f_geometry_is_valid = FALSE; + cl->cl_f_geometry_is_valid = B_FALSE; break; } } @@ -2609,7 +2615,7 @@ cmlb_uselabel(struct cmlb_lun *cl, struct dk_label *labp, int flags) "Label says %llu blocks; Drive says %llu blocks\n", label_capacity, cl->cl_blockcount); } - cl->cl_f_geometry_is_valid = FALSE; + cl->cl_f_geometry_is_valid = B_FALSE; label_error = CMLB_LABEL_IS_INVALID; } @@ -2775,7 +2781,7 @@ cmlb_build_default_label(struct cmlb_lun *cl, void *tg_cookie) * Got fdisk table but no solaris entry therefore * don't create a default label */ - cl->cl_f_geometry_is_valid = TRUE; + cl->cl_f_geometry_is_valid = B_TRUE; return; } @@ -2915,7 +2921,7 @@ cmlb_build_default_label(struct cmlb_lun *cl, void *tg_cookie) cl->cl_vtoc.v_nparts = V_NUMPAR; cl->cl_vtoc.v_version = V_VERSION; - cl->cl_f_geometry_is_valid = TRUE; + cl->cl_f_geometry_is_valid = B_TRUE; cl->cl_label_from_media = CMLB_LABEL_UNDEF; cmlb_dbg(CMLB_INFO, cl, @@ -2938,7 +2944,7 @@ cmlb_build_default_label(struct cmlb_lun *cl, void *tg_cookie) /* * Function: cmlb_has_max_chs_vals * - * Description: Return TRUE if Cylinder-Head-Sector values are all at maximum. + * Description: Return B_TRUE if Cylinder-Head-Sector values are all at maximum. * * Arguments: fdp - ptr to CHS info * @@ -2946,7 +2952,7 @@ cmlb_build_default_label(struct cmlb_lun *cl, void *tg_cookie) * * Context: Any. */ -static int +static boolean_t cmlb_has_max_chs_vals(struct ipart *fdp) { return ((fdp->begcyl == LBA_MAX_CYL) && @@ -2992,7 +2998,7 @@ cmlb_dkio_get_geometry(struct cmlb_lun *cl, caddr_t arg, int flag, * is ready. */ mutex_enter(CMLB_MUTEX(cl)); - rval = cmlb_validate_geometry(cl, 1, 0, tg_cookie); + rval = cmlb_validate_geometry(cl, B_TRUE, 0, tg_cookie); #if defined(_SUNOS_VTOC_8) if (rval == EINVAL && cl->cl_alter_behavior & CMLB_FAKE_GEOM_LABEL_IOCTLS_VTOC8) { @@ -3105,7 +3111,7 @@ cmlb_dkio_set_geometry(struct cmlb_lun *cl, caddr_t arg, int flag) cl->cl_offset[i] += cl->cl_solaris_offset; #endif } - cl->cl_f_geometry_is_valid = FALSE; + cl->cl_f_geometry_is_valid = B_FALSE; mutex_exit(CMLB_MUTEX(cl)); kmem_free(tmp_geom, sizeof (struct dk_geom)); @@ -3145,7 +3151,7 @@ cmlb_dkio_get_partition(struct cmlb_lun *cl, caddr_t arg, int flag, * information. 
*/ mutex_enter(CMLB_MUTEX(cl)); - if ((rval = cmlb_validate_geometry(cl, 1, 0, tg_cookie)) != 0) { + if ((rval = cmlb_validate_geometry(cl, B_TRUE, 0, tg_cookie)) != 0) { mutex_exit(CMLB_MUTEX(cl)); return (rval); } @@ -3332,7 +3338,7 @@ cmlb_dkio_get_vtoc(struct cmlb_lun *cl, caddr_t arg, int flag, void *tg_cookie) return (EOVERFLOW); } - rval = cmlb_validate_geometry(cl, 1, 0, tg_cookie); + rval = cmlb_validate_geometry(cl, B_TRUE, 0, tg_cookie); #if defined(_SUNOS_VTOC_8) if (rval == EINVAL && @@ -3445,7 +3451,7 @@ cmlb_dkio_get_extvtoc(struct cmlb_lun *cl, caddr_t arg, int flag, bzero(&ext_vtoc, sizeof (struct extvtoc)); mutex_enter(CMLB_MUTEX(cl)); - rval = cmlb_validate_geometry(cl, 1, 0, tg_cookie); + rval = cmlb_validate_geometry(cl, B_TRUE, 0, tg_cookie); #if defined(_SUNOS_VTOC_8) if (rval == EINVAL && @@ -3705,7 +3711,8 @@ cmlb_dkio_set_vtoc(struct cmlb_lun *cl, dev_t dev, caddr_t arg, int flag, int rval = 0; boolean_t internal; - internal = ((cl->cl_alter_behavior & (CMLB_INTERNAL_MINOR_NODES)) != 0); + internal = VOID2BOOLEAN( + (cl->cl_alter_behavior & (CMLB_INTERNAL_MINOR_NODES)) != 0); #ifdef _MULTI_DATAMODEL switch (ddi_model_convert_from(flag & FMODELS)) { @@ -3777,7 +3784,8 @@ cmlb_dkio_set_vtoc(struct cmlb_lun *cl, dev_t dev, caddr_t arg, int flag, if ((rval = cmlb_build_label_vtoc(cl, &user_vtoc)) == 0) { if ((rval = cmlb_write_label(cl, tg_cookie)) == 0) { - if (cmlb_validate_geometry(cl, 1, 0, tg_cookie) != 0) { + if (cmlb_validate_geometry(cl, + B_TRUE, 0, tg_cookie) != 0) { cmlb_dbg(CMLB_ERROR, cl, "cmlb_dkio_set_vtoc: " "Failed validate geometry\n"); @@ -3822,7 +3830,8 @@ cmlb_dkio_set_extvtoc(struct cmlb_lun *cl, dev_t dev, caddr_t arg, int flag, vtoctovtoc32(user_extvtoc, user_vtoc); #endif - internal = ((cl->cl_alter_behavior & (CMLB_INTERNAL_MINOR_NODES)) != 0); + internal = VOID2BOOLEAN( + (cl->cl_alter_behavior & (CMLB_INTERNAL_MINOR_NODES)) != 0); mutex_enter(CMLB_MUTEX(cl)); #if defined(__i386) || defined(__amd64) if (cl->cl_tgt_blocksize != cl->cl_sys_blocksize) { @@ -3851,7 +3860,8 @@ cmlb_dkio_set_extvtoc(struct cmlb_lun *cl, dev_t dev, caddr_t arg, int flag, if ((rval = cmlb_build_label_vtoc(cl, &user_vtoc)) == 0) { if ((rval = cmlb_write_label(cl, tg_cookie)) == 0) { - if (cmlb_validate_geometry(cl, 1, 0, tg_cookie) != 0) { + if (cmlb_validate_geometry(cl, + B_TRUE, 0, tg_cookie) != 0) { cmlb_dbg(CMLB_ERROR, cl, "cmlb_dkio_set_vtoc: " "Failed validate geometry\n"); @@ -4307,7 +4317,8 @@ cmlb_dkio_set_efi(struct cmlb_lun *cl, dev_t dev, caddr_t arg, int flag, if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag)) return (EFAULT); - internal = ((cl->cl_alter_behavior & (CMLB_INTERNAL_MINOR_NODES)) != 0); + internal = VOID2BOOLEAN( + (cl->cl_alter_behavior & (CMLB_INTERNAL_MINOR_NODES)) != 0); user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64; @@ -4359,7 +4370,7 @@ cmlb_dkio_set_efi(struct cmlb_lun *cl, dev_t dev, caddr_t arg, int flag, if (rval == 0) { mutex_enter(CMLB_MUTEX(cl)); - cl->cl_f_geometry_is_valid = FALSE; + cl->cl_f_geometry_is_valid = B_FALSE; mutex_exit(CMLB_MUTEX(cl)); } } @@ -4491,7 +4502,7 @@ cmlb_dkio_set_mboot(struct cmlb_lun *cl, caddr_t arg, int flag, void *tg_cookie) * update the fdisk and vtoc tables in memory */ rval = cmlb_update_fdisk_and_vtoc(cl, tg_cookie); - if ((cl->cl_f_geometry_is_valid == FALSE) || (rval != 0)) { + if ((!cl->cl_f_geometry_is_valid) || (rval != 0)) { mutex_exit(CMLB_MUTEX(cl)); kmem_free(mboot, (size_t)(sizeof (struct mboot))); return (rval); @@ -4572,7 +4583,7 @@ 
cmlb_setup_default_geometry(struct cmlb_lun *cl, void *tg_cookie) ret = DK_TG_GETPHYGEOM(cl, pgeomp, tg_cookie); mutex_enter(CMLB_MUTEX(cl)); - if (ret == 0) { + if (ret == 0) { geom_base_cap = 0; } else { cmlb_dbg(CMLB_ERROR, cl, @@ -4648,7 +4659,7 @@ cmlb_setup_default_geometry(struct cmlb_lun *cl, void *tg_cookie) " hd %d sec %d", cl->cl_g.dkg_ncyl, cl->cl_g.dkg_acyl, cl->cl_g.dkg_nhead, cl->cl_g.dkg_nsect); - cl->cl_f_geometry_is_valid = FALSE; + cl->cl_f_geometry_is_valid = B_FALSE; } @@ -4727,7 +4738,7 @@ cmlb_update_fdisk_and_vtoc(struct cmlb_lun *cl, void *tg_cookie) * a default label. */ label_rc = 0; - cl->cl_f_geometry_is_valid = TRUE; + cl->cl_f_geometry_is_valid = B_TRUE; goto no_solaris_partition; } } else if (capacity < 0) { @@ -4738,11 +4749,11 @@ cmlb_update_fdisk_and_vtoc(struct cmlb_lun *cl, void *tg_cookie) /* * For Removable media We reach here if we have found a * SOLARIS PARTITION. - * If cl_f_geometry_is_valid is FALSE it indicates that the SOLARIS + * If cl_f_geometry_is_valid is B_FALSE it indicates that the SOLARIS * PARTITION has changed from the previous one, hence we will setup a * default VTOC in this case. */ - if (cl->cl_f_geometry_is_valid == FALSE) { + if (!cl->cl_f_geometry_is_valid) { /* if we get here it is writable */ /* we are called from SMBOOT, and after a write of fdisk */ cmlb_build_default_label(cl, tg_cookie); diff --git a/usr/src/uts/common/io/scsi/targets/sd.c b/usr/src/uts/common/io/scsi/targets/sd.c index ca11d4aa41..158872e401 100644 --- a/usr/src/uts/common/io/scsi/targets/sd.c +++ b/usr/src/uts/common/io/scsi/targets/sd.c @@ -74,10 +74,10 @@ #if (defined(__fibre)) #define SD_MODULE_NAME "SCSI SSA/FCAL Disk Driver" char _depends_on[] = "misc/scsi misc/cmlb drv/fcp"; -#else +#else /* !__fibre */ #define SD_MODULE_NAME "SCSI Disk Driver" char _depends_on[] = "misc/scsi misc/cmlb"; -#endif +#endif /* !__fibre */ /* * Define the interconnect type, to allow the driver to distinguish @@ -1674,7 +1674,7 @@ static struct cb_ops sd_cb_ops = { sdawrite /* async I/O write entry point */ }; -static struct dev_ops sd_ops = { +struct dev_ops sd_ops = { DEVO_REV, /* devo_rev, */ 0, /* refcnt */ sdinfo, /* info */ @@ -1689,30 +1689,39 @@ static struct dev_ops sd_ops = { ddi_quiesce_not_needed, /* quiesce */ }; - /* * This is the loadable module wrapper. */ #include <sys/modctl.h> +#ifndef XPV_HVM_DRIVER static struct modldrv modldrv = { &mod_driverops, /* Type of module. This one is a driver */ SD_MODULE_NAME, /* Module name. */ &sd_ops /* driver ops */ }; +static struct modlinkage modlinkage = { + MODREV_1, &modldrv, NULL +}; + +#else /* XPV_HVM_DRIVER */ +static struct modlmisc modlmisc = { + &mod_miscops, /* Type of module. This one is a misc */ + "HVM " SD_MODULE_NAME, /* Module name. 
*/ +}; static struct modlinkage modlinkage = { - MODREV_1, - &modldrv, - NULL + MODREV_1, &modlmisc, NULL }; +#endif /* XPV_HVM_DRIVER */ + static cmlb_tg_ops_t sd_tgops = { TG_DK_OPS_VERSION_1, sd_tg_rdwr, sd_tg_getinfo - }; +}; static struct scsi_asq_key_strings sd_additional_codes[] = { 0x81, 0, "Logical Unit is Reserved", @@ -2204,13 +2213,20 @@ _init(void) /* establish driver name from module name */ sd_label = (char *)mod_modname(&modlinkage); +#ifndef XPV_HVM_DRIVER err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun), SD_MAXUNIT); - if (err != 0) { return (err); } +#else /* XPV_HVM_DRIVER */ + /* Remove the leading "hvm_" from the module name */ + ASSERT(strncmp(sd_label, "hvm_", strlen("hvm_")) == 0); + sd_label += strlen("hvm_"); + +#endif /* XPV_HVM_DRIVER */ + mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL); mutex_init(&sd_log_mutex, NULL, MUTEX_DRIVER, NULL); mutex_init(&sd_label_mutex, NULL, MUTEX_DRIVER, NULL); @@ -2250,7 +2266,9 @@ _init(void) sd_scsi_target_lun_fini(); +#ifndef XPV_HVM_DRIVER ddi_soft_state_fini(&sd_state); +#endif /* !XPV_HVM_DRIVER */ return (err); } @@ -2291,7 +2309,9 @@ _fini(void) cv_destroy(&sd_tr.srq_resv_reclaim_cv); cv_destroy(&sd_tr.srq_inprocess_cv); +#ifndef XPV_HVM_DRIVER ddi_soft_state_fini(&sd_state); +#endif /* !XPV_HVM_DRIVER */ return (err); } @@ -2495,7 +2515,9 @@ sdprobe(dev_info_t *devi) { struct scsi_device *devp; int rval; - int instance; +#ifndef XPV_HVM_DRIVER + int instance = ddi_get_instance(devi); +#endif /* !XPV_HVM_DRIVER */ /* * if it wasn't for pln, sdprobe could actually be nulldev @@ -2512,11 +2534,11 @@ sdprobe(dev_info_t *devi) return (DDI_PROBE_FAILURE); } - instance = ddi_get_instance(devi); - +#ifndef XPV_HVM_DRIVER if (ddi_get_soft_state(sd_state, instance) != NULL) { return (DDI_PROBE_PARTIAL); } +#endif /* !XPV_HVM_DRIVER */ /* * Call the SCSA utility probe routine to see if we actually @@ -6903,9 +6925,11 @@ sd_unit_attach(dev_info_t *devi) * this routine will have a value of zero. */ instance = ddi_get_instance(devp->sd_dev); +#ifndef XPV_HVM_DRIVER if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) { goto probe_failed; } +#endif /* !XPV_HVM_DRIVER */ /* * Retrieve a pointer to the newly-allocated soft state. @@ -7798,7 +7822,8 @@ sd_unit_attach(dev_info_t *devi) #endif if (cmlb_attach(devi, &sd_tgops, (int)devp->sd_inq->inq_dtype, - un->un_f_has_removable_media, un->un_f_is_hotpluggable, + VOID2BOOLEAN(un->un_f_has_removable_media != 0), + VOID2BOOLEAN(un->un_f_is_hotpluggable != 0), un->un_node_type, offbyone, un->un_cmlbhandle, (void *)SD_PATH_DIRECT) != 0) { goto cmlb_attach_failed; @@ -8082,7 +8107,9 @@ get_softstate_failed: * ddi_get_soft_state() fails. The implication seems to be * that the get_soft_state cannot fail if the zalloc succeeds. 
*/ +#ifndef XPV_HVM_DRIVER ddi_soft_state_free(sd_state, instance); +#endif /* !XPV_HVM_DRIVER */ probe_failed: scsi_unprobe(devp); @@ -8111,7 +8138,9 @@ sd_unit_detach(dev_info_t *devi) int tgt; dev_t dev; dev_info_t *pdip = ddi_get_parent(devi); +#ifndef XPV_HVM_DRIVER int instance = ddi_get_instance(devi); +#endif /* !XPV_HVM_DRIVER */ mutex_enter(&sd_detach_mutex); @@ -8523,7 +8552,9 @@ sd_unit_detach(dev_info_t *devi) devp->sd_private = NULL; bzero(un, sizeof (struct sd_lun)); +#ifndef XPV_HVM_DRIVER ddi_soft_state_free(sd_state, instance); +#endif /* !XPV_HVM_DRIVER */ mutex_exit(&sd_detach_mutex); @@ -10148,7 +10179,7 @@ sd_ready_and_valid(sd_ssc_t *ssc, int part) uint_t lbasize; int rval = SD_READY_VALID; char name_str[48]; - int is_valid; + boolean_t is_valid; struct sd_lun *un; int status; @@ -21268,7 +21299,7 @@ sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p) int i = 0; cred_t *cr; int tmprval = EINVAL; - int is_valid; + boolean_t is_valid; sd_ssc_t *ssc; /* diff --git a/usr/src/uts/common/os/modctl.c b/usr/src/uts/common/os/modctl.c index b8f4d61378..4b5a3685d7 100644 --- a/usr/src/uts/common/os/modctl.c +++ b/usr/src/uts/common/os/modctl.c @@ -477,22 +477,6 @@ modctl_modreserve(modid_t id, int *data) return (0); } -/* to be removed when Ed introduces these */ -static char * -ddi_strdup(const char *str, int flag) -{ - char *rv; - int n = strlen(str) + 1; - rv = kmem_alloc(n, flag); - bcopy(str, rv, n); - return (rv); -} -static void -strfree(char *str) -{ - kmem_free(str, strlen(str)+1); -} - /* Add/Remove driver and binding aliases */ static int modctl_update_driver_aliases(int add, int *data) @@ -603,7 +587,7 @@ modctl_update_driver_aliases(int add, int *data) } #endif check_esc_sequences(name, cname); - aip->alias_name = ddi_strdup(cname, KM_SLEEP); + aip->alias_name = strdup(cname); ap = alias.a_next; aip++; } diff --git a/usr/src/uts/common/os/sunddi.c b/usr/src/uts/common/os/sunddi.c index 70d6e0a6d1..827ee186ee 100644 --- a/usr/src/uts/common/os/sunddi.c +++ b/usr/src/uts/common/os/sunddi.c @@ -8963,6 +8963,35 @@ ddi_quiesce_not_supported(dev_info_t *dip) return (DDI_FAILURE); } +char * +ddi_strdup(const char *str, int flag) +{ + int n; + char *ptr; + + ASSERT(str != NULL); + ASSERT((flag == KM_SLEEP) || (flag == KM_NOSLEEP)); + + n = strlen(str); + if ((ptr = kmem_alloc(n + 1, flag)) == NULL) + return (NULL); + bcopy(str, ptr, n + 1); + return (ptr); +} + +char * +strdup(const char *str) +{ + return (ddi_strdup(str, KM_SLEEP)); +} + +void +strfree(char *str) +{ + ASSERT(str != NULL); + kmem_free(str, strlen(str) + 1); +} + /* * Generic DDI callback interfaces. */ diff --git a/usr/src/uts/common/sys/cmlb.h b/usr/src/uts/common/sys/cmlb.h index e95cce9ade..cf1db2a1d3 100644 --- a/usr/src/uts/common/sys/cmlb.h +++ b/usr/src/uts/common/sys/cmlb.h @@ -19,15 +19,13 @@ * CDDL HEADER END */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_CMLB_H #define _SYS_CMLB_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -246,10 +244,8 @@ cmlb_alloc_handle(cmlb_handle_t *cmlbhandlep); * scsi/generic/inquiry.h * * is_removable whether or not device is removable. - * 0 non-removable, 1 removable. * * is_hotpluggable whether or not device is hotpluggable. - * 0 non-hotpluggable, 1 hotpluggable. 
* * node_type minor node type (as used by ddi_create_minor_node) * @@ -308,7 +304,7 @@ cmlb_alloc_handle(cmlb_handle_t *cmlbhandlep); */ int cmlb_attach(dev_info_t *devi, cmlb_tg_ops_t *tgopsp, int device_type, - int is_removable, int is_hotpluggable, char *node_type, + boolean_t is_removable, boolean_t is_hotpluggable, char *node_type, int alter_behavior, cmlb_handle_t cmlbhandle, void *tg_cookie); @@ -370,7 +366,7 @@ cmlb_invalidate(cmlb_handle_t cmlbhandle, void *tg_cookie); * FALSE otherwise. * */ -int +boolean_t cmlb_is_valid(cmlb_handle_t cmlbhandle); /* diff --git a/usr/src/uts/common/sys/cmlb_impl.h b/usr/src/uts/common/sys/cmlb_impl.h index 797213c52a..b77f6c0239 100644 --- a/usr/src/uts/common/sys/cmlb_impl.h +++ b/usr/src/uts/common/sys/cmlb_impl.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -72,9 +72,9 @@ extern "C" { #define CMLB_LABEL(cl) (DEVI(((cl)->cl_devi))->devi_binding_name) -#define ISREMOVABLE(cl) (cl->cl_is_removable == 1) +#define ISREMOVABLE(cl) (cl->cl_is_removable) #define ISCD(cl) (cl->cl_device_type == DTYPE_RODIRECT) -#define ISHOTPLUGGABLE(cl) (cl->cl_is_hotpluggable == 1) +#define ISHOTPLUGGABLE(cl) (cl->cl_is_hotpluggable) #if defined(_SUNOS_VTOC_8) @@ -93,10 +93,6 @@ extern "C" { #define CMLBUNIT(dev) (getminor((dev)) >> CMLBUNIT_SHIFT) #define CMLBPART(dev) (getminor((dev)) & CMLBPART_MASK) - -#define TRUE 1 -#define FALSE 0 - /* * Return codes of cmlb_uselabel(). */ @@ -181,14 +177,14 @@ typedef struct cmlb_lun { cmlb_state_t cl_state; /* state of handle */ - int cl_f_geometry_is_valid; + boolean_t cl_f_geometry_is_valid; int cl_sys_blocksize; kmutex_t cl_mutex; /* the following are passed in at attach time */ - int cl_is_removable; /* 1 is removable */ - int cl_is_hotpluggable; /* 1 is hotpluggable */ + boolean_t cl_is_removable; /* is removable */ + boolean_t cl_is_hotpluggable; /* is hotpluggable */ int cl_alter_behavior; char *cl_node_type; /* DDI_NT_... */ int cl_device_type; /* DTYPE_DIRECT,.. */ diff --git a/usr/src/uts/common/sys/dktp/bbh.h b/usr/src/uts/common/sys/dktp/bbh.h index d6176ce47f..31b3cedd88 100644 --- a/usr/src/uts/common/sys/dktp/bbh.h +++ b/usr/src/uts/common/sys/dktp/bbh.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,14 +19,14 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ #ifndef _SYS_DKTP_BBH_H #define _SYS_DKTP_BBH_H -#pragma ident "%Z%%M% %I% %E% SMI" +#include <sys/scsi/scsi_types.h> #ifdef __cplusplus extern "C" { diff --git a/usr/src/uts/common/sys/sunddi.h b/usr/src/uts/common/sys/sunddi.h index 51627aa992..7f619b8da6 100644 --- a/usr/src/uts/common/sys/sunddi.h +++ b/usr/src/uts/common/sys/sunddi.h @@ -388,6 +388,10 @@ typedef enum { #define DDI_MODEL_NATIVE DATAMODEL_NATIVE #define DDI_MODEL_NONE DATAMODEL_NONE +extern char *ddi_strdup(const char *str, int flag); +extern char *strdup(const char *str); +extern void strfree(char *str); + /* * Functions and data references which really should be in <sys/ddi.h> */ diff --git a/usr/src/uts/common/sys/types.h b/usr/src/uts/common/sys/types.h index 1745b91e87..8f133c1056 100644 --- a/usr/src/uts/common/sys/types.h +++ b/usr/src/uts/common/sys/types.h @@ -23,15 +23,13 @@ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #ifndef _SYS_TYPES_H #define _SYS_TYPES_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/feature_tests.h> #include <sys/isa_defs.h> @@ -191,8 +189,16 @@ typedef long blksize_t; /* used for block sizes */ typedef enum { _B_FALSE, _B_TRUE } boolean_t; #else typedef enum { B_FALSE, B_TRUE } boolean_t; +#ifdef _KERNEL +#define VALID_BOOLEAN(x) (((x) == B_FALSE) || ((x) == B_TRUE)) +#define VOID2BOOLEAN(x) (((uintptr_t)(x) == 0) ? B_FALSE : B_TRUE) +#endif /* _KERNEL */ #endif /* defined(__XOPEN_OR_POSIX) */ +#ifdef _KERNEL +#define BOOLEAN2VOID(x) ((x) ? 1 : 0) +#endif /* _KERNEL */ + /* * The {u,}pad64_t types can be used in structures such that those structures * may be accessed by code produced by compilation environments which don't diff --git a/usr/src/uts/common/xen/io/xdb.c b/usr/src/uts/common/xen/io/xdb.c index 5f327bb9ff..16fd5aff9d 100644 --- a/usr/src/uts/common/xen/io/xdb.c +++ b/usr/src/uts/common/xen/io/xdb.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -62,6 +62,7 @@ #include <sys/promif.h> #include <sys/sysmacros.h> #include <public/io/xenbus.h> +#include <public/io/xs_wire.h> #include <xen/sys/xenbus_impl.h> #include <xen/sys/xendev.h> #include <sys/gnttab.h> @@ -77,10 +78,13 @@ static xdb_t *xdb_statep; static int xdb_debug = 0; +static void xdb_close(dev_info_t *); static int xdb_push_response(xdb_t *, uint64_t, uint8_t, uint16_t); static int xdb_get_request(xdb_t *, blkif_request_t *); static void blkif_get_x86_32_req(blkif_request_t *, blkif_x86_32_request_t *); static void blkif_get_x86_64_req(blkif_request_t *, blkif_x86_64_request_t *); +static int xdb_biodone(buf_t *); + #ifdef DEBUG /* @@ -216,7 +220,18 @@ xdb_kstat_init(xdb_t *vdp) return (B_TRUE); } -static int xdb_biodone(buf_t *); +static char * +i_pathname(dev_info_t *dip) +{ + char *path, *rv; + + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, path); + rv = strdup(path); + kmem_free(path, MAXPATHLEN); + + return (rv); +} static buf_t * xdb_get_buf(xdb_t *vdp, blkif_request_t *req, xdb_request_t *xreq) @@ -501,14 +516,13 @@ xdb_uninit_ioreqs(xdb_t *vdp) static uint_t xdb_intr(caddr_t arg) { - blkif_request_t req; - blkif_request_t *reqp = &req; - xdb_request_t *xreq; - buf_t *bp; - uint8_t op; - xdb_t *vdp = (xdb_t *)arg; - int ret = DDI_INTR_UNCLAIMED; - dev_info_t *dip = vdp->xs_dip; + xdb_t *vdp = (xdb_t *)arg; + dev_info_t *dip = vdp->xs_dip; + blkif_request_t req, *reqp = &req; + xdb_request_t *xreq; + buf_t *bp; + uint8_t op; + int ret = DDI_INTR_UNCLAIMED; XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "xdb@%s: I/O request received from dom %d", @@ -517,10 +531,11 @@ xdb_intr(caddr_t arg) mutex_enter(&vdp->xs_iomutex); /* shouldn't touch ring buffer if not in connected state */ - if (vdp->xs_if_status != XDB_CONNECTED) { + if (!vdp->xs_if_connected) { mutex_exit(&vdp->xs_iomutex); return (DDI_INTR_UNCLAIMED); } + ASSERT(vdp->xs_hp_connected && vdp->xs_fe_initialised); /* * We'll loop till there is no more request in the ring @@ -672,7 +687,8 @@ xdb_biodone(buf_t *bp) mutex_enter(&vdp->xs_iomutex); /* send response back to frontend */ - if (vdp->xs_if_status == XDB_CONNECTED) { + if (vdp->xs_if_connected) { + ASSERT(vdp->xs_hp_connected && vdp->xs_fe_initialised); if (xdb_push_response(vdp, xreq->xr_id, xreq->xr_op, bioerr)) xvdi_notify_oe(vdp->xs_dip); XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, @@ -684,7 +700,7 @@ xdb_biodone(buf_t *bp) xdb_free_req(xreq); vdp->xs_ionum--; - if ((vdp->xs_if_status != XDB_CONNECTED) && (vdp->xs_ionum == 0)) { + if (!vdp->xs_if_connected && (vdp->xs_ionum == 0)) { /* we're closing, someone is waiting for I/O clean-up */ cv_signal(&vdp->xs_ionumcv); } @@ -704,6 +720,14 @@ xdb_bindto_frontend(xdb_t *vdp) dev_info_t *dip = vdp->xs_dip; char protocol[64] = ""; + ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); + + /* + * Switch to the XenbusStateInitialised state. This let's the + * frontend know that we're about to negotiate a connection. 
+ */ + (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialised); + /* * Gather info from frontend */ @@ -712,9 +736,11 @@ xdb_bindto_frontend(xdb_t *vdp) return (DDI_FAILURE); err = xenbus_gather(XBT_NULL, oename, - "ring-ref", "%lu", &gref, "event-channel", "%u", &evtchn, NULL); + XBP_RING_REF, "%lu", &gref, + XBP_EVENT_CHAN, "%u", &evtchn, + NULL); if (err != 0) { - xvdi_fatal_error(dip, err, + xvdi_dev_error(dip, err, "Getting ring-ref and evtchn from frontend"); return (DDI_FAILURE); } @@ -724,7 +750,7 @@ xdb_bindto_frontend(xdb_t *vdp) vdp->xs_entrysize = sizeof (union blkif_sring_entry); err = xenbus_gather(XBT_NULL, oename, - "protocol", "%63s", protocol, NULL); + XBP_PROTOCOL, "%63s", protocol, NULL); if (err) (void) strcpy(protocol, "unspecified, assuming native"); else { @@ -756,15 +782,13 @@ xdb_bindto_frontend(xdb_t *vdp) #endif /* - * map and init ring - * - * The ring parameters must match those which have been allocated - * in the front end. + * Map and init ring. The ring parameters must match those which + * have been allocated in the front end. */ - err = xvdi_map_ring(dip, vdp->xs_nentry, vdp->xs_entrysize, - gref, &vdp->xs_ring); - if (err != DDI_SUCCESS) + if (xvdi_map_ring(dip, vdp->xs_nentry, vdp->xs_entrysize, + gref, &vdp->xs_ring) != DDI_SUCCESS) return (DDI_FAILURE); + /* * This will be removed after we use shadow I/O ring request since * we don't need to access the ring itself directly, thus the access @@ -772,9 +796,7 @@ xdb_bindto_frontend(xdb_t *vdp) */ vdp->xs_ring_hdl = vdp->xs_ring->xr_acc_hdl; - /* - * bind event channel - */ + /* bind event channel */ err = xvdi_bind_evtchn(dip, evtchn); if (err != DDI_SUCCESS) { xvdi_unmap_ring(vdp->xs_ring); @@ -787,43 +809,313 @@ xdb_bindto_frontend(xdb_t *vdp) static void xdb_unbindfrom_frontend(xdb_t *vdp) { + ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); + xvdi_free_evtchn(vdp->xs_dip); xvdi_unmap_ring(vdp->xs_ring); } +/* + * xdb_params_change() initiates a allows change to the underlying device/file + * that the backend is accessing. It does this by disconnecting from the + * frontend, closing the old device, clearing a bunch of xenbus parameters, + * and switching back to the XenbusStateInitialising state. The frontend + * should notice this transition to the XenbusStateInitialising state and + * should attempt to reconnect to us (the backend). + */ +static void +xdb_params_change(xdb_t *vdp, char *params, boolean_t update_xs) +{ + xenbus_transaction_t xbt; + dev_info_t *dip = vdp->xs_dip; + char *xsname; + int err; + + ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); + ASSERT(vdp->xs_params_path != NULL); + + if ((xsname = xvdi_get_xsname(dip)) == NULL) + return; + if (strcmp(vdp->xs_params_path, params) == 0) + return; + + /* + * Close the device we're currently accessing and update the + * path which points to our backend device/file. + */ + xdb_close(dip); + vdp->xs_fe_initialised = B_FALSE; + +trans_retry: + if ((err = xenbus_transaction_start(&xbt)) != 0) { + xvdi_dev_error(dip, err, "params change transaction init"); + goto errout; + } + + /* + * Delete all the xenbus properties that are connection dependant + * and go back to the initializing state so that the frontend + * driver can re-negotiate a connection. 
+ */ + if (((err = xenbus_rm(xbt, xsname, XBP_FB)) != 0) || + ((err = xenbus_rm(xbt, xsname, XBP_INFO)) != 0) || + ((err = xenbus_rm(xbt, xsname, "sector-size")) != 0) || + ((err = xenbus_rm(xbt, xsname, XBP_SECTORS)) != 0) || + ((err = xenbus_rm(xbt, xsname, "instance")) != 0) || + ((err = xenbus_rm(xbt, xsname, "node")) != 0) || + (update_xs && ((err = xenbus_printf(xbt, xsname, + "params", "%s", params)) != 0)) || + ((err = xvdi_switch_state(dip, + xbt, XenbusStateInitialising) > 0))) { + (void) xenbus_transaction_end(xbt, 1); + xvdi_dev_error(dip, err, "params change transaction setup"); + goto errout; + } + + if ((err = xenbus_transaction_end(xbt, 0)) != 0) { + if (err == EAGAIN) { + /* transaction is ended, don't need to abort it */ + goto trans_retry; + } + xvdi_dev_error(dip, err, "params change transaction commit"); + goto errout; + } + + /* Change the device that we plan to access */ + strfree(vdp->xs_params_path); + vdp->xs_params_path = strdup(params); + return; + +errout: + (void) xvdi_switch_state(dip, xbt, XenbusStateInitialising); +} + +/* + * xdb_watch_params_cb() - This callback is invoked whenever there + * is an update to the following xenbus parameter: + * /local/domain/0/backend/vbd/<domU_id>/<domU_dev>/params + * + * This normally happens during xm block-configure operations, which + * are used to change CD device images for HVM domUs. + */ +/*ARGSUSED*/ +static void +xdb_watch_params_cb(dev_info_t *dip, const char *path, void *arg) +{ + xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip); + char *xsname, *oename, *str, *str2; + + if (((xsname = xvdi_get_xsname(dip)) == NULL) || + ((oename = xvdi_get_oename(dip)) == NULL)) { + return; + } + + mutex_enter(&vdp->xs_cbmutex); + + if (xenbus_read_str(xsname, "params", &str) != 0) { + mutex_exit(&vdp->xs_cbmutex); + return; + } + + if (strcmp(vdp->xs_params_path, str) == 0) { + /* Nothing todo */ + mutex_exit(&vdp->xs_cbmutex); + strfree(str); + return; + } + + /* + * If the frontend isn't a cd device, doesn't support media + * requests, or has locked the media, then we can't change + * the params value. restore the current value. + */ + str2 = NULL; + if (!XDB_IS_FE_CD(vdp) || + (xenbus_read_str(oename, XBP_MEDIA_REQ, &str2) != 0) || + (strcmp(str2, XBV_MEDIA_REQ_LOCK) == 0)) { + if (str2 != NULL) + strfree(str2); + strfree(str); + + str = i_pathname(dip); + cmn_err(CE_NOTE, + "!%s: media locked, ignoring params update", str); + strfree(str); + + mutex_exit(&vdp->xs_cbmutex); + return; + } + + XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, + "block-configure params request: \"%s\"", str)); + + xdb_params_change(vdp, str, B_FALSE); + mutex_exit(&vdp->xs_cbmutex); + strfree(str); +} + +/* + * xdb_watch_media_req_cb() - This callback is invoked whenever there + * is an update to the following xenbus parameter: + * /local/domain/<domU_id>/device/vbd/<domU_dev>/media-req + * + * Media requests are only supported on CD devices and are issued by + * the frontend. Currently the only supported media request operaions + * are "lock" and "eject". A "lock" prevents the backend from changing + * the backing device/file (via xm block-configure). An "eject" requests + * tells the backend device that it should disconnect from the frontend + * and closing the backing device/file that is currently in use. 
+ */ +/*ARGSUSED*/ +static void +xdb_watch_media_req_cb(dev_info_t *dip, const char *path, void *arg) +{ + xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip); + char *oename, *str; + + mutex_enter(&vdp->xs_cbmutex); + + if ((oename = xvdi_get_oename(dip)) == NULL) { + mutex_exit(&vdp->xs_cbmutex); + return; + } + + if (xenbus_read_str(oename, XBP_MEDIA_REQ, &str) != 0) { + mutex_exit(&vdp->xs_cbmutex); + return; + } + + if (!XDB_IS_FE_CD(vdp)) { + xvdi_dev_error(dip, EINVAL, + "media-req only supported for cdrom devices"); + mutex_exit(&vdp->xs_cbmutex); + return; + } + + if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) { + mutex_exit(&vdp->xs_cbmutex); + strfree(str); + return; + } + strfree(str); + + XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "media eject request")); + + xdb_params_change(vdp, "", B_TRUE); + (void) xenbus_printf(XBT_NULL, oename, + XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE); + mutex_exit(&vdp->xs_cbmutex); +} + +/* + * If we're dealing with a cdrom device, let the frontend know that + * we support media requests via XBP_MEDIA_REQ_SUP, and setup a watch + * to handle those frontend media request changes, which modify the + * following xenstore parameter: + * /local/domain/<domU_id>/device/vbd/<domU_dev>/media-req + */ +static boolean_t +xdb_media_req_init(xdb_t *vdp) +{ + dev_info_t *dip = vdp->xs_dip; + char *xsname, *oename; + + ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); + + if (((xsname = xvdi_get_xsname(dip)) == NULL) || + ((oename = xvdi_get_oename(dip)) == NULL)) + return (B_FALSE); + + if (!XDB_IS_FE_CD(vdp)) + return (B_TRUE); + + if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ_SUP, "%d", 1) != 0) + return (B_FALSE); + + if (xvdi_add_xb_watch_handler(dip, oename, + XBP_MEDIA_REQ, xdb_watch_media_req_cb, NULL) != DDI_SUCCESS) { + xvdi_dev_error(dip, EAGAIN, + "Failed to register watch for cdrom media requests"); + return (B_FALSE); + } + + return (B_TRUE); +} + +/* + * Get our params value. 
Also, if we're using "params" then setup a + * watch to handle xm block-configure operations which modify the + * following xenstore parameter: + * /local/domain/0/backend/vbd/<domU_id>/<domU_dev>/params + */ +static boolean_t +xdb_params_init(xdb_t *vdp) +{ + dev_info_t *dip = vdp->xs_dip; + char *str, *xsname; + int err, watch_params = B_FALSE; + + ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); + ASSERT(vdp->xs_params_path == NULL); + + if ((xsname = xvdi_get_xsname(dip)) == NULL) + return (B_FALSE); + + if ((err = xenbus_read_str(xsname, + "dynamic-device-path", &str)) == ENOENT) { + err = xenbus_read_str(xsname, "params", &str); + watch_params = B_TRUE; + } + if (err != 0) + return (B_FALSE); + vdp->xs_params_path = str; + + /* + * If we got our backing store path from "dynamic-device-path" then + * there's no reason to watch "params" + */ + if (!watch_params) + return (B_TRUE); + + if (xvdi_add_xb_watch_handler(dip, xsname, "params", + xdb_watch_params_cb, NULL) != DDI_SUCCESS) { + strfree(vdp->xs_params_path); + vdp->xs_params_path = NULL; + return (B_FALSE); + } + + return (B_TRUE); +} + #define LOFI_CTRL_NODE "/dev/lofictl" #define LOFI_DEV_NODE "/devices/pseudo/lofi@0:" -#define LOFI_MODE FREAD | FWRITE | FEXCL +#define LOFI_MODE (FREAD | FWRITE | FEXCL) static int xdb_setup_node(xdb_t *vdp, char *path) { - dev_info_t *dip; - char *xsnode, *node; - ldi_handle_t ldi_hdl; - struct lofi_ioctl *li; - int minor; - int err; - unsigned int len; + dev_info_t *dip = vdp->xs_dip; + char *xsname, *str; + ldi_handle_t ldi_hdl; + struct lofi_ioctl *li; + int minor, err; - dip = vdp->xs_dip; - xsnode = xvdi_get_xsname(dip); - if (xsnode == NULL) + ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); + + if ((xsname = xvdi_get_xsname(dip)) == NULL) return (DDI_FAILURE); - err = xenbus_read(XBT_NULL, xsnode, "dynamic-device-path", - (void **)&node, &len); - if (err == ENOENT) - err = xenbus_read(XBT_NULL, xsnode, "params", (void **)&node, - &len); - if (err != 0) { - xvdi_fatal_error(vdp->xs_dip, err, "reading 'params'"); + if ((err = xenbus_read_str(xsname, "type", &str)) != 0) { + xvdi_dev_error(dip, err, "Getting type from backend device"); return (DDI_FAILURE); } + if (strcmp(str, "file") == 0) + vdp->xs_type |= XDB_DEV_BE_LOFI; + strfree(str); - if (!XDB_IS_LOFI(vdp)) { - (void) strlcpy(path, node, MAXPATHLEN); - kmem_free(node, len); + if (!XDB_IS_BE_LOFI(vdp)) { + (void) strlcpy(path, vdp->xs_params_path, MAXPATHLEN); + ASSERT(vdp->xs_lofi_path == NULL); return (DDI_SUCCESS); } @@ -832,63 +1124,55 @@ xdb_setup_node(xdb_t *vdp, char *path) &ldi_hdl, vdp->xs_ldi_li); } while (err == EBUSY); if (err != 0) { - kmem_free(node, len); return (DDI_FAILURE); } li = kmem_zalloc(sizeof (*li), KM_SLEEP); - (void) strlcpy(li->li_filename, node, MAXPATHLEN); - kmem_free(node, len); - if (ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li, - LOFI_MODE | FKIOCTL, kcred, &minor) != 0) { + (void) strlcpy(li->li_filename, vdp->xs_params_path, + sizeof (li->li_filename)); + err = ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li, + LOFI_MODE | FKIOCTL, kcred, &minor); + (void) ldi_close(ldi_hdl, LOFI_MODE, kcred); + kmem_free(li, sizeof (*li)); + + if (err != 0) { cmn_err(CE_WARN, "xdb@%s: Failed to create lofi dev for %s", - ddi_get_name_addr(dip), li->li_filename); - (void) ldi_close(ldi_hdl, LOFI_MODE, kcred); - kmem_free(li, sizeof (*li)); + ddi_get_name_addr(dip), vdp->xs_params_path); return (DDI_FAILURE); } + /* * return '/devices/...' instead of '/dev/lofi/...' 
since the * former is available immediately after calling ldi_ioctl */ (void) snprintf(path, MAXPATHLEN, LOFI_DEV_NODE "%d", minor); - (void) xenbus_printf(XBT_NULL, xsnode, "node", "%s", path); - (void) ldi_close(ldi_hdl, LOFI_MODE, kcred); - kmem_free(li, sizeof (*li)); + (void) xenbus_printf(XBT_NULL, xsname, "node", "%s", path); + + ASSERT(vdp->xs_lofi_path == NULL); + vdp->xs_lofi_path = strdup(path); + return (DDI_SUCCESS); } static void xdb_teardown_node(xdb_t *vdp) { - dev_info_t *dip; - char *xsnode, *node; + dev_info_t *dip = vdp->xs_dip; ldi_handle_t ldi_hdl; struct lofi_ioctl *li; int err; - unsigned int len; - if (!XDB_IS_LOFI(vdp)) - return; + ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); - dip = vdp->xs_dip; - xsnode = xvdi_get_xsname(dip); - if (xsnode == NULL) + if (!XDB_IS_BE_LOFI(vdp)) return; - err = xenbus_read(XBT_NULL, xsnode, "dynamic-device-path", - (void **)&node, &len); - if (err == ENOENT) - err = xenbus_read(XBT_NULL, xsnode, "params", (void **)&node, - &len); - if (err != 0) { - xvdi_fatal_error(vdp->xs_dip, err, "reading 'params'"); - return; - } + vdp->xs_type &= ~XDB_DEV_BE_LOFI; + ASSERT(vdp->xs_lofi_path != NULL); li = kmem_zalloc(sizeof (*li), KM_SLEEP); - (void) strlcpy(li->li_filename, node, MAXPATHLEN); - kmem_free(node, len); + (void) strlcpy(li->li_filename, vdp->xs_params_path, + sizeof (li->li_filename)); do { err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred, @@ -908,67 +1192,47 @@ xdb_teardown_node(xdb_t *vdp) (void) ldi_close(ldi_hdl, LOFI_MODE, kcred); kmem_free(li, sizeof (*li)); + + strfree(vdp->xs_lofi_path); + vdp->xs_lofi_path = NULL; } static int xdb_open_device(xdb_t *vdp) { + dev_info_t *dip = vdp->xs_dip; uint64_t devsize; - dev_info_t *dip; - char *xsnode; char *nodepath; - char *mode = NULL; - char *type = NULL; - int err; - dip = vdp->xs_dip; - xsnode = xvdi_get_xsname(dip); - if (xsnode == NULL) - return (DDI_FAILURE); + ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); - err = xenbus_gather(XBT_NULL, xsnode, - "mode", NULL, &mode, "type", NULL, &type, NULL); - if (err != 0) { - if (mode) - kmem_free(mode, strlen(mode) + 1); - if (type) - kmem_free(type, strlen(type) + 1); - xvdi_fatal_error(dip, err, - "Getting mode and type from backend device"); - return (DDI_FAILURE); - } - if (strcmp(type, "file") == 0) { - vdp->xs_type |= XDB_DEV_LOFI; - } - kmem_free(type, strlen(type) + 1); - if ((strcmp(mode, "r") == NULL) || (strcmp(mode, "ro") == NULL)) { - vdp->xs_type |= XDB_DEV_RO; + if (strlen(vdp->xs_params_path) == 0) { + /* + * it's possible to have no backing device when dealing + * with a pv cdrom drive that has no virtual cd associated + * with it. + */ + ASSERT(XDB_IS_FE_CD(vdp)); + ASSERT(vdp->xs_sectors == 0); + ASSERT(vdp->xs_ldi_li == NULL); + ASSERT(vdp->xs_ldi_hdl == NULL); + return (DDI_SUCCESS); } - kmem_free(mode, strlen(mode) + 1); - /* - * try to open backend device - */ if (ldi_ident_from_dip(dip, &vdp->xs_ldi_li) != 0) return (DDI_FAILURE); nodepath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); - err = xdb_setup_node(vdp, nodepath); - if (err != DDI_SUCCESS) { - xvdi_fatal_error(dip, err, + + /* try to open backend device */ + if (xdb_setup_node(vdp, nodepath) != DDI_SUCCESS) { + xvdi_dev_error(dip, ENXIO, "Getting device path of backend device"); ldi_ident_release(vdp->xs_ldi_li); kmem_free(nodepath, MAXPATHLEN); return (DDI_FAILURE); } - if (*nodepath == '\0') { - /* Allow a CD-ROM device with an empty backend. 
*/ - vdp->xs_sectors = 0; - kmem_free(nodepath, MAXPATHLEN); - return (DDI_SUCCESS); - } - if (ldi_open_by_name(nodepath, FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred, &vdp->xs_ldi_hdl, vdp->xs_ldi_li) != 0) { @@ -980,16 +1244,6 @@ xdb_open_device(xdb_t *vdp) return (DDI_FAILURE); } - /* check if it's a CD/DVD disc */ - if (ldi_prop_get_int(vdp->xs_ldi_hdl, LDI_DEV_T_ANY | DDI_PROP_DONTPASS, - "inquiry-device-type", DTYPE_DIRECT) == DTYPE_RODIRECT) - vdp->xs_type |= XDB_DEV_CD; - /* check if it's a removable disk */ - if (ldi_prop_exists(vdp->xs_ldi_hdl, - LDI_DEV_T_ANY | DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, - "removable-media")) - vdp->xs_type |= XDB_DEV_RMB; - if (ldi_get_size(vdp->xs_ldi_hdl, &devsize) != DDI_SUCCESS) { (void) ldi_close(vdp->xs_ldi_hdl, FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred); @@ -1000,6 +1254,17 @@ xdb_open_device(xdb_t *vdp) } vdp->xs_sectors = devsize / XB_BSIZE; + /* check if the underlying device is a CD/DVD disc */ + if (ldi_prop_get_int(vdp->xs_ldi_hdl, LDI_DEV_T_ANY | DDI_PROP_DONTPASS, + INQUIRY_DEVICE_TYPE, DTYPE_DIRECT) == DTYPE_RODIRECT) + vdp->xs_type |= XDB_DEV_BE_CD; + + /* check if the underlying device is a removable disk */ + if (ldi_prop_exists(vdp->xs_ldi_hdl, + LDI_DEV_T_ANY | DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, + "removable-media")) + vdp->xs_type |= XDB_DEV_BE_RMB; + kmem_free(nodepath, MAXPATHLEN); return (DDI_SUCCESS); } @@ -1007,171 +1272,155 @@ xdb_open_device(xdb_t *vdp) static void xdb_close_device(xdb_t *vdp) { + ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); + + if (strlen(vdp->xs_params_path) == 0) { + ASSERT(XDB_IS_FE_CD(vdp)); + ASSERT(vdp->xs_sectors == 0); + ASSERT(vdp->xs_ldi_li == NULL); + ASSERT(vdp->xs_ldi_hdl == NULL); + return; + } + (void) ldi_close(vdp->xs_ldi_hdl, FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred); xdb_teardown_node(vdp); ldi_ident_release(vdp->xs_ldi_li); + vdp->xs_type &= ~(XDB_DEV_BE_CD | XDB_DEV_BE_RMB); + vdp->xs_sectors = 0; vdp->xs_ldi_li = NULL; vdp->xs_ldi_hdl = NULL; } /* * Kick-off connect process - * If xs_fe_status == XDB_FE_READY and xs_dev_status == XDB_DEV_READY - * the xs_if_status will be changed to XDB_CONNECTED on success, - * otherwise, xs_if_status will not be changed + * If xs_fe_initialised == B_TRUE and xs_hp_connected == B_TRUE + * the xs_if_connected will be changed to B_TRUE on success, */ -static int +static void xdb_start_connect(xdb_t *vdp) { - uint32_t dinfo; - xenbus_transaction_t xbt; - int err, svdst; - char *xsnode; - dev_info_t *dip = vdp->xs_dip; - char *barrier; - uint_t len; + xenbus_transaction_t xbt; + dev_info_t *dip = vdp->xs_dip; + boolean_t fb_exists; + int err, instance = ddi_get_instance(dip); + uint64_t sectors; + uint_t dinfo, ssize; + char *xsname; + + ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); + + if (((xsname = xvdi_get_xsname(dip)) == NULL) || + ((vdp->xs_peer = xvdi_get_oeid(dip)) == (domid_t)-1)) + return; + + mutex_enter(&vdp->xs_iomutex); + /* + * if the hotplug scripts haven't run or if the frontend is not + * initialized, then we can't try to connect. + */ + if (!vdp->xs_hp_connected || !vdp->xs_fe_initialised) { + ASSERT(!vdp->xs_if_connected); + mutex_exit(&vdp->xs_iomutex); + return; + } + + /* If we're already connected then there's nothing todo */ + if (vdp->xs_if_connected) { + mutex_exit(&vdp->xs_iomutex); + return; + } + mutex_exit(&vdp->xs_iomutex); /* * Start connect to frontend only when backend device are ready * and frontend has moved to XenbusStateInitialised, which means - * ready to connect + * ready to connect. 
*/ - ASSERT((vdp->xs_fe_status == XDB_FE_READY) && - (vdp->xs_dev_status == XDB_DEV_READY)); - - if (((xsnode = xvdi_get_xsname(dip)) == NULL) || - ((vdp->xs_peer = xvdi_get_oeid(dip)) == (domid_t)-1) || - (xdb_open_device(vdp) != DDI_SUCCESS)) - return (DDI_FAILURE); + XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, + "xdb@%s: starting connection process", ddi_get_name_addr(dip))); - (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialised); + if (xdb_open_device(vdp) != DDI_SUCCESS) + return; - if (xdb_bindto_frontend(vdp) != DDI_SUCCESS) - goto errout1; + if (xdb_bindto_frontend(vdp) != DDI_SUCCESS) { + xdb_close_device(vdp); + return; + } /* init i/o requests */ xdb_init_ioreqs(vdp); if (ddi_add_intr(dip, 0, NULL, NULL, xdb_intr, (caddr_t)vdp) - != DDI_SUCCESS) - goto errout2; + != DDI_SUCCESS) { + xdb_uninit_ioreqs(vdp); + xdb_unbindfrom_frontend(vdp); + xdb_close_device(vdp); + return; + } + + dinfo = 0; + if (XDB_IS_RO(vdp)) + dinfo |= VDISK_READONLY; + if (XDB_IS_BE_RMB(vdp)) + dinfo |= VDISK_REMOVABLE; + if (XDB_IS_BE_CD(vdp)) + dinfo |= VDISK_CDROM; + if (XDB_IS_FE_CD(vdp)) + dinfo |= VDISK_REMOVABLE | VDISK_CDROM; /* * we can recieve intr any time from now on * mark that we're ready to take intr */ mutex_enter(&vdp->xs_iomutex); - /* - * save it in case we need to restore when we - * fail to write xenstore later - */ - svdst = vdp->xs_if_status; - vdp->xs_if_status = XDB_CONNECTED; + ASSERT(vdp->xs_fe_initialised); + vdp->xs_if_connected = B_TRUE; mutex_exit(&vdp->xs_iomutex); - /* write into xenstore the info needed by frontend */ trans_retry: - if (xenbus_transaction_start(&xbt)) { - xvdi_fatal_error(dip, EIO, "transaction start"); - goto errout3; + /* write into xenstore the info needed by frontend */ + if ((err = xenbus_transaction_start(&xbt)) != 0) { + xvdi_dev_error(dip, err, "connect transaction init"); + goto errout; } - /* - * If feature-barrier isn't present in xenstore, add it. - */ - if (xenbus_read(xbt, xsnode, "feature-barrier", - (void **)&barrier, &len) != 0) { - if ((err = xenbus_printf(xbt, xsnode, "feature-barrier", - "%d", 1)) != 0) { - cmn_err(CE_WARN, "xdb@%s: failed to write " - "'feature-barrier'", ddi_get_name_addr(dip)); - xvdi_fatal_error(dip, err, "writing 'feature-barrier'"); - goto abort_trans; - } - } else - kmem_free(barrier, len); - - dinfo = 0; - if (XDB_IS_RO(vdp)) - dinfo |= VDISK_READONLY; - if (XDB_IS_CD(vdp)) - dinfo |= VDISK_CDROM; - if (XDB_IS_RMB(vdp)) - dinfo |= VDISK_REMOVABLE; - if (err = xenbus_printf(xbt, xsnode, "info", "%u", dinfo)) { - xvdi_fatal_error(dip, err, "writing 'info'"); - goto abort_trans; - } + /* If feature-barrier isn't present in xenstore, add it. 
*/ + fb_exists = xenbus_exists(xsname, XBP_FB); /* hard-coded 512-byte sector size */ - if (err = xenbus_printf(xbt, xsnode, "sector-size", "%u", DEV_BSIZE)) { - xvdi_fatal_error(dip, err, "writing 'sector-size'"); - goto abort_trans; - } - - if (err = xenbus_printf(xbt, xsnode, "sectors", "%"PRIu64, - vdp->xs_sectors)) { - xvdi_fatal_error(dip, err, "writing 'sectors'"); - goto abort_trans; + ssize = DEV_BSIZE; + sectors = vdp->xs_sectors; + if (((!fb_exists && + (err = xenbus_printf(xbt, xsname, XBP_FB, "%d", 1)))) || + (err = xenbus_printf(xbt, xsname, XBP_INFO, "%u", dinfo)) || + (err = xenbus_printf(xbt, xsname, "sector-size", "%u", ssize)) || + (err = xenbus_printf(xbt, xsname, + XBP_SECTORS, "%"PRIu64, sectors)) || + (err = xenbus_printf(xbt, xsname, "instance", "%d", instance)) || + ((err = xvdi_switch_state(dip, xbt, XenbusStateConnected)) > 0)) { + (void) xenbus_transaction_end(xbt, 1); + xvdi_dev_error(dip, err, "connect transaction setup"); + goto errout; } - if (err = xenbus_printf(xbt, xsnode, "instance", "%d", - ddi_get_instance(dip))) { - xvdi_fatal_error(dip, err, "writing 'instance'"); - goto abort_trans; - } - - if ((err = xvdi_switch_state(dip, xbt, XenbusStateConnected)) > 0) { - xvdi_fatal_error(dip, err, "writing 'state'"); - goto abort_trans; - } - - if (err = xenbus_transaction_end(xbt, 0)) { - if (err == EAGAIN) + if ((err = xenbus_transaction_end(xbt, 0)) != 0) { + if (err == EAGAIN) { /* transaction is ended, don't need to abort it */ goto trans_retry; - xvdi_fatal_error(dip, err, "completing transaction"); - goto errout3; + } + xvdi_dev_error(dip, err, "connect transaction commit"); + goto errout; } - return (DDI_SUCCESS); + return; -abort_trans: - (void) xenbus_transaction_end(xbt, 1); -errout3: - mutex_enter(&vdp->xs_iomutex); - vdp->xs_if_status = svdst; - mutex_exit(&vdp->xs_iomutex); - ddi_remove_intr(dip, 0, NULL); -errout2: - xdb_uninit_ioreqs(vdp); - xdb_unbindfrom_frontend(vdp); -errout1: - xdb_close_device(vdp); - return (DDI_FAILURE); -} - -/* - * Kick-off disconnect process - * xs_if_status will not be changed - */ -static int -xdb_start_disconnect(xdb_t *vdp) -{ - /* - * Kick-off disconnect process - */ - if (xvdi_switch_state(vdp->xs_dip, XBT_NULL, XenbusStateClosing) > 0) - return (DDI_FAILURE); - - return (DDI_SUCCESS); +errout: + xdb_close(dip); } /* * Disconnect from frontend and close backend device - * ifstatus will be changed to XDB_DISCONNECTED - * Xenbus state will be changed to XenbusStateClosed */ static void xdb_close(dev_info_t *dip) @@ -1179,23 +1428,36 @@ xdb_close(dev_info_t *dip) xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip); ASSERT(MUTEX_HELD(&vdp->xs_cbmutex)); - mutex_enter(&vdp->xs_iomutex); - if (vdp->xs_if_status != XDB_CONNECTED) { - vdp->xs_if_status = XDB_DISCONNECTED; + /* + * if the hotplug scripts haven't run or if the frontend is not + * initialized, then we can't be connected, so there's no + * connection to close. 
+ */ + if (!vdp->xs_hp_connected || !vdp->xs_fe_initialised) { + ASSERT(!vdp->xs_if_connected); + mutex_exit(&vdp->xs_iomutex); + return; + } + + /* if we're not connected, there's nothing to do */ + if (!vdp->xs_if_connected) { cv_broadcast(&vdp->xs_iocv); mutex_exit(&vdp->xs_iomutex); - (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); return; } - vdp->xs_if_status = XDB_DISCONNECTED; + + XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "closing while connected")); + + vdp->xs_if_connected = B_FALSE; cv_broadcast(&vdp->xs_iocv); mutex_exit(&vdp->xs_iomutex); /* stop accepting I/O request from frontend */ ddi_remove_intr(dip, 0, NULL); + /* clear all on-going I/Os, if any */ mutex_enter(&vdp->xs_iomutex); while (vdp->xs_ionum > 0) @@ -1207,109 +1469,53 @@ xdb_close(dev_info_t *dip) xdb_unbindfrom_frontend(vdp); xdb_close_device(vdp); vdp->xs_peer = (domid_t)-1; - (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); -} - -/* - * Xdb_check_state_transition will check the XenbusState change to see - * if the change is a valid transition or not. - * The new state is written by frontend domain, or by running xenstore-write - * to change it manually in dom0 - */ -static int -xdb_check_state_transition(xdb_t *vdp, XenbusState oestate) -{ - enum xdb_state status; - int stcheck; -#define STOK 0 /* need further process */ -#define STNOP 1 /* no action need taking */ -#define STBUG 2 /* unexpected state change, could be a bug */ - - status = vdp->xs_if_status; - stcheck = STOK; - - switch (status) { - case XDB_UNKNOWN: - if (vdp->xs_fe_status == XDB_FE_UNKNOWN) { - if ((oestate == XenbusStateUnknown) || - (oestate == XenbusStateConnected)) - stcheck = STBUG; - else if ((oestate == XenbusStateInitialising) || - (oestate == XenbusStateInitWait)) - stcheck = STNOP; - } else { - if ((oestate == XenbusStateUnknown) || - (oestate == XenbusStateInitialising) || - (oestate == XenbusStateInitWait) || - (oestate == XenbusStateConnected)) - stcheck = STBUG; - else if (oestate == XenbusStateInitialised) - stcheck = STNOP; - } - break; - case XDB_CONNECTED: - if ((oestate == XenbusStateUnknown) || - (oestate == XenbusStateInitialising) || - (oestate == XenbusStateInitWait) || - (oestate == XenbusStateInitialised)) - stcheck = STBUG; - else if (oestate == XenbusStateConnected) - stcheck = STNOP; - break; - case XDB_DISCONNECTED: - default: - stcheck = STBUG; - } - - if (stcheck == STOK) - return (DDI_SUCCESS); - - if (stcheck == STBUG) - cmn_err(CE_NOTE, "xdb@%s: unexpected otherend " - "state change to %d!, when status is %d", - ddi_get_name_addr(vdp->xs_dip), oestate, status); - - return (DDI_FAILURE); } static void xdb_send_buf(void *arg) { - buf_t *bp; - xdb_t *vdp = (xdb_t *)arg; + xdb_t *vdp = (xdb_t *)arg; + buf_t *bp; + int err; mutex_enter(&vdp->xs_iomutex); + while (vdp->xs_send_buf) { + if ((bp = vdp->xs_f_iobuf) == NULL) { + /* wait for some io to send */ + XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, + "send buf waiting for io")); + cv_wait(&vdp->xs_iocv, &vdp->xs_iomutex); + continue; + } - while (vdp->xs_if_status != XDB_DISCONNECTED) { - while ((bp = vdp->xs_f_iobuf) != NULL) { - vdp->xs_f_iobuf = bp->av_forw; - bp->av_forw = NULL; - vdp->xs_ionum++; - mutex_exit(&vdp->xs_iomutex); - if (bp->b_bcount != 0) { - int err = ldi_strategy(vdp->xs_ldi_hdl, bp); - if (err != 0) { - bp->b_flags |= B_ERROR; - (void) xdb_biodone(bp); - XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, - "xdb@%s: sent buf to backend dev" - "failed, err=%d", - ddi_get_name_addr(vdp->xs_dip), - err)); - } else { - XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, - 
"sent buf to backend ok")); - } - } else /* no I/O need to be done */ - (void) xdb_biodone(bp); + vdp->xs_f_iobuf = bp->av_forw; + bp->av_forw = NULL; + vdp->xs_ionum++; + mutex_exit(&vdp->xs_iomutex); + if (bp->b_bcount == 0) { + /* no I/O needs to be done */ + (void) xdb_biodone(bp); mutex_enter(&vdp->xs_iomutex); + continue; } - if (vdp->xs_if_status != XDB_DISCONNECTED) - cv_wait(&vdp->xs_iocv, &vdp->xs_iomutex); + err = EIO; + if (vdp->xs_ldi_hdl != NULL) + err = ldi_strategy(vdp->xs_ldi_hdl, bp); + if (err != 0) { + bp->b_flags |= B_ERROR; + (void) xdb_biodone(bp); + XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, + "xdb@%s: sent buf to backend devfailed, err=%d", + ddi_get_name_addr(vdp->xs_dip), err)); + } else { + XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, + "sent buf to backend ok")); + } + mutex_enter(&vdp->xs_iomutex); } - + XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "send buf finishing")); mutex_exit(&vdp->xs_iomutex); } @@ -1324,17 +1530,19 @@ xdb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: " "hotplug status change to %d!", ddi_get_name_addr(dip), state)); + if (state != Connected) + return; + mutex_enter(&vdp->xs_cbmutex); - if (state == Connected) { - /* Hotplug script has completed successfully */ - if (vdp->xs_dev_status == XDB_DEV_UNKNOWN) { - vdp->xs_dev_status = XDB_DEV_READY; - if (vdp->xs_fe_status == XDB_FE_READY) - /* try to connect to frontend */ - if (xdb_start_connect(vdp) != DDI_SUCCESS) - (void) xdb_start_disconnect(vdp); - } + + /* If hotplug script have already run, there's nothing todo */ + if (vdp->xs_hp_connected) { + mutex_exit(&vdp->xs_cbmutex); + return; } + + vdp->xs_hp_connected = B_TRUE; + xdb_start_connect(vdp); mutex_exit(&vdp->xs_cbmutex); } @@ -1351,29 +1559,47 @@ xdb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, mutex_enter(&vdp->xs_cbmutex); - if (xdb_check_state_transition(vdp, new_state) == DDI_FAILURE) { - mutex_exit(&vdp->xs_cbmutex); - return; - } - + /* + * Now it'd really be nice if there was a well defined state + * transition model for xen frontend drivers, but unfortunatly + * there isn't. So we're stuck with assuming that all state + * transitions are possible, and we'll just have to deal with + * them regardless of what state we're in. + */ switch (new_state) { - case XenbusStateInitialised: - ASSERT(vdp->xs_if_status == XDB_UNKNOWN); + case XenbusStateUnknown: + case XenbusStateInitialising: + case XenbusStateInitWait: + /* tear down our connection to the frontend */ + xdb_close(dip); + vdp->xs_fe_initialised = B_FALSE; + break; - /* frontend is ready for connecting */ - vdp->xs_fe_status = XDB_FE_READY; + case XenbusStateInitialised: + /* + * If we were conected, then we need to drop the connection + * and re-negotiate it. 
+ */ + xdb_close(dip); + vdp->xs_fe_initialised = B_TRUE; + xdb_start_connect(vdp); + break; - if (vdp->xs_dev_status == XDB_DEV_READY) - if (xdb_start_connect(vdp) != DDI_SUCCESS) - (void) xdb_start_disconnect(vdp); + case XenbusStateConnected: + /* nothing todo here other than congratulate the frontend */ break; + case XenbusStateClosing: + /* monkey see monkey do */ (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing); break; + case XenbusStateClosed: - /* clean up */ + /* tear down our connection to the frontend */ xdb_close(dip); - + vdp->xs_fe_initialised = B_FALSE; + (void) xvdi_switch_state(dip, XBT_NULL, new_state); + break; } mutex_exit(&vdp->xs_cbmutex); @@ -1382,9 +1608,11 @@ xdb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, static int xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) { - xdb_t *vdp; - ddi_iblock_cookie_t ibc; - int instance; + ddi_iblock_cookie_t ibc; + xdb_t *vdp; + int instance = ddi_get_instance(dip); + char *xsname, *oename; + char *str; switch (cmd) { case DDI_RESUME: @@ -1394,42 +1622,69 @@ xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) default: return (DDI_FAILURE); } - /* DDI_ATTACH */ - instance = ddi_get_instance(dip); - if (ddi_soft_state_zalloc(xdb_statep, instance) != DDI_SUCCESS) + + if (((xsname = xvdi_get_xsname(dip)) == NULL) || + ((oename = xvdi_get_oename(dip)) == NULL)) return (DDI_FAILURE); - vdp = ddi_get_soft_state(xdb_statep, instance); - vdp->xs_dip = dip; + /* + * Disable auto-detach. This is necessary so that we don't get + * detached while we're disconnected from the front end. + */ + (void) ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1); + if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS) - goto errout1; + return (DDI_FAILURE); - if (!xdb_kstat_init(vdp)) - goto errout1; + if (ddi_soft_state_zalloc(xdb_statep, instance) != DDI_SUCCESS) + return (DDI_FAILURE); + vdp = ddi_get_soft_state(xdb_statep, instance); + vdp->xs_dip = dip; mutex_init(&vdp->xs_iomutex, NULL, MUTEX_DRIVER, (void *)ibc); mutex_init(&vdp->xs_cbmutex, NULL, MUTEX_DRIVER, (void *)ibc); cv_init(&vdp->xs_iocv, NULL, CV_DRIVER, NULL); cv_init(&vdp->xs_ionumcv, NULL, CV_DRIVER, NULL); - ddi_set_driver_private(dip, vdp); + if (!xdb_kstat_init(vdp)) + goto errout1; + + /* Check if the frontend device is supposed to be a cdrom */ + if (xenbus_read_str(oename, XBP_DEV_TYPE, &str) != 0) + return (DDI_FAILURE); + if (strcmp(str, XBV_DEV_TYPE_CD) == 0) + vdp->xs_type |= XDB_DEV_FE_CD; + strfree(str); + + /* Check if the frontend device is supposed to be read only */ + if (xenbus_read_str(xsname, "mode", &str) != 0) + return (DDI_FAILURE); + if ((strcmp(str, "r") == NULL) || (strcmp(str, "ro") == NULL)) + vdp->xs_type |= XDB_DEV_RO; + strfree(str); + + mutex_enter(&vdp->xs_cbmutex); + if (!xdb_media_req_init(vdp) || !xdb_params_init(vdp)) { + xvdi_remove_xb_watch_handlers(dip); + mutex_exit(&vdp->xs_cbmutex); + goto errout2; + } + mutex_exit(&vdp->xs_cbmutex); + + vdp->xs_send_buf = B_TRUE; vdp->xs_iotaskq = ddi_taskq_create(dip, "xdb_iotask", 1, TASKQ_DEFAULTPRI, 0); - if (vdp->xs_iotaskq == NULL) - goto errout2; (void) ddi_taskq_dispatch(vdp->xs_iotaskq, xdb_send_buf, vdp, DDI_SLEEP); /* Watch frontend and hotplug state change */ - if (xvdi_add_event_handler(dip, XS_OE_STATE, xdb_oe_state_change, - NULL) != DDI_SUCCESS) + if ((xvdi_add_event_handler(dip, XS_OE_STATE, xdb_oe_state_change, + NULL) != DDI_SUCCESS) || + (xvdi_add_event_handler(dip, XS_HP_STATE, xdb_hp_state_change, + NULL) != DDI_SUCCESS)) goto errout3; - if 
(xvdi_add_event_handler(dip, XS_HP_STATE, xdb_hp_state_change, - NULL) != DDI_SUCCESS) { - goto errout4; - } /* * Kick-off hotplug script @@ -1437,7 +1692,7 @@ xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) if (xvdi_post_event(dip, XEN_HP_ADD) != DDI_SUCCESS) { cmn_err(CE_WARN, "xdb@%s: failed to start hotplug script", ddi_get_name_addr(dip)); - goto errout4; + goto errout3; } /* @@ -1450,25 +1705,40 @@ xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) ddi_get_name_addr(dip))); return (DDI_SUCCESS); -errout4: - xvdi_remove_event_handler(dip, NULL); errout3: + ASSERT(vdp->xs_hp_connected && vdp->xs_if_connected); + + xvdi_remove_event_handler(dip, NULL); + + /* Disconnect from the backend */ mutex_enter(&vdp->xs_cbmutex); mutex_enter(&vdp->xs_iomutex); - vdp->xs_if_status = XDB_DISCONNECTED; + vdp->xs_send_buf = B_FALSE; cv_broadcast(&vdp->xs_iocv); mutex_exit(&vdp->xs_iomutex); mutex_exit(&vdp->xs_cbmutex); + + /* wait for all io to dtrain and destroy io taskq */ ddi_taskq_destroy(vdp->xs_iotaskq); + + /* tear down block-configure watch */ + mutex_enter(&vdp->xs_cbmutex); + xvdi_remove_xb_watch_handlers(dip); + mutex_exit(&vdp->xs_cbmutex); + errout2: + /* remove kstats */ + kstat_delete(vdp->xs_kstats); + +errout1: + /* free up driver state */ ddi_set_driver_private(dip, NULL); cv_destroy(&vdp->xs_iocv); cv_destroy(&vdp->xs_ionumcv); mutex_destroy(&vdp->xs_cbmutex); mutex_destroy(&vdp->xs_iomutex); - kstat_delete(vdp->xs_kstats); -errout1: ddi_soft_state_free(xdb_statep, instance); + return (DDI_FAILURE); } @@ -1490,19 +1760,25 @@ xdb_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) /* DDI_DETACH handling */ - /* shouldn't detach, if still used by frontend */ + /* refuse to detach if we're still in use by the frontend */ mutex_enter(&vdp->xs_iomutex); - if (vdp->xs_if_status != XDB_DISCONNECTED) { + if (vdp->xs_if_connected) { mutex_exit(&vdp->xs_iomutex); return (DDI_FAILURE); } + vdp->xs_send_buf = B_FALSE; + cv_broadcast(&vdp->xs_iocv); mutex_exit(&vdp->xs_iomutex); xvdi_remove_event_handler(dip, NULL); - /* can do nothing about it, if it fails */ (void) xvdi_post_event(dip, XEN_HP_REMOVE); ddi_taskq_destroy(vdp->xs_iotaskq); + + mutex_enter(&vdp->xs_cbmutex); + xvdi_remove_xb_watch_handlers(dip); + mutex_exit(&vdp->xs_cbmutex); + cv_destroy(&vdp->xs_iocv); cv_destroy(&vdp->xs_ionumcv); mutex_destroy(&vdp->xs_cbmutex); @@ -1528,7 +1804,7 @@ static struct dev_ops xdb_dev_ops = { NULL, /* devo_cb_ops */ NULL, /* devo_bus_ops */ NULL, /* power */ - ddi_quiesce_not_needed, /* quiesce */ + ddi_quiesce_not_needed, /* quiesce */ }; /* @@ -1536,7 +1812,7 @@ static struct dev_ops xdb_dev_ops = { */ static struct modldrv modldrv = { &mod_driverops, /* Type of module. */ - "vbd backend driver", /* Name of the module */ + "vbd backend driver", /* Name of the module */ &xdb_dev_ops /* driver ops */ }; diff --git a/usr/src/uts/common/xen/io/xdb.h b/usr/src/uts/common/xen/io/xdb.h index 0abd008d0a..f8046e8219 100644 --- a/usr/src/uts/common/xen/io/xdb.h +++ b/usr/src/uts/common/xen/io/xdb.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -28,8 +28,6 @@ #ifndef _SYS_XDB_H #define _SYS_XDB_H -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef __cplusplus extern "C" { #endif @@ -42,57 +40,17 @@ extern "C" { /* * Info of the exported blk device */ -#define XDB_DEV_RO (1) /* read-only or writable */ -#define XDB_IS_RO(vdp) ((vdp)->xs_type & XDB_DEV_RO) -#define XDB_DEV_LOFI (1 << 1) /* lofi device or physical device */ -#define XDB_IS_LOFI(vdp) ((vdp)->xs_type & XDB_DEV_LOFI) -#define XDB_DEV_CD (1 << 2) /* cdrom disc */ -#define XDB_IS_CD(vdp) ((vdp)->xs_type & XDB_DEV_CD) -#define XDB_DEV_RMB (1 << 3) /* removable device */ -#define XDB_IS_RMB(vdp) ((vdp)->xs_type & XDB_DEV_RMB) - -/* - * Xdb interface status - */ -enum xdb_state { - /* - * initial state - */ - XDB_UNKNOWN, - /* - * frontend xenbus state changed to XenbusStateConnected, - * we finally connect - */ - XDB_CONNECTED, - /* - * frontend xenbus state changed to XenbusStateClosed, - * interface disconnected - */ - XDB_DISCONNECTED -}; - -/* - * backend device status - */ -enum xdb_dev_state { - /* initial state */ - XDB_DEV_UNKNOWN, - /* backend device is ready (hotplug script finishes successfully) */ - XDB_DEV_READY -}; - -/* - * frontend status - */ -enum xdb_fe_state { - /* initial state */ - XDB_FE_UNKNOWN, - /* - * frontend's xenbus state has changed to - * XenbusStateInitialised, is ready for connecting - */ - XDB_FE_READY -}; +#define XDB_DEV_RO (1 << 0) /* backend and frontend are read-only */ +#define XDB_DEV_BE_LOFI (1 << 1) /* backend device is a lofi device */ +#define XDB_DEV_BE_RMB (1 << 2) /* backend device is removable */ +#define XDB_DEV_BE_CD (1 << 3) /* backend device is cdrom */ +#define XDB_DEV_FE_CD (1 << 4) /* frontend device is cdrom */ + +#define XDB_IS_RO(vdp) ((vdp)->xs_type & XDB_DEV_RO) +#define XDB_IS_BE_LOFI(vdp) ((vdp)->xs_type & XDB_DEV_BE_LOFI) +#define XDB_IS_BE_RMB(vdp) ((vdp)->xs_type & XDB_DEV_BE_RMB) +#define XDB_IS_BE_CD(vdp) ((vdp)->xs_type & XDB_DEV_BE_CD) +#define XDB_IS_FE_CD(vdp) ((vdp)->xs_type & XDB_DEV_FE_CD) /* * Other handy macrosx @@ -183,12 +141,6 @@ struct xdb { */ buf_t *xs_f_iobuf; buf_t *xs_l_iobuf; - /* xdb interface status */ - enum xdb_state xs_if_status; - /* backend device status */ - enum xdb_dev_state xs_dev_status; - /* frontend status */ - enum xdb_fe_state xs_fe_status; /* head of free list of xdb_request_t */ int xs_free_req; /* pre-allocated xdb_request_t pool */ @@ -201,6 +153,23 @@ struct xdb { enum blkif_protocol xs_blk_protocol; size_t xs_nentry; size_t xs_entrysize; + + /* Protected by xs_cbmutex */ + boolean_t xs_hp_connected; /* hot plug scripts have run */ + boolean_t xs_fe_initialised; /* frontend is initialized */ + char *xs_lofi_path; + char *xs_params_path; + struct xenbus_watch *xs_watch_params; + struct xenbus_watch *xs_watch_media_req; + ddi_taskq_t *xs_watch_taskq; + int xs_watch_taskq_count; + + /* Protected by xs_cbmutex and xs_iomutex */ + boolean_t xs_if_connected; /* connected to frontend */ + + /* Protected by xs_iomutex */ + boolean_t xs_send_buf; + #ifdef DEBUG uint64_t *page_addrs; /* for debug aid */ #endif /* DEBUG */ diff --git a/usr/src/uts/common/xen/io/xdf.c b/usr/src/uts/common/xen/io/xdf.c index 56c18a6cec..8a5105b82c 100644 --- a/usr/src/uts/common/xen/io/xdf.c +++ b/usr/src/uts/common/xen/io/xdf.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -29,12 +29,47 @@ * TODO: * - support alternate block size (currently only DEV_BSIZE supported) * - revalidate geometry for removable devices + * + * This driver export solaris disk device nodes, accepts IO requests from + * those nodes, and services those requests by talking to a backend device + * in another domain. + * + * Communication with the backend device is done via a ringbuffer (which is + * managed via xvdi interfaces) and dma memory (which is managed via ddi + * interfaces). + * + * Communication with the backend device is dependant upon establishing a + * connection to the backend device. This connection process involves + * reading device configuration information from xenbus and publishing + * some frontend runtime configuration parameters via the xenbus (for + * consumption by the backend). Once we've published runtime configuration + * information via the xenbus, the backend device can enter the connected + * state and we'll enter the XD_CONNECTED state. But before we can allow + * random IO to begin, we need to do IO to the backend device to determine + * the device label and if flush operations are supported. Once this is + * done we enter the XD_READY state and can process any IO operations. + * + * We recieve notifications of xenbus state changes for the backend device + * (aka, the "other end") via the xdf_oe_change() callback. This callback + * is single threaded, meaning that we can't recieve new notification of + * other end state changes while we're processing an outstanding + * notification of an other end state change. There for we can't do any + * blocking operations from the xdf_oe_change() callback. This is why we + * have a seperate taskq (xdf_ready_tq) which exists to do the necessary + * IO to get us from the XD_CONNECTED to the XD_READY state. All IO + * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go + * throught xdf_lb_rdwr(), which is a synchronous IO interface. IOs + * generated by the xdf_ready_tq_thread thread have priority over all + * other IO requests. + * + * We also communicate with the backend device via the xenbus "media-req" + * (XBP_MEDIA_REQ) property. For more information on this see the + * comments in blkif.h. 
*/ -#include <sys/ddi.h> -#include <sys/sunddi.h> +#include <io/xdf.h> + #include <sys/conf.h> -#include <sys/cmlb.h> #include <sys/dkio.h> #include <sys/promif.h> #include <sys/sysmacros.h> @@ -43,140 +78,78 @@ #ifdef XPV_HVM_DRIVER #include <sys/xpv_support.h> #include <sys/sunndi.h> -#endif /* XPV_HVM_DRIVER */ +#else /* !XPV_HVM_DRIVER */ +#include <sys/evtchn_impl.h> +#endif /* !XPV_HVM_DRIVER */ #include <public/io/xenbus.h> #include <xen/sys/xenbus_impl.h> -#include <xen/sys/xendev.h> -#include <sys/gnttab.h> #include <sys/scsi/generic/inquiry.h> #include <xen/io/blkif_impl.h> -#include <io/xdf.h> +#include <sys/fdio.h> +#include <sys/cdio.h> + +/* + * DEBUG_EVAL can be used to include debug only statements without + * having to use '#ifdef DEBUG' statements + */ +#ifdef DEBUG +#define DEBUG_EVAL(x) (x) +#else /* !DEBUG */ +#define DEBUG_EVAL(x) +#endif /* !DEBUG */ + +#define XDF_DRAIN_MSEC_DELAY (50*1000) /* 00.05 sec */ +#define XDF_DRAIN_RETRY_COUNT 200 /* 10.00 sec */ +#define INVALID_DOMID ((domid_t)-1) #define FLUSH_DISKCACHE 0x1 #define WRITE_BARRIER 0x2 #define DEFAULT_FLUSH_BLOCK 156 /* block to write to cause a cache flush */ -#define USE_WRITE_BARRIER(vdp) \ +#define USE_WRITE_BARRIER(vdp) \ ((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported) -#define USE_FLUSH_DISKCACHE(vdp) \ +#define USE_FLUSH_DISKCACHE(vdp) \ ((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported) -#define IS_WRITE_BARRIER(vdp, bp) \ - (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \ +#define IS_WRITE_BARRIER(vdp, bp) \ + (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \ ((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block)) -#define IS_FLUSH_DISKCACHE(bp) \ +#define IS_FLUSH_DISKCACHE(bp) \ (!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0)) -static void *vbd_ss; -static kmem_cache_t *xdf_vreq_cache; -static kmem_cache_t *xdf_gs_cache; -static int xdf_maxphys = XB_MAXPHYS; -int xdfdebug = 0; -extern int do_polled_io; -diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK; -int xdf_barrier_flush_disable = 0; +#define VREQ_DONE(vreq) \ + VOID2BOOLEAN(((vreq)->v_status == VREQ_DMAWIN_DONE) && \ + (((vreq)->v_flush_diskcache == FLUSH_DISKCACHE) || \ + (((vreq)->v_dmaw + 1) == (vreq)->v_ndmaws))) -/* - * dev_ops and cb_ops entrypoints - */ -static int xdf_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); -static int xdf_attach(dev_info_t *, ddi_attach_cmd_t); -static int xdf_detach(dev_info_t *, ddi_detach_cmd_t); -static int xdf_reset(dev_info_t *, ddi_reset_cmd_t); -static int xdf_open(dev_t *, int, int, cred_t *); -static int xdf_close(dev_t, int, int, struct cred *); -static int xdf_strategy(struct buf *); -static int xdf_read(dev_t, struct uio *, cred_t *); -static int xdf_aread(dev_t, struct aio_req *, cred_t *); -static int xdf_write(dev_t, struct uio *, cred_t *); -static int xdf_awrite(dev_t, struct aio_req *, cred_t *); -static int xdf_dump(dev_t, caddr_t, daddr_t, int); -static int xdf_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); -static uint_t xdf_intr(caddr_t); -static int xdf_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *, - caddr_t, int *); +#define BP_VREQ(bp) ((v_req_t *)((bp)->av_back)) +#define BP_VREQ_SET(bp, vreq) (((bp)->av_back = (buf_t *)(vreq))) -/* - * misc private functions - */ -static int xdf_suspend(dev_info_t *); -static int xdf_resume(dev_info_t *); -static int xdf_start_connect(xdf_t *); -static int xdf_start_disconnect(xdf_t *); -static int xdf_post_connect(xdf_t *); -static void xdf_post_disconnect(xdf_t *); -static void 
xdf_oe_change(dev_info_t *, ddi_eventcookie_t, void *, void *); -static void xdf_iostart(xdf_t *); -static void xdf_iofini(xdf_t *, uint64_t, int); -static int xdf_prepare_rreq(xdf_t *, struct buf *, blkif_request_t *); -static int xdf_drain_io(xdf_t *); -static boolean_t xdf_isopen(xdf_t *, int); -static int xdf_check_state_transition(xdf_t *, XenbusState); -static int xdf_connect(xdf_t *, boolean_t); -static int xdf_dmacallback(caddr_t); -static void xdf_timeout_handler(void *); -static uint_t xdf_iorestart(caddr_t); -static v_req_t *vreq_get(xdf_t *, buf_t *); -static void vreq_free(xdf_t *, v_req_t *); -static int vreq_setup(xdf_t *, v_req_t *); -static ge_slot_t *gs_get(xdf_t *, int); -static void gs_free(xdf_t *, ge_slot_t *); -static grant_ref_t gs_grant(ge_slot_t *, mfn_t); -static void unexpectedie(xdf_t *); -static void xdfmin(struct buf *); -static void xdf_synthetic_pgeom(dev_info_t *, cmlb_geom_t *); -extern int xdf_kstat_create(dev_info_t *, char *, int); -extern void xdf_kstat_delete(dev_info_t *); +extern int do_polled_io; -#if defined(XPV_HVM_DRIVER) -static void xdf_hvm_add(dev_info_t *); -static void xdf_hvm_rm(dev_info_t *); -static void xdf_hvm_init(void); -static void xdf_hvm_fini(void); -#endif /* XPV_HVM_DRIVER */ +/* run-time tunables that we don't want the compiler to optimize away */ +volatile int xdf_debug = 0; +volatile boolean_t xdf_barrier_flush_disable = B_FALSE; -static struct cb_ops xdf_cbops = { - xdf_open, - xdf_close, - xdf_strategy, - nodev, - xdf_dump, - xdf_read, - xdf_write, - xdf_ioctl, - nodev, - nodev, - nodev, - nochpoll, - xdf_prop_op, - NULL, - D_MP | D_NEW | D_64BIT, - CB_REV, - xdf_aread, - xdf_awrite -}; +/* per module globals */ +major_t xdf_major; +static void *xdf_ssp; +static kmem_cache_t *xdf_vreq_cache; +static kmem_cache_t *xdf_gs_cache; +static int xdf_maxphys = XB_MAXPHYS; +static diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK; +static int xdf_fbrewrites; /* flush block re-write count */ -struct dev_ops xdf_devops = { - DEVO_REV, /* devo_rev */ - 0, /* devo_refcnt */ - xdf_getinfo, /* devo_getinfo */ - nulldev, /* devo_identify */ - nulldev, /* devo_probe */ - xdf_attach, /* devo_attach */ - xdf_detach, /* devo_detach */ - xdf_reset, /* devo_reset */ - &xdf_cbops, /* devo_cb_ops */ - (struct bus_ops *)NULL, /* devo_bus_ops */ - NULL, /* devo_power */ - ddi_quiesce_not_supported, /* devo_quiesce */ -}; +/* misc public functions (used by xdf_shell.c) */ +int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *); +int xdf_lb_getinfo(dev_info_t *, int, void *, void *); -static struct modldrv modldrv = { - &mod_driverops, /* Type of module. 
This one is a driver */ - "virtual block driver", /* short description */ - &xdf_devops /* driver specific ops */ -}; +/* misc private functions */ +static void xdf_io_start(xdf_t *); -static struct modlinkage xdf_modlinkage = { - MODREV_1, (void *)&modldrv, NULL +/* callbacks from commmon label */ +static cmlb_tg_ops_t xdf_lb_ops = { + TG_DK_OPS_VERSION_1, + xdf_lb_rdwr, + xdf_lb_getinfo }; /* @@ -204,992 +177,762 @@ static ddi_device_acc_attr_t xc_acc_attr = { DDI_STRICTORDER_ACC }; -/* callbacks from commmon label */ - -int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *); -int xdf_lb_getinfo(dev_info_t *, int, void *, void *); - -static cmlb_tg_ops_t xdf_lb_ops = { - TG_DK_OPS_VERSION_1, - xdf_lb_rdwr, - xdf_lb_getinfo -}; - -int -_init(void) +static void +xdf_timeout_handler(void *arg) { - int rc; + xdf_t *vdp = arg; - if ((rc = ddi_soft_state_init(&vbd_ss, sizeof (xdf_t), 0)) != 0) - return (rc); + mutex_enter(&vdp->xdf_dev_lk); + vdp->xdf_timeout_id = 0; + mutex_exit(&vdp->xdf_dev_lk); - xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", - sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); - xdf_gs_cache = kmem_cache_create("xdf_gs_cache", - sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + /* new timeout thread could be re-scheduled */ + xdf_io_start(vdp); +} -#if defined(XPV_HVM_DRIVER) - xdf_hvm_init(); -#endif /* XPV_HVM_DRIVER */ +/* + * callback func when DMA/GTE resources is available + * + * Note: we only register one callback function to grant table subsystem + * since we only have one 'struct gnttab_free_callback' in xdf_t. + */ +static int +xdf_dmacallback(caddr_t arg) +{ + xdf_t *vdp = (xdf_t *)arg; + ASSERT(vdp != NULL); - if ((rc = mod_install(&xdf_modlinkage)) != 0) { -#if defined(XPV_HVM_DRIVER) - xdf_hvm_fini(); -#endif /* XPV_HVM_DRIVER */ - kmem_cache_destroy(xdf_vreq_cache); - kmem_cache_destroy(xdf_gs_cache); - ddi_soft_state_fini(&vbd_ss); - return (rc); - } + DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n", + vdp->xdf_addr)); - return (rc); + ddi_trigger_softintr(vdp->xdf_softintr_id); + return (DDI_DMA_CALLBACK_DONE); } -int -_fini(void) +static ge_slot_t * +gs_get(xdf_t *vdp, int isread) { + grant_ref_t gh; + ge_slot_t *gs; - int err; - if ((err = mod_remove(&xdf_modlinkage)) != 0) - return (err); - -#if defined(XPV_HVM_DRIVER) - xdf_hvm_fini(); -#endif /* XPV_HVM_DRIVER */ + /* try to alloc GTEs needed in this slot, first */ + if (gnttab_alloc_grant_references( + BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) { + if (vdp->xdf_gnt_callback.next == NULL) { + SETDMACBON(vdp); + gnttab_request_free_callback( + &vdp->xdf_gnt_callback, + (void (*)(void *))xdf_dmacallback, + (void *)vdp, + BLKIF_MAX_SEGMENTS_PER_REQUEST); + } + return (NULL); + } - kmem_cache_destroy(xdf_vreq_cache); - kmem_cache_destroy(xdf_gs_cache); - ddi_soft_state_fini(&vbd_ss); + gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP); + if (gs == NULL) { + gnttab_free_grant_references(gh); + if (vdp->xdf_timeout_id == 0) + /* restart I/O after one second */ + vdp->xdf_timeout_id = + timeout(xdf_timeout_handler, vdp, hz); + return (NULL); + } - return (0); -} + /* init gs_slot */ + gs->gs_oeid = vdp->xdf_peer; + gs->gs_isread = isread; + gs->gs_ghead = gh; + gs->gs_ngrefs = 0; -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&xdf_modlinkage, modinfop)); + return (gs); } -/*ARGSUSED*/ -static int -xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) +static void +gs_free(ge_slot_t *gs) { - int instance; - xdf_t *vbdp; - - instance = 
XDF_INST(getminor((dev_t)arg)); - - switch (cmd) { - case DDI_INFO_DEVT2DEVINFO: - if ((vbdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) { - *rp = NULL; - return (DDI_FAILURE); - } - *rp = vbdp->xdf_dip; - return (DDI_SUCCESS); + int i; - case DDI_INFO_DEVT2INSTANCE: - *rp = (void *)(uintptr_t)instance; - return (DDI_SUCCESS); - - default: - return (DDI_FAILURE); - } + /* release all grant table entry resources used in this slot */ + for (i = 0; i < gs->gs_ngrefs; i++) + gnttab_end_foreign_access(gs->gs_ge[i], !gs->gs_isread, 0); + gnttab_free_grant_references(gs->gs_ghead); + list_remove(&gs->gs_vreq->v_gs, gs); + kmem_cache_free(xdf_gs_cache, gs); } -static int -xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags, - char *name, caddr_t valuep, int *lengthp) +static grant_ref_t +gs_grant(ge_slot_t *gs, mfn_t mfn) { - xdf_t *vdp; + grant_ref_t gr = gnttab_claim_grant_reference(&gs->gs_ghead); - if ((vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(dip))) == NULL) - return (ddi_prop_op(dev, dip, prop_op, mod_flags, - name, valuep, lengthp)); + ASSERT(gr != -1); + ASSERT(gs->gs_ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST); + gs->gs_ge[gs->gs_ngrefs++] = gr; + gnttab_grant_foreign_access_ref(gr, gs->gs_oeid, mfn, !gs->gs_isread); - return (cmlb_prop_op(vdp->xdf_vd_lbl, - dev, dip, prop_op, mod_flags, name, valuep, lengthp, - XDF_PART(getminor(dev)), NULL)); + return (gr); } -static int -xdf_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) +/* + * Alloc a vreq for this bp + * bp->av_back contains the pointer to the vreq upon return + */ +static v_req_t * +vreq_get(xdf_t *vdp, buf_t *bp) { - xdf_t *vdp; - ddi_iblock_cookie_t softibc; - int instance; - - xdfdebug = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_NOTPROM, - "xdfdebug", 0); - - switch (cmd) { - case DDI_ATTACH: - break; + v_req_t *vreq = NULL; - case DDI_RESUME: - return (xdf_resume(devi)); + ASSERT(BP_VREQ(bp) == NULL); - default: - return (DDI_FAILURE); + vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP); + if (vreq == NULL) { + if (vdp->xdf_timeout_id == 0) + /* restart I/O after one second */ + vdp->xdf_timeout_id = + timeout(xdf_timeout_handler, vdp, hz); + return (NULL); } + bzero(vreq, sizeof (v_req_t)); + list_create(&vreq->v_gs, sizeof (ge_slot_t), + offsetof(ge_slot_t, gs_vreq_link)); + vreq->v_buf = bp; + vreq->v_status = VREQ_INIT; + vreq->v_runq = B_FALSE; + BP_VREQ_SET(bp, vreq); + /* init of other fields in vreq is up to the caller */ - instance = ddi_get_instance(devi); - if (ddi_soft_state_zalloc(vbd_ss, instance) != DDI_SUCCESS) - return (DDI_FAILURE); - - DPRINTF(DDI_DBG, ("xdf%d: attaching\n", instance)); - vdp = ddi_get_soft_state(vbd_ss, instance); - ddi_set_driver_private(devi, vdp); - vdp->xdf_dip = devi; - cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL); + list_insert_head(&vdp->xdf_vreq_act, (void *)vreq); - if (ddi_get_iblock_cookie(devi, 0, &vdp->xdf_ibc) != DDI_SUCCESS) { - cmn_err(CE_WARN, "xdf@%s: failed to get iblock cookie", - ddi_get_name_addr(devi)); - goto errout0; - } - mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)vdp->xdf_ibc); - mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)vdp->xdf_ibc); - mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, - (void *)vdp->xdf_ibc); + return (vreq); +} - if (ddi_get_soft_iblock_cookie(devi, DDI_SOFTINT_LOW, &softibc) - != DDI_SUCCESS) { - cmn_err(CE_WARN, "xdf@%s: failed to get softintr iblock cookie", - ddi_get_name_addr(devi)); - goto errout0; - } - if (ddi_add_softintr(devi, DDI_SOFTINT_LOW, 
&vdp->xdf_softintr_id, - &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) { - cmn_err(CE_WARN, "xdf@%s: failed to add softintr", - ddi_get_name_addr(devi)); - goto errout0; - } +static void +vreq_free(xdf_t *vdp, v_req_t *vreq) +{ + buf_t *bp = vreq->v_buf; -#if !defined(XPV_HVM_DRIVER) - /* create kstat for iostat(1M) */ - if (xdf_kstat_create(devi, "xdf", instance) != 0) { - cmn_err(CE_WARN, "xdf@%s: failed to create kstat", - ddi_get_name_addr(devi)); - goto errout0; - } -#endif /* !XPV_HVM_DRIVER */ + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); + ASSERT(BP_VREQ(bp) == vreq); - /* driver handles kernel-issued IOCTLs */ - if (ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP, - DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) { - cmn_err(CE_WARN, "xdf@%s: cannot create DDI_KERNEL_IOCTL prop", - ddi_get_name_addr(devi)); - goto errout0; - } + list_remove(&vdp->xdf_vreq_act, vreq); - /* - * Initialize the physical geometry stucture. Note that currently - * we don't know the size of the backend device so the number - * of blocks on the device will be initialized to zero. Once - * we connect to the backend device we'll update the physical - * geometry to reflect the real size of the device. - */ - xdf_synthetic_pgeom(devi, &vdp->xdf_pgeom); + if (vreq->v_flush_diskcache == FLUSH_DISKCACHE) + goto done; - /* - * create default device minor nodes: non-removable disk - * we will adjust minor nodes after we are connected w/ backend - */ - cmlb_alloc_handle(&vdp->xdf_vd_lbl); - if (cmlb_attach(devi, &xdf_lb_ops, DTYPE_DIRECT, 0, 1, - DDI_NT_BLOCK_XVMD, -#if defined(XPV_HVM_DRIVER) - CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT | - CMLB_INTERNAL_MINOR_NODES, -#else /* !XPV_HVM_DRIVER */ - CMLB_FAKE_LABEL_ONE_PARTITION, -#endif /* !XPV_HVM_DRIVER */ - vdp->xdf_vd_lbl, NULL) != 0) { - cmn_err(CE_WARN, "xdf@%s: default cmlb attach failed", - ddi_get_name_addr(devi)); - goto errout0; + switch (vreq->v_status) { + case VREQ_DMAWIN_DONE: + case VREQ_GS_ALLOCED: + case VREQ_DMABUF_BOUND: + (void) ddi_dma_unbind_handle(vreq->v_dmahdl); + /*FALLTHRU*/ + case VREQ_DMAMEM_ALLOCED: + if (!ALIGNED_XFER(bp)) { + ASSERT(vreq->v_abuf != NULL); + if (!IS_ERROR(bp) && IS_READ(bp)) + bcopy(vreq->v_abuf, bp->b_un.b_addr, + bp->b_bcount); + ddi_dma_mem_free(&vreq->v_align); + } + /*FALLTHRU*/ + case VREQ_MEMDMAHDL_ALLOCED: + if (!ALIGNED_XFER(bp)) + ddi_dma_free_handle(&vreq->v_memdmahdl); + /*FALLTHRU*/ + case VREQ_DMAHDL_ALLOCED: + ddi_dma_free_handle(&vreq->v_dmahdl); + break; + default: + break; } +done: + ASSERT(!vreq->v_runq); + list_destroy(&vreq->v_gs); + kmem_cache_free(xdf_vreq_cache, vreq); +} - /* - * We ship with cache-enabled disks - */ - vdp->xdf_wce = 1; - - mutex_enter(&vdp->xdf_cb_lk); +/* + * Snarf new data if our flush block was re-written + */ +static void +check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) +{ + int nblks; + boolean_t mapin; - /* Watch backend XenbusState change */ - if (xvdi_add_event_handler(devi, XS_OE_STATE, xdf_oe_change, - NULL) != DDI_SUCCESS) { - mutex_exit(&vdp->xdf_cb_lk); - goto errout0; - } + if (IS_WRITE_BARRIER(vdp, bp)) + return; /* write was a flush write */ - if (xdf_start_connect(vdp) != DDI_SUCCESS) { - cmn_err(CE_WARN, "xdf@%s: start connection failed", - ddi_get_name_addr(devi)); - (void) xdf_start_disconnect(vdp); - mutex_exit(&vdp->xdf_cb_lk); - goto errout1; + mapin = B_FALSE; + nblks = bp->b_bcount >> DEV_BSHIFT; + if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { + xdf_fbrewrites++; + if (bp->b_flags & (B_PAGEIO | B_PHYS)) { + 
mapin = B_TRUE; + bp_mapin(bp); + } + bcopy(bp->b_un.b_addr + + ((xdf_flush_block - blkno) << DEV_BSHIFT), + vdp->xdf_cache_flush_block, DEV_BSIZE); + if (mapin) + bp_mapout(bp); } +} - mutex_exit(&vdp->xdf_cb_lk); +/* + * Initalize the DMA and grant table resources for the buf + */ +static int +vreq_setup(xdf_t *vdp, v_req_t *vreq) +{ + int rc; + ddi_dma_attr_t dmaattr; + uint_t ndcs, ndws; + ddi_dma_handle_t dh; + ddi_dma_handle_t mdh; + ddi_dma_cookie_t dc; + ddi_acc_handle_t abh; + caddr_t aba; + ge_slot_t *gs; + size_t bufsz; + off_t off; + size_t sz; + buf_t *bp = vreq->v_buf; + int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) | + DDI_DMA_STREAMING | DDI_DMA_PARTIAL; - list_create(&vdp->xdf_vreq_act, sizeof (v_req_t), - offsetof(v_req_t, v_link)); - list_create(&vdp->xdf_gs_act, sizeof (ge_slot_t), - offsetof(ge_slot_t, link)); + switch (vreq->v_status) { + case VREQ_INIT: + if (IS_FLUSH_DISKCACHE(bp)) { + if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { + DPRINTF(DMA_DBG, ("xdf@%s: " + "get ge_slotfailed\n", vdp->xdf_addr)); + return (DDI_FAILURE); + } + vreq->v_blkno = 0; + vreq->v_nslots = 1; + vreq->v_flush_diskcache = FLUSH_DISKCACHE; + vreq->v_status = VREQ_GS_ALLOCED; + gs->gs_vreq = vreq; + list_insert_head(&vreq->v_gs, gs); + return (DDI_SUCCESS); + } -#if defined(XPV_HVM_DRIVER) - xdf_hvm_add(devi); + if (IS_WRITE_BARRIER(vdp, bp)) + vreq->v_flush_diskcache = WRITE_BARRIER; + vreq->v_blkno = bp->b_blkno + + (diskaddr_t)(uintptr_t)bp->b_private; + /* See if we wrote new data to our flush block */ + if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp)) + check_fbwrite(vdp, bp, vreq->v_blkno); + vreq->v_status = VREQ_INIT_DONE; + /*FALLTHRU*/ - (void) ddi_prop_update_int(DDI_DEV_T_NONE, devi, DDI_NO_AUTODETACH, 1); + case VREQ_INIT_DONE: + /* + * alloc DMA handle + */ + rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr, + xdf_dmacallback, (caddr_t)vdp, &dh); + if (rc != DDI_SUCCESS) { + SETDMACBON(vdp); + DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n", + vdp->xdf_addr)); + return (DDI_FAILURE); + } - /* - * Report our version to dom0. 
- */ - if (xenbus_printf(XBT_NULL, "hvmpv/xdf", "version", "%d", - HVMPV_XDF_VERS)) - cmn_err(CE_WARN, "xdf: couldn't write version\n"); -#endif /* XPV_HVM_DRIVER */ + vreq->v_dmahdl = dh; + vreq->v_status = VREQ_DMAHDL_ALLOCED; + /*FALLTHRU*/ - ddi_report_dev(devi); + case VREQ_DMAHDL_ALLOCED: + /* + * alloc dma handle for 512-byte aligned buf + */ + if (!ALIGNED_XFER(bp)) { + /* + * XXPV: we need to temporarily enlarge the seg + * boundary and s/g length to work round CR6381968 + */ + dmaattr = xb_dma_attr; + dmaattr.dma_attr_seg = (uint64_t)-1; + dmaattr.dma_attr_sgllen = INT_MAX; + rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr, + xdf_dmacallback, (caddr_t)vdp, &mdh); + if (rc != DDI_SUCCESS) { + SETDMACBON(vdp); + DPRINTF(DMA_DBG, ("xdf@%s: " + "unaligned buf DMAhandle alloc failed\n", + vdp->xdf_addr)); + return (DDI_FAILURE); + } + vreq->v_memdmahdl = mdh; + vreq->v_status = VREQ_MEMDMAHDL_ALLOCED; + } + /*FALLTHRU*/ - DPRINTF(DDI_DBG, ("xdf%d: attached\n", instance)); + case VREQ_MEMDMAHDL_ALLOCED: + /* + * alloc 512-byte aligned buf + */ + if (!ALIGNED_XFER(bp)) { + if (bp->b_flags & (B_PAGEIO | B_PHYS)) + bp_mapin(bp); - return (DDI_SUCCESS); + rc = ddi_dma_mem_alloc(vreq->v_memdmahdl, + roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr, + DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp, + &aba, &bufsz, &abh); + if (rc != DDI_SUCCESS) { + SETDMACBON(vdp); + DPRINTF(DMA_DBG, ("xdf@%s: " + "DMA mem allocation failed\n", + vdp->xdf_addr)); + return (DDI_FAILURE); + } -errout1: - xvdi_remove_event_handler(devi, XS_OE_STATE); -errout0: - if (vdp->xdf_vd_lbl != NULL) { - cmlb_detach(vdp->xdf_vd_lbl, NULL); - cmlb_free_handle(&vdp->xdf_vd_lbl); - vdp->xdf_vd_lbl = NULL; - } -#if !defined(XPV_HVM_DRIVER) - xdf_kstat_delete(devi); -#endif /* !XPV_HVM_DRIVER */ - if (vdp->xdf_softintr_id != NULL) - ddi_remove_softintr(vdp->xdf_softintr_id); - if (vdp->xdf_ibc != NULL) { - mutex_destroy(&vdp->xdf_cb_lk); - mutex_destroy(&vdp->xdf_dev_lk); - } - cv_destroy(&vdp->xdf_dev_cv); - ddi_soft_state_free(vbd_ss, instance); - ddi_set_driver_private(devi, NULL); - ddi_prop_remove_all(devi); - cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(devi)); - return (DDI_FAILURE); -} + vreq->v_abuf = aba; + vreq->v_align = abh; + vreq->v_status = VREQ_DMAMEM_ALLOCED; -static int -xdf_detach(dev_info_t *devi, ddi_detach_cmd_t cmd) -{ - xdf_t *vdp; - int instance; + ASSERT(bufsz >= bp->b_bcount); + if (!IS_READ(bp)) + bcopy(bp->b_un.b_addr, vreq->v_abuf, + bp->b_bcount); + } + /*FALLTHRU*/ - switch (cmd) { + case VREQ_DMAMEM_ALLOCED: + /* + * dma bind + */ + if (ALIGNED_XFER(bp)) { + rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp, + dma_flags, xdf_dmacallback, (caddr_t)vdp, + &dc, &ndcs); + } else { + rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl, + NULL, vreq->v_abuf, bp->b_bcount, dma_flags, + xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs); + } + if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) { + /* get num of dma windows */ + if (rc == DDI_DMA_PARTIAL_MAP) { + rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws); + ASSERT(rc == DDI_SUCCESS); + } else { + ndws = 1; + } + } else { + SETDMACBON(vdp); + DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n", + vdp->xdf_addr)); + return (DDI_FAILURE); + } - case DDI_PM_SUSPEND: - break; + vreq->v_dmac = dc; + vreq->v_dmaw = 0; + vreq->v_ndmacs = ndcs; + vreq->v_ndmaws = ndws; + vreq->v_nslots = ndws; + vreq->v_status = VREQ_DMABUF_BOUND; + /*FALLTHRU*/ - case DDI_SUSPEND: - return (xdf_suspend(devi)); + case VREQ_DMABUF_BOUND: + /* + * get ge_slot, callback is set upon 
failure from gs_get(), + * if not set previously + */ + if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { + DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", + vdp->xdf_addr)); + return (DDI_FAILURE); + } - case DDI_DETACH: + vreq->v_status = VREQ_GS_ALLOCED; + gs->gs_vreq = vreq; + list_insert_head(&vreq->v_gs, gs); break; - default: - return (DDI_FAILURE); - } + case VREQ_GS_ALLOCED: + /* nothing need to be done */ + break; - instance = ddi_get_instance(devi); - DPRINTF(DDI_DBG, ("xdf%d: detaching\n", instance)); - vdp = ddi_get_soft_state(vbd_ss, instance); + case VREQ_DMAWIN_DONE: + /* + * move to the next dma window + */ + ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws); - if (vdp == NULL) - return (DDI_FAILURE); + /* get a ge_slot for this DMA window */ + if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { + DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", + vdp->xdf_addr)); + return (DDI_FAILURE); + } - mutex_enter(&vdp->xdf_dev_lk); - if (xdf_isopen(vdp, -1)) { - mutex_exit(&vdp->xdf_dev_lk); - return (DDI_FAILURE); - } + vreq->v_dmaw++; + VERIFY(ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz, + &vreq->v_dmac, &vreq->v_ndmacs) == DDI_SUCCESS); + vreq->v_status = VREQ_GS_ALLOCED; + gs->gs_vreq = vreq; + list_insert_head(&vreq->v_gs, gs); + break; - if (vdp->xdf_status != XD_CLOSED) { - mutex_exit(&vdp->xdf_dev_lk); + default: return (DDI_FAILURE); } -#if defined(XPV_HVM_DRIVER) - xdf_hvm_rm(devi); -#endif /* XPV_HVM_DRIVER */ - - ASSERT(!ISDMACBON(vdp)); - mutex_exit(&vdp->xdf_dev_lk); - - if (vdp->xdf_timeout_id != 0) - (void) untimeout(vdp->xdf_timeout_id); - - xvdi_remove_event_handler(devi, XS_OE_STATE); - - /* we'll support backend running in domU later */ -#ifdef DOMU_BACKEND - (void) xvdi_post_event(devi, XEN_HP_REMOVE); -#endif - - list_destroy(&vdp->xdf_vreq_act); - list_destroy(&vdp->xdf_gs_act); - ddi_prop_remove_all(devi); - xdf_kstat_delete(devi); - ddi_remove_softintr(vdp->xdf_softintr_id); - ddi_set_driver_private(devi, NULL); - cv_destroy(&vdp->xdf_dev_cv); - mutex_destroy(&vdp->xdf_cb_lk); - mutex_destroy(&vdp->xdf_dev_lk); - if (vdp->xdf_cache_flush_block != NULL) - kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE); - ddi_soft_state_free(vbd_ss, instance); return (DDI_SUCCESS); } static int -xdf_suspend(dev_info_t *devi) +xdf_cmlb_attach(xdf_t *vdp) { - xdf_t *vdp; - int instance; - enum xdf_state st; - - instance = ddi_get_instance(devi); - - if (xdfdebug & SUSRES_DBG) - xen_printf("xdf_suspend: xdf#%d\n", instance); - - if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) - return (DDI_FAILURE); - - xvdi_suspend(devi); - - mutex_enter(&vdp->xdf_cb_lk); - mutex_enter(&vdp->xdf_dev_lk); - st = vdp->xdf_status; - /* change status to stop further I/O requests */ - if (st == XD_READY) - vdp->xdf_status = XD_SUSPEND; - mutex_exit(&vdp->xdf_dev_lk); - mutex_exit(&vdp->xdf_cb_lk); + dev_info_t *dip = vdp->xdf_dip; - /* make sure no more I/O responses left in the ring buffer */ - if ((st == XD_INIT) || (st == XD_READY)) { -#ifdef XPV_HVM_DRIVER - ec_unbind_evtchn(vdp->xdf_evtchn); - xvdi_free_evtchn(devi); + return (cmlb_attach(dip, &xdf_lb_ops, + XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT, + XD_IS_RM(vdp), + B_TRUE, + XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD, +#if defined(XPV_HVM_DRIVER) + (XD_IS_CD(vdp) ? 0 : CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT) | + CMLB_INTERNAL_MINOR_NODES, #else /* !XPV_HVM_DRIVER */ - (void) ddi_remove_intr(devi, 0, NULL); + XD_IS_CD(vdp) ? 
0 : CMLB_FAKE_LABEL_ONE_PARTITION, #endif /* !XPV_HVM_DRIVER */ - (void) xdf_drain_io(vdp); - /* - * no need to teardown the ring buffer here - * it will be simply re-init'ed during resume when - * we call xvdi_alloc_ring - */ - } - - if (xdfdebug & SUSRES_DBG) - xen_printf("xdf_suspend: SUCCESS\n"); - - return (DDI_SUCCESS); + vdp->xdf_vd_lbl, NULL)); } -/*ARGSUSED*/ -static int -xdf_resume(dev_info_t *devi) +static void +xdf_io_err(buf_t *bp, int err, size_t resid) { - xdf_t *vdp; - int instance; - - instance = ddi_get_instance(devi); - if (xdfdebug & SUSRES_DBG) - xen_printf("xdf_resume: xdf%d\n", instance); + bioerror(bp, err); + if (resid == 0) + bp->b_resid = bp->b_bcount; + biodone(bp); +} - if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) - return (DDI_FAILURE); +static void +xdf_kstat_enter(xdf_t *vdp, buf_t *bp) +{ + v_req_t *vreq = BP_VREQ(bp); - mutex_enter(&vdp->xdf_cb_lk); + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); - if (xvdi_resume(devi) != DDI_SUCCESS) { - mutex_exit(&vdp->xdf_cb_lk); - return (DDI_FAILURE); + if (vdp->xdf_xdev_iostat == NULL) + return; + if ((vreq != NULL) && vreq->v_runq) { + kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); + } else { + kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); } +} - mutex_enter(&vdp->xdf_dev_lk); - ASSERT(vdp->xdf_status != XD_READY); - vdp->xdf_status = XD_UNKNOWN; - mutex_exit(&vdp->xdf_dev_lk); - - if (xdf_start_connect(vdp) != DDI_SUCCESS) { - mutex_exit(&vdp->xdf_cb_lk); - return (DDI_FAILURE); - } +static void +xdf_kstat_exit(xdf_t *vdp, buf_t *bp) +{ + v_req_t *vreq = BP_VREQ(bp); - mutex_exit(&vdp->xdf_cb_lk); + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); - if (xdfdebug & SUSRES_DBG) - xen_printf("xdf_resume: done\n"); - return (DDI_SUCCESS); + if (vdp->xdf_xdev_iostat == NULL) + return; + if ((vreq != NULL) && vreq->v_runq) { + kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); + } else { + kstat_waitq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); + } } -/*ARGSUSED*/ -static int -xdf_reset(dev_info_t *devi, ddi_reset_cmd_t cmd) +static void +xdf_kstat_waitq_to_runq(xdf_t *vdp, buf_t *bp) { - xdf_t *vdp; - int instance; - - instance = ddi_get_instance(devi); - DPRINTF(DDI_DBG, ("xdf%d: resetting\n", instance)); - if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) - return (DDI_FAILURE); + v_req_t *vreq = BP_VREQ(bp); - /* - * wait for any outstanding I/O to complete - */ - (void) xdf_drain_io(vdp); + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); + ASSERT(!vreq->v_runq); - DPRINTF(DDI_DBG, ("xdf%d: reset complete\n", instance)); - return (DDI_SUCCESS); + vreq->v_runq = B_TRUE; + if (vdp->xdf_xdev_iostat == NULL) + return; + kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); } -static int -xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp) +static void +xdf_kstat_runq_to_waitq(xdf_t *vdp, buf_t *bp) { - minor_t minor; - xdf_t *vdp; - int part; - ulong_t parbit; - diskaddr_t p_blkct = 0; - boolean_t firstopen; - boolean_t nodelay; + v_req_t *vreq = BP_VREQ(bp); - minor = getminor(*devp); - if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) - return (ENXIO); + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); + ASSERT(vreq->v_runq); - nodelay = (flag & (FNDELAY | FNONBLOCK)); + vreq->v_runq = B_FALSE; + if (vdp->xdf_xdev_iostat == NULL) + return; + kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); +} - DPRINTF(DDI_DBG, ("xdf%d: opening\n", XDF_INST(minor))); +int +xdf_kstat_create(dev_info_t *dip, char *ks_module, int instance) +{ + xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); + kstat_t 
*kstat; + buf_t *bp; - /* do cv_wait until connected or failed */ - mutex_enter(&vdp->xdf_dev_lk); - if (!nodelay && (xdf_connect(vdp, B_TRUE) != XD_READY)) { - mutex_exit(&vdp->xdf_dev_lk); - return (ENXIO); - } + if ((kstat = kstat_create( + ks_module, instance, NULL, "disk", + KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) + return (-1); - if ((flag & FWRITE) && XD_IS_RO(vdp)) { - mutex_exit(&vdp->xdf_dev_lk); - return (EROFS); - } + /* See comment about locking in xdf_kstat_delete(). */ + mutex_enter(&vdp->xdf_iostat_lk); + mutex_enter(&vdp->xdf_dev_lk); - part = XDF_PART(minor); - parbit = 1 << part; - if ((vdp->xdf_vd_exclopen & parbit) || - ((flag & FEXCL) && xdf_isopen(vdp, part))) { + /* only one kstat can exist at a time */ + if (vdp->xdf_xdev_iostat != NULL) { mutex_exit(&vdp->xdf_dev_lk); - return (EBUSY); + mutex_exit(&vdp->xdf_iostat_lk); + kstat_delete(kstat); + return (-1); } - /* are we the first one to open this node? */ - firstopen = !xdf_isopen(vdp, -1); - - if (otyp == OTYP_LYR) - vdp->xdf_vd_lyropen[part]++; - - vdp->xdf_vd_open[otyp] |= parbit; - - if (flag & FEXCL) - vdp->xdf_vd_exclopen |= parbit; - - mutex_exit(&vdp->xdf_dev_lk); - - /* force a re-validation */ - if (firstopen) - cmlb_invalidate(vdp->xdf_vd_lbl, NULL); + vdp->xdf_xdev_iostat = kstat; + vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk; + kstat_install(vdp->xdf_xdev_iostat); /* - * check size - * ignore CD/DVD which contains a zero-sized s0 + * Now that we've created a kstat, we need to update the waitq and + * runq counts for the kstat to reflect our current state. + * + * For a buf_t structure to be on the runq, it must have a ring + * buffer slot associated with it. To get a ring buffer slot the + * buf must first have a v_req_t and a ge_slot_t associated with it. + * Then when it is granted a ring buffer slot, v_runq will be set to + * true. + * + * For a buf_t structure to be on the waitq, it must not be on the + * runq. So to find all the buf_t's that should be on waitq, we + * walk the active buf list and add any buf_t's which aren't on the + * runq to the waitq. */ - if (!nodelay && !XD_IS_CD(vdp) && - ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, - NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0))) { - (void) xdf_close(*devp, flag, otyp, credp); - return (ENXIO); + bp = vdp->xdf_f_act; + while (bp != NULL) { + xdf_kstat_enter(vdp, bp); + bp = bp->av_forw; } + if (vdp->xdf_ready_tq_bp != NULL) + xdf_kstat_enter(vdp, vdp->xdf_ready_tq_bp); + mutex_exit(&vdp->xdf_dev_lk); + mutex_exit(&vdp->xdf_iostat_lk); return (0); } -/*ARGSUSED*/ -static int -xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) +void +xdf_kstat_delete(dev_info_t *dip) { - minor_t minor; - xdf_t *vdp; - int part; - ulong_t parbit; - - minor = getminor(dev); - if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) - return (ENXIO); + xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); + kstat_t *kstat; + buf_t *bp; + /* + * The locking order here is xdf_iostat_lk and then xdf_dev_lk. + * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer + * and the contents of the our kstat. xdf_iostat_lk is used + * to protect the allocation and freeing of the actual kstat. + * xdf_dev_lk can't be used for this purpose because kstat + * readers use it to access the contents of the kstat and + * hence it can't be held when calling kstat_delete(). 
+ */ + mutex_enter(&vdp->xdf_iostat_lk); mutex_enter(&vdp->xdf_dev_lk); - part = XDF_PART(minor); - if (!xdf_isopen(vdp, part)) { + + if (vdp->xdf_xdev_iostat == NULL) { mutex_exit(&vdp->xdf_dev_lk); - return (ENXIO); + mutex_exit(&vdp->xdf_iostat_lk); + return; } - parbit = 1 << part; - ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0); - if (otyp == OTYP_LYR) { - ASSERT(vdp->xdf_vd_lyropen[part] > 0); - if (--vdp->xdf_vd_lyropen[part] == 0) - vdp->xdf_vd_open[otyp] &= ~parbit; - } else { - vdp->xdf_vd_open[otyp] &= ~parbit; + /* + * We're about to destroy the kstat structures, so it isn't really + * necessary to update the runq and waitq counts. But, since this + * isn't a hot code path we can afford to be a little pedantic and + * go ahead and decrement the runq and waitq kstat counters to zero + * before free'ing them. This helps us ensure that we've gotten all + * our accounting correct. + * + * For an explanation of how we determine which buffers go on the + * runq vs which go on the waitq, see the comments in + * xdf_kstat_create(). + */ + bp = vdp->xdf_f_act; + while (bp != NULL) { + xdf_kstat_exit(vdp, bp); + bp = bp->av_forw; } - vdp->xdf_vd_exclopen &= ~parbit; + if (vdp->xdf_ready_tq_bp != NULL) + xdf_kstat_exit(vdp, vdp->xdf_ready_tq_bp); + kstat = vdp->xdf_xdev_iostat; + vdp->xdf_xdev_iostat = NULL; mutex_exit(&vdp->xdf_dev_lk); - return (0); + kstat_delete(kstat); + mutex_exit(&vdp->xdf_iostat_lk); } -static int -xdf_strategy(struct buf *bp) +/* + * Add an IO requests onto the active queue. + * + * We have to detect IOs generated by xdf_ready_tq_thread. These IOs + * are used to establish a connection to the backend, so they recieve + * priority over all other IOs. Since xdf_ready_tq_thread only does + * synchronous IO, there can only be one xdf_ready_tq_thread request at any + * given time and we record the buf associated with that request in + * xdf_ready_tq_bp. 
+ */ +static void +xdf_bp_push(xdf_t *vdp, buf_t *bp) { - xdf_t *vdp; - minor_t minor; - diskaddr_t p_blkct, p_blkst; - ulong_t nblks; - int part; - - minor = getminor(bp->b_edev); - part = XDF_PART(minor); - - vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor)); - if ((vdp == NULL) || !xdf_isopen(vdp, part)) { - bioerror(bp, ENXIO); - bp->b_resid = bp->b_bcount; - biodone(bp); - return (0); - } - - /* Check for writes to a read only device */ - if (!IS_READ(bp) && XD_IS_RO(vdp)) { - bioerror(bp, EROFS); - bp->b_resid = bp->b_bcount; - biodone(bp); - return (0); - } + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); + ASSERT(bp->av_forw == NULL); - /* Check if this I/O is accessing a partition or the entire disk */ - if ((long)bp->b_private == XB_SLICE_NONE) { - /* This I/O is using an absolute offset */ - p_blkct = vdp->xdf_xdev_nblocks; - p_blkst = 0; - } else { - /* This I/O is using a partition relative offset */ - if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, - &p_blkst, NULL, NULL, NULL)) { - bioerror(bp, ENXIO); - bp->b_resid = bp->b_bcount; - biodone(bp); - return (0); - } - } - - /* check for a starting block beyond the disk or partition limit */ - if (bp->b_blkno > p_blkct) { - DPRINTF(IO_DBG, ("xdf: block %lld exceeds VBD size %"PRIu64, - (longlong_t)bp->b_blkno, (uint64_t)p_blkct)); - bioerror(bp, EINVAL); - bp->b_resid = bp->b_bcount; - biodone(bp); - return (0); - } - - /* Legacy: don't set error flag at this case */ - if (bp->b_blkno == p_blkct) { - bp->b_resid = bp->b_bcount; - biodone(bp); - return (0); - } + xdf_kstat_enter(vdp, bp); - /* Adjust for partial transfer */ - nblks = bp->b_bcount >> XB_BSHIFT; - if ((bp->b_blkno + nblks) > p_blkct) { - bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT; - bp->b_bcount -= bp->b_resid; + if (curthread == vdp->xdf_ready_tq_thread) { + /* new IO requests from the ready thread */ + ASSERT(vdp->xdf_ready_tq_bp == NULL); + vdp->xdf_ready_tq_bp = bp; + return; } - DPRINTF(IO_DBG, ("xdf: strategy blk %lld len %lu\n", - (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount)); - - /* Fix up the buf struct */ - bp->b_flags |= B_BUSY; - bp->av_forw = bp->av_back = NULL; /* not tagged with a v_req */ - bp->b_private = (void *)(uintptr_t)p_blkst; + /* this is normal IO request */ + ASSERT(bp != vdp->xdf_ready_tq_bp); - mutex_enter(&vdp->xdf_dev_lk); - if (vdp->xdf_xdev_iostat != NULL) - kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); if (vdp->xdf_f_act == NULL) { - vdp->xdf_f_act = vdp->xdf_l_act = bp; - } else { - vdp->xdf_l_act->av_forw = bp; - vdp->xdf_l_act = bp; + /* this is only only IO on the active queue */ + ASSERT(vdp->xdf_l_act == NULL); + ASSERT(vdp->xdf_i_act == NULL); + vdp->xdf_f_act = vdp->xdf_l_act = vdp->xdf_i_act = bp; + return; } - mutex_exit(&vdp->xdf_dev_lk); - - xdf_iostart(vdp); - if (do_polled_io) - (void) xdf_drain_io(vdp); - return (0); -} - -/*ARGSUSED*/ -static int -xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) -{ - - xdf_t *vdp; - minor_t minor; - diskaddr_t p_blkcnt; - int part; - - minor = getminor(dev); - if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) - return (ENXIO); - - DPRINTF(IO_DBG, ("xdf: read offset 0x%"PRIx64"\n", - (int64_t)uiop->uio_offset)); - - part = XDF_PART(minor); - if (!xdf_isopen(vdp, part)) - return (ENXIO); - - if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, - NULL, NULL, NULL, NULL)) - return (ENXIO); - - if (U_INVAL(uiop)) - return (EINVAL); - - return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); -} - -/*ARGSUSED*/ -static int -xdf_write(dev_t 
dev, struct uio *uiop, cred_t *credp) -{ - xdf_t *vdp; - minor_t minor; - diskaddr_t p_blkcnt; - int part; - - minor = getminor(dev); - if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) - return (ENXIO); - - DPRINTF(IO_DBG, ("xdf: write offset 0x%"PRIx64"\n", - (int64_t)uiop->uio_offset)); - - part = XDF_PART(minor); - if (!xdf_isopen(vdp, part)) - return (ENXIO); - - if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, - NULL, NULL, NULL, NULL)) - return (ENXIO); - - if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) - return (ENOSPC); - - if (U_INVAL(uiop)) - return (EINVAL); - - return (physio(xdf_strategy, NULL, dev, B_WRITE, minphys, uiop)); -} - -/*ARGSUSED*/ -static int -xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) -{ - xdf_t *vdp; - minor_t minor; - struct uio *uiop = aiop->aio_uio; - diskaddr_t p_blkcnt; - int part; - - minor = getminor(dev); - if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) - return (ENXIO); - - part = XDF_PART(minor); - if (!xdf_isopen(vdp, part)) - return (ENXIO); - - if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, - NULL, NULL, NULL, NULL)) - return (ENXIO); - - if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) - return (ENOSPC); - if (U_INVAL(uiop)) - return (EINVAL); - - return (aphysio(xdf_strategy, anocancel, dev, B_READ, minphys, aiop)); + /* add this IO to the tail of the active queue */ + vdp->xdf_l_act->av_forw = bp; + vdp->xdf_l_act = bp; + if (vdp->xdf_i_act == NULL) + vdp->xdf_i_act = bp; } -/*ARGSUSED*/ -static int -xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) -{ - xdf_t *vdp; - minor_t minor; - struct uio *uiop = aiop->aio_uio; - diskaddr_t p_blkcnt; - int part; - - minor = getminor(dev); - if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) - return (ENXIO); - - part = XDF_PART(minor); - if (!xdf_isopen(vdp, part)) - return (ENXIO); - - if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, - NULL, NULL, NULL, NULL)) - return (ENXIO); - - if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) - return (ENOSPC); - - if (U_INVAL(uiop)) - return (EINVAL); - - return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, minphys, aiop)); -} - -static int -xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) +static void +xdf_bp_pop(xdf_t *vdp, buf_t *bp) { - struct buf dumpbuf, *dbp; - xdf_t *vdp; - minor_t minor; - int err = 0; - int part; - diskaddr_t p_blkcnt, p_blkst; + buf_t *bp_iter; - minor = getminor(dev); - if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL) - return (ENXIO); + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); + ASSERT(VREQ_DONE(BP_VREQ(bp))); - DPRINTF(IO_DBG, ("xdf: dump addr (0x%p) blk (%ld) nblks (%d)\n", - (void *)addr, blkno, nblk)); - - part = XDF_PART(minor); - if (!xdf_isopen(vdp, part)) - return (ENXIO); - - if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, - NULL, NULL, NULL)) - return (ENXIO); - - if ((blkno + nblk) > p_blkcnt) { - cmn_err(CE_WARN, "xdf: block %ld exceeds VBD size %"PRIu64, - blkno + nblk, (uint64_t)p_blkcnt); - return (EINVAL); + if (vdp->xdf_ready_tq_bp == bp) { + /* we're done with a ready thread IO request */ + ASSERT(bp->av_forw == NULL); + vdp->xdf_ready_tq_bp = NULL; + return; } - dbp = &dumpbuf; - bioinit(dbp); - dbp->b_flags = B_BUSY; - dbp->b_un.b_addr = addr; - dbp->b_bcount = nblk << DEV_BSHIFT; - dbp->b_blkno = blkno; - dbp->b_edev = dev; - dbp->b_private = (void *)(uintptr_t)p_blkst; + /* we're done with a normal IO request */ + ASSERT((bp->av_forw != NULL) || (bp == vdp->xdf_l_act)); + ASSERT((bp->av_forw == NULL) || (bp 
!= vdp->xdf_l_act)); + ASSERT(VREQ_DONE(BP_VREQ(vdp->xdf_f_act))); + ASSERT(vdp->xdf_f_act != vdp->xdf_i_act); - mutex_enter(&vdp->xdf_dev_lk); - if (vdp->xdf_xdev_iostat != NULL) - kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); - if (vdp->xdf_f_act == NULL) { - vdp->xdf_f_act = vdp->xdf_l_act = dbp; + if (bp == vdp->xdf_f_act) { + /* This IO was at the head of our active queue. */ + vdp->xdf_f_act = bp->av_forw; + if (bp == vdp->xdf_l_act) + vdp->xdf_l_act = NULL; } else { - vdp->xdf_l_act->av_forw = dbp; - vdp->xdf_l_act = dbp; + /* There IO finished before some other pending IOs. */ + bp_iter = vdp->xdf_f_act; + while (bp != bp_iter->av_forw) { + bp_iter = bp_iter->av_forw; + ASSERT(VREQ_DONE(BP_VREQ(bp_iter))); + ASSERT(bp_iter != vdp->xdf_i_act); + } + bp_iter->av_forw = bp->av_forw; + if (bp == vdp->xdf_l_act) + vdp->xdf_l_act = bp_iter; } - dbp->av_forw = NULL; - dbp->av_back = NULL; - mutex_exit(&vdp->xdf_dev_lk); - xdf_iostart(vdp); - err = xdf_drain_io(vdp); - biofini(dbp); - return (err); + bp->av_forw = NULL; } -/*ARGSUSED*/ -static int -xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, - int *rvalp) +static buf_t * +xdf_bp_next(xdf_t *vdp) { - int instance; - xdf_t *vdp; - minor_t minor; - int part; - - minor = getminor(dev); - instance = XDF_INST(minor); - - if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) - return (ENXIO); - - DPRINTF(IOCTL_DBG, ("xdf%d:ioctl: cmd %d (0x%x)\n", - instance, cmd, cmd)); - - part = XDF_PART(minor); - if (!xdf_isopen(vdp, part)) - return (ENXIO); - - switch (cmd) { - case DKIOCGMEDIAINFO: { - struct dk_minfo media_info; - - media_info.dki_lbsize = DEV_BSIZE; - media_info.dki_capacity = vdp->xdf_pgeom.g_capacity; - media_info.dki_media_type = DK_FIXED_DISK; + v_req_t *vreq; + buf_t *bp; - if (ddi_copyout(&media_info, (void *)arg, - sizeof (struct dk_minfo), mode)) { - return (EFAULT); - } else { - return (0); - } + if (vdp->xdf_state == XD_CONNECTED) { + /* + * If we're in the XD_CONNECTED state, we only service IOs + * from the xdf_ready_tq_thread thread. 
+ */ + if ((bp = vdp->xdf_ready_tq_bp) == NULL) + return (NULL); + if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) + return (bp); + return (NULL); } - case DKIOCINFO: { - struct dk_cinfo info; - - /* controller information */ - if (XD_IS_CD(vdp)) - info.dki_ctype = DKC_CDROM; - else - info.dki_ctype = DKC_VBD; - - info.dki_cnum = 0; - (void) strncpy((char *)(&info.dki_cname), "xdf", 8); + /* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */ + if (vdp->xdf_state != XD_READY) + return (NULL); - /* unit information */ - info.dki_unit = ddi_get_instance(vdp->xdf_dip); - (void) strncpy((char *)(&info.dki_dname), "xdf", 8); - info.dki_flags = DKI_FMTVOL; - info.dki_partition = part; - info.dki_maxtransfer = maxphys / DEV_BSIZE; - info.dki_addr = 0; - info.dki_space = 0; - info.dki_prio = 0; - info.dki_vec = 0; + ASSERT(vdp->xdf_ready_tq_bp == NULL); + for (;;) { + if ((bp = vdp->xdf_i_act) == NULL) + return (NULL); + if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq))) + return (bp); - if (ddi_copyout(&info, (void *)arg, sizeof (info), mode)) - return (EFAULT); - else - return (0); + /* advance the active buf index pointer */ + vdp->xdf_i_act = bp->av_forw; } +} - case DKIOCSTATE: { - enum dkio_state dkstate = DKIO_INSERTED; - if (ddi_copyout(&dkstate, (void *)arg, sizeof (dkstate), - mode) != 0) - return (EFAULT); - return (0); - } +static void +xdf_io_fini(xdf_t *vdp, uint64_t id, int bioerr) +{ + ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; + v_req_t *vreq = gs->gs_vreq; + buf_t *bp = vreq->v_buf; - /* - * is media removable? - */ - case DKIOCREMOVABLE: { - int i = XD_IS_RM(vdp) ? 1 : 0; - if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), mode)) - return (EFAULT); - return (0); - } + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); + ASSERT(BP_VREQ(bp) == vreq); - case DKIOCG_PHYGEOM: - case DKIOCG_VIRTGEOM: - case DKIOCGGEOM: - case DKIOCSGEOM: - case DKIOCGAPART: - case DKIOCSAPART: - case DKIOCGVTOC: - case DKIOCSVTOC: - case DKIOCPARTINFO: - case DKIOCGEXTVTOC: - case DKIOCSEXTVTOC: - case DKIOCEXTPARTINFO: - case DKIOCGMBOOT: - case DKIOCSMBOOT: - case DKIOCGETEFI: - case DKIOCSETEFI: - case DKIOCPARTITION: { - int rc; + gs_free(gs); - rc = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, - rvalp, NULL); - return (rc); - } + if (bioerr != 0) + bioerror(bp, bioerr); + ASSERT(vreq->v_nslots > 0); + if (--vreq->v_nslots > 0) + return; - case DKIOCGETWCE: - if (ddi_copyout(&vdp->xdf_wce, (void *)arg, - sizeof (vdp->xdf_wce), mode)) - return (EFAULT); - return (0); - case DKIOCSETWCE: - if (ddi_copyin((void *)arg, &vdp->xdf_wce, - sizeof (vdp->xdf_wce), mode)) - return (EFAULT); - return (0); - case DKIOCFLUSHWRITECACHE: { - int rc; - struct dk_callback *dkc = (struct dk_callback *)arg; + /* remove this IO from our active queue */ + xdf_bp_pop(vdp, bp); - if (vdp->xdf_flush_supported) { - rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, - NULL, 0, 0, (void *)dev); - } else if (vdp->xdf_feature_barrier && - !xdf_barrier_flush_disable) { - rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, - vdp->xdf_cache_flush_block, xdf_flush_block, - DEV_BSIZE, (void *)dev); - } else { - return (ENOTTY); - } - if ((mode & FKIOCTL) && (dkc != NULL) && - (dkc->dkc_callback != NULL)) { - (*dkc->dkc_callback)(dkc->dkc_cookie, rc); - /* need to return 0 after calling callback */ - rc = 0; - } - return (rc); - } + ASSERT(vreq->v_runq); + xdf_kstat_exit(vdp, bp); + vreq->v_runq = B_FALSE; + vreq_free(vdp, vreq); - default: - return (ENOTTY); + if (IS_ERROR(bp)) { + xdf_io_err(bp, geterror(bp), 0); + } 
else if (bp->b_resid != 0) { + /* Partial transfers are an error */ + xdf_io_err(bp, EIO, bp->b_resid); + } else { + biodone(bp); } } @@ -1197,24 +940,20 @@ xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, * xdf interrupt handler */ static uint_t -xdf_intr(caddr_t arg) +xdf_intr_locked(xdf_t *vdp) { - xdf_t *vdp = (xdf_t *)arg; xendev_ring_t *xbr; blkif_response_t *resp; int bioerr; uint64_t id; - extern int do_polled_io; uint8_t op; uint16_t status; ddi_acc_handle_t acchdl; - mutex_enter(&vdp->xdf_dev_lk); + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); - if ((xbr = vdp->xdf_xb_ring) == NULL) { - mutex_exit(&vdp->xdf_dev_lk); + if ((xbr = vdp->xdf_xb_ring) == NULL) return (DDI_INTR_UNCLAIMED); - } acchdl = vdp->xdf_xb_ring_hdl; @@ -1228,164 +967,256 @@ xdf_intr(caddr_t arg) DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n", op, id, status)); - /* - * XXPV - close connection to the backend and restart - */ if (status != BLKIF_RSP_OKAY) { DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s", - ddi_get_name_addr(vdp->xdf_dip), + vdp->xdf_addr, (op == BLKIF_OP_READ) ? "reading" : "writing")); bioerr = EIO; } else { bioerr = 0; } - xdf_iofini(vdp, id, bioerr); + xdf_io_fini(vdp, id, bioerr); } + return (DDI_INTR_CLAIMED); +} +static uint_t +xdf_intr(caddr_t arg) +{ + xdf_t *vdp = (xdf_t *)arg; + int rv; + + mutex_enter(&vdp->xdf_dev_lk); + rv = xdf_intr_locked(vdp); mutex_exit(&vdp->xdf_dev_lk); if (!do_polled_io) - xdf_iostart(vdp); + xdf_io_start(vdp); - return (DDI_INTR_CLAIMED); + return (rv); } -int xdf_fbrewrites; /* how many times was our flush block rewritten */ - -/* - * Snarf new data if our flush block was re-written - */ static void -check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno) +xdf_ring_push(xdf_t *vdp) { - int nblks; - boolean_t mapin; + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); - if (IS_WRITE_BARRIER(vdp, bp)) - return; /* write was a flush write */ + if (vdp->xdf_xb_ring == NULL) + return; - mapin = B_FALSE; - nblks = bp->b_bcount >> DEV_BSHIFT; - if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) { - xdf_fbrewrites++; - if (bp->b_flags & (B_PAGEIO | B_PHYS)) { - mapin = B_TRUE; - bp_mapin(bp); - } - bcopy(bp->b_un.b_addr + - ((xdf_flush_block - blkno) << DEV_BSHIFT), - vdp->xdf_cache_flush_block, DEV_BSIZE); - if (mapin) - bp_mapout(bp); + if (xvdi_ring_push_request(vdp->xdf_xb_ring)) { + DPRINTF(IO_DBG, ( + "xdf@%s: xdf_ring_push: sent request(s) to backend\n", + vdp->xdf_addr)); } + + if (xvdi_get_evtchn(vdp->xdf_dip) != INVALID_EVTCHN) + xvdi_notify_oe(vdp->xdf_dip); } -static void -xdf_iofini(xdf_t *vdp, uint64_t id, int bioerr) +static int +xdf_ring_drain_locked(xdf_t *vdp) { - ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id; - v_req_t *vreq = gs->vreq; - buf_t *bp = vreq->v_buf; + int pollc, rv = 0; - gs_free(vdp, gs); - if (bioerr) - bioerror(bp, bioerr); - vreq->v_nslots--; - if (vreq->v_nslots != 0) - return; + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); - XDF_UPDATE_IO_STAT(vdp, bp); - if (vdp->xdf_xdev_iostat != NULL) - kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); + if (xdf_debug & SUSRES_DBG) + xen_printf("xdf_ring_drain: start\n"); - if (IS_ERROR(bp)) - bp->b_resid = bp->b_bcount; + for (pollc = 0; pollc < XDF_DRAIN_RETRY_COUNT; pollc++) { + if (vdp->xdf_xb_ring == NULL) + goto out; - vreq_free(vdp, vreq); - biodone(bp); + if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) + (void) xdf_intr_locked(vdp); + if (!xvdi_ring_has_incomp_request(vdp->xdf_xb_ring)) + goto out; + xdf_ring_push(vdp); + + /* file-backed devices can be slow 
*/ + mutex_exit(&vdp->xdf_dev_lk); +#ifdef XPV_HVM_DRIVER + (void) HYPERVISOR_yield(); +#endif /* XPV_HVM_DRIVER */ + delay(drv_usectohz(XDF_DRAIN_MSEC_DELAY)); + mutex_enter(&vdp->xdf_dev_lk); + } + cmn_err(CE_WARN, "xdf@%s: xdf_ring_drain: timeout", vdp->xdf_addr); + +out: + if (vdp->xdf_xb_ring != NULL) { + if (xvdi_ring_has_incomp_request(vdp->xdf_xb_ring) || + xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) + rv = EIO; + } + if (xdf_debug & SUSRES_DBG) + xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n", + vdp->xdf_addr, rv); + return (rv); +} + +static int +xdf_ring_drain(xdf_t *vdp) +{ + int rv; + mutex_enter(&vdp->xdf_dev_lk); + rv = xdf_ring_drain_locked(vdp); + mutex_exit(&vdp->xdf_dev_lk); + return (rv); } /* - * return value of xdf_prepare_rreq() - * used in xdf_iostart() + * Destroy all v_req_t, grant table entries, and our ring buffer. */ -#define XF_PARTIAL 0 /* rreq is full, not all I/O in buf transferred */ -#define XF_COMP 1 /* no more I/O left in buf */ - static void -xdf_iostart(xdf_t *vdp) +xdf_ring_destroy(xdf_t *vdp) { - xendev_ring_t *xbr; - struct buf *bp; - blkif_request_t *rreq; - int retval; - int rreqready = 0; + v_req_t *vreq; + buf_t *bp; + ge_slot_t *gs; + + ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); + + if ((vdp->xdf_state != XD_INIT) && + (vdp->xdf_state != XD_CONNECTED) && + (vdp->xdf_state != XD_READY)) { + ASSERT(vdp->xdf_xb_ring == NULL); + ASSERT(vdp->xdf_xb_ring_hdl == NULL); + ASSERT(vdp->xdf_peer == INVALID_DOMID); + ASSERT(vdp->xdf_evtchn == INVALID_EVTCHN); + ASSERT(list_is_empty(&vdp->xdf_vreq_act)); + return; + } - xbr = vdp->xdf_xb_ring; + /* + * We don't want to recieve async notifications from the backend + * when it finishes processing ring entries. + */ +#ifdef XPV_HVM_DRIVER + ec_unbind_evtchn(vdp->xdf_evtchn); +#else /* !XPV_HVM_DRIVER */ + (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); +#endif /* !XPV_HVM_DRIVER */ /* - * populate the ring request(s) - * - * loop until there is no buf to transfer or no free slot - * available in I/O ring + * Drain any requests in the ring. We need to do this before we + * can free grant table entries, because if active ring entries + * point to grants, then the backend could be trying to access + * those grants. */ - mutex_enter(&vdp->xdf_dev_lk); + (void) xdf_ring_drain_locked(vdp); - for (;;) { - if (vdp->xdf_status != XD_READY) - break; + /* We're done talking to the backend so free up our event channel */ + xvdi_free_evtchn(vdp->xdf_dip); + vdp->xdf_evtchn = INVALID_EVTCHN; - /* active buf queue empty? */ - if ((bp = vdp->xdf_f_act) == NULL) - break; + while ((vreq = list_head(&vdp->xdf_vreq_act)) != NULL) { + bp = vreq->v_buf; + ASSERT(BP_VREQ(bp) == vreq); - /* try to grab a vreq for this bp */ - if ((BP2VREQ(bp) == NULL) && (vreq_get(vdp, bp) == NULL)) - break; - /* alloc DMA/GTE resources */ - if (vreq_setup(vdp, BP2VREQ(bp)) != DDI_SUCCESS) - break; + /* Free up any grant table entries associaed with this IO */ + while ((gs = list_head(&vreq->v_gs)) != NULL) + gs_free(gs); - /* get next blkif_request in the ring */ - if ((rreq = xvdi_ring_get_request(xbr)) == NULL) - break; - bzero(rreq, sizeof (blkif_request_t)); + /* If this IO was on the runq, move it back to the waitq. 
*/ + if (vreq->v_runq) + xdf_kstat_runq_to_waitq(vdp, bp); - /* populate blkif_request with this buf */ - rreqready++; - retval = xdf_prepare_rreq(vdp, bp, rreq); - if (retval == XF_COMP) { - /* finish this bp, switch to next one */ - if (vdp->xdf_xdev_iostat != NULL) - kstat_waitq_to_runq( - KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); - vdp->xdf_f_act = bp->av_forw; - bp->av_forw = NULL; - } + /* + * Reset any buf IO state since we're going to re-issue the + * IO when we reconnect. + */ + vreq_free(vdp, vreq); + BP_VREQ_SET(bp, NULL); + bioerror(bp, 0); } - /* - * Send the request(s) to the backend - */ - if (rreqready) { - if (xvdi_ring_push_request(xbr)) { - DPRINTF(IO_DBG, ("xdf_iostart: " - "sent request(s) to backend\n")); - xvdi_notify_oe(vdp->xdf_dip); - } + /* reset the active queue index pointer */ + vdp->xdf_i_act = vdp->xdf_f_act; + + /* Destroy the ring */ + xvdi_free_ring(vdp->xdf_xb_ring); + vdp->xdf_xb_ring = NULL; + vdp->xdf_xb_ring_hdl = NULL; + vdp->xdf_peer = INVALID_DOMID; +} + +void +xdfmin(struct buf *bp) +{ + if (bp->b_bcount > xdf_maxphys) + bp->b_bcount = xdf_maxphys; +} + +/* + * Check if we have a pending "eject" media request. + */ +static int +xdf_eject_pending(xdf_t *vdp) +{ + dev_info_t *dip = vdp->xdf_dip; + char *xsname, *str; + + if (!vdp->xdf_media_req_supported) + return (B_FALSE); + + if (((xsname = xvdi_get_xsname(dip)) == NULL) || + (xenbus_read_str(xsname, XBP_MEDIA_REQ, &str) != 0)) + return (B_FALSE); + + if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) { + strfree(str); + return (B_FALSE); } + strfree(str); + return (B_TRUE); +} - mutex_exit(&vdp->xdf_dev_lk); +/* + * Generate a media request. + */ +static int +xdf_media_req(xdf_t *vdp, char *req, boolean_t media_required) +{ + dev_info_t *dip = vdp->xdf_dip; + char *xsname; + + ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); + + if ((xsname = xvdi_get_xsname(dip)) == NULL) + return (ENXIO); + + /* Check if we support media requests */ + if (!XD_IS_CD(vdp) || !vdp->xdf_media_req_supported) + return (ENOTTY); + + /* If an eject is pending then don't allow any new requests */ + if (xdf_eject_pending(vdp)) + return (ENXIO); + + /* Make sure that there is media present */ + if (media_required && (vdp->xdf_xdev_nblocks == 0)) + return (ENXIO); + + /* We only allow operations when the device is ready and connected */ + if (vdp->xdf_state != XD_READY) + return (EIO); + + if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ, "%s", req) != 0) + return (EIO); + + return (0); } /* * populate a single blkif_request_t w/ a buf */ -static int -xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) +static void +xdf_process_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) { - int rval; grant_ref_t gr; uint8_t fsect, lsect; size_t bcnt; @@ -1393,12 +1224,16 @@ xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) off_t blk_off; dev_info_t *dip = vdp->xdf_dip; blkif_vdev_t vdev = xvdi_get_vdevnum(dip); - v_req_t *vreq = BP2VREQ(bp); + v_req_t *vreq = BP_VREQ(bp); uint64_t blkno = vreq->v_blkno; uint_t ndmacs = vreq->v_ndmacs; ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl; int seg = 0; int isread = IS_READ(bp); + ge_slot_t *gs = list_head(&vreq->v_gs); + + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); + ASSERT(vreq->v_status == VREQ_GS_ALLOCED); if (isread) ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ); @@ -1409,9 +1244,10 @@ xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) BLKIF_OP_FLUSH_DISKCACHE); ddi_put16(acchdl, &rreq->handle, vdev); ddi_put64(acchdl, &rreq->id, - 
(uint64_t)(uintptr_t)(vreq->v_gs)); + (uint64_t)(uintptr_t)(gs)); ddi_put8(acchdl, &rreq->nr_segments, 0); - return (XF_COMP); + vreq->v_status = VREQ_DMAWIN_DONE; + return; case WRITE_BARRIER: ddi_put8(acchdl, &rreq->operation, BLKIF_OP_WRITE_BARRIER); @@ -1429,34 +1265,39 @@ xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) ddi_put16(acchdl, &rreq->handle, vdev); ddi_put64(acchdl, &rreq->sector_number, blkno); - ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(vreq->v_gs)); + ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(gs)); /* * loop until all segments are populated or no more dma cookie in buf */ for (;;) { - /* - * Each segment of a blkif request can transfer up to - * one 4K page of data. - */ + /* + * Each segment of a blkif request can transfer up to + * one 4K page of data. + */ bcnt = vreq->v_dmac.dmac_size; - ASSERT(bcnt <= PAGESIZE); - ASSERT((bcnt % XB_BSIZE) == 0); dma_addr = vreq->v_dmac.dmac_laddress; blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr); - ASSERT((blk_off & XB_BMASK) == 0); fsect = blk_off >> XB_BSHIFT; lsect = fsect + (bcnt >> XB_BSHIFT) - 1; + + ASSERT(bcnt <= PAGESIZE); + ASSERT((bcnt % XB_BSIZE) == 0); + ASSERT((blk_off & XB_BMASK) == 0); ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE && lsect < XB_MAX_SEGLEN / XB_BSIZE); - DPRINTF(IO_DBG, (" ""seg%d: dmacS %lu blk_off %ld\n", - seg, vreq->v_dmac.dmac_size, blk_off)); - gr = gs_grant(vreq->v_gs, PATOMA(dma_addr) >> PAGESHIFT); + + gr = gs_grant(gs, PATOMA(dma_addr) >> PAGESHIFT); ddi_put32(acchdl, &rreq->seg[seg].gref, gr); ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect); ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect); - DPRINTF(IO_DBG, (" ""seg%d: fs %d ls %d gr %d dma 0x%"PRIx64 - "\n", seg, fsect, lsect, gr, dma_addr)); + + DPRINTF(IO_DBG, ( + "xdf@%s: seg%d: dmacS %lu blk_off %ld\n", + vdp->xdf_addr, seg, vreq->v_dmac.dmac_size, blk_off)); + DPRINTF(IO_DBG, ( + "xdf@%s: seg%d: fs %d ls %d gr %d dma 0x%"PRIx64"\n", + vdp->xdf_addr, seg, fsect, lsect, gr, dma_addr)); blkno += (bcnt >> XB_BSHIFT); seg++; @@ -1468,243 +1309,243 @@ xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq) vreq->v_status = VREQ_DMAWIN_DONE; vreq->v_blkno = blkno; - if (vreq->v_dmaw + 1 == vreq->v_ndmaws) - /* last win */ - rval = XF_COMP; - else - rval = XF_PARTIAL; break; } ddi_put8(acchdl, &rreq->nr_segments, seg); - DPRINTF(IO_DBG, ("xdf_prepare_rreq: request id=%"PRIx64" ready\n", - rreq->id)); - - return (rval); + DPRINTF(IO_DBG, ( + "xdf@%s: xdf_process_rreq: request id=%"PRIx64" ready\n", + vdp->xdf_addr, rreq->id)); } -#define XDF_QSEC 50000 /* .005 second */ -#define XDF_POLLCNT 12 /* loop for 12 times before time out */ - -static int -xdf_drain_io(xdf_t *vdp) +static void +xdf_io_start(xdf_t *vdp) { - int pollc, rval; - xendev_ring_t *xbr; - - if (xdfdebug & SUSRES_DBG) - xen_printf("xdf_drain_io: start\n"); + struct buf *bp; + v_req_t *vreq; + blkif_request_t *rreq; + boolean_t rreqready = B_FALSE; mutex_enter(&vdp->xdf_dev_lk); - if ((vdp->xdf_status != XD_READY) && (vdp->xdf_status != XD_SUSPEND)) - goto out; + /* + * Populate the ring request(s). Loop until there is no buf to + * transfer or no free slot available in I/O ring. 
+ */ + for (;;) { + /* don't start any new IO if we're suspending */ + if (vdp->xdf_suspending) + break; + if ((bp = xdf_bp_next(vdp)) == NULL) + break; - rval = 0; - xbr = vdp->xdf_xb_ring; - ASSERT(xbr != NULL); + /* if the buf doesn't already have a vreq, allocate one */ + if (((vreq = BP_VREQ(bp)) == NULL) && + ((vreq = vreq_get(vdp, bp)) == NULL)) + break; - for (pollc = 0; pollc < XDF_POLLCNT; pollc++) { - if (xvdi_ring_has_unconsumed_responses(xbr)) { - mutex_exit(&vdp->xdf_dev_lk); - (void) xdf_intr((caddr_t)vdp); - mutex_enter(&vdp->xdf_dev_lk); - } - if (!xvdi_ring_has_incomp_request(xbr)) - goto out; + /* alloc DMA/GTE resources */ + if (vreq_setup(vdp, vreq) != DDI_SUCCESS) + break; + + /* get next blkif_request in the ring */ + if ((rreq = xvdi_ring_get_request(vdp->xdf_xb_ring)) == NULL) + break; + bzero(rreq, sizeof (blkif_request_t)); + rreqready = B_TRUE; + + /* populate blkif_request with this buf */ + xdf_process_rreq(vdp, bp, rreq); -#ifndef XPV_HVM_DRIVER - (void) HYPERVISOR_yield(); -#endif /* XPV_HVM_DRIVER */ /* - * file-backed devices can be slow + * This buffer/vreq pair is has been allocated a ring buffer + * resources, so if it isn't already in our runq, add it. */ - drv_usecwait(XDF_QSEC << pollc); + if (!vreq->v_runq) + xdf_kstat_waitq_to_runq(vdp, bp); } - cmn_err(CE_WARN, "xdf_polled_io: timeout"); - rval = EIO; -out: + + /* Send the request(s) to the backend */ + if (rreqready) + xdf_ring_push(vdp); + mutex_exit(&vdp->xdf_dev_lk); - if (xdfdebug & SUSRES_DBG) - xen_printf("xdf_drain_io: end, err=%d\n", rval); - return (rval); } -/* ARGSUSED5 */ -int -xdf_lb_rdwr(dev_info_t *devi, uchar_t cmd, void *bufp, - diskaddr_t start, size_t reqlen, void *tg_cookie) -{ - xdf_t *vdp; - struct buf *bp; - int err = 0; - vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); - if (vdp == NULL) - return (ENXIO); +/* check if partition is open, -1 - check all partitions on the disk */ +static boolean_t +xdf_isopen(xdf_t *vdp, int partition) +{ + int i; + ulong_t parbit; + boolean_t rval = B_FALSE; - if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity) - return (EINVAL); + ASSERT((partition == -1) || + ((partition >= 0) || (partition < XDF_PEXT))); - bp = getrbuf(KM_SLEEP); - if (cmd == TG_READ) - bp->b_flags = B_BUSY | B_READ; + if (partition == -1) + parbit = (ulong_t)-1; else - bp->b_flags = B_BUSY | B_WRITE; - bp->b_un.b_addr = bufp; - bp->b_bcount = reqlen; - bp->b_blkno = start; - bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ + parbit = 1 << partition; - mutex_enter(&vdp->xdf_dev_lk); - if (vdp->xdf_xdev_iostat != NULL) - kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); - if (vdp->xdf_f_act == NULL) { - vdp->xdf_f_act = vdp->xdf_l_act = bp; - } else { - vdp->xdf_l_act->av_forw = bp; - vdp->xdf_l_act = bp; + for (i = 0; i < OTYPCNT; i++) { + if (vdp->xdf_vd_open[i] & parbit) + rval = B_TRUE; } - mutex_exit(&vdp->xdf_dev_lk); - xdf_iostart(vdp); - err = biowait(bp); - - ASSERT(bp->b_flags & B_DONE); - freerbuf(bp); - return (err); + return (rval); } /* - * synthetic geometry + * The connection should never be closed as long as someone is holding + * us open, there is pending IO, or someone is waiting waiting for a + * connection. 
*/ -#define XDF_NSECTS 256 -#define XDF_NHEADS 16 - -static void -xdf_synthetic_pgeom(dev_info_t *devi, cmlb_geom_t *geomp) +static boolean_t +xdf_busy(xdf_t *vdp) { - xdf_t *vdp; - uint_t ncyl; - - vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); - - ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS); + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); - geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; - geomp->g_acyl = 0; - geomp->g_nhead = XDF_NHEADS; - geomp->g_secsize = XB_BSIZE; - geomp->g_nsect = XDF_NSECTS; - geomp->g_intrlv = 0; - geomp->g_rpm = 7200; - geomp->g_capacity = vdp->xdf_xdev_nblocks; -} + if ((vdp->xdf_xb_ring != NULL) && + xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) { + ASSERT(vdp->xdf_state != XD_CLOSED); + return (B_TRUE); + } -static int -xdf_lb_getcap(dev_info_t *devi, diskaddr_t *capp) -{ - xdf_t *vdp; + if (!list_is_empty(&vdp->xdf_vreq_act) || (vdp->xdf_f_act != NULL)) { + ASSERT(vdp->xdf_state != XD_CLOSED); + return (B_TRUE); + } - vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)); + if (xdf_isopen(vdp, -1)) { + ASSERT(vdp->xdf_state != XD_CLOSED); + return (B_TRUE); + } - if (vdp == NULL) - return (ENXIO); + if (vdp->xdf_connect_req > 0) { + ASSERT(vdp->xdf_state != XD_CLOSED); + return (B_TRUE); + } - mutex_enter(&vdp->xdf_dev_lk); - *capp = vdp->xdf_pgeom.g_capacity; - DPRINTF(LBL_DBG, ("capacity %llu\n", *capp)); - mutex_exit(&vdp->xdf_dev_lk); - return (0); + return (B_FALSE); } -static int -xdf_lb_getpgeom(dev_info_t *devi, cmlb_geom_t *geomp) +static void +xdf_set_state(xdf_t *vdp, xdf_state_t new_state) { - xdf_t *vdp; - - if ((vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi))) == NULL) - return (ENXIO); - *geomp = vdp->xdf_pgeom; - return (0); + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); + DPRINTF(DDI_DBG, ("xdf@%s: state change %d -> %d\n", + vdp->xdf_addr, vdp->xdf_state, new_state)); + vdp->xdf_state = new_state; + cv_broadcast(&vdp->xdf_dev_cv); } -/* - * No real HBA, no geometry available from it - */ -/*ARGSUSED*/ -static int -xdf_lb_getvgeom(dev_info_t *devi, cmlb_geom_t *geomp) +static void +xdf_disconnect(xdf_t *vdp, xdf_state_t new_state, boolean_t quiet) { - return (EINVAL); -} + dev_info_t *dip = vdp->xdf_dip; + boolean_t busy; -static int -xdf_lb_getattribute(dev_info_t *devi, tg_attribute_t *tgattributep) -{ - xdf_t *vdp; + ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); + ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); + ASSERT((new_state == XD_UNKNOWN) || (new_state == XD_CLOSED)); - if (!(vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi)))) - return (ENXIO); + /* Check if we're already there. */ + if (vdp->xdf_state == new_state) + return; - if (XD_IS_RO(vdp)) - tgattributep->media_is_writable = 0; - else - tgattributep->media_is_writable = 1; - return (0); -} + mutex_enter(&vdp->xdf_dev_lk); + busy = xdf_busy(vdp); -/* ARGSUSED3 */ -int -xdf_lb_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie) -{ - switch (cmd) { - case TG_GETPHYGEOM: - return (xdf_lb_getpgeom(devi, (cmlb_geom_t *)arg)); - case TG_GETVIRTGEOM: - return (xdf_lb_getvgeom(devi, (cmlb_geom_t *)arg)); - case TG_GETCAPACITY: - return (xdf_lb_getcap(devi, (diskaddr_t *)arg)); - case TG_GETBLOCKSIZE: - *(uint32_t *)arg = XB_BSIZE; - return (0); - case TG_GETATTR: - return (xdf_lb_getattribute(devi, (tg_attribute_t *)arg)); - default: - return (ENOTTY); + /* If we're already closed then there's nothing todo. 
*/ + if (vdp->xdf_state == XD_CLOSED) { + ASSERT(!busy); + xdf_set_state(vdp, new_state); + mutex_exit(&vdp->xdf_dev_lk); + return; + } + +#ifdef DEBUG + /* UhOh. Warn the user that something bad has happened. */ + if (!quiet && busy && (vdp->xdf_state == XD_READY) && + (vdp->xdf_xdev_nblocks != 0)) { + cmn_err(CE_WARN, "xdf@%s: disconnected while in use", + vdp->xdf_addr); } +#endif /* DEBUG */ + + xdf_ring_destroy(vdp); + + /* If we're busy then we can only go into the unknown state */ + xdf_set_state(vdp, (busy) ? XD_UNKNOWN : new_state); + mutex_exit(&vdp->xdf_dev_lk); + + /* if we're closed now, let the other end know */ + if (vdp->xdf_state == XD_CLOSED) + (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); } + /* * Kick-off connect process * Status should be XD_UNKNOWN or XD_CLOSED * On success, status will be changed to XD_INIT - * On error, status won't be changed + * On error, it will be changed to XD_UNKNOWN */ static int -xdf_start_connect(xdf_t *vdp) +xdf_setstate_init(xdf_t *vdp) { - char *xsnode; - grant_ref_t gref; - xenbus_transaction_t xbt; - int rv; - dev_info_t *dip = vdp->xdf_dip; + dev_info_t *dip = vdp->xdf_dip; + xenbus_transaction_t xbt; + grant_ref_t gref; + char *xsname, *str; + int rv; + + ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); + ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); + ASSERT((vdp->xdf_state == XD_UNKNOWN) || + (vdp->xdf_state == XD_CLOSED)); + + DPRINTF(DDI_DBG, + ("xdf@%s: starting connection process\n", vdp->xdf_addr)); - if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == (domid_t)-1) + /* + * If an eject is pending then don't allow a new connection, but + * we want to return without displaying an error message. + */ + if (xdf_eject_pending(vdp)) { + xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); + return (DDI_FAILURE); + } + + if ((xsname = xvdi_get_xsname(dip)) == NULL) goto errout; - if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) { - cmn_err(CE_WARN, "xdf@%s: failed to alloc event channel", - ddi_get_name_addr(dip)); + if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == INVALID_DOMID) goto errout; - } + + (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialising); + + /* + * Sanity check for the existance of the xenbus device-type property. + * This property might not exist if we our xenbus device nodes was + * force destroyed while we were still connected to the backend. + */ + if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) + goto errout; + strfree(str); + + if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) + goto errout; + vdp->xdf_evtchn = xvdi_get_evtchn(dip); #ifdef XPV_HVM_DRIVER ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp); #else /* !XPV_HVM_DRIVER */ if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) != DDI_SUCCESS) { - cmn_err(CE_WARN, "xdf_start_connect: xdf@%s: " - "failed to add intr handler", ddi_get_name_addr(dip)); + cmn_err(CE_WARN, "xdf@%s: xdf_setstate_init: " + "failed to add intr handler", vdp->xdf_addr); goto errout1; } #endif /* !XPV_HVM_DRIVER */ @@ -1713,7 +1554,7 @@ xdf_start_connect(xdf_t *vdp) sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) != DDI_SUCCESS) { cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring", - ddi_get_name_addr(dip)); + vdp->xdf_addr); goto errout2; } vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! 
*/ @@ -1721,75 +1562,48 @@ xdf_start_connect(xdf_t *vdp) /* * Write into xenstore the info needed by backend */ - if ((xsnode = xvdi_get_xsname(dip)) == NULL) { - cmn_err(CE_WARN, "xdf@%s: " - "failed to get xenstore node path", - ddi_get_name_addr(dip)); - goto fail_trans; - } trans_retry: if (xenbus_transaction_start(&xbt)) { cmn_err(CE_WARN, "xdf@%s: failed to start transaction", - ddi_get_name_addr(dip)); - xvdi_fatal_error(dip, EIO, "transaction start"); + vdp->xdf_addr); + xvdi_fatal_error(dip, EIO, "connect transaction init"); goto fail_trans; } - if (rv = xenbus_printf(xbt, xsnode, "ring-ref", "%u", gref)) { - cmn_err(CE_WARN, "xdf@%s: failed to write ring-ref", - ddi_get_name_addr(dip)); - xvdi_fatal_error(dip, rv, "writing ring-ref"); - goto abort_trans; - } - - if (rv = xenbus_printf(xbt, xsnode, "event-channel", "%u", - vdp->xdf_evtchn)) { - cmn_err(CE_WARN, "xdf@%s: failed to write event-channel", - ddi_get_name_addr(dip)); - xvdi_fatal_error(dip, rv, "writing event-channel"); - goto abort_trans; - } - /* - * "protocol" is written by the domain builder in the case of PV + * XBP_PROTOCOL is written by the domain builder in the case of PV * domains. However, it is not written for HVM domains, so let's * write it here. */ - if (rv = xenbus_printf(xbt, xsnode, "protocol", "%s", - XEN_IO_PROTO_ABI_NATIVE)) { - cmn_err(CE_WARN, "xdf@%s: failed to write protocol", - ddi_get_name_addr(dip)); - xvdi_fatal_error(dip, rv, "writing protocol"); - goto abort_trans; - } - - if ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0) { - cmn_err(CE_WARN, "xdf@%s: " - "failed to switch state to XenbusStateInitialised", - ddi_get_name_addr(dip)); - xvdi_fatal_error(dip, rv, "writing state"); - goto abort_trans; + if (((rv = xenbus_printf(xbt, xsname, + XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE)) != 0) || + ((rv = xenbus_printf(xbt, xsname, + XBP_RING_REF, "%u", gref)) != 0) || + ((rv = xenbus_printf(xbt, xsname, + XBP_EVENT_CHAN, "%u", vdp->xdf_evtchn)) != 0) || + ((rv = xenbus_printf(xbt, xsname, + XBP_PROTOCOL, "%s", XEN_IO_PROTO_ABI_NATIVE)) != 0) || + ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0)) { + (void) xenbus_transaction_end(xbt, 1); + xvdi_fatal_error(dip, rv, "connect transaction setup"); + goto fail_trans; } /* kick-off connect process */ if (rv = xenbus_transaction_end(xbt, 0)) { if (rv == EAGAIN) goto trans_retry; - cmn_err(CE_WARN, "xdf@%s: failed to end transaction", - ddi_get_name_addr(dip)); - xvdi_fatal_error(dip, rv, "completing transaction"); + xvdi_fatal_error(dip, rv, "connect transaction commit"); goto fail_trans; } - ASSERT(mutex_owned(&vdp->xdf_cb_lk)); + ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); mutex_enter(&vdp->xdf_dev_lk); - vdp->xdf_status = XD_INIT; + xdf_set_state(vdp, XD_INIT); mutex_exit(&vdp->xdf_dev_lk); return (DDI_SUCCESS); -abort_trans: - (void) xenbus_transaction_end(xbt, 1); fail_trans: xvdi_free_ring(vdp->xdf_xb_ring); errout2: @@ -1800,28 +1614,14 @@ errout2: #endif /* !XPV_HVM_DRIVER */ errout1: xvdi_free_evtchn(dip); + vdp->xdf_evtchn = INVALID_EVTCHN; errout: - cmn_err(CE_WARN, "xdf@%s: fail to kick-off connecting", - ddi_get_name_addr(dip)); + xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); + cmn_err(CE_WARN, "xdf@%s: failed to start connection to backend", + vdp->xdf_addr); return (DDI_FAILURE); } -/* - * Kick-off disconnect process - * Status won't be changed - */ -static int -xdf_start_disconnect(xdf_t *vdp) -{ - if (xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed) > 0) { - cmn_err(CE_WARN, "xdf@%s: fail to kick-off 
disconnecting", - ddi_get_name_addr(vdp->xdf_dip)); - return (DDI_FAILURE); - } - - return (DDI_SUCCESS); -} - int xdf_get_flush_block(xdf_t *vdp) { @@ -1837,92 +1637,12 @@ xdf_get_flush_block(xdf_t *vdp) return (DDI_SUCCESS); } -/* - * Finish other initialization after we've connected to backend - * Status should be XD_INIT before calling this routine - * On success, status should be changed to XD_READY - * On error, status should stay XD_INIT - */ -static int -xdf_post_connect(xdf_t *vdp) +static void +xdf_setstate_ready(void *arg) { - int rv; - uint_t len; - char *type; - char *barrier; - dev_info_t *devi = vdp->xdf_dip; + xdf_t *vdp = (xdf_t *)arg; - /* - * Determine if feature barrier is supported by backend - */ - if (xenbus_read(XBT_NULL, xvdi_get_oename(devi), - "feature-barrier", (void **)&barrier, &len) == 0) { - vdp->xdf_feature_barrier = 1; - kmem_free(barrier, len); - } else { - cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier", - ddi_get_name_addr(vdp->xdf_dip)); - vdp->xdf_feature_barrier = 0; - } - - /* probe backend */ - if (rv = xenbus_gather(XBT_NULL, xvdi_get_oename(devi), - "sectors", "%"SCNu64, &vdp->xdf_xdev_nblocks, - "info", "%u", &vdp->xdf_xdev_info, NULL)) { - cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " - "cannot read backend info", ddi_get_name_addr(devi)); - xvdi_fatal_error(devi, rv, "reading backend info"); - return (DDI_FAILURE); - } - - /* - * Make sure that the device we're connecting isn't smaller than - * the old connected device. - */ - if (vdp->xdf_xdev_nblocks < vdp->xdf_pgeom.g_capacity) { - cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " - "backend disk device shrank", ddi_get_name_addr(devi)); - /* XXX: call xvdi_fatal_error() here? */ - xvdi_fatal_error(devi, rv, "reading backend info"); - return (DDI_FAILURE); - } - -#ifdef _ILP32 - if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) { - cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " - "backend disk device too large with %llu blocks for" - " 32-bit kernel", ddi_get_name_addr(devi), - vdp->xdf_xdev_nblocks); - xvdi_fatal_error(devi, rv, "reading backend info"); - return (DDI_FAILURE); - } -#endif - - - /* - * Only update the physical geometry to reflect the new device - * size if this is the first time we're connecting to the backend - * device. Once we assign a physical geometry to a device it stays - * fixed until: - * - we get detach and re-attached (at which point we - * automatically assign a new physical geometry). - * - someone calls TG_SETPHYGEOM to explicity set the - * physical geometry. - */ - if (vdp->xdf_pgeom.g_capacity == 0) - xdf_synthetic_pgeom(devi, &vdp->xdf_pgeom); - - /* fix disk type */ - if (xenbus_read(XBT_NULL, xvdi_get_xsname(devi), "device-type", - (void **)&type, &len) != 0) { - cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: " - "cannot read device-type", ddi_get_name_addr(devi)); - xvdi_fatal_error(devi, rv, "reading device-type"); - return (DDI_FAILURE); - } - if (strcmp(type, "cdrom") == 0) - vdp->xdf_xdev_info |= VDISK_CDROM; - kmem_free(type, len); + vdp->xdf_ready_tq_thread = curthread; /* * We've created all the minor nodes via cmlb_attach() using default @@ -1930,50 +1650,41 @@ xdf_post_connect(xdf_t *vdp) * in case there's anyone (say, booting thread) ever trying to open * it before connected to backend. We will refresh all those minor * nodes w/ latest info we've got now when we are almost connected. 
- * - * Don't do this when xdf is already opened by someone (could happen - * during resume), for that cmlb_attach() will invalid the label info - * and confuse those who has already opened the node, which is bad. */ - if (!xdf_isopen(vdp, -1) && (XD_IS_CD(vdp) || XD_IS_RM(vdp))) { - /* re-init cmlb w/ latest info we got from backend */ - if (cmlb_attach(devi, &xdf_lb_ops, - XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT, - XD_IS_RM(vdp), 1, - XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD, -#if defined(XPV_HVM_DRIVER) - CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT | - CMLB_INTERNAL_MINOR_NODES, -#else /* !XPV_HVM_DRIVER */ - CMLB_FAKE_LABEL_ONE_PARTITION, -#endif /* !XPV_HVM_DRIVER */ - vdp->xdf_vd_lbl, NULL) != 0) { - cmn_err(CE_WARN, "xdf@%s: cmlb attach failed", - ddi_get_name_addr(devi)); - return (DDI_FAILURE); + mutex_enter(&vdp->xdf_dev_lk); + if (vdp->xdf_cmbl_reattach) { + vdp->xdf_cmbl_reattach = B_FALSE; + + mutex_exit(&vdp->xdf_dev_lk); + if (xdf_cmlb_attach(vdp) != 0) { + xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); + return; } + mutex_enter(&vdp->xdf_dev_lk); } - /* mark vbd is ready for I/O */ - ASSERT(mutex_owned(&vdp->xdf_cb_lk)); - mutex_enter(&vdp->xdf_dev_lk); - vdp->xdf_status = XD_READY; + /* If we're not still trying to get to the ready state, then bail. */ + if (vdp->xdf_state != XD_CONNECTED) { + mutex_exit(&vdp->xdf_dev_lk); + return; + } mutex_exit(&vdp->xdf_dev_lk); + /* * If backend has feature-barrier, see if it supports disk * cache flush op. */ - vdp->xdf_flush_supported = 0; + vdp->xdf_flush_supported = B_FALSE; if (vdp->xdf_feature_barrier) { /* * Pretend we already know flush is supported so probe * will attempt the correct op. */ - vdp->xdf_flush_supported = 1; + vdp->xdf_flush_supported = B_TRUE; if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) { - vdp->xdf_flush_supported = 1; + vdp->xdf_flush_supported = B_TRUE; } else { - vdp->xdf_flush_supported = 0; + vdp->xdf_flush_supported = B_FALSE; /* * If the other end does not support the cache flush op * then we must use a barrier-write to force disk @@ -1985,39 +1696,182 @@ xdf_post_connect(xdf_t *vdp) * (512 bytes) from whatever write we did last * and rewrite that block? */ - if (xdf_get_flush_block(vdp) != DDI_SUCCESS) - return (DDI_FAILURE); + if (xdf_get_flush_block(vdp) != DDI_SUCCESS) { + xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); + return; + } } } - cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", ddi_get_name_addr(devi), - (uint64_t)vdp->xdf_xdev_nblocks); + mutex_enter(&vdp->xdf_cb_lk); + mutex_enter(&vdp->xdf_dev_lk); + if (vdp->xdf_state == XD_CONNECTED) + xdf_set_state(vdp, XD_READY); + mutex_exit(&vdp->xdf_dev_lk); - return (DDI_SUCCESS); + /* Restart any currently queued up io */ + xdf_io_start(vdp); + + mutex_exit(&vdp->xdf_cb_lk); } /* - * Finish other uninitialization after we've disconnected from backend - * when status is XD_CLOSING or XD_INIT. 
After returns, status is XD_CLOSED + * synthetic geometry */ +#define XDF_NSECTS 256 +#define XDF_NHEADS 16 + static void -xdf_post_disconnect(xdf_t *vdp) +xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp) { -#ifdef XPV_HVM_DRIVER - ec_unbind_evtchn(vdp->xdf_evtchn); -#else /* !XPV_HVM_DRIVER */ - (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL); -#endif /* !XPV_HVM_DRIVER */ - xvdi_free_evtchn(vdp->xdf_dip); - xvdi_free_ring(vdp->xdf_xb_ring); - vdp->xdf_xb_ring = NULL; - vdp->xdf_xb_ring_hdl = NULL; - vdp->xdf_peer = (domid_t)-1; + xdf_t *vdp; + uint_t ncyl; + + vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); + + ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS); - ASSERT(mutex_owned(&vdp->xdf_cb_lk)); + bzero(geomp, sizeof (*geomp)); + geomp->g_ncyl = ncyl == 0 ? 1 : ncyl; + geomp->g_acyl = 0; + geomp->g_nhead = XDF_NHEADS; + geomp->g_nsect = XDF_NSECTS; + geomp->g_secsize = XB_BSIZE; + geomp->g_capacity = vdp->xdf_xdev_nblocks; + geomp->g_intrlv = 0; + geomp->g_rpm = 7200; +} + +/* + * Finish other initialization after we've connected to backend + * Status should be XD_INIT before calling this routine + * On success, status should be changed to XD_CONNECTED. + * On error, status should stay XD_INIT + */ +static int +xdf_setstate_connected(xdf_t *vdp) +{ + dev_info_t *dip = vdp->xdf_dip; + cmlb_geom_t pgeom; + diskaddr_t nblocks = 0; + char *oename, *xsname, *str; + uint_t dinfo; + + ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); + ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk)); + ASSERT(vdp->xdf_state == XD_INIT); + + if (((xsname = xvdi_get_xsname(dip)) == NULL) || + ((oename = xvdi_get_oename(dip)) == NULL)) + return (DDI_FAILURE); + + /* Determine if feature barrier is supported by backend */ + if (!(vdp->xdf_feature_barrier = xenbus_exists(oename, XBP_FB))) + cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier", + vdp->xdf_addr); + + /* + * Probe backend. Read the device size into xdf_xdev_nblocks + * and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE + * flags in xdf_dinfo. If the emulated device type is "cdrom", + * we always set VDISK_CDROM, regardless of if it's present in + * the xenbus info parameter. + */ + if (xenbus_gather(XBT_NULL, oename, + XBP_SECTORS, "%"SCNu64, &nblocks, + XBP_INFO, "%u", &dinfo, + NULL) != 0) { + cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " + "cannot read backend info", vdp->xdf_addr); + return (DDI_FAILURE); + } + if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { + cmn_err(CE_WARN, "xdf@%s: cannot read device-type", + vdp->xdf_addr); + return (DDI_FAILURE); + } + if (strcmp(str, XBV_DEV_TYPE_CD) == 0) + dinfo |= VDISK_CDROM; + strfree(str); + + vdp->xdf_xdev_nblocks = nblocks; +#ifdef _ILP32 + if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) { + cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: " + "backend disk device too large with %llu blocks for" + " 32-bit kernel", vdp->xdf_addr, vdp->xdf_xdev_nblocks); + xvdi_fatal_error(dip, EFBIG, "reading backend info"); + return (DDI_FAILURE); + } +#endif + + /* + * If the physical geometry for a fixed disk has been explicity + * set then make sure that the specified physical geometry isn't + * larger than the device we connected to. 
+ */ + if (vdp->xdf_pgeom_fixed && + (vdp->xdf_pgeom.g_capacity > vdp->xdf_xdev_nblocks)) { + cmn_err(CE_WARN, + "xdf@%s: connect failed, fixed geometry too large", + vdp->xdf_addr); + return (DDI_FAILURE); + } + + vdp->xdf_media_req_supported = xenbus_exists(oename, XBP_MEDIA_REQ_SUP); + + /* mark vbd is ready for I/O */ mutex_enter(&vdp->xdf_dev_lk); - vdp->xdf_status = XD_CLOSED; + xdf_set_state(vdp, XD_CONNECTED); + + /* check if the cmlb label should be updated */ + xdf_synthetic_pgeom(dip, &pgeom); + if ((vdp->xdf_dinfo != dinfo) || + (!vdp->xdf_pgeom_fixed && + (memcmp(&vdp->xdf_pgeom, &pgeom, sizeof (pgeom)) != 0))) { + vdp->xdf_cmbl_reattach = B_TRUE; + + vdp->xdf_dinfo = dinfo; + if (!vdp->xdf_pgeom_fixed) + vdp->xdf_pgeom = pgeom; + } + + if (XD_IS_CD(vdp) || XD_IS_RM(vdp)) { + if (vdp->xdf_xdev_nblocks == 0) { + vdp->xdf_mstate = DKIO_EJECTED; + cv_broadcast(&vdp->xdf_mstate_cv); + } else { + vdp->xdf_mstate = DKIO_INSERTED; + cv_broadcast(&vdp->xdf_mstate_cv); + } + } else { + if (vdp->xdf_mstate != DKIO_NONE) { + vdp->xdf_mstate = DKIO_NONE; + cv_broadcast(&vdp->xdf_mstate_cv); + } + } + mutex_exit(&vdp->xdf_dev_lk); + + cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", vdp->xdf_addr, + (uint64_t)vdp->xdf_xdev_nblocks); + + /* Restart any currently queued up io */ + xdf_io_start(vdp); + + /* + * To get to the ready state we have to do IO to the backend device, + * but we can't initiate IO from the other end change callback thread + * (which is the current context we're executing in.) This is because + * if the other end disconnects while we're doing IO from the callback + * thread, then we can't receive that disconnect event and we hang + * waiting for an IO that can never complete. + */ + (void) ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp, + DDI_SLEEP); + + (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); + return (DDI_SUCCESS); } /*ARGSUSED*/ @@ -2026,881 +1880,1646 @@ xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data) { XenbusState new_state = *(XenbusState *)impl_data; xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); - boolean_t unexpect_die = B_FALSE; - int status; DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n", - ddi_get_name_addr(dip), new_state)); + vdp->xdf_addr, new_state)); mutex_enter(&vdp->xdf_cb_lk); - if (xdf_check_state_transition(vdp, new_state) == DDI_FAILURE) { + /* We assume that this callback is single threaded */ + ASSERT(vdp->xdf_oe_change_thread == NULL); + DEBUG_EVAL(vdp->xdf_oe_change_thread = curthread); + + /* ignore any backend state changes if we're suspending/suspended */ + if (vdp->xdf_suspending || (vdp->xdf_state == XD_SUSPEND)) { + DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); mutex_exit(&vdp->xdf_cb_lk); return; } switch (new_state) { + case XenbusStateUnknown: case XenbusStateInitialising: - ASSERT(vdp->xdf_status == XD_CLOSED); - /* - * backend recovered from a previous failure, - * kick-off connect process again - */ - if (xdf_start_connect(vdp) != DDI_SUCCESS) { - cmn_err(CE_WARN, "xdf@%s:" - " failed to start reconnecting to backend", - ddi_get_name_addr(dip)); - } + case XenbusStateInitWait: + case XenbusStateInitialised: + if (vdp->xdf_state == XD_INIT) + break; + + xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); + if (xdf_setstate_init(vdp) != DDI_SUCCESS) + break; + ASSERT(vdp->xdf_state == XD_INIT); break; + case XenbusStateConnected: - ASSERT(vdp->xdf_status == XD_INIT); - (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); - /* finish final init after
connect */ - if (xdf_post_connect(vdp) != DDI_SUCCESS) - (void) xdf_start_disconnect(vdp); + if ((vdp->xdf_state == XD_CONNECTED) || + (vdp->xdf_state == XD_READY)) + break; + + if (vdp->xdf_state != XD_INIT) { + xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); + if (xdf_setstate_init(vdp) != DDI_SUCCESS) + break; + ASSERT(vdp->xdf_state == XD_INIT); + } + + if (xdf_setstate_connected(vdp) != DDI_SUCCESS) { + xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE); + break; + } + ASSERT(vdp->xdf_state == XD_CONNECTED); break; + case XenbusStateClosing: - mutex_enter(&vdp->xdf_dev_lk); if (xdf_isopen(vdp, -1)) { - cmn_err(CE_NOTE, "xdf@%s: hot-unplug failed, " - "still in use", ddi_get_name_addr(dip)); - } else { - if ((vdp->xdf_status == XD_READY) || - (vdp->xdf_status == XD_INIT)) - vdp->xdf_status = XD_CLOSING; - (void) xdf_start_disconnect(vdp); + cmn_err(CE_NOTE, + "xdf@%s: hot-unplug failed, still in use", + vdp->xdf_addr); + break; } - mutex_exit(&vdp->xdf_dev_lk); - break; + /*FALLTHROUGH*/ case XenbusStateClosed: - /* first check if BE closed unexpectedly */ - mutex_enter(&vdp->xdf_dev_lk); - if (xdf_isopen(vdp, -1)) { - unexpect_die = B_TRUE; - unexpectedie(vdp); - cmn_err(CE_WARN, "xdf@%s: backend closed, " - "reconnecting...", ddi_get_name_addr(dip)); - } - mutex_exit(&vdp->xdf_dev_lk); + xdf_disconnect(vdp, XD_CLOSED, B_FALSE); + break; + } - if (vdp->xdf_status == XD_READY) { - mutex_enter(&vdp->xdf_dev_lk); - vdp->xdf_status = XD_CLOSING; - mutex_exit(&vdp->xdf_dev_lk); + /* notify anybody waiting for oe state change */ + cv_broadcast(&vdp->xdf_dev_cv); + DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL); + mutex_exit(&vdp->xdf_cb_lk); +} -#ifdef DOMU_BACKEND - (void) xvdi_post_event(dip, XEN_HP_REMOVE); -#endif +static int +xdf_connect_locked(xdf_t *vdp, boolean_t wait) +{ + int rv; - xdf_post_disconnect(vdp); - (void) xvdi_switch_state(dip, XBT_NULL, - XenbusStateClosed); - } else if ((vdp->xdf_status == XD_INIT) || - (vdp->xdf_status == XD_CLOSING)) { - xdf_post_disconnect(vdp); - } else { - mutex_enter(&vdp->xdf_dev_lk); - vdp->xdf_status = XD_CLOSED; - mutex_exit(&vdp->xdf_dev_lk); - } + ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); + + /* we can't connect once we're in the closed state */ + if (vdp->xdf_state == XD_CLOSED) + return (XD_CLOSED); + + vdp->xdf_connect_req++; + while (vdp->xdf_state != XD_READY) { + mutex_exit(&vdp->xdf_dev_lk); + if (vdp->xdf_state == XD_UNKNOWN) + (void) xdf_setstate_init(vdp); + mutex_enter(&vdp->xdf_dev_lk); + + if (!wait || (vdp->xdf_state == XD_READY)) + goto out; + + mutex_exit((&vdp->xdf_cb_lk)); + rv = cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk); + mutex_exit((&vdp->xdf_dev_lk)); + mutex_enter((&vdp->xdf_cb_lk)); + mutex_enter((&vdp->xdf_dev_lk)); + if (rv == 0) + goto out; } - /* notify anybody waiting for oe state change */ +out: + ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); + + /* Try to lock the media */ + (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); + + vdp->xdf_connect_req--; + return (vdp->xdf_state); +} + +static uint_t +xdf_iorestart(caddr_t arg) +{ + xdf_t *vdp = (xdf_t *)arg; + + ASSERT(vdp != NULL); + mutex_enter(&vdp->xdf_dev_lk); - cv_broadcast(&vdp->xdf_dev_cv); + ASSERT(ISDMACBON(vdp)); + SETDMACBOFF(vdp); mutex_exit(&vdp->xdf_dev_lk); - status = vdp->xdf_status; - mutex_exit(&vdp->xdf_cb_lk); + xdf_io_start(vdp); + + return (DDI_INTR_CLAIMED); +} + +#if defined(XPV_HVM_DRIVER) + +typedef struct xdf_hvm_entry { + list_node_t xdf_he_list; + char *xdf_he_path; + 
dev_info_t *xdf_he_dip; +} xdf_hvm_entry_t; + +static list_t xdf_hvm_list; +static kmutex_t xdf_hvm_list_lock; - if (status == XD_READY) { - xdf_iostart(vdp); - } else if ((status == XD_CLOSED) && !unexpect_die) { - /* interface is closed successfully, remove all minor nodes */ - if (vdp->xdf_vd_lbl != NULL) { - cmlb_detach(vdp->xdf_vd_lbl, NULL); - cmlb_free_handle(&vdp->xdf_vd_lbl); - vdp->xdf_vd_lbl = NULL; +static xdf_hvm_entry_t * +i_xdf_hvm_find(const char *path, dev_info_t *dip) +{ + xdf_hvm_entry_t *i; + + ASSERT((path != NULL) || (dip != NULL)); + ASSERT(MUTEX_HELD(&xdf_hvm_list_lock)); + + i = list_head(&xdf_hvm_list); + while (i != NULL) { + if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) { + i = list_next(&xdf_hvm_list, i); + continue; } + if ((dip != NULL) && (i->xdf_he_dip != dip)) { + i = list_next(&xdf_hvm_list, i); + continue; + } + break; } + return (i); } -/* check if partition is open, -1 - check all partitions on the disk */ -static boolean_t -xdf_isopen(xdf_t *vdp, int partition) +dev_info_t * +xdf_hvm_hold(const char *path) { - int i; - ulong_t parbit; - boolean_t rval = B_FALSE; + xdf_hvm_entry_t *i; + dev_info_t *dip; - ASSERT((partition == -1) || - ((partition >= 0) || (partition < XDF_PEXT))); + mutex_enter(&xdf_hvm_list_lock); + i = i_xdf_hvm_find(path, NULL); + if (i == NULL) { + mutex_exit(&xdf_hvm_list_lock); + return (B_FALSE); + } + ndi_hold_devi(dip = i->xdf_he_dip); + mutex_exit(&xdf_hvm_list_lock); + return (dip); +} - if (partition == -1) - parbit = (ulong_t)-1; - else - parbit = 1 << partition; +static void +xdf_hvm_add(dev_info_t *dip) +{ + xdf_hvm_entry_t *i; + char *path; - for (i = 0; i < OTYPCNT; i++) { - if (vdp->xdf_vd_open[i] & parbit) - rval = B_TRUE; - } + /* figure out the path for the dip */ + path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, path); - return (rval); + i = kmem_alloc(sizeof (*i), KM_SLEEP); + i->xdf_he_dip = dip; + i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP); + + mutex_enter(&xdf_hvm_list_lock); + ASSERT(i_xdf_hvm_find(path, NULL) == NULL); + ASSERT(i_xdf_hvm_find(NULL, dip) == NULL); + list_insert_head(&xdf_hvm_list, i); + mutex_exit(&xdf_hvm_list_lock); + + kmem_free(path, MAXPATHLEN); } -/* - * Xdf_check_state_transition will check the XenbusState change to see - * if the change is a valid transition or not. 
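The xdf_hvm_list machinery above lets a fully virtualized guest's pv cdrom shim translate a device pathname into a held xdf dev_info_t. A hedged sketch of the consumer side; the pathname is invented for illustration and a real caller would do more error handling:

/* hypothetical in-kernel consumer of xdf_hvm_hold() */
static int
pv_disk_lookup(void)
{
	dev_info_t *dip;

	if ((dip = xdf_hvm_hold("/xpvd/xdf@768")) == NULL)
		return (ENXIO);		/* no matching pv disk instance */

	/* ... interrogate the held instance ... */

	ndi_rele_devi(dip);		/* balance the ndi_hold_devi() */
	return (0);
}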
- * The new state is written by backend domain, or by running xenstore-write - * to change it manually in dom0 - */ -static int -xdf_check_state_transition(xdf_t *vdp, XenbusState oestate) -{ - int status; - int stcheck; -#define STOK 0 /* need further process */ -#define STNOP 1 /* no action need taking */ -#define STBUG 2 /* unexpected state change, could be a bug */ - - status = vdp->xdf_status; - stcheck = STOK; - - switch (status) { - case XD_UNKNOWN: - if ((oestate == XenbusStateUnknown) || - (oestate == XenbusStateConnected)) - stcheck = STBUG; - else if ((oestate == XenbusStateInitialising) || - (oestate == XenbusStateInitWait) || - (oestate == XenbusStateInitialised)) - stcheck = STNOP; - break; - case XD_INIT: - if (oestate == XenbusStateUnknown) - stcheck = STBUG; - else if ((oestate == XenbusStateInitialising) || - (oestate == XenbusStateInitWait) || - (oestate == XenbusStateInitialised)) - stcheck = STNOP; - break; - case XD_READY: - if ((oestate == XenbusStateUnknown) || - (oestate == XenbusStateInitialising) || - (oestate == XenbusStateInitWait) || - (oestate == XenbusStateInitialised)) - stcheck = STBUG; - else if (oestate == XenbusStateConnected) - stcheck = STNOP; - break; - case XD_CLOSING: - if ((oestate == XenbusStateUnknown) || - (oestate == XenbusStateInitialising) || - (oestate == XenbusStateInitWait) || - (oestate == XenbusStateInitialised) || - (oestate == XenbusStateConnected)) - stcheck = STBUG; - else if (oestate == XenbusStateClosing) - stcheck = STNOP; - break; - case XD_CLOSED: - if ((oestate == XenbusStateUnknown) || - (oestate == XenbusStateConnected)) - stcheck = STBUG; - else if ((oestate == XenbusStateInitWait) || - (oestate == XenbusStateInitialised) || - (oestate == XenbusStateClosing) || - (oestate == XenbusStateClosed)) - stcheck = STNOP; - break; - case XD_SUSPEND: - default: - stcheck = STBUG; - } +static void +xdf_hvm_rm(dev_info_t *dip) +{ + xdf_hvm_entry_t *i; - if (stcheck == STOK) - return (DDI_SUCCESS); + mutex_enter(&xdf_hvm_list_lock); + VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL); + list_remove(&xdf_hvm_list, i); + mutex_exit(&xdf_hvm_list_lock); - if (stcheck == STBUG) - cmn_err(CE_NOTE, "xdf@%s: unexpected otherend " - "state change to %d!, when status is %d", - ddi_get_name_addr(vdp->xdf_dip), oestate, status); + kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1); + kmem_free(i, sizeof (*i)); +} - return (DDI_FAILURE); +static void +xdf_hvm_init(void) +{ + list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t), + offsetof(xdf_hvm_entry_t, xdf_he_list)); + mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL); } -static int -xdf_connect(xdf_t *vdp, boolean_t wait) +static void +xdf_hvm_fini(void) { - ASSERT(mutex_owned(&vdp->xdf_dev_lk)); - while (vdp->xdf_status != XD_READY) { - if (!wait || (vdp->xdf_status > XD_READY)) - break; + ASSERT(list_head(&xdf_hvm_list) == NULL); + list_destroy(&xdf_hvm_list); + mutex_destroy(&xdf_hvm_list_lock); +} + +boolean_t +xdf_hvm_connect(dev_info_t *dip) +{ + xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); + char *oename, *str; + int rv; + + mutex_enter(&vdp->xdf_cb_lk); + mutex_enter(&vdp->xdf_dev_lk); + + /* + * Before trying to establish a connection we need to wait for the + * backend hotplug scripts to have run. Once they are run the + * "<oename>/hotplug-status" property will be set to "connected". + */ + for (;;) { + ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); + + /* + * Get the xenbus path to the backend device.
Note that + * we can't cache this path (and we look it up on each pass + * through this loop) because it could change during + * suspend, resume, and migration operations. + */ + if ((oename = xvdi_get_oename(dip)) == NULL) { + mutex_exit(&vdp->xdf_dev_lk); + mutex_exit(&vdp->xdf_cb_lk); + return (B_FALSE); + } - str = NULL; + if ((xenbus_read_str(oename, XBP_HP_STATUS, &str) == 0) && + (strcmp(str, XBV_HP_STATUS_CONN) == 0)) break; + + if (str != NULL) + strfree(str); + + /* wait for an update to "<oename>/hotplug-status" */ + mutex_exit(&vdp->xdf_dev_lk); + if (cv_wait_sig(&vdp->xdf_hp_status_cv, &vdp->xdf_cb_lk) == 0) { + /* we got interrupted by a signal */ + mutex_exit(&vdp->xdf_cb_lk); + return (B_FALSE); + } + mutex_enter(&vdp->xdf_dev_lk); + } + + /* Good news. The backend hotplug scripts have been run. */ + ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk)); + ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk)); + ASSERT(strcmp(str, XBV_HP_STATUS_CONN) == 0); + strfree(str); + + /* + * If we're emulating a cd device and if the backend doesn't support + * media request operations, then we're not going to bother trying + * to establish a connection for a couple reasons. First off, media + * requests support is required to support operations like eject and + * media locking. Second, other backend platforms like Linux don't + * support hvm pv cdrom access. They don't even have a backend pv + * driver for cdrom device nodes, so we don't want to block forever + * waiting for a connection to a backend driver that doesn't exist. + */ + if (XD_IS_CD(vdp) && !xenbus_exists(oename, XBP_MEDIA_REQ_SUP)) { + mutex_exit(&vdp->xdf_dev_lk); + mutex_exit(&vdp->xdf_cb_lk); + return (B_FALSE); } - return (vdp->xdf_status); + rv = xdf_connect_locked(vdp, B_TRUE); + mutex_exit(&vdp->xdf_dev_lk); + mutex_exit(&vdp->xdf_cb_lk); + + return ((rv == XD_READY) ? B_TRUE : B_FALSE); } -/* - * callback func when DMA/GTE resources is available - * - * Note: we only register one callback function to grant table subsystem - * since we only have one 'struct gnttab_free_callback' in xdf_t. - */ -static int -xdf_dmacallback(caddr_t arg) +int +xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp) { - xdf_t *vdp = (xdf_t *)arg; - ASSERT(vdp != NULL); + xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); - DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n", - ddi_get_name_addr(vdp->xdf_dip))); + /* sanity check the requested physical geometry */ + mutex_enter(&vdp->xdf_dev_lk); + if ((geomp->g_secsize != XB_BSIZE) || + (geomp->g_capacity == 0)) { + mutex_exit(&vdp->xdf_dev_lk); + return (EINVAL); + } - ddi_trigger_softintr(vdp->xdf_softintr_id); - return (DDI_DMA_CALLBACK_DONE); + /* + * If we've already connected to the backend device then make sure + * we're not defining a physical geometry larger than our backend + * device.
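xdf_hvm_setpgeom() copies the caller's geometry field by field and pins it via xdf_pgeom_fixed, so the synthetic geometry is never silently recomputed. A sketch of what a hypothetical caller (for instance the pv cdrom shim propagating the geometry the emulated device reported) might pass; every number is illustrative, and g_secsize must equal the 512-byte XB_BSIZE or the call fails with EINVAL:

static void
pv_set_geometry(dev_info_t *dip)
{
	cmlb_geom_t geom;

	bzero(&geom, sizeof (geom));
	geom.g_nhead = 16;
	geom.g_nsect = 63;
	geom.g_ncyl = 2080;
	geom.g_secsize = 512;		/* must match XB_BSIZE */
	geom.g_capacity = (diskaddr_t)2080 * 16 * 63;

	if (xdf_hvm_setpgeom(dip, &geom) != 0)
		cmn_err(CE_WARN, "pv geometry rejected");
}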
+ */ + if ((vdp->xdf_xdev_nblocks != 0) && + (geomp->g_capacity > vdp->xdf_xdev_nblocks)) { + mutex_exit(&vdp->xdf_dev_lk); + return (EINVAL); + } + + bzero(&vdp->xdf_pgeom, sizeof (vdp->xdf_pgeom)); + vdp->xdf_pgeom.g_ncyl = geomp->g_ncyl; + vdp->xdf_pgeom.g_acyl = geomp->g_acyl; + vdp->xdf_pgeom.g_nhead = geomp->g_nhead; + vdp->xdf_pgeom.g_nsect = geomp->g_nsect; + vdp->xdf_pgeom.g_secsize = geomp->g_secsize; + vdp->xdf_pgeom.g_capacity = geomp->g_capacity; + vdp->xdf_pgeom.g_intrlv = geomp->g_intrlv; + vdp->xdf_pgeom.g_rpm = geomp->g_rpm; + + vdp->xdf_pgeom_fixed = B_TRUE; + mutex_exit(&vdp->xdf_dev_lk); + + /* force a re-validation */ + cmlb_invalidate(vdp->xdf_vd_lbl, NULL); + + return (0); } -static uint_t -xdf_iorestart(caddr_t arg) +boolean_t +xdf_is_cd(dev_info_t *dip) { - xdf_t *vdp = (xdf_t *)arg; + xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); + boolean_t rv; - ASSERT(vdp != NULL); + mutex_enter(&vdp->xdf_cb_lk); + rv = XD_IS_CD(vdp); + mutex_exit(&vdp->xdf_cb_lk); + return (rv); +} - mutex_enter(&vdp->xdf_dev_lk); - ASSERT(ISDMACBON(vdp)); - SETDMACBOFF(vdp); - mutex_exit(&vdp->xdf_dev_lk); +boolean_t +xdf_is_rm(dev_info_t *dip) +{ + xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); + boolean_t rv; - xdf_iostart(vdp); + mutex_enter(&vdp->xdf_cb_lk); + rv = XD_IS_RM(vdp); + mutex_exit(&vdp->xdf_cb_lk); + return (rv); +} - return (DDI_INTR_CLAIMED); +boolean_t +xdf_media_req_supported(dev_info_t *dip) +{ + xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); + boolean_t rv; + + mutex_enter(&vdp->xdf_cb_lk); + rv = vdp->xdf_media_req_supported; + mutex_exit(&vdp->xdf_cb_lk); + return (rv); } -static void -xdf_timeout_handler(void *arg) +#endif /* XPV_HVM_DRIVER */ + +static int +xdf_lb_getcap(dev_info_t *dip, diskaddr_t *capp) { - xdf_t *vdp = arg; + xdf_t *vdp; + vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); + + if (vdp == NULL) + return (ENXIO); mutex_enter(&vdp->xdf_dev_lk); - vdp->xdf_timeout_id = 0; + *capp = vdp->xdf_pgeom.g_capacity; + DPRINTF(LBL_DBG, ("xdf@%s:capacity %llu\n", vdp->xdf_addr, *capp)); mutex_exit(&vdp->xdf_dev_lk); + return (0); +} - /* new timeout thread could be re-scheduled */ - xdf_iostart(vdp); +static int +xdf_lb_getpgeom(dev_info_t *dip, cmlb_geom_t *geomp) +{ + xdf_t *vdp; + + if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) + return (ENXIO); + *geomp = vdp->xdf_pgeom; + return (0); } /* - * Alloc a vreq for this bp - * bp->av_back contains the pointer to the vreq upon return + * No real HBA, no geometry available from it */ -static v_req_t * -vreq_get(xdf_t *vdp, buf_t *bp) +/*ARGSUSED*/ +static int +xdf_lb_getvgeom(dev_info_t *dip, cmlb_geom_t *geomp) { - v_req_t *vreq = NULL; + return (EINVAL); +} - ASSERT(BP2VREQ(bp) == NULL); +static int +xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep) +{ + xdf_t *vdp; - vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP); - if (vreq == NULL) { - if (vdp->xdf_timeout_id == 0) - /* restart I/O after one second */ - vdp->xdf_timeout_id = - timeout(xdf_timeout_handler, vdp, hz); - return (NULL); + if (!(vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)))) + return (ENXIO); + + if (XD_IS_RO(vdp)) + tgattributep->media_is_writable = 0; + else + tgattributep->media_is_writable = 1; + return (0); +} + +/* ARGSUSED3 */ +int +xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie) +{ + switch (cmd) { + case TG_GETPHYGEOM: + return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg)); + case TG_GETVIRTGEOM: + return (xdf_lb_getvgeom(dip, (cmlb_geom_t 
*)arg)); + case TG_GETCAPACITY: + return (xdf_lb_getcap(dip, (diskaddr_t *)arg)); + case TG_GETBLOCKSIZE: + *(uint32_t *)arg = XB_BSIZE; + return (0); + case TG_GETATTR: + return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg)); + default: + return (ENOTTY); } - bzero(vreq, sizeof (v_req_t)); +} - list_insert_head(&vdp->xdf_vreq_act, (void *)vreq); - bp->av_back = (buf_t *)vreq; - vreq->v_buf = bp; - vreq->v_status = VREQ_INIT; - /* init of other fields in vreq is up to the caller */ +/* ARGSUSED5 */ +int +xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp, + diskaddr_t start, size_t reqlen, void *tg_cookie) +{ + xdf_t *vdp; + struct buf *bp; + int err = 0; - return (vreq); + vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); + + /* We don't allow IO from the oe_change callback thread */ + ASSERT(curthread != vdp->xdf_oe_change_thread); + + if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity) + return (EINVAL); + + bp = getrbuf(KM_SLEEP); + if (cmd == TG_READ) + bp->b_flags = B_BUSY | B_READ; + else + bp->b_flags = B_BUSY | B_WRITE; + bp->b_un.b_addr = bufp; + bp->b_bcount = reqlen; + bp->b_blkno = start; + bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */ + + mutex_enter(&vdp->xdf_dev_lk); + xdf_bp_push(vdp, bp); + mutex_exit(&vdp->xdf_dev_lk); + xdf_io_start(vdp); + if (curthread == vdp->xdf_ready_tq_thread) + (void) xdf_ring_drain(vdp); + err = biowait(bp); + ASSERT(bp->b_flags & B_DONE); + freerbuf(bp); + return (err); } -static void -vreq_free(xdf_t *vdp, v_req_t *vreq) +/* + * Lock the current media. Set the media state to "lock". + * (Media locks are only respected by the backend driver.) + */ +static int +xdf_ioctl_mlock(xdf_t *vdp) { - buf_t *bp = vreq->v_buf; + int rv; + mutex_enter(&vdp->xdf_cb_lk); + rv = xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); + mutex_exit(&vdp->xdf_cb_lk); + return (rv); +} - list_remove(&vdp->xdf_vreq_act, (void *)vreq); +/* + * Release a media lock. Set the media state to "none". + */ +static int +xdf_ioctl_munlock(xdf_t *vdp) +{ + int rv; + mutex_enter(&vdp->xdf_cb_lk); + rv = xdf_media_req(vdp, XBV_MEDIA_REQ_NONE, B_TRUE); + mutex_exit(&vdp->xdf_cb_lk); + return (rv); +} - if (vreq->v_flush_diskcache == FLUSH_DISKCACHE) - goto done; +/* + * Eject the current media. Ignores any media locks. (Media locks + * are only for benefit of the backend.) + */ +static int +xdf_ioctl_eject(xdf_t *vdp) +{ + int rv; - switch (vreq->v_status) { - case VREQ_DMAWIN_DONE: - case VREQ_GS_ALLOCED: - case VREQ_DMABUF_BOUND: - (void) ddi_dma_unbind_handle(vreq->v_dmahdl); - /*FALLTHRU*/ - case VREQ_DMAMEM_ALLOCED: - if (!ALIGNED_XFER(bp)) { - ASSERT(vreq->v_abuf != NULL); - if (!IS_ERROR(bp) && IS_READ(bp)) - bcopy(vreq->v_abuf, bp->b_un.b_addr, - bp->b_bcount); - ddi_dma_mem_free(&vreq->v_align); - } - /*FALLTHRU*/ - case VREQ_MEMDMAHDL_ALLOCED: - if (!ALIGNED_XFER(bp)) - ddi_dma_free_handle(&vreq->v_memdmahdl); - /*FALLTHRU*/ - case VREQ_DMAHDL_ALLOCED: - ddi_dma_free_handle(&vreq->v_dmahdl); - break; - default: - break; + mutex_enter(&vdp->xdf_cb_lk); + if ((rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_FALSE)) != 0) { + mutex_exit(&vdp->xdf_cb_lk); + return (rv); } -done: - vreq->v_buf->av_back = NULL; - kmem_cache_free(xdf_vreq_cache, vreq); + + /* + * We've set the media requests xenbus parameter to eject, so now + * disconnect from the backend, wait for the backend to clear + * the media requests xenbus parameter, and then we can reconnect + * to the backend.
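None of this handshake is visible from user space; the eject cases in xdf_ioctl() further below are reached through the ordinary dkio(7I) interface. A minimal test program, with a made-up device path:

#include <sys/types.h>
#include <sys/dkio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

int
main(void)
{
	int fd;

	/* the raw-device path below is illustrative only */
	if ((fd = open("/dev/rdsk/c1t0d0s2", O_RDONLY | O_NDELAY)) == -1) {
		perror("open");
		return (1);
	}
	if (ioctl(fd, DKIOCEJECT, 0) == -1) {
		perror("DKIOCEJECT");
		return (1);
	}
	(void) close(fd);
	return (0);
}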
+ */ + (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE); + mutex_enter(&vdp->xdf_dev_lk); + if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) { + mutex_exit(&vdp->xdf_dev_lk); + mutex_exit(&vdp->xdf_cb_lk); + return (EIO); + } + mutex_exit(&vdp->xdf_dev_lk); + mutex_exit(&vdp->xdf_cb_lk); + return (0); } /* - * Initalize the DMA and grant table resources for the buf + * Watch for media state changes. This can be an insertion of a device + * (triggered by a 'xm block-configure' request in another domain) or + * the ejection of a device (triggered by a local "eject" operation). + * For a full description of the DKIOCSTATE ioctl behavior see dkio(7I). */ static int -vreq_setup(xdf_t *vdp, v_req_t *vreq) +xdf_dkstate(xdf_t *vdp, enum dkio_state mstate) { - int rc; - ddi_dma_attr_t dmaattr; - uint_t ndcs, ndws; - ddi_dma_handle_t dh; - ddi_dma_handle_t mdh; - ddi_dma_cookie_t dc; - ddi_acc_handle_t abh; - caddr_t aba; - ge_slot_t *gs; - size_t bufsz; - off_t off; - size_t sz; - buf_t *bp = vreq->v_buf; - int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) | - DDI_DMA_STREAMING | DDI_DMA_PARTIAL; + enum dkio_state prev_state; - switch (vreq->v_status) { - case VREQ_INIT: - if (IS_FLUSH_DISKCACHE(bp)) { - if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { - DPRINTF(DMA_DBG, ( - "xdf@%s: get ge_slotfailed\n", - ddi_get_name_addr(vdp->xdf_dip))); - return (DDI_FAILURE); + mutex_enter(&vdp->xdf_cb_lk); + prev_state = vdp->xdf_mstate; + + if (vdp->xdf_mstate == mstate) { + while (vdp->xdf_mstate == prev_state) { + if (cv_wait_sig(&vdp->xdf_mstate_cv, + &vdp->xdf_cb_lk) == 0) { + mutex_exit(&vdp->xdf_cb_lk); + return (EINTR); } - vreq->v_blkno = 0; - vreq->v_nslots = 1; - vreq->v_gs = gs; - vreq->v_flush_diskcache = FLUSH_DISKCACHE; - vreq->v_status = VREQ_GS_ALLOCED; - gs->vreq = vreq; - return (DDI_SUCCESS); } + } - if (IS_WRITE_BARRIER(vdp, bp)) - vreq->v_flush_diskcache = WRITE_BARRIER; - vreq->v_blkno = bp->b_blkno + - (diskaddr_t)(uintptr_t)bp->b_private; - bp->b_private = NULL; - /* See if we wrote new data to our flush block */ - if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp)) - check_fbwrite(vdp, bp, vreq->v_blkno); - vreq->v_status = VREQ_INIT_DONE; - /*FALLTHRU*/ + if ((prev_state != DKIO_INSERTED) && + (vdp->xdf_mstate == DKIO_INSERTED)) { + (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE); + mutex_exit(&vdp->xdf_cb_lk); + return (0); + } - case VREQ_INIT_DONE: - /* - * alloc DMA handle - */ - rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr, - xdf_dmacallback, (caddr_t)vdp, &dh); - if (rc != DDI_SUCCESS) { - SETDMACBON(vdp); - DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n", - ddi_get_name_addr(vdp->xdf_dip))); - return (DDI_FAILURE); - } + mutex_exit(&vdp->xdf_cb_lk); + return (0); +} - vreq->v_dmahdl = dh; - vreq->v_status = VREQ_DMAHDL_ALLOCED; - /*FALLTHRU*/ +/*ARGSUSED*/ +static int +xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, + int *rvalp) +{ + minor_t minor = getminor(dev); + int part = XDF_PART(minor); + xdf_t *vdp; + int rv; - case VREQ_DMAHDL_ALLOCED: - /* - * alloc dma handle for 512-byte aligned buf - */ - if (!ALIGNED_XFER(bp)) { - /* - * XXPV: we need to temporarily enlarge the seg - * boundary and s/g length to work round CR6381968 - */ - dmaattr = xb_dma_attr; - dmaattr.dma_attr_seg = (uint64_t)-1; - dmaattr.dma_attr_sgllen = INT_MAX; - rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr, - xdf_dmacallback, (caddr_t)vdp, &mdh); - if (rc != DDI_SUCCESS) { - SETDMACBON(vdp); - DPRINTF(DMA_DBG, ("xdf@%s: unaligned buf DMA" - "handle alloc 
failed\n", - ddi_get_name_addr(vdp->xdf_dip))); - return (DDI_FAILURE); - } - vreq->v_memdmahdl = mdh; - vreq->v_status = VREQ_MEMDMAHDL_ALLOCED; - } - /*FALLTHRU*/ + if (((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) || + (!xdf_isopen(vdp, part))) + return (ENXIO); - case VREQ_MEMDMAHDL_ALLOCED: - /* - * alloc 512-byte aligned buf - */ - if (!ALIGNED_XFER(bp)) { - if (bp->b_flags & (B_PAGEIO | B_PHYS)) - bp_mapin(bp); + DPRINTF(IOCTL_DBG, ("xdf@%s:ioctl: cmd %d (0x%x)\n", + vdp->xdf_addr, cmd, cmd)); - rc = ddi_dma_mem_alloc(vreq->v_memdmahdl, - roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr, - DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp, - &aba, &bufsz, &abh); - if (rc != DDI_SUCCESS) { - SETDMACBON(vdp); - DPRINTF(DMA_DBG, ( - "xdf@%s: DMA mem allocation failed\n", - ddi_get_name_addr(vdp->xdf_dip))); - return (DDI_FAILURE); - } + switch (cmd) { + default: + return (ENOTTY); + case DKIOCG_PHYGEOM: + case DKIOCG_VIRTGEOM: + case DKIOCGGEOM: + case DKIOCSGEOM: + case DKIOCGAPART: + case DKIOCSAPART: + case DKIOCGVTOC: + case DKIOCSVTOC: + case DKIOCPARTINFO: + case DKIOCGEXTVTOC: + case DKIOCSEXTVTOC: + case DKIOCEXTPARTINFO: + case DKIOCGMBOOT: + case DKIOCSMBOOT: + case DKIOCGETEFI: + case DKIOCSETEFI: + case DKIOCPARTITION: + return (cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp, + rvalp, NULL)); + case FDEJECT: + case DKIOCEJECT: + case CDROMEJECT: + return (xdf_ioctl_eject(vdp)); + case DKIOCLOCK: + return (xdf_ioctl_mlock(vdp)); + case DKIOCUNLOCK: + return (xdf_ioctl_munlock(vdp)); + case CDROMREADOFFSET: { + int offset = 0; + if (!XD_IS_CD(vdp)) + return (ENOTTY); + if (ddi_copyout(&offset, (void *)arg, sizeof (int), mode)) + return (EFAULT); + return (0); + } + case DKIOCGMEDIAINFO: { + struct dk_minfo media_info; - vreq->v_abuf = aba; - vreq->v_align = abh; - vreq->v_status = VREQ_DMAMEM_ALLOCED; + media_info.dki_lbsize = DEV_BSIZE; + media_info.dki_capacity = vdp->xdf_pgeom.g_capacity; + if (XD_IS_CD(vdp)) + media_info.dki_media_type = DK_CDROM; + else + media_info.dki_media_type = DK_FIXED_DISK; - ASSERT(bufsz >= bp->b_bcount); - if (!IS_READ(bp)) - bcopy(bp->b_un.b_addr, vreq->v_abuf, - bp->b_bcount); - } - /*FALLTHRU*/ + if (ddi_copyout(&media_info, (void *)arg, + sizeof (struct dk_minfo), mode)) + return (EFAULT); + return (0); + } + case DKIOCINFO: { + struct dk_cinfo info; - case VREQ_DMAMEM_ALLOCED: - /* - * dma bind - */ - if (ALIGNED_XFER(bp)) { - rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp, - dma_flags, xdf_dmacallback, (caddr_t)vdp, - &dc, &ndcs); + /* controller information */ + if (XD_IS_CD(vdp)) + info.dki_ctype = DKC_CDROM; + else + info.dki_ctype = DKC_VBD; + + info.dki_cnum = 0; + (void) strncpy((char *)(&info.dki_cname), "xdf", 8); + + /* unit information */ + info.dki_unit = ddi_get_instance(vdp->xdf_dip); + (void) strncpy((char *)(&info.dki_dname), "xdf", 8); + info.dki_flags = DKI_FMTVOL; + info.dki_partition = part; + info.dki_maxtransfer = maxphys / DEV_BSIZE; + info.dki_addr = 0; + info.dki_space = 0; + info.dki_prio = 0; + info.dki_vec = 0; + + if (ddi_copyout(&info, (void *)arg, sizeof (info), mode)) + return (EFAULT); + return (0); + } + case DKIOCSTATE: { + enum dkio_state mstate; + + if (ddi_copyin((void *)arg, &mstate, + sizeof (mstate), mode) != 0) + return (EFAULT); + if ((rv = xdf_dkstate(vdp, mstate)) != 0) + return (rv); + mstate = vdp->xdf_mstate; + if (ddi_copyout(&mstate, (void *)arg, + sizeof (mstate), mode) != 0) + return (EFAULT); + return (0); + } + case DKIOCREMOVABLE: { + int i = 
BOOLEAN2VOID(XD_IS_RM(vdp)); + if (ddi_copyout(&i, (caddr_t)arg, sizeof (i), mode)) + return (EFAULT); + return (0); + } + case DKIOCGETWCE: { + int i = BOOLEAN2VOID(vdp->xdf_wce); + if (ddi_copyout(&i, (void *)arg, sizeof (i), mode)) + return (EFAULT); + return (0); + } + case DKIOCSETWCE: { + int i; + if (ddi_copyin((void *)arg, &i, sizeof (i), mode)) + return (EFAULT); + vdp->xdf_wce = VOID2BOOLEAN(i); + return (0); + } + case DKIOCFLUSHWRITECACHE: { + struct dk_callback *dkc = (struct dk_callback *)arg; + + if (vdp->xdf_flush_supported) { + rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, + NULL, 0, 0, (void *)dev); + } else if (vdp->xdf_feature_barrier && + !xdf_barrier_flush_disable) { + rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, + vdp->xdf_cache_flush_block, xdf_flush_block, + DEV_BSIZE, (void *)dev); } else { - rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl, - NULL, vreq->v_abuf, bp->b_bcount, dma_flags, - xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs); + return (ENOTTY); } - if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) { - /* get num of dma windows */ - if (rc == DDI_DMA_PARTIAL_MAP) { - rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws); - ASSERT(rc == DDI_SUCCESS); - } else { - ndws = 1; - } - } else { - SETDMACBON(vdp); - DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n", - ddi_get_name_addr(vdp->xdf_dip))); - return (DDI_FAILURE); + if ((mode & FKIOCTL) && (dkc != NULL) && + (dkc->dkc_callback != NULL)) { + (*dkc->dkc_callback)(dkc->dkc_cookie, rv); + /* need to return 0 after calling callback */ + rv = 0; } + return (rv); + } + } + /*NOTREACHED*/ +} - vreq->v_dmac = dc; - vreq->v_dmaw = 0; - vreq->v_ndmacs = ndcs; - vreq->v_ndmaws = ndws; - vreq->v_nslots = ndws; - vreq->v_status = VREQ_DMABUF_BOUND; - /*FALLTHRU*/ +static int +xdf_strategy(struct buf *bp) +{ + xdf_t *vdp; + minor_t minor; + diskaddr_t p_blkct, p_blkst; + ulong_t nblks; + int part; - case VREQ_DMABUF_BOUND: - /* - * get ge_slot, callback is set upon failure from gs_get(), - * if not set previously - */ - if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { - DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", - ddi_get_name_addr(vdp->xdf_dip))); - return (DDI_FAILURE); - } + minor = getminor(bp->b_edev); + part = XDF_PART(minor); + vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor)); - vreq->v_gs = gs; - gs->vreq = vreq; - vreq->v_status = VREQ_GS_ALLOCED; - break; + mutex_enter(&vdp->xdf_dev_lk); + if (!xdf_isopen(vdp, part)) { + mutex_exit(&vdp->xdf_dev_lk); + xdf_io_err(bp, ENXIO, 0); + return (0); + } - case VREQ_GS_ALLOCED: - /* nothing need to be done */ - break; + /* We don't allow IO from the oe_change callback thread */ + ASSERT(curthread != vdp->xdf_oe_change_thread); - case VREQ_DMAWIN_DONE: - /* - * move to the next dma window - */ - ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws); + /* Check for writes to a read only device */ + if (!IS_READ(bp) && XD_IS_RO(vdp)) { + mutex_exit(&vdp->xdf_dev_lk); + xdf_io_err(bp, EROFS, 0); + return (0); + } - /* get a ge_slot for this DMA window */ - if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) { - DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n", - ddi_get_name_addr(vdp->xdf_dip))); - return (DDI_FAILURE); + /* Check if this I/O is accessing a partition or the entire disk */ + if ((long)bp->b_private == XB_SLICE_NONE) { + /* This I/O is using an absolute offset */ + p_blkct = vdp->xdf_xdev_nblocks; + p_blkst = 0; + } else { + /* This I/O is using a partition relative offset */ + mutex_exit(&vdp->xdf_dev_lk); + if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, + &p_blkst, NULL, NULL, NULL))
{ + xdf_io_err(bp, ENXIO, 0); + return (0); } + mutex_enter(&vdp->xdf_dev_lk); + } - vreq->v_gs = gs; - gs->vreq = vreq; - vreq->v_dmaw++; - rc = ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz, - &vreq->v_dmac, &vreq->v_ndmacs); - ASSERT(rc == DDI_SUCCESS); - vreq->v_status = VREQ_GS_ALLOCED; - break; + /* check for a starting block beyond the disk or partition limit */ + if (bp->b_blkno > p_blkct) { + DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64, + vdp->xdf_addr, (longlong_t)bp->b_blkno, (uint64_t)p_blkct)); + xdf_io_err(bp, EINVAL, 0); + return (0); + } - default: - return (DDI_FAILURE); + /* Legacy: don't set error flag at this case */ + if (bp->b_blkno == p_blkct) { + bp->b_resid = bp->b_bcount; + biodone(bp); + return (0); } - return (DDI_SUCCESS); + /* sanitize the input buf */ + bioerror(bp, 0); + bp->b_resid = 0; + bp->av_back = bp->av_forw = NULL; + + /* Adjust for partial transfer, this will result in an error later */ + nblks = bp->b_bcount >> XB_BSHIFT; + if ((bp->b_blkno + nblks) > p_blkct) { + bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT; + bp->b_bcount -= bp->b_resid; + } + + DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n", + vdp->xdf_addr, (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount)); + + /* Fix up the buf struct */ + bp->b_flags |= B_BUSY; + bp->b_private = (void *)(uintptr_t)p_blkst; + + xdf_bp_push(vdp, bp); + mutex_exit(&vdp->xdf_dev_lk); + xdf_io_start(vdp); + if (do_polled_io) + (void) xdf_ring_drain(vdp); + return (0); } -static ge_slot_t * -gs_get(xdf_t *vdp, int isread) +/*ARGSUSED*/ +static int +xdf_read(dev_t dev, struct uio *uiop, cred_t *credp) { - grant_ref_t gh; - ge_slot_t *gs; + xdf_t *vdp; + minor_t minor; + diskaddr_t p_blkcnt; + int part; - /* try to alloc GTEs needed in this slot, first */ - if (gnttab_alloc_grant_references( - BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) { - if (vdp->xdf_gnt_callback.next == NULL) { - SETDMACBON(vdp); - gnttab_request_free_callback( - &vdp->xdf_gnt_callback, - (void (*)(void *))xdf_dmacallback, - (void *)vdp, - BLKIF_MAX_SEGMENTS_PER_REQUEST); - } - return (NULL); - } + minor = getminor(dev); + if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) + return (ENXIO); - gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP); - if (gs == NULL) { - gnttab_free_grant_references(gh); - if (vdp->xdf_timeout_id == 0) - /* restart I/O after one second */ - vdp->xdf_timeout_id = - timeout(xdf_timeout_handler, vdp, hz); - return (NULL); - } + DPRINTF(IO_DBG, ("xdf@%s: read offset 0x%"PRIx64"\n", + vdp->xdf_addr, (int64_t)uiop->uio_offset)); - /* init gs_slot */ - list_insert_head(&vdp->xdf_gs_act, (void *)gs); - gs->oeid = vdp->xdf_peer; - gs->isread = isread; - gs->ghead = gh; - gs->ngrefs = 0; + part = XDF_PART(minor); + if (!xdf_isopen(vdp, part)) + return (ENXIO); - return (gs); + if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, + NULL, NULL, NULL, NULL)) + return (ENXIO); + + if (U_INVAL(uiop)) + return (EINVAL); + + return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop)); } -static void -gs_free(xdf_t *vdp, ge_slot_t *gs) +/*ARGSUSED*/ +static int +xdf_write(dev_t dev, struct uio *uiop, cred_t *credp) { - int i; - grant_ref_t *gp = gs->ge; - int ngrefs = gs->ngrefs; - boolean_t isread = gs->isread; + xdf_t *vdp; + minor_t minor; + diskaddr_t p_blkcnt; + int part; - list_remove(&vdp->xdf_gs_act, (void *)gs); + minor = getminor(dev); + if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) + return (ENXIO); - /* release all grant table entry 
resources used in this slot */ - for (i = 0; i < ngrefs; i++, gp++) - gnttab_end_foreign_access(*gp, !isread, 0); - gnttab_free_grant_references(gs->ghead); + DPRINTF(IO_DBG, ("xdf@%s: write offset 0x%"PRIx64"\n", + vdp->xdf_addr, (int64_t)uiop->uio_offset)); - kmem_cache_free(xdf_gs_cache, (void *)gs); + part = XDF_PART(minor); + if (!xdf_isopen(vdp, part)) + return (ENXIO); + + if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, + NULL, NULL, NULL, NULL)) + return (ENXIO); + + if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) + return (ENOSPC); + + if (U_INVAL(uiop)) + return (EINVAL); + + return (physio(xdf_strategy, NULL, dev, B_WRITE, xdfmin, uiop)); } -static grant_ref_t -gs_grant(ge_slot_t *gs, mfn_t mfn) +/*ARGSUSED*/ +static int +xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp) { - grant_ref_t gr = gnttab_claim_grant_reference(&gs->ghead); + xdf_t *vdp; + minor_t minor; + struct uio *uiop = aiop->aio_uio; + diskaddr_t p_blkcnt; + int part; - ASSERT(gr != -1); - ASSERT(gs->ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST); - gs->ge[gs->ngrefs++] = gr; - gnttab_grant_foreign_access_ref(gr, gs->oeid, mfn, !gs->isread); + minor = getminor(dev); + if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) + return (ENXIO); - return (gr); + part = XDF_PART(minor); + if (!xdf_isopen(vdp, part)) + return (ENXIO); + + if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, + NULL, NULL, NULL, NULL)) + return (ENXIO); + + if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) + return (ENOSPC); + + if (U_INVAL(uiop)) + return (EINVAL); + + return (aphysio(xdf_strategy, anocancel, dev, B_READ, xdfmin, aiop)); } -static void -unexpectedie(xdf_t *vdp) +/*ARGSUSED*/ +static int +xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp) { - /* clean up I/Os in ring that have responses */ - if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) { - mutex_exit(&vdp->xdf_dev_lk); - (void) xdf_intr((caddr_t)vdp); - mutex_enter(&vdp->xdf_dev_lk); - } + xdf_t *vdp; + minor_t minor; + struct uio *uiop = aiop->aio_uio; + diskaddr_t p_blkcnt; + int part; - /* free up all grant table entries */ - while (!list_is_empty(&vdp->xdf_gs_act)) - gs_free(vdp, list_head(&vdp->xdf_gs_act)); + minor = getminor(dev); + if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) + return (ENXIO); - /* - * move bp back to active list orderly - * vreq_busy is updated in vreq_free() - */ - while (!list_is_empty(&vdp->xdf_vreq_act)) { - v_req_t *vreq = list_head(&vdp->xdf_vreq_act); - buf_t *bp = vreq->v_buf; + part = XDF_PART(minor); + if (!xdf_isopen(vdp, part)) + return (ENXIO); - bp->av_back = NULL; - bp->b_resid = bp->b_bcount; - if (vdp->xdf_f_act == NULL) { - vdp->xdf_f_act = vdp->xdf_l_act = bp; - } else { - /* move to the head of list */ - bp->av_forw = vdp->xdf_f_act; - vdp->xdf_f_act = bp; - } - if (vdp->xdf_xdev_iostat != NULL) - kstat_runq_back_to_waitq( - KSTAT_IO_PTR(vdp->xdf_xdev_iostat)); - vreq_free(vdp, vreq); - } + if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, + NULL, NULL, NULL, NULL)) + return (ENXIO); + + if (uiop->uio_loffset >= XB_DTOB(p_blkcnt)) + return (ENOSPC); + + if (U_INVAL(uiop)) + return (EINVAL); + + return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, xdfmin, aiop)); } -static void -xdfmin(struct buf *bp) +static int +xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) { - if (bp->b_bcount > xdf_maxphys) - bp->b_bcount = xdf_maxphys; + struct buf dumpbuf, *dbp = &dumpbuf; + xdf_t *vdp; + minor_t minor; + int err = 0; + int part; + diskaddr_t p_blkcnt, p_blkst; + + minor = 
getminor(dev); + if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) + return (ENXIO); + + DPRINTF(IO_DBG, ("xdf@%s: dump addr (0x%p) blk (%ld) nblks (%d)\n", + vdp->xdf_addr, (void *)addr, blkno, nblk)); + + /* We don't allow IO from the oe_change callback thread */ + ASSERT(curthread != vdp->xdf_oe_change_thread); + + part = XDF_PART(minor); + if (!xdf_isopen(vdp, part)) + return (ENXIO); + + if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst, + NULL, NULL, NULL)) + return (ENXIO); + + if ((blkno + nblk) > p_blkcnt) { + cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64, + vdp->xdf_addr, blkno + nblk, (uint64_t)p_blkcnt); + return (EINVAL); + } + + bioinit(dbp); + dbp->b_flags = B_BUSY; + dbp->b_un.b_addr = addr; + dbp->b_bcount = nblk << DEV_BSHIFT; + dbp->b_blkno = blkno; + dbp->b_edev = dev; + dbp->b_private = (void *)(uintptr_t)p_blkst; + + mutex_enter(&vdp->xdf_dev_lk); + xdf_bp_push(vdp, dbp); + mutex_exit(&vdp->xdf_dev_lk); + xdf_io_start(vdp); + err = xdf_ring_drain(vdp); + biofini(dbp); + return (err); } -void -xdf_kstat_delete(dev_info_t *dip) +/*ARGSUSED*/ +static int +xdf_close(dev_t dev, int flag, int otyp, struct cred *credp) { - xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); - kstat_t *kstat; + minor_t minor; + xdf_t *vdp; + int part; + ulong_t parbit; - /* - * The locking order here is xdf_iostat_lk and then xdf_dev_lk. - * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer - * and the contents of the our kstat. xdf_iostat_lk is used - * to protect the allocation and freeing of the actual kstat. - * xdf_dev_lk can't be used for this purpose because kstat - * readers use it to access the contents of the kstat and - * hence it can't be held when calling kstat_delete(). - */ - mutex_enter(&vdp->xdf_iostat_lk); - mutex_enter(&vdp->xdf_dev_lk); + minor = getminor(dev); + if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) + return (ENXIO); - if (vdp->xdf_xdev_iostat == NULL) { + mutex_enter(&vdp->xdf_dev_lk); + part = XDF_PART(minor); + if (!xdf_isopen(vdp, part)) { mutex_exit(&vdp->xdf_dev_lk); - mutex_exit(&vdp->xdf_iostat_lk); - return; + return (ENXIO); } + parbit = 1 << part; - kstat = vdp->xdf_xdev_iostat; - vdp->xdf_xdev_iostat = NULL; - mutex_exit(&vdp->xdf_dev_lk); + ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0); + if (otyp == OTYP_LYR) { + ASSERT(vdp->xdf_vd_lyropen[part] > 0); + if (--vdp->xdf_vd_lyropen[part] == 0) + vdp->xdf_vd_open[otyp] &= ~parbit; + } else { + vdp->xdf_vd_open[otyp] &= ~parbit; + } + vdp->xdf_vd_exclopen &= ~parbit; - kstat_delete(kstat); - mutex_exit(&vdp->xdf_iostat_lk); + mutex_exit(&vdp->xdf_dev_lk); + return (0); } -int -xdf_kstat_create(dev_info_t *dip, char *ks_module, int ks_instance) +static int +xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp) { - xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); + minor_t minor; + xdf_t *vdp; + int part; + ulong_t parbit; + diskaddr_t p_blkct = 0; + boolean_t firstopen; + boolean_t nodelay; - /* See comment about locking in xdf_kstat_delete(). 
*/ - mutex_enter(&vdp->xdf_iostat_lk); + minor = getminor(*devp); + if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) + return (ENXIO); + + nodelay = (flag & (FNDELAY | FNONBLOCK)); + + DPRINTF(DDI_DBG, ("xdf@%s: opening\n", vdp->xdf_addr)); + + /* do cv_wait until connected or failed */ + mutex_enter(&vdp->xdf_cb_lk); mutex_enter(&vdp->xdf_dev_lk); + if (!nodelay && (xdf_connect_locked(vdp, B_TRUE) != XD_READY)) { + mutex_exit(&vdp->xdf_dev_lk); + mutex_exit(&vdp->xdf_cb_lk); + return (ENXIO); + } + mutex_exit(&vdp->xdf_cb_lk); - if (vdp->xdf_xdev_iostat != NULL) { + if ((flag & FWRITE) && XD_IS_RO(vdp)) { mutex_exit(&vdp->xdf_dev_lk); - mutex_exit(&vdp->xdf_iostat_lk); - return (-1); + return (EROFS); } - if ((vdp->xdf_xdev_iostat = kstat_create( - ks_module, ks_instance, NULL, "disk", - KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) { + part = XDF_PART(minor); + parbit = 1 << part; + if ((vdp->xdf_vd_exclopen & parbit) || + ((flag & FEXCL) && xdf_isopen(vdp, part))) { mutex_exit(&vdp->xdf_dev_lk); - mutex_exit(&vdp->xdf_iostat_lk); - return (-1); + return (EBUSY); } - vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk; - kstat_install(vdp->xdf_xdev_iostat); + /* are we the first one to open this node? */ + firstopen = !xdf_isopen(vdp, -1); + + if (otyp == OTYP_LYR) + vdp->xdf_vd_lyropen[part]++; + + vdp->xdf_vd_open[otyp] |= parbit; + + if (flag & FEXCL) + vdp->xdf_vd_exclopen |= parbit; + mutex_exit(&vdp->xdf_dev_lk); - mutex_exit(&vdp->xdf_iostat_lk); + + /* force a re-validation */ + if (firstopen) + cmlb_invalidate(vdp->xdf_vd_lbl, NULL); + + /* If this is a non-blocking open then we're done */ + if (nodelay) + return (0); + + /* + * This is a blocking open, so we require: + * - that the disk have a valid label on it + * - that the size of the partition that we're opening is non-zero + */ + if ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct, + NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0)) { + (void) xdf_close(*devp, flag, otyp, credp); + return (ENXIO); + } return (0); } -#if defined(XPV_HVM_DRIVER) +/*ARGSUSED*/ +static void +xdf_watch_hp_status_cb(dev_info_t *dip, const char *path, void *arg) +{ + xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); + cv_broadcast(&vdp->xdf_hp_status_cv); +} -typedef struct xdf_hvm_entry { - list_node_t xdf_he_list; - char *xdf_he_path; - dev_info_t *xdf_he_dip; -} xdf_hvm_entry_t; +static int +xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags, + char *name, caddr_t valuep, int *lengthp) +{ + xdf_t *vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip)); -static list_t xdf_hvm_list; -static kmutex_t xdf_hvm_list_lock; + /* + * Sanity check that if a dev_t or dip were specified that they + * correspond to this device driver. On debug kernels we'll + * panic and on non-debug kernels we'll return failure. 
+ */ + ASSERT(ddi_driver_major(dip) == xdf_major); + ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdf_major)); + if ((ddi_driver_major(dip) != xdf_major) || + ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdf_major))) + return (DDI_PROP_NOT_FOUND); -static xdf_hvm_entry_t * -i_xdf_hvm_find(char *path, dev_info_t *dip) -{ - xdf_hvm_entry_t *i; + if (vdp == NULL) + return (ddi_prop_op(dev, dip, prop_op, flags, + name, valuep, lengthp)); - ASSERT((path != NULL) || (dip != NULL)); - ASSERT(MUTEX_HELD(&xdf_hvm_list_lock)); + return (cmlb_prop_op(vdp->xdf_vd_lbl, + dev, dip, prop_op, flags, name, valuep, lengthp, + XDF_PART(getminor(dev)), NULL)); +} - i = list_head(&xdf_hvm_list); - while (i != NULL) { - if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) { - i = list_next(&xdf_hvm_list, i); - continue; - } - if ((dip != NULL) && (i->xdf_he_dip != dip)) { - i = list_next(&xdf_hvm_list, i); - continue; +/*ARGSUSED*/ +static int +xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp) +{ + int instance = XDF_INST(getminor((dev_t)arg)); + xdf_t *vbdp; + + switch (cmd) { + case DDI_INFO_DEVT2DEVINFO: + if ((vbdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) { + *rp = NULL; + return (DDI_FAILURE); } - break; + *rp = vbdp->xdf_dip; + return (DDI_SUCCESS); + + case DDI_INFO_DEVT2INSTANCE: + *rp = (void *)(uintptr_t)instance; + return (DDI_SUCCESS); + + default: + return (DDI_FAILURE); } - return (i); } -dev_info_t * -xdf_hvm_hold(char *path) +/*ARGSUSED*/ +static int +xdf_resume(dev_info_t *dip) { - xdf_hvm_entry_t *i; - dev_info_t *dip; + xdf_t *vdp; + char *oename; - mutex_enter(&xdf_hvm_list_lock); - i = i_xdf_hvm_find(path, NULL); - if (i == NULL) { - mutex_exit(&xdf_hvm_list_lock); - return (B_FALSE); + if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL) + goto err; + + if (xdf_debug & SUSRES_DBG) + xen_printf("xdf@%s: xdf_resume\n", vdp->xdf_addr); + + mutex_enter(&vdp->xdf_cb_lk); + + if (xvdi_resume(dip) != DDI_SUCCESS) { + mutex_exit(&vdp->xdf_cb_lk); + goto err; } - ndi_hold_devi(dip = i->xdf_he_dip); - mutex_exit(&xdf_hvm_list_lock); - return (dip); -} -static void -xdf_hvm_add(dev_info_t *dip) -{ - xdf_hvm_entry_t *i; - char *path; + if (((oename = xvdi_get_oename(dip)) == NULL) || + (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, + xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)) { + mutex_exit(&vdp->xdf_cb_lk); + goto err; + } - /* figure out the path for the dip */ - path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); - (void) ddi_pathname(dip, path); + mutex_enter(&vdp->xdf_dev_lk); + ASSERT(vdp->xdf_state != XD_READY); + xdf_set_state(vdp, XD_UNKNOWN); + mutex_exit(&vdp->xdf_dev_lk); - i = kmem_alloc(sizeof (*i), KM_SLEEP); - i->xdf_he_dip = dip; - i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP); + if (xdf_setstate_init(vdp) != DDI_SUCCESS) { + mutex_exit(&vdp->xdf_cb_lk); + goto err; + } - mutex_enter(&xdf_hvm_list_lock); - ASSERT(i_xdf_hvm_find(path, NULL) == NULL); - ASSERT(i_xdf_hvm_find(NULL, dip) == NULL); - list_insert_head(&xdf_hvm_list, i); - mutex_exit(&xdf_hvm_list_lock); + mutex_exit(&vdp->xdf_cb_lk); - kmem_free(path, MAXPATHLEN); + if (xdf_debug & SUSRES_DBG) + xen_printf("xdf@%s: xdf_resume: done\n", vdp->xdf_addr); + return (DDI_SUCCESS); +err: + if (xdf_debug & SUSRES_DBG) + xen_printf("xdf@%s: xdf_resume: fail\n", vdp->xdf_addr); + return (DDI_FAILURE); } -static void -xdf_hvm_rm(dev_info_t *dip) +static int +xdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) { - xdf_hvm_entry_t *i; + int n, instance = ddi_get_instance(dip); + 
ddi_iblock_cookie_t ibc, softibc; + boolean_t dev_iscd = B_FALSE; + xdf_t *vdp; + char *oename, *xsname, *str; - mutex_enter(&xdf_hvm_list_lock); - VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL); - list_remove(&xdf_hvm_list, i); - mutex_exit(&xdf_hvm_list_lock); + if ((n = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_NOTPROM, + "xdf_debug", 0)) != 0) + xdf_debug = n; - kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1); - kmem_free(i, sizeof (*i)); -} + switch (cmd) { + case DDI_RESUME: + return (xdf_resume(dip)); + case DDI_ATTACH: + break; + default: + return (DDI_FAILURE); + } + /* DDI_ATTACH */ -static void -xdf_hvm_init(void) -{ - list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t), - offsetof(xdf_hvm_entry_t, xdf_he_list)); - mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL); -} + if (((xsname = xvdi_get_xsname(dip)) == NULL) || + ((oename = xvdi_get_oename(dip)) == NULL)) + return (DDI_FAILURE); -static void -xdf_hvm_fini(void) -{ - ASSERT(list_head(&xdf_hvm_list) == NULL); - list_destroy(&xdf_hvm_list); - mutex_destroy(&xdf_hvm_list_lock); + /* + * Disable auto-detach. This is necessary so that we don't get + * detached while we're disconnected from the back end. + */ + if ((ddi_prop_update_int(DDI_DEV_T_NONE, dip, + DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS)) + return (DDI_FAILURE); + + /* driver handles kernel-issued IOCTLs */ + if (ddi_prop_create(DDI_DEV_T_NONE, dip, + DDI_PROP_CANSLEEP, DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) + return (DDI_FAILURE); + + if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS) + return (DDI_FAILURE); + + if (ddi_get_soft_iblock_cookie(dip, + DDI_SOFTINT_LOW, &softibc) != DDI_SUCCESS) + return (DDI_FAILURE); + + if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) { + cmn_err(CE_WARN, "xdf@%s: cannot read device-type", + ddi_get_name_addr(dip)); + return (DDI_FAILURE); + } + if (strcmp(str, XBV_DEV_TYPE_CD) == 0) + dev_iscd = B_TRUE; + strfree(str); + + if (ddi_soft_state_zalloc(xdf_ssp, instance) != DDI_SUCCESS) + return (DDI_FAILURE); + + DPRINTF(DDI_DBG, ("xdf@%s: attaching\n", ddi_get_name_addr(dip))); + vdp = ddi_get_soft_state(xdf_ssp, instance); + ddi_set_driver_private(dip, vdp); + vdp->xdf_dip = dip; + vdp->xdf_addr = ddi_get_name_addr(dip); + vdp->xdf_suspending = B_FALSE; + vdp->xdf_media_req_supported = B_FALSE; + vdp->xdf_peer = INVALID_DOMID; + vdp->xdf_evtchn = INVALID_EVTCHN; + list_create(&vdp->xdf_vreq_act, sizeof (v_req_t), + offsetof(v_req_t, v_link)); + cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL); + cv_init(&vdp->xdf_hp_status_cv, NULL, CV_DEFAULT, NULL); + cv_init(&vdp->xdf_mstate_cv, NULL, CV_DEFAULT, NULL); + mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc); + mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc); + mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, (void *)ibc); + vdp->xdf_cmbl_reattach = B_TRUE; + if (dev_iscd) { + vdp->xdf_dinfo |= VDISK_CDROM; + vdp->xdf_mstate = DKIO_EJECTED; + } else { + vdp->xdf_mstate = DKIO_NONE; + } + + if ((vdp->xdf_ready_tq = ddi_taskq_create(dip, "xdf_ready_tq", + 1, TASKQ_DEFAULTPRI, 0)) == NULL) + goto errout0; + + if (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS, + xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS) + goto errout0; + + if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id, + &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) { + cmn_err(CE_WARN, "xdf@%s: failed to add softintr", + ddi_get_name_addr(dip)); + goto errout0; + } + + /* + * Initialize the physical geometry structure.
Note that currently + * we don't know the size of the backend device so the number + * of blocks on the device will be initialized to zero. Once + * we connect to the backend device we'll update the physical + * geometry to reflect the real size of the device. + */ + xdf_synthetic_pgeom(dip, &vdp->xdf_pgeom); + vdp->xdf_pgeom_fixed = B_FALSE; + + /* + * create default device minor nodes: non-removable disk + * we will adjust minor nodes after we are connected w/ backend + */ + cmlb_alloc_handle(&vdp->xdf_vd_lbl); + if (xdf_cmlb_attach(vdp) != 0) { + cmn_err(CE_WARN, + "xdf@%s: attach failed, cmlb attach failed", + ddi_get_name_addr(dip)); + goto errout0; + } + + /* + * We ship with cache-enabled disks + */ + vdp->xdf_wce = B_TRUE; + + mutex_enter(&vdp->xdf_cb_lk); + /* Watch backend XenbusState change */ + if (xvdi_add_event_handler(dip, + XS_OE_STATE, xdf_oe_change, NULL) != DDI_SUCCESS) { + mutex_exit(&vdp->xdf_cb_lk); + goto errout0; + } + + if (xdf_setstate_init(vdp) != DDI_SUCCESS) { + cmn_err(CE_WARN, "xdf@%s: start connection failed", + ddi_get_name_addr(dip)); + mutex_exit(&vdp->xdf_cb_lk); + goto errout1; + } + mutex_exit(&vdp->xdf_cb_lk); + +#if defined(XPV_HVM_DRIVER) + + xdf_hvm_add(dip); + + /* Report our version to dom0. */ + if (xenbus_printf(XBT_NULL, "hvmpv/xdf", "version", "%d", + HVMPV_XDF_VERS)) + cmn_err(CE_WARN, "xdf: couldn't write version\n"); + +#else /* !XPV_HVM_DRIVER */ + + /* create kstat for iostat(1M) */ + if (xdf_kstat_create(dip, "xdf", instance) != 0) { + cmn_err(CE_WARN, "xdf@%s: failed to create kstat", + ddi_get_name_addr(dip)); + goto errout1; + } + +#endif /* !XPV_HVM_DRIVER */ + + ddi_report_dev(dip); + DPRINTF(DDI_DBG, ("xdf@%s: attached\n", vdp->xdf_addr)); + return (DDI_SUCCESS); + +errout1: + (void) xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed); + xvdi_remove_event_handler(dip, XS_OE_STATE); +errout0: + if (vdp->xdf_vd_lbl != NULL) { + cmlb_detach(vdp->xdf_vd_lbl, NULL); + cmlb_free_handle(&vdp->xdf_vd_lbl); + vdp->xdf_vd_lbl = NULL; + } + if (vdp->xdf_softintr_id != NULL) + ddi_remove_softintr(vdp->xdf_softintr_id); + xvdi_remove_xb_watch_handlers(dip); + if (vdp->xdf_ready_tq != NULL) + ddi_taskq_destroy(vdp->xdf_ready_tq); + mutex_destroy(&vdp->xdf_cb_lk); + mutex_destroy(&vdp->xdf_dev_lk); + cv_destroy(&vdp->xdf_dev_cv); + cv_destroy(&vdp->xdf_hp_status_cv); + ddi_soft_state_free(xdf_ssp, instance); + ddi_set_driver_private(dip, NULL); + ddi_prop_remove_all(dip); + cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(dip)); + return (DDI_FAILURE); } -int -xdf_hvm_connect(dev_info_t *dip) +static int +xdf_suspend(dev_info_t *dip) { - xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); - int rv; + int instance = ddi_get_instance(dip); + xdf_t *vdp; - /* do cv_wait until connected or failed */ + if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) + return (DDI_FAILURE); + + if (xdf_debug & SUSRES_DBG) + xen_printf("xdf@%s: xdf_suspend\n", vdp->xdf_addr); + + xvdi_suspend(dip); + + mutex_enter(&vdp->xdf_cb_lk); mutex_enter(&vdp->xdf_dev_lk); - rv = xdf_connect(vdp, B_TRUE); + + vdp->xdf_suspending = B_TRUE; + xdf_ring_destroy(vdp); + xdf_set_state(vdp, XD_SUSPEND); + vdp->xdf_suspending = B_FALSE; + mutex_exit(&vdp->xdf_dev_lk); - return ((rv == XD_READY) ? 
0 : -1); + mutex_exit(&vdp->xdf_cb_lk); + + if (xdf_debug & SUSRES_DBG) + xen_printf("xdf@%s: xdf_suspend: done\n", vdp->xdf_addr); + + return (DDI_SUCCESS); } -int -xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp) +static int +xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) { - xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip); + xdf_t *vdp; + int instance; - /* sanity check the requested physical geometry */ - mutex_enter(&vdp->xdf_dev_lk); - if ((geomp->g_secsize != XB_BSIZE) || - (geomp->g_capacity == 0)) { - mutex_exit(&vdp->xdf_dev_lk); - return (EINVAL); + switch (cmd) { + + case DDI_PM_SUSPEND: + break; + + case DDI_SUSPEND: + return (xdf_suspend(dip)); + + case DDI_DETACH: + break; + + default: + return (DDI_FAILURE); } - /* - * If we've already connected to the backend device then make sure - * we're not defining a physical geometry larger than our backend - * device. - */ - if ((vdp->xdf_xdev_nblocks != 0) && - (geomp->g_capacity > vdp->xdf_xdev_nblocks)) { - mutex_exit(&vdp->xdf_dev_lk); + instance = ddi_get_instance(dip); + DPRINTF(DDI_DBG, ("xdf@%s: detaching\n", ddi_get_name_addr(dip))); + vdp = ddi_get_soft_state(xdf_ssp, instance); + + if (vdp == NULL) + return (DDI_FAILURE); + + mutex_enter(&vdp->xdf_cb_lk); + xdf_disconnect(vdp, XD_CLOSED, B_FALSE); + if (vdp->xdf_state != XD_CLOSED) { + mutex_exit(&vdp->xdf_cb_lk); + return (DDI_FAILURE); + } + mutex_exit(&vdp->xdf_cb_lk); + + ASSERT(!ISDMACBON(vdp)); + +#if defined(XPV_HVM_DRIVER) + xdf_hvm_rm(dip); +#endif /* XPV_HVM_DRIVER */ + + if (vdp->xdf_timeout_id != 0) + (void) untimeout(vdp->xdf_timeout_id); + + xvdi_remove_event_handler(dip, XS_OE_STATE); + ddi_taskq_destroy(vdp->xdf_ready_tq); + + cmlb_detach(vdp->xdf_vd_lbl, NULL); + cmlb_free_handle(&vdp->xdf_vd_lbl); + + /* we'll support backend running in domU later */ +#ifdef DOMU_BACKEND + (void) xvdi_post_event(dip, XEN_HP_REMOVE); +#endif + + list_destroy(&vdp->xdf_vreq_act); + ddi_prop_remove_all(dip); + xdf_kstat_delete(dip); + ddi_remove_softintr(vdp->xdf_softintr_id); + xvdi_remove_xb_watch_handlers(dip); + ddi_set_driver_private(dip, NULL); + cv_destroy(&vdp->xdf_dev_cv); + mutex_destroy(&vdp->xdf_cb_lk); + mutex_destroy(&vdp->xdf_dev_lk); + if (vdp->xdf_cache_flush_block != NULL) + kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE); + ddi_soft_state_free(xdf_ssp, instance); + return (DDI_SUCCESS); +} + +/* + * Driver linkage structures. + */ +static struct cb_ops xdf_cbops = { + xdf_open, + xdf_close, + xdf_strategy, + nodev, + xdf_dump, + xdf_read, + xdf_write, + xdf_ioctl, + nodev, + nodev, + nodev, + nochpoll, + xdf_prop_op, + NULL, + D_MP | D_NEW | D_64BIT, + CB_REV, + xdf_aread, + xdf_awrite +}; + +struct dev_ops xdf_devops = { + DEVO_REV, /* devo_rev */ + 0, /* devo_refcnt */ + xdf_getinfo, /* devo_getinfo */ + nulldev, /* devo_identify */ + nulldev, /* devo_probe */ + xdf_attach, /* devo_attach */ + xdf_detach, /* devo_detach */ + nodev, /* devo_reset */ + &xdf_cbops, /* devo_cb_ops */ + NULL, /* devo_bus_ops */ + NULL, /* devo_power */ + ddi_quiesce_not_supported, /* devo_quiesce */ +}; + +/* + * Module linkage structures. + */ +static struct modldrv modldrv = { + &mod_driverops, /* Type of module. 
This one is a driver */ + "virtual block driver", /* short description */ + &xdf_devops /* driver specific ops */ +}; + +static struct modlinkage xdf_modlinkage = { + MODREV_1, (void *)&modldrv, NULL +}; + +/* + * standard module entry points + */ +int +_init(void) +{ + int rc; + + xdf_major = ddi_name_to_major("xdf"); + if (xdf_major == (major_t)-1) return (EINVAL); + + if ((rc = ddi_soft_state_init(&xdf_ssp, sizeof (xdf_t), 0)) != 0) + return (rc); + + xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache", + sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + xdf_gs_cache = kmem_cache_create("xdf_gs_cache", + sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0); + +#if defined(XPV_HVM_DRIVER) + xdf_hvm_init(); +#endif /* XPV_HVM_DRIVER */ + + if ((rc = mod_install(&xdf_modlinkage)) != 0) { +#if defined(XPV_HVM_DRIVER) + xdf_hvm_fini(); +#endif /* XPV_HVM_DRIVER */ + kmem_cache_destroy(xdf_vreq_cache); + kmem_cache_destroy(xdf_gs_cache); + ddi_soft_state_fini(&xdf_ssp); + return (rc); } - vdp->xdf_pgeom = *geomp; - mutex_exit(&vdp->xdf_dev_lk); + return (rc); +} - /* force a re-validation */ - cmlb_invalidate(vdp->xdf_vd_lbl, NULL); +int +_fini(void) +{ + + int err; + if ((err = mod_remove(&xdf_modlinkage)) != 0) + return (err); + +#if defined(XPV_HVM_DRIVER) + xdf_hvm_fini(); +#endif /* XPV_HVM_DRIVER */ + + kmem_cache_destroy(xdf_vreq_cache); + kmem_cache_destroy(xdf_gs_cache); + ddi_soft_state_fini(&xdf_ssp); return (0); } -#endif /* XPV_HVM_DRIVER */ +int +_info(struct modinfo *modinfop) +{ + return (mod_info(&xdf_modlinkage, modinfop)); +} diff --git a/usr/src/uts/common/xen/io/xdf.h b/usr/src/uts/common/xen/io/xdf.h index 7e61824096..acf606ba6c 100644 --- a/usr/src/uts/common/xen/io/xdf.h +++ b/usr/src/uts/common/xen/io/xdf.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -28,6 +28,14 @@ #ifndef _SYS_XDF_H #define _SYS_XDF_H +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/cmlb.h> +#include <sys/dkio.h> + +#include <sys/gnttab.h> +#include <xen/sys/xendev.h> + #ifdef __cplusplus extern "C" { #endif @@ -61,43 +69,50 @@ extern "C" { * vdc driver, where as here it is used as an interface between the pv_cmdk * driver and the xdf driver.) */ -#define XB_SLICE_NONE 0xFF +#define XB_SLICE_NONE 0xFF /* * blkif status */ -enum xdf_state { +typedef enum xdf_state { /* * initial state */ - XD_UNKNOWN, + XD_UNKNOWN = 0, /* * ring and evtchn alloced, xenbus state changed to * XenbusStateInitialised, wait for backend to connect */ - XD_INIT, + XD_INIT = 1, + /* + * backend and frontend xenbus state has changed to + * XenbusStateConnected. IO is now allowed, but we are not still + * fully initialized. + */ + XD_CONNECTED = 2, /* - * backend's xenbus state has changed to XenbusStateConnected, - * this is the only state allowing I/Os + * We're fully initialized and allowing regular IO. 
*/ - XD_READY, + XD_READY = 3, /* * vbd interface close request received from backend, no more I/O * requestis allowed to be put into ring buffer, while interrupt handler * is allowed to run to finish any outstanding I/O request, disconnect * process is kicked off by changing xenbus state to XenbusStateClosed */ - XD_CLOSING, + XD_CLOSING = 4, /* * disconnection process finished, both backend and frontend's * xenbus state has been changed to XenbusStateClosed, can be detached */ - XD_CLOSED, + XD_CLOSED = 5, /* - * disconnection process finished, frontend is suspended + * We're either being suspended or resuming from a suspend. If we're + * in the process of suspending, we block all new IO, but but allow + * existing IO to drain. */ - XD_SUSPEND -}; + XD_SUSPEND = 6 +} xdf_state_t; /* * 16 partitions + fdisk @@ -117,13 +132,13 @@ enum xdf_state { * each blkif_request_t when sent out to the ring buffer. */ typedef struct ge_slot { - list_node_t link; - domid_t oeid; - struct v_req *vreq; - int isread; - grant_ref_t ghead; - int ngrefs; - grant_ref_t ge[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + list_node_t gs_vreq_link; + struct v_req *gs_vreq; + domid_t gs_oeid; + int gs_isread; + grant_ref_t gs_ghead; + int gs_ngrefs; + grant_ref_t gs_ge[BLKIF_MAX_SEGMENTS_PER_REQUEST]; } ge_slot_t; /* @@ -148,20 +163,21 @@ typedef struct ge_slot { */ typedef struct v_req { list_node_t v_link; + list_t v_gs; int v_status; buf_t *v_buf; - ddi_dma_handle_t v_dmahdl; - ddi_dma_cookie_t v_dmac; uint_t v_ndmacs; uint_t v_dmaw; uint_t v_ndmaws; uint_t v_nslots; - ge_slot_t *v_gs; uint64_t v_blkno; + ddi_dma_handle_t v_memdmahdl; ddi_acc_handle_t v_align; + ddi_dma_handle_t v_dmahdl; + ddi_dma_cookie_t v_dmac; caddr_t v_abuf; - ddi_dma_handle_t v_memdmahdl; uint8_t v_flush_diskcache; + boolean_t v_runq; } v_req_t; /* @@ -184,43 +200,56 @@ typedef struct v_req { */ typedef struct xdf { dev_info_t *xdf_dip; + char *xdf_addr; ddi_iblock_cookie_t xdf_ibc; /* mutex iblock cookie */ domid_t xdf_peer; /* otherend's dom ID */ xendev_ring_t *xdf_xb_ring; /* I/O ring buffer */ ddi_acc_handle_t xdf_xb_ring_hdl; /* access handler for ring buffer */ list_t xdf_vreq_act; /* active vreq list */ - list_t xdf_gs_act; /* active grant table slot list */ buf_t *xdf_f_act; /* active buf list head */ buf_t *xdf_l_act; /* active buf list tail */ - enum xdf_state xdf_status; /* status of this virtual disk */ + buf_t *xdf_i_act; /* active buf list index */ + xdf_state_t xdf_state; /* status of this virtual disk */ + boolean_t xdf_suspending; ulong_t xdf_vd_open[OTYPCNT]; ulong_t xdf_vd_lyropen[XDF_PEXT]; + ulong_t xdf_connect_req; ulong_t xdf_vd_exclopen; kmutex_t xdf_iostat_lk; /* muxes lock for the iostat ptr */ kmutex_t xdf_dev_lk; /* mutex lock for I/O path */ kmutex_t xdf_cb_lk; /* mutex lock for event handling path */ kcondvar_t xdf_dev_cv; /* cv used in I/O path */ - uint_t xdf_xdev_info; /* disk info from backend xenstore */ + uint_t xdf_dinfo; /* disk info from backend xenstore */ diskaddr_t xdf_xdev_nblocks; /* total size in block */ cmlb_geom_t xdf_pgeom; + boolean_t xdf_pgeom_set; + boolean_t xdf_pgeom_fixed; kstat_t *xdf_xdev_iostat; cmlb_handle_t xdf_vd_lbl; ddi_softintr_t xdf_softintr_id; timeout_id_t xdf_timeout_id; struct gnttab_free_callback xdf_gnt_callback; - int xdf_feature_barrier; - int xdf_flush_supported; - int xdf_wce; + boolean_t xdf_feature_barrier; + boolean_t xdf_flush_supported; + boolean_t xdf_media_req_supported; + boolean_t xdf_wce; + boolean_t xdf_cmbl_reattach; char *xdf_flush_mem; char 
*xdf_cache_flush_block; int xdf_evtchn; + enum dkio_state xdf_mstate; + kcondvar_t xdf_mstate_cv; + kcondvar_t xdf_hp_status_cv; + struct buf *xdf_ready_bp; + ddi_taskq_t *xdf_ready_tq; + kthread_t *xdf_ready_tq_thread; + struct buf *xdf_ready_tq_bp; #ifdef DEBUG int xdf_dmacallback_num; + kthread_t *xdf_oe_change_thread; #endif } xdf_t; -#define BP2VREQ(bp) ((v_req_t *)((bp)->av_back)) - /* * VBD I/O requests must be aligned on a 512-byte boundary and specify * a transfer size which is a mutiple of 512-bytes @@ -235,14 +264,14 @@ typedef struct xdf { /* wrap pa_to_ma() for xdf to run in dom0 */ #define PATOMA(addr) (DOMAIN_IS_INITDOMAIN(xen_info) ? addr : pa_to_ma(addr)) -#define XD_IS_RO(vbd) ((vbd)->xdf_xdev_info & VDISK_READONLY) -#define XD_IS_CD(vbd) ((vbd)->xdf_xdev_info & VDISK_CDROM) -#define XD_IS_RM(vbd) ((vbd)->xdf_xdev_info & VDISK_REMOVABLE) -#define IS_READ(bp) ((bp)->b_flags & B_READ) -#define IS_ERROR(bp) ((bp)->b_flags & B_ERROR) +#define XD_IS_RO(vbd) VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_READONLY) +#define XD_IS_CD(vbd) VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_CDROM) +#define XD_IS_RM(vbd) VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_REMOVABLE) +#define IS_READ(bp) VOID2BOOLEAN((bp)->b_flags & B_READ) +#define IS_ERROR(bp) VOID2BOOLEAN((bp)->b_flags & B_ERROR) #define XDF_UPDATE_IO_STAT(vdp, bp) \ - if ((vdp)->xdf_xdev_iostat != NULL) { \ + { \ kstat_io_t *kip = KSTAT_IO_PTR((vdp)->xdf_xdev_iostat); \ size_t n_done = (bp)->b_bcount - (bp)->b_resid; \ if ((bp)->b_flags & B_READ) { \ @@ -254,9 +283,8 @@ typedef struct xdf { } \ } -extern int xdfdebug; #ifdef DEBUG -#define DPRINTF(flag, args) {if (xdfdebug & (flag)) prom_printf args; } +#define DPRINTF(flag, args) {if (xdf_debug & (flag)) prom_printf args; } #define SETDMACBON(vbd) {(vbd)->xdf_dmacallback_num++; } #define SETDMACBOFF(vbd) {(vbd)->xdf_dmacallback_num--; } #define ISDMACBON(vbd) ((vbd)->xdf_dmacallback_num > 0) @@ -276,11 +304,18 @@ extern int xdfdebug; #define LBL_DBG 0x80 #if defined(XPV_HVM_DRIVER) -extern dev_info_t *xdf_hvm_hold(char *); -extern int xdf_hvm_connect(dev_info_t *); +extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *); +extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, + void *); +extern void xdfmin(struct buf *bp); +extern dev_info_t *xdf_hvm_hold(const char *); +extern boolean_t xdf_hvm_connect(dev_info_t *); extern int xdf_hvm_setpgeom(dev_info_t *, cmlb_geom_t *); extern int xdf_kstat_create(dev_info_t *, char *, int); extern void xdf_kstat_delete(dev_info_t *); +extern boolean_t xdf_is_cd(dev_info_t *); +extern boolean_t xdf_is_rm(dev_info_t *); +extern boolean_t xdf_media_req_supported(dev_info_t *); #endif /* XPV_HVM_DRIVER */ #ifdef __cplusplus diff --git a/usr/src/uts/common/xen/io/xenbus_probe.c b/usr/src/uts/common/xen/io/xenbus_probe.c index ebf3a12a3e..050f11ad1c 100644 --- a/usr/src/uts/common/xen/io/xenbus_probe.c +++ b/usr/src/uts/common/xen/io/xenbus_probe.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -53,8 +53,6 @@ * IN THE SOFTWARE. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #ifdef XPV_HVM_DRIVER #include <sys/xpv_support.h> #endif @@ -76,7 +74,7 @@ read_otherend_details(struct xenbus_device *xendev, return (err); } if (strlen(xendev->otherend) == 0 || - !xenbus_exists(XBT_NULL, xendev->otherend, "")) { + !xenbus_exists_dir(xendev->otherend, "")) { xenbus_dev_fatal(xendev, X_ENOENT, "missing other end from %s", xendev->nodename); kmem_free((void *)xendev->otherend, diff --git a/usr/src/uts/common/xen/io/xenbus_xs.c b/usr/src/uts/common/xen/io/xenbus_xs.c index 39f41ecd60..12f07530c0 100644 --- a/usr/src/uts/common/xen/io/xenbus_xs.c +++ b/usr/src/uts/common/xen/io/xenbus_xs.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -64,8 +64,6 @@ * the functions return error codes. */ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/errno.h> #include <sys/types.h> #include <sys/sysmacros.h> @@ -445,21 +443,34 @@ xenbus_directory(xenbus_transaction_t t, return (split(strings, len, num)); } -/* Check if a path exists. Return 1 if it does. */ -int -xenbus_exists(xenbus_transaction_t t, const char *dir, const char *node) +/* Check if a path exists. */ +boolean_t +xenbus_exists(const char *dir, const char *node) +{ + void *p; + uint_t n; + + if (xenbus_read(XBT_NULL, dir, node, &p, &n) != 0) + return (B_FALSE); + kmem_free(p, n); + return (B_TRUE); +} + +/* Check if a directory path exists. */ +boolean_t +xenbus_exists_dir(const char *dir, const char *node) { char **d; unsigned int dir_n; int i, len; - d = xenbus_directory(t, dir, node, &dir_n); + d = xenbus_directory(XBT_NULL, dir, node, &dir_n); if (d == NULL) - return (0); + return (B_FALSE); for (i = 0, len = 0; i < dir_n; i++) len += strlen(d[i]) + 1 + sizeof (char *); kmem_free(d, len); - return (1); + return (B_TRUE); } /* @@ -480,6 +491,34 @@ xenbus_read(xenbus_transaction_t t, return (err); } +int +xenbus_read_str(const char *dir, const char *node, char **retp) +{ + uint_t n; + int err; + char *str; + + /* + * Since we access the xenbus value immediatly we can't be + * part of a transaction. + */ + if ((err = xenbus_read(XBT_NULL, dir, node, (void **)&str, &n)) != 0) + return (err); + ASSERT((str != NULL) && (n > 0)); + + /* + * Why bother with this? Because xenbus is truly annoying in the + * fact that when it returns a string, it doesn't guarantee that + * the memory that holds the string is of size strlen() + 1. + * This forces callers to keep track of the size of the memory + * containing the string. Ugh. We'll work around this by + * re-allocate strings to always be of size strlen() + 1. + */ + *retp = strdup(str); + kmem_free(str, n); + return (0); +} + /* * Write the value of a single file. * Returns err on failure. diff --git a/usr/src/uts/common/xen/os/xvdi.c b/usr/src/uts/common/xen/os/xvdi.c index 74c8ccb14c..67e0ad1e42 100644 --- a/usr/src/uts/common/xen/os/xvdi.c +++ b/usr/src/uts/common/xen/os/xvdi.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -62,6 +62,7 @@ #include <sys/bootsvcs.h> #include <sys/bootinfo.h> #include <sys/note.h> +#include <sys/sysmacros.h> #ifdef XPV_HVM_DRIVER #include <sys/xpv_support.h> #include <sys/hypervisor.h> @@ -263,6 +264,8 @@ xvdi_init_dev(dev_info_t *dip) pdp->xd_vdevnum = vdevnum; pdp->xd_devclass = devcls; pdp->xd_evtchn = INVALID_EVTCHN; + list_create(&pdp->xd_xb_watches, sizeof (xd_xb_watches_t), + offsetof(xd_xb_watches_t, xxw_list)); mutex_init(&pdp->xd_evt_lk, NULL, MUTEX_DRIVER, NULL); mutex_init(&pdp->xd_ndi_lk, NULL, MUTEX_DRIVER, NULL); ddi_set_parent_data(dip, pdp); @@ -1196,6 +1199,188 @@ i_xvdi_bepath_cb(struct xenbus_watch *w, const char **vec, unsigned int len) } } +static void +i_xvdi_xb_watch_free(xd_xb_watches_t *xxwp) +{ + ASSERT(xxwp->xxw_ref == 0); + strfree((char *)xxwp->xxw_watch.node); + kmem_free(xxwp, sizeof (*xxwp)); +} + +static void +i_xvdi_xb_watch_release(xd_xb_watches_t *xxwp) +{ + ASSERT(MUTEX_HELD(&xxwp->xxw_xppd->xd_ndi_lk)); + ASSERT(xxwp->xxw_ref > 0); + if (--xxwp->xxw_ref == 0) + i_xvdi_xb_watch_free(xxwp); +} + +static void +i_xvdi_xb_watch_hold(xd_xb_watches_t *xxwp) +{ + ASSERT(MUTEX_HELD(&xxwp->xxw_xppd->xd_ndi_lk)); + ASSERT(xxwp->xxw_ref > 0); + xxwp->xxw_ref++; +} + +static void +i_xvdi_xb_watch_cb_tq(void *arg) +{ + xd_xb_watches_t *xxwp = (xd_xb_watches_t *)arg; + dev_info_t *dip = (dev_info_t *)xxwp->xxw_watch.dev; + struct xendev_ppd *pdp = xxwp->xxw_xppd; + + xxwp->xxw_cb(dip, xxwp->xxw_watch.node, xxwp->xxw_arg); + + mutex_enter(&pdp->xd_ndi_lk); + i_xvdi_xb_watch_release(xxwp); + mutex_exit(&pdp->xd_ndi_lk); +} + +static void +i_xvdi_xb_watch_cb(struct xenbus_watch *w, const char **vec, unsigned int len) +{ + dev_info_t *dip = (dev_info_t *)w->dev; + struct xendev_ppd *pdp = ddi_get_parent_data(dip); + xd_xb_watches_t *xxwp; + + ASSERT(len > XS_WATCH_PATH); + ASSERT(vec[XS_WATCH_PATH] != NULL); + + mutex_enter(&pdp->xd_ndi_lk); + for (xxwp = list_head(&pdp->xd_xb_watches); xxwp != NULL; + xxwp = list_next(&pdp->xd_xb_watches, xxwp)) { + if (w == &xxwp->xxw_watch) + break; + } + + if (xxwp == NULL) { + mutex_exit(&pdp->xd_ndi_lk); + return; + } + + i_xvdi_xb_watch_hold(xxwp); + (void) ddi_taskq_dispatch(pdp->xd_xb_watch_taskq, + i_xvdi_xb_watch_cb_tq, xxwp, DDI_SLEEP); + mutex_exit(&pdp->xd_ndi_lk); +} + +/* + * Any watches registered with xvdi_add_xb_watch_handler() get torn down during + * a suspend operation. So if a frontend driver want's to use these interfaces, + * that driver is responsible for re-registering any watches it had before + * the suspend operation. 
+ */ +int +xvdi_add_xb_watch_handler(dev_info_t *dip, const char *dir, const char *node, + xvdi_xb_watch_cb_t cb, void *arg) +{ + struct xendev_ppd *pdp = ddi_get_parent_data(dip); + xd_xb_watches_t *xxw_new, *xxwp; + char *path; + int n; + + ASSERT((dip != NULL) && (dir != NULL) && (node != NULL)); + ASSERT(cb != NULL); + + n = strlen(dir) + 1 + strlen(node) + 1; + path = kmem_zalloc(n, KM_SLEEP); + (void) strlcat(path, dir, n); + (void) strlcat(path, "/", n); + (void) strlcat(path, node, n); + ASSERT((strlen(path) + 1) == n); + + xxw_new = kmem_zalloc(sizeof (*xxw_new), KM_SLEEP); + xxw_new->xxw_ref = 1; + xxw_new->xxw_watch.node = path; + xxw_new->xxw_watch.callback = i_xvdi_xb_watch_cb; + xxw_new->xxw_watch.dev = (struct xenbus_device *)dip; + xxw_new->xxw_xppd = pdp; + xxw_new->xxw_cb = cb; + xxw_new->xxw_arg = arg; + + mutex_enter(&pdp->xd_ndi_lk); + + /* + * If this is the first watch we're setting up, create a taskq + * to dispatch watch events and initialize the watch list. + */ + if (pdp->xd_xb_watch_taskq == NULL) { + char tq_name[TASKQ_NAMELEN]; + + ASSERT(list_is_empty(&pdp->xd_xb_watches)); + + (void) snprintf(tq_name, sizeof (tq_name), + "%s_xb_watch_tq", ddi_get_name(dip)); + + if ((pdp->xd_xb_watch_taskq = ddi_taskq_create(dip, tq_name, + 1, TASKQ_DEFAULTPRI, 0)) == NULL) { + i_xvdi_xb_watch_release(xxw_new); + mutex_exit(&pdp->xd_ndi_lk); + return (DDI_FAILURE); + } + } + + /* Don't allow duplicate watches to be registered */ + for (xxwp = list_head(&pdp->xd_xb_watches); xxwp != NULL; + xxwp = list_next(&pdp->xd_xb_watches, xxwp)) { + + ASSERT(strcmp(xxwp->xxw_watch.node, path) != 0); + if (strcmp(xxwp->xxw_watch.node, path) != 0) + continue; + i_xvdi_xb_watch_release(xxw_new); + mutex_exit(&pdp->xd_ndi_lk); + return (DDI_FAILURE); + } + + if (register_xenbus_watch(&xxw_new->xxw_watch) != 0) { + if (list_is_empty(&pdp->xd_xb_watches)) { + ddi_taskq_destroy(pdp->xd_xb_watch_taskq); + pdp->xd_xb_watch_taskq = NULL; + } + i_xvdi_xb_watch_release(xxw_new); + mutex_exit(&pdp->xd_ndi_lk); + return (DDI_FAILURE); + } + + list_insert_head(&pdp->xd_xb_watches, xxw_new); + mutex_exit(&pdp->xd_ndi_lk); + return (DDI_SUCCESS); +} + +/* + * Tear down all xenbus watches registered by the specified dip. + */ +void +xvdi_remove_xb_watch_handlers(dev_info_t *dip) +{ + struct xendev_ppd *pdp = ddi_get_parent_data(dip); + xd_xb_watches_t *xxwp; + ddi_taskq_t *tq; + + mutex_enter(&pdp->xd_ndi_lk); + + while ((xxwp = list_remove_head(&pdp->xd_xb_watches)) != NULL) { + unregister_xenbus_watch(&xxwp->xxw_watch); + i_xvdi_xb_watch_release(xxwp); + } + ASSERT(list_is_empty(&pdp->xd_xb_watches)); + + /* + * We can't hold xd_ndi_lk while we destroy the xd_xb_watch_taskq. + * This is because if there are currently any executing taskq threads, + * we will block until they are finished, and to finish they need + * to aquire xd_ndi_lk in i_xvdi_xb_watch_cb_tq() so they can release + * their reference on their corresponding xxwp structure. 
+ */ + tq = pdp->xd_xb_watch_taskq; + pdp->xd_xb_watch_taskq = NULL; + mutex_exit(&pdp->xd_ndi_lk); + if (tq != NULL) + ddi_taskq_destroy(tq); +} + static int i_xvdi_add_watch_oestate(dev_info_t *dip) { @@ -1417,6 +1602,8 @@ i_xvdi_rem_watches(dev_info_t *dip) i_xvdi_rem_watch_hpstate(dip); mutex_exit(&pdp->xd_ndi_lk); + + xvdi_remove_xb_watch_handlers(dip); } static int diff --git a/usr/src/uts/common/xen/sys/xenbus_impl.h b/usr/src/uts/common/xen/sys/xenbus_impl.h index 0042b1bc99..b633a529f9 100644 --- a/usr/src/uts/common/xen/sys/xenbus_impl.h +++ b/usr/src/uts/common/xen/sys/xenbus_impl.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -57,8 +57,6 @@ #ifndef _SYS_XENBUS_H #define _SYS_XENBUS_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <sys/mutex.h> #include <sys/list.h> @@ -71,13 +69,14 @@ extern "C" { typedef uint32_t xenbus_transaction_t; /* Register callback to watch this node. */ -struct xenbus_watch -{ +struct xenbus_watch; +typedef void (*xenbus_watch_cb_t)(struct xenbus_watch *, + const char **vec, unsigned int len); +struct xenbus_watch { list_t list; - const char *node; /* path being watched */ - void (*callback)(struct xenbus_watch *, - const char **vec, unsigned int len); - struct xenbus_device *dev; + const char *node; /* path being watched */ + xenbus_watch_cb_t callback; + struct xenbus_device *dev; }; /* @@ -103,17 +102,29 @@ struct xenbus_device { void *data; }; +typedef void (*xvdi_xb_watch_cb_t)(dev_info_t *dip, const char *path, + void *arg); + +typedef struct xd_xb_watches { + list_node_t xxw_list; + int xxw_ref; + struct xenbus_watch xxw_watch; + struct xendev_ppd *xxw_xppd; + xvdi_xb_watch_cb_t xxw_cb; + void *xxw_arg; +} xd_xb_watches_t; extern char **xenbus_directory(xenbus_transaction_t t, const char *dir, const char *node, unsigned int *num); extern int xenbus_read(xenbus_transaction_t t, const char *dir, const char *node, void **rstr, unsigned int *len); +extern int xenbus_read_str(const char *dir, const char *node, char **rstr); extern int xenbus_write(xenbus_transaction_t t, const char *dir, const char *node, const char *string); extern int xenbus_mkdir(xenbus_transaction_t t, const char *dir, const char *node); -extern int xenbus_exists(xenbus_transaction_t t, const char *dir, - const char *node); +extern boolean_t xenbus_exists(const char *dir, const char *node); +extern boolean_t xenbus_exists_dir(const char *dir, const char *node); extern int xenbus_rm(xenbus_transaction_t t, const char *dir, const char *node); extern int xenbus_transaction_start(xenbus_transaction_t *t); diff --git a/usr/src/uts/common/xen/sys/xendev.h b/usr/src/uts/common/xen/sys/xendev.h index 1f3df3c1ba..8e5921dc3f 100644 --- a/usr/src/uts/common/xen/sys/xendev.h +++ b/usr/src/uts/common/xen/sys/xendev.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -42,6 +42,75 @@ extern "C" { #endif /* + * Xenbus property interfaces, initialized by framework + */ +#define XBP_HP_STATUS "hotplug-status" /* backend prop: str */ +#define XBV_HP_STATUS_CONN "connected" /* backend prop val */ +#define XBP_DEV_TYPE "device-type" /* backend prop: str */ +#define XBV_DEV_TYPE_CD "cdrom" /* backend prop val */ + +/* + * Xenbus property interfaces, initialized by backend disk driver + */ +#define XBP_SECTORS "sectors" /* backend prop: uint64 */ +#define XBP_INFO "info" /* backend prop: uint */ +#define XBP_FB "feature-barrier" /* backend prop: boolean int */ + +/* + * Xenbus property interfaces, initialized by frontend disk driver + */ +#define XBP_RING_REF "ring-ref" /* frontend prop: long */ +#define XBP_EVENT_CHAN "event-channel" /* frontend prop: long */ +#define XBP_PROTOCOL "protocol" /* frontend prop: string */ + +/* + * Xenbus CDROM property interfaces, used by backend and frontend + * + * XBP_MEDIA_REQ_SUP + * - Backend xenbus property located at: + * backend/vbd/<domU_id>/<domU_dev>/media-req-sup + * - Set by the backend, consumed by the frontend. + * - Cosumed by the frontend. + * - A boolean integer property indicating backend support + * for the XBP_MEDIA_REQ property. + * + * XBP_MEDIA_REQ + * - Frontend xenbus property located at: + * /local/domain/<domU_id>/device/vbd/<domU_dev>/media-req + * - Set and consumed by both the frontend and backend. + * - Possible values: + * XBV_MEDIA_REQ_NONE, XBV_MEDIA_REQ_LOCK, and XBV_MEDIA_REQ_EJECT + * - Only applies to CDROM devices. + * + * XBV_MEDIA_REQ_NONE + * - XBP_MEDIA_REQ property valud + * - Set and consumed by both the frontend and backend. + * - Indicates that there are no currently outstanding media requet + * operations. + * + * XBV_MEDIA_REQ_LOCK + * - XBP_MEDIA_REQ property valud + * - Set by the frontend, consumed by the backend. + * - Indicates to the backend that the currenct media is locked + * and changes to the media (via xm block-configure for example) + * should not be allowed. + * + * XBV_MEDIA_REQ_EJECT + * - XBP_MEDIA_REQ property valud + * - Set by the frontend, consumed by the backend. + * - Indicates to the backend that the currenct media should be ejected. + * This means that the backend should close it's connection to + * the frontend device, close it's current backing store device/file, + * and then set the media-req property to XBV_MEDIA_REQ_NONE. (to + * indicate that the eject operation is complete.) 
+ */ +#define XBP_MEDIA_REQ_SUP "media-req-sup" /* backend prop: boolean int */ +#define XBP_MEDIA_REQ "media-req" /* frontend prop: str */ +#define XBV_MEDIA_REQ_NONE "none" /* frontend prop val */ +#define XBV_MEDIA_REQ_LOCK "lock" /* frontend prop val */ +#define XBV_MEDIA_REQ_EJECT "eject" /* frontend prop val */ + +/* * Xen device class codes */ typedef enum { @@ -95,6 +164,8 @@ struct xendev_ppd { ddi_callback_id_t xd_hp_ehid; ddi_taskq_t *xd_oe_taskq; ddi_taskq_t *xd_hp_taskq; + ddi_taskq_t *xd_xb_watch_taskq; + list_t xd_xb_watches; }; #define XS_OE_STATE "SUNW,xendev:otherend_state" @@ -137,6 +208,10 @@ int xvdi_init_dev(dev_info_t *); void xvdi_uninit_dev(dev_info_t *); dev_info_t *xvdi_find_dev(dev_info_t *, xendev_devclass_t, domid_t, int); +extern int xvdi_add_xb_watch_handler(dev_info_t *, const char *, + const char *, xvdi_xb_watch_cb_t cb, void *); +extern void xvdi_remove_xb_watch_handlers(dev_info_t *); + /* * common ring interfaces */ diff --git a/usr/src/uts/i86pc/i86hvm/Makefile.files b/usr/src/uts/i86pc/i86hvm/Makefile.files index 03ff880f7c..e912c36f7c 100644 --- a/usr/src/uts/i86pc/i86hvm/Makefile.files +++ b/usr/src/uts/i86pc/i86hvm/Makefile.files @@ -20,11 +20,9 @@ # # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# ident "%Z%%M% %I% %E% SMI" -# # This Makefile defines file modules in the directory uts/i86pc/i86hvm # and its children. These are the source files which are i86pc/i86hvm # "implementation architecture" dependent. @@ -33,9 +31,12 @@ # # Define objects # -PV_CMDK_OBJS += pv_cmdk.o -PV_RTLS_OBJS += pv_rtls.o HVM_BOOTSTRAP_OBJS += hvm_bootstrap.o +HVM_CMDK_OBJS += cmdk.o +HVM_SD_OBJS += sd.o sd_xbuf.o +PV_CMDK_OBJS += pv_cmdk.o xdf_shell.o +PV_RTLS_OBJS += pv_rtls.o +PV_SD_OBJS += pv_sd.o xdf_shell.o XDF_OBJS += xdf.o XNF_OBJS += xnf.o XPV_OBJS += xpv_support.o xvdi.o gnttab.o evtchn.o \ diff --git a/usr/src/uts/i86pc/i86hvm/Makefile.i86hvm b/usr/src/uts/i86pc/i86hvm/Makefile.i86hvm index 0e414c5fb1..721950a871 100644 --- a/usr/src/uts/i86pc/i86hvm/Makefile.i86hvm +++ b/usr/src/uts/i86pc/i86hvm/Makefile.i86hvm @@ -21,11 +21,9 @@ # # uts/i86pc/Makefile.hvm # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" -# # This makefile provides support for building PV drivers that run # in an HVM environment. # @@ -45,8 +43,8 @@ HVM_LINT_LIB_DIR= $(UTSBASE)/$(PLATFORM)/i86hvm/lint-libs/$(OBJS_DIR) # # Define modules. # -HVM_DRV_KMODS = pv_cmdk pv_rtls xdf xnf xpv xpvd -HVM_MISC_KMODS = hvm_bootstrap +HVM_DRV_KMODS = pv_cmdk pv_sd pv_rtls xdf xnf xpv xpvd +HVM_MISC_KMODS = hvm_bootstrap hvm_cmdk hvm_sd HVM_KMODS = $(HVM_DRV_KMODS) $(HVM_MISC_KMODS) include $(UTSBASE)/i86pc/i86hvm/Makefile.files diff --git a/usr/src/uts/i86pc/i86hvm/hvm_cmdk/Makefile b/usr/src/uts/i86pc/i86hvm/hvm_cmdk/Makefile new file mode 100644 index 0000000000..a4b0995bed --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/hvm_cmdk/Makefile @@ -0,0 +1,100 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. 
+# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/i86pc/i86hvm/hvm_cmdk/Makefile +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# This makefile drives the production of the xdc driver. +# +# i86pc implementation architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../../.. + +# +# Define the module and object file sets. +# +MODULE = hvm_cmdk +OBJECTS = $(HVM_CMDK_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(HVM_CMDK_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_HVM_MISC_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +# +# Overrides. +# +DEBUG_FLGS = +DEBUG_DEFS += $(DEBUG_FLGS) +LDFLAGS += -dy -Nmisc/dadk -Nmisc/strategy -Nmisc/cmlb + +# +# For now, disable these lint checks; maintainers should endeavor +# to investigate and remove these for maximum lint coverage. +# Please do not carry these forward to new Makefiles. +# +LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV +LINTTAGS += -erroff=E_STATIC_UNUSED + + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.targ diff --git a/usr/src/uts/i86pc/i86hvm/hvm_sd/Makefile b/usr/src/uts/i86pc/i86hvm/hvm_sd/Makefile new file mode 100644 index 0000000000..f6b3802cbd --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/hvm_sd/Makefile @@ -0,0 +1,98 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/i86pc/i86hvm/hvm_sd/Makefile +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# This makefile drives the production of the xdc driver. +# +# i86pc implementation architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../../.. + +# +# Define the module and object file sets. 
+# +# Normally when compiling sd there are .conf file definitions and +# definitions for warlock, but we don't both with those here. +# +MODULE = hvm_sd +OBJECTS = $(HVM_SD_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(HVM_SD_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_HVM_MISC_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +INC_PATH += -I$(UTSBASE)/intel/io/scsi/targets + +# +# For now, disable these lint checks; maintainers should endeavor +# to investigate and remove these for maximum lint coverage. +# Please do not carry these forward to new Makefiles. +# +LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_STATIC_UNUSED +LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV + + +# +# Default build targets. +# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.targ diff --git a/usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c b/usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c index efa30c35e1..35dc9afa2d 100644 --- a/usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c +++ b/usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c @@ -18,152 +18,42 @@ * * CDDL HEADER END */ - /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -#include <sys/scsi/scsi_types.h> -#include <sys/modctl.h> -#include <sys/cmlb.h> -#include <sys/types.h> -#include <sys/xpv_support.h> -#include <sys/xendev.h> -#include <sys/gnttab.h> -#include <public/xen.h> -#include <public/grant_table.h> -#include <io/xdf.h> -#include <sys/vtoc.h> -#include <sys/dkio.h> -#include <sys/dktp/dadev.h> -#include <sys/dktp/dadkio.h> -#include <sys/dktp/tgdk.h> -#include <sys/dktp/bbh.h> -#include <sys/dktp/cmdk.h> -#include <sys/dktp/altsctr.h> +#include <io/xdf_shell.h> /* - * General Notes - * - * We don't support disks with bad block mappins. We have this - * limitation because the underlying xdf driver doesn't support - * bad block remapping. If there is a need to support this feature - * it should be added directly to the xdf driver and we should just - * pass requests strait on through and let it handle the remapping. - * Also, it's probably worth pointing out that most modern disks do bad - * block remapping internally in the hardware so there's actually less - * of a chance of us ever discovering bad blocks. Also, in most cases - * this driver (and the xdf driver) will only be used with virtualized - * devices, so one might wonder why a virtual device would ever actually - * experience bad blocks. To wrap this up, you might be wondering how - * these bad block mappings get created and how they are managed. Well, - * there are two tools for managing bad block mappings, format(1M) and - * addbadsec(1M). Format(1M) can be used to do a surface scan of a disk - * to attempt to find bad block and create mappings for them. Format(1M) - * and addbadsec(1M) can also be used to edit existing mappings that may - * be saved on the disk. - * - * The underlying PV driver that this driver passes on requests to is the - * xdf driver. 
Since in most cases the xdf driver doesn't deal with - * physical disks it has it's own algorithm for assigning a physical - * geometry to a virtual disk (ie, cylinder count, head count, etc.) - * The default values chosen by the xdf driver may not match those - * assigned to a disk by a hardware disk emulator in an HVM environment. - * This is a problem since these physical geometry attributes affect - * things like the partition table, backup label location, etc. So - * to emulate disk devices correctly we need to know the physical geometry - * that was assigned to a disk at the time of it's initalization. - * Normally in an HVM environment this information will passed to - * the BIOS and operating system from the hardware emulator that is - * emulating the disk devices. In the case of a solaris dom0+xvm - * this would be qemu. So to work around this issue, this driver will - * query the emulated hardware to get the assigned physical geometry - * and then pass this geometry onto the xdf driver so that it can use it. - * But really, this information is essentially metadata about the disk - * that should be kept with the disk image itself. (Assuming or course - * that a disk image is the actual backingstore for this emulated device.) - * This metadata should also be made available to PV drivers via a common - * mechamisn, probably the xenstore. The fact that this metadata isn't - * available outside of HVM domains means that it's difficult to move - * disks between HVM and PV domains, since a fully PV domain will have no - * way of knowing what the correct geometry of the target device is. - * (Short of reading the disk, looking for things like partition tables - * and labels, and taking a best guess at what the geometry was when - * the disk was initialized. Unsuprisingly, qemu actually does this.) - * - * This driver has to map cmdk device instances into their corresponding - * xdf device instances. We have to do this to ensure that when a user - * accesses a emulated cmdk device we map those accesses to the proper - * paravirtualized device. Basically what we need to know is how multiple - * 'disk' entries in a domU configuration file get mapped to emulated - * cmdk devices and to xdf devices. The 'disk' entry to xdf instance - * mappings we know because those are done within the Solaris xvdi code - * and the xpvd nexus driver. But the config to emulated devices mappings - * are handled entirely within the xen management tool chain and the - * hardware emulator. Since all the tools that establish these mappings - * live in dom0, dom0 should really supply us with this information, - * probably via the xenstore. Unfortunatly it doesn't so, since there's - * no good way to determine this mapping dynamically, this driver uses - * a hard coded set of static mappings. These mappings are hardware - * emulator specific because each different hardware emulator could have - * a different device tree with different cmdk device paths. This - * means that if we want to continue to use this static mapping approach - * to allow Solaris to run on different hardware emulators we'll have - * to analyze each of those emulators to determine what paths they - * use and hard code those paths into this driver. yech. This metadata - * really needs to be supplied to us by dom0. - * - * This driver access underlying xdf nodes. 
Unfortunatly, devices - * must create minor nodes during attach, and for disk devices to create - * minor nodes, they have to look at the label on the disk, so this means - * that disk drivers must be able to access a disk contents during - * attach. That means that this disk driver must be able to access - * underlying xdf nodes during attach. Unfortunatly, due to device tree - * locking restrictions, we cannot have an attach operation occuring on - * this device and then attempt to access another device which may - * cause another attach to occur in a different device tree branch - * since this could result in deadlock. Hence, this driver can only - * access xdf device nodes that we know are attached, and it can't use - * any ddi interfaces to access those nodes if those interfaces could - * trigger an attach of the xdf device. So this driver works around - * these restrictions by talking directly to xdf devices via - * xdf_hvm_hold(). This interface takes a pathname to an xdf device, - * and if that device is already attached then it returns the a held dip - * pointer for that device node. This prevents us from getting into - * deadlock situations, but now we need a mechanism to ensure that all - * the xdf device nodes this driver might access are attached before - * this driver tries to access them. This is accomplished via the - * hvmboot_rootconf() callback which is invoked just before root is - * mounted. hvmboot_rootconf() will attach xpvd and tell it to configure - * all xdf device visible to the system. All these xdf device nodes - * will also be marked with the "ddi-no-autodetach" property so that - * once they are configured, the will not be automatically unconfigured. - * The only way that they could be unconfigured is if the administrator - * explicitly attempts to unload required modules via rem_drv(1M) - * or modunload(1M). + * We're emulating (and possibly layering on top of) cmdk devices, so xdf + * disk unit mappings must match up with cmdk disk unit mappings'. */ +#if !defined(XDF_PSHIFT) +#error "can't find definition for xdf unit mappings - XDF_PSHIFT" +#endif /* XDF_PSHIFT */ -/* - * 16 paritions + fdisk (see xdf.h) - */ -#define XDF_DEV2UNIT(dev) XDF_INST((getminor((dev)))) -#define XDF_DEV2PART(dev) XDF_PART((getminor((dev)))) - -#define OTYP_VALID(otyp) ((otyp == OTYP_BLK) || \ - (otyp == OTYP_CHR) || \ - (otyp == OTYP_LYR)) +#if !defined(CMDK_UNITSHF) +#error "can't find definition for cmdk unit mappings - CMDK_UNITSHF" +#endif /* CMDK_UNITSHF */ -#define PV_CMDK_NODES 4 +#if ((XDF_PSHIFT - CMDK_UNITSHF) != 0) +#error "cmdk and xdf unit mappings don't match." +#endif /* ((XDF_PSHIFT - CMDK_UNITSHF) != 0) */ -typedef struct hvm_to_pv { - char *h2p_hvm_path; - char *h2p_pv_path; -} hvm_to_pv_t; +extern const struct dev_ops cmdk_ops; +extern void *cmdk_state; /* + * Globals required by xdf_shell.c */ -static hvm_to_pv_t pv_cmdk_h2p_xen_qemu[] = { +const char *xdfs_c_name = "cmdk"; +const char *xdfs_c_linkinfo = "PV Common Direct Access Disk"; +void **xdfs_c_hvm_ss = &cmdk_state; +const size_t xdfs_c_hvm_ss_size = sizeof (struct cmdk); +const struct dev_ops *xdfs_c_hvm_dev_ops = &cmdk_ops; + +const xdfs_h2p_map_t xdfs_c_h2p_map[] = { /* * The paths mapping here are very specific to xen and qemu. 
When a * domU is booted under xen in HVM mode, qemu is normally used to @@ -217,132 +107,16 @@ static hvm_to_pv_t pv_cmdk_h2p_xen_qemu[] = { { NULL, 0 } }; -typedef struct pv_cmdk { - dev_info_t *dk_dip; - cmlb_handle_t dk_cmlbhandle; - ddi_devid_t dk_devid; - kmutex_t dk_mutex; - dev_info_t *dk_xdf_dip; - dev_t dk_xdf_dev; - int dk_xdf_otyp_count[OTYPCNT][XDF_PEXT]; - ldi_handle_t dk_xdf_lh[XDF_PEXT]; -} pv_cmdk_t; - /* - * Globals + * Private functions */ -static void *pv_cmdk_state; -static major_t pv_cmdk_major; -static hvm_to_pv_t *pv_cmdk_h2p; - -/* - * Function prototypes for xdf callback functions - */ -extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *); -extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, - void *); - -static boolean_t -pv_cmdk_isopen_part(struct pv_cmdk *dkp, int part) -{ - int otyp; - - ASSERT(MUTEX_HELD(&dkp->dk_mutex)); - - for (otyp = 0; (otyp < OTYPCNT); otyp++) { - if (dkp->dk_xdf_otyp_count[otyp][part] != 0) - return (B_TRUE); - } - return (B_FALSE); -} - /* - * Cmlb ops vectors, allows the cmlb module to directly access the entire - * pv_cmdk disk device without going through any partitioning layers. - */ -/*ARGSUSED*/ -static int -pv_cmdk_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, - diskaddr_t start, size_t count, void *tg_cookie) -{ - int instance = ddi_get_instance(dip); - struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); - - if (dkp == NULL) - return (ENXIO); - - return (xdf_lb_rdwr(dkp->dk_xdf_dip, cmd, bufaddr, start, count, - tg_cookie)); -} - -/*ARGSUSED*/ -static int -pv_cmdk_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie) -{ - int instance = ddi_get_instance(dip); - struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); - int err; - - if (dkp == NULL) - return (ENXIO); - - if (cmd == TG_GETVIRTGEOM) { - cmlb_geom_t pgeom, *vgeomp; - diskaddr_t capacity; - - /* - * The native xdf driver doesn't support this ioctl. - * Intead of passing it on, emulate it here so that the - * results look the same as what we get for a real cmdk - * device. - * - * Get the real size of the device - */ - if ((err = xdf_lb_getinfo(dkp->dk_xdf_dip, - TG_GETPHYGEOM, &pgeom, tg_cookie)) != 0) - return (err); - capacity = pgeom.g_capacity; - - /* - * If the controller returned us something that doesn't - * really fit into an Int 13/function 8 geometry - * result, just fail the ioctl. See PSARC 1998/313. - */ - if (capacity >= (63 * 254 * 1024)) - return (EINVAL); - - vgeomp = (cmlb_geom_t *)arg; - vgeomp->g_capacity = capacity; - vgeomp->g_nsect = 63; - vgeomp->g_nhead = 254; - vgeomp->g_ncyl = capacity / (63 * 254); - vgeomp->g_acyl = 0; - vgeomp->g_secsize = 512; - vgeomp->g_intrlv = 1; - vgeomp->g_rpm = 3600; - return (0); - } - - return (xdf_lb_getinfo(dkp->dk_xdf_dip, cmd, arg, tg_cookie)); -} - -static cmlb_tg_ops_t pv_cmdk_lb_ops = { - TG_DK_OPS_VERSION_1, - pv_cmdk_lb_rdwr, - pv_cmdk_lb_getinfo -}; - -/* - * devid management functions - */ - -/* - * pv_cmdk_get_modser() is basically a local copy of + * xdfs_get_modser() is basically a local copy of * cmdk_get_modser() modified to work without the dadk layer. * (which the non-pv version of the cmdk driver uses.) 
*/ static int -pv_cmdk_get_modser(struct pv_cmdk *dkp, int ioccmd, char *buf, int len) +xdfs_get_modser(xdfs_state_t *xsp, int ioccmd, char *buf, int len) { struct scsi_device *scsi_device; opaque_t ctlobjp; @@ -355,7 +129,7 @@ pv_cmdk_get_modser(struct pv_cmdk *dkp, int ioccmd, char *buf, int len) strarg.is_buf = buf; strarg.is_size = len; - scsi_device = ddi_get_driver_private(dkp->dk_dip); + scsi_device = ddi_get_driver_private(xsp->xdfss_dip); ctlobjp = scsi_device->sd_address.a_hba_tran; if (CTL_IOCTL(ctlobjp, ioccmd, (uintptr_t)&strarg, FNATIVE | FKIOCTL) != 0) @@ -382,14 +156,14 @@ pv_cmdk_get_modser(struct pv_cmdk *dkp, int ioccmd, char *buf, int len) } /* - * pv_cmdk_devid_modser() is basically a copy of cmdk_devid_modser() + * xdfs_devid_modser() is basically a copy of cmdk_devid_modser() * that has been modified to use local pv cmdk driver functions. * * Build a devid from the model and serial number * Return DDI_SUCCESS or DDI_FAILURE. */ static int -pv_cmdk_devid_modser(struct pv_cmdk *dkp) +xdfs_devid_modser(xdfs_state_t *xsp) { int rc = DDI_FAILURE; char *hwid; @@ -400,12 +174,12 @@ pv_cmdk_devid_modser(struct pv_cmdk *dkp) * device ID is a concatenation of model number, '=', serial number. */ hwid = kmem_alloc(CMDK_HWIDLEN, KM_SLEEP); - modlen = pv_cmdk_get_modser(dkp, DIOCTL_GETMODEL, hwid, CMDK_HWIDLEN); + modlen = xdfs_get_modser(xsp, DIOCTL_GETMODEL, hwid, CMDK_HWIDLEN); if (modlen == 0) goto err; hwid[modlen++] = '='; - serlen = pv_cmdk_get_modser(dkp, DIOCTL_GETSERIAL, + serlen = xdfs_get_modser(xsp, DIOCTL_GETSERIAL, hwid + modlen, CMDK_HWIDLEN - modlen); if (serlen == 0) goto err; @@ -413,8 +187,8 @@ pv_cmdk_devid_modser(struct pv_cmdk *dkp) hwid[modlen + serlen] = 0; /* Initialize the device ID, trailing NULL not included */ - rc = ddi_devid_init(dkp->dk_dip, DEVID_ATA_SERIAL, modlen + serlen, - hwid, (ddi_devid_t *)&dkp->dk_devid); + rc = ddi_devid_init(xsp->xdfss_dip, DEVID_ATA_SERIAL, modlen + serlen, + hwid, (ddi_devid_t *)&xsp->xdfss_tgt_devid); if (rc != DDI_SUCCESS) goto err; @@ -427,7 +201,7 @@ err: } /* - * pv_cmdk_devid_read() is basically a local copy of + * xdfs_devid_read() is basically a local copy of * cmdk_devid_read() modified to work without the dadk layer. * (which the non-pv version of the cmdk driver uses.) * @@ -436,18 +210,18 @@ err: * Return DDI_SUCCESS or DDI_FAILURE. */ static int -pv_cmdk_devid_read(struct pv_cmdk *dkp) +xdfs_devid_read(xdfs_state_t *xsp) { diskaddr_t blk; struct dk_devid *dkdevidp; uint_t *ip, chksum; int i; - if (cmlb_get_devid_block(dkp->dk_cmlbhandle, &blk, 0) != 0) + if (cmlb_get_devid_block(xsp->xdfss_cmlbhandle, &blk, 0) != 0) return (DDI_FAILURE); dkdevidp = kmem_zalloc(NBPSCTR, KM_SLEEP); - if (pv_cmdk_lb_rdwr(dkp->dk_dip, + if (xdfs_lb_rdwr(xsp->xdfss_dip, TG_READ, dkdevidp, blk, NBPSCTR, NULL) != 0) goto err; @@ -470,8 +244,8 @@ pv_cmdk_devid_read(struct pv_cmdk *dkp) /* keep a copy of the device id */ i = ddi_devid_sizeof((ddi_devid_t)dkdevidp->dkd_devid); - dkp->dk_devid = kmem_alloc(i, KM_SLEEP); - bcopy(dkdevidp->dkd_devid, dkp->dk_devid, i); + xsp->xdfss_tgt_devid = kmem_alloc(i, KM_SLEEP); + bcopy(dkdevidp->dkd_devid, xsp->xdfss_tgt_devid, i); kmem_free(dkdevidp, NBPSCTR); return (DDI_SUCCESS); @@ -481,7 +255,7 @@ err: } /* - * pv_cmdk_devid_fabricate() is basically a local copy of + * xdfs_devid_fabricate() is basically a local copy of * cmdk_devid_fabricate() modified to work without the dadk layer. * (which the non-pv version of the cmdk driver uses.) 
* @@ -490,7 +264,7 @@ err: * Return DDI_SUCCESS or DDI_FAILURE. */ static int -pv_cmdk_devid_fabricate(struct pv_cmdk *dkp) +xdfs_devid_fabricate(xdfs_state_t *xsp) { ddi_devid_t devid = NULL; /* devid made by ddi_devid_init */ struct dk_devid *dkdevidp = NULL; /* devid struct stored on disk */ @@ -498,10 +272,10 @@ pv_cmdk_devid_fabricate(struct pv_cmdk *dkp) uint_t *ip, chksum; int i; - if (cmlb_get_devid_block(dkp->dk_cmlbhandle, &blk, 0) != 0) + if (cmlb_get_devid_block(xsp->xdfss_cmlbhandle, &blk, 0) != 0) return (DDI_FAILURE); - if (ddi_devid_init(dkp->dk_dip, DEVID_FAB, 0, NULL, &devid) != + if (ddi_devid_init(xsp->xdfss_dip, DEVID_FAB, 0, NULL, &devid) != DDI_SUCCESS) return (DDI_FAILURE); @@ -527,13 +301,13 @@ pv_cmdk_devid_fabricate(struct pv_cmdk *dkp) /* Fill in the checksum */ DKD_FORMCHKSUM(chksum, dkdevidp); - if (pv_cmdk_lb_rdwr(dkp->dk_dip, + if (xdfs_lb_rdwr(xsp->xdfss_dip, TG_WRITE, dkdevidp, blk, NBPSCTR, NULL) != 0) goto err; kmem_free(dkdevidp, NBPSCTR); - dkp->dk_devid = devid; + xsp->xdfss_tgt_devid = devid; return (DDI_SUCCESS); err: @@ -545,180 +319,10 @@ err: } /* - * pv_cmdk_devid_setup() is basically a local copy ofcmdk_devid_setup() - * that has been modified to use local pv cmdk driver functions. - * - * Create and register the devid. - * There are 4 different ways we can get a device id: - * 1. Already have one - nothing to do - * 2. Build one from the drive's model and serial numbers - * 3. Read one from the disk (first sector of last track) - * 4. Fabricate one and write it on the disk. - * If any of these succeeds, register the deviceid - */ -static void -pv_cmdk_devid_setup(struct pv_cmdk *dkp) -{ - int rc; - - /* Try options until one succeeds, or all have failed */ - - /* 1. All done if already registered */ - - if (dkp->dk_devid != NULL) - return; - - /* 2. Build a devid from the model and serial number */ - rc = pv_cmdk_devid_modser(dkp); - if (rc != DDI_SUCCESS) { - /* 3. Read devid from the disk, if present */ - rc = pv_cmdk_devid_read(dkp); - - /* 4. otherwise make one up and write it on the disk */ - if (rc != DDI_SUCCESS) - rc = pv_cmdk_devid_fabricate(dkp); - } - - /* If we managed to get a devid any of the above ways, register it */ - if (rc == DDI_SUCCESS) - (void) ddi_devid_register(dkp->dk_dip, dkp->dk_devid); -} - -/* - * Local Functions + * xdfs_rwcmd_copyin() is a duplicate of rwcmd_copyin(). */ static int -pv_cmdk_iodone(struct buf *bp) -{ - struct buf *bp_orig = bp->b_chain; - - /* Propegate back the io results */ - bp_orig->b_resid = bp->b_resid; - bioerror(bp_orig, geterror(bp)); - biodone(bp_orig); - - freerbuf(bp); - return (0); -} - -static int -pv_cmdkstrategy(struct buf *bp) -{ - dev_t dev = bp->b_edev; - int instance = XDF_DEV2UNIT(dev); - int part = XDF_DEV2PART(dev); - struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); - dev_t xdf_devt; - struct buf *bp_clone; - - /* - * Sanity checks that the dev_t associated with the buf we were - * passed actually corresponds us and that the partition we're - * trying to access is actually open. On debug kernels we'll - * panic and on non-debug kernels we'll return failure. 
- */ - ASSERT(getmajor(dev) == pv_cmdk_major); - if (getmajor(dev) != pv_cmdk_major) - goto err; - - mutex_enter(&dkp->dk_mutex); - ASSERT(pv_cmdk_isopen_part(dkp, part)); - if (!pv_cmdk_isopen_part(dkp, part)) { - mutex_exit(&dkp->dk_mutex); - goto err; - } - mutex_exit(&dkp->dk_mutex); - - /* clone this buffer */ - xdf_devt = dkp->dk_xdf_dev | part; - bp_clone = bioclone(bp, 0, bp->b_bcount, xdf_devt, bp->b_blkno, - pv_cmdk_iodone, NULL, KM_SLEEP); - bp_clone->b_chain = bp; - - /* - * If we're being invoked on behalf of the physio() call in - * pv_cmdk_dioctl_rwcmd() then b_private will be set to - * XB_SLICE_NONE and we need to propegate this flag into the - * cloned buffer so that the xdf driver will see it. - */ - if (bp->b_private == (void *)XB_SLICE_NONE) - bp_clone->b_private = (void *)XB_SLICE_NONE; - - /* - * Pass on the cloned buffer. Note that we don't bother to check - * for failure because the xdf strategy routine will have to - * invoke biodone() if it wants to return an error, which means - * that the pv_cmdk_iodone() callback will get invoked and it - * will propegate the error back up the stack and free the cloned - * buffer. - */ - ASSERT(dkp->dk_xdf_lh[part] != NULL); - return (ldi_strategy(dkp->dk_xdf_lh[part], bp_clone)); - -err: - bioerror(bp, ENXIO); - bp->b_resid = bp->b_bcount; - biodone(bp); - return (0); -} - -/*ARGSUSED*/ -static int -pv_cmdkread(dev_t dev, struct uio *uio, cred_t *credp) -{ - int instance = XDF_DEV2UNIT(dev); - int part = XDF_DEV2PART(dev); - struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); - - return (ldi_read(dkp->dk_xdf_lh[part], uio, credp)); -} - -/*ARGSUSED*/ -static int -pv_cmdkwrite(dev_t dev, struct uio *uio, cred_t *credp) -{ - int instance = XDF_DEV2UNIT(dev); - int part = XDF_DEV2PART(dev); - struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); - - return (ldi_write(dkp->dk_xdf_lh[part], uio, credp)); -} - -/*ARGSUSED*/ -static int -pv_cmdkaread(dev_t dev, struct aio_req *aio, cred_t *credp) -{ - int instance = XDF_DEV2UNIT(dev); - int part = XDF_DEV2PART(dev); - struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); - return (ldi_aread(dkp->dk_xdf_lh[part], aio, credp)); -} - -/*ARGSUSED*/ -static int -pv_cmdkawrite(dev_t dev, struct aio_req *aio, cred_t *credp) -{ - int instance = XDF_DEV2UNIT(dev); - int part = XDF_DEV2PART(dev); - struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); - return (ldi_awrite(dkp->dk_xdf_lh[part], aio, credp)); -} - -static int -pv_cmdkdump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) -{ - int instance = XDF_DEV2UNIT(dev); - int part = XDF_DEV2PART(dev); - struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); - - return (ldi_dump(dkp->dk_xdf_lh[part], addr, blkno, nblk)); -} - -/* - * pv_rwcmd_copyin() is a duplicate of rwcmd_copyin(). - */ -static int -pv_rwcmd_copyin(struct dadkio_rwcmd *rwcmdp, caddr_t inaddr, int flag) +xdfs_rwcmd_copyin(struct dadkio_rwcmd *rwcmdp, caddr_t inaddr, int flag) { switch (ddi_model_convert_from(flag)) { case DDI_MODEL_ILP32: { @@ -753,10 +357,10 @@ pv_rwcmd_copyin(struct dadkio_rwcmd *rwcmdp, caddr_t inaddr, int flag) } /* - * pv_rwcmd_copyout() is a duplicate of rwcmd_copyout(). + * xdfs_rwcmd_copyout() is a duplicate of rwcmd_copyout(). 
*/ static int -pv_rwcmd_copyout(struct dadkio_rwcmd *rwcmdp, caddr_t outaddr, int flag) +xdfs_rwcmd_copyout(struct dadkio_rwcmd *rwcmdp, caddr_t outaddr, int flag) { switch (ddi_model_convert_from(flag)) { case DDI_MODEL_ILP32: { @@ -795,15 +399,8 @@ pv_rwcmd_copyout(struct dadkio_rwcmd *rwcmdp, caddr_t outaddr, int flag) return (0); } -static void -pv_cmdkmin(struct buf *bp) -{ - if (bp->b_bcount > DK_MAXRECSIZE) - bp->b_bcount = DK_MAXRECSIZE; -} - static int -pv_cmdk_dioctl_rwcmd(dev_t dev, intptr_t arg, int flag) +xdfs_dioctl_rwcmd(dev_t dev, intptr_t arg, int flag) { struct dadkio_rwcmd *rwcmdp; struct iovec aiov; @@ -812,7 +409,7 @@ pv_cmdk_dioctl_rwcmd(dev_t dev, intptr_t arg, int flag) int rw, status; rwcmdp = kmem_alloc(sizeof (struct dadkio_rwcmd), KM_SLEEP); - status = pv_rwcmd_copyin(rwcmdp, (caddr_t)arg, flag); + status = xdfs_rwcmd_copyin(rwcmdp, (caddr_t)arg, flag); if (status != 0) goto out; @@ -845,35 +442,46 @@ pv_cmdk_dioctl_rwcmd(dev_t dev, intptr_t arg, int flag) bp->b_private = (void *)XB_SLICE_NONE; rw = ((rwcmdp->cmd == DADKIO_RWCMD_WRITE) ? B_WRITE : B_READ); - status = physio(pv_cmdkstrategy, bp, dev, rw, pv_cmdkmin, &auio); + status = physio(xdfs_strategy, bp, dev, rw, xdfs_minphys, &auio); biofini(bp); kmem_free(bp, sizeof (buf_t)); if (status == 0) - status = pv_rwcmd_copyout(rwcmdp, (caddr_t)arg, flag); + status = xdfs_rwcmd_copyout(rwcmdp, (caddr_t)arg, flag); out: kmem_free(rwcmdp, sizeof (struct dadkio_rwcmd)); return (status); } -static int -pv_cmdkioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, - int *rvalp) -{ - int instance = XDF_DEV2UNIT(dev); - int part = XDF_DEV2PART(dev); - struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); - int err; +/* + * xdf_shell callback functions + */ +/*ARGSUSED*/ +int +xdfs_c_ioctl(xdfs_state_t *xsp, dev_t dev, int part, + int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp, boolean_t *done) +{ + *done = B_TRUE; switch (cmd) { default: - return (ldi_ioctl(dkp->dk_xdf_lh[part], - cmd, arg, flag, credp, rvalp)); + *done = B_FALSE; + return (0); + case DKIOCLOCK: + case DKIOCUNLOCK: + case FDEJECT: + case DKIOCEJECT: + case CDROMEJECT: { + /* we don't support ejectable devices */ + return (ENOTTY); + } case DKIOCGETWCE: - case DKIOCSETWCE: + case DKIOCSETWCE: { + /* we don't support write cache get/set */ return (EIO); + } case DKIOCADDBAD: { /* * This is for ata/ide bad block handling. It is supposed @@ -889,7 +497,7 @@ pv_cmdkioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, * I can't actually find any code that utilizes this ioctl, * hence we're leaving it explicitly unimplemented. */ - ASSERT("ioctl cmd unsupported by pv_cmdk: DKIOCGETDEF"); + ASSERT("ioctl cmd unsupported by xdf shell: DKIOCGETDEF"); return (EIO); } case DIOCTL_RWCMD: { @@ -898,16 +506,18 @@ pv_cmdkioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, * reading and writing the disk. Great, another way to * do the same thing... 
*/ - return (pv_cmdk_dioctl_rwcmd(dev, arg, flag)); + return (xdfs_dioctl_rwcmd(dev, arg, flag)); } case DKIOCINFO: { - dev_info_t *dip = dkp->dk_dip; + int instance = ddi_get_instance(xsp->xdfss_dip); + dev_info_t *dip = xsp->xdfss_dip; struct dk_cinfo info; + int rv; /* Pass on the ioctl request, save the response */ - if ((err = ldi_ioctl(dkp->dk_xdf_lh[part], + if ((rv = ldi_ioctl(xsp->xdfss_tgt_lh[part], cmd, (intptr_t)&info, FKIOCTL, credp, rvalp)) != 0) - return (err); + return (rv); /* Update controller info */ info.dki_cnum = ddi_get_instance(ddi_get_parent(dip)); @@ -930,129 +540,47 @@ pv_cmdkioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, /*NOTREACHED*/ } -/*ARGSUSED*/ -static int -pv_cmdkopen(dev_t *dev_p, int flag, int otyp, cred_t *credp) +/* + * xdfs_c_devid_setup() is a slightly modified copy of cmdk_devid_setup(). + * + * Create and register the devid. + * There are 4 different ways we can get a device id: + * 1. Already have one - nothing to do + * 2. Build one from the drive's model and serial numbers + * 3. Read one from the disk (first sector of last track) + * 4. Fabricate one and write it on the disk. + * If any of these succeeds, register the deviceid + */ +void +xdfs_c_devid_setup(xdfs_state_t *xsp) { - ldi_ident_t li; - dev_t dev = *dev_p; - int instance = XDF_DEV2UNIT(dev); - int part = XDF_DEV2PART(dev); - struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); - dev_t xdf_devt = dkp->dk_xdf_dev | part; - int err = 0; - - if ((otyp < 0) || (otyp >= OTYPCNT)) - return (EINVAL); - - /* allocate an ldi handle */ - VERIFY(ldi_ident_from_dev(*dev_p, &li) == 0); - - mutex_enter(&dkp->dk_mutex); - - /* - * We translate all device opens (chr, blk, and lyr) into - * block device opens. Why? Because for all the opens that - * come through this driver, we only keep around one LDI handle. - * So that handle can only be of one open type. The reason - * that we choose the block interface for this is that to use - * the block interfaces for a device the system needs to allocatex - * buf_ts, which are associated with system memory which can act - * as a cache for device data. So normally when a block device - * is closed the system will ensure that all these pages get - * flushed out of memory. But if we were to open the device - * as a character device, then when we went to close the underlying - * device (even if we had invoked the block interfaces) any data - * remaining in memory wouldn't necessairly be flushed out - * before the device was closed. 
- */ - if (dkp->dk_xdf_lh[part] == NULL) { - ASSERT(!pv_cmdk_isopen_part(dkp, part)); - - err = ldi_open_by_dev(&xdf_devt, OTYP_BLK, flag, credp, - &dkp->dk_xdf_lh[part], li); - - if (err != 0) { - mutex_exit(&dkp->dk_mutex); - ldi_ident_release(li); - return (err); - } - - /* Disk devices really shouldn't clone */ - ASSERT(xdf_devt == (dkp->dk_xdf_dev | part)); - } else { - ldi_handle_t lh_tmp; - - ASSERT(pv_cmdk_isopen_part(dkp, part)); - - /* do ldi open/close to get flags and cred check */ - err = ldi_open_by_dev(&xdf_devt, OTYP_BLK, flag, credp, - &lh_tmp, li); - if (err != 0) { - mutex_exit(&dkp->dk_mutex); - ldi_ident_release(li); - return (err); - } - - /* Disk devices really shouldn't clone */ - ASSERT(xdf_devt == (dkp->dk_xdf_dev | part)); - (void) ldi_close(lh_tmp, flag, credp); - } - ldi_ident_release(li); - - dkp->dk_xdf_otyp_count[otyp][part]++; - - mutex_exit(&dkp->dk_mutex); - return (0); -} + int rc; -/*ARGSUSED*/ -static int -pv_cmdkclose(dev_t dev, int flag, int otyp, cred_t *credp) -{ - int instance = XDF_DEV2UNIT(dev); - int part = XDF_DEV2PART(dev); - struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); - int err = 0; + /* Try options until one succeeds, or all have failed */ - ASSERT((otyp >= 0) && otyp < OTYPCNT); + /* 1. All done if already registered */ - /* - * Sanity check that that the dev_t specified corresponds to this - * driver and that the device is actually open. On debug kernels we'll - * panic and on non-debug kernels we'll return failure. - */ - ASSERT(getmajor(dev) == pv_cmdk_major); - if (getmajor(dev) != pv_cmdk_major) - return (ENXIO); - - mutex_enter(&dkp->dk_mutex); - ASSERT(pv_cmdk_isopen_part(dkp, part)); - if (!pv_cmdk_isopen_part(dkp, part)) { - mutex_exit(&dkp->dk_mutex); - return (ENXIO); - } + if (xsp->xdfss_tgt_devid != NULL) + return; - ASSERT(dkp->dk_xdf_lh[part] != NULL); - ASSERT(dkp->dk_xdf_otyp_count[otyp][part] > 0); - if (otyp == OTYP_LYR) { - dkp->dk_xdf_otyp_count[otyp][part]--; - } else { - dkp->dk_xdf_otyp_count[otyp][part] = 0; - } + /* 2. Build a devid from the model and serial number */ + rc = xdfs_devid_modser(xsp); + if (rc != DDI_SUCCESS) { + /* 3. Read devid from the disk, if present */ + rc = xdfs_devid_read(xsp); - if (!pv_cmdk_isopen_part(dkp, part)) { - err = ldi_close(dkp->dk_xdf_lh[part], flag, credp); - dkp->dk_xdf_lh[part] = NULL; + /* 4. otherwise make one up and write it on the disk */ + if (rc != DDI_SUCCESS) + rc = xdfs_devid_fabricate(xsp); } - mutex_exit(&dkp->dk_mutex); - - return (err); + /* If we managed to get a devid any of the above ways, register it */ + if (rc == DDI_SUCCESS) + (void) ddi_devid_register(xsp->xdfss_dip, xsp->xdfss_tgt_devid); } -static int -pv_cmdk_getpgeom(dev_info_t *dip, cmlb_geom_t *pgeom) +int +xdfs_c_getpgeom(dev_info_t *dip, cmlb_geom_t *pgeom) { struct scsi_device *scsi_device; struct tgdk_geom tgdk_geom; @@ -1079,13 +607,8 @@ pv_cmdk_getpgeom(dev_info_t *dip, cmlb_geom_t *pgeom) return (0); } -/* - * pv_cmdk_bb_check() checks for the existance of bad blocks mappings in - * the alternate partition/slice. Returns B_FALSE is there are no bad - * block mappins found, and B_TRUE is there are bad block mappins found. 
- */ -static boolean_t -pv_cmdk_bb_check(struct pv_cmdk *dkp) +boolean_t +xdfs_c_bb_check(xdfs_state_t *xsp) { struct alts_parttbl *ap; diskaddr_t nblocks, blk; @@ -1096,7 +619,7 @@ pv_cmdk_bb_check(struct pv_cmdk *dkp) /* find slice with V_ALTSCTR tag */ for (alts = 0; alts < NDKMAP; alts++) { - if (cmlb_partinfo(dkp->dk_cmlbhandle, alts, + if (cmlb_partinfo(xsp->xdfss_cmlbhandle, alts, &nblocks, &blk, NULL, &vtoctag, 0) != 0) { /* no partition table exists */ return (B_FALSE); @@ -1110,8 +633,7 @@ pv_cmdk_bb_check(struct pv_cmdk *dkp) /* read in ALTS label block */ ap = (struct alts_parttbl *)kmem_zalloc(NBPSCTR, KM_SLEEP); - if (pv_cmdk_lb_rdwr(dkp->dk_dip, - TG_READ, ap, blk, NBPSCTR, NULL) != 0) + if (xdfs_lb_rdwr(xsp->xdfss_dip, TG_READ, ap, blk, NBPSCTR, NULL) != 0) goto err; altused = ap->alts_ent_used; /* number of BB entries */ @@ -1131,400 +653,22 @@ err: return (B_FALSE); } -/* - * Autoconfiguration Routines - */ -static int -pv_cmdkattach(dev_info_t *dip, ddi_attach_cmd_t cmd) -{ - int instance = ddi_get_instance(dip); - dev_info_t *xdf_dip = NULL; - struct pv_cmdk *dkp; - cmlb_geom_t pgeom; - char *path; - int i; - - if (cmd != DDI_ATTACH) - return (DDI_FAILURE); - - /* - * This cmdk device layers on top of an xdf device. So the first - * thing we need to do is determine which xdf device instance this - * cmdk instance should be layered on top of. - */ - path = kmem_alloc(MAXPATHLEN, KM_SLEEP); - (void) ddi_pathname(dip, path); - for (i = 0; pv_cmdk_h2p[i].h2p_hvm_path != NULL; i++) { - if (strcmp(pv_cmdk_h2p[i].h2p_hvm_path, path) == 0) - break; - } - kmem_free(path, MAXPATHLEN); - - if (pv_cmdk_h2p[i].h2p_hvm_path == NULL) { - /* - * UhOh. We don't know what xdf instance this cmdk device - * should be mapped to. - */ - return (DDI_FAILURE); - } - - /* Check if this device exists */ - xdf_dip = xdf_hvm_hold(pv_cmdk_h2p[i].h2p_pv_path); - if (xdf_dip == NULL) - return (DDI_FAILURE); - - /* allocate and initialize our state structure */ - (void) ddi_soft_state_zalloc(pv_cmdk_state, instance); - dkp = ddi_get_soft_state(pv_cmdk_state, instance); - mutex_init(&dkp->dk_mutex, NULL, MUTEX_DRIVER, NULL); - dkp->dk_dip = dip; - dkp->dk_xdf_dip = xdf_dip; - dkp->dk_xdf_dev = makedevice(ddi_driver_major(xdf_dip), - XDF_MINOR(ddi_get_instance(xdf_dip), 0)); - - ASSERT((dkp->dk_xdf_dev & XDF_PMASK) == 0); - - /* - * GROSS HACK ALERT! GROSS HACK ALERT! - * - * Before we can initialize the cmlb layer, we have to tell the - * underlying xdf device what it's physical geometry should be. - * See the block comments at the top of this file for more info. - */ - if ((pv_cmdk_getpgeom(dip, &pgeom) != 0) || - (xdf_hvm_setpgeom(dkp->dk_xdf_dip, &pgeom) != 0)) { - ddi_release_devi(dkp->dk_xdf_dip); - mutex_destroy(&dkp->dk_mutex); - ddi_soft_state_free(pv_cmdk_state, instance); - return (DDI_FAILURE); - } - - /* create kstat for iostat(1M) */ - if (xdf_kstat_create(dkp->dk_xdf_dip, "cmdk", instance) != 0) { - ddi_release_devi(dkp->dk_xdf_dip); - mutex_destroy(&dkp->dk_mutex); - ddi_soft_state_free(pv_cmdk_state, instance); - return (DDI_FAILURE); - } - - /* - * Force the xdf front end driver to connect to the backend. From - * the solaris device tree perspective, the xdf driver devinfo node - * is already in the ATTACHED state. (Otherwise xdf_hvm_hold() - * would not have returned a dip.) But this doesn't mean that the - * xdf device has actually established a connection to it's back - * end driver. For us to be able to access the xdf device it needs - * to be connected. 
There are two ways to force the xdf driver to - * connect to the backend device. - */ - if (xdf_hvm_connect(dkp->dk_xdf_dip) != 0) { - cmn_err(CE_WARN, - "pv driver failed to connect: %s", - pv_cmdk_h2p[i].h2p_pv_path); - xdf_kstat_delete(dkp->dk_xdf_dip); - ddi_release_devi(dkp->dk_xdf_dip); - mutex_destroy(&dkp->dk_mutex); - ddi_soft_state_free(pv_cmdk_state, instance); - return (DDI_FAILURE); - } - - /* - * Initalize cmlb. Note that for partition information cmlb - * will access the underly xdf disk device directly via - * pv_cmdk_lb_rdwr() and pv_cmdk_lb_getinfo(). There are no - * layered driver handles associated with this access because - * it is a direct disk access that doesn't go through - * any of the device nodes exported by the xdf device (since - * all exported device nodes only reflect the portion of - * the device visible via the partition/slice that the node - * is associated with.) So while not observable via the LDI, - * this direct disk access is ok since we're actually holding - * the target device. - */ - cmlb_alloc_handle((cmlb_handle_t *)&dkp->dk_cmlbhandle); - if (cmlb_attach(dkp->dk_dip, &pv_cmdk_lb_ops, - DTYPE_DIRECT, /* device_type */ - 0, /* not removable */ - 0, /* not hot pluggable */ - DDI_NT_BLOCK, - CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT, /* mimic cmdk */ - dkp->dk_cmlbhandle, 0) != 0) { - cmlb_free_handle(&dkp->dk_cmlbhandle); - xdf_kstat_delete(dkp->dk_xdf_dip); - ddi_release_devi(dkp->dk_xdf_dip); - mutex_destroy(&dkp->dk_mutex); - ddi_soft_state_free(pv_cmdk_state, instance); - return (DDI_FAILURE); - } - - if (pv_cmdk_bb_check(dkp)) { - cmn_err(CE_WARN, - "pv cmdk disks with bad blocks are unsupported: %s", - pv_cmdk_h2p[i].h2p_hvm_path); - - cmlb_detach(dkp->dk_cmlbhandle, 0); - cmlb_free_handle(&dkp->dk_cmlbhandle); - xdf_kstat_delete(dkp->dk_xdf_dip); - ddi_release_devi(dkp->dk_xdf_dip); - mutex_destroy(&dkp->dk_mutex); - ddi_soft_state_free(pv_cmdk_state, instance); - return (DDI_FAILURE); - } - - /* setup devid string */ - pv_cmdk_devid_setup(dkp); - - /* Calling validate will create minor nodes according to disk label */ - (void) cmlb_validate(dkp->dk_cmlbhandle, 0, 0); - - /* - * Add a zero-length attribute to tell the world we support - * kernel ioctls (for layered drivers). - */ - (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, - DDI_KERNEL_IOCTL, NULL, 0); - - /* Have the system report any newly created device nodes */ - ddi_report_dev(dip); - - return (DDI_SUCCESS); -} - -static int -pv_cmdkdetach(dev_info_t *dip, ddi_detach_cmd_t cmd) +char * +xdfs_c_cmlb_node_type(xdfs_state_t *xsp) { - int instance = ddi_get_instance(dip); - struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); - - if (cmd != DDI_DETACH) - return (DDI_FAILURE); - - ASSERT(MUTEX_NOT_HELD(&dkp->dk_mutex)); - - ddi_devid_unregister(dip); - if (dkp->dk_devid) - ddi_devid_free(dkp->dk_devid); - cmlb_detach(dkp->dk_cmlbhandle, 0); - cmlb_free_handle(&dkp->dk_cmlbhandle); - mutex_destroy(&dkp->dk_mutex); - xdf_kstat_delete(dkp->dk_xdf_dip); - ddi_release_devi(dkp->dk_xdf_dip); - ddi_soft_state_free(pv_cmdk_state, instance); - ddi_prop_remove_all(dip); - - return (DDI_SUCCESS); + return (xsp->xdfss_tgt_is_cd ? 
DDI_NT_CD : DDI_NT_BLOCK); } /*ARGSUSED*/ -static int -pv_cmdk_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, - void **result) -{ - dev_t dev = (dev_t)arg; - int instance = XDF_DEV2UNIT(dev); - struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); - - switch (infocmd) { - case DDI_INFO_DEVT2DEVINFO: - if (dkp == NULL) - return (DDI_FAILURE); - *result = (void *)dkp->dk_dip; - break; - case DDI_INFO_DEVT2INSTANCE: - *result = (void *)(intptr_t)instance; - break; - default: - return (DDI_FAILURE); - } - return (DDI_SUCCESS); -} - -static int -pv_cmdk_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, - int flags, char *name, caddr_t valuep, int *lengthp) -{ - int instance = ddi_get_instance(dip); - struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance); - dev_info_t *xdf_dip; - dev_t xdf_devt; - int err; - - /* - * Sanity check that if a dev_t or dip were specified that they - * correspond to this device driver. On debug kernels we'll - * panic and on non-debug kernels we'll return failure. - */ - ASSERT(ddi_driver_major(dip) == pv_cmdk_major); - ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == pv_cmdk_major)); - if ((ddi_driver_major(dip) != pv_cmdk_major) || - ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != pv_cmdk_major))) - return (DDI_PROP_NOT_FOUND); - - /* - * This property lookup might be associated with a device node - * that is not yet attached, if so pass it onto ddi_prop_op(). - */ - if (dkp == NULL) - return (ddi_prop_op(dev, dip, prop_op, flags, - name, valuep, lengthp)); - - /* - * Make sure we only lookup static properties. - * - * If there are static properties of the underlying xdf driver - * that we want to mirror, then we'll have to explicity look them - * up and define them during attach. There are a few reasons - * for this. Most importantly, most static properties are typed - * and all dynamic properties are untyped, ie, for dynamic - * properties the caller must know the type of the property and - * how to interpret the value of the property. the prop_op drivedr - * entry point is only designed for returning dynamic/untyped - * properties, so if we were to attempt to lookup and pass back - * static properties of the underlying device here then we would - * be losing the type information for those properties. Another - * reason we don't want to pass on static property requests is that - * static properties are enumerable in the device tree, where as - * dynamic ones are not. - */ - flags |= DDI_PROP_DYNAMIC; - - /* - * We can't use the ldi here to access the underlying device because - * the ldi actually opens the device, and that open might fail if the - * device has already been opened with the FEXCL flag. If we used - * the ldi here, it would also be possible for some other caller - * to try open the device with the FEXCL flag and get a failure - * back because we have it open to do a property query. - * - * Instad we'll grab a hold on the target dip and query the - * property directly. 
- */ - mutex_enter(&dkp->dk_mutex); - - if ((xdf_dip = dkp->dk_xdf_dip) == NULL) { - mutex_exit(&dkp->dk_mutex); - return (DDI_PROP_NOT_FOUND); - } - e_ddi_hold_devi(xdf_dip); - - /* figure out the dev_t we're going to pass on down */ - if (dev == DDI_DEV_T_ANY) { - xdf_devt = DDI_DEV_T_ANY; - } else { - xdf_devt = dkp->dk_xdf_dev | XDF_DEV2PART(dev); - } - - mutex_exit(&dkp->dk_mutex); - - /* - * Cdev_prop_op() is not a public interface, and normally the caller - * is required to make sure that the target driver actually implements - * this interface before trying to invoke it. In this case we know - * that we're always accessing the xdf driver and it does have this - * interface defined, so we can skip the check. - */ - err = cdev_prop_op(xdf_devt, xdf_dip, - prop_op, flags, name, valuep, lengthp); - ddi_release_devi(xdf_dip); - return (err); -} - -/* - * Device driver ops vector - */ -static struct cb_ops pv_cmdk_cb_ops = { - pv_cmdkopen, /* open */ - pv_cmdkclose, /* close */ - pv_cmdkstrategy, /* strategy */ - nodev, /* print */ - pv_cmdkdump, /* dump */ - pv_cmdkread, /* read */ - pv_cmdkwrite, /* write */ - pv_cmdkioctl, /* ioctl */ - nodev, /* devmap */ - nodev, /* mmap */ - nodev, /* segmap */ - nochpoll, /* poll */ - pv_cmdk_prop_op, /* cb_prop_op */ - 0, /* streamtab */ - D_64BIT | D_MP | D_NEW, /* Driver comaptibility flag */ - CB_REV, /* cb_rev */ - pv_cmdkaread, /* async read */ - pv_cmdkawrite /* async write */ -}; - -struct dev_ops pv_cmdk_ops = { - DEVO_REV, /* devo_rev, */ - 0, /* refcnt */ - pv_cmdk_getinfo, /* info */ - nulldev, /* identify */ - nulldev, /* probe */ - pv_cmdkattach, /* attach */ - pv_cmdkdetach, /* detach */ - nodev, /* reset */ - &pv_cmdk_cb_ops, /* driver operations */ - (struct bus_ops *)0, /* bus operations */ - NULL, /* power */ - ddi_quiesce_not_supported, /* devo_quiesce */ -}; - -/* - * Module linkage information for the kernel. - */ -static struct modldrv modldrv = { - &mod_driverops, /* Type of module. This one is a driver */ - "PV Common Direct Access Disk", - &pv_cmdk_ops, /* driver ops */ -}; - -static struct modlinkage modlinkage = { - MODREV_1, (void *)&modldrv, NULL -}; - -int -_init(void) -{ - int rval; - - if ((pv_cmdk_major = ddi_name_to_major("cmdk")) == (major_t)-1) - return (EINVAL); - - /* - * In general ide usually supports 4 disk devices, this same - * limitation also applies to software emulating ide devices. - * so by default we pre-allocate 4 cmdk soft state structures. - */ - if ((rval = ddi_soft_state_init(&pv_cmdk_state, - sizeof (struct pv_cmdk), PV_CMDK_NODES)) != 0) - return (rval); - - /* - * Currently we only support qemu as the backing hardware emulator - * for cmdk devices. - */ - pv_cmdk_h2p = pv_cmdk_h2p_xen_qemu; - - /* Install our module */ - if ((rval = mod_install(&modlinkage)) != 0) { - ddi_soft_state_fini(&pv_cmdk_state); - return (rval); - } - - return (0); -} - int -_info(struct modinfo *modinfop) +xdfs_c_cmlb_alter_behavior(xdfs_state_t *xsp) { - return (mod_info(&modlinkage, modinfop)); + return (xsp->xdfss_tgt_is_cd ? 
+ 0 : CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT); } -int -_fini(void) +/*ARGSUSED*/ +void +xdfs_c_attach(xdfs_state_t *xsp) { - int rval; - if ((rval = mod_remove(&modlinkage)) != 0) - return (rval); - ddi_soft_state_fini(&pv_cmdk_state); - return (0); } diff --git a/usr/src/uts/i86pc/i86hvm/io/pv_sd.c b/usr/src/uts/i86pc/i86hvm/io/pv_sd.c new file mode 100644 index 0000000000..74edb42907 --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/io/pv_sd.c @@ -0,0 +1,187 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <io/xdf_shell.h> + +#include <sys/scsi/targets/sddef.h> + +/* + * We're emulating (and possibly layering on top of) sd devices, so xdf + * disk unit mappings must match up with sd disk unit mappings'. + */ +#if !defined(XDF_PSHIFT) +#error "can't find definition for xdf unit mappings - XDF_PSHIFT" +#endif /* XDF_PSHIFT */ + +#if !defined(SDUNIT_SHIFT) +#error "can't find definition for cmdk unit mappings - SDUNIT_SHIFT" +#endif /* SDUNIT_SHIFT */ + +#if ((XDF_PSHIFT - SDUNIT_SHIFT) != 0) +#error "sd and xdf unit mappings don't match." +#endif /* ((XDF_PSHIFT - SDUNIT_SHIFT) != 0) */ + +extern const struct dev_ops sd_ops; +extern void *sd_state; + +/* + * Globals required by xdf_shell.c + */ +const char *xdfs_c_name = "sd"; +const char *xdfs_c_linkinfo = "PV SCSI Disk Driver"; +void **xdfs_c_hvm_ss = &sd_state; +const size_t xdfs_c_hvm_ss_size = sizeof (struct sd_lun); +const struct dev_ops *xdfs_c_hvm_dev_ops = &sd_ops; + +const xdfs_h2p_map_t xdfs_c_h2p_map[] = { + { "/pci@0,0/pci-ide@1,1/ide@0/sd@0,0", "/xpvd/xdf@768" }, + { "/pci@0,0/pci-ide@1,1/ide@0/sd@1,0", "/xpvd/xdf@832" }, + { "/pci@0,0/pci-ide@1,1/ide@1/sd@0,0", "/xpvd/xdf@5632" }, + { "/pci@0,0/pci-ide@1,1/ide@1/sd@1,0", "/xpvd/xdf@5696" }, + { NULL, 0 } +}; + +/*ARGSUSED*/ +int +xdfs_c_ioctl(xdfs_state_t *xsp, dev_t dev, int part, + int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp, boolean_t *done) +{ + dev_info_t *dip = xsp->xdfss_dip; + int instance = ddi_get_instance(dip); + int rv; + + *done = B_TRUE; + switch (cmd) { + case DKIOCINFO: { + struct dk_cinfo info; + + /* Pass on the ioctl request, save the response */ + if ((rv = ldi_ioctl(xsp->xdfss_tgt_lh[part], + cmd, (intptr_t)&info, FKIOCTL, credp, rvalp)) != 0) + return (rv); + + /* Update controller info */ + info.dki_cnum = ddi_get_instance(ddi_get_parent(dip)); + (void) strlcpy(info.dki_cname, + ddi_get_name(ddi_get_parent(dip)), sizeof (info.dki_cname)); + + /* Update unit info. */ + if (info.dki_ctype == DKC_VBD) { + /* + * Normally a real scsi device would report the + * controller type as DKC_SCSI_CCS. But we don't + * emulate a real scsi controller. 
(Which becomes + * apparent if anyone tries to issue us a uscsi(7i) + * command.) So instead of reporting DKC_SCSI_CCS, + * we report DKC_UNKNOWN. + */ + info.dki_ctype = DKC_UNKNOWN; + } + info.dki_unit = instance; + (void) strlcpy(info.dki_dname, + ddi_driver_name(dip), sizeof (info.dki_dname)); + info.dki_addr = 1; + + if (ddi_copyout(&info, (void *)arg, sizeof (info), flag)) + return (EFAULT); + + return (0); + } + default: + *done = B_FALSE; + return (0); + } /* switch (cmd) */ + /*NOTREACHED*/ +} + +/*ARGSUSED*/ +void +xdfs_c_devid_setup(xdfs_state_t *xsp) +{ + /* + * Currently we only support cdrom devices, which don't have + * devids associated with them. + */ + ASSERT("cdrom devices don't have a devid"); +} + +/*ARGSUSED*/ +int +xdfs_c_getpgeom(dev_info_t *dip, cmlb_geom_t *pgeom) +{ + /* + * Currently we only support cdrom devices, which don't have + * a physical geometry, so this routine should never get + * invoked. + */ + ASSERT("cdrom devices don't have any physical geometry"); + return (-1); +} + +/*ARGSUSED*/ +boolean_t +xdfs_c_bb_check(xdfs_state_t *xsp) +{ + /* + * Currently we only support cdrom devices, which don't have + * bad blocks, so this routine should never get invoked. + */ + ASSERT("cdrom devices don't support bad block mappings"); + return (B_TRUE); +} + +char * +xdfs_c_cmlb_node_type(xdfs_state_t *xsp) +{ + return (xsp->xdfss_tgt_is_cd ? DDI_NT_CD_CHAN : DDI_NT_BLOCK_CHAN); +} + +/*ARGSUSED*/ +int +xdfs_c_cmlb_alter_behavior(xdfs_state_t *xsp) +{ + return (0); +} + +void +xdfs_c_attach(xdfs_state_t *xsp) +{ + dev_info_t *dip = xsp->xdfss_dip; + int dtype = DTYPE_DIRECT; + + if (xsp->xdfss_tgt_is_cd) { + dtype = DTYPE_RODIRECT; + (void) ddi_prop_create(DDI_DEV_T_NONE, dip, + DDI_PROP_CANSLEEP, "removable-media", NULL, 0); + } + + /* + * We use ndi_* instead of ddi_* because it will result in + * INQUIRY_DEVICE_TYPE being a hardware property instead + * or a driver property + */ + (void) ndi_prop_update_int(DDI_DEV_T_NONE, dip, + INQUIRY_DEVICE_TYPE, dtype); +} diff --git a/usr/src/uts/i86pc/i86hvm/io/xdf_shell.c b/usr/src/uts/i86pc/i86hvm/io/xdf_shell.c new file mode 100644 index 0000000000..5162cb52ae --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/io/xdf_shell.c @@ -0,0 +1,1278 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#include <io/xdf_shell.h> +#include <sys/dkio.h> +#include <sys/scsi/scsi_types.h> + +/* + * General Notes + * + * We don't support disks with bad block mappins. We have this + * limitation because the underlying xdf driver doesn't support + * bad block remapping. 
If there is a need to support this feature + * it should be added directly to the xdf driver and we should just + * pass requests strait on through and let it handle the remapping. + * Also, it's probably worth pointing out that most modern disks do bad + * block remapping internally in the hardware so there's actually less + * of a chance of us ever discovering bad blocks. Also, in most cases + * this driver (and the xdf driver) will only be used with virtualized + * devices, so one might wonder why a virtual device would ever actually + * experience bad blocks. To wrap this up, you might be wondering how + * these bad block mappings get created and how they are managed. Well, + * there are two tools for managing bad block mappings, format(1M) and + * addbadsec(1M). Format(1M) can be used to do a surface scan of a disk + * to attempt to find bad block and create mappings for them. Format(1M) + * and addbadsec(1M) can also be used to edit existing mappings that may + * be saved on the disk. + * + * The underlying PV driver that this driver passes on requests to is the + * xdf driver. Since in most cases the xdf driver doesn't deal with + * physical disks it has it's own algorithm for assigning a physical + * geometry to a virtual disk (ie, cylinder count, head count, etc.) + * The default values chosen by the xdf driver may not match those + * assigned to a disk by a hardware disk emulator in an HVM environment. + * This is a problem since these physical geometry attributes affect + * things like the partition table, backup label location, etc. So + * to emulate disk devices correctly we need to know the physical geometry + * that was assigned to a disk at the time of it's initalization. + * Normally in an HVM environment this information will passed to + * the BIOS and operating system from the hardware emulator that is + * emulating the disk devices. In the case of a solaris dom0+xvm + * this would be qemu. So to work around this issue, this driver will + * query the emulated hardware to get the assigned physical geometry + * and then pass this geometry onto the xdf driver so that it can use it. + * But really, this information is essentially metadata about the disk + * that should be kept with the disk image itself. (Assuming or course + * that a disk image is the actual backingstore for this emulated device.) + * This metadata should also be made available to PV drivers via a common + * mechanism, probably the xenstore. The fact that this metadata isn't + * available outside of HVM domains means that it's difficult to move + * disks between HVM and PV domains, since a fully PV domain will have no + * way of knowing what the correct geometry of the target device is. + * (Short of reading the disk, looking for things like partition tables + * and labels, and taking a best guess at what the geometry was when + * the disk was initialized. Unsuprisingly, qemu actually does this.) + * + * This driver has to map xdf shell device instances into their corresponding + * xdf device instances. We have to do this to ensure that when a user + * accesses a emulated xdf shell device we map those accesses to the proper + * paravirtualized device. Basically what we need to know is how multiple + * 'disk' entries in a domU configuration file get mapped to emulated + * xdf shell devices and to xdf devices. The 'disk' entry to xdf instance + * mappings we know because those are done within the Solaris xvdi code + * and the xpvd nexus driver. 
But the config to emulated devices mappings + * are handled entirely within the xen management tool chain and the + * hardware emulator. Since all the tools that establish these mappings + * live in dom0, dom0 should really supply us with this information, + * probably via the xenstore. Unfortunatly it doesn't so, since there's + * no good way to determine this mapping dynamically, this driver uses + * a hard coded set of static mappings. These mappings are hardware + * emulator specific because each different hardware emulator could have + * a different device tree with different xdf shell device paths. This + * means that if we want to continue to use this static mapping approach + * to allow Solaris to run on different hardware emulators we'll have + * to analyze each of those emulators to determine what paths they + * use and hard code those paths into this driver. yech. This metadata + * really needs to be supplied to us by dom0. + * + * This driver access underlying xdf nodes. Unfortunatly, devices + * must create minor nodes during attach, and for disk devices to create + * minor nodes, they have to look at the label on the disk, so this means + * that disk drivers must be able to access a disk contents during + * attach. That means that this disk driver must be able to access + * underlying xdf nodes during attach. Unfortunatly, due to device tree + * locking restrictions, we cannot have an attach operation occuring on + * this device and then attempt to access another device which may + * cause another attach to occur in a different device tree branch + * since this could result in deadlock. Hence, this driver can only + * access xdf device nodes that we know are attached, and it can't use + * any ddi interfaces to access those nodes if those interfaces could + * trigger an attach of the xdf device. So this driver works around + * these restrictions by talking directly to xdf devices via + * xdf_hvm_hold(). This interface takes a pathname to an xdf device, + * and if that device is already attached then it returns the a held dip + * pointer for that device node. This prevents us from getting into + * deadlock situations, but now we need a mechanism to ensure that all + * the xdf device nodes this driver might access are attached before + * this driver tries to access them. This is accomplished via the + * hvmboot_rootconf() callback which is invoked just before root is + * mounted. hvmboot_rootconf() will attach xpvd and tell it to configure + * all xdf device visible to the system. All these xdf device nodes + * will also be marked with the "ddi-no-autodetach" property so that + * once they are configured, the will not be automatically unconfigured. + * The only way that they could be unconfigured is if the administrator + * explicitly attempts to unload required modules via rem_drv(1M) + * or modunload(1M). 
+ */ + +/* + * 16 paritions + fdisk (see xdf.h) + */ +#define XDFS_DEV2UNIT(dev) XDF_INST((getminor((dev)))) +#define XDFS_DEV2PART(dev) XDF_PART((getminor((dev)))) + +#define OTYP_VALID(otyp) ((otyp == OTYP_BLK) || \ + (otyp == OTYP_CHR) || \ + (otyp == OTYP_LYR)) + +#define XDFS_NODES 4 + +#define XDFS_HVM_MODE(sp) (XDFS_HVM_STATE(sp)->xdfs_hs_mode) +#define XDFS_HVM_DIP(sp) (XDFS_HVM_STATE(sp)->xdfs_hs_dip) +#define XDFS_HVM_PATH(sp) (XDFS_HVM_STATE(sp)->xdfs_hs_path) +#define XDFS_HVM_STATE(sp) \ + ((xdfs_hvm_state_t *)(&((char *)(sp))[XDFS_HVM_STATE_OFFSET])) +#define XDFS_HVM_STATE_OFFSET (xdfs_ss_size - sizeof (xdfs_hvm_state_t)) +#define XDFS_HVM_SANE(sp) \ + ASSERT(XDFS_HVM_MODE(sp)); \ + ASSERT(XDFS_HVM_DIP(sp) != NULL); \ + ASSERT(XDFS_HVM_PATH(sp) != NULL); + + +typedef struct xdfs_hvm_state { + boolean_t xdfs_hs_mode; + dev_info_t *xdfs_hs_dip; + char *xdfs_hs_path; +} xdfs_hvm_state_t; + +/* local function and structure prototypes */ +static int xdfs_iodone(struct buf *); +static boolean_t xdfs_isopen_part(xdfs_state_t *, int); +static boolean_t xdfs_isopen(xdfs_state_t *); +static cmlb_tg_ops_t xdfs_lb_ops; + +/* + * Globals + */ +major_t xdfs_major; +#define xdfs_hvm_dev_ops (xdfs_c_hvm_dev_ops) +#define xdfs_hvm_cb_ops (xdfs_hvm_dev_ops->devo_cb_ops) + +/* + * Private globals + */ +volatile boolean_t xdfs_pv_disable = B_FALSE; +static void *xdfs_ssp; +static size_t xdfs_ss_size; + +/* + * Private helper functions + */ +static boolean_t +xdfs_tgt_hold(xdfs_state_t *xsp) +{ + mutex_enter(&xsp->xdfss_mutex); + ASSERT(xsp->xdfss_tgt_holds >= 0); + if (!xsp->xdfss_tgt_attached) { + mutex_exit(&xsp->xdfss_mutex); + return (B_FALSE); + } + xsp->xdfss_tgt_holds++; + mutex_exit(&xsp->xdfss_mutex); + return (B_TRUE); +} + +static void +xdfs_tgt_release(xdfs_state_t *xsp) +{ + mutex_enter(&xsp->xdfss_mutex); + ASSERT(xsp->xdfss_tgt_attached); + ASSERT(xsp->xdfss_tgt_holds > 0); + if (--xsp->xdfss_tgt_holds == 0) + cv_broadcast(&xsp->xdfss_cv); + mutex_exit(&xsp->xdfss_mutex); +} + +/*ARGSUSED*/ +static int +xdfs_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie) +{ + int instance = ddi_get_instance(dip); + xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance); + int rv; + + if (xsp == NULL) + return (ENXIO); + + if (!xdfs_tgt_hold(xsp)) + return (ENXIO); + + if (cmd == TG_GETVIRTGEOM) { + cmlb_geom_t pgeom, *vgeomp; + diskaddr_t capacity; + + /* + * The native xdf driver doesn't support this ioctl. + * Intead of passing it on, emulate it here so that the + * results look the same as what we get for a real xdf + * shell device. + * + * Get the real size of the device + */ + if ((rv = xdf_lb_getinfo(xsp->xdfss_tgt_dip, + TG_GETPHYGEOM, &pgeom, tg_cookie)) != 0) + goto out; + capacity = pgeom.g_capacity; + + /* + * If the controller returned us something that doesn't + * really fit into an Int 13/function 8 geometry + * result, just fail the ioctl. See PSARC 1998/313. 
+ */ + if (capacity >= (63 * 254 * 1024)) { + rv = EINVAL; + goto out; + } + + vgeomp = (cmlb_geom_t *)arg; + vgeomp->g_capacity = capacity; + vgeomp->g_nsect = 63; + vgeomp->g_nhead = 254; + vgeomp->g_ncyl = capacity / (63 * 254); + vgeomp->g_acyl = 0; + vgeomp->g_secsize = 512; + vgeomp->g_intrlv = 1; + vgeomp->g_rpm = 3600; + rv = 0; + goto out; + } + + rv = xdf_lb_getinfo(xsp->xdfss_tgt_dip, cmd, arg, tg_cookie); + +out: + xdfs_tgt_release(xsp); + return (rv); +} + +static boolean_t +xdfs_isopen_part(xdfs_state_t *xsp, int part) +{ + int otyp; + + ASSERT(MUTEX_HELD(&xsp->xdfss_mutex)); + for (otyp = 0; (otyp < OTYPCNT); otyp++) { + if (xsp->xdfss_otyp_count[otyp][part] != 0) { + ASSERT(xsp->xdfss_tgt_attached); + ASSERT(xsp->xdfss_tgt_holds >= 0); + return (B_TRUE); + } + } + return (B_FALSE); +} + +static boolean_t +xdfs_isopen(xdfs_state_t *xsp) +{ + int part; + + ASSERT(MUTEX_HELD(&xsp->xdfss_mutex)); + for (part = 0; part < XDF_PEXT; part++) { + if (xdfs_isopen_part(xsp, part)) + return (B_TRUE); + } + return (B_FALSE); +} + +static int +xdfs_iodone(struct buf *bp) +{ + struct buf *bp_orig = bp->b_chain; + + /* Propegate back the io results */ + bp_orig->b_resid = bp->b_resid; + bioerror(bp_orig, geterror(bp)); + biodone(bp_orig); + + freerbuf(bp); + return (0); +} + +static int +xdfs_cmlb_attach(xdfs_state_t *xsp) +{ + return (cmlb_attach(xsp->xdfss_dip, &xdfs_lb_ops, + xsp->xdfss_tgt_is_cd ? DTYPE_RODIRECT : DTYPE_DIRECT, + xdf_is_rm(xsp->xdfss_tgt_dip), + B_TRUE, + xdfs_c_cmlb_node_type(xsp), + xdfs_c_cmlb_alter_behavior(xsp), + xsp->xdfss_cmlbhandle, 0)); +} + +static boolean_t +xdfs_tgt_probe(xdfs_state_t *xsp, dev_info_t *tgt_dip) +{ + cmlb_geom_t pgeom; + int tgt_instance = ddi_get_instance(tgt_dip); + + ASSERT(MUTEX_HELD(&xsp->xdfss_mutex)); + ASSERT(!xdfs_isopen(xsp)); + ASSERT(!xsp->xdfss_tgt_attached); + + xsp->xdfss_tgt_dip = tgt_dip; + xsp->xdfss_tgt_holds = 0; + xsp->xdfss_tgt_dev = makedevice(ddi_driver_major(tgt_dip), + XDF_MINOR(tgt_instance, 0)); + ASSERT((xsp->xdfss_tgt_dev & XDF_PMASK) == 0); + xsp->xdfss_tgt_is_cd = xdf_is_cd(tgt_dip); + + /* + * GROSS HACK ALERT! GROSS HACK ALERT! + * + * Before we can initialize the cmlb layer, we have to tell the + * underlying xdf device what it's physical geometry should be. + * See the block comments at the top of this file for more info. + */ + if (!xsp->xdfss_tgt_is_cd && + ((xdfs_c_getpgeom(xsp->xdfss_dip, &pgeom) != 0) || + (xdf_hvm_setpgeom(xsp->xdfss_tgt_dip, &pgeom) != 0))) + return (B_FALSE); + + /* + * Force the xdf front end driver to connect to the backend. From + * the solaris device tree perspective, the xdf driver devinfo node + * is already in the ATTACHED state. (Otherwise xdf_hvm_hold() + * would not have returned a dip.) But this doesn't mean that the + * xdf device has actually established a connection to it's back + * end driver. For us to be able to access the xdf device it needs + * to be connected. + */ + if (!xdf_hvm_connect(xsp->xdfss_tgt_dip)) { + cmn_err(CE_WARN, "pv driver failed to connect: %s", + xsp->xdfss_pv); + return (B_FALSE); + } + + if (xsp->xdfss_tgt_is_cd && !xdf_media_req_supported(tgt_dip)) { + /* + * Unfortunatly, the dom0 backend driver doesn't support + * important media request operations like eject, so fail + * the probe (this should cause us to fall back to emulated + * hvm device access, which does support things like eject). 
+ */ + return (B_FALSE); + } + + /* create kstat for iostat(1M) */ + if (xdf_kstat_create(xsp->xdfss_tgt_dip, (char *)xdfs_c_name, + tgt_instance) != 0) + return (B_FALSE); + + /* + * Now we need to mark ourselves as attached and drop xdfss_mutex. + * We do this because the final steps in the attach process will + * need to access the underlying disk to read the label and + * possibly the devid. + */ + xsp->xdfss_tgt_attached = B_TRUE; + mutex_exit(&xsp->xdfss_mutex); + + if (!xsp->xdfss_tgt_is_cd && xdfs_c_bb_check(xsp)) { + cmn_err(CE_WARN, "pv disks with bad blocks are unsupported: %s", + xsp->xdfss_hvm); + mutex_enter(&xsp->xdfss_mutex); + xdf_kstat_delete(xsp->xdfss_tgt_dip); + xsp->xdfss_tgt_attached = B_FALSE; + return (B_FALSE); + } + + /* + * Initalize cmlb. Note that for partition information cmlb + * will access the underly xdf disk device directly via + * xdfs_lb_rdwr() and xdfs_lb_getinfo(). There are no + * layered driver handles associated with this access because + * it is a direct disk access that doesn't go through + * any of the device nodes exported by the xdf device (since + * all exported device nodes only reflect the portion of + * the device visible via the partition/slice that the node + * is associated with.) So while not observable via the LDI, + * this direct disk access is ok since we're actually holding + * the target device. + */ + if (xdfs_cmlb_attach(xsp) != 0) { + mutex_enter(&xsp->xdfss_mutex); + xdf_kstat_delete(xsp->xdfss_tgt_dip); + xsp->xdfss_tgt_attached = B_FALSE; + return (B_FALSE); + } + + /* setup devid string */ + xsp->xdfss_tgt_devid = NULL; + if (!xsp->xdfss_tgt_is_cd) + xdfs_c_devid_setup(xsp); + + (void) cmlb_validate(xsp->xdfss_cmlbhandle, 0, 0); + + /* Have the system report any newly created device nodes */ + ddi_report_dev(xsp->xdfss_dip); + + mutex_enter(&xsp->xdfss_mutex); + return (B_TRUE); +} + +static boolean_t +xdfs_tgt_detach(xdfs_state_t *xsp) +{ + ASSERT(MUTEX_HELD(&xsp->xdfss_mutex)); + ASSERT(xsp->xdfss_tgt_attached); + ASSERT(xsp->xdfss_tgt_holds >= 0); + + if ((xdfs_isopen(xsp)) || (xsp->xdfss_tgt_holds != 0)) + return (B_FALSE); + + ddi_devid_unregister(xsp->xdfss_dip); + if (xsp->xdfss_tgt_devid != NULL) + ddi_devid_free(xsp->xdfss_tgt_devid); + + xdf_kstat_delete(xsp->xdfss_tgt_dip); + xsp->xdfss_tgt_attached = B_FALSE; + return (B_TRUE); +} + +/* + * Xdf_shell interfaces that may be called from outside this file. + */ +void +xdfs_minphys(struct buf *bp) +{ + xdfmin(bp); +} + +/* + * Cmlb ops vector, allows the cmlb module to directly access the entire + * xdf disk device without going through any partitioning layers. 
+ */ +int +xdfs_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, + diskaddr_t start, size_t count, void *tg_cookie) +{ + int instance = ddi_get_instance(dip); + xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance); + int rv; + + if (xsp == NULL) + return (ENXIO); + + if (!xdfs_tgt_hold(xsp)) + return (ENXIO); + + rv = xdf_lb_rdwr(xsp->xdfss_tgt_dip, + cmd, bufaddr, start, count, tg_cookie); + + xdfs_tgt_release(xsp); + return (rv); +} + +/* + * Driver PV and HVM cb_ops entry points + */ +/*ARGSUSED*/ +static int +xdfs_open(dev_t *dev_p, int flag, int otyp, cred_t *credp) +{ + ldi_ident_t li; + dev_t dev = *dev_p; + int instance = XDFS_DEV2UNIT(dev); + int part = XDFS_DEV2PART(dev); + xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance); + dev_t tgt_devt = xsp->xdfss_tgt_dev | part; + int err = 0; + + if ((otyp < 0) || (otyp >= OTYPCNT)) + return (EINVAL); + + if (XDFS_HVM_MODE(xsp)) { + if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL)) + return (ENOTSUP); + return (xdfs_hvm_cb_ops->cb_open(dev_p, flag, otyp, credp)); + } + + /* allocate an ldi handle */ + VERIFY(ldi_ident_from_dev(*dev_p, &li) == 0); + + mutex_enter(&xsp->xdfss_mutex); + + /* + * We translate all device opens (chr, blk, and lyr) into + * block device opens. Why? Because for all the opens that + * come through this driver, we only keep around one LDI handle. + * So that handle can only be of one open type. The reason + * that we choose the block interface for this is that to use + * the block interfaces for a device the system needs to allocate + * buf_ts, which are associated with system memory which can act + * as a cache for device data. So normally when a block device + * is closed the system will ensure that all these pages get + * flushed out of memory. But if we were to open the device + * as a character device, then when we went to close the underlying + * device (even if we had invoked the block interfaces) any data + * remaining in memory wouldn't necessairly be flushed out + * before the device was closed. + */ + if (xsp->xdfss_tgt_lh[part] == NULL) { + ASSERT(!xdfs_isopen_part(xsp, part)); + + err = ldi_open_by_dev(&tgt_devt, OTYP_BLK, flag, credp, + &xsp->xdfss_tgt_lh[part], li); + + if (err != 0) { + mutex_exit(&xsp->xdfss_mutex); + ldi_ident_release(li); + return (err); + } + + /* Disk devices really shouldn't clone */ + ASSERT(tgt_devt == (xsp->xdfss_tgt_dev | part)); + } else { + ldi_handle_t lh_tmp; + + ASSERT(xdfs_isopen_part(xsp, part)); + + /* do ldi open/close to get flags and cred check */ + err = ldi_open_by_dev(&tgt_devt, OTYP_BLK, flag, credp, + &lh_tmp, li); + if (err != 0) { + mutex_exit(&xsp->xdfss_mutex); + ldi_ident_release(li); + return (err); + } + + /* Disk devices really shouldn't clone */ + ASSERT(tgt_devt == (xsp->xdfss_tgt_dev | part)); + (void) ldi_close(lh_tmp, flag, credp); + } + ldi_ident_release(li); + + xsp->xdfss_otyp_count[otyp][part]++; + + mutex_exit(&xsp->xdfss_mutex); + return (0); +} + +/*ARGSUSED*/ +static int +xdfs_close(dev_t dev, int flag, int otyp, cred_t *credp) +{ + int instance = XDFS_DEV2UNIT(dev); + int part = XDFS_DEV2PART(dev); + xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance); + int err = 0; + + ASSERT((otyp >= 0) && otyp < OTYPCNT); + + /* Sanity check the dev_t associated with this request. 
*/ + ASSERT(getmajor(dev) == xdfs_major); + if (getmajor(dev) != xdfs_major) + return (ENXIO); + + if (XDFS_HVM_MODE(xsp)) { + if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL)) + return (ENOTSUP); + return (xdfs_hvm_cb_ops->cb_close(dev, flag, otyp, credp)); + } + + /* + * Sanity check that that the device is actually open. On debug + * kernels we'll panic and on non-debug kernels we'll return failure. + */ + mutex_enter(&xsp->xdfss_mutex); + ASSERT(xdfs_isopen_part(xsp, part)); + if (!xdfs_isopen_part(xsp, part)) { + mutex_exit(&xsp->xdfss_mutex); + return (ENXIO); + } + + ASSERT(xsp->xdfss_tgt_lh[part] != NULL); + ASSERT(xsp->xdfss_otyp_count[otyp][part] > 0); + if (otyp == OTYP_LYR) { + xsp->xdfss_otyp_count[otyp][part]--; + } else { + xsp->xdfss_otyp_count[otyp][part] = 0; + } + + if (!xdfs_isopen_part(xsp, part)) { + err = ldi_close(xsp->xdfss_tgt_lh[part], flag, credp); + xsp->xdfss_tgt_lh[part] = NULL; + } + + mutex_exit(&xsp->xdfss_mutex); + + return (err); +} + +int +xdfs_strategy(struct buf *bp) +{ + dev_t dev = bp->b_edev; + int instance = XDFS_DEV2UNIT(dev); + int part = XDFS_DEV2PART(dev); + xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance); + dev_t tgt_devt; + struct buf *bp_clone; + + /* Sanity check the dev_t associated with this request. */ + ASSERT(getmajor(dev) == xdfs_major); + if (getmajor(dev) != xdfs_major) + goto err; + + if (XDFS_HVM_MODE(xsp)) { + if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL)) + return (ENOTSUP); + return (xdfs_hvm_cb_ops->cb_strategy(bp)); + } + + /* + * Sanity checks that the dev_t associated with the buf we were + * passed corresponds to an open partition. On debug kernels we'll + * panic and on non-debug kernels we'll return failure. + */ + mutex_enter(&xsp->xdfss_mutex); + ASSERT(xdfs_isopen_part(xsp, part)); + if (!xdfs_isopen_part(xsp, part)) { + mutex_exit(&xsp->xdfss_mutex); + goto err; + } + mutex_exit(&xsp->xdfss_mutex); + + /* clone this buffer */ + tgt_devt = xsp->xdfss_tgt_dev | part; + bp_clone = bioclone(bp, 0, bp->b_bcount, tgt_devt, bp->b_blkno, + xdfs_iodone, NULL, KM_SLEEP); + bp_clone->b_chain = bp; + + /* + * If we're being invoked on behalf of the physio() call in + * xdfs_dioctl_rwcmd() then b_private will be set to + * XB_SLICE_NONE and we need to propegate this flag into the + * cloned buffer so that the xdf driver will see it. + */ + if (bp->b_private == (void *)XB_SLICE_NONE) + bp_clone->b_private = (void *)XB_SLICE_NONE; + + /* + * Pass on the cloned buffer. Note that we don't bother to check + * for failure because the xdf strategy routine will have to + * invoke biodone() if it wants to return an error, which means + * that the xdfs_iodone() callback will get invoked and it + * will propegate the error back up the stack and free the cloned + * buffer. 
+ */ + ASSERT(xsp->xdfss_tgt_lh[part] != NULL); + return (ldi_strategy(xsp->xdfss_tgt_lh[part], bp_clone)); + +err: + bioerror(bp, ENXIO); + bp->b_resid = bp->b_bcount; + biodone(bp); + return (0); +} + +static int +xdfs_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) +{ + int instance = XDFS_DEV2UNIT(dev); + int part = XDFS_DEV2PART(dev); + xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance); + + if (!XDFS_HVM_MODE(xsp)) + return (ldi_dump(xsp->xdfss_tgt_lh[part], addr, blkno, nblk)); + + if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL)) + return (ENOTSUP); + return (xdfs_hvm_cb_ops->cb_dump(dev, addr, blkno, nblk)); +} + +/*ARGSUSED*/ +static int +xdfs_read(dev_t dev, struct uio *uio, cred_t *credp) +{ + int instance = XDFS_DEV2UNIT(dev); + int part = XDFS_DEV2PART(dev); + xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance); + + if (!XDFS_HVM_MODE(xsp)) + return (ldi_read(xsp->xdfss_tgt_lh[part], uio, credp)); + + if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL)) + return (ENOTSUP); + return (xdfs_hvm_cb_ops->cb_read(dev, uio, credp)); +} + +/*ARGSUSED*/ +static int +xdfs_write(dev_t dev, struct uio *uio, cred_t *credp) +{ + int instance = XDFS_DEV2UNIT(dev); + int part = XDFS_DEV2PART(dev); + xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance); + + if (!XDFS_HVM_MODE(xsp)) + return (ldi_write(xsp->xdfss_tgt_lh[part], uio, credp)); + + if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL)) + return (ENOTSUP); + return (xdfs_hvm_cb_ops->cb_write(dev, uio, credp)); +} + +/*ARGSUSED*/ +static int +xdfs_aread(dev_t dev, struct aio_req *aio, cred_t *credp) +{ + int instance = XDFS_DEV2UNIT(dev); + int part = XDFS_DEV2PART(dev); + xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance); + + if (!XDFS_HVM_MODE(xsp)) + return (ldi_aread(xsp->xdfss_tgt_lh[part], aio, credp)); + + if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL) || + (xdfs_hvm_cb_ops->cb_strategy == NULL) || + (xdfs_hvm_cb_ops->cb_strategy == nodev) || + (xdfs_hvm_cb_ops->cb_aread == NULL)) + return (ENOTSUP); + return (xdfs_hvm_cb_ops->cb_aread(dev, aio, credp)); +} + +/*ARGSUSED*/ +static int +xdfs_awrite(dev_t dev, struct aio_req *aio, cred_t *credp) +{ + int instance = XDFS_DEV2UNIT(dev); + int part = XDFS_DEV2PART(dev); + xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance); + + if (!XDFS_HVM_MODE(xsp)) + return (ldi_awrite(xsp->xdfss_tgt_lh[part], aio, credp)); + + if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL) || + (xdfs_hvm_cb_ops->cb_strategy == NULL) || + (xdfs_hvm_cb_ops->cb_strategy == nodev) || + (xdfs_hvm_cb_ops->cb_awrite == NULL)) + return (ENOTSUP); + return (xdfs_hvm_cb_ops->cb_awrite(dev, aio, credp)); +} + +static int +xdfs_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, + int *rvalp) +{ + int instance = XDFS_DEV2UNIT(dev); + int part = XDFS_DEV2PART(dev); + xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance); + int rv; + boolean_t done; + + if (XDFS_HVM_MODE(xsp)) { + if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL)) + return (ENOTSUP); + return (xdfs_hvm_cb_ops->cb_ioctl( + dev, cmd, arg, flag, credp, rvalp)); + } + + rv = xdfs_c_ioctl(xsp, dev, part, cmd, arg, flag, credp, rvalp, &done); + if (done) + return (rv); + return (ldi_ioctl(xsp->xdfss_tgt_lh[part], + cmd, arg, flag, credp, rvalp)); +} + +static int +xdfs_hvm_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, + int flags, char *name, caddr_t valuep, int *lengthp) +{ + int instance = ddi_get_instance(dip); + void *xsp = 
ddi_get_soft_state(xdfs_ssp, instance); + + ASSERT(XDFS_HVM_MODE(xsp)); + + if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL) || + (xdfs_hvm_cb_ops->cb_prop_op == NULL) || + (xdfs_hvm_cb_ops->cb_prop_op == nodev) || + (xdfs_hvm_cb_ops->cb_prop_op == nulldev)) + return (DDI_PROP_NOT_FOUND); + + return (xdfs_hvm_cb_ops->cb_prop_op(dev, dip, prop_op, + flags, name, valuep, lengthp)); +} + +static int +xdfs_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, + int flags, char *name, caddr_t valuep, int *lengthp) +{ + int instance = ddi_get_instance(dip); + xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance); + int rv; + dev_info_t *tgt_dip; + dev_t tgt_devt; + + /* + * Sanity check that if a dev_t or dip were specified that they + * correspond to this device driver. On debug kernels we'll + * panic and on non-debug kernels we'll return failure. + */ + ASSERT(ddi_driver_major(dip) == xdfs_major); + ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdfs_major)); + if ((ddi_driver_major(dip) != xdfs_major) || + ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdfs_major))) + return (DDI_PROP_NOT_FOUND); + + /* + * This property lookup might be associated with a device node + * that is not yet attached, if so pass it onto ddi_prop_op(). + */ + if (xsp == NULL) + return (ddi_prop_op(dev, dip, prop_op, flags, + name, valuep, lengthp)); + + /* If we're accessing the device in hvm mode, pass this request on */ + if (XDFS_HVM_MODE(xsp)) + return (xdfs_hvm_prop_op(dev, dip, prop_op, + flags, name, valuep, lengthp)); + + /* + * Make sure we only lookup static properties. + * + * If there are static properties of the underlying xdf driver + * that we want to mirror, then we'll have to explicity look them + * up and define them during attach. There are a few reasons + * for this. Most importantly, most static properties are typed + * and all dynamic properties are untyped, ie, for dynamic + * properties the caller must know the type of the property and + * how to interpret the value of the property. the prop_op drivedr + * entry point is only designed for returning dynamic/untyped + * properties, so if we were to attempt to lookup and pass back + * static properties of the underlying device here then we would + * be losing the type information for those properties. Another + * reason we don't want to pass on static property requests is that + * static properties are enumerable in the device tree, where as + * dynamic ones are not. + */ + flags |= DDI_PROP_DYNAMIC; + + /* + * We can't use the ldi here to access the underlying device because + * the ldi actually opens the device, and that open might fail if the + * device has already been opened with the FEXCL flag. If we used + * the ldi here, it would also be possible for some other caller to + * try open the device with the FEXCL flag and get a failure back + * because we have it open to do a property query. Instad we'll + * grab a hold on the target dip. + */ + if (!xdfs_tgt_hold(xsp)) + return (DDI_PROP_NOT_FOUND); + + /* figure out dip the dev_t we're going to pass on down */ + tgt_dip = xsp->xdfss_tgt_dip; + if (dev == DDI_DEV_T_ANY) { + tgt_devt = DDI_DEV_T_ANY; + } else { + tgt_devt = xsp->xdfss_tgt_dev | XDFS_DEV2PART(dev); + } + + /* + * Cdev_prop_op() is not a public interface, and normally the caller + * is required to make sure that the target driver actually implements + * this interface before trying to invoke it. 
In this case we know + * that we're always accessing the xdf driver and it does have this + * interface defined, so we can skip the check. + */ + rv = cdev_prop_op(tgt_devt, tgt_dip, + prop_op, flags, name, valuep, lengthp); + + xdfs_tgt_release(xsp); + return (rv); +} + +/* + * Driver PV and HVM dev_ops entry points + */ +/*ARGSUSED*/ +static int +xdfs_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, + void **result) +{ + dev_t dev = (dev_t)arg; + int instance = XDFS_DEV2UNIT(dev); + xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance); + + switch (infocmd) { + case DDI_INFO_DEVT2DEVINFO: + if (xsp == NULL) + return (DDI_FAILURE); + if (XDFS_HVM_MODE(xsp)) + *result = XDFS_HVM_DIP(xsp); + else + *result = (void *)xsp->xdfss_dip; + break; + case DDI_INFO_DEVT2INSTANCE: + *result = (void *)(intptr_t)instance; + break; + default: + return (DDI_FAILURE); + } + return (DDI_SUCCESS); +} + +static int +xdfs_hvm_probe(dev_info_t *dip, char *path) +{ + int instance = ddi_get_instance(dip); + int rv = DDI_PROBE_SUCCESS; + void *xsp; + + ASSERT(path != NULL); + cmn_err(CE_WARN, "PV access to device disabled: %s", path); + + (void) ddi_soft_state_zalloc(xdfs_ssp, instance); + VERIFY((xsp = ddi_get_soft_state(xdfs_ssp, instance)) != NULL); + + if ((xdfs_hvm_dev_ops == NULL) || + (xdfs_hvm_dev_ops->devo_probe == NULL) || + ((rv = xdfs_hvm_dev_ops->devo_probe(dip)) == DDI_PROBE_FAILURE)) { + ddi_soft_state_free(xdfs_ssp, instance); + cmn_err(CE_WARN, "HVM probe of device failed: %s", path); + kmem_free(path, MAXPATHLEN); + return (DDI_PROBE_FAILURE); + } + + XDFS_HVM_MODE(xsp) = B_TRUE; + XDFS_HVM_DIP(xsp) = dip; + XDFS_HVM_PATH(xsp) = path; + + return (rv); +} + +static int +xdfs_probe(dev_info_t *dip) +{ + int instance = ddi_get_instance(dip); + xdfs_state_t *xsp; + dev_info_t *tgt_dip; + char *path; + int i, pv_disable; + + /* if we've already probed the device then there's nothing todo */ + if (ddi_get_soft_state(xdfs_ssp, instance)) + return (DDI_PROBE_PARTIAL); + + /* Figure out our pathname */ + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, path); + + /* see if we should disable pv access mode */ + pv_disable = ddi_prop_get_int(DDI_DEV_T_ANY, + dip, DDI_PROP_NOTPROM, "pv_disable", 0); + + if (xdfs_pv_disable || pv_disable) + return (xdfs_hvm_probe(dip, path)); + + /* + * This xdf shell device layers on top of an xdf device. So the first + * thing we need to do is determine which xdf device instance this + * xdf shell instance should be layered on top of. + */ + for (i = 0; xdfs_c_h2p_map[i].xdfs_h2p_hvm != NULL; i++) { + if (strcmp(xdfs_c_h2p_map[i].xdfs_h2p_hvm, path) == 0) + break; + } + + if ((xdfs_c_h2p_map[i].xdfs_h2p_hvm == NULL) || + ((tgt_dip = xdf_hvm_hold(xdfs_c_h2p_map[i].xdfs_h2p_pv)) == NULL)) { + /* + * UhOh. We either don't know what xdf instance this xdf + * shell device should be mapped to or the xdf node assocaited + * with this instance isnt' attached. in either case fall + * back to hvm access. 
+ */ + return (xdfs_hvm_probe(dip, path)); + } + + /* allocate and initialize our state structure */ + (void) ddi_soft_state_zalloc(xdfs_ssp, instance); + xsp = ddi_get_soft_state(xdfs_ssp, instance); + mutex_init(&xsp->xdfss_mutex, NULL, MUTEX_DRIVER, NULL); + cv_init(&xsp->xdfss_cv, NULL, CV_DEFAULT, NULL); + mutex_enter(&xsp->xdfss_mutex); + + xsp->xdfss_dip = dip; + xsp->xdfss_pv = xdfs_c_h2p_map[i].xdfs_h2p_pv; + xsp->xdfss_hvm = xdfs_c_h2p_map[i].xdfs_h2p_hvm; + xsp->xdfss_tgt_attached = B_FALSE; + cmlb_alloc_handle((cmlb_handle_t *)&xsp->xdfss_cmlbhandle); + + if (!xdfs_tgt_probe(xsp, tgt_dip)) { + mutex_exit(&xsp->xdfss_mutex); + cmlb_free_handle(&xsp->xdfss_cmlbhandle); + ddi_soft_state_free(xdfs_ssp, instance); + ddi_release_devi(tgt_dip); + return (xdfs_hvm_probe(dip, path)); + } + mutex_exit(&xsp->xdfss_mutex); + + /* + * Add a zero-length attribute to tell the world we support + * kernel ioctls (for layered drivers). + */ + (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP, + DDI_KERNEL_IOCTL, NULL, 0); + + return (DDI_PROBE_SUCCESS); +} + +static int +xdfs_hvm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + int instance = ddi_get_instance(dip); + void *xsp = ddi_get_soft_state(xdfs_ssp, instance); + int rv = DDI_FAILURE; + + XDFS_HVM_SANE(xsp); + + if ((xdfs_hvm_dev_ops == NULL) || + (xdfs_hvm_dev_ops->devo_attach == NULL) || + ((rv = xdfs_hvm_dev_ops->devo_attach(dip, cmd)) != DDI_SUCCESS)) { + cmn_err(CE_WARN, "HVM attach of device failed: %s", + XDFS_HVM_PATH(xsp)); + kmem_free(XDFS_HVM_PATH(xsp), MAXPATHLEN); + ddi_soft_state_free(xdfs_ssp, instance); + return (rv); + } + + return (DDI_SUCCESS); +} + +/* + * Autoconfiguration Routines + */ +static int +xdfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) +{ + int instance = ddi_get_instance(dip); + xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance); + + if (xsp == NULL) + return (DDI_FAILURE); + if (XDFS_HVM_MODE(xsp)) + return (xdfs_hvm_attach(dip, cmd)); + if (cmd != DDI_ATTACH) + return (DDI_FAILURE); + + xdfs_c_attach(xsp); + return (DDI_SUCCESS); +} + +static int +xdfs_hvm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + int instance = ddi_get_instance(dip); + void *xsp = ddi_get_soft_state(xdfs_ssp, instance); + int rv; + + XDFS_HVM_SANE(xsp); + + if ((xdfs_hvm_dev_ops == NULL) || + (xdfs_hvm_dev_ops->devo_detach == NULL)) + return (DDI_FAILURE); + + if ((rv = xdfs_hvm_dev_ops->devo_detach(dip, cmd)) != DDI_SUCCESS) + return (rv); + + kmem_free(XDFS_HVM_PATH(xsp), MAXPATHLEN); + ddi_soft_state_free(xdfs_ssp, instance); + return (DDI_SUCCESS); +} + +static int +xdfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) +{ + int instance = ddi_get_instance(dip); + xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance); + + if (XDFS_HVM_MODE(xsp)) + return (xdfs_hvm_detach(dip, cmd)); + if (cmd != DDI_DETACH) + return (DDI_FAILURE); + + mutex_enter(&xsp->xdfss_mutex); + if (!xdfs_tgt_detach(xsp)) { + mutex_exit(&xsp->xdfss_mutex); + return (DDI_FAILURE); + } + mutex_exit(&xsp->xdfss_mutex); + + cmlb_detach(xsp->xdfss_cmlbhandle, 0); + cmlb_free_handle(&xsp->xdfss_cmlbhandle); + ddi_release_devi(xsp->xdfss_tgt_dip); + ddi_soft_state_free(xdfs_ssp, instance); + ddi_prop_remove_all(dip); + return (DDI_SUCCESS); +} + +static int +xdfs_hvm_power(dev_info_t *dip, int component, int level) +{ + int instance = ddi_get_instance(dip); + void *xsp = ddi_get_soft_state(xdfs_ssp, instance); + + XDFS_HVM_SANE(xsp); + + if ((xdfs_hvm_dev_ops == NULL) || + (xdfs_hvm_dev_ops->devo_power == NULL)) + return 
(DDI_FAILURE); + return (xdfs_hvm_dev_ops->devo_power(dip, component, level)); +} + +static int +xdfs_power(dev_info_t *dip, int component, int level) +{ + int instance = ddi_get_instance(dip); + xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance); + + if (XDFS_HVM_MODE(xsp)) + return (xdfs_hvm_power(dip, component, level)); + return (nodev()); +} + +/* + * Cmlb ops vector + */ +static cmlb_tg_ops_t xdfs_lb_ops = { + TG_DK_OPS_VERSION_1, + xdfs_lb_rdwr, + xdfs_lb_getinfo +}; + +/* + * Device driver ops vector + */ +static struct cb_ops xdfs_cb_ops = { + xdfs_open, /* open */ + xdfs_close, /* close */ + xdfs_strategy, /* strategy */ + nodev, /* print */ + xdfs_dump, /* dump */ + xdfs_read, /* read */ + xdfs_write, /* write */ + xdfs_ioctl, /* ioctl */ + nodev, /* devmap */ + nodev, /* mmap */ + nodev, /* segmap */ + nochpoll, /* poll */ + xdfs_prop_op, /* cb_prop_op */ + 0, /* streamtab */ + D_64BIT | D_MP | D_NEW, /* Driver comaptibility flag */ + CB_REV, /* cb_rev */ + xdfs_aread, /* async read */ + xdfs_awrite /* async write */ +}; + +struct dev_ops xdfs_ops = { + DEVO_REV, /* devo_rev, */ + 0, /* refcnt */ + xdfs_getinfo, /* info */ + nulldev, /* identify */ + xdfs_probe, /* probe */ + xdfs_attach, /* attach */ + xdfs_detach, /* detach */ + nodev, /* reset */ + &xdfs_cb_ops, /* driver operations */ + NULL, /* bus operations */ + xdfs_power, /* power */ + ddi_quiesce_not_supported, /* devo_quiesce */ +}; + +/* + * Module linkage information for the kernel. + */ +static struct modldrv modldrv = { + &mod_driverops, /* Type of module. This one is a driver. */ + NULL, /* Module description. Set by _init() */ + &xdfs_ops, /* Driver ops. */ +}; + +static struct modlinkage modlinkage = { + MODREV_1, (void *)&modldrv, NULL +}; + +int +_init(void) +{ + int rval; + + xdfs_major = ddi_name_to_major((char *)xdfs_c_name); + if (xdfs_major == (major_t)-1) + return (EINVAL); + + /* + * Determine the size of our soft state structure. The base + * size of the structure is the larger of the hvm clients state + * structure, or our shell state structure. Then we'll align + * the end of the structure to a pointer boundry and append + * a xdfs_hvm_state_t structure. This way the xdfs_hvm_state_t + * structure is always present and we can use it to determine the + * current device access mode (hvm or shell). + */ + xdfs_ss_size = MAX(xdfs_c_hvm_ss_size, sizeof (xdfs_state_t)); + xdfs_ss_size = P2ROUNDUP(xdfs_ss_size, sizeof (uintptr_t)); + xdfs_ss_size += sizeof (xdfs_hvm_state_t); + + /* + * In general ide usually supports 4 disk devices, this same + * limitation also applies to software emulating ide devices. + * so by default we pre-allocate 4 xdf shell soft state structures. 
+ */ + if ((rval = ddi_soft_state_init(&xdfs_ssp, + xdfs_ss_size, XDFS_NODES)) != 0) + return (rval); + *xdfs_c_hvm_ss = xdfs_ssp; + + /* Install our module */ + if (modldrv.drv_linkinfo == NULL) + modldrv.drv_linkinfo = (char *)xdfs_c_linkinfo; + if ((rval = mod_install(&modlinkage)) != 0) { + ddi_soft_state_fini(&xdfs_ssp); + return (rval); + } + + return (0); +} + +int +_info(struct modinfo *modinfop) +{ + if (modldrv.drv_linkinfo == NULL) + modldrv.drv_linkinfo = (char *)xdfs_c_linkinfo; + return (mod_info(&modlinkage, modinfop)); +} + +int +_fini(void) +{ + int rval; + if ((rval = mod_remove(&modlinkage)) != 0) + return (rval); + ddi_soft_state_fini(&xdfs_ssp); + return (0); +} diff --git a/usr/src/uts/i86pc/i86hvm/io/xdf_shell.h b/usr/src/uts/i86pc/i86hvm/io/xdf_shell.h new file mode 100644 index 0000000000..a7dd983e7b --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/io/xdf_shell.h @@ -0,0 +1,161 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _XDF_SHELL_H +#define _XDF_SHELL_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* These interfaces are all dependant upon xdf */ +#include <io/xdf.h> + +/* Include files required for this header file. */ +#include <sys/vtoc.h> + +/* + * These include files are not strictly required to include this header + * file, but pretty much every xdf_shell client will need to include these + * header files, so just include them here. 
+ */ +#include <sys/cdio.h> +#include <sys/dklabel.h> +#include <sys/dktp/altsctr.h> +#include <sys/dktp/bbh.h> +#include <sys/dktp/cmdk.h> +#include <sys/dktp/dadev.h> +#include <sys/dktp/dadkio.h> +#include <sys/fdio.h> + +/* + * XDF Shell driver state structures + */ +typedef struct xdfs_state { + dev_info_t *xdfss_dip; + const char *xdfss_pv; + const char *xdfss_hvm; + + /* Members below are protected by xdfss_mutex */ + kmutex_t xdfss_mutex; + kcondvar_t xdfss_cv; + cmlb_handle_t xdfss_cmlbhandle; + int xdfss_otyp_count[OTYPCNT][XDF_PEXT]; + + /* Members below are only valid when xdfss_tgt_attached is true */ + dev_info_t *xdfss_tgt_dip; + boolean_t xdfss_tgt_attached; + int xdfss_tgt_holds; + dev_t xdfss_tgt_dev; + ddi_devid_t xdfss_tgt_devid; + boolean_t xdfss_tgt_locked; + boolean_t xdfss_tgt_is_cd; + ldi_handle_t xdfss_tgt_lh[XDF_PEXT]; +} xdfs_state_t; + +typedef struct xdfs_h2p_map { + const char *xdfs_h2p_hvm; + const char *xdfs_h2p_pv; +} xdfs_h2p_map_t; + +/* + * Globals defined by xdf_shell.c + */ +extern major_t xdfs_major; + +/* + * Functions defined by xdf_shell.c + */ +extern int xdfs_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, + void *); +extern int xdfs_strategy(struct buf *); +extern void xdfs_minphys(struct buf *); + +/* + * Globals that must be defined by xdf_shell.c clients + */ +extern const char *xdfs_c_name; +extern const char *xdfs_c_linkinfo; +extern void **xdfs_c_hvm_ss; +extern const size_t xdfs_c_hvm_ss_size; +extern const struct dev_ops *xdfs_c_hvm_dev_ops; +extern const xdfs_h2p_map_t xdfs_c_h2p_map[]; + +/* + * Functions that must be implemented by xdf_shell.c clients + */ + +/* + * xdfs_c_devid_setup() is invoked during device probe. If possible, it + * should create a devid for the associated disk device. This routine will + * not be invoked for cdrom devices. + */ +extern void xdfs_c_devid_setup(xdfs_state_t *); + +/* + * xdfs_c_bb_check() is invoked during device probe. It should check for + * the existance of bad blocks mappings in an alternate partition/slice and + * return B_FALSE if there are no bad block mappings found and return B_TRUE + * is there are bad block mappings found. The presence of bad block + * mappings will cause the device attach to fail. This routine will not be + * invoked for cdrom devices. + */ +extern boolean_t xdfs_c_bb_check(xdfs_state_t *); + +/* + * xdfs_c_getpgeom() is invoked during device probe. It should return the + * physical geometery of a disk device that is being attached. The failure + * of this routine will cause the device attach to fail. This routine will + * not be invoked for cdrom devices. + */ +extern int xdfs_c_getpgeom(dev_info_t *, cmlb_geom_t *); + +/* + * xdfs_c_cmlb_node_type() and xdfs_c_cmlb_alter_behavior() are invoked + * during device probe while initializing the cmlb module for the device + * node being probed. They should return a cmlb node type and cmlb alter + * behavior flag value that can be passed to cmlb_attach(). + */ +extern char *xdfs_c_cmlb_node_type(xdfs_state_t *); +extern int xdfs_c_cmlb_alter_behavior(xdfs_state_t *); + +/* + * xdfs_c_attach() is invoked during device attach. It provides an + * opportunity for the client to create properties or do anything else + * necessary for attach. + */ +extern void xdfs_c_attach(xdfs_state_t *); + +/* + * xdfs_c_getpgeom() is invoked to handle ioctl operations. 
+ */ +extern int xdfs_c_ioctl(xdfs_state_t *, dev_t, int, + int, intptr_t, int, cred_t *, int *, boolean_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _XDF_SHELL_H */ diff --git a/usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile b/usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile index 0c206a192d..030564df78 100644 --- a/usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile +++ b/usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile @@ -21,7 +21,7 @@ # # uts/i86pc/pv_cmdk/Makefile # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # This makefile drives the production of the xdc driver. @@ -61,8 +61,8 @@ ALL_TARGET = $(BINARY) LINT_TARGET = $(LINT_MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) -LDFLAGS += -dy -Nmisc/strategy -Nmisc/cmlb -LDFLAGS += -Ndrv/xpvd -Ndrv/xdf +LDFLAGS += -dy -Nmisc/strategy -Nmisc/cmlb -Ndrv/xpvd -Ndrv/xdf +LDFLAGS += -Nmisc/hvm_cmdk CPPFLAGS += -D_EXTVTOC diff --git a/usr/src/uts/i86pc/i86hvm/pv_sd/Makefile b/usr/src/uts/i86pc/i86hvm/pv_sd/Makefile new file mode 100644 index 0000000000..9eab704747 --- /dev/null +++ b/usr/src/uts/i86pc/i86hvm/pv_sd/Makefile @@ -0,0 +1,100 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# uts/i86pc/i86hvm/pv_sd/Makefile +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# +# This makefile drives the production of the xdc driver. +# +# i86pc implementation architecture dependent +# + +# +# Path to the base of the uts directory tree (usually /usr/src/uts). +# +UTSBASE = ../../.. + +# +# Define the module and object file sets. +# +MODULE = sd +OBJECTS = $(PV_SD_OBJS:%=$(OBJS_DIR)/%) +LINTS = $(PV_SD_OBJS:%.o=$(LINTS_DIR)/%.ln) +ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE) + +# +# Include common rules. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm + +# +# When generating lint libraries, we want the name of the lint module +# that will be generated to by pv_sd and not sd, so override the +# default lint module name here. +# +LINT_MODULE = pv_sd + +# +# Define targets +# +ALL_TARGET = $(BINARY) +LINT_TARGET = $(LINT_MODULE).lint +INSTALL_TARGET = $(BINARY) $(ROOTMODULE) + +LDFLAGS += -dy -Nmisc/strategy -Nmisc/cmlb -Ndrv/xpvd -Ndrv/xdf +LDFLAGS += -Nmisc/hvm_sd + +# +# The Xen header files do not lint cleanly. Since the troublesome +# structures form part of the externally defined interface to the +# hypervisor, we're stuck with the noise. +# +LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN +LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED +LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV + +# +# Default build targets. 
+# +.KEEP_STATE: + +def: $(DEF_DEPS) + +all: $(ALL_DEPS) + +clean: $(CLEAN_DEPS) + +clobber: $(CLOBBER_DEPS) + +lint: $(LINT_DEPS) + +modlintlib: $(MODLINTLIB_DEPS) + +clean.lint: $(CLEAN_LINT_DEPS) + +install: $(INSTALL_DEPS) + +# +# Include common targets. +# +include $(UTSBASE)/i86pc/i86hvm/Makefile.targ diff --git a/usr/src/uts/i86pc/i86hvm/xdf/Makefile b/usr/src/uts/i86pc/i86hvm/xdf/Makefile index 4b7bbe75d8..bef6a685f6 100644 --- a/usr/src/uts/i86pc/i86hvm/xdf/Makefile +++ b/usr/src/uts/i86pc/i86hvm/xdf/Makefile @@ -22,11 +22,9 @@ # # uts/i86pc/xdf/Makefile # -# Copyright 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -# ident "%Z%%M% %I% %E% SMI" -# # i86pc architecture dependent # # @@ -55,7 +53,7 @@ LINT_TARGET = $(MODULE).lint INSTALL_TARGET = $(BINARY) $(ROOTMODULE) # Overrides -CPPFLAGS += -DHVMPV_XDF_VERS=1 +CPPFLAGS += -DHVMPV_XDF_VERS=2 LDFLAGS += -dy -Nmisc/cmlb -Ndrv/xpvd -Ndrv/xpv LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON diff --git a/usr/src/uts/intel/io/dktp/disk/cmdk.c b/usr/src/uts/intel/io/dktp/disk/cmdk.c index 99b56bab67..36dddd4a7b 100644 --- a/usr/src/uts/intel/io/dktp/disk/cmdk.c +++ b/usr/src/uts/intel/io/dktp/disk/cmdk.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -73,7 +73,7 @@ static int cmdk_debug = DIO; #define DKTP_DATA (dkp->dk_tgobjp)->tg_data #define DKTP_EXT (dkp->dk_tgobjp)->tg_ext -static void *cmdk_state; +void *cmdk_state; /* * the cmdk_attach_mutex protects cmdk_max_instance in multi-threaded @@ -187,18 +187,30 @@ struct dev_ops cmdk_ops = { */ #include <sys/modctl.h> -extern struct mod_ops mod_driverops; - +#ifndef XPV_HVM_DRIVER static struct modldrv modldrv = { - &mod_driverops, /* Type of module. This one is a driver */ + &mod_driverops, /* Type of module. This one is a driver */ "Common Direct Access Disk", - &cmdk_ops, /* driver ops */ + &cmdk_ops, /* driver ops */ }; static struct modlinkage modlinkage = { MODREV_1, (void *)&modldrv, NULL }; + +#else /* XPV_HVM_DRIVER */ +static struct modlmisc modlmisc = { + &mod_miscops, /* Type of module. This one is a misc */ + "HVM Common Direct Access Disk", +}; + +static struct modlinkage modlinkage = { + MODREV_1, (void *)&modlmisc, NULL +}; + +#endif /* XPV_HVM_DRIVER */ + /* Function prototypes for cmlb callbacks */ static int cmdk_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, @@ -244,13 +256,17 @@ _init(void) { int rval; +#ifndef XPV_HVM_DRIVER if (rval = ddi_soft_state_init(&cmdk_state, sizeof (struct cmdk), 7)) return (rval); +#endif /* !XPV_HVM_DRIVER */ mutex_init(&cmdk_attach_mutex, NULL, MUTEX_DRIVER, NULL); if ((rval = mod_install(&modlinkage)) != 0) { mutex_destroy(&cmdk_attach_mutex); +#ifndef XPV_HVM_DRIVER ddi_soft_state_fini(&cmdk_state); +#endif /* !XPV_HVM_DRIVER */ } return (rval); } @@ -259,25 +275,6 @@ int _fini(void) { return (EBUSY); - - /* - * This has been commented out until cmdk is a true - * unloadable module. Right now x86's are panicking on - * a diskless reconfig boot. 
- */ - -#if 0 /* bugid 1186679 */ - int rval; - - rval = mod_remove(&modlinkage); - if (rval != 0) - return (rval); - - mutex_destroy(&cmdk_attach_mutex); - ddi_soft_state_fini(&cmdk_state); - - return (0); -#endif } int @@ -298,11 +295,15 @@ cmdkprobe(dev_info_t *dip) instance = ddi_get_instance(dip); +#ifndef XPV_HVM_DRIVER if (ddi_get_soft_state(cmdk_state, instance)) return (DDI_PROBE_PARTIAL); - if ((ddi_soft_state_zalloc(cmdk_state, instance) != DDI_SUCCESS) || - ((dkp = ddi_get_soft_state(cmdk_state, instance)) == NULL)) + if (ddi_soft_state_zalloc(cmdk_state, instance) != DDI_SUCCESS) + return (DDI_PROBE_PARTIAL); +#endif /* !XPV_HVM_DRIVER */ + + if ((dkp = ddi_get_soft_state(cmdk_state, instance)) == NULL) return (DDI_PROBE_PARTIAL); mutex_init(&dkp->dk_mutex, NULL, MUTEX_DRIVER, NULL); @@ -318,7 +319,9 @@ cmdkprobe(dev_info_t *dip) mutex_exit(&dkp->dk_mutex); mutex_destroy(&dkp->dk_mutex); rw_destroy(&dkp->dk_bbh_mutex); +#ifndef XPV_HVM_DRIVER ddi_soft_state_free(cmdk_state, instance); +#endif /* !XPV_HVM_DRIVER */ return (DDI_PROBE_PARTIAL); } @@ -328,7 +331,9 @@ cmdkprobe(dev_info_t *dip) mutex_exit(&dkp->dk_mutex); mutex_destroy(&dkp->dk_mutex); rw_destroy(&dkp->dk_bbh_mutex); +#ifndef XPV_HVM_DRIVER ddi_soft_state_free(cmdk_state, instance); +#endif /* !XPV_HVM_DRIVER */ return (status); } @@ -401,8 +406,8 @@ cmdkattach(dev_info_t *dip, ddi_attach_cmd_t cmd) if (cmlb_attach(dip, &cmdk_lb_ops, DTYPE_DIRECT, /* device_type */ - 0, /* removable */ - 0, /* hot pluggable XXX */ + B_FALSE, /* removable */ + B_FALSE, /* hot pluggable XXX */ node_type, CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT, /* alter_behaviour */ dkp->dk_cmlbhandle, @@ -450,7 +455,9 @@ fail2: rw_destroy(&dkp->dk_bbh_mutex); mutex_exit(&dkp->dk_mutex); mutex_destroy(&dkp->dk_mutex); +#ifndef XPV_HVM_DRIVER ddi_soft_state_free(cmdk_state, instance); +#endif /* !XPV_HVM_DRIVER */ return (DDI_FAILURE); } @@ -516,7 +523,9 @@ cmdkdetach(dev_info_t *dip, ddi_detach_cmd_t cmd) rw_destroy(&dkp->dk_bbh_mutex); mutex_destroy(&dkp->dk_pm_mutex); cv_destroy(&dkp->dk_suspend_cv); +#ifndef XPV_HVM_DRIVER ddi_soft_state_free(cmdk_state, instance); +#endif /* !XPV_HVM_DRIVER */ return (DDI_SUCCESS); } diff --git a/usr/src/uts/sun/io/dada/targets/dad.c b/usr/src/uts/sun/io/dada/targets/dad.c index 72abdf4800..1d71904da5 100644 --- a/usr/src/uts/sun/io/dada/targets/dad.c +++ b/usr/src/uts/sun/io/dada/targets/dad.c @@ -20,7 +20,7 @@ */ /* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -587,8 +587,8 @@ dcdattach(dev_info_t *devi, ddi_attach_cmd_t cmd) if (cmlb_attach(devi, &dcd_lb_ops, 0, - 0, - 0, + B_FALSE, + B_FALSE, DDI_NT_BLOCK_CHAN, CMLB_FAKE_GEOM_LABEL_IOCTLS_VTOC8, un->un_dklbhandle, |