author     Edward Pilatowicz <Edward.Pilatowicz@Sun.COM>  2008-11-17 14:55:44 -0800
committer  Edward Pilatowicz <Edward.Pilatowicz@Sun.COM>  2008-11-17 14:55:44 -0800
commit     7f0b8309074a5d8e9f9d8ffe7aad7bb0b1ee6b1f (patch)
tree       d9e4edfa4b2d5b96d61caa2e293f5e58a3a01a71
parent     53730946491a2da1c44c44e89a41006494591b53 (diff)
download   illumos-joyent-7f0b8309074a5d8e9f9d8ffe7aad7bb0b1ee6b1f.tar.gz
PSARC/2007/664 Paravirtualized Drivers for Fully Virtualized xVM Domains
PSARC/2009/015 ddi_strdup
6796427 add ddi_strdup(9F), strdup(9F), and strfree(9F) to the DDI
6677559 Solaris should provide a PV cdrom driver for xVM HVM environments
6703437 xdb doesn't do anything with xm block-configure requests
6774478 misc xdf bugs
-rw-r--r--  usr/src/cmd/truss/codes.c | 148
-rw-r--r--  usr/src/pkgdefs/SUNWxvmpv/prototype_i386 | 10
-rw-r--r--  usr/src/uts/common/brand/lx/io/lx_audio.c | 17
-rw-r--r--  usr/src/uts/common/fs/nfs/nfs4_client_debug.c | 33
-rw-r--r--  usr/src/uts/common/io/cmlb.c | 191
-rw-r--r--  usr/src/uts/common/io/scsi/targets/sd.c | 61
-rw-r--r--  usr/src/uts/common/os/modctl.c | 18
-rw-r--r--  usr/src/uts/common/os/sunddi.c | 29
-rw-r--r--  usr/src/uts/common/sys/cmlb.h | 10
-rw-r--r--  usr/src/uts/common/sys/cmlb_impl.h | 16
-rw-r--r--  usr/src/uts/common/sys/dktp/bbh.h | 9
-rw-r--r--  usr/src/uts/common/sys/sunddi.h | 4
-rw-r--r--  usr/src/uts/common/sys/types.h | 12
-rw-r--r--  usr/src/uts/common/xen/io/xdb.c | 1060
-rw-r--r--  usr/src/uts/common/xen/io/xdb.h | 89
-rw-r--r--  usr/src/uts/common/xen/io/xdf.c | 4845
-rw-r--r--  usr/src/uts/common/xen/io/xdf.h | 119
-rw-r--r--  usr/src/uts/common/xen/io/xenbus_probe.c | 6
-rw-r--r--  usr/src/uts/common/xen/io/xenbus_xs.c | 57
-rw-r--r--  usr/src/uts/common/xen/os/xvdi.c | 189
-rw-r--r--  usr/src/uts/common/xen/sys/xenbus_impl.h | 33
-rw-r--r--  usr/src/uts/common/xen/sys/xendev.h | 77
-rw-r--r--  usr/src/uts/i86pc/i86hvm/Makefile.files | 11
-rw-r--r--  usr/src/uts/i86pc/i86hvm/Makefile.i86hvm | 8
-rw-r--r--  usr/src/uts/i86pc/i86hvm/hvm_cmdk/Makefile | 100
-rw-r--r--  usr/src/uts/i86pc/i86hvm/hvm_sd/Makefile | 98
-rw-r--r--  usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c | 1108
-rw-r--r--  usr/src/uts/i86pc/i86hvm/io/pv_sd.c | 187
-rw-r--r--  usr/src/uts/i86pc/i86hvm/io/xdf_shell.c | 1278
-rw-r--r--  usr/src/uts/i86pc/i86hvm/io/xdf_shell.h | 161
-rw-r--r--  usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile | 6
-rw-r--r--  usr/src/uts/i86pc/i86hvm/pv_sd/Makefile | 100
-rw-r--r--  usr/src/uts/i86pc/i86hvm/xdf/Makefile | 6
-rw-r--r--  usr/src/uts/intel/io/dktp/disk/cmdk.c | 67
-rw-r--r--  usr/src/uts/sun/io/dada/targets/dad.c | 6
35 files changed, 6306 insertions, 3863 deletions
diff --git a/usr/src/cmd/truss/codes.c b/usr/src/cmd/truss/codes.c
index eb65f81f8c..a51544a70f 100644
--- a/usr/src/cmd/truss/codes.c
+++ b/usr/src/cmd/truss/codes.c
@@ -88,6 +88,10 @@
#include <sys/fs/zfs.h>
#include <inet/kssl/kssl.h>
#include <sys/dkio.h>
+#include <sys/fdio.h>
+#include <sys/cdio.h>
+#include <sys/scsi/impl/uscsi.h>
+#include <sys/devinfo_impl.h>
#include <sys/dumpadm.h>
#include <sys/mntio.h>
#include <sys/zcons.h>
@@ -1075,34 +1079,168 @@ const struct ioc {
{ (uint_t)KSSL_DELETE_ENTRY, "KSSL_DELETE_ENTRY",
"sockaddr_in"},
- /* dkio ioctls - (0x04 << 8) */
+ /* disk ioctls - (0x04 << 8) - dkio.h */
{ (uint_t)DKIOCGGEOM, "DKIOCGGEOM",
"struct dk_geom"},
- { (uint_t)DKIOCSGEOM, "DKIOCSGEOM",
- "struct dk_geom"},
{ (uint_t)DKIOCINFO, "DKIOCINFO",
"struct dk_info"},
+ { (uint_t)DKIOCEJECT, "DKIOCEJECT",
+ NULL},
{ (uint_t)DKIOCGVTOC, "DKIOCGVTOC",
"struct vtoc"},
{ (uint_t)DKIOCSVTOC, "DKIOCSVTOC",
"struct vtoc"},
- { (uint_t)DKIOCADDBAD, "DKIOCADDBAD",
+ { (uint_t)DKIOCGEXTVTOC, "DKIOCGEXTVTOC",
+ "struct extvtoc"},
+ { (uint_t)DKIOCSEXTVTOC, "DKIOCSEXTVTOC",
+ "struct extvtoc"},
+ { (uint_t)DKIOCFLUSHWRITECACHE, "DKIOCFLUSHWRITECACHE",
+ NULL},
+ { (uint_t)DKIOCGETWCE, "DKIOCGETWCE",
+ NULL},
+ { (uint_t)DKIOCSETWCE, "DKIOCSETWCE",
NULL},
+ { (uint_t)DKIOCSGEOM, "DKIOCSGEOM",
+ "struct dk_geom"},
+ { (uint_t)DKIOCSAPART, "DKIOCSAPART",
+ "struct dk_allmap"},
+ { (uint_t)DKIOCGAPART, "DKIOCGAPART",
+ "struct dk_allmap"},
{ (uint_t)DKIOCG_PHYGEOM, "DKIOCG_PHYGEOM",
"struct dk_geom"},
{ (uint_t)DKIOCG_VIRTGEOM, "DKIOCG_VIRTGEOM",
"struct dk_geom"},
+ { (uint_t)DKIOCLOCK, "DKIOCLOCK",
+ NULL},
+ { (uint_t)DKIOCUNLOCK, "DKIOCUNLOCK",
+ NULL},
+ { (uint_t)DKIOCSTATE, "DKIOCSTATE",
+ NULL},
+ { (uint_t)DKIOCREMOVABLE, "DKIOCREMOVABLE",
+ NULL},
+ { (uint_t)DKIOCHOTPLUGGABLE, "DKIOCHOTPLUGGABLE",
+ NULL},
+ { (uint_t)DKIOCADDBAD, "DKIOCADDBAD",
+ NULL},
+ { (uint_t)DKIOCGETDEF, "DKIOCGETDEF",
+ NULL},
+ { (uint_t)DKIOCPARTINFO, "DKIOCPARTINFO",
+ "struct part_info"},
+ { (uint_t)DKIOCEXTPARTINFO, "DKIOCEXTPARTINFO",
+ "struct extpart_info"},
{ (uint_t)DKIOCGMEDIAINFO, "DKIOCGMEDIAINFO",
"struct dk_minfo"},
+ { (uint_t)DKIOCGMBOOT, "DKIOCGMBOOT",
+ NULL},
+ { (uint_t)DKIOCSMBOOT, "DKIOCSMBOOT",
+ NULL},
+ { (uint_t)DKIOCSETEFI, "DKIOCSETEFI",
+ "struct dk_efi"},
+ { (uint_t)DKIOCGETEFI, "DKIOCGETEFI",
+ "struct dk_efi"},
+ { (uint_t)DKIOCPARTITION, "DKIOCPARTITION",
+ "struct partition64"},
+ { (uint_t)DKIOCGETVOLCAP, "DKIOCGETVOLCAP",
+ "struct volcap_t"},
+ { (uint_t)DKIOCSETVOLCAP, "DKIOCSETVOLCAP",
+ "struct volcap_t"},
+ { (uint_t)DKIOCDMR, "DKIOCDMR",
+ "struct vol_directed_rd"},
+ { (uint_t)DKIOCDUMPINIT, "DKIOCDUMPINIT",
+ NULL},
+ { (uint_t)DKIOCDUMPFINI, "DKIOCDUMPFINI",
+ NULL},
+
+ /* disk ioctls - (0x04 << 8) - fdio.h */
+ { (uint_t)FDIOGCHAR, "FDIOGCHAR",
+ "struct fd_char"},
+ { (uint_t)FDIOSCHAR, "FDIOSCHAR",
+ "struct fd_char"},
+ { (uint_t)FDEJECT, "FDEJECT",
+ NULL},
+ { (uint_t)FDGETCHANGE, "FDGETCHANGE",
+ NULL},
+ { (uint_t)FDGETDRIVECHAR, "FDGETDRIVECHAR",
+ "struct fd_drive"},
+ { (uint_t)FDSETDRIVECHAR, "FDSETDRIVECHAR",
+ "struct fd_drive"},
+ { (uint_t)FDGETSEARCH, "FDGETSEARCH",
+ NULL},
+ { (uint_t)FDSETSEARCH, "FDSETSEARCH",
+ NULL},
+ { (uint_t)FDIOCMD, "FDIOCMD",
+ "struct fd_cmd"},
+ { (uint_t)FDRAW, "FDRAW",
+ "struct fd_raw"},
+ { (uint_t)FDDEFGEOCHAR, "FDDEFGEOCHAR",
+ NULL},
+
+ /* disk ioctls - (0x04 << 8) - cdio.h */
+ { (uint_t)CDROMPAUSE, "CDROMPAUSE",
+ NULL},
+ { (uint_t)CDROMRESUME, "CDROMRESUME",
+ NULL},
+ { (uint_t)CDROMPLAYMSF, "CDROMPLAYMSF",
+ "struct cdrom_msf"},
+ { (uint_t)CDROMPLAYTRKIND, "CDROMPLAYTRKIND",
+ "struct cdrom_ti"},
+ { (uint_t)CDROMREADTOCHDR, "CDROMREADTOCHDR",
+ "struct cdrom_tochdr"},
+ { (uint_t)CDROMREADTOCENTRY, "CDROMREADTOCENTRY",
+ "struct cdrom_tocentry"},
+ { (uint_t)CDROMSTOP, "CDROMSTOP",
+ NULL},
+ { (uint_t)CDROMSTART, "CDROMSTART",
+ NULL},
+ { (uint_t)CDROMEJECT, "CDROMEJECT",
+ NULL},
+ { (uint_t)CDROMVOLCTRL, "CDROMVOLCTRL",
+ "struct cdrom_volctrl"},
+ { (uint_t)CDROMSUBCHNL, "CDROMSUBCHNL",
+ "struct cdrom_subchnl"},
+ { (uint_t)CDROMREADMODE2, "CDROMREADMODE2",
+ "struct cdrom_read"},
+ { (uint_t)CDROMREADMODE1, "CDROMREADMODE1",
+ "struct cdrom_read"},
+ { (uint_t)CDROMREADOFFSET, "CDROMREADOFFSET",
+ NULL},
+ { (uint_t)CDROMGBLKMODE, "CDROMGBLKMODE",
+ NULL},
+ { (uint_t)CDROMSBLKMODE, "CDROMSBLKMODE",
+ NULL},
+ { (uint_t)CDROMCDDA, "CDROMCDDA",
+ "struct cdrom_cdda"},
+ { (uint_t)CDROMCDXA, "CDROMCDXA",
+ "struct cdrom_cdxa"},
+ { (uint_t)CDROMSUBCODE, "CDROMSUBCODE",
+ "struct cdrom_subcode"},
+ { (uint_t)CDROMGDRVSPEED, "CDROMGDRVSPEED",
+ NULL},
+ { (uint_t)CDROMSDRVSPEED, "CDROMSDRVSPEED",
+ NULL},
+ { (uint_t)CDROMCLOSETRAY, "CDROMCLOSETRAY",
+ NULL},
+
+ /* disk ioctls - (0x04 << 8) - uscsi.h */
+ { (uint_t)USCSICMD, "USCSICMD",
+ "struct uscsi_cmd"},
/* dumpadm ioctls - (0xdd << 8) */
{ (uint_t)DIOCGETDEV, "DIOCGETDEV",
- "char *"},
+ NULL},
/* mntio ioctls - ('m' << 8) */
{ (uint_t)MNTIOC_GETMNTENT, "MNTIOC_GETMNTENT",
"struct extmnttab"},
+ /* devinfo ioctls - ('df' << 8) - devinfo_impl.h */
+ { (uint_t)DINFOUSRLD, "DINFOUSRLD",
+ NULL},
+ { (uint_t)DINFOLODRV, "DINFOLODRV",
+ NULL},
+ { (uint_t)DINFOIDENT, "DINFOIDENT",
+ NULL},
+
/* zcons ioctls */
{ (uint_t)ZC_HOLDSLAVE, "ZC_HOLDSLAVE", NULL },
{ (uint_t)ZC_RELEASESLAVE, "ZC_RELEASESLAVE", NULL },
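The table above gives truss a mapping from a raw ioctl command value to a symbolic name and, where one applies, the name of the argument structure to decode. A minimal sketch of the lookup such a table enables follows; the struct ioc field names and the sample entry are assumptions for illustration, not copied from truss.

	#include <sys/types.h>

	/* Assumed shape of the table; mirrors the three-field entries above. */
	struct ioc {
		uint_t code;		/* ioctl command value */
		const char *name;	/* symbolic name to print */
		const char *datastruct;	/* argument struct name, or NULL */
	};

	static const struct ioc ioc_tbl[] = {
		/* illustrative entry: DKIOC is (0x04 << 8), DKIOCINFO is (DKIOC|3) */
		{ (0x04 << 8) | 3, "DKIOCINFO", "struct dk_info" },
		{ 0, NULL, NULL }
	};

	/* Return the symbolic name for an ioctl code, or NULL if unknown. */
	static const char *
	ioc_name(uint_t code)
	{
		const struct ioc *p;

		for (p = ioc_tbl; p->name != NULL; p++) {
			if (p->code == code)
				return (p->name);
		}
		return (NULL);
	}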
diff --git a/usr/src/pkgdefs/SUNWxvmpv/prototype_i386 b/usr/src/pkgdefs/SUNWxvmpv/prototype_i386
index a4d8268b92..f1900b0911 100644
--- a/usr/src/pkgdefs/SUNWxvmpv/prototype_i386
+++ b/usr/src/pkgdefs/SUNWxvmpv/prototype_i386
@@ -20,11 +20,9 @@
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
#
# This required package information file contains a list of package contents.
@@ -52,12 +50,14 @@ d none platform/i86hvm/kernel/drv 0755 root sys
d none platform/i86hvm/kernel/drv/amd64 0755 root sys
f none platform/i86hvm/kernel/drv/amd64/cmdk 0755 root sys
f none platform/i86hvm/kernel/drv/amd64/rtls 0755 root sys
+f none platform/i86hvm/kernel/drv/amd64/sd 0755 root sys
f none platform/i86hvm/kernel/drv/amd64/xdf 0755 root sys
f none platform/i86hvm/kernel/drv/amd64/xnf 0755 root sys
f none platform/i86hvm/kernel/drv/amd64/xpv 0755 root sys
f none platform/i86hvm/kernel/drv/amd64/xpvd 0755 root sys
f none platform/i86hvm/kernel/drv/cmdk 0755 root sys
f none platform/i86hvm/kernel/drv/rtls 0755 root sys
+f none platform/i86hvm/kernel/drv/sd 0755 root sys
f none platform/i86hvm/kernel/drv/xdf 0755 root sys
f none platform/i86hvm/kernel/drv/xnf 0755 root sys
f none platform/i86hvm/kernel/drv/xpv 0755 root sys
@@ -67,4 +67,8 @@ f none platform/i86hvm/kernel/drv/xpvd.conf 0644 root sys
d none platform/i86hvm/kernel/misc 0755 root sys
d none platform/i86hvm/kernel/misc/amd64 0755 root sys
f none platform/i86hvm/kernel/misc/amd64/hvm_bootstrap 0755 root sys
+f none platform/i86hvm/kernel/misc/amd64/hvm_cmdk 0755 root sys
+f none platform/i86hvm/kernel/misc/amd64/hvm_sd 0755 root sys
f none platform/i86hvm/kernel/misc/hvm_bootstrap 0755 root sys
+f none platform/i86hvm/kernel/misc/hvm_cmdk 0755 root sys
+f none platform/i86hvm/kernel/misc/hvm_sd 0755 root sys
diff --git a/usr/src/uts/common/brand/lx/io/lx_audio.c b/usr/src/uts/common/brand/lx/io/lx_audio.c
index b2c137655d..8f80c61853 100644
--- a/usr/src/uts/common/brand/lx/io/lx_audio.c
+++ b/usr/src/uts/common/brand/lx/io/lx_audio.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -226,21 +226,6 @@ getzonename(void)
return (curproc->p_zone->zone_name);
}
-static void
-strfree(char *str)
-{
- kmem_free(str, strlen(str) + 1);
-}
-
-static char *
-strdup(char *str)
-{
- int n = strlen(str);
- char *ptr = kmem_alloc(n + 1, KM_SLEEP);
- bcopy(str, ptr, n + 1);
- return (ptr);
-}
-
static char *
lxa_devprop_name(char *zname, char *pname)
{
diff --git a/usr/src/uts/common/fs/nfs/nfs4_client_debug.c b/usr/src/uts/common/fs/nfs/nfs4_client_debug.c
index 7872d54c53..0d57e91049 100644
--- a/usr/src/uts/common/fs/nfs/nfs4_client_debug.c
+++ b/usr/src/uts/common/fs/nfs/nfs4_client_debug.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,12 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/cred.h>
#include <sys/kstat.h>
#include <sys/list.h>
@@ -82,8 +79,6 @@ static rkstat_t rkstat_template = {
int nfs4_msg_max = NFS4_MSG_MAX;
#define DEFAULT_LEASE 180
-static char *strdup(const char *);
-
/*
 * Sets the appropriate fields of "ep", given "id" and various parameters.
* Assumes that ep's fields have been initialized to zero/null, except for
@@ -612,7 +607,7 @@ facts_same(nfs4_debug_msg_t *cur_msg, nfs4_debug_msg_t *new_msg,
return (0);
len = strlen(cur_msg->msg_srv);
if (strncmp(cur_msg->msg_srv, new_msg->msg_srv,
- len) != 0)
+ len) != 0)
return (0);
} else if (new_msg->msg_srv != NULL) {
return (0);
@@ -622,7 +617,7 @@ facts_same(nfs4_debug_msg_t *cur_msg, nfs4_debug_msg_t *new_msg,
return (0);
len = strlen(cur_msg->msg_mntpt);
if (strncmp(cur_msg->msg_mntpt, new_msg->msg_mntpt,
- len) != 0)
+ len) != 0)
return (0);
} else if (new_msg->msg_mntpt != NULL) {
return (0);
@@ -1520,21 +1515,3 @@ nfs4_mi_kstat_inc_no_grace(mntinfo4_t *mi)
rsp = (rkstat_t *)mi->mi_recov_ksp->ks_data;
rsp->no_grace.value.ul++;
}
-
-/*
- * Allocate and copy a string. XXX There really ought to be a single
- * strdup() for the entire kernel.
- */
-static char *
-strdup(const char *s)
-{
- size_t len;
- char *new;
-
- len = strlen(s);
- new = kmem_alloc(len + 1, KM_SLEEP);
- bcopy(s, new, len);
- new[len] = '\0';
-
- return (new);
-}
diff --git a/usr/src/uts/common/io/cmlb.c b/usr/src/uts/common/io/cmlb.c
index 2c50a0566b..8dc9af2b39 100644
--- a/usr/src/uts/common/io/cmlb.c
+++ b/usr/src/uts/common/io/cmlb.c
@@ -202,7 +202,7 @@ static struct modlinkage modlinkage = {
/* Local function prototypes */
static dev_t cmlb_make_device(struct cmlb_lun *cl);
-static int cmlb_validate_geometry(struct cmlb_lun *cl, int forcerevalid,
+static int cmlb_validate_geometry(struct cmlb_lun *cl, boolean_t forcerevalid,
int flags, void *tg_cookie);
static void cmlb_resync_geom_caches(struct cmlb_lun *cl, diskaddr_t capacity,
void *tg_cookie);
@@ -227,14 +227,14 @@ static void cmlb_clear_vtoc(struct cmlb_lun *cl, void *tg_cookie);
static void cmlb_setup_default_geometry(struct cmlb_lun *cl, void *tg_cookie);
static int cmlb_create_minor_nodes(struct cmlb_lun *cl);
static int cmlb_check_update_blockcount(struct cmlb_lun *cl, void *tg_cookie);
-static int cmlb_check_efi_mbr(uchar_t *buf, int *is_mbr);
+static boolean_t cmlb_check_efi_mbr(uchar_t *buf, boolean_t *is_mbr);
#if defined(__i386) || defined(__amd64)
static int cmlb_update_fdisk_and_vtoc(struct cmlb_lun *cl, void *tg_cookie);
#endif
#if defined(_FIRMWARE_NEEDS_FDISK)
-static int cmlb_has_max_chs_vals(struct ipart *fdp);
+static boolean_t cmlb_has_max_chs_vals(struct ipart *fdp);
#endif
#if defined(_SUNOS_VTOC_16)
@@ -487,10 +487,8 @@ cmlb_free_handle(cmlb_handle_t *cmlbhandlep)
* scsi/generic/inquiry.h
*
* is_removable whether or not device is removable.
- * 0 non-removable, 1 removable.
*
* is_hotpluggable whether or not device is hotpluggable.
- * 0 non-hotpluggable, 1 hotpluggable.
*
* node_type minor node type (as used by ddi_create_minor_node)
*
@@ -588,7 +586,7 @@ cmlb_free_handle(cmlb_handle_t *cmlbhandlep)
*/
int
cmlb_attach(dev_info_t *devi, cmlb_tg_ops_t *tgopsp, int device_type,
- int is_removable, int is_hotpluggable, char *node_type,
+ boolean_t is_removable, boolean_t is_hotpluggable, char *node_type,
int alter_behavior, cmlb_handle_t cmlbhandle, void *tg_cookie)
{
@@ -596,6 +594,9 @@ cmlb_attach(dev_info_t *devi, cmlb_tg_ops_t *tgopsp, int device_type,
diskaddr_t cap;
int status;
+ ASSERT(VALID_BOOLEAN(is_removable));
+ ASSERT(VALID_BOOLEAN(is_hotpluggable));
+
if (tgopsp->tg_version < TG_DK_OPS_VERSION_1)
return (EINVAL);
@@ -608,13 +609,13 @@ cmlb_attach(dev_info_t *devi, cmlb_tg_ops_t *tgopsp, int device_type,
cl->cl_is_hotpluggable = is_hotpluggable;
cl->cl_node_type = node_type;
cl->cl_sys_blocksize = DEV_BSIZE;
- cl->cl_f_geometry_is_valid = FALSE;
+ cl->cl_f_geometry_is_valid = B_FALSE;
cl->cl_def_labeltype = CMLB_LABEL_VTOC;
cl->cl_alter_behavior = alter_behavior;
cl->cl_reserved = -1;
cl->cl_msglog_flag |= CMLB_ALLOW_2TB_WARN;
- if (is_removable == 0) {
+ if (!is_removable) {
mutex_exit(CMLB_MUTEX(cl));
status = DK_TG_GETCAP(cl, &cap, tg_cookie);
mutex_enter(CMLB_MUTEX(cl));
@@ -663,7 +664,7 @@ cmlb_detach(cmlb_handle_t cmlbhandle, void *tg_cookie)
mutex_enter(CMLB_MUTEX(cl));
cl->cl_def_labeltype = CMLB_LABEL_UNDEF;
- cl->cl_f_geometry_is_valid = FALSE;
+ cl->cl_f_geometry_is_valid = B_FALSE;
ddi_remove_minor_node(CMLB_DEVINFO(cl), NULL);
i_ddi_prop_dyn_driver_set(CMLB_DEVINFO(cl), NULL);
cl->cl_state = CMLB_INITED;
@@ -720,11 +721,11 @@ cmlb_validate(cmlb_handle_t cmlbhandle, int flags, void *tg_cookie)
return (ENXIO);
}
- rval = cmlb_validate_geometry((struct cmlb_lun *)cmlbhandle, 1,
+ rval = cmlb_validate_geometry((struct cmlb_lun *)cmlbhandle, B_TRUE,
flags, tg_cookie);
if (rval == ENOTSUP) {
- if (cl->cl_f_geometry_is_valid == TRUE) {
+ if (cl->cl_f_geometry_is_valid) {
cl->cl_cur_labeltype = CMLB_LABEL_EFI;
ret = 0;
} else {
@@ -762,7 +763,7 @@ cmlb_invalidate(cmlb_handle_t cmlbhandle, void *tg_cookie)
return;
mutex_enter(CMLB_MUTEX(cl));
- cl->cl_f_geometry_is_valid = FALSE;
+ cl->cl_f_geometry_is_valid = B_FALSE;
mutex_exit(CMLB_MUTEX(cl));
}
@@ -774,19 +775,19 @@ cmlb_invalidate(cmlb_handle_t cmlbhandle, void *tg_cookie)
* cmlbhandle cmlb handle associated with device.
*
* Return values:
- * TRUE if incore label/geom data is valid.
- * FALSE otherwise.
+ * B_TRUE if incore label/geom data is valid.
+ * B_FALSE otherwise.
*
*/
-int
+boolean_t
cmlb_is_valid(cmlb_handle_t cmlbhandle)
{
struct cmlb_lun *cl = (struct cmlb_lun *)cmlbhandle;
if (cmlbhandle == NULL)
- return (FALSE);
+ return (B_FALSE);
return (cl->cl_f_geometry_is_valid);
@@ -816,7 +817,7 @@ cmlb_close(cmlb_handle_t cmlbhandle, void *tg_cookie)
struct cmlb_lun *cl = (struct cmlb_lun *)cmlbhandle;
mutex_enter(CMLB_MUTEX(cl));
- cl->cl_f_geometry_is_valid = FALSE;
+ cl->cl_f_geometry_is_valid = B_FALSE;
/* revert to default minor node for this device */
if (ISREMOVABLE(cl)) {
@@ -861,7 +862,7 @@ cmlb_get_devid_block(cmlb_handle_t cmlbhandle, diskaddr_t *devidblockp,
return (EINVAL);
}
- if ((cl->cl_f_geometry_is_valid == FALSE) ||
+ if ((!cl->cl_f_geometry_is_valid) ||
(cl->cl_solaris_size < DK_LABEL_LOC)) {
mutex_exit(CMLB_MUTEX(cl));
return (EINVAL);
@@ -955,16 +956,16 @@ cmlb_partinfo(cmlb_handle_t cmlbhandle, int part, diskaddr_t *nblocksp,
if (part < 0 || part >= MAXPART) {
rval = EINVAL;
} else {
- if (cl->cl_f_geometry_is_valid == FALSE)
- (void) cmlb_validate_geometry((struct cmlb_lun *)cl, 0,
- 0, tg_cookie);
+ if (!cl->cl_f_geometry_is_valid)
+ (void) cmlb_validate_geometry((struct cmlb_lun *)cl,
+ B_FALSE, 0, tg_cookie);
#if defined(_SUNOS_VTOC_16)
- if (((cl->cl_f_geometry_is_valid == FALSE) ||
+ if (((!cl->cl_f_geometry_is_valid) ||
(part < NDKMAP && cl->cl_solaris_size == 0)) &&
(part != P0_RAW_DISK)) {
#else
- if ((cl->cl_f_geometry_is_valid == FALSE) ||
+ if ((!cl->cl_f_geometry_is_valid) ||
(part < NDKMAP && cl->cl_solaris_size == 0)) {
#endif
rval = EINVAL;
@@ -1030,11 +1031,11 @@ cmlb_efi_label_capacity(cmlb_handle_t cmlbhandle, diskaddr_t *capacity,
return (EINVAL);
}
- if (cl->cl_f_geometry_is_valid == FALSE)
- (void) cmlb_validate_geometry((struct cmlb_lun *)cl, 0,
+ if (!cl->cl_f_geometry_is_valid)
+ (void) cmlb_validate_geometry((struct cmlb_lun *)cl, B_FALSE,
0, tg_cookie);
- if ((cl->cl_f_geometry_is_valid == FALSE) || (capacity == NULL) ||
+ if ((!cl->cl_f_geometry_is_valid) || (capacity == NULL) ||
(cl->cl_cur_labeltype != CMLB_LABEL_EFI)) {
rval = EINVAL;
} else {
@@ -1268,33 +1269,36 @@ cmlb_check_update_blockcount(struct cmlb_lun *cl, void *tg_cookie)
ASSERT(mutex_owned(CMLB_MUTEX(cl)));
- if (cl->cl_f_geometry_is_valid == FALSE) {
- mutex_exit(CMLB_MUTEX(cl));
- status = DK_TG_GETCAP(cl, &capacity, tg_cookie);
- if (status != 0) {
- mutex_enter(CMLB_MUTEX(cl));
- return (EIO);
- }
+ if (cl->cl_f_geometry_is_valid)
+ return (0);
- status = DK_TG_GETBLOCKSIZE(cl, &lbasize, tg_cookie);
+ mutex_exit(CMLB_MUTEX(cl));
+ status = DK_TG_GETCAP(cl, &capacity, tg_cookie);
+ if (status != 0) {
mutex_enter(CMLB_MUTEX(cl));
- if (status != 0)
- return (EIO);
+ return (EIO);
+ }
- if ((capacity != 0) && (lbasize != 0)) {
- cl->cl_blockcount = capacity;
- cl->cl_tgt_blocksize = lbasize;
- return (0);
- } else
- return (EIO);
- } else
+ status = DK_TG_GETBLOCKSIZE(cl, &lbasize, tg_cookie);
+ mutex_enter(CMLB_MUTEX(cl));
+ if (status != 0)
+ return (EIO);
+
+ if ((capacity != 0) && (lbasize != 0)) {
+ cl->cl_blockcount = capacity;
+ cl->cl_tgt_blocksize = lbasize;
return (0);
+ } else {
+ return (EIO);
+ }
}
static int
cmlb_create_minor(dev_info_t *dip, char *name, int spec_type,
minor_t minor_num, char *node_type, int flag, boolean_t internal)
{
+ ASSERT(VALID_BOOLEAN(internal));
+
if (internal)
return (ddi_create_internal_pathname(dip,
name, spec_type, minor_num));
@@ -1331,7 +1335,8 @@ cmlb_create_minor_nodes(struct cmlb_lun *cl)
ASSERT(cl != NULL);
ASSERT(mutex_owned(CMLB_MUTEX(cl)));
- internal = ((cl->cl_alter_behavior & (CMLB_INTERNAL_MINOR_NODES)) != 0);
+ internal = VOID2BOOLEAN(
+ (cl->cl_alter_behavior & (CMLB_INTERNAL_MINOR_NODES)) != 0);
/* check the most common case */
if (cl->cl_cur_labeltype != CMLB_LABEL_UNDEF &&
@@ -1462,7 +1467,7 @@ cmlb_create_minor_nodes(struct cmlb_lun *cl)
* Context: Kernel thread only (can sleep).
*/
static int
-cmlb_validate_geometry(struct cmlb_lun *cl, int forcerevalid, int flags,
+cmlb_validate_geometry(struct cmlb_lun *cl, boolean_t forcerevalid, int flags,
void *tg_cookie)
{
int label_error = 0;
@@ -1470,8 +1475,9 @@ cmlb_validate_geometry(struct cmlb_lun *cl, int forcerevalid, int flags,
int count;
ASSERT(mutex_owned(CMLB_MUTEX(cl)));
+ ASSERT(VALID_BOOLEAN(forcerevalid));
- if ((cl->cl_f_geometry_is_valid == TRUE) && (forcerevalid == 0)) {
+ if ((cl->cl_f_geometry_is_valid) && (!forcerevalid)) {
if (cl->cl_cur_labeltype == CMLB_LABEL_EFI)
return (ENOTSUP);
return (0);
@@ -1580,7 +1586,7 @@ cmlb_validate_geometry(struct cmlb_lun *cl, int forcerevalid, int flags,
* a default label.
*/
label_error = 0;
- cl->cl_f_geometry_is_valid = TRUE;
+ cl->cl_f_geometry_is_valid = B_TRUE;
goto no_solaris_partition;
}
@@ -1638,7 +1644,7 @@ cmlb_validate_geometry(struct cmlb_lun *cl, int forcerevalid, int flags,
#elif defined(_SUNOS_VTOC_16)
if (label_error != EACCES) {
#endif
- if (cl->cl_f_geometry_is_valid == FALSE) {
+ if (!cl->cl_f_geometry_is_valid) {
cmlb_build_default_label(cl, tg_cookie);
}
label_error = 0;
@@ -2065,7 +2071,7 @@ done:
bzero(&cl->cl_g, sizeof (struct dk_geom));
bzero(&cl->cl_vtoc, sizeof (struct dk_vtoc));
bzero(&cl->cl_map, NDKMAP * (sizeof (struct dk_map)));
- cl->cl_f_geometry_is_valid = FALSE;
+ cl->cl_f_geometry_is_valid = B_FALSE;
}
cl->cl_solaris_offset = solaris_offset;
cl->cl_solaris_size = solaris_size;
@@ -2130,8 +2136,8 @@ cmlb_validate_efi(efi_gpt_t *labp)
}
/*
- * This function returns FALSE if there is a valid MBR signature and no
- * partition table entries of type EFI_PMBR (0xEE). Otherwise it returns TRUE.
+ * This function returns B_FALSE if there is a valid MBR signature and no
+ * partition table entries of type EFI_PMBR (0xEE). Otherwise it returns B_TRUE.
*
* The EFI spec (1.10 and later) requires having a Protective MBR (PMBR) to
* recognize the disk as GPT partitioned. However, some other OS creates an MBR
@@ -2139,11 +2145,11 @@ cmlb_validate_efi(efi_gpt_t *labp)
* corrupted, currently best attempt to allow data access would be to try to
* check for GPT headers. Hence in case of more than one partition entry, but
* at least one EFI_PMBR partition type or no valid magic number, the function
- * returns TRUE to continue with looking for GPT header.
+ * returns B_TRUE to continue with looking for GPT header.
*/
-static int
-cmlb_check_efi_mbr(uchar_t *buf, int *is_mbr)
+static boolean_t
+cmlb_check_efi_mbr(uchar_t *buf, boolean_t *is_mbr)
{
struct ipart *fdp;
struct mboot *mbp = (struct mboot *)buf;
@@ -2151,22 +2157,22 @@ cmlb_check_efi_mbr(uchar_t *buf, int *is_mbr)
int i;
if (is_mbr != NULL)
- *is_mbr = TRUE;
+ *is_mbr = B_TRUE;
if (LE_16(mbp->signature) != MBB_MAGIC) {
if (is_mbr != NULL)
- *is_mbr = FALSE;
- return (TRUE);
+ *is_mbr = B_FALSE;
+ return (B_TRUE);
}
bcopy(&mbp->parts[0], fdisk, sizeof (fdisk));
for (fdp = fdisk, i = 0; i < FD_NUMPART; i++, fdp++) {
if (fdp->systid == EFI_PMBR)
- return (TRUE);
+ return (B_TRUE);
}
- return (FALSE);
+ return (B_FALSE);
}
static int
@@ -2185,7 +2191,7 @@ cmlb_use_efi(struct cmlb_lun *cl, diskaddr_t capacity, int flags,
int iofailed = 0;
struct uuid uuid_type_reserved = EFI_RESERVED;
#if defined(_FIRMWARE_NEEDS_FDISK)
- int is_mbr;
+ boolean_t is_mbr;
#endif
ASSERT(mutex_owned(CMLB_MUTEX(cl)));
@@ -2215,15 +2221,15 @@ cmlb_use_efi(struct cmlb_lun *cl, diskaddr_t capacity, int flags,
}
#if defined(_FIRMWARE_NEEDS_FDISK)
- if (cmlb_check_efi_mbr(buf, &is_mbr) == FALSE) {
- if (is_mbr == TRUE)
+ if (!cmlb_check_efi_mbr(buf, &is_mbr)) {
+ if (is_mbr)
rval = ESRCH;
else
rval = EINVAL;
goto done_err;
}
#else
- if (cmlb_check_efi_mbr(buf, NULL) == FALSE) {
+ if (!cmlb_check_efi_mbr(buf, NULL)) {
rval = EINVAL;
goto done_err;
}
@@ -2341,7 +2347,7 @@ cmlb_use_efi(struct cmlb_lun *cl, diskaddr_t capacity, int flags,
cl->cl_solaris_offset = 0;
cl->cl_solaris_size = capacity;
cl->cl_label_from_media = CMLB_LABEL_EFI;
- cl->cl_f_geometry_is_valid = TRUE;
+ cl->cl_f_geometry_is_valid = B_TRUE;
/* clear the vtoc label */
bzero(&cl->cl_vtoc, sizeof (struct dk_vtoc));
@@ -2362,7 +2368,7 @@ done_err1:
* causes things like opens and stats on the partition to fail.
*/
if ((capacity > CMLB_EXTVTOC_LIMIT) && (rval != ESRCH) && !iofailed) {
- cl->cl_f_geometry_is_valid = FALSE;
+ cl->cl_f_geometry_is_valid = B_FALSE;
}
return (rval);
}
@@ -2546,7 +2552,7 @@ cmlb_uselabel(struct cmlb_lun *cl, struct dk_label *labp, int flags)
}
/* Mark the geometry as valid. */
- cl->cl_f_geometry_is_valid = TRUE;
+ cl->cl_f_geometry_is_valid = B_TRUE;
/*
* if we got invalidated when mutex exit and entered again,
@@ -2584,7 +2590,7 @@ cmlb_uselabel(struct cmlb_lun *cl, struct dk_label *labp, int flags)
if ((labp->dkl_map[i].dkl_nblk) &&
(part_end > cl->cl_blockcount)) {
- cl->cl_f_geometry_is_valid = FALSE;
+ cl->cl_f_geometry_is_valid = B_FALSE;
break;
}
}
@@ -2595,7 +2601,7 @@ cmlb_uselabel(struct cmlb_lun *cl, struct dk_label *labp, int flags)
part_end = vpartp->p_start + vpartp->p_size;
if ((vpartp->p_size > 0) &&
(part_end > cl->cl_blockcount)) {
- cl->cl_f_geometry_is_valid = FALSE;
+ cl->cl_f_geometry_is_valid = B_FALSE;
break;
}
}
@@ -2609,7 +2615,7 @@ cmlb_uselabel(struct cmlb_lun *cl, struct dk_label *labp, int flags)
"Label says %llu blocks; Drive says %llu blocks\n",
label_capacity, cl->cl_blockcount);
}
- cl->cl_f_geometry_is_valid = FALSE;
+ cl->cl_f_geometry_is_valid = B_FALSE;
label_error = CMLB_LABEL_IS_INVALID;
}
@@ -2775,7 +2781,7 @@ cmlb_build_default_label(struct cmlb_lun *cl, void *tg_cookie)
* Got fdisk table but no solaris entry therefore
* don't create a default label
*/
- cl->cl_f_geometry_is_valid = TRUE;
+ cl->cl_f_geometry_is_valid = B_TRUE;
return;
}
@@ -2915,7 +2921,7 @@ cmlb_build_default_label(struct cmlb_lun *cl, void *tg_cookie)
cl->cl_vtoc.v_nparts = V_NUMPAR;
cl->cl_vtoc.v_version = V_VERSION;
- cl->cl_f_geometry_is_valid = TRUE;
+ cl->cl_f_geometry_is_valid = B_TRUE;
cl->cl_label_from_media = CMLB_LABEL_UNDEF;
cmlb_dbg(CMLB_INFO, cl,
@@ -2938,7 +2944,7 @@ cmlb_build_default_label(struct cmlb_lun *cl, void *tg_cookie)
/*
* Function: cmlb_has_max_chs_vals
*
- * Description: Return TRUE if Cylinder-Head-Sector values are all at maximum.
+ * Description: Return B_TRUE if Cylinder-Head-Sector values are all at maximum.
*
* Arguments: fdp - ptr to CHS info
*
@@ -2946,7 +2952,7 @@ cmlb_build_default_label(struct cmlb_lun *cl, void *tg_cookie)
*
* Context: Any.
*/
-static int
+static boolean_t
cmlb_has_max_chs_vals(struct ipart *fdp)
{
return ((fdp->begcyl == LBA_MAX_CYL) &&
@@ -2992,7 +2998,7 @@ cmlb_dkio_get_geometry(struct cmlb_lun *cl, caddr_t arg, int flag,
* is ready.
*/
mutex_enter(CMLB_MUTEX(cl));
- rval = cmlb_validate_geometry(cl, 1, 0, tg_cookie);
+ rval = cmlb_validate_geometry(cl, B_TRUE, 0, tg_cookie);
#if defined(_SUNOS_VTOC_8)
if (rval == EINVAL &&
cl->cl_alter_behavior & CMLB_FAKE_GEOM_LABEL_IOCTLS_VTOC8) {
@@ -3105,7 +3111,7 @@ cmlb_dkio_set_geometry(struct cmlb_lun *cl, caddr_t arg, int flag)
cl->cl_offset[i] += cl->cl_solaris_offset;
#endif
}
- cl->cl_f_geometry_is_valid = FALSE;
+ cl->cl_f_geometry_is_valid = B_FALSE;
mutex_exit(CMLB_MUTEX(cl));
kmem_free(tmp_geom, sizeof (struct dk_geom));
@@ -3145,7 +3151,7 @@ cmlb_dkio_get_partition(struct cmlb_lun *cl, caddr_t arg, int flag,
* information.
*/
mutex_enter(CMLB_MUTEX(cl));
- if ((rval = cmlb_validate_geometry(cl, 1, 0, tg_cookie)) != 0) {
+ if ((rval = cmlb_validate_geometry(cl, B_TRUE, 0, tg_cookie)) != 0) {
mutex_exit(CMLB_MUTEX(cl));
return (rval);
}
@@ -3332,7 +3338,7 @@ cmlb_dkio_get_vtoc(struct cmlb_lun *cl, caddr_t arg, int flag, void *tg_cookie)
return (EOVERFLOW);
}
- rval = cmlb_validate_geometry(cl, 1, 0, tg_cookie);
+ rval = cmlb_validate_geometry(cl, B_TRUE, 0, tg_cookie);
#if defined(_SUNOS_VTOC_8)
if (rval == EINVAL &&
@@ -3445,7 +3451,7 @@ cmlb_dkio_get_extvtoc(struct cmlb_lun *cl, caddr_t arg, int flag,
bzero(&ext_vtoc, sizeof (struct extvtoc));
mutex_enter(CMLB_MUTEX(cl));
- rval = cmlb_validate_geometry(cl, 1, 0, tg_cookie);
+ rval = cmlb_validate_geometry(cl, B_TRUE, 0, tg_cookie);
#if defined(_SUNOS_VTOC_8)
if (rval == EINVAL &&
@@ -3705,7 +3711,8 @@ cmlb_dkio_set_vtoc(struct cmlb_lun *cl, dev_t dev, caddr_t arg, int flag,
int rval = 0;
boolean_t internal;
- internal = ((cl->cl_alter_behavior & (CMLB_INTERNAL_MINOR_NODES)) != 0);
+ internal = VOID2BOOLEAN(
+ (cl->cl_alter_behavior & (CMLB_INTERNAL_MINOR_NODES)) != 0);
#ifdef _MULTI_DATAMODEL
switch (ddi_model_convert_from(flag & FMODELS)) {
@@ -3777,7 +3784,8 @@ cmlb_dkio_set_vtoc(struct cmlb_lun *cl, dev_t dev, caddr_t arg, int flag,
if ((rval = cmlb_build_label_vtoc(cl, &user_vtoc)) == 0) {
if ((rval = cmlb_write_label(cl, tg_cookie)) == 0) {
- if (cmlb_validate_geometry(cl, 1, 0, tg_cookie) != 0) {
+ if (cmlb_validate_geometry(cl,
+ B_TRUE, 0, tg_cookie) != 0) {
cmlb_dbg(CMLB_ERROR, cl,
"cmlb_dkio_set_vtoc: "
"Failed validate geometry\n");
@@ -3822,7 +3830,8 @@ cmlb_dkio_set_extvtoc(struct cmlb_lun *cl, dev_t dev, caddr_t arg, int flag,
vtoctovtoc32(user_extvtoc, user_vtoc);
#endif
- internal = ((cl->cl_alter_behavior & (CMLB_INTERNAL_MINOR_NODES)) != 0);
+ internal = VOID2BOOLEAN(
+ (cl->cl_alter_behavior & (CMLB_INTERNAL_MINOR_NODES)) != 0);
mutex_enter(CMLB_MUTEX(cl));
#if defined(__i386) || defined(__amd64)
if (cl->cl_tgt_blocksize != cl->cl_sys_blocksize) {
@@ -3851,7 +3860,8 @@ cmlb_dkio_set_extvtoc(struct cmlb_lun *cl, dev_t dev, caddr_t arg, int flag,
if ((rval = cmlb_build_label_vtoc(cl, &user_vtoc)) == 0) {
if ((rval = cmlb_write_label(cl, tg_cookie)) == 0) {
- if (cmlb_validate_geometry(cl, 1, 0, tg_cookie) != 0) {
+ if (cmlb_validate_geometry(cl,
+ B_TRUE, 0, tg_cookie) != 0) {
cmlb_dbg(CMLB_ERROR, cl,
"cmlb_dkio_set_vtoc: "
"Failed validate geometry\n");
@@ -4307,7 +4317,8 @@ cmlb_dkio_set_efi(struct cmlb_lun *cl, dev_t dev, caddr_t arg, int flag,
if (ddi_copyin(arg, &user_efi, sizeof (dk_efi_t), flag))
return (EFAULT);
- internal = ((cl->cl_alter_behavior & (CMLB_INTERNAL_MINOR_NODES)) != 0);
+ internal = VOID2BOOLEAN(
+ (cl->cl_alter_behavior & (CMLB_INTERNAL_MINOR_NODES)) != 0);
user_efi.dki_data = (void *)(uintptr_t)user_efi.dki_data_64;
@@ -4359,7 +4370,7 @@ cmlb_dkio_set_efi(struct cmlb_lun *cl, dev_t dev, caddr_t arg, int flag,
if (rval == 0) {
mutex_enter(CMLB_MUTEX(cl));
- cl->cl_f_geometry_is_valid = FALSE;
+ cl->cl_f_geometry_is_valid = B_FALSE;
mutex_exit(CMLB_MUTEX(cl));
}
}
@@ -4491,7 +4502,7 @@ cmlb_dkio_set_mboot(struct cmlb_lun *cl, caddr_t arg, int flag, void *tg_cookie)
* update the fdisk and vtoc tables in memory
*/
rval = cmlb_update_fdisk_and_vtoc(cl, tg_cookie);
- if ((cl->cl_f_geometry_is_valid == FALSE) || (rval != 0)) {
+ if ((!cl->cl_f_geometry_is_valid) || (rval != 0)) {
mutex_exit(CMLB_MUTEX(cl));
kmem_free(mboot, (size_t)(sizeof (struct mboot)));
return (rval);
@@ -4572,7 +4583,7 @@ cmlb_setup_default_geometry(struct cmlb_lun *cl, void *tg_cookie)
ret = DK_TG_GETPHYGEOM(cl, pgeomp, tg_cookie);
mutex_enter(CMLB_MUTEX(cl));
- if (ret == 0) {
+ if (ret == 0) {
geom_base_cap = 0;
} else {
cmlb_dbg(CMLB_ERROR, cl,
@@ -4648,7 +4659,7 @@ cmlb_setup_default_geometry(struct cmlb_lun *cl, void *tg_cookie)
" hd %d sec %d", cl->cl_g.dkg_ncyl, cl->cl_g.dkg_acyl,
cl->cl_g.dkg_nhead, cl->cl_g.dkg_nsect);
- cl->cl_f_geometry_is_valid = FALSE;
+ cl->cl_f_geometry_is_valid = B_FALSE;
}
@@ -4727,7 +4738,7 @@ cmlb_update_fdisk_and_vtoc(struct cmlb_lun *cl, void *tg_cookie)
* a default label.
*/
label_rc = 0;
- cl->cl_f_geometry_is_valid = TRUE;
+ cl->cl_f_geometry_is_valid = B_TRUE;
goto no_solaris_partition;
}
} else if (capacity < 0) {
@@ -4738,11 +4749,11 @@ cmlb_update_fdisk_and_vtoc(struct cmlb_lun *cl, void *tg_cookie)
/*
* For Removable media We reach here if we have found a
* SOLARIS PARTITION.
- * If cl_f_geometry_is_valid is FALSE it indicates that the SOLARIS
+ * If cl_f_geometry_is_valid is B_FALSE it indicates that the SOLARIS
* PARTITION has changed from the previous one, hence we will setup a
* default VTOC in this case.
*/
- if (cl->cl_f_geometry_is_valid == FALSE) {
+ if (!cl->cl_f_geometry_is_valid) {
/* if we get here it is writable */
/* we are called from SMBOOT, and after a write of fdisk */
cmlb_build_default_label(cl, tg_cookie);
diff --git a/usr/src/uts/common/io/scsi/targets/sd.c b/usr/src/uts/common/io/scsi/targets/sd.c
index ca11d4aa41..158872e401 100644
--- a/usr/src/uts/common/io/scsi/targets/sd.c
+++ b/usr/src/uts/common/io/scsi/targets/sd.c
@@ -74,10 +74,10 @@
#if (defined(__fibre))
#define SD_MODULE_NAME "SCSI SSA/FCAL Disk Driver"
char _depends_on[] = "misc/scsi misc/cmlb drv/fcp";
-#else
+#else /* !__fibre */
#define SD_MODULE_NAME "SCSI Disk Driver"
char _depends_on[] = "misc/scsi misc/cmlb";
-#endif
+#endif /* !__fibre */
/*
* Define the interconnect type, to allow the driver to distinguish
@@ -1674,7 +1674,7 @@ static struct cb_ops sd_cb_ops = {
sdawrite /* async I/O write entry point */
};
-static struct dev_ops sd_ops = {
+struct dev_ops sd_ops = {
DEVO_REV, /* devo_rev, */
0, /* refcnt */
sdinfo, /* info */
@@ -1689,30 +1689,39 @@ static struct dev_ops sd_ops = {
ddi_quiesce_not_needed, /* quiesce */
};
-
/*
* This is the loadable module wrapper.
*/
#include <sys/modctl.h>
+#ifndef XPV_HVM_DRIVER
static struct modldrv modldrv = {
&mod_driverops, /* Type of module. This one is a driver */
SD_MODULE_NAME, /* Module name. */
&sd_ops /* driver ops */
};
+static struct modlinkage modlinkage = {
+ MODREV_1, &modldrv, NULL
+};
+
+#else /* XPV_HVM_DRIVER */
+static struct modlmisc modlmisc = {
+ &mod_miscops, /* Type of module. This one is a misc */
+ "HVM " SD_MODULE_NAME, /* Module name. */
+};
static struct modlinkage modlinkage = {
- MODREV_1,
- &modldrv,
- NULL
+ MODREV_1, &modlmisc, NULL
};
+#endif /* XPV_HVM_DRIVER */
+
static cmlb_tg_ops_t sd_tgops = {
TG_DK_OPS_VERSION_1,
sd_tg_rdwr,
sd_tg_getinfo
- };
+};
static struct scsi_asq_key_strings sd_additional_codes[] = {
0x81, 0, "Logical Unit is Reserved",
@@ -2204,13 +2213,20 @@ _init(void)
/* establish driver name from module name */
sd_label = (char *)mod_modname(&modlinkage);
+#ifndef XPV_HVM_DRIVER
err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
SD_MAXUNIT);
-
if (err != 0) {
return (err);
}
+#else /* XPV_HVM_DRIVER */
+ /* Remove the leading "hvm_" from the module name */
+ ASSERT(strncmp(sd_label, "hvm_", strlen("hvm_")) == 0);
+ sd_label += strlen("hvm_");
+
+#endif /* XPV_HVM_DRIVER */
+
mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&sd_log_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&sd_label_mutex, NULL, MUTEX_DRIVER, NULL);
@@ -2250,7 +2266,9 @@ _init(void)
sd_scsi_target_lun_fini();
+#ifndef XPV_HVM_DRIVER
ddi_soft_state_fini(&sd_state);
+#endif /* !XPV_HVM_DRIVER */
return (err);
}
@@ -2291,7 +2309,9 @@ _fini(void)
cv_destroy(&sd_tr.srq_resv_reclaim_cv);
cv_destroy(&sd_tr.srq_inprocess_cv);
+#ifndef XPV_HVM_DRIVER
ddi_soft_state_fini(&sd_state);
+#endif /* !XPV_HVM_DRIVER */
return (err);
}
@@ -2495,7 +2515,9 @@ sdprobe(dev_info_t *devi)
{
struct scsi_device *devp;
int rval;
- int instance;
+#ifndef XPV_HVM_DRIVER
+ int instance = ddi_get_instance(devi);
+#endif /* !XPV_HVM_DRIVER */
/*
* if it wasn't for pln, sdprobe could actually be nulldev
@@ -2512,11 +2534,11 @@ sdprobe(dev_info_t *devi)
return (DDI_PROBE_FAILURE);
}
- instance = ddi_get_instance(devi);
-
+#ifndef XPV_HVM_DRIVER
if (ddi_get_soft_state(sd_state, instance) != NULL) {
return (DDI_PROBE_PARTIAL);
}
+#endif /* !XPV_HVM_DRIVER */
/*
* Call the SCSA utility probe routine to see if we actually
@@ -6903,9 +6925,11 @@ sd_unit_attach(dev_info_t *devi)
* this routine will have a value of zero.
*/
instance = ddi_get_instance(devp->sd_dev);
+#ifndef XPV_HVM_DRIVER
if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
goto probe_failed;
}
+#endif /* !XPV_HVM_DRIVER */
/*
* Retrieve a pointer to the newly-allocated soft state.
@@ -7798,7 +7822,8 @@ sd_unit_attach(dev_info_t *devi)
#endif
if (cmlb_attach(devi, &sd_tgops, (int)devp->sd_inq->inq_dtype,
- un->un_f_has_removable_media, un->un_f_is_hotpluggable,
+ VOID2BOOLEAN(un->un_f_has_removable_media != 0),
+ VOID2BOOLEAN(un->un_f_is_hotpluggable != 0),
un->un_node_type, offbyone, un->un_cmlbhandle,
(void *)SD_PATH_DIRECT) != 0) {
goto cmlb_attach_failed;
@@ -8082,7 +8107,9 @@ get_softstate_failed:
* ddi_get_soft_state() fails. The implication seems to be
* that the get_soft_state cannot fail if the zalloc succeeds.
*/
+#ifndef XPV_HVM_DRIVER
ddi_soft_state_free(sd_state, instance);
+#endif /* !XPV_HVM_DRIVER */
probe_failed:
scsi_unprobe(devp);
@@ -8111,7 +8138,9 @@ sd_unit_detach(dev_info_t *devi)
int tgt;
dev_t dev;
dev_info_t *pdip = ddi_get_parent(devi);
+#ifndef XPV_HVM_DRIVER
int instance = ddi_get_instance(devi);
+#endif /* !XPV_HVM_DRIVER */
mutex_enter(&sd_detach_mutex);
@@ -8523,7 +8552,9 @@ sd_unit_detach(dev_info_t *devi)
devp->sd_private = NULL;
bzero(un, sizeof (struct sd_lun));
+#ifndef XPV_HVM_DRIVER
ddi_soft_state_free(sd_state, instance);
+#endif /* !XPV_HVM_DRIVER */
mutex_exit(&sd_detach_mutex);
@@ -10148,7 +10179,7 @@ sd_ready_and_valid(sd_ssc_t *ssc, int part)
uint_t lbasize;
int rval = SD_READY_VALID;
char name_str[48];
- int is_valid;
+ boolean_t is_valid;
struct sd_lun *un;
int status;
@@ -21268,7 +21299,7 @@ sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
int i = 0;
cred_t *cr;
int tmprval = EINVAL;
- int is_valid;
+ boolean_t is_valid;
sd_ssc_t *ssc;
/*
diff --git a/usr/src/uts/common/os/modctl.c b/usr/src/uts/common/os/modctl.c
index b8f4d61378..4b5a3685d7 100644
--- a/usr/src/uts/common/os/modctl.c
+++ b/usr/src/uts/common/os/modctl.c
@@ -477,22 +477,6 @@ modctl_modreserve(modid_t id, int *data)
return (0);
}
-/* to be removed when Ed introduces these */
-static char *
-ddi_strdup(const char *str, int flag)
-{
- char *rv;
- int n = strlen(str) + 1;
- rv = kmem_alloc(n, flag);
- bcopy(str, rv, n);
- return (rv);
-}
-static void
-strfree(char *str)
-{
- kmem_free(str, strlen(str)+1);
-}
-
/* Add/Remove driver and binding aliases */
static int
modctl_update_driver_aliases(int add, int *data)
@@ -603,7 +587,7 @@ modctl_update_driver_aliases(int add, int *data)
}
#endif
check_esc_sequences(name, cname);
- aip->alias_name = ddi_strdup(cname, KM_SLEEP);
+ aip->alias_name = strdup(cname);
ap = alias.a_next;
aip++;
}
diff --git a/usr/src/uts/common/os/sunddi.c b/usr/src/uts/common/os/sunddi.c
index 70d6e0a6d1..827ee186ee 100644
--- a/usr/src/uts/common/os/sunddi.c
+++ b/usr/src/uts/common/os/sunddi.c
@@ -8963,6 +8963,35 @@ ddi_quiesce_not_supported(dev_info_t *dip)
return (DDI_FAILURE);
}
+char *
+ddi_strdup(const char *str, int flag)
+{
+ int n;
+ char *ptr;
+
+ ASSERT(str != NULL);
+ ASSERT((flag == KM_SLEEP) || (flag == KM_NOSLEEP));
+
+ n = strlen(str);
+ if ((ptr = kmem_alloc(n + 1, flag)) == NULL)
+ return (NULL);
+ bcopy(str, ptr, n + 1);
+ return (ptr);
+}
+
+char *
+strdup(const char *str)
+{
+ return (ddi_strdup(str, KM_SLEEP));
+}
+
+void
+strfree(char *str)
+{
+ ASSERT(str != NULL);
+ kmem_free(str, strlen(str) + 1);
+}
+
/*
* Generic DDI callback interfaces.
*/
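With ddi_strdup(9F), strdup(9F), and strfree(9F) in the DDI, the private copies removed elsewhere in this change (lx_audio.c, nfs4_client_debug.c, modctl.c) become unnecessary. A minimal usage sketch, assuming only the declarations this change adds to sunddi.h:

	#include <sys/kmem.h>
	#include <sys/sunddi.h>

	/*
	 * Sketch: duplicate a caller's string without blocking.  With
	 * KM_NOSLEEP the allocation, and hence ddi_strdup(), may fail and
	 * return NULL; the KM_SLEEP variants (including strdup()) cannot.
	 */
	static char *
	save_name(const char *name)
	{
		return (ddi_strdup(name, KM_NOSLEEP));
	}

	static void
	drop_name(char *copy)
	{
		if (copy != NULL)
			strfree(copy);	/* frees strlen(copy) + 1 bytes */
	}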
diff --git a/usr/src/uts/common/sys/cmlb.h b/usr/src/uts/common/sys/cmlb.h
index e95cce9ade..cf1db2a1d3 100644
--- a/usr/src/uts/common/sys/cmlb.h
+++ b/usr/src/uts/common/sys/cmlb.h
@@ -19,15 +19,13 @@
* CDDL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_CMLB_H
#define _SYS_CMLB_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -246,10 +244,8 @@ cmlb_alloc_handle(cmlb_handle_t *cmlbhandlep);
* scsi/generic/inquiry.h
*
* is_removable whether or not device is removable.
- * 0 non-removable, 1 removable.
*
* is_hotpluggable whether or not device is hotpluggable.
- * 0 non-hotpluggable, 1 hotpluggable.
*
* node_type minor node type (as used by ddi_create_minor_node)
*
@@ -308,7 +304,7 @@ cmlb_alloc_handle(cmlb_handle_t *cmlbhandlep);
*/
int
cmlb_attach(dev_info_t *devi, cmlb_tg_ops_t *tgopsp, int device_type,
- int is_removable, int is_hotpluggable, char *node_type,
+ boolean_t is_removable, boolean_t is_hotpluggable, char *node_type,
int alter_behavior, cmlb_handle_t cmlbhandle, void *tg_cookie);
@@ -370,7 +366,7 @@ cmlb_invalidate(cmlb_handle_t cmlbhandle, void *tg_cookie);
* FALSE otherwise.
*
*/
-int
+boolean_t
cmlb_is_valid(cmlb_handle_t cmlbhandle);
/*
diff --git a/usr/src/uts/common/sys/cmlb_impl.h b/usr/src/uts/common/sys/cmlb_impl.h
index 797213c52a..b77f6c0239 100644
--- a/usr/src/uts/common/sys/cmlb_impl.h
+++ b/usr/src/uts/common/sys/cmlb_impl.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -72,9 +72,9 @@ extern "C" {
#define CMLB_LABEL(cl) (DEVI(((cl)->cl_devi))->devi_binding_name)
-#define ISREMOVABLE(cl) (cl->cl_is_removable == 1)
+#define ISREMOVABLE(cl) (cl->cl_is_removable)
#define ISCD(cl) (cl->cl_device_type == DTYPE_RODIRECT)
-#define ISHOTPLUGGABLE(cl) (cl->cl_is_hotpluggable == 1)
+#define ISHOTPLUGGABLE(cl) (cl->cl_is_hotpluggable)
#if defined(_SUNOS_VTOC_8)
@@ -93,10 +93,6 @@ extern "C" {
#define CMLBUNIT(dev) (getminor((dev)) >> CMLBUNIT_SHIFT)
#define CMLBPART(dev) (getminor((dev)) & CMLBPART_MASK)
-
-#define TRUE 1
-#define FALSE 0
-
/*
* Return codes of cmlb_uselabel().
*/
@@ -181,14 +177,14 @@ typedef struct cmlb_lun {
cmlb_state_t cl_state; /* state of handle */
- int cl_f_geometry_is_valid;
+ boolean_t cl_f_geometry_is_valid;
int cl_sys_blocksize;
kmutex_t cl_mutex;
/* the following are passed in at attach time */
- int cl_is_removable; /* 1 is removable */
- int cl_is_hotpluggable; /* 1 is hotpluggable */
+ boolean_t cl_is_removable; /* is removable */
+ boolean_t cl_is_hotpluggable; /* is hotpluggable */
int cl_alter_behavior;
char *cl_node_type; /* DDI_NT_... */
int cl_device_type; /* DTYPE_DIRECT,.. */
diff --git a/usr/src/uts/common/sys/dktp/bbh.h b/usr/src/uts/common/sys/dktp/bbh.h
index d6176ce47f..31b3cedd88 100644
--- a/usr/src/uts/common/sys/dktp/bbh.h
+++ b/usr/src/uts/common/sys/dktp/bbh.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,14 +19,14 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_DKTP_BBH_H
#define _SYS_DKTP_BBH_H
-#pragma ident "%Z%%M% %I% %E% SMI"
+#include <sys/scsi/scsi_types.h>
#ifdef __cplusplus
extern "C" {
diff --git a/usr/src/uts/common/sys/sunddi.h b/usr/src/uts/common/sys/sunddi.h
index 51627aa992..7f619b8da6 100644
--- a/usr/src/uts/common/sys/sunddi.h
+++ b/usr/src/uts/common/sys/sunddi.h
@@ -388,6 +388,10 @@ typedef enum {
#define DDI_MODEL_NATIVE DATAMODEL_NATIVE
#define DDI_MODEL_NONE DATAMODEL_NONE
+extern char *ddi_strdup(const char *str, int flag);
+extern char *strdup(const char *str);
+extern void strfree(char *str);
+
/*
* Functions and data references which really should be in <sys/ddi.h>
*/
diff --git a/usr/src/uts/common/sys/types.h b/usr/src/uts/common/sys/types.h
index 1745b91e87..8f133c1056 100644
--- a/usr/src/uts/common/sys/types.h
+++ b/usr/src/uts/common/sys/types.h
@@ -23,15 +23,13 @@
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#ifndef _SYS_TYPES_H
#define _SYS_TYPES_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/feature_tests.h>
#include <sys/isa_defs.h>
@@ -191,8 +189,16 @@ typedef long blksize_t; /* used for block sizes */
typedef enum { _B_FALSE, _B_TRUE } boolean_t;
#else
typedef enum { B_FALSE, B_TRUE } boolean_t;
+#ifdef _KERNEL
+#define VALID_BOOLEAN(x) (((x) == B_FALSE) || ((x) == B_TRUE))
+#define VOID2BOOLEAN(x) (((uintptr_t)(x) == 0) ? B_FALSE : B_TRUE)
+#endif /* _KERNEL */
#endif /* defined(__XOPEN_OR_POSIX) */
+#ifdef _KERNEL
+#define BOOLEAN2VOID(x) ((x) ? 1 : 0)
+#endif /* _KERNEL */
+
/*
* The {u,}pad64_t types can be used in structures such that those structures
* may be accessed by code produced by compilation environments which don't
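In C any nonzero integer tests true, but only B_TRUE is a valid boolean_t value, which is why interfaces converted to boolean_t in this change (such as cmlb_attach()) assert VALID_BOOLEAN() on their arguments. A sketch of the conversion idiom, with an illustrative flag bit:

	#include <sys/types.h>
	#include <sys/debug.h>

	#define	MY_FLAG_REMOVABLE	0x1	/* illustrative flag bit */

	static boolean_t
	flag_to_boolean(uint_t flags)
	{
		boolean_t removable;

		/* Collapse any nonzero bit pattern to exactly B_TRUE. */
		removable = VOID2BOOLEAN(flags & MY_FLAG_REMOVABLE);
		ASSERT(VALID_BOOLEAN(removable));

		/* BOOLEAN2VOID() goes the other way when an int is needed. */
		return (removable);
	}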
diff --git a/usr/src/uts/common/xen/io/xdb.c b/usr/src/uts/common/xen/io/xdb.c
index 5f327bb9ff..16fd5aff9d 100644
--- a/usr/src/uts/common/xen/io/xdb.c
+++ b/usr/src/uts/common/xen/io/xdb.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -62,6 +62,7 @@
#include <sys/promif.h>
#include <sys/sysmacros.h>
#include <public/io/xenbus.h>
+#include <public/io/xs_wire.h>
#include <xen/sys/xenbus_impl.h>
#include <xen/sys/xendev.h>
#include <sys/gnttab.h>
@@ -77,10 +78,13 @@
static xdb_t *xdb_statep;
static int xdb_debug = 0;
+static void xdb_close(dev_info_t *);
static int xdb_push_response(xdb_t *, uint64_t, uint8_t, uint16_t);
static int xdb_get_request(xdb_t *, blkif_request_t *);
static void blkif_get_x86_32_req(blkif_request_t *, blkif_x86_32_request_t *);
static void blkif_get_x86_64_req(blkif_request_t *, blkif_x86_64_request_t *);
+static int xdb_biodone(buf_t *);
+
#ifdef DEBUG
/*
@@ -216,7 +220,18 @@ xdb_kstat_init(xdb_t *vdp)
return (B_TRUE);
}
-static int xdb_biodone(buf_t *);
+static char *
+i_pathname(dev_info_t *dip)
+{
+ char *path, *rv;
+
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ (void) ddi_pathname(dip, path);
+ rv = strdup(path);
+ kmem_free(path, MAXPATHLEN);
+
+ return (rv);
+}
static buf_t *
xdb_get_buf(xdb_t *vdp, blkif_request_t *req, xdb_request_t *xreq)
@@ -501,14 +516,13 @@ xdb_uninit_ioreqs(xdb_t *vdp)
static uint_t
xdb_intr(caddr_t arg)
{
- blkif_request_t req;
- blkif_request_t *reqp = &req;
- xdb_request_t *xreq;
- buf_t *bp;
- uint8_t op;
- xdb_t *vdp = (xdb_t *)arg;
- int ret = DDI_INTR_UNCLAIMED;
- dev_info_t *dip = vdp->xs_dip;
+ xdb_t *vdp = (xdb_t *)arg;
+ dev_info_t *dip = vdp->xs_dip;
+ blkif_request_t req, *reqp = &req;
+ xdb_request_t *xreq;
+ buf_t *bp;
+ uint8_t op;
+ int ret = DDI_INTR_UNCLAIMED;
XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
"xdb@%s: I/O request received from dom %d",
@@ -517,10 +531,11 @@ xdb_intr(caddr_t arg)
mutex_enter(&vdp->xs_iomutex);
/* shouldn't touch ring buffer if not in connected state */
- if (vdp->xs_if_status != XDB_CONNECTED) {
+ if (!vdp->xs_if_connected) {
mutex_exit(&vdp->xs_iomutex);
return (DDI_INTR_UNCLAIMED);
}
+ ASSERT(vdp->xs_hp_connected && vdp->xs_fe_initialised);
/*
* We'll loop till there is no more request in the ring
@@ -672,7 +687,8 @@ xdb_biodone(buf_t *bp)
mutex_enter(&vdp->xs_iomutex);
/* send response back to frontend */
- if (vdp->xs_if_status == XDB_CONNECTED) {
+ if (vdp->xs_if_connected) {
+ ASSERT(vdp->xs_hp_connected && vdp->xs_fe_initialised);
if (xdb_push_response(vdp, xreq->xr_id, xreq->xr_op, bioerr))
xvdi_notify_oe(vdp->xs_dip);
XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
@@ -684,7 +700,7 @@ xdb_biodone(buf_t *bp)
xdb_free_req(xreq);
vdp->xs_ionum--;
- if ((vdp->xs_if_status != XDB_CONNECTED) && (vdp->xs_ionum == 0)) {
+ if (!vdp->xs_if_connected && (vdp->xs_ionum == 0)) {
/* we're closing, someone is waiting for I/O clean-up */
cv_signal(&vdp->xs_ionumcv);
}
@@ -704,6 +720,14 @@ xdb_bindto_frontend(xdb_t *vdp)
dev_info_t *dip = vdp->xs_dip;
char protocol[64] = "";
+ ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
+
+ /*
+ * Switch to the XenbusStateInitialised state. This lets the
+ * frontend know that we're about to negotiate a connection.
+ */
+ (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialised);
+
/*
* Gather info from frontend
*/
@@ -712,9 +736,11 @@ xdb_bindto_frontend(xdb_t *vdp)
return (DDI_FAILURE);
err = xenbus_gather(XBT_NULL, oename,
- "ring-ref", "%lu", &gref, "event-channel", "%u", &evtchn, NULL);
+ XBP_RING_REF, "%lu", &gref,
+ XBP_EVENT_CHAN, "%u", &evtchn,
+ NULL);
if (err != 0) {
- xvdi_fatal_error(dip, err,
+ xvdi_dev_error(dip, err,
"Getting ring-ref and evtchn from frontend");
return (DDI_FAILURE);
}
@@ -724,7 +750,7 @@ xdb_bindto_frontend(xdb_t *vdp)
vdp->xs_entrysize = sizeof (union blkif_sring_entry);
err = xenbus_gather(XBT_NULL, oename,
- "protocol", "%63s", protocol, NULL);
+ XBP_PROTOCOL, "%63s", protocol, NULL);
if (err)
(void) strcpy(protocol, "unspecified, assuming native");
else {
@@ -756,15 +782,13 @@ xdb_bindto_frontend(xdb_t *vdp)
#endif
/*
- * map and init ring
- *
- * The ring parameters must match those which have been allocated
- * in the front end.
+ * Map and init ring. The ring parameters must match those which
+ * have been allocated in the front end.
*/
- err = xvdi_map_ring(dip, vdp->xs_nentry, vdp->xs_entrysize,
- gref, &vdp->xs_ring);
- if (err != DDI_SUCCESS)
+ if (xvdi_map_ring(dip, vdp->xs_nentry, vdp->xs_entrysize,
+ gref, &vdp->xs_ring) != DDI_SUCCESS)
return (DDI_FAILURE);
+
/*
* This will be removed after we use shadow I/O ring request since
* we don't need to access the ring itself directly, thus the access
@@ -772,9 +796,7 @@ xdb_bindto_frontend(xdb_t *vdp)
*/
vdp->xs_ring_hdl = vdp->xs_ring->xr_acc_hdl;
- /*
- * bind event channel
- */
+ /* bind event channel */
err = xvdi_bind_evtchn(dip, evtchn);
if (err != DDI_SUCCESS) {
xvdi_unmap_ring(vdp->xs_ring);
@@ -787,43 +809,313 @@ xdb_bindto_frontend(xdb_t *vdp)
static void
xdb_unbindfrom_frontend(xdb_t *vdp)
{
+ ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
+
xvdi_free_evtchn(vdp->xs_dip);
xvdi_unmap_ring(vdp->xs_ring);
}
+/*
+ * xdb_params_change() initiates a change to the underlying device/file
+ * that the backend is accessing. It does this by disconnecting from the
+ * frontend, closing the old device, clearing a bunch of xenbus parameters,
+ * and switching back to the XenbusStateInitialising state. The frontend
+ * should notice this transition to the XenbusStateInitialising state and
+ * should attempt to reconnect to us (the backend).
+ */
+static void
+xdb_params_change(xdb_t *vdp, char *params, boolean_t update_xs)
+{
+ xenbus_transaction_t xbt;
+ dev_info_t *dip = vdp->xs_dip;
+ char *xsname;
+ int err;
+
+ ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
+ ASSERT(vdp->xs_params_path != NULL);
+
+ if ((xsname = xvdi_get_xsname(dip)) == NULL)
+ return;
+ if (strcmp(vdp->xs_params_path, params) == 0)
+ return;
+
+ /*
+ * Close the device we're currently accessing and update the
+ * path which points to our backend device/file.
+ */
+ xdb_close(dip);
+ vdp->xs_fe_initialised = B_FALSE;
+
+trans_retry:
+ if ((err = xenbus_transaction_start(&xbt)) != 0) {
+ xvdi_dev_error(dip, err, "params change transaction init");
+ goto errout;
+ }
+
+ /*
+ * Delete all the xenbus properties that are connection dependent
+ * and go back to the initializing state so that the frontend
+ * driver can re-negotiate a connection.
+ */
+ if (((err = xenbus_rm(xbt, xsname, XBP_FB)) != 0) ||
+ ((err = xenbus_rm(xbt, xsname, XBP_INFO)) != 0) ||
+ ((err = xenbus_rm(xbt, xsname, "sector-size")) != 0) ||
+ ((err = xenbus_rm(xbt, xsname, XBP_SECTORS)) != 0) ||
+ ((err = xenbus_rm(xbt, xsname, "instance")) != 0) ||
+ ((err = xenbus_rm(xbt, xsname, "node")) != 0) ||
+ (update_xs && ((err = xenbus_printf(xbt, xsname,
+ "params", "%s", params)) != 0)) ||
+ ((err = xvdi_switch_state(dip,
+ xbt, XenbusStateInitialising) > 0))) {
+ (void) xenbus_transaction_end(xbt, 1);
+ xvdi_dev_error(dip, err, "params change transaction setup");
+ goto errout;
+ }
+
+ if ((err = xenbus_transaction_end(xbt, 0)) != 0) {
+ if (err == EAGAIN) {
+ /* transaction is ended, don't need to abort it */
+ goto trans_retry;
+ }
+ xvdi_dev_error(dip, err, "params change transaction commit");
+ goto errout;
+ }
+
+ /* Change the device that we plan to access */
+ strfree(vdp->xs_params_path);
+ vdp->xs_params_path = strdup(params);
+ return;
+
+errout:
+ (void) xvdi_switch_state(dip, xbt, XenbusStateInitialising);
+}
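The trans_retry loop above is the standard xenbus transaction idiom: start a transaction, perform every read/write/remove against the transaction handle, then commit; a commit that fails with EAGAIN means the store changed underneath the transaction and the whole sequence must be redone. Stripped to a skeleton with placeholder keys "a" and "b", the pattern is:

	static int
	xs_set_pair(char *xsname)
	{
		xenbus_transaction_t xbt;
		int err;

	retry:
		if ((err = xenbus_transaction_start(&xbt)) != 0)
			return (err);

		if (((err = xenbus_printf(xbt, xsname, "a", "%d", 1)) != 0) ||
		    ((err = xenbus_printf(xbt, xsname, "b", "%d", 2)) != 0)) {
			(void) xenbus_transaction_end(xbt, 1);	/* abort */
			return (err);
		}

		if ((err = xenbus_transaction_end(xbt, 0)) == EAGAIN)
			goto retry;	/* lost a race; redo the transaction */
		return (err);
	}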
+
+/*
+ * xdb_watch_params_cb() - This callback is invoked whenever there
+ * is an update to the following xenbus parameter:
+ * /local/domain/0/backend/vbd/<domU_id>/<domU_dev>/params
+ *
+ * This normally happens during xm block-configure operations, which
+ * are used to change CD device images for HVM domUs.
+ */
+/*ARGSUSED*/
+static void
+xdb_watch_params_cb(dev_info_t *dip, const char *path, void *arg)
+{
+ xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);
+ char *xsname, *oename, *str, *str2;
+
+ if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
+ ((oename = xvdi_get_oename(dip)) == NULL)) {
+ return;
+ }
+
+ mutex_enter(&vdp->xs_cbmutex);
+
+ if (xenbus_read_str(xsname, "params", &str) != 0) {
+ mutex_exit(&vdp->xs_cbmutex);
+ return;
+ }
+
+ if (strcmp(vdp->xs_params_path, str) == 0) {
+ /* Nothing to do */
+ mutex_exit(&vdp->xs_cbmutex);
+ strfree(str);
+ return;
+ }
+
+ /*
+ * If the frontend isn't a cd device, doesn't support media
+ * requests, or has locked the media, then we can't change
+ * the params value. Restore the current value.
+ */
+ str2 = NULL;
+ if (!XDB_IS_FE_CD(vdp) ||
+ (xenbus_read_str(oename, XBP_MEDIA_REQ, &str2) != 0) ||
+ (strcmp(str2, XBV_MEDIA_REQ_LOCK) == 0)) {
+ if (str2 != NULL)
+ strfree(str2);
+ strfree(str);
+
+ str = i_pathname(dip);
+ cmn_err(CE_NOTE,
+ "!%s: media locked, ignoring params update", str);
+ strfree(str);
+
+ mutex_exit(&vdp->xs_cbmutex);
+ return;
+ }
+
+ XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE,
+ "block-configure params request: \"%s\"", str));
+
+ xdb_params_change(vdp, str, B_FALSE);
+ mutex_exit(&vdp->xs_cbmutex);
+ strfree(str);
+}
+
+/*
+ * xdb_watch_media_req_cb() - This callback is invoked whenever there
+ * is an update to the following xenbus parameter:
+ * /local/domain/<domU_id>/device/vbd/<domU_dev>/media-req
+ *
+ * Media requests are only supported on CD devices and are issued by
+ * the frontend. Currently the only supported media request operations
+ * are "lock" and "eject". A "lock" prevents the backend from changing
+ * the backing device/file (via xm block-configure). An "eject" request
+ * tells the backend device that it should disconnect from the frontend
+ * and close the backing device/file that is currently in use.
+ */
+/*ARGSUSED*/
+static void
+xdb_watch_media_req_cb(dev_info_t *dip, const char *path, void *arg)
+{
+ xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);
+ char *oename, *str;
+
+ mutex_enter(&vdp->xs_cbmutex);
+
+ if ((oename = xvdi_get_oename(dip)) == NULL) {
+ mutex_exit(&vdp->xs_cbmutex);
+ return;
+ }
+
+ if (xenbus_read_str(oename, XBP_MEDIA_REQ, &str) != 0) {
+ mutex_exit(&vdp->xs_cbmutex);
+ return;
+ }
+
+ if (!XDB_IS_FE_CD(vdp)) {
+ xvdi_dev_error(dip, EINVAL,
+ "media-req only supported for cdrom devices");
+ mutex_exit(&vdp->xs_cbmutex);
+ return;
+ }
+
+ if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) {
+ mutex_exit(&vdp->xs_cbmutex);
+ strfree(str);
+ return;
+ }
+ strfree(str);
+
+ XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "media eject request"));
+
+ xdb_params_change(vdp, "", B_TRUE);
+ (void) xenbus_printf(XBT_NULL, oename,
+ XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE);
+ mutex_exit(&vdp->xs_cbmutex);
+}
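On the wire the media-req protocol is just two xenstore writes: the frontend sets media-req to "eject", and the backend above acknowledges completion by resetting it to "none". A hedged sketch of the frontend half of that exchange (fe_path, the frontend's xenstore directory, is an assumed argument):

	/*
	 * Sketch: ask the backend to eject the virtual media.  The backend
	 * resets XBP_MEDIA_REQ to XBV_MEDIA_REQ_NONE once the eject is done.
	 */
	static int
	request_eject(char *fe_path)
	{
		return (xenbus_printf(XBT_NULL, fe_path,
		    XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_EJECT));
	}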
+
+/*
+ * If we're dealing with a cdrom device, let the frontend know that
+ * we support media requests via XBP_MEDIA_REQ_SUP, and setup a watch
+ * to handle those frontend media request changes, which modify the
+ * following xenstore parameter:
+ * /local/domain/<domU_id>/device/vbd/<domU_dev>/media-req
+ */
+static boolean_t
+xdb_media_req_init(xdb_t *vdp)
+{
+ dev_info_t *dip = vdp->xs_dip;
+ char *xsname, *oename;
+
+ ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
+
+ if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
+ ((oename = xvdi_get_oename(dip)) == NULL))
+ return (B_FALSE);
+
+ if (!XDB_IS_FE_CD(vdp))
+ return (B_TRUE);
+
+ if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ_SUP, "%d", 1) != 0)
+ return (B_FALSE);
+
+ if (xvdi_add_xb_watch_handler(dip, oename,
+ XBP_MEDIA_REQ, xdb_watch_media_req_cb, NULL) != DDI_SUCCESS) {
+ xvdi_dev_error(dip, EAGAIN,
+ "Failed to register watch for cdrom media requests");
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
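
A frontend that wants to issue media requests would first check the
advertisement written above. A minimal sketch, assuming hypothetical
frontend-side code with oename naming the backend's xenstore directory:

	char		*str;
	boolean_t	media_req_ok = B_FALSE;

	if (xenbus_read_str(oename, XBP_MEDIA_REQ_SUP, &str) == 0) {
		media_req_ok = (strcmp(str, "1") == 0);
		strfree(str);
	}
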
+
+/*
+ * Get our params value. Also, if we're using "params" then set up a
+ * watch to handle xm block-configure operations which modify the
+ * following xenstore parameter:
+ * /local/domain/0/backend/vbd/<domU_id>/<domU_dev>/params
+ */
+static boolean_t
+xdb_params_init(xdb_t *vdp)
+{
+ dev_info_t *dip = vdp->xs_dip;
+ char *str, *xsname;
+ int err, watch_params = B_FALSE;
+
+ ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
+ ASSERT(vdp->xs_params_path == NULL);
+
+ if ((xsname = xvdi_get_xsname(dip)) == NULL)
+ return (B_FALSE);
+
+ if ((err = xenbus_read_str(xsname,
+ "dynamic-device-path", &str)) == ENOENT) {
+ err = xenbus_read_str(xsname, "params", &str);
+ watch_params = B_TRUE;
+ }
+ if (err != 0)
+ return (B_FALSE);
+ vdp->xs_params_path = str;
+
+ /*
+ * If we got our backing store path from "dynamic-device-path" then
+ * there's no reason to watch "params"
+ */
+ if (!watch_params)
+ return (B_TRUE);
+
+ if (xvdi_add_xb_watch_handler(dip, xsname, "params",
+ xdb_watch_params_cb, NULL) != DDI_SUCCESS) {
+ strfree(vdp->xs_params_path);
+ vdp->xs_params_path = NULL;
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
#define LOFI_CTRL_NODE "/dev/lofictl"
#define LOFI_DEV_NODE "/devices/pseudo/lofi@0:"
-#define LOFI_MODE FREAD | FWRITE | FEXCL
+#define LOFI_MODE (FREAD | FWRITE | FEXCL)
static int
xdb_setup_node(xdb_t *vdp, char *path)
{
- dev_info_t *dip;
- char *xsnode, *node;
- ldi_handle_t ldi_hdl;
- struct lofi_ioctl *li;
- int minor;
- int err;
- unsigned int len;
+ dev_info_t *dip = vdp->xs_dip;
+ char *xsname, *str;
+ ldi_handle_t ldi_hdl;
+ struct lofi_ioctl *li;
+ int minor, err;
- dip = vdp->xs_dip;
- xsnode = xvdi_get_xsname(dip);
- if (xsnode == NULL)
+ ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
+
+ if ((xsname = xvdi_get_xsname(dip)) == NULL)
return (DDI_FAILURE);
- err = xenbus_read(XBT_NULL, xsnode, "dynamic-device-path",
- (void **)&node, &len);
- if (err == ENOENT)
- err = xenbus_read(XBT_NULL, xsnode, "params", (void **)&node,
- &len);
- if (err != 0) {
- xvdi_fatal_error(vdp->xs_dip, err, "reading 'params'");
+ if ((err = xenbus_read_str(xsname, "type", &str)) != 0) {
+ xvdi_dev_error(dip, err, "Getting type from backend device");
return (DDI_FAILURE);
}
+ if (strcmp(str, "file") == 0)
+ vdp->xs_type |= XDB_DEV_BE_LOFI;
+ strfree(str);
- if (!XDB_IS_LOFI(vdp)) {
- (void) strlcpy(path, node, MAXPATHLEN);
- kmem_free(node, len);
+ if (!XDB_IS_BE_LOFI(vdp)) {
+ (void) strlcpy(path, vdp->xs_params_path, MAXPATHLEN);
+ ASSERT(vdp->xs_lofi_path == NULL);
return (DDI_SUCCESS);
}
@@ -832,63 +1124,55 @@ xdb_setup_node(xdb_t *vdp, char *path)
&ldi_hdl, vdp->xs_ldi_li);
} while (err == EBUSY);
if (err != 0) {
- kmem_free(node, len);
return (DDI_FAILURE);
}
li = kmem_zalloc(sizeof (*li), KM_SLEEP);
- (void) strlcpy(li->li_filename, node, MAXPATHLEN);
- kmem_free(node, len);
- if (ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li,
- LOFI_MODE | FKIOCTL, kcred, &minor) != 0) {
+ (void) strlcpy(li->li_filename, vdp->xs_params_path,
+ sizeof (li->li_filename));
+ err = ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li,
+ LOFI_MODE | FKIOCTL, kcred, &minor);
+ (void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
+ kmem_free(li, sizeof (*li));
+
+ if (err != 0) {
cmn_err(CE_WARN, "xdb@%s: Failed to create lofi dev for %s",
- ddi_get_name_addr(dip), li->li_filename);
- (void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
- kmem_free(li, sizeof (*li));
+ ddi_get_name_addr(dip), vdp->xs_params_path);
return (DDI_FAILURE);
}
+
/*
* return '/devices/...' instead of '/dev/lofi/...' since the
* former is available immediately after calling ldi_ioctl
*/
(void) snprintf(path, MAXPATHLEN, LOFI_DEV_NODE "%d", minor);
- (void) xenbus_printf(XBT_NULL, xsnode, "node", "%s", path);
- (void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
- kmem_free(li, sizeof (*li));
+ (void) xenbus_printf(XBT_NULL, xsname, "node", "%s", path);
+
+ ASSERT(vdp->xs_lofi_path == NULL);
+ vdp->xs_lofi_path = strdup(path);
+
return (DDI_SUCCESS);
}
static void
xdb_teardown_node(xdb_t *vdp)
{
- dev_info_t *dip;
- char *xsnode, *node;
+ dev_info_t *dip = vdp->xs_dip;
ldi_handle_t ldi_hdl;
struct lofi_ioctl *li;
int err;
- unsigned int len;
- if (!XDB_IS_LOFI(vdp))
- return;
+ ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
- dip = vdp->xs_dip;
- xsnode = xvdi_get_xsname(dip);
- if (xsnode == NULL)
+ if (!XDB_IS_BE_LOFI(vdp))
return;
- err = xenbus_read(XBT_NULL, xsnode, "dynamic-device-path",
- (void **)&node, &len);
- if (err == ENOENT)
- err = xenbus_read(XBT_NULL, xsnode, "params", (void **)&node,
- &len);
- if (err != 0) {
- xvdi_fatal_error(vdp->xs_dip, err, "reading 'params'");
- return;
- }
+ vdp->xs_type &= ~XDB_DEV_BE_LOFI;
+ ASSERT(vdp->xs_lofi_path != NULL);
li = kmem_zalloc(sizeof (*li), KM_SLEEP);
- (void) strlcpy(li->li_filename, node, MAXPATHLEN);
- kmem_free(node, len);
+ (void) strlcpy(li->li_filename, vdp->xs_params_path,
+ sizeof (li->li_filename));
do {
err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred,
@@ -908,67 +1192,47 @@ xdb_teardown_node(xdb_t *vdp)
(void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
kmem_free(li, sizeof (*li));
+
+ strfree(vdp->xs_lofi_path);
+ vdp->xs_lofi_path = NULL;
}
static int
xdb_open_device(xdb_t *vdp)
{
+ dev_info_t *dip = vdp->xs_dip;
uint64_t devsize;
- dev_info_t *dip;
- char *xsnode;
char *nodepath;
- char *mode = NULL;
- char *type = NULL;
- int err;
- dip = vdp->xs_dip;
- xsnode = xvdi_get_xsname(dip);
- if (xsnode == NULL)
- return (DDI_FAILURE);
+ ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
- err = xenbus_gather(XBT_NULL, xsnode,
- "mode", NULL, &mode, "type", NULL, &type, NULL);
- if (err != 0) {
- if (mode)
- kmem_free(mode, strlen(mode) + 1);
- if (type)
- kmem_free(type, strlen(type) + 1);
- xvdi_fatal_error(dip, err,
- "Getting mode and type from backend device");
- return (DDI_FAILURE);
- }
- if (strcmp(type, "file") == 0) {
- vdp->xs_type |= XDB_DEV_LOFI;
- }
- kmem_free(type, strlen(type) + 1);
- if ((strcmp(mode, "r") == NULL) || (strcmp(mode, "ro") == NULL)) {
- vdp->xs_type |= XDB_DEV_RO;
+ if (strlen(vdp->xs_params_path) == 0) {
+ /*
+ * it's possible to have no backing device when dealing
+ * with a pv cdrom drive that has no virtual cd associated
+ * with it.
+ */
+ ASSERT(XDB_IS_FE_CD(vdp));
+ ASSERT(vdp->xs_sectors == 0);
+ ASSERT(vdp->xs_ldi_li == NULL);
+ ASSERT(vdp->xs_ldi_hdl == NULL);
+ return (DDI_SUCCESS);
}
- kmem_free(mode, strlen(mode) + 1);
- /*
- * try to open backend device
- */
if (ldi_ident_from_dip(dip, &vdp->xs_ldi_li) != 0)
return (DDI_FAILURE);
nodepath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
- err = xdb_setup_node(vdp, nodepath);
- if (err != DDI_SUCCESS) {
- xvdi_fatal_error(dip, err,
+
+ /* try to open backend device */
+ if (xdb_setup_node(vdp, nodepath) != DDI_SUCCESS) {
+ xvdi_dev_error(dip, ENXIO,
"Getting device path of backend device");
ldi_ident_release(vdp->xs_ldi_li);
kmem_free(nodepath, MAXPATHLEN);
return (DDI_FAILURE);
}
- if (*nodepath == '\0') {
- /* Allow a CD-ROM device with an empty backend. */
- vdp->xs_sectors = 0;
- kmem_free(nodepath, MAXPATHLEN);
- return (DDI_SUCCESS);
- }
-
if (ldi_open_by_name(nodepath,
FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE),
kcred, &vdp->xs_ldi_hdl, vdp->xs_ldi_li) != 0) {
@@ -980,16 +1244,6 @@ xdb_open_device(xdb_t *vdp)
return (DDI_FAILURE);
}
- /* check if it's a CD/DVD disc */
- if (ldi_prop_get_int(vdp->xs_ldi_hdl, LDI_DEV_T_ANY | DDI_PROP_DONTPASS,
- "inquiry-device-type", DTYPE_DIRECT) == DTYPE_RODIRECT)
- vdp->xs_type |= XDB_DEV_CD;
- /* check if it's a removable disk */
- if (ldi_prop_exists(vdp->xs_ldi_hdl,
- LDI_DEV_T_ANY | DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
- "removable-media"))
- vdp->xs_type |= XDB_DEV_RMB;
-
if (ldi_get_size(vdp->xs_ldi_hdl, &devsize) != DDI_SUCCESS) {
(void) ldi_close(vdp->xs_ldi_hdl,
FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred);
@@ -1000,6 +1254,17 @@ xdb_open_device(xdb_t *vdp)
}
vdp->xs_sectors = devsize / XB_BSIZE;
+ /* check if the underlying device is a CD/DVD disc */
+ if (ldi_prop_get_int(vdp->xs_ldi_hdl, LDI_DEV_T_ANY | DDI_PROP_DONTPASS,
+ INQUIRY_DEVICE_TYPE, DTYPE_DIRECT) == DTYPE_RODIRECT)
+ vdp->xs_type |= XDB_DEV_BE_CD;
+
+ /* check if the underlying device is a removable disk */
+ if (ldi_prop_exists(vdp->xs_ldi_hdl,
+ LDI_DEV_T_ANY | DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
+ "removable-media"))
+ vdp->xs_type |= XDB_DEV_BE_RMB;
+
kmem_free(nodepath, MAXPATHLEN);
return (DDI_SUCCESS);
}
@@ -1007,171 +1272,155 @@ xdb_open_device(xdb_t *vdp)
static void
xdb_close_device(xdb_t *vdp)
{
+ ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
+
+ if (strlen(vdp->xs_params_path) == 0) {
+ ASSERT(XDB_IS_FE_CD(vdp));
+ ASSERT(vdp->xs_sectors == 0);
+ ASSERT(vdp->xs_ldi_li == NULL);
+ ASSERT(vdp->xs_ldi_hdl == NULL);
+ return;
+ }
+
(void) ldi_close(vdp->xs_ldi_hdl,
FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred);
xdb_teardown_node(vdp);
ldi_ident_release(vdp->xs_ldi_li);
+ vdp->xs_type &= ~(XDB_DEV_BE_CD | XDB_DEV_BE_RMB);
+ vdp->xs_sectors = 0;
vdp->xs_ldi_li = NULL;
vdp->xs_ldi_hdl = NULL;
}
/*
* Kick-off connect process
- * If xs_fe_status == XDB_FE_READY and xs_dev_status == XDB_DEV_READY
- * the xs_if_status will be changed to XDB_CONNECTED on success,
- * otherwise, xs_if_status will not be changed
+ * If xs_fe_initialised and xs_hp_connected are both B_TRUE, then
+ * xs_if_connected will be set to B_TRUE on success.
*/
-static int
+static void
xdb_start_connect(xdb_t *vdp)
{
- uint32_t dinfo;
- xenbus_transaction_t xbt;
- int err, svdst;
- char *xsnode;
- dev_info_t *dip = vdp->xs_dip;
- char *barrier;
- uint_t len;
+ xenbus_transaction_t xbt;
+ dev_info_t *dip = vdp->xs_dip;
+ boolean_t fb_exists;
+ int err, instance = ddi_get_instance(dip);
+ uint64_t sectors;
+ uint_t dinfo, ssize;
+ char *xsname;
+
+ ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
+
+ if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
+ ((vdp->xs_peer = xvdi_get_oeid(dip)) == (domid_t)-1))
+ return;
+
+ mutex_enter(&vdp->xs_iomutex);
+ /*
+ * if the hotplug scripts haven't run or if the frontend is not
+ * initialized, then we can't try to connect.
+ */
+ if (!vdp->xs_hp_connected || !vdp->xs_fe_initialised) {
+ ASSERT(!vdp->xs_if_connected);
+ mutex_exit(&vdp->xs_iomutex);
+ return;
+ }
+
+	/* If we're already connected then there's nothing to do */
+ if (vdp->xs_if_connected) {
+ mutex_exit(&vdp->xs_iomutex);
+ return;
+ }
+ mutex_exit(&vdp->xs_iomutex);
/*
	 * Start connect to frontend only when the backend device is ready
* and frontend has moved to XenbusStateInitialised, which means
- * ready to connect
+ * ready to connect.
*/
- ASSERT((vdp->xs_fe_status == XDB_FE_READY) &&
- (vdp->xs_dev_status == XDB_DEV_READY));
-
- if (((xsnode = xvdi_get_xsname(dip)) == NULL) ||
- ((vdp->xs_peer = xvdi_get_oeid(dip)) == (domid_t)-1) ||
- (xdb_open_device(vdp) != DDI_SUCCESS))
- return (DDI_FAILURE);
+ XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE,
+ "xdb@%s: starting connection process", ddi_get_name_addr(dip)));
- (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialised);
+ if (xdb_open_device(vdp) != DDI_SUCCESS)
+ return;
- if (xdb_bindto_frontend(vdp) != DDI_SUCCESS)
- goto errout1;
+ if (xdb_bindto_frontend(vdp) != DDI_SUCCESS) {
+ xdb_close_device(vdp);
+ return;
+ }
/* init i/o requests */
xdb_init_ioreqs(vdp);
if (ddi_add_intr(dip, 0, NULL, NULL, xdb_intr, (caddr_t)vdp)
- != DDI_SUCCESS)
- goto errout2;
+ != DDI_SUCCESS) {
+ xdb_uninit_ioreqs(vdp);
+ xdb_unbindfrom_frontend(vdp);
+ xdb_close_device(vdp);
+ return;
+ }
+
+ dinfo = 0;
+ if (XDB_IS_RO(vdp))
+ dinfo |= VDISK_READONLY;
+ if (XDB_IS_BE_RMB(vdp))
+ dinfo |= VDISK_REMOVABLE;
+ if (XDB_IS_BE_CD(vdp))
+ dinfo |= VDISK_CDROM;
+ if (XDB_IS_FE_CD(vdp))
+ dinfo |= VDISK_REMOVABLE | VDISK_CDROM;
/*
	 * we can receive intr any time from now on
* mark that we're ready to take intr
*/
mutex_enter(&vdp->xs_iomutex);
- /*
- * save it in case we need to restore when we
- * fail to write xenstore later
- */
- svdst = vdp->xs_if_status;
- vdp->xs_if_status = XDB_CONNECTED;
+ ASSERT(vdp->xs_fe_initialised);
+ vdp->xs_if_connected = B_TRUE;
mutex_exit(&vdp->xs_iomutex);
- /* write into xenstore the info needed by frontend */
trans_retry:
- if (xenbus_transaction_start(&xbt)) {
- xvdi_fatal_error(dip, EIO, "transaction start");
- goto errout3;
+ /* write into xenstore the info needed by frontend */
+ if ((err = xenbus_transaction_start(&xbt)) != 0) {
+ xvdi_dev_error(dip, err, "connect transaction init");
+ goto errout;
}
- /*
- * If feature-barrier isn't present in xenstore, add it.
- */
- if (xenbus_read(xbt, xsnode, "feature-barrier",
- (void **)&barrier, &len) != 0) {
- if ((err = xenbus_printf(xbt, xsnode, "feature-barrier",
- "%d", 1)) != 0) {
- cmn_err(CE_WARN, "xdb@%s: failed to write "
- "'feature-barrier'", ddi_get_name_addr(dip));
- xvdi_fatal_error(dip, err, "writing 'feature-barrier'");
- goto abort_trans;
- }
- } else
- kmem_free(barrier, len);
-
- dinfo = 0;
- if (XDB_IS_RO(vdp))
- dinfo |= VDISK_READONLY;
- if (XDB_IS_CD(vdp))
- dinfo |= VDISK_CDROM;
- if (XDB_IS_RMB(vdp))
- dinfo |= VDISK_REMOVABLE;
- if (err = xenbus_printf(xbt, xsnode, "info", "%u", dinfo)) {
- xvdi_fatal_error(dip, err, "writing 'info'");
- goto abort_trans;
- }
+ /* If feature-barrier isn't present in xenstore, add it. */
+ fb_exists = xenbus_exists(xsname, XBP_FB);
/* hard-coded 512-byte sector size */
- if (err = xenbus_printf(xbt, xsnode, "sector-size", "%u", DEV_BSIZE)) {
- xvdi_fatal_error(dip, err, "writing 'sector-size'");
- goto abort_trans;
- }
-
- if (err = xenbus_printf(xbt, xsnode, "sectors", "%"PRIu64,
- vdp->xs_sectors)) {
- xvdi_fatal_error(dip, err, "writing 'sectors'");
- goto abort_trans;
+ ssize = DEV_BSIZE;
+ sectors = vdp->xs_sectors;
+ if (((!fb_exists &&
+ (err = xenbus_printf(xbt, xsname, XBP_FB, "%d", 1)))) ||
+ (err = xenbus_printf(xbt, xsname, XBP_INFO, "%u", dinfo)) ||
+ (err = xenbus_printf(xbt, xsname, "sector-size", "%u", ssize)) ||
+ (err = xenbus_printf(xbt, xsname,
+ XBP_SECTORS, "%"PRIu64, sectors)) ||
+ (err = xenbus_printf(xbt, xsname, "instance", "%d", instance)) ||
+ ((err = xvdi_switch_state(dip, xbt, XenbusStateConnected)) > 0)) {
+ (void) xenbus_transaction_end(xbt, 1);
+ xvdi_dev_error(dip, err, "connect transaction setup");
+ goto errout;
}
- if (err = xenbus_printf(xbt, xsnode, "instance", "%d",
- ddi_get_instance(dip))) {
- xvdi_fatal_error(dip, err, "writing 'instance'");
- goto abort_trans;
- }
-
- if ((err = xvdi_switch_state(dip, xbt, XenbusStateConnected)) > 0) {
- xvdi_fatal_error(dip, err, "writing 'state'");
- goto abort_trans;
- }
-
- if (err = xenbus_transaction_end(xbt, 0)) {
- if (err == EAGAIN)
+ if ((err = xenbus_transaction_end(xbt, 0)) != 0) {
+ if (err == EAGAIN) {
/* transaction is ended, don't need to abort it */
goto trans_retry;
- xvdi_fatal_error(dip, err, "completing transaction");
- goto errout3;
+ }
+ xvdi_dev_error(dip, err, "connect transaction commit");
+ goto errout;
}
- return (DDI_SUCCESS);
+ return;
-abort_trans:
- (void) xenbus_transaction_end(xbt, 1);
-errout3:
- mutex_enter(&vdp->xs_iomutex);
- vdp->xs_if_status = svdst;
- mutex_exit(&vdp->xs_iomutex);
- ddi_remove_intr(dip, 0, NULL);
-errout2:
- xdb_uninit_ioreqs(vdp);
- xdb_unbindfrom_frontend(vdp);
-errout1:
- xdb_close_device(vdp);
- return (DDI_FAILURE);
-}
-
-/*
- * Kick-off disconnect process
- * xs_if_status will not be changed
- */
-static int
-xdb_start_disconnect(xdb_t *vdp)
-{
- /*
- * Kick-off disconnect process
- */
- if (xvdi_switch_state(vdp->xs_dip, XBT_NULL, XenbusStateClosing) > 0)
- return (DDI_FAILURE);
-
- return (DDI_SUCCESS);
+errout:
+ xdb_close(dip);
}
/*
* Disconnect from frontend and close backend device
- * ifstatus will be changed to XDB_DISCONNECTED
- * Xenbus state will be changed to XenbusStateClosed
*/
static void
xdb_close(dev_info_t *dip)
@@ -1179,23 +1428,36 @@ xdb_close(dev_info_t *dip)
xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);
ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
-
mutex_enter(&vdp->xs_iomutex);
- if (vdp->xs_if_status != XDB_CONNECTED) {
- vdp->xs_if_status = XDB_DISCONNECTED;
+ /*
+ * if the hotplug scripts haven't run or if the frontend is not
+ * initialized, then we can't be connected, so there's no
+ * connection to close.
+ */
+ if (!vdp->xs_hp_connected || !vdp->xs_fe_initialised) {
+ ASSERT(!vdp->xs_if_connected);
+ mutex_exit(&vdp->xs_iomutex);
+ return;
+ }
+
+ /* if we're not connected, there's nothing to do */
+ if (!vdp->xs_if_connected) {
cv_broadcast(&vdp->xs_iocv);
mutex_exit(&vdp->xs_iomutex);
- (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
return;
}
- vdp->xs_if_status = XDB_DISCONNECTED;
+
+ XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "closing while connected"));
+
+ vdp->xs_if_connected = B_FALSE;
cv_broadcast(&vdp->xs_iocv);
mutex_exit(&vdp->xs_iomutex);
/* stop accepting I/O request from frontend */
ddi_remove_intr(dip, 0, NULL);
+
/* clear all on-going I/Os, if any */
mutex_enter(&vdp->xs_iomutex);
while (vdp->xs_ionum > 0)
@@ -1207,109 +1469,53 @@ xdb_close(dev_info_t *dip)
xdb_unbindfrom_frontend(vdp);
xdb_close_device(vdp);
vdp->xs_peer = (domid_t)-1;
- (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
-}
-
-/*
- * Xdb_check_state_transition will check the XenbusState change to see
- * if the change is a valid transition or not.
- * The new state is written by frontend domain, or by running xenstore-write
- * to change it manually in dom0
- */
-static int
-xdb_check_state_transition(xdb_t *vdp, XenbusState oestate)
-{
- enum xdb_state status;
- int stcheck;
-#define STOK 0 /* need further process */
-#define STNOP 1 /* no action need taking */
-#define STBUG 2 /* unexpected state change, could be a bug */
-
- status = vdp->xs_if_status;
- stcheck = STOK;
-
- switch (status) {
- case XDB_UNKNOWN:
- if (vdp->xs_fe_status == XDB_FE_UNKNOWN) {
- if ((oestate == XenbusStateUnknown) ||
- (oestate == XenbusStateConnected))
- stcheck = STBUG;
- else if ((oestate == XenbusStateInitialising) ||
- (oestate == XenbusStateInitWait))
- stcheck = STNOP;
- } else {
- if ((oestate == XenbusStateUnknown) ||
- (oestate == XenbusStateInitialising) ||
- (oestate == XenbusStateInitWait) ||
- (oestate == XenbusStateConnected))
- stcheck = STBUG;
- else if (oestate == XenbusStateInitialised)
- stcheck = STNOP;
- }
- break;
- case XDB_CONNECTED:
- if ((oestate == XenbusStateUnknown) ||
- (oestate == XenbusStateInitialising) ||
- (oestate == XenbusStateInitWait) ||
- (oestate == XenbusStateInitialised))
- stcheck = STBUG;
- else if (oestate == XenbusStateConnected)
- stcheck = STNOP;
- break;
- case XDB_DISCONNECTED:
- default:
- stcheck = STBUG;
- }
-
- if (stcheck == STOK)
- return (DDI_SUCCESS);
-
- if (stcheck == STBUG)
- cmn_err(CE_NOTE, "xdb@%s: unexpected otherend "
- "state change to %d!, when status is %d",
- ddi_get_name_addr(vdp->xs_dip), oestate, status);
-
- return (DDI_FAILURE);
}
static void
xdb_send_buf(void *arg)
{
- buf_t *bp;
- xdb_t *vdp = (xdb_t *)arg;
+ xdb_t *vdp = (xdb_t *)arg;
+ buf_t *bp;
+ int err;
mutex_enter(&vdp->xs_iomutex);
+ while (vdp->xs_send_buf) {
+ if ((bp = vdp->xs_f_iobuf) == NULL) {
+ /* wait for some io to send */
+ XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
+ "send buf waiting for io"));
+ cv_wait(&vdp->xs_iocv, &vdp->xs_iomutex);
+ continue;
+ }
- while (vdp->xs_if_status != XDB_DISCONNECTED) {
- while ((bp = vdp->xs_f_iobuf) != NULL) {
- vdp->xs_f_iobuf = bp->av_forw;
- bp->av_forw = NULL;
- vdp->xs_ionum++;
- mutex_exit(&vdp->xs_iomutex);
- if (bp->b_bcount != 0) {
- int err = ldi_strategy(vdp->xs_ldi_hdl, bp);
- if (err != 0) {
- bp->b_flags |= B_ERROR;
- (void) xdb_biodone(bp);
- XDB_DBPRINT(XDB_DBG_IO, (CE_WARN,
- "xdb@%s: sent buf to backend dev"
- "failed, err=%d",
- ddi_get_name_addr(vdp->xs_dip),
- err));
- } else {
- XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
- "sent buf to backend ok"));
- }
- } else /* no I/O need to be done */
- (void) xdb_biodone(bp);
+ vdp->xs_f_iobuf = bp->av_forw;
+ bp->av_forw = NULL;
+ vdp->xs_ionum++;
+ mutex_exit(&vdp->xs_iomutex);
+ if (bp->b_bcount == 0) {
+ /* no I/O needs to be done */
+ (void) xdb_biodone(bp);
mutex_enter(&vdp->xs_iomutex);
+ continue;
}
- if (vdp->xs_if_status != XDB_DISCONNECTED)
- cv_wait(&vdp->xs_iocv, &vdp->xs_iomutex);
+ err = EIO;
+ if (vdp->xs_ldi_hdl != NULL)
+ err = ldi_strategy(vdp->xs_ldi_hdl, bp);
+ if (err != 0) {
+ bp->b_flags |= B_ERROR;
+ (void) xdb_biodone(bp);
+ XDB_DBPRINT(XDB_DBG_IO, (CE_WARN,
+			    "xdb@%s: sent buf to backend dev failed, err=%d",
+ ddi_get_name_addr(vdp->xs_dip), err));
+ } else {
+ XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
+ "sent buf to backend ok"));
+ }
+ mutex_enter(&vdp->xs_iomutex);
}
-
+ XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "send buf finishing"));
mutex_exit(&vdp->xs_iomutex);
}
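
xdb_send_buf() is the consumer half of a standard mutex/condvar work
queue. The producer half lives in the ring interrupt path, outside this
hunk; a sketch of what queueing a buf is assumed to look like, using the
xs_f_iobuf/xs_l_iobuf head/tail pointers declared in xdb.h:

	mutex_enter(&vdp->xs_iomutex);
	bp->av_forw = NULL;
	if (vdp->xs_l_iobuf != NULL)
		vdp->xs_l_iobuf->av_forw = bp;	/* append to tail */
	else
		vdp->xs_f_iobuf = bp;		/* list was empty */
	vdp->xs_l_iobuf = bp;
	cv_broadcast(&vdp->xs_iocv);		/* wake xdb_send_buf() */
	mutex_exit(&vdp->xs_iomutex);
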
@@ -1324,17 +1530,19 @@ xdb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: "
"hotplug status change to %d!", ddi_get_name_addr(dip), state));
+ if (state != Connected)
+ return;
+
mutex_enter(&vdp->xs_cbmutex);
- if (state == Connected) {
- /* Hotplug script has completed successfully */
- if (vdp->xs_dev_status == XDB_DEV_UNKNOWN) {
- vdp->xs_dev_status = XDB_DEV_READY;
- if (vdp->xs_fe_status == XDB_FE_READY)
- /* try to connect to frontend */
- if (xdb_start_connect(vdp) != DDI_SUCCESS)
- (void) xdb_start_disconnect(vdp);
- }
+
+	/* If the hotplug scripts have already run, there's nothing to do */
+ if (vdp->xs_hp_connected) {
+ mutex_exit(&vdp->xs_cbmutex);
+ return;
}
+
+ vdp->xs_hp_connected = B_TRUE;
+ xdb_start_connect(vdp);
mutex_exit(&vdp->xs_cbmutex);
}
@@ -1351,29 +1559,47 @@ xdb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
mutex_enter(&vdp->xs_cbmutex);
- if (xdb_check_state_transition(vdp, new_state) == DDI_FAILURE) {
- mutex_exit(&vdp->xs_cbmutex);
- return;
- }
-
+ /*
+	 * Now it'd really be nice if there was a well-defined state
+	 * transition model for xen frontend drivers, but unfortunately
+ * there isn't. So we're stuck with assuming that all state
+ * transitions are possible, and we'll just have to deal with
+ * them regardless of what state we're in.
+ */
switch (new_state) {
- case XenbusStateInitialised:
- ASSERT(vdp->xs_if_status == XDB_UNKNOWN);
+ case XenbusStateUnknown:
+ case XenbusStateInitialising:
+ case XenbusStateInitWait:
+ /* tear down our connection to the frontend */
+ xdb_close(dip);
+ vdp->xs_fe_initialised = B_FALSE;
+ break;
- /* frontend is ready for connecting */
- vdp->xs_fe_status = XDB_FE_READY;
+ case XenbusStateInitialised:
+ /*
+		 * If we were connected, then we need to drop the connection
+ * and re-negotiate it.
+ */
+ xdb_close(dip);
+ vdp->xs_fe_initialised = B_TRUE;
+ xdb_start_connect(vdp);
+ break;
- if (vdp->xs_dev_status == XDB_DEV_READY)
- if (xdb_start_connect(vdp) != DDI_SUCCESS)
- (void) xdb_start_disconnect(vdp);
+ case XenbusStateConnected:
+		/* nothing to do here other than congratulate the frontend */
break;
+
case XenbusStateClosing:
+ /* monkey see monkey do */
(void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);
break;
+
case XenbusStateClosed:
- /* clean up */
+ /* tear down our connection to the frontend */
xdb_close(dip);
-
+ vdp->xs_fe_initialised = B_FALSE;
+ (void) xvdi_switch_state(dip, XBT_NULL, new_state);
+ break;
}
mutex_exit(&vdp->xs_cbmutex);
@@ -1382,9 +1608,11 @@ xdb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
static int
xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
- xdb_t *vdp;
- ddi_iblock_cookie_t ibc;
- int instance;
+ ddi_iblock_cookie_t ibc;
+ xdb_t *vdp;
+ int instance = ddi_get_instance(dip);
+ char *xsname, *oename;
+ char *str;
switch (cmd) {
case DDI_RESUME:
@@ -1394,42 +1622,69 @@ xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
default:
return (DDI_FAILURE);
}
-
/* DDI_ATTACH */
- instance = ddi_get_instance(dip);
- if (ddi_soft_state_zalloc(xdb_statep, instance) != DDI_SUCCESS)
+
+ if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
+ ((oename = xvdi_get_oename(dip)) == NULL))
return (DDI_FAILURE);
- vdp = ddi_get_soft_state(xdb_statep, instance);
- vdp->xs_dip = dip;
+ /*
+ * Disable auto-detach. This is necessary so that we don't get
+ * detached while we're disconnected from the front end.
+ */
+ (void) ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1);
+
if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS)
- goto errout1;
+ return (DDI_FAILURE);
- if (!xdb_kstat_init(vdp))
- goto errout1;
+ if (ddi_soft_state_zalloc(xdb_statep, instance) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+ vdp = ddi_get_soft_state(xdb_statep, instance);
+ vdp->xs_dip = dip;
mutex_init(&vdp->xs_iomutex, NULL, MUTEX_DRIVER, (void *)ibc);
mutex_init(&vdp->xs_cbmutex, NULL, MUTEX_DRIVER, (void *)ibc);
cv_init(&vdp->xs_iocv, NULL, CV_DRIVER, NULL);
cv_init(&vdp->xs_ionumcv, NULL, CV_DRIVER, NULL);
-
ddi_set_driver_private(dip, vdp);
+ if (!xdb_kstat_init(vdp))
+ goto errout1;
+
+ /* Check if the frontend device is supposed to be a cdrom */
+ if (xenbus_read_str(oename, XBP_DEV_TYPE, &str) != 0)
+ return (DDI_FAILURE);
+ if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
+ vdp->xs_type |= XDB_DEV_FE_CD;
+ strfree(str);
+
+ /* Check if the frontend device is supposed to be read only */
+ if (xenbus_read_str(xsname, "mode", &str) != 0)
+ return (DDI_FAILURE);
+	if ((strcmp(str, "r") == 0) || (strcmp(str, "ro") == 0))
+ vdp->xs_type |= XDB_DEV_RO;
+ strfree(str);
+
+ mutex_enter(&vdp->xs_cbmutex);
+ if (!xdb_media_req_init(vdp) || !xdb_params_init(vdp)) {
+ xvdi_remove_xb_watch_handlers(dip);
+ mutex_exit(&vdp->xs_cbmutex);
+ goto errout2;
+ }
+ mutex_exit(&vdp->xs_cbmutex);
+
+ vdp->xs_send_buf = B_TRUE;
vdp->xs_iotaskq = ddi_taskq_create(dip, "xdb_iotask", 1,
TASKQ_DEFAULTPRI, 0);
- if (vdp->xs_iotaskq == NULL)
- goto errout2;
(void) ddi_taskq_dispatch(vdp->xs_iotaskq, xdb_send_buf, vdp,
DDI_SLEEP);
/* Watch frontend and hotplug state change */
- if (xvdi_add_event_handler(dip, XS_OE_STATE, xdb_oe_state_change,
- NULL) != DDI_SUCCESS)
+ if ((xvdi_add_event_handler(dip, XS_OE_STATE, xdb_oe_state_change,
+ NULL) != DDI_SUCCESS) ||
+ (xvdi_add_event_handler(dip, XS_HP_STATE, xdb_hp_state_change,
+ NULL) != DDI_SUCCESS))
goto errout3;
- if (xvdi_add_event_handler(dip, XS_HP_STATE, xdb_hp_state_change,
- NULL) != DDI_SUCCESS) {
- goto errout4;
- }
/*
* Kick-off hotplug script
@@ -1437,7 +1692,7 @@ xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
if (xvdi_post_event(dip, XEN_HP_ADD) != DDI_SUCCESS) {
cmn_err(CE_WARN, "xdb@%s: failed to start hotplug script",
ddi_get_name_addr(dip));
- goto errout4;
+ goto errout3;
}
/*
@@ -1450,25 +1705,40 @@ xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
ddi_get_name_addr(dip)));
return (DDI_SUCCESS);
-errout4:
- xvdi_remove_event_handler(dip, NULL);
errout3:
+	ASSERT(!vdp->xs_hp_connected && !vdp->xs_if_connected);
+
+ xvdi_remove_event_handler(dip, NULL);
+
+	/* Stop the send-buf taskq thread */
mutex_enter(&vdp->xs_cbmutex);
mutex_enter(&vdp->xs_iomutex);
- vdp->xs_if_status = XDB_DISCONNECTED;
+ vdp->xs_send_buf = B_FALSE;
cv_broadcast(&vdp->xs_iocv);
mutex_exit(&vdp->xs_iomutex);
mutex_exit(&vdp->xs_cbmutex);
+
+	/* wait for all io to drain and then destroy the io taskq */
ddi_taskq_destroy(vdp->xs_iotaskq);
+
+ /* tear down block-configure watch */
+ mutex_enter(&vdp->xs_cbmutex);
+ xvdi_remove_xb_watch_handlers(dip);
+ mutex_exit(&vdp->xs_cbmutex);
+
errout2:
+ /* remove kstats */
+ kstat_delete(vdp->xs_kstats);
+
+errout1:
+ /* free up driver state */
ddi_set_driver_private(dip, NULL);
cv_destroy(&vdp->xs_iocv);
cv_destroy(&vdp->xs_ionumcv);
mutex_destroy(&vdp->xs_cbmutex);
mutex_destroy(&vdp->xs_iomutex);
- kstat_delete(vdp->xs_kstats);
-errout1:
ddi_soft_state_free(xdb_statep, instance);
+
return (DDI_FAILURE);
}
@@ -1490,19 +1760,25 @@ xdb_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
/* DDI_DETACH handling */
- /* shouldn't detach, if still used by frontend */
+ /* refuse to detach if we're still in use by the frontend */
mutex_enter(&vdp->xs_iomutex);
- if (vdp->xs_if_status != XDB_DISCONNECTED) {
+ if (vdp->xs_if_connected) {
mutex_exit(&vdp->xs_iomutex);
return (DDI_FAILURE);
}
+ vdp->xs_send_buf = B_FALSE;
+ cv_broadcast(&vdp->xs_iocv);
mutex_exit(&vdp->xs_iomutex);
xvdi_remove_event_handler(dip, NULL);
- /* can do nothing about it, if it fails */
(void) xvdi_post_event(dip, XEN_HP_REMOVE);
ddi_taskq_destroy(vdp->xs_iotaskq);
+
+ mutex_enter(&vdp->xs_cbmutex);
+ xvdi_remove_xb_watch_handlers(dip);
+ mutex_exit(&vdp->xs_cbmutex);
+
cv_destroy(&vdp->xs_iocv);
cv_destroy(&vdp->xs_ionumcv);
mutex_destroy(&vdp->xs_cbmutex);
@@ -1528,7 +1804,7 @@ static struct dev_ops xdb_dev_ops = {
NULL, /* devo_cb_ops */
NULL, /* devo_bus_ops */
NULL, /* power */
- ddi_quiesce_not_needed, /* quiesce */
+ ddi_quiesce_not_needed, /* quiesce */
};
/*
@@ -1536,7 +1812,7 @@ static struct dev_ops xdb_dev_ops = {
*/
static struct modldrv modldrv = {
&mod_driverops, /* Type of module. */
- "vbd backend driver", /* Name of the module */
+ "vbd backend driver", /* Name of the module */
&xdb_dev_ops /* driver ops */
};
diff --git a/usr/src/uts/common/xen/io/xdb.h b/usr/src/uts/common/xen/io/xdb.h
index 0abd008d0a..f8046e8219 100644
--- a/usr/src/uts/common/xen/io/xdb.h
+++ b/usr/src/uts/common/xen/io/xdb.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -28,8 +28,6 @@
#ifndef _SYS_XDB_H
#define _SYS_XDB_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -42,57 +40,17 @@ extern "C" {
/*
* Info of the exported blk device
*/
-#define XDB_DEV_RO (1) /* read-only or writable */
-#define XDB_IS_RO(vdp) ((vdp)->xs_type & XDB_DEV_RO)
-#define XDB_DEV_LOFI (1 << 1) /* lofi device or physical device */
-#define XDB_IS_LOFI(vdp) ((vdp)->xs_type & XDB_DEV_LOFI)
-#define XDB_DEV_CD (1 << 2) /* cdrom disc */
-#define XDB_IS_CD(vdp) ((vdp)->xs_type & XDB_DEV_CD)
-#define XDB_DEV_RMB (1 << 3) /* removable device */
-#define XDB_IS_RMB(vdp) ((vdp)->xs_type & XDB_DEV_RMB)
-
-/*
- * Xdb interface status
- */
-enum xdb_state {
- /*
- * initial state
- */
- XDB_UNKNOWN,
- /*
- * frontend xenbus state changed to XenbusStateConnected,
- * we finally connect
- */
- XDB_CONNECTED,
- /*
- * frontend xenbus state changed to XenbusStateClosed,
- * interface disconnected
- */
- XDB_DISCONNECTED
-};
-
-/*
- * backend device status
- */
-enum xdb_dev_state {
- /* initial state */
- XDB_DEV_UNKNOWN,
- /* backend device is ready (hotplug script finishes successfully) */
- XDB_DEV_READY
-};
-
-/*
- * frontend status
- */
-enum xdb_fe_state {
- /* initial state */
- XDB_FE_UNKNOWN,
- /*
- * frontend's xenbus state has changed to
- * XenbusStateInitialised, is ready for connecting
- */
- XDB_FE_READY
-};
+#define XDB_DEV_RO (1 << 0) /* backend and frontend are read-only */
+#define XDB_DEV_BE_LOFI (1 << 1) /* backend device is a lofi device */
+#define XDB_DEV_BE_RMB (1 << 2) /* backend device is removable */
+#define XDB_DEV_BE_CD (1 << 3) /* backend device is cdrom */
+#define XDB_DEV_FE_CD (1 << 4) /* frontend device is cdrom */
+
+#define XDB_IS_RO(vdp) ((vdp)->xs_type & XDB_DEV_RO)
+#define XDB_IS_BE_LOFI(vdp) ((vdp)->xs_type & XDB_DEV_BE_LOFI)
+#define XDB_IS_BE_RMB(vdp) ((vdp)->xs_type & XDB_DEV_BE_RMB)
+#define XDB_IS_BE_CD(vdp) ((vdp)->xs_type & XDB_DEV_BE_CD)
+#define XDB_IS_FE_CD(vdp) ((vdp)->xs_type & XDB_DEV_FE_CD)
/*
 * Other handy macros
@@ -183,12 +141,6 @@ struct xdb {
*/
buf_t *xs_f_iobuf;
buf_t *xs_l_iobuf;
- /* xdb interface status */
- enum xdb_state xs_if_status;
- /* backend device status */
- enum xdb_dev_state xs_dev_status;
- /* frontend status */
- enum xdb_fe_state xs_fe_status;
/* head of free list of xdb_request_t */
int xs_free_req;
/* pre-allocated xdb_request_t pool */
@@ -201,6 +153,23 @@ struct xdb {
enum blkif_protocol xs_blk_protocol;
size_t xs_nentry;
size_t xs_entrysize;
+
+ /* Protected by xs_cbmutex */
+ boolean_t xs_hp_connected; /* hot plug scripts have run */
+ boolean_t xs_fe_initialised; /* frontend is initialized */
+ char *xs_lofi_path;
+ char *xs_params_path;
+ struct xenbus_watch *xs_watch_params;
+ struct xenbus_watch *xs_watch_media_req;
+ ddi_taskq_t *xs_watch_taskq;
+ int xs_watch_taskq_count;
+
+ /* Protected by xs_cbmutex and xs_iomutex */
+ boolean_t xs_if_connected; /* connected to frontend */
+
+ /* Protected by xs_iomutex */
+ boolean_t xs_send_buf;
+
#ifdef DEBUG
uint64_t *page_addrs; /* for debug aid */
#endif /* DEBUG */
diff --git a/usr/src/uts/common/xen/io/xdf.c b/usr/src/uts/common/xen/io/xdf.c
index 56c18a6cec..8a5105b82c 100644
--- a/usr/src/uts/common/xen/io/xdf.c
+++ b/usr/src/uts/common/xen/io/xdf.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -29,12 +29,47 @@
* TODO:
* - support alternate block size (currently only DEV_BSIZE supported)
* - revalidate geometry for removable devices
+ *
+ * This driver exports Solaris disk device nodes, accepts IO requests from
+ * those nodes, and services those requests by talking to a backend device
+ * in another domain.
+ *
+ * Communication with the backend device is done via a ringbuffer (which is
+ * managed via xvdi interfaces) and dma memory (which is managed via ddi
+ * interfaces).
+ *
+ * Communication with the backend device is dependent upon establishing a
+ * connection to the backend device. This connection process involves
+ * reading device configuration information from xenbus and publishing
+ * some frontend runtime configuration parameters via the xenbus (for
+ * consumption by the backend). Once we've published runtime configuration
+ * information via the xenbus, the backend device can enter the connected
+ * state and we'll enter the XD_CONNECTED state. But before we can allow
+ * random IO to begin, we need to do IO to the backend device to determine
+ * the device label and if flush operations are supported. Once this is
+ * done we enter the XD_READY state and can process any IO operations.
+ *
+ * We receive notifications of xenbus state changes for the backend device
+ * (aka, the "other end") via the xdf_oe_change() callback. This callback
+ * is single threaded, meaning that we can't receive new notifications of
+ * other end state changes while we're processing an outstanding
+ * notification of an other end state change. Therefore we can't do any
+ * blocking operations from the xdf_oe_change() callback. This is why we
+ * have a separate taskq (xdf_ready_tq) which exists to do the necessary
+ * IO to get us from the XD_CONNECTED to the XD_READY state. All IO
+ * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go
+ * through xdf_lb_rdwr(), which is a synchronous IO interface. IOs
+ * generated by the xdf_ready_tq_thread have priority over all
+ * other IO requests.
+ *
+ * We also communicate with the backend device via the xenbus "media-req"
+ * (XBP_MEDIA_REQ) property. For more information on this see the
+ * comments in blkif.h.
*/
-#include <sys/ddi.h>
-#include <sys/sunddi.h>
+#include <io/xdf.h>
+
#include <sys/conf.h>
-#include <sys/cmlb.h>
#include <sys/dkio.h>
#include <sys/promif.h>
#include <sys/sysmacros.h>
@@ -43,140 +78,78 @@
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#include <sys/sunndi.h>
-#endif /* XPV_HVM_DRIVER */
+#else /* !XPV_HVM_DRIVER */
+#include <sys/evtchn_impl.h>
+#endif /* !XPV_HVM_DRIVER */
#include <public/io/xenbus.h>
#include <xen/sys/xenbus_impl.h>
-#include <xen/sys/xendev.h>
-#include <sys/gnttab.h>
#include <sys/scsi/generic/inquiry.h>
#include <xen/io/blkif_impl.h>
-#include <io/xdf.h>
+#include <sys/fdio.h>
+#include <sys/cdio.h>
+
+/*
+ * DEBUG_EVAL can be used to include debug only statements without
+ * having to use '#ifdef DEBUG' statements
+ */
+#ifdef DEBUG
+#define DEBUG_EVAL(x) (x)
+#else /* !DEBUG */
+#define DEBUG_EVAL(x)
+#endif /* !DEBUG */
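
For illustration, a statement wrapped in DEBUG_EVAL() compiles away
entirely on non-DEBUG builds (xdf_debug_cnt is a hypothetical counter,
not part of this patch):

	DEBUG_EVAL(xdf_debug_cnt++);
	DEBUG_EVAL(cmn_err(CE_NOTE, "xdf: %d debug events", xdf_debug_cnt));
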
+
+#define XDF_DRAIN_MSEC_DELAY (50*1000) /* 00.05 sec */
+#define XDF_DRAIN_RETRY_COUNT 200 /* 10.00 sec */
+#define INVALID_DOMID ((domid_t)-1)
#define FLUSH_DISKCACHE 0x1
#define WRITE_BARRIER 0x2
#define DEFAULT_FLUSH_BLOCK 156 /* block to write to cause a cache flush */
-#define USE_WRITE_BARRIER(vdp) \
+#define USE_WRITE_BARRIER(vdp) \
((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported)
-#define USE_FLUSH_DISKCACHE(vdp) \
+#define USE_FLUSH_DISKCACHE(vdp) \
((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported)
-#define IS_WRITE_BARRIER(vdp, bp) \
- (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \
+#define IS_WRITE_BARRIER(vdp, bp) \
+ (!IS_READ(bp) && USE_WRITE_BARRIER(vdp) && \
((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block))
-#define IS_FLUSH_DISKCACHE(bp) \
+#define IS_FLUSH_DISKCACHE(bp) \
(!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0))
-static void *vbd_ss;
-static kmem_cache_t *xdf_vreq_cache;
-static kmem_cache_t *xdf_gs_cache;
-static int xdf_maxphys = XB_MAXPHYS;
-int xdfdebug = 0;
-extern int do_polled_io;
-diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK;
-int xdf_barrier_flush_disable = 0;
+#define VREQ_DONE(vreq) \
+ VOID2BOOLEAN(((vreq)->v_status == VREQ_DMAWIN_DONE) && \
+ (((vreq)->v_flush_diskcache == FLUSH_DISKCACHE) || \
+ (((vreq)->v_dmaw + 1) == (vreq)->v_ndmaws)))
-/*
- * dev_ops and cb_ops entrypoints
- */
-static int xdf_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
-static int xdf_attach(dev_info_t *, ddi_attach_cmd_t);
-static int xdf_detach(dev_info_t *, ddi_detach_cmd_t);
-static int xdf_reset(dev_info_t *, ddi_reset_cmd_t);
-static int xdf_open(dev_t *, int, int, cred_t *);
-static int xdf_close(dev_t, int, int, struct cred *);
-static int xdf_strategy(struct buf *);
-static int xdf_read(dev_t, struct uio *, cred_t *);
-static int xdf_aread(dev_t, struct aio_req *, cred_t *);
-static int xdf_write(dev_t, struct uio *, cred_t *);
-static int xdf_awrite(dev_t, struct aio_req *, cred_t *);
-static int xdf_dump(dev_t, caddr_t, daddr_t, int);
-static int xdf_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
-static uint_t xdf_intr(caddr_t);
-static int xdf_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
- caddr_t, int *);
+#define BP_VREQ(bp) ((v_req_t *)((bp)->av_back))
+#define BP_VREQ_SET(bp, vreq) (((bp)->av_back = (buf_t *)(vreq)))
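
These two macros tie a buf_t to its v_req_t by overloading the buf's
av_back pointer. A minimal sketch of the round trip, assuming bp and vreq
are a valid buf_t/v_req_t pair:

	BP_VREQ_SET(bp, vreq);		/* stash the vreq in bp->av_back */
	ASSERT(BP_VREQ(bp) == vreq);	/* recover it on the way back */
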
-/*
- * misc private functions
- */
-static int xdf_suspend(dev_info_t *);
-static int xdf_resume(dev_info_t *);
-static int xdf_start_connect(xdf_t *);
-static int xdf_start_disconnect(xdf_t *);
-static int xdf_post_connect(xdf_t *);
-static void xdf_post_disconnect(xdf_t *);
-static void xdf_oe_change(dev_info_t *, ddi_eventcookie_t, void *, void *);
-static void xdf_iostart(xdf_t *);
-static void xdf_iofini(xdf_t *, uint64_t, int);
-static int xdf_prepare_rreq(xdf_t *, struct buf *, blkif_request_t *);
-static int xdf_drain_io(xdf_t *);
-static boolean_t xdf_isopen(xdf_t *, int);
-static int xdf_check_state_transition(xdf_t *, XenbusState);
-static int xdf_connect(xdf_t *, boolean_t);
-static int xdf_dmacallback(caddr_t);
-static void xdf_timeout_handler(void *);
-static uint_t xdf_iorestart(caddr_t);
-static v_req_t *vreq_get(xdf_t *, buf_t *);
-static void vreq_free(xdf_t *, v_req_t *);
-static int vreq_setup(xdf_t *, v_req_t *);
-static ge_slot_t *gs_get(xdf_t *, int);
-static void gs_free(xdf_t *, ge_slot_t *);
-static grant_ref_t gs_grant(ge_slot_t *, mfn_t);
-static void unexpectedie(xdf_t *);
-static void xdfmin(struct buf *);
-static void xdf_synthetic_pgeom(dev_info_t *, cmlb_geom_t *);
-extern int xdf_kstat_create(dev_info_t *, char *, int);
-extern void xdf_kstat_delete(dev_info_t *);
+extern int do_polled_io;
-#if defined(XPV_HVM_DRIVER)
-static void xdf_hvm_add(dev_info_t *);
-static void xdf_hvm_rm(dev_info_t *);
-static void xdf_hvm_init(void);
-static void xdf_hvm_fini(void);
-#endif /* XPV_HVM_DRIVER */
+/* run-time tunables that we don't want the compiler to optimize away */
+volatile int xdf_debug = 0;
+volatile boolean_t xdf_barrier_flush_disable = B_FALSE;
-static struct cb_ops xdf_cbops = {
- xdf_open,
- xdf_close,
- xdf_strategy,
- nodev,
- xdf_dump,
- xdf_read,
- xdf_write,
- xdf_ioctl,
- nodev,
- nodev,
- nodev,
- nochpoll,
- xdf_prop_op,
- NULL,
- D_MP | D_NEW | D_64BIT,
- CB_REV,
- xdf_aread,
- xdf_awrite
-};
+/* per module globals */
+major_t xdf_major;
+static void *xdf_ssp;
+static kmem_cache_t *xdf_vreq_cache;
+static kmem_cache_t *xdf_gs_cache;
+static int xdf_maxphys = XB_MAXPHYS;
+static diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK;
+static int xdf_fbrewrites; /* flush block re-write count */
-struct dev_ops xdf_devops = {
- DEVO_REV, /* devo_rev */
- 0, /* devo_refcnt */
- xdf_getinfo, /* devo_getinfo */
- nulldev, /* devo_identify */
- nulldev, /* devo_probe */
- xdf_attach, /* devo_attach */
- xdf_detach, /* devo_detach */
- xdf_reset, /* devo_reset */
- &xdf_cbops, /* devo_cb_ops */
- (struct bus_ops *)NULL, /* devo_bus_ops */
- NULL, /* devo_power */
- ddi_quiesce_not_supported, /* devo_quiesce */
-};
+/* misc public functions (used by xdf_shell.c) */
+int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *);
+int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
-static struct modldrv modldrv = {
- &mod_driverops, /* Type of module. This one is a driver */
- "virtual block driver", /* short description */
- &xdf_devops /* driver specific ops */
-};
+/* misc private functions */
+static void xdf_io_start(xdf_t *);
-static struct modlinkage xdf_modlinkage = {
- MODREV_1, (void *)&modldrv, NULL
+/* callbacks from common label */
+static cmlb_tg_ops_t xdf_lb_ops = {
+ TG_DK_OPS_VERSION_1,
+ xdf_lb_rdwr,
+ xdf_lb_getinfo
};
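
This tg_ops vector is what gets handed to cmlb(9E) when the disk label
handle is attached. A hedged sketch of that registration, modeled on the
cmlb_attach() call removed elsewhere in this patch (the flags and node
type vary by configuration):

	cmlb_alloc_handle(&vdp->xdf_vd_lbl);
	if (cmlb_attach(vdp->xdf_dip, &xdf_lb_ops, DTYPE_DIRECT,
	    B_FALSE, B_TRUE, DDI_NT_BLOCK_XVMD,
	    CMLB_FAKE_LABEL_ONE_PARTITION, vdp->xdf_vd_lbl, NULL) != 0)
		cmn_err(CE_WARN, "xdf: cmlb attach failed");
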
/*
@@ -204,992 +177,762 @@ static ddi_device_acc_attr_t xc_acc_attr = {
DDI_STRICTORDER_ACC
};
-/* callbacks from commmon label */
-
-int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *);
-int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
-
-static cmlb_tg_ops_t xdf_lb_ops = {
- TG_DK_OPS_VERSION_1,
- xdf_lb_rdwr,
- xdf_lb_getinfo
-};
-
-int
-_init(void)
+static void
+xdf_timeout_handler(void *arg)
{
- int rc;
+ xdf_t *vdp = arg;
- if ((rc = ddi_soft_state_init(&vbd_ss, sizeof (xdf_t), 0)) != 0)
- return (rc);
+ mutex_enter(&vdp->xdf_dev_lk);
+ vdp->xdf_timeout_id = 0;
+ mutex_exit(&vdp->xdf_dev_lk);
- xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache",
- sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
- xdf_gs_cache = kmem_cache_create("xdf_gs_cache",
- sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+	/* a new timeout could be scheduled by xdf_io_start() */
+ xdf_io_start(vdp);
+}
-#if defined(XPV_HVM_DRIVER)
- xdf_hvm_init();
-#endif /* XPV_HVM_DRIVER */
+/*
+ * callback func invoked when DMA/GTE resources become available
+ *
+ * Note: we only register one callback function with the grant table
+ * subsystem since we only have one 'struct gnttab_free_callback' in xdf_t.
+ */
+static int
+xdf_dmacallback(caddr_t arg)
+{
+ xdf_t *vdp = (xdf_t *)arg;
+ ASSERT(vdp != NULL);
- if ((rc = mod_install(&xdf_modlinkage)) != 0) {
-#if defined(XPV_HVM_DRIVER)
- xdf_hvm_fini();
-#endif /* XPV_HVM_DRIVER */
- kmem_cache_destroy(xdf_vreq_cache);
- kmem_cache_destroy(xdf_gs_cache);
- ddi_soft_state_fini(&vbd_ss);
- return (rc);
- }
+ DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n",
+ vdp->xdf_addr));
- return (rc);
+ ddi_trigger_softintr(vdp->xdf_softintr_id);
+ return (DDI_DMA_CALLBACK_DONE);
}
-int
-_fini(void)
+static ge_slot_t *
+gs_get(xdf_t *vdp, int isread)
{
+ grant_ref_t gh;
+ ge_slot_t *gs;
- int err;
- if ((err = mod_remove(&xdf_modlinkage)) != 0)
- return (err);
-
-#if defined(XPV_HVM_DRIVER)
- xdf_hvm_fini();
-#endif /* XPV_HVM_DRIVER */
+	/* first, try to alloc the GTEs needed by this slot */
+ if (gnttab_alloc_grant_references(
+ BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
+ if (vdp->xdf_gnt_callback.next == NULL) {
+ SETDMACBON(vdp);
+ gnttab_request_free_callback(
+ &vdp->xdf_gnt_callback,
+ (void (*)(void *))xdf_dmacallback,
+ (void *)vdp,
+ BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ }
+ return (NULL);
+ }
- kmem_cache_destroy(xdf_vreq_cache);
- kmem_cache_destroy(xdf_gs_cache);
- ddi_soft_state_fini(&vbd_ss);
+ gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
+ if (gs == NULL) {
+ gnttab_free_grant_references(gh);
+ if (vdp->xdf_timeout_id == 0)
+ /* restart I/O after one second */
+ vdp->xdf_timeout_id =
+ timeout(xdf_timeout_handler, vdp, hz);
+ return (NULL);
+ }
- return (0);
-}
+ /* init gs_slot */
+ gs->gs_oeid = vdp->xdf_peer;
+ gs->gs_isread = isread;
+ gs->gs_ghead = gh;
+ gs->gs_ngrefs = 0;
-int
-_info(struct modinfo *modinfop)
-{
- return (mod_info(&xdf_modlinkage, modinfop));
+ return (gs);
}
-/*ARGSUSED*/
-static int
-xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp)
+static void
+gs_free(ge_slot_t *gs)
{
- int instance;
- xdf_t *vbdp;
-
- instance = XDF_INST(getminor((dev_t)arg));
-
- switch (cmd) {
- case DDI_INFO_DEVT2DEVINFO:
- if ((vbdp = ddi_get_soft_state(vbd_ss, instance)) == NULL) {
- *rp = NULL;
- return (DDI_FAILURE);
- }
- *rp = vbdp->xdf_dip;
- return (DDI_SUCCESS);
+ int i;
- case DDI_INFO_DEVT2INSTANCE:
- *rp = (void *)(uintptr_t)instance;
- return (DDI_SUCCESS);
-
- default:
- return (DDI_FAILURE);
- }
+ /* release all grant table entry resources used in this slot */
+ for (i = 0; i < gs->gs_ngrefs; i++)
+ gnttab_end_foreign_access(gs->gs_ge[i], !gs->gs_isread, 0);
+ gnttab_free_grant_references(gs->gs_ghead);
+ list_remove(&gs->gs_vreq->v_gs, gs);
+ kmem_cache_free(xdf_gs_cache, gs);
}
-static int
-xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
- char *name, caddr_t valuep, int *lengthp)
+static grant_ref_t
+gs_grant(ge_slot_t *gs, mfn_t mfn)
{
- xdf_t *vdp;
+ grant_ref_t gr = gnttab_claim_grant_reference(&gs->gs_ghead);
- if ((vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(dip))) == NULL)
- return (ddi_prop_op(dev, dip, prop_op, mod_flags,
- name, valuep, lengthp));
+ ASSERT(gr != -1);
+ ASSERT(gs->gs_ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ gs->gs_ge[gs->gs_ngrefs++] = gr;
+ gnttab_grant_foreign_access_ref(gr, gs->gs_oeid, mfn, !gs->gs_isread);
- return (cmlb_prop_op(vdp->xdf_vd_lbl,
- dev, dip, prop_op, mod_flags, name, valuep, lengthp,
- XDF_PART(getminor(dev)), NULL));
+ return (gr);
}
-static int
-xdf_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
+/*
+ * Alloc a vreq for this bp
+ * bp->av_back contains the pointer to the vreq upon return
+ */
+static v_req_t *
+vreq_get(xdf_t *vdp, buf_t *bp)
{
- xdf_t *vdp;
- ddi_iblock_cookie_t softibc;
- int instance;
-
- xdfdebug = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_NOTPROM,
- "xdfdebug", 0);
-
- switch (cmd) {
- case DDI_ATTACH:
- break;
+ v_req_t *vreq = NULL;
- case DDI_RESUME:
- return (xdf_resume(devi));
+ ASSERT(BP_VREQ(bp) == NULL);
- default:
- return (DDI_FAILURE);
+ vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
+ if (vreq == NULL) {
+ if (vdp->xdf_timeout_id == 0)
+ /* restart I/O after one second */
+ vdp->xdf_timeout_id =
+ timeout(xdf_timeout_handler, vdp, hz);
+ return (NULL);
}
+ bzero(vreq, sizeof (v_req_t));
+ list_create(&vreq->v_gs, sizeof (ge_slot_t),
+ offsetof(ge_slot_t, gs_vreq_link));
+ vreq->v_buf = bp;
+ vreq->v_status = VREQ_INIT;
+ vreq->v_runq = B_FALSE;
+ BP_VREQ_SET(bp, vreq);
+ /* init of other fields in vreq is up to the caller */
- instance = ddi_get_instance(devi);
- if (ddi_soft_state_zalloc(vbd_ss, instance) != DDI_SUCCESS)
- return (DDI_FAILURE);
-
- DPRINTF(DDI_DBG, ("xdf%d: attaching\n", instance));
- vdp = ddi_get_soft_state(vbd_ss, instance);
- ddi_set_driver_private(devi, vdp);
- vdp->xdf_dip = devi;
- cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL);
+ list_insert_head(&vdp->xdf_vreq_act, (void *)vreq);
- if (ddi_get_iblock_cookie(devi, 0, &vdp->xdf_ibc) != DDI_SUCCESS) {
- cmn_err(CE_WARN, "xdf@%s: failed to get iblock cookie",
- ddi_get_name_addr(devi));
- goto errout0;
- }
- mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)vdp->xdf_ibc);
- mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)vdp->xdf_ibc);
- mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER,
- (void *)vdp->xdf_ibc);
+ return (vreq);
+}
- if (ddi_get_soft_iblock_cookie(devi, DDI_SOFTINT_LOW, &softibc)
- != DDI_SUCCESS) {
- cmn_err(CE_WARN, "xdf@%s: failed to get softintr iblock cookie",
- ddi_get_name_addr(devi));
- goto errout0;
- }
- if (ddi_add_softintr(devi, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id,
- &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) {
- cmn_err(CE_WARN, "xdf@%s: failed to add softintr",
- ddi_get_name_addr(devi));
- goto errout0;
- }
+static void
+vreq_free(xdf_t *vdp, v_req_t *vreq)
+{
+ buf_t *bp = vreq->v_buf;
-#if !defined(XPV_HVM_DRIVER)
- /* create kstat for iostat(1M) */
- if (xdf_kstat_create(devi, "xdf", instance) != 0) {
- cmn_err(CE_WARN, "xdf@%s: failed to create kstat",
- ddi_get_name_addr(devi));
- goto errout0;
- }
-#endif /* !XPV_HVM_DRIVER */
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
+ ASSERT(BP_VREQ(bp) == vreq);
- /* driver handles kernel-issued IOCTLs */
- if (ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
- DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS) {
- cmn_err(CE_WARN, "xdf@%s: cannot create DDI_KERNEL_IOCTL prop",
- ddi_get_name_addr(devi));
- goto errout0;
- }
+ list_remove(&vdp->xdf_vreq_act, vreq);
- /*
- * Initialize the physical geometry stucture. Note that currently
- * we don't know the size of the backend device so the number
- * of blocks on the device will be initialized to zero. Once
- * we connect to the backend device we'll update the physical
- * geometry to reflect the real size of the device.
- */
- xdf_synthetic_pgeom(devi, &vdp->xdf_pgeom);
+ if (vreq->v_flush_diskcache == FLUSH_DISKCACHE)
+ goto done;
- /*
- * create default device minor nodes: non-removable disk
- * we will adjust minor nodes after we are connected w/ backend
- */
- cmlb_alloc_handle(&vdp->xdf_vd_lbl);
- if (cmlb_attach(devi, &xdf_lb_ops, DTYPE_DIRECT, 0, 1,
- DDI_NT_BLOCK_XVMD,
-#if defined(XPV_HVM_DRIVER)
- CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT |
- CMLB_INTERNAL_MINOR_NODES,
-#else /* !XPV_HVM_DRIVER */
- CMLB_FAKE_LABEL_ONE_PARTITION,
-#endif /* !XPV_HVM_DRIVER */
- vdp->xdf_vd_lbl, NULL) != 0) {
- cmn_err(CE_WARN, "xdf@%s: default cmlb attach failed",
- ddi_get_name_addr(devi));
- goto errout0;
+ switch (vreq->v_status) {
+ case VREQ_DMAWIN_DONE:
+ case VREQ_GS_ALLOCED:
+ case VREQ_DMABUF_BOUND:
+ (void) ddi_dma_unbind_handle(vreq->v_dmahdl);
+ /*FALLTHRU*/
+ case VREQ_DMAMEM_ALLOCED:
+ if (!ALIGNED_XFER(bp)) {
+ ASSERT(vreq->v_abuf != NULL);
+ if (!IS_ERROR(bp) && IS_READ(bp))
+ bcopy(vreq->v_abuf, bp->b_un.b_addr,
+ bp->b_bcount);
+ ddi_dma_mem_free(&vreq->v_align);
+ }
+ /*FALLTHRU*/
+ case VREQ_MEMDMAHDL_ALLOCED:
+ if (!ALIGNED_XFER(bp))
+ ddi_dma_free_handle(&vreq->v_memdmahdl);
+ /*FALLTHRU*/
+ case VREQ_DMAHDL_ALLOCED:
+ ddi_dma_free_handle(&vreq->v_dmahdl);
+ break;
+ default:
+ break;
}
+done:
+ ASSERT(!vreq->v_runq);
+ list_destroy(&vreq->v_gs);
+ kmem_cache_free(xdf_vreq_cache, vreq);
+}
- /*
- * We ship with cache-enabled disks
- */
- vdp->xdf_wce = 1;
-
- mutex_enter(&vdp->xdf_cb_lk);
+/*
+ * Snarf new data if our flush block was re-written
+ */
+static void
+check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno)
+{
+ int nblks;
+ boolean_t mapin;
- /* Watch backend XenbusState change */
- if (xvdi_add_event_handler(devi, XS_OE_STATE, xdf_oe_change,
- NULL) != DDI_SUCCESS) {
- mutex_exit(&vdp->xdf_cb_lk);
- goto errout0;
- }
+ if (IS_WRITE_BARRIER(vdp, bp))
+ return; /* write was a flush write */
- if (xdf_start_connect(vdp) != DDI_SUCCESS) {
- cmn_err(CE_WARN, "xdf@%s: start connection failed",
- ddi_get_name_addr(devi));
- (void) xdf_start_disconnect(vdp);
- mutex_exit(&vdp->xdf_cb_lk);
- goto errout1;
+ mapin = B_FALSE;
+ nblks = bp->b_bcount >> DEV_BSHIFT;
+ if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) {
+ xdf_fbrewrites++;
+ if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
+ mapin = B_TRUE;
+ bp_mapin(bp);
+ }
+ bcopy(bp->b_un.b_addr +
+ ((xdf_flush_block - blkno) << DEV_BSHIFT),
+ vdp->xdf_cache_flush_block, DEV_BSIZE);
+ if (mapin)
+ bp_mapout(bp);
}
+}
- mutex_exit(&vdp->xdf_cb_lk);
+/*
+ * Initialize the DMA and grant table resources for the buf
+ */
+static int
+vreq_setup(xdf_t *vdp, v_req_t *vreq)
+{
+ int rc;
+ ddi_dma_attr_t dmaattr;
+ uint_t ndcs, ndws;
+ ddi_dma_handle_t dh;
+ ddi_dma_handle_t mdh;
+ ddi_dma_cookie_t dc;
+ ddi_acc_handle_t abh;
+ caddr_t aba;
+ ge_slot_t *gs;
+ size_t bufsz;
+ off_t off;
+ size_t sz;
+ buf_t *bp = vreq->v_buf;
+ int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) |
+ DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
- list_create(&vdp->xdf_vreq_act, sizeof (v_req_t),
- offsetof(v_req_t, v_link));
- list_create(&vdp->xdf_gs_act, sizeof (ge_slot_t),
- offsetof(ge_slot_t, link));
+ switch (vreq->v_status) {
+ case VREQ_INIT:
+ if (IS_FLUSH_DISKCACHE(bp)) {
+ if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
+ DPRINTF(DMA_DBG, ("xdf@%s: "
+				    "get ge_slot failed\n", vdp->xdf_addr));
+ return (DDI_FAILURE);
+ }
+ vreq->v_blkno = 0;
+ vreq->v_nslots = 1;
+ vreq->v_flush_diskcache = FLUSH_DISKCACHE;
+ vreq->v_status = VREQ_GS_ALLOCED;
+ gs->gs_vreq = vreq;
+ list_insert_head(&vreq->v_gs, gs);
+ return (DDI_SUCCESS);
+ }
-#if defined(XPV_HVM_DRIVER)
- xdf_hvm_add(devi);
+ if (IS_WRITE_BARRIER(vdp, bp))
+ vreq->v_flush_diskcache = WRITE_BARRIER;
+ vreq->v_blkno = bp->b_blkno +
+ (diskaddr_t)(uintptr_t)bp->b_private;
+ /* See if we wrote new data to our flush block */
+ if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
+ check_fbwrite(vdp, bp, vreq->v_blkno);
+ vreq->v_status = VREQ_INIT_DONE;
+ /*FALLTHRU*/
- (void) ddi_prop_update_int(DDI_DEV_T_NONE, devi, DDI_NO_AUTODETACH, 1);
+ case VREQ_INIT_DONE:
+ /*
+ * alloc DMA handle
+ */
+ rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
+ xdf_dmacallback, (caddr_t)vdp, &dh);
+ if (rc != DDI_SUCCESS) {
+ SETDMACBON(vdp);
+ DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
+ vdp->xdf_addr));
+ return (DDI_FAILURE);
+ }
- /*
- * Report our version to dom0.
- */
- if (xenbus_printf(XBT_NULL, "hvmpv/xdf", "version", "%d",
- HVMPV_XDF_VERS))
- cmn_err(CE_WARN, "xdf: couldn't write version\n");
-#endif /* XPV_HVM_DRIVER */
+ vreq->v_dmahdl = dh;
+ vreq->v_status = VREQ_DMAHDL_ALLOCED;
+ /*FALLTHRU*/
- ddi_report_dev(devi);
+ case VREQ_DMAHDL_ALLOCED:
+ /*
+ * alloc dma handle for 512-byte aligned buf
+ */
+ if (!ALIGNED_XFER(bp)) {
+ /*
+ * XXPV: we need to temporarily enlarge the seg
+			 * boundary and s/g length to work around CR6381968
+ */
+ dmaattr = xb_dma_attr;
+ dmaattr.dma_attr_seg = (uint64_t)-1;
+ dmaattr.dma_attr_sgllen = INT_MAX;
+ rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
+ xdf_dmacallback, (caddr_t)vdp, &mdh);
+ if (rc != DDI_SUCCESS) {
+ SETDMACBON(vdp);
+ DPRINTF(DMA_DBG, ("xdf@%s: "
+ "unaligned buf DMAhandle alloc failed\n",
+ vdp->xdf_addr));
+ return (DDI_FAILURE);
+ }
+ vreq->v_memdmahdl = mdh;
+ vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
+ }
+ /*FALLTHRU*/
- DPRINTF(DDI_DBG, ("xdf%d: attached\n", instance));
+ case VREQ_MEMDMAHDL_ALLOCED:
+ /*
+ * alloc 512-byte aligned buf
+ */
+ if (!ALIGNED_XFER(bp)) {
+ if (bp->b_flags & (B_PAGEIO | B_PHYS))
+ bp_mapin(bp);
- return (DDI_SUCCESS);
+ rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
+ roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
+ DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
+ &aba, &bufsz, &abh);
+ if (rc != DDI_SUCCESS) {
+ SETDMACBON(vdp);
+ DPRINTF(DMA_DBG, ("xdf@%s: "
+ "DMA mem allocation failed\n",
+ vdp->xdf_addr));
+ return (DDI_FAILURE);
+ }
-errout1:
- xvdi_remove_event_handler(devi, XS_OE_STATE);
-errout0:
- if (vdp->xdf_vd_lbl != NULL) {
- cmlb_detach(vdp->xdf_vd_lbl, NULL);
- cmlb_free_handle(&vdp->xdf_vd_lbl);
- vdp->xdf_vd_lbl = NULL;
- }
-#if !defined(XPV_HVM_DRIVER)
- xdf_kstat_delete(devi);
-#endif /* !XPV_HVM_DRIVER */
- if (vdp->xdf_softintr_id != NULL)
- ddi_remove_softintr(vdp->xdf_softintr_id);
- if (vdp->xdf_ibc != NULL) {
- mutex_destroy(&vdp->xdf_cb_lk);
- mutex_destroy(&vdp->xdf_dev_lk);
- }
- cv_destroy(&vdp->xdf_dev_cv);
- ddi_soft_state_free(vbd_ss, instance);
- ddi_set_driver_private(devi, NULL);
- ddi_prop_remove_all(devi);
- cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(devi));
- return (DDI_FAILURE);
-}
+ vreq->v_abuf = aba;
+ vreq->v_align = abh;
+ vreq->v_status = VREQ_DMAMEM_ALLOCED;
-static int
-xdf_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
-{
- xdf_t *vdp;
- int instance;
+ ASSERT(bufsz >= bp->b_bcount);
+ if (!IS_READ(bp))
+ bcopy(bp->b_un.b_addr, vreq->v_abuf,
+ bp->b_bcount);
+ }
+ /*FALLTHRU*/
- switch (cmd) {
+ case VREQ_DMAMEM_ALLOCED:
+ /*
+ * dma bind
+ */
+ if (ALIGNED_XFER(bp)) {
+ rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
+ dma_flags, xdf_dmacallback, (caddr_t)vdp,
+ &dc, &ndcs);
+ } else {
+ rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
+ NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
+ xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
+ }
+ if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
+ /* get num of dma windows */
+ if (rc == DDI_DMA_PARTIAL_MAP) {
+ rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws);
+ ASSERT(rc == DDI_SUCCESS);
+ } else {
+ ndws = 1;
+ }
+ } else {
+ SETDMACBON(vdp);
+ DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
+ vdp->xdf_addr));
+ return (DDI_FAILURE);
+ }
- case DDI_PM_SUSPEND:
- break;
+ vreq->v_dmac = dc;
+ vreq->v_dmaw = 0;
+ vreq->v_ndmacs = ndcs;
+ vreq->v_ndmaws = ndws;
+ vreq->v_nslots = ndws;
+ vreq->v_status = VREQ_DMABUF_BOUND;
+ /*FALLTHRU*/
- case DDI_SUSPEND:
- return (xdf_suspend(devi));
+ case VREQ_DMABUF_BOUND:
+ /*
+		 * get a ge_slot; on failure gs_get() sets the DMA
+		 * callback (if it isn't set already)
+ */
+ if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
+ DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
+ vdp->xdf_addr));
+ return (DDI_FAILURE);
+ }
- case DDI_DETACH:
+ vreq->v_status = VREQ_GS_ALLOCED;
+ gs->gs_vreq = vreq;
+ list_insert_head(&vreq->v_gs, gs);
break;
- default:
- return (DDI_FAILURE);
- }
+ case VREQ_GS_ALLOCED:
+		/* nothing needs to be done */
+ break;
- instance = ddi_get_instance(devi);
- DPRINTF(DDI_DBG, ("xdf%d: detaching\n", instance));
- vdp = ddi_get_soft_state(vbd_ss, instance);
+ case VREQ_DMAWIN_DONE:
+ /*
+ * move to the next dma window
+ */
+ ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);
- if (vdp == NULL)
- return (DDI_FAILURE);
+ /* get a ge_slot for this DMA window */
+ if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
+ DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
+ vdp->xdf_addr));
+ return (DDI_FAILURE);
+ }
- mutex_enter(&vdp->xdf_dev_lk);
- if (xdf_isopen(vdp, -1)) {
- mutex_exit(&vdp->xdf_dev_lk);
- return (DDI_FAILURE);
- }
+ vreq->v_dmaw++;
+ VERIFY(ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
+ &vreq->v_dmac, &vreq->v_ndmacs) == DDI_SUCCESS);
+ vreq->v_status = VREQ_GS_ALLOCED;
+ gs->gs_vreq = vreq;
+ list_insert_head(&vreq->v_gs, gs);
+ break;
- if (vdp->xdf_status != XD_CLOSED) {
- mutex_exit(&vdp->xdf_dev_lk);
+ default:
return (DDI_FAILURE);
}
-#if defined(XPV_HVM_DRIVER)
- xdf_hvm_rm(devi);
-#endif /* XPV_HVM_DRIVER */
-
- ASSERT(!ISDMACBON(vdp));
- mutex_exit(&vdp->xdf_dev_lk);
-
- if (vdp->xdf_timeout_id != 0)
- (void) untimeout(vdp->xdf_timeout_id);
-
- xvdi_remove_event_handler(devi, XS_OE_STATE);
-
- /* we'll support backend running in domU later */
-#ifdef DOMU_BACKEND
- (void) xvdi_post_event(devi, XEN_HP_REMOVE);
-#endif
-
- list_destroy(&vdp->xdf_vreq_act);
- list_destroy(&vdp->xdf_gs_act);
- ddi_prop_remove_all(devi);
- xdf_kstat_delete(devi);
- ddi_remove_softintr(vdp->xdf_softintr_id);
- ddi_set_driver_private(devi, NULL);
- cv_destroy(&vdp->xdf_dev_cv);
- mutex_destroy(&vdp->xdf_cb_lk);
- mutex_destroy(&vdp->xdf_dev_lk);
- if (vdp->xdf_cache_flush_block != NULL)
- kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE);
- ddi_soft_state_free(vbd_ss, instance);
return (DDI_SUCCESS);
}
static int
-xdf_suspend(dev_info_t *devi)
+xdf_cmlb_attach(xdf_t *vdp)
{
- xdf_t *vdp;
- int instance;
- enum xdf_state st;
-
- instance = ddi_get_instance(devi);
-
- if (xdfdebug & SUSRES_DBG)
- xen_printf("xdf_suspend: xdf#%d\n", instance);
-
- if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
- return (DDI_FAILURE);
-
- xvdi_suspend(devi);
-
- mutex_enter(&vdp->xdf_cb_lk);
- mutex_enter(&vdp->xdf_dev_lk);
- st = vdp->xdf_status;
- /* change status to stop further I/O requests */
- if (st == XD_READY)
- vdp->xdf_status = XD_SUSPEND;
- mutex_exit(&vdp->xdf_dev_lk);
- mutex_exit(&vdp->xdf_cb_lk);
+ dev_info_t *dip = vdp->xdf_dip;
- /* make sure no more I/O responses left in the ring buffer */
- if ((st == XD_INIT) || (st == XD_READY)) {
-#ifdef XPV_HVM_DRIVER
- ec_unbind_evtchn(vdp->xdf_evtchn);
- xvdi_free_evtchn(devi);
+ return (cmlb_attach(dip, &xdf_lb_ops,
+ XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT,
+ XD_IS_RM(vdp),
+ B_TRUE,
+ XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD,
+#if defined(XPV_HVM_DRIVER)
+ (XD_IS_CD(vdp) ? 0 : CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT) |
+ CMLB_INTERNAL_MINOR_NODES,
#else /* !XPV_HVM_DRIVER */
- (void) ddi_remove_intr(devi, 0, NULL);
+ XD_IS_CD(vdp) ? 0 : CMLB_FAKE_LABEL_ONE_PARTITION,
#endif /* !XPV_HVM_DRIVER */
- (void) xdf_drain_io(vdp);
- /*
- * no need to teardown the ring buffer here
- * it will be simply re-init'ed during resume when
- * we call xvdi_alloc_ring
- */
- }
-
- if (xdfdebug & SUSRES_DBG)
- xen_printf("xdf_suspend: SUCCESS\n");
-
- return (DDI_SUCCESS);
+ vdp->xdf_vd_lbl, NULL));
}
-/*ARGSUSED*/
-static int
-xdf_resume(dev_info_t *devi)
+static void
+xdf_io_err(buf_t *bp, int err, size_t resid)
{
- xdf_t *vdp;
- int instance;
-
- instance = ddi_get_instance(devi);
- if (xdfdebug & SUSRES_DBG)
- xen_printf("xdf_resume: xdf%d\n", instance);
+ bioerror(bp, err);
+ if (resid == 0)
+ bp->b_resid = bp->b_bcount;
+ biodone(bp);
+}
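+
+/*
+ * Usage sketch (hypothetical caller, for illustration only): failing
+ * a request outright, with nothing transferred, looks like
+ *
+ *	xdf_io_err(bp, EIO, 0);
+ *
+ * where resid == 0 means nothing was transferred, so b_resid is set
+ * to the full b_bcount before biodone().
+ */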
- if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
- return (DDI_FAILURE);
+static void
+xdf_kstat_enter(xdf_t *vdp, buf_t *bp)
+{
+ v_req_t *vreq = BP_VREQ(bp);
- mutex_enter(&vdp->xdf_cb_lk);
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
- if (xvdi_resume(devi) != DDI_SUCCESS) {
- mutex_exit(&vdp->xdf_cb_lk);
- return (DDI_FAILURE);
+ if (vdp->xdf_xdev_iostat == NULL)
+ return;
+ if ((vreq != NULL) && vreq->v_runq) {
+ kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
+ } else {
+ kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
}
+}
- mutex_enter(&vdp->xdf_dev_lk);
- ASSERT(vdp->xdf_status != XD_READY);
- vdp->xdf_status = XD_UNKNOWN;
- mutex_exit(&vdp->xdf_dev_lk);
-
- if (xdf_start_connect(vdp) != DDI_SUCCESS) {
- mutex_exit(&vdp->xdf_cb_lk);
- return (DDI_FAILURE);
- }
+static void
+xdf_kstat_exit(xdf_t *vdp, buf_t *bp)
+{
+ v_req_t *vreq = BP_VREQ(bp);
- mutex_exit(&vdp->xdf_cb_lk);
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
- if (xdfdebug & SUSRES_DBG)
- xen_printf("xdf_resume: done\n");
- return (DDI_SUCCESS);
+ if (vdp->xdf_xdev_iostat == NULL)
+ return;
+ if ((vreq != NULL) && vreq->v_runq) {
+ kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
+ } else {
+ kstat_waitq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
+ }
}
-/*ARGSUSED*/
-static int
-xdf_reset(dev_info_t *devi, ddi_reset_cmd_t cmd)
+static void
+xdf_kstat_waitq_to_runq(xdf_t *vdp, buf_t *bp)
{
- xdf_t *vdp;
- int instance;
-
- instance = ddi_get_instance(devi);
- DPRINTF(DDI_DBG, ("xdf%d: resetting\n", instance));
- if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
- return (DDI_FAILURE);
+ v_req_t *vreq = BP_VREQ(bp);
- /*
- * wait for any outstanding I/O to complete
- */
- (void) xdf_drain_io(vdp);
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
+ ASSERT(!vreq->v_runq);
- DPRINTF(DDI_DBG, ("xdf%d: reset complete\n", instance));
- return (DDI_SUCCESS);
+ vreq->v_runq = B_TRUE;
+ if (vdp->xdf_xdev_iostat == NULL)
+ return;
+ kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
}
-static int
-xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp)
+static void
+xdf_kstat_runq_to_waitq(xdf_t *vdp, buf_t *bp)
{
- minor_t minor;
- xdf_t *vdp;
- int part;
- ulong_t parbit;
- diskaddr_t p_blkct = 0;
- boolean_t firstopen;
- boolean_t nodelay;
+ v_req_t *vreq = BP_VREQ(bp);
- minor = getminor(*devp);
- if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
- return (ENXIO);
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
+ ASSERT(vreq->v_runq);
- nodelay = (flag & (FNDELAY | FNONBLOCK));
+ vreq->v_runq = B_FALSE;
+ if (vdp->xdf_xdev_iostat == NULL)
+ return;
+ kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
+}
- DPRINTF(DDI_DBG, ("xdf%d: opening\n", XDF_INST(minor)));
+int
+xdf_kstat_create(dev_info_t *dip, char *ks_module, int instance)
+{
+ xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
+ kstat_t *kstat;
+ buf_t *bp;
- /* do cv_wait until connected or failed */
- mutex_enter(&vdp->xdf_dev_lk);
- if (!nodelay && (xdf_connect(vdp, B_TRUE) != XD_READY)) {
- mutex_exit(&vdp->xdf_dev_lk);
- return (ENXIO);
- }
+ if ((kstat = kstat_create(
+ ks_module, instance, NULL, "disk",
+ KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL)
+ return (-1);
- if ((flag & FWRITE) && XD_IS_RO(vdp)) {
- mutex_exit(&vdp->xdf_dev_lk);
- return (EROFS);
- }
+ /* See comment about locking in xdf_kstat_delete(). */
+ mutex_enter(&vdp->xdf_iostat_lk);
+ mutex_enter(&vdp->xdf_dev_lk);
- part = XDF_PART(minor);
- parbit = 1 << part;
- if ((vdp->xdf_vd_exclopen & parbit) ||
- ((flag & FEXCL) && xdf_isopen(vdp, part))) {
+ /* only one kstat can exist at a time */
+ if (vdp->xdf_xdev_iostat != NULL) {
mutex_exit(&vdp->xdf_dev_lk);
- return (EBUSY);
+ mutex_exit(&vdp->xdf_iostat_lk);
+ kstat_delete(kstat);
+ return (-1);
}
- /* are we the first one to open this node? */
- firstopen = !xdf_isopen(vdp, -1);
-
- if (otyp == OTYP_LYR)
- vdp->xdf_vd_lyropen[part]++;
-
- vdp->xdf_vd_open[otyp] |= parbit;
-
- if (flag & FEXCL)
- vdp->xdf_vd_exclopen |= parbit;
-
- mutex_exit(&vdp->xdf_dev_lk);
-
- /* force a re-validation */
- if (firstopen)
- cmlb_invalidate(vdp->xdf_vd_lbl, NULL);
+ vdp->xdf_xdev_iostat = kstat;
+ vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk;
+ kstat_install(vdp->xdf_xdev_iostat);
/*
- * check size
- * ignore CD/DVD which contains a zero-sized s0
+ * Now that we've created a kstat, we need to update the waitq and
+ * runq counts for the kstat to reflect our current state.
+ *
+ * For a buf_t structure to be on the runq, it must have a ring
+ * buffer slot associated with it. To get a ring buffer slot the
+ * buf must first have a v_req_t and a ge_slot_t associated with it.
+ * Then when it is granted a ring buffer slot, v_runq will be set to
+ * true.
+ *
+ * For a buf_t structure to be on the waitq, it must not be on the
+ * runq. So to find all the buf_t's that should be on waitq, we
+ * walk the active buf list and add any buf_t's which aren't on the
+ * runq to the waitq.
*/
- if (!nodelay && !XD_IS_CD(vdp) &&
- ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
- NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0))) {
- (void) xdf_close(*devp, flag, otyp, credp);
- return (ENXIO);
+ bp = vdp->xdf_f_act;
+ while (bp != NULL) {
+ xdf_kstat_enter(vdp, bp);
+ bp = bp->av_forw;
}
+ if (vdp->xdf_ready_tq_bp != NULL)
+ xdf_kstat_enter(vdp, vdp->xdf_ready_tq_bp);
+ mutex_exit(&vdp->xdf_dev_lk);
+ mutex_exit(&vdp->xdf_iostat_lk);
return (0);
}
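+
+/*
+ * Sketch of a caller (hypothetical; xdf_kstat_create() is exported
+ * for drivers that layer on top of xdf): the layered driver creates
+ * the kstat under its own module name and instance, e.g.
+ *
+ *	if (xdf_kstat_create(dip, "xdf", ddi_get_instance(dip)) != 0)
+ *		return (DDI_FAILURE);
+ */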
-/*ARGSUSED*/
-static int
-xdf_close(dev_t dev, int flag, int otyp, struct cred *credp)
+void
+xdf_kstat_delete(dev_info_t *dip)
{
- minor_t minor;
- xdf_t *vdp;
- int part;
- ulong_t parbit;
-
- minor = getminor(dev);
- if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
- return (ENXIO);
+ xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
+ kstat_t *kstat;
+ buf_t *bp;
+ /*
+ * The locking order here is xdf_iostat_lk and then xdf_dev_lk.
+ * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer
+ * and the contents of the our kstat. xdf_iostat_lk is used
+	 * and the contents of our kstat.  xdf_iostat_lk is used
+ * xdf_dev_lk can't be used for this purpose because kstat
+ * readers use it to access the contents of the kstat and
+ * hence it can't be held when calling kstat_delete().
+ */
+ mutex_enter(&vdp->xdf_iostat_lk);
mutex_enter(&vdp->xdf_dev_lk);
- part = XDF_PART(minor);
- if (!xdf_isopen(vdp, part)) {
+
+ if (vdp->xdf_xdev_iostat == NULL) {
mutex_exit(&vdp->xdf_dev_lk);
- return (ENXIO);
+ mutex_exit(&vdp->xdf_iostat_lk);
+ return;
}
- parbit = 1 << part;
- ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0);
- if (otyp == OTYP_LYR) {
- ASSERT(vdp->xdf_vd_lyropen[part] > 0);
- if (--vdp->xdf_vd_lyropen[part] == 0)
- vdp->xdf_vd_open[otyp] &= ~parbit;
- } else {
- vdp->xdf_vd_open[otyp] &= ~parbit;
+ /*
+ * We're about to destroy the kstat structures, so it isn't really
+ * necessary to update the runq and waitq counts. But, since this
+ * isn't a hot code path we can afford to be a little pedantic and
+ * go ahead and decrement the runq and waitq kstat counters to zero
+	 * before freeing them.  This helps us ensure that we've gotten all
+ * our accounting correct.
+ *
+ * For an explanation of how we determine which buffers go on the
+ * runq vs which go on the waitq, see the comments in
+ * xdf_kstat_create().
+ */
+ bp = vdp->xdf_f_act;
+ while (bp != NULL) {
+ xdf_kstat_exit(vdp, bp);
+ bp = bp->av_forw;
}
- vdp->xdf_vd_exclopen &= ~parbit;
+ if (vdp->xdf_ready_tq_bp != NULL)
+ xdf_kstat_exit(vdp, vdp->xdf_ready_tq_bp);
+ kstat = vdp->xdf_xdev_iostat;
+ vdp->xdf_xdev_iostat = NULL;
mutex_exit(&vdp->xdf_dev_lk);
- return (0);
+ kstat_delete(kstat);
+ mutex_exit(&vdp->xdf_iostat_lk);
}
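+
+/*
+ * The lock ordering above, restated as a sketch (informal, derived
+ * from the comment in xdf_kstat_delete(), not new code):
+ *
+ *	mutex_enter(&vdp->xdf_iostat_lk);	-- kstat lifetime
+ *	mutex_enter(&vdp->xdf_dev_lk);		-- kstat contents
+ *	...
+ *	mutex_exit(&vdp->xdf_dev_lk);		-- drop before kstat_delete()
+ *	kstat_delete(kstat);
+ *	mutex_exit(&vdp->xdf_iostat_lk);
+ */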
-static int
-xdf_strategy(struct buf *bp)
+/*
+ * Add an IO request onto the active queue.
+ *
+ * We have to detect IOs generated by xdf_ready_tq_thread. These IOs
+ * are used to establish a connection to the backend, so they recieve
+ * priority over all other IOs. Since xdf_ready_tq_thread only does
+ * synchronous IO, there can only be one xdf_ready_tq_thread request at any
+ * given time and we record the buf associated with that request in
+ * xdf_ready_tq_bp.
+ */
+static void
+xdf_bp_push(xdf_t *vdp, buf_t *bp)
{
- xdf_t *vdp;
- minor_t minor;
- diskaddr_t p_blkct, p_blkst;
- ulong_t nblks;
- int part;
-
- minor = getminor(bp->b_edev);
- part = XDF_PART(minor);
-
- vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor));
- if ((vdp == NULL) || !xdf_isopen(vdp, part)) {
- bioerror(bp, ENXIO);
- bp->b_resid = bp->b_bcount;
- biodone(bp);
- return (0);
- }
-
- /* Check for writes to a read only device */
- if (!IS_READ(bp) && XD_IS_RO(vdp)) {
- bioerror(bp, EROFS);
- bp->b_resid = bp->b_bcount;
- biodone(bp);
- return (0);
- }
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
+ ASSERT(bp->av_forw == NULL);
- /* Check if this I/O is accessing a partition or the entire disk */
- if ((long)bp->b_private == XB_SLICE_NONE) {
- /* This I/O is using an absolute offset */
- p_blkct = vdp->xdf_xdev_nblocks;
- p_blkst = 0;
- } else {
- /* This I/O is using a partition relative offset */
- if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
- &p_blkst, NULL, NULL, NULL)) {
- bioerror(bp, ENXIO);
- bp->b_resid = bp->b_bcount;
- biodone(bp);
- return (0);
- }
- }
-
- /* check for a starting block beyond the disk or partition limit */
- if (bp->b_blkno > p_blkct) {
- DPRINTF(IO_DBG, ("xdf: block %lld exceeds VBD size %"PRIu64,
- (longlong_t)bp->b_blkno, (uint64_t)p_blkct));
- bioerror(bp, EINVAL);
- bp->b_resid = bp->b_bcount;
- biodone(bp);
- return (0);
- }
-
- /* Legacy: don't set error flag at this case */
- if (bp->b_blkno == p_blkct) {
- bp->b_resid = bp->b_bcount;
- biodone(bp);
- return (0);
- }
+ xdf_kstat_enter(vdp, bp);
- /* Adjust for partial transfer */
- nblks = bp->b_bcount >> XB_BSHIFT;
- if ((bp->b_blkno + nblks) > p_blkct) {
- bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT;
- bp->b_bcount -= bp->b_resid;
+ if (curthread == vdp->xdf_ready_tq_thread) {
+		/* a new IO request from the ready thread */
+ ASSERT(vdp->xdf_ready_tq_bp == NULL);
+ vdp->xdf_ready_tq_bp = bp;
+ return;
}
- DPRINTF(IO_DBG, ("xdf: strategy blk %lld len %lu\n",
- (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount));
-
- /* Fix up the buf struct */
- bp->b_flags |= B_BUSY;
- bp->av_forw = bp->av_back = NULL; /* not tagged with a v_req */
- bp->b_private = (void *)(uintptr_t)p_blkst;
+	/* this is a normal IO request */
+ ASSERT(bp != vdp->xdf_ready_tq_bp);
- mutex_enter(&vdp->xdf_dev_lk);
- if (vdp->xdf_xdev_iostat != NULL)
- kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
if (vdp->xdf_f_act == NULL) {
- vdp->xdf_f_act = vdp->xdf_l_act = bp;
- } else {
- vdp->xdf_l_act->av_forw = bp;
- vdp->xdf_l_act = bp;
+		/* this is the only IO on the active queue */
+ ASSERT(vdp->xdf_l_act == NULL);
+ ASSERT(vdp->xdf_i_act == NULL);
+ vdp->xdf_f_act = vdp->xdf_l_act = vdp->xdf_i_act = bp;
+ return;
}
- mutex_exit(&vdp->xdf_dev_lk);
-
- xdf_iostart(vdp);
- if (do_polled_io)
- (void) xdf_drain_io(vdp);
- return (0);
-}
-
-/*ARGSUSED*/
-static int
-xdf_read(dev_t dev, struct uio *uiop, cred_t *credp)
-{
-
- xdf_t *vdp;
- minor_t minor;
- diskaddr_t p_blkcnt;
- int part;
-
- minor = getminor(dev);
- if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
- return (ENXIO);
-
- DPRINTF(IO_DBG, ("xdf: read offset 0x%"PRIx64"\n",
- (int64_t)uiop->uio_offset));
-
- part = XDF_PART(minor);
- if (!xdf_isopen(vdp, part))
- return (ENXIO);
-
- if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
- NULL, NULL, NULL, NULL))
- return (ENXIO);
-
- if (U_INVAL(uiop))
- return (EINVAL);
-
- return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop));
-}
-
-/*ARGSUSED*/
-static int
-xdf_write(dev_t dev, struct uio *uiop, cred_t *credp)
-{
- xdf_t *vdp;
- minor_t minor;
- diskaddr_t p_blkcnt;
- int part;
-
- minor = getminor(dev);
- if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
- return (ENXIO);
-
- DPRINTF(IO_DBG, ("xdf: write offset 0x%"PRIx64"\n",
- (int64_t)uiop->uio_offset));
-
- part = XDF_PART(minor);
- if (!xdf_isopen(vdp, part))
- return (ENXIO);
-
- if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
- NULL, NULL, NULL, NULL))
- return (ENXIO);
-
- if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
- return (ENOSPC);
-
- if (U_INVAL(uiop))
- return (EINVAL);
-
- return (physio(xdf_strategy, NULL, dev, B_WRITE, minphys, uiop));
-}
-
-/*ARGSUSED*/
-static int
-xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp)
-{
- xdf_t *vdp;
- minor_t minor;
- struct uio *uiop = aiop->aio_uio;
- diskaddr_t p_blkcnt;
- int part;
-
- minor = getminor(dev);
- if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
- return (ENXIO);
-
- part = XDF_PART(minor);
- if (!xdf_isopen(vdp, part))
- return (ENXIO);
-
- if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
- NULL, NULL, NULL, NULL))
- return (ENXIO);
-
- if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
- return (ENOSPC);
- if (U_INVAL(uiop))
- return (EINVAL);
-
- return (aphysio(xdf_strategy, anocancel, dev, B_READ, minphys, aiop));
+ /* add this IO to the tail of the active queue */
+ vdp->xdf_l_act->av_forw = bp;
+ vdp->xdf_l_act = bp;
+ if (vdp->xdf_i_act == NULL)
+ vdp->xdf_i_act = bp;
}
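+
+/*
+ * Informal picture of the active queue (derived from the code above):
+ * xdf_f_act points at the oldest buf, xdf_l_act at the newest, and
+ * xdf_i_act at the first buf that still needs processing:
+ *
+ *	f_act -> [done] -> [done] -> [pending] -> [pending] <- l_act
+ *	                                 ^
+ *	                               i_act
+ */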
-/*ARGSUSED*/
-static int
-xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp)
-{
- xdf_t *vdp;
- minor_t minor;
- struct uio *uiop = aiop->aio_uio;
- diskaddr_t p_blkcnt;
- int part;
-
- minor = getminor(dev);
- if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
- return (ENXIO);
-
- part = XDF_PART(minor);
- if (!xdf_isopen(vdp, part))
- return (ENXIO);
-
- if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
- NULL, NULL, NULL, NULL))
- return (ENXIO);
-
- if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
- return (ENOSPC);
-
- if (U_INVAL(uiop))
- return (EINVAL);
-
- return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, minphys, aiop));
-}
-
-static int
-xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
+static void
+xdf_bp_pop(xdf_t *vdp, buf_t *bp)
{
- struct buf dumpbuf, *dbp;
- xdf_t *vdp;
- minor_t minor;
- int err = 0;
- int part;
- diskaddr_t p_blkcnt, p_blkst;
+ buf_t *bp_iter;
- minor = getminor(dev);
- if ((vdp = ddi_get_soft_state(vbd_ss, XDF_INST(minor))) == NULL)
- return (ENXIO);
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
+ ASSERT(VREQ_DONE(BP_VREQ(bp)));
- DPRINTF(IO_DBG, ("xdf: dump addr (0x%p) blk (%ld) nblks (%d)\n",
- (void *)addr, blkno, nblk));
-
- part = XDF_PART(minor);
- if (!xdf_isopen(vdp, part))
- return (ENXIO);
-
- if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst,
- NULL, NULL, NULL))
- return (ENXIO);
-
- if ((blkno + nblk) > p_blkcnt) {
- cmn_err(CE_WARN, "xdf: block %ld exceeds VBD size %"PRIu64,
- blkno + nblk, (uint64_t)p_blkcnt);
- return (EINVAL);
+ if (vdp->xdf_ready_tq_bp == bp) {
+ /* we're done with a ready thread IO request */
+ ASSERT(bp->av_forw == NULL);
+ vdp->xdf_ready_tq_bp = NULL;
+ return;
}
- dbp = &dumpbuf;
- bioinit(dbp);
- dbp->b_flags = B_BUSY;
- dbp->b_un.b_addr = addr;
- dbp->b_bcount = nblk << DEV_BSHIFT;
- dbp->b_blkno = blkno;
- dbp->b_edev = dev;
- dbp->b_private = (void *)(uintptr_t)p_blkst;
+ /* we're done with a normal IO request */
+ ASSERT((bp->av_forw != NULL) || (bp == vdp->xdf_l_act));
+ ASSERT((bp->av_forw == NULL) || (bp != vdp->xdf_l_act));
+ ASSERT(VREQ_DONE(BP_VREQ(vdp->xdf_f_act)));
+ ASSERT(vdp->xdf_f_act != vdp->xdf_i_act);
- mutex_enter(&vdp->xdf_dev_lk);
- if (vdp->xdf_xdev_iostat != NULL)
- kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
- if (vdp->xdf_f_act == NULL) {
- vdp->xdf_f_act = vdp->xdf_l_act = dbp;
+ if (bp == vdp->xdf_f_act) {
+ /* This IO was at the head of our active queue. */
+ vdp->xdf_f_act = bp->av_forw;
+ if (bp == vdp->xdf_l_act)
+ vdp->xdf_l_act = NULL;
} else {
- vdp->xdf_l_act->av_forw = dbp;
- vdp->xdf_l_act = dbp;
+		/* This IO finished before some other pending IOs. */
+ bp_iter = vdp->xdf_f_act;
+ while (bp != bp_iter->av_forw) {
+ bp_iter = bp_iter->av_forw;
+ ASSERT(VREQ_DONE(BP_VREQ(bp_iter)));
+ ASSERT(bp_iter != vdp->xdf_i_act);
+ }
+ bp_iter->av_forw = bp->av_forw;
+ if (bp == vdp->xdf_l_act)
+ vdp->xdf_l_act = bp_iter;
}
- dbp->av_forw = NULL;
- dbp->av_back = NULL;
- mutex_exit(&vdp->xdf_dev_lk);
- xdf_iostart(vdp);
- err = xdf_drain_io(vdp);
- biofini(dbp);
- return (err);
+ bp->av_forw = NULL;
}
-/*ARGSUSED*/
-static int
-xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
- int *rvalp)
+static buf_t *
+xdf_bp_next(xdf_t *vdp)
{
- int instance;
- xdf_t *vdp;
- minor_t minor;
- int part;
-
- minor = getminor(dev);
- instance = XDF_INST(minor);
-
- if ((vdp = ddi_get_soft_state(vbd_ss, instance)) == NULL)
- return (ENXIO);
-
- DPRINTF(IOCTL_DBG, ("xdf%d:ioctl: cmd %d (0x%x)\n",
- instance, cmd, cmd));
-
- part = XDF_PART(minor);
- if (!xdf_isopen(vdp, part))
- return (ENXIO);
-
- switch (cmd) {
- case DKIOCGMEDIAINFO: {
- struct dk_minfo media_info;
-
- media_info.dki_lbsize = DEV_BSIZE;
- media_info.dki_capacity = vdp->xdf_pgeom.g_capacity;
- media_info.dki_media_type = DK_FIXED_DISK;
+ v_req_t *vreq;
+ buf_t *bp;
- if (ddi_copyout(&media_info, (void *)arg,
- sizeof (struct dk_minfo), mode)) {
- return (EFAULT);
- } else {
- return (0);
- }
+ if (vdp->xdf_state == XD_CONNECTED) {
+ /*
+ * If we're in the XD_CONNECTED state, we only service IOs
+ * from the xdf_ready_tq_thread thread.
+ */
+ if ((bp = vdp->xdf_ready_tq_bp) == NULL)
+ return (NULL);
+ if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq)))
+ return (bp);
+ return (NULL);
}
- case DKIOCINFO: {
- struct dk_cinfo info;
-
- /* controller information */
- if (XD_IS_CD(vdp))
- info.dki_ctype = DKC_CDROM;
- else
- info.dki_ctype = DKC_VBD;
-
- info.dki_cnum = 0;
- (void) strncpy((char *)(&info.dki_cname), "xdf", 8);
+ /* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */
+ if (vdp->xdf_state != XD_READY)
+ return (NULL);
- /* unit information */
- info.dki_unit = ddi_get_instance(vdp->xdf_dip);
- (void) strncpy((char *)(&info.dki_dname), "xdf", 8);
- info.dki_flags = DKI_FMTVOL;
- info.dki_partition = part;
- info.dki_maxtransfer = maxphys / DEV_BSIZE;
- info.dki_addr = 0;
- info.dki_space = 0;
- info.dki_prio = 0;
- info.dki_vec = 0;
+ ASSERT(vdp->xdf_ready_tq_bp == NULL);
+ for (;;) {
+ if ((bp = vdp->xdf_i_act) == NULL)
+ return (NULL);
+ if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq)))
+ return (bp);
- if (ddi_copyout(&info, (void *)arg, sizeof (info), mode))
- return (EFAULT);
- else
- return (0);
+ /* advance the active buf index pointer */
+ vdp->xdf_i_act = bp->av_forw;
}
+}
- case DKIOCSTATE: {
- enum dkio_state dkstate = DKIO_INSERTED;
- if (ddi_copyout(&dkstate, (void *)arg, sizeof (dkstate),
- mode) != 0)
- return (EFAULT);
- return (0);
- }
+static void
+xdf_io_fini(xdf_t *vdp, uint64_t id, int bioerr)
+{
+ ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id;
+ v_req_t *vreq = gs->gs_vreq;
+ buf_t *bp = vreq->v_buf;
- /*
- * is media removable?
- */
- case DKIOCREMOVABLE: {
- int i = XD_IS_RM(vdp) ? 1 : 0;
- if (ddi_copyout(&i, (caddr_t)arg, sizeof (int), mode))
- return (EFAULT);
- return (0);
- }
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
+ ASSERT(BP_VREQ(bp) == vreq);
- case DKIOCG_PHYGEOM:
- case DKIOCG_VIRTGEOM:
- case DKIOCGGEOM:
- case DKIOCSGEOM:
- case DKIOCGAPART:
- case DKIOCSAPART:
- case DKIOCGVTOC:
- case DKIOCSVTOC:
- case DKIOCPARTINFO:
- case DKIOCGEXTVTOC:
- case DKIOCSEXTVTOC:
- case DKIOCEXTPARTINFO:
- case DKIOCGMBOOT:
- case DKIOCSMBOOT:
- case DKIOCGETEFI:
- case DKIOCSETEFI:
- case DKIOCPARTITION: {
- int rc;
+ gs_free(gs);
- rc = cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp,
- rvalp, NULL);
- return (rc);
- }
+ if (bioerr != 0)
+ bioerror(bp, bioerr);
+ ASSERT(vreq->v_nslots > 0);
+ if (--vreq->v_nslots > 0)
+ return;
- case DKIOCGETWCE:
- if (ddi_copyout(&vdp->xdf_wce, (void *)arg,
- sizeof (vdp->xdf_wce), mode))
- return (EFAULT);
- return (0);
- case DKIOCSETWCE:
- if (ddi_copyin((void *)arg, &vdp->xdf_wce,
- sizeof (vdp->xdf_wce), mode))
- return (EFAULT);
- return (0);
- case DKIOCFLUSHWRITECACHE: {
- int rc;
- struct dk_callback *dkc = (struct dk_callback *)arg;
+ /* remove this IO from our active queue */
+ xdf_bp_pop(vdp, bp);
- if (vdp->xdf_flush_supported) {
- rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
- NULL, 0, 0, (void *)dev);
- } else if (vdp->xdf_feature_barrier &&
- !xdf_barrier_flush_disable) {
- rc = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
- vdp->xdf_cache_flush_block, xdf_flush_block,
- DEV_BSIZE, (void *)dev);
- } else {
- return (ENOTTY);
- }
- if ((mode & FKIOCTL) && (dkc != NULL) &&
- (dkc->dkc_callback != NULL)) {
- (*dkc->dkc_callback)(dkc->dkc_cookie, rc);
- /* need to return 0 after calling callback */
- rc = 0;
- }
- return (rc);
- }
+ ASSERT(vreq->v_runq);
+ xdf_kstat_exit(vdp, bp);
+ vreq->v_runq = B_FALSE;
+ vreq_free(vdp, vreq);
- default:
- return (ENOTTY);
+ if (IS_ERROR(bp)) {
+ xdf_io_err(bp, geterror(bp), 0);
+ } else if (bp->b_resid != 0) {
+ /* Partial transfers are an error */
+ xdf_io_err(bp, EIO, bp->b_resid);
+ } else {
+ biodone(bp);
}
}
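+
+/*
+ * Note on the v_nslots accounting above (informal): a buf that spans
+ * several DMA windows consumes one ring slot per window, and
+ * xdf_io_fini() runs once per completed slot.  For a three-window
+ * transfer the teardown only happens on the last call:
+ *
+ *	v_nslots: 3 -> 2 -> 1 -> 0	(biodone on the final transition)
+ */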
@@ -1197,24 +940,20 @@ xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
* xdf interrupt handler
*/
static uint_t
-xdf_intr(caddr_t arg)
+xdf_intr_locked(xdf_t *vdp)
{
- xdf_t *vdp = (xdf_t *)arg;
xendev_ring_t *xbr;
blkif_response_t *resp;
int bioerr;
uint64_t id;
- extern int do_polled_io;
uint8_t op;
uint16_t status;
ddi_acc_handle_t acchdl;
- mutex_enter(&vdp->xdf_dev_lk);
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
- if ((xbr = vdp->xdf_xb_ring) == NULL) {
- mutex_exit(&vdp->xdf_dev_lk);
+ if ((xbr = vdp->xdf_xb_ring) == NULL)
return (DDI_INTR_UNCLAIMED);
- }
acchdl = vdp->xdf_xb_ring_hdl;
@@ -1228,164 +967,256 @@ xdf_intr(caddr_t arg)
DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n",
op, id, status));
- /*
- * XXPV - close connection to the backend and restart
- */
if (status != BLKIF_RSP_OKAY) {
DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s",
- ddi_get_name_addr(vdp->xdf_dip),
+ vdp->xdf_addr,
(op == BLKIF_OP_READ) ? "reading" : "writing"));
bioerr = EIO;
} else {
bioerr = 0;
}
- xdf_iofini(vdp, id, bioerr);
+ xdf_io_fini(vdp, id, bioerr);
}
+ return (DDI_INTR_CLAIMED);
+}
+static uint_t
+xdf_intr(caddr_t arg)
+{
+ xdf_t *vdp = (xdf_t *)arg;
+ int rv;
+
+ mutex_enter(&vdp->xdf_dev_lk);
+ rv = xdf_intr_locked(vdp);
mutex_exit(&vdp->xdf_dev_lk);
if (!do_polled_io)
- xdf_iostart(vdp);
+ xdf_io_start(vdp);
- return (DDI_INTR_CLAIMED);
+ return (rv);
}
-int xdf_fbrewrites; /* how many times was our flush block rewritten */
-
-/*
- * Snarf new data if our flush block was re-written
- */
static void
-check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno)
+xdf_ring_push(xdf_t *vdp)
{
- int nblks;
- boolean_t mapin;
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
- if (IS_WRITE_BARRIER(vdp, bp))
- return; /* write was a flush write */
+ if (vdp->xdf_xb_ring == NULL)
+ return;
- mapin = B_FALSE;
- nblks = bp->b_bcount >> DEV_BSHIFT;
- if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) {
- xdf_fbrewrites++;
- if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
- mapin = B_TRUE;
- bp_mapin(bp);
- }
- bcopy(bp->b_un.b_addr +
- ((xdf_flush_block - blkno) << DEV_BSHIFT),
- vdp->xdf_cache_flush_block, DEV_BSIZE);
- if (mapin)
- bp_mapout(bp);
+ if (xvdi_ring_push_request(vdp->xdf_xb_ring)) {
+ DPRINTF(IO_DBG, (
+ "xdf@%s: xdf_ring_push: sent request(s) to backend\n",
+ vdp->xdf_addr));
}
+
+ if (xvdi_get_evtchn(vdp->xdf_dip) != INVALID_EVTCHN)
+ xvdi_notify_oe(vdp->xdf_dip);
}
-static void
-xdf_iofini(xdf_t *vdp, uint64_t id, int bioerr)
+static int
+xdf_ring_drain_locked(xdf_t *vdp)
{
- ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id;
- v_req_t *vreq = gs->vreq;
- buf_t *bp = vreq->v_buf;
+ int pollc, rv = 0;
- gs_free(vdp, gs);
- if (bioerr)
- bioerror(bp, bioerr);
- vreq->v_nslots--;
- if (vreq->v_nslots != 0)
- return;
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
- XDF_UPDATE_IO_STAT(vdp, bp);
- if (vdp->xdf_xdev_iostat != NULL)
- kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
+ if (xdf_debug & SUSRES_DBG)
+ xen_printf("xdf_ring_drain: start\n");
- if (IS_ERROR(bp))
- bp->b_resid = bp->b_bcount;
+ for (pollc = 0; pollc < XDF_DRAIN_RETRY_COUNT; pollc++) {
+ if (vdp->xdf_xb_ring == NULL)
+ goto out;
- vreq_free(vdp, vreq);
- biodone(bp);
+ if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring))
+ (void) xdf_intr_locked(vdp);
+ if (!xvdi_ring_has_incomp_request(vdp->xdf_xb_ring))
+ goto out;
+ xdf_ring_push(vdp);
+
+ /* file-backed devices can be slow */
+ mutex_exit(&vdp->xdf_dev_lk);
+#ifdef XPV_HVM_DRIVER
+ (void) HYPERVISOR_yield();
+#endif /* XPV_HVM_DRIVER */
+ delay(drv_usectohz(XDF_DRAIN_MSEC_DELAY));
+ mutex_enter(&vdp->xdf_dev_lk);
+ }
+ cmn_err(CE_WARN, "xdf@%s: xdf_ring_drain: timeout", vdp->xdf_addr);
+
+out:
+ if (vdp->xdf_xb_ring != NULL) {
+ if (xvdi_ring_has_incomp_request(vdp->xdf_xb_ring) ||
+ xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring))
+ rv = EIO;
+ }
+ if (xdf_debug & SUSRES_DBG)
+ xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n",
+ vdp->xdf_addr, rv);
+ return (rv);
+}
+
+static int
+xdf_ring_drain(xdf_t *vdp)
+{
+ int rv;
+ mutex_enter(&vdp->xdf_dev_lk);
+ rv = xdf_ring_drain_locked(vdp);
+ mutex_exit(&vdp->xdf_dev_lk);
+ return (rv);
}
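+
+/*
+ * The drain above is bounded: it polls at most XDF_DRAIN_RETRY_COUNT
+ * times, sleeping briefly (XDF_DRAIN_MSEC_DELAY) between polls, so a
+ * wedged backend makes xdf_ring_drain() fail with EIO after a fixed
+ * interval rather than hanging forever.
+ */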
/*
- * return value of xdf_prepare_rreq()
- * used in xdf_iostart()
+ * Destroy all v_req_t, grant table entries, and our ring buffer.
*/
-#define XF_PARTIAL 0 /* rreq is full, not all I/O in buf transferred */
-#define XF_COMP 1 /* no more I/O left in buf */
-
static void
-xdf_iostart(xdf_t *vdp)
+xdf_ring_destroy(xdf_t *vdp)
{
- xendev_ring_t *xbr;
- struct buf *bp;
- blkif_request_t *rreq;
- int retval;
- int rreqready = 0;
+ v_req_t *vreq;
+ buf_t *bp;
+ ge_slot_t *gs;
+
+ ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
+
+ if ((vdp->xdf_state != XD_INIT) &&
+ (vdp->xdf_state != XD_CONNECTED) &&
+ (vdp->xdf_state != XD_READY)) {
+ ASSERT(vdp->xdf_xb_ring == NULL);
+ ASSERT(vdp->xdf_xb_ring_hdl == NULL);
+ ASSERT(vdp->xdf_peer == INVALID_DOMID);
+ ASSERT(vdp->xdf_evtchn == INVALID_EVTCHN);
+ ASSERT(list_is_empty(&vdp->xdf_vreq_act));
+ return;
+ }
- xbr = vdp->xdf_xb_ring;
+ /*
+	 * We don't want to receive async notifications from the backend
+ * when it finishes processing ring entries.
+ */
+#ifdef XPV_HVM_DRIVER
+ ec_unbind_evtchn(vdp->xdf_evtchn);
+#else /* !XPV_HVM_DRIVER */
+ (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
+#endif /* !XPV_HVM_DRIVER */
/*
- * populate the ring request(s)
- *
- * loop until there is no buf to transfer or no free slot
- * available in I/O ring
+ * Drain any requests in the ring. We need to do this before we
+ * can free grant table entries, because if active ring entries
+ * point to grants, then the backend could be trying to access
+ * those grants.
*/
- mutex_enter(&vdp->xdf_dev_lk);
+ (void) xdf_ring_drain_locked(vdp);
- for (;;) {
- if (vdp->xdf_status != XD_READY)
- break;
+ /* We're done talking to the backend so free up our event channel */
+ xvdi_free_evtchn(vdp->xdf_dip);
+ vdp->xdf_evtchn = INVALID_EVTCHN;
- /* active buf queue empty? */
- if ((bp = vdp->xdf_f_act) == NULL)
- break;
+ while ((vreq = list_head(&vdp->xdf_vreq_act)) != NULL) {
+ bp = vreq->v_buf;
+ ASSERT(BP_VREQ(bp) == vreq);
- /* try to grab a vreq for this bp */
- if ((BP2VREQ(bp) == NULL) && (vreq_get(vdp, bp) == NULL))
- break;
- /* alloc DMA/GTE resources */
- if (vreq_setup(vdp, BP2VREQ(bp)) != DDI_SUCCESS)
- break;
+		/* Free up any grant table entries associated with this IO */
+ while ((gs = list_head(&vreq->v_gs)) != NULL)
+ gs_free(gs);
- /* get next blkif_request in the ring */
- if ((rreq = xvdi_ring_get_request(xbr)) == NULL)
- break;
- bzero(rreq, sizeof (blkif_request_t));
+ /* If this IO was on the runq, move it back to the waitq. */
+ if (vreq->v_runq)
+ xdf_kstat_runq_to_waitq(vdp, bp);
- /* populate blkif_request with this buf */
- rreqready++;
- retval = xdf_prepare_rreq(vdp, bp, rreq);
- if (retval == XF_COMP) {
- /* finish this bp, switch to next one */
- if (vdp->xdf_xdev_iostat != NULL)
- kstat_waitq_to_runq(
- KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
- vdp->xdf_f_act = bp->av_forw;
- bp->av_forw = NULL;
- }
+ /*
+ * Reset any buf IO state since we're going to re-issue the
+ * IO when we reconnect.
+ */
+ vreq_free(vdp, vreq);
+ BP_VREQ_SET(bp, NULL);
+ bioerror(bp, 0);
}
- /*
- * Send the request(s) to the backend
- */
- if (rreqready) {
- if (xvdi_ring_push_request(xbr)) {
- DPRINTF(IO_DBG, ("xdf_iostart: "
- "sent request(s) to backend\n"));
- xvdi_notify_oe(vdp->xdf_dip);
- }
+ /* reset the active queue index pointer */
+ vdp->xdf_i_act = vdp->xdf_f_act;
+
+ /* Destroy the ring */
+ xvdi_free_ring(vdp->xdf_xb_ring);
+ vdp->xdf_xb_ring = NULL;
+ vdp->xdf_xb_ring_hdl = NULL;
+ vdp->xdf_peer = INVALID_DOMID;
+}
+
+void
+xdfmin(struct buf *bp)
+{
+ if (bp->b_bcount > xdf_maxphys)
+ bp->b_bcount = xdf_maxphys;
+}
+
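+/*
+ * xdfmin() follows the minphys(9F) contract: physio() invokes it to
+ * clamp each transfer to xdf_maxphys.  A read entry point passes it
+ * as the clamp routine, as the old xdf_read() did:
+ *
+ *	return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop));
+ */
+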
+/*
+ * Check if we have a pending "eject" media request.
+ */
+static boolean_t
+xdf_eject_pending(xdf_t *vdp)
+{
+ dev_info_t *dip = vdp->xdf_dip;
+ char *xsname, *str;
+
+ if (!vdp->xdf_media_req_supported)
+ return (B_FALSE);
+
+ if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
+ (xenbus_read_str(xsname, XBP_MEDIA_REQ, &str) != 0))
+ return (B_FALSE);
+
+ if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) {
+ strfree(str);
+ return (B_FALSE);
}
+ strfree(str);
+ return (B_TRUE);
+}
- mutex_exit(&vdp->xdf_dev_lk);
+/*
+ * Generate a media request.
+ */
+static int
+xdf_media_req(xdf_t *vdp, char *req, boolean_t media_required)
+{
+ dev_info_t *dip = vdp->xdf_dip;
+ char *xsname;
+
+ ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
+
+ if ((xsname = xvdi_get_xsname(dip)) == NULL)
+ return (ENXIO);
+
+ /* Check if we support media requests */
+ if (!XD_IS_CD(vdp) || !vdp->xdf_media_req_supported)
+ return (ENOTTY);
+
+ /* If an eject is pending then don't allow any new requests */
+ if (xdf_eject_pending(vdp))
+ return (ENXIO);
+
+ /* Make sure that there is media present */
+ if (media_required && (vdp->xdf_xdev_nblocks == 0))
+ return (ENXIO);
+
+ /* We only allow operations when the device is ready and connected */
+ if (vdp->xdf_state != XD_READY)
+ return (EIO);
+
+ if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ, "%s", req) != 0)
+ return (EIO);
+
+ return (0);
}
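+
+/*
+ * Usage sketch (hypothetical caller, holding xdf_cb_lk per the ASSERT
+ * above): an eject issued from an ioctl handler would look like
+ *
+ *	mutex_enter(&vdp->xdf_cb_lk);
+ *	rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_TRUE);
+ *	mutex_exit(&vdp->xdf_cb_lk);
+ */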
/*
* populate a single blkif_request_t w/ a buf
*/
-static int
-xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
+static void
+xdf_process_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
{
- int rval;
grant_ref_t gr;
uint8_t fsect, lsect;
size_t bcnt;
@@ -1393,12 +1224,16 @@ xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
off_t blk_off;
dev_info_t *dip = vdp->xdf_dip;
blkif_vdev_t vdev = xvdi_get_vdevnum(dip);
- v_req_t *vreq = BP2VREQ(bp);
+ v_req_t *vreq = BP_VREQ(bp);
uint64_t blkno = vreq->v_blkno;
uint_t ndmacs = vreq->v_ndmacs;
ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl;
int seg = 0;
int isread = IS_READ(bp);
+ ge_slot_t *gs = list_head(&vreq->v_gs);
+
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
+ ASSERT(vreq->v_status == VREQ_GS_ALLOCED);
if (isread)
ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ);
@@ -1409,9 +1244,10 @@ xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
BLKIF_OP_FLUSH_DISKCACHE);
ddi_put16(acchdl, &rreq->handle, vdev);
ddi_put64(acchdl, &rreq->id,
- (uint64_t)(uintptr_t)(vreq->v_gs));
+ (uint64_t)(uintptr_t)(gs));
ddi_put8(acchdl, &rreq->nr_segments, 0);
- return (XF_COMP);
+ vreq->v_status = VREQ_DMAWIN_DONE;
+ return;
case WRITE_BARRIER:
ddi_put8(acchdl, &rreq->operation,
BLKIF_OP_WRITE_BARRIER);
@@ -1429,34 +1265,39 @@ xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
ddi_put16(acchdl, &rreq->handle, vdev);
ddi_put64(acchdl, &rreq->sector_number, blkno);
- ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(vreq->v_gs));
+ ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(gs));
/*
* loop until all segments are populated or no more dma cookie in buf
*/
for (;;) {
- /*
- * Each segment of a blkif request can transfer up to
- * one 4K page of data.
- */
+ /*
+ * Each segment of a blkif request can transfer up to
+ * one 4K page of data.
+ */
bcnt = vreq->v_dmac.dmac_size;
- ASSERT(bcnt <= PAGESIZE);
- ASSERT((bcnt % XB_BSIZE) == 0);
dma_addr = vreq->v_dmac.dmac_laddress;
blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr);
- ASSERT((blk_off & XB_BMASK) == 0);
fsect = blk_off >> XB_BSHIFT;
lsect = fsect + (bcnt >> XB_BSHIFT) - 1;
+
+ ASSERT(bcnt <= PAGESIZE);
+ ASSERT((bcnt % XB_BSIZE) == 0);
+ ASSERT((blk_off & XB_BMASK) == 0);
ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE &&
lsect < XB_MAX_SEGLEN / XB_BSIZE);
- DPRINTF(IO_DBG, (" ""seg%d: dmacS %lu blk_off %ld\n",
- seg, vreq->v_dmac.dmac_size, blk_off));
- gr = gs_grant(vreq->v_gs, PATOMA(dma_addr) >> PAGESHIFT);
+
+ gr = gs_grant(gs, PATOMA(dma_addr) >> PAGESHIFT);
ddi_put32(acchdl, &rreq->seg[seg].gref, gr);
ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect);
ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect);
- DPRINTF(IO_DBG, (" ""seg%d: fs %d ls %d gr %d dma 0x%"PRIx64
- "\n", seg, fsect, lsect, gr, dma_addr));
+
+ DPRINTF(IO_DBG, (
+ "xdf@%s: seg%d: dmacS %lu blk_off %ld\n",
+ vdp->xdf_addr, seg, vreq->v_dmac.dmac_size, blk_off));
+ DPRINTF(IO_DBG, (
+ "xdf@%s: seg%d: fs %d ls %d gr %d dma 0x%"PRIx64"\n",
+ vdp->xdf_addr, seg, fsect, lsect, gr, dma_addr));
blkno += (bcnt >> XB_BSHIFT);
seg++;
@@ -1468,243 +1309,243 @@ xdf_prepare_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
vreq->v_status = VREQ_DMAWIN_DONE;
vreq->v_blkno = blkno;
- if (vreq->v_dmaw + 1 == vreq->v_ndmaws)
- /* last win */
- rval = XF_COMP;
- else
- rval = XF_PARTIAL;
break;
}
ddi_put8(acchdl, &rreq->nr_segments, seg);
- DPRINTF(IO_DBG, ("xdf_prepare_rreq: request id=%"PRIx64" ready\n",
- rreq->id));
-
- return (rval);
+ DPRINTF(IO_DBG, (
+ "xdf@%s: xdf_process_rreq: request id=%"PRIx64" ready\n",
+ vdp->xdf_addr, rreq->id));
}
-#define XDF_QSEC 50000 /* .005 second */
-#define XDF_POLLCNT 12 /* loop for 12 times before time out */
-
-static int
-xdf_drain_io(xdf_t *vdp)
+static void
+xdf_io_start(xdf_t *vdp)
{
- int pollc, rval;
- xendev_ring_t *xbr;
-
- if (xdfdebug & SUSRES_DBG)
- xen_printf("xdf_drain_io: start\n");
+ struct buf *bp;
+ v_req_t *vreq;
+ blkif_request_t *rreq;
+ boolean_t rreqready = B_FALSE;
mutex_enter(&vdp->xdf_dev_lk);
- if ((vdp->xdf_status != XD_READY) && (vdp->xdf_status != XD_SUSPEND))
- goto out;
+ /*
+ * Populate the ring request(s). Loop until there is no buf to
+	 * transfer or no free slot available in the I/O ring.
+ */
+ for (;;) {
+ /* don't start any new IO if we're suspending */
+ if (vdp->xdf_suspending)
+ break;
+ if ((bp = xdf_bp_next(vdp)) == NULL)
+ break;
- rval = 0;
- xbr = vdp->xdf_xb_ring;
- ASSERT(xbr != NULL);
+ /* if the buf doesn't already have a vreq, allocate one */
+ if (((vreq = BP_VREQ(bp)) == NULL) &&
+ ((vreq = vreq_get(vdp, bp)) == NULL))
+ break;
- for (pollc = 0; pollc < XDF_POLLCNT; pollc++) {
- if (xvdi_ring_has_unconsumed_responses(xbr)) {
- mutex_exit(&vdp->xdf_dev_lk);
- (void) xdf_intr((caddr_t)vdp);
- mutex_enter(&vdp->xdf_dev_lk);
- }
- if (!xvdi_ring_has_incomp_request(xbr))
- goto out;
+ /* alloc DMA/GTE resources */
+ if (vreq_setup(vdp, vreq) != DDI_SUCCESS)
+ break;
+
+ /* get next blkif_request in the ring */
+ if ((rreq = xvdi_ring_get_request(vdp->xdf_xb_ring)) == NULL)
+ break;
+ bzero(rreq, sizeof (blkif_request_t));
+ rreqready = B_TRUE;
+
+ /* populate blkif_request with this buf */
+ xdf_process_rreq(vdp, bp, rreq);
-#ifndef XPV_HVM_DRIVER
- (void) HYPERVISOR_yield();
-#endif /* XPV_HVM_DRIVER */
/*
- * file-backed devices can be slow
+		 * This buffer/vreq pair has been allocated ring buffer
+		 * resources, so if it isn't already in our runq, add it.
*/
- drv_usecwait(XDF_QSEC << pollc);
+ if (!vreq->v_runq)
+ xdf_kstat_waitq_to_runq(vdp, bp);
}
- cmn_err(CE_WARN, "xdf_polled_io: timeout");
- rval = EIO;
-out:
+
+ /* Send the request(s) to the backend */
+ if (rreqready)
+ xdf_ring_push(vdp);
+
mutex_exit(&vdp->xdf_dev_lk);
- if (xdfdebug & SUSRES_DBG)
- xen_printf("xdf_drain_io: end, err=%d\n", rval);
- return (rval);
}
-/* ARGSUSED5 */
-int
-xdf_lb_rdwr(dev_info_t *devi, uchar_t cmd, void *bufp,
- diskaddr_t start, size_t reqlen, void *tg_cookie)
-{
- xdf_t *vdp;
- struct buf *bp;
- int err = 0;
- vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi));
- if (vdp == NULL)
- return (ENXIO);
+/* check if partition is open, -1 - check all partitions on the disk */
+static boolean_t
+xdf_isopen(xdf_t *vdp, int partition)
+{
+ int i;
+ ulong_t parbit;
+ boolean_t rval = B_FALSE;
- if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity)
- return (EINVAL);
+ ASSERT((partition == -1) ||
+	    ((partition >= 0) && (partition < XDF_PEXT)));
- bp = getrbuf(KM_SLEEP);
- if (cmd == TG_READ)
- bp->b_flags = B_BUSY | B_READ;
+ if (partition == -1)
+ parbit = (ulong_t)-1;
else
- bp->b_flags = B_BUSY | B_WRITE;
- bp->b_un.b_addr = bufp;
- bp->b_bcount = reqlen;
- bp->b_blkno = start;
- bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */
+ parbit = 1 << partition;
- mutex_enter(&vdp->xdf_dev_lk);
- if (vdp->xdf_xdev_iostat != NULL)
- kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
- if (vdp->xdf_f_act == NULL) {
- vdp->xdf_f_act = vdp->xdf_l_act = bp;
- } else {
- vdp->xdf_l_act->av_forw = bp;
- vdp->xdf_l_act = bp;
+ for (i = 0; i < OTYPCNT; i++) {
+ if (vdp->xdf_vd_open[i] & parbit)
+ rval = B_TRUE;
}
- mutex_exit(&vdp->xdf_dev_lk);
- xdf_iostart(vdp);
- err = biowait(bp);
-
- ASSERT(bp->b_flags & B_DONE);
- freerbuf(bp);
- return (err);
+ return (rval);
}
/*
- * synthetic geometry
+ * The connection should never be closed as long as someone is holding
+ * us open, there is pending IO, or someone is waiting for a
+ * connection.
*/
-#define XDF_NSECTS 256
-#define XDF_NHEADS 16
-
-static void
-xdf_synthetic_pgeom(dev_info_t *devi, cmlb_geom_t *geomp)
+static boolean_t
+xdf_busy(xdf_t *vdp)
{
- xdf_t *vdp;
- uint_t ncyl;
-
- vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi));
-
- ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS);
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
- geomp->g_ncyl = ncyl == 0 ? 1 : ncyl;
- geomp->g_acyl = 0;
- geomp->g_nhead = XDF_NHEADS;
- geomp->g_secsize = XB_BSIZE;
- geomp->g_nsect = XDF_NSECTS;
- geomp->g_intrlv = 0;
- geomp->g_rpm = 7200;
- geomp->g_capacity = vdp->xdf_xdev_nblocks;
-}
+ if ((vdp->xdf_xb_ring != NULL) &&
+ xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
+ ASSERT(vdp->xdf_state != XD_CLOSED);
+ return (B_TRUE);
+ }
-static int
-xdf_lb_getcap(dev_info_t *devi, diskaddr_t *capp)
-{
- xdf_t *vdp;
+ if (!list_is_empty(&vdp->xdf_vreq_act) || (vdp->xdf_f_act != NULL)) {
+ ASSERT(vdp->xdf_state != XD_CLOSED);
+ return (B_TRUE);
+ }
- vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi));
+ if (xdf_isopen(vdp, -1)) {
+ ASSERT(vdp->xdf_state != XD_CLOSED);
+ return (B_TRUE);
+ }
- if (vdp == NULL)
- return (ENXIO);
+ if (vdp->xdf_connect_req > 0) {
+ ASSERT(vdp->xdf_state != XD_CLOSED);
+ return (B_TRUE);
+ }
- mutex_enter(&vdp->xdf_dev_lk);
- *capp = vdp->xdf_pgeom.g_capacity;
- DPRINTF(LBL_DBG, ("capacity %llu\n", *capp));
- mutex_exit(&vdp->xdf_dev_lk);
- return (0);
+ return (B_FALSE);
}
-static int
-xdf_lb_getpgeom(dev_info_t *devi, cmlb_geom_t *geomp)
+static void
+xdf_set_state(xdf_t *vdp, xdf_state_t new_state)
{
- xdf_t *vdp;
-
- if ((vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi))) == NULL)
- return (ENXIO);
- *geomp = vdp->xdf_pgeom;
- return (0);
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
+ DPRINTF(DDI_DBG, ("xdf@%s: state change %d -> %d\n",
+ vdp->xdf_addr, vdp->xdf_state, new_state));
+ vdp->xdf_state = new_state;
+ cv_broadcast(&vdp->xdf_dev_cv);
}
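+
+/*
+ * Since xdf_set_state() broadcasts on xdf_dev_cv, a waiter can sleep
+ * until a state transition occurs; a sketch (assuming the caller
+ * already holds xdf_dev_lk, which cv_wait() requires):
+ *
+ *	while (vdp->xdf_state != XD_READY)
+ *		cv_wait(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk);
+ */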
-/*
- * No real HBA, no geometry available from it
- */
-/*ARGSUSED*/
-static int
-xdf_lb_getvgeom(dev_info_t *devi, cmlb_geom_t *geomp)
+static void
+xdf_disconnect(xdf_t *vdp, xdf_state_t new_state, boolean_t quiet)
{
- return (EINVAL);
-}
+ dev_info_t *dip = vdp->xdf_dip;
+ boolean_t busy;
-static int
-xdf_lb_getattribute(dev_info_t *devi, tg_attribute_t *tgattributep)
-{
- xdf_t *vdp;
+ ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
+ ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
+ ASSERT((new_state == XD_UNKNOWN) || (new_state == XD_CLOSED));
- if (!(vdp = ddi_get_soft_state(vbd_ss, ddi_get_instance(devi))))
- return (ENXIO);
+ /* Check if we're already there. */
+ if (vdp->xdf_state == new_state)
+ return;
- if (XD_IS_RO(vdp))
- tgattributep->media_is_writable = 0;
- else
- tgattributep->media_is_writable = 1;
- return (0);
-}
+ mutex_enter(&vdp->xdf_dev_lk);
+ busy = xdf_busy(vdp);
-/* ARGSUSED3 */
-int
-xdf_lb_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
-{
- switch (cmd) {
- case TG_GETPHYGEOM:
- return (xdf_lb_getpgeom(devi, (cmlb_geom_t *)arg));
- case TG_GETVIRTGEOM:
- return (xdf_lb_getvgeom(devi, (cmlb_geom_t *)arg));
- case TG_GETCAPACITY:
- return (xdf_lb_getcap(devi, (diskaddr_t *)arg));
- case TG_GETBLOCKSIZE:
- *(uint32_t *)arg = XB_BSIZE;
- return (0);
- case TG_GETATTR:
- return (xdf_lb_getattribute(devi, (tg_attribute_t *)arg));
- default:
- return (ENOTTY);
+	/* If we're already closed then there's nothing to do. */
+ if (vdp->xdf_state == XD_CLOSED) {
+ ASSERT(!busy);
+ xdf_set_state(vdp, new_state);
+ mutex_exit(&vdp->xdf_dev_lk);
+ return;
+ }
+
+#ifdef DEBUG
+	/* Uh oh.  Warn the user that something bad has happened. */
+ if (!quiet && busy && (vdp->xdf_state == XD_READY) &&
+ (vdp->xdf_xdev_nblocks != 0)) {
+ cmn_err(CE_WARN, "xdf@%s: disconnected while in use",
+ vdp->xdf_addr);
}
+#endif /* DEBUG */
+
+ xdf_ring_destroy(vdp);
+
+ /* If we're busy then we can only go into the unknown state */
+ xdf_set_state(vdp, (busy) ? XD_UNKNOWN : new_state);
+ mutex_exit(&vdp->xdf_dev_lk);
+
+ /* if we're closed now, let the other end know */
+ if (vdp->xdf_state == XD_CLOSED)
+ (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
}
+
/*
* Kick-off connect process
* Status should be XD_UNKNOWN or XD_CLOSED
* On success, status will be changed to XD_INIT
- * On error, status won't be changed
+ * On error, it will be changed to XD_UNKNOWN
*/
static int
-xdf_start_connect(xdf_t *vdp)
+xdf_setstate_init(xdf_t *vdp)
{
- char *xsnode;
- grant_ref_t gref;
- xenbus_transaction_t xbt;
- int rv;
- dev_info_t *dip = vdp->xdf_dip;
+ dev_info_t *dip = vdp->xdf_dip;
+ xenbus_transaction_t xbt;
+ grant_ref_t gref;
+ char *xsname, *str;
+ int rv;
+
+ ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
+ ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
+ ASSERT((vdp->xdf_state == XD_UNKNOWN) ||
+ (vdp->xdf_state == XD_CLOSED));
+
+ DPRINTF(DDI_DBG,
+ ("xdf@%s: starting connection process\n", vdp->xdf_addr));
- if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == (domid_t)-1)
+ /*
+	 * If an eject is pending then don't allow a new connection;
+	 * fail quietly, without displaying an error message.
+ */
+ if (xdf_eject_pending(vdp)) {
+ xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
+ return (DDI_FAILURE);
+ }
+
+ if ((xsname = xvdi_get_xsname(dip)) == NULL)
goto errout;
- if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS) {
- cmn_err(CE_WARN, "xdf@%s: failed to alloc event channel",
- ddi_get_name_addr(dip));
+ if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == INVALID_DOMID)
goto errout;
- }
+
+ (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialising);
+
+ /*
+	 * Sanity check for the existence of the xenbus device-type property.
+	 * This property might not exist if our xenbus device node was
+	 * forcibly destroyed while we were still connected to the backend.
+ */
+ if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0)
+ goto errout;
+ strfree(str);
+
+ if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS)
+ goto errout;
+
vdp->xdf_evtchn = xvdi_get_evtchn(dip);
#ifdef XPV_HVM_DRIVER
ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp);
#else /* !XPV_HVM_DRIVER */
if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) !=
DDI_SUCCESS) {
- cmn_err(CE_WARN, "xdf_start_connect: xdf@%s: "
- "failed to add intr handler", ddi_get_name_addr(dip));
+ cmn_err(CE_WARN, "xdf@%s: xdf_setstate_init: "
+ "failed to add intr handler", vdp->xdf_addr);
goto errout1;
}
#endif /* !XPV_HVM_DRIVER */
@@ -1713,7 +1554,7 @@ xdf_start_connect(xdf_t *vdp)
sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) !=
DDI_SUCCESS) {
cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring",
- ddi_get_name_addr(dip));
+ vdp->xdf_addr);
goto errout2;
}
vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */
@@ -1721,75 +1562,48 @@ xdf_start_connect(xdf_t *vdp)
/*
* Write into xenstore the info needed by backend
*/
- if ((xsnode = xvdi_get_xsname(dip)) == NULL) {
- cmn_err(CE_WARN, "xdf@%s: "
- "failed to get xenstore node path",
- ddi_get_name_addr(dip));
- goto fail_trans;
- }
trans_retry:
if (xenbus_transaction_start(&xbt)) {
cmn_err(CE_WARN, "xdf@%s: failed to start transaction",
- ddi_get_name_addr(dip));
- xvdi_fatal_error(dip, EIO, "transaction start");
+ vdp->xdf_addr);
+ xvdi_fatal_error(dip, EIO, "connect transaction init");
goto fail_trans;
}
- if (rv = xenbus_printf(xbt, xsnode, "ring-ref", "%u", gref)) {
- cmn_err(CE_WARN, "xdf@%s: failed to write ring-ref",
- ddi_get_name_addr(dip));
- xvdi_fatal_error(dip, rv, "writing ring-ref");
- goto abort_trans;
- }
-
- if (rv = xenbus_printf(xbt, xsnode, "event-channel", "%u",
- vdp->xdf_evtchn)) {
- cmn_err(CE_WARN, "xdf@%s: failed to write event-channel",
- ddi_get_name_addr(dip));
- xvdi_fatal_error(dip, rv, "writing event-channel");
- goto abort_trans;
- }
-
/*
- * "protocol" is written by the domain builder in the case of PV
+ * XBP_PROTOCOL is written by the domain builder in the case of PV
* domains. However, it is not written for HVM domains, so let's
* write it here.
*/
- if (rv = xenbus_printf(xbt, xsnode, "protocol", "%s",
- XEN_IO_PROTO_ABI_NATIVE)) {
- cmn_err(CE_WARN, "xdf@%s: failed to write protocol",
- ddi_get_name_addr(dip));
- xvdi_fatal_error(dip, rv, "writing protocol");
- goto abort_trans;
- }
-
- if ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0) {
- cmn_err(CE_WARN, "xdf@%s: "
- "failed to switch state to XenbusStateInitialised",
- ddi_get_name_addr(dip));
- xvdi_fatal_error(dip, rv, "writing state");
- goto abort_trans;
+ if (((rv = xenbus_printf(xbt, xsname,
+ XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE)) != 0) ||
+ ((rv = xenbus_printf(xbt, xsname,
+ XBP_RING_REF, "%u", gref)) != 0) ||
+ ((rv = xenbus_printf(xbt, xsname,
+ XBP_EVENT_CHAN, "%u", vdp->xdf_evtchn)) != 0) ||
+ ((rv = xenbus_printf(xbt, xsname,
+ XBP_PROTOCOL, "%s", XEN_IO_PROTO_ABI_NATIVE)) != 0) ||
+ ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0)) {
+ (void) xenbus_transaction_end(xbt, 1);
+ xvdi_fatal_error(dip, rv, "connect transaction setup");
+ goto fail_trans;
}
/* kick-off connect process */
if (rv = xenbus_transaction_end(xbt, 0)) {
if (rv == EAGAIN)
goto trans_retry;
- cmn_err(CE_WARN, "xdf@%s: failed to end transaction",
- ddi_get_name_addr(dip));
- xvdi_fatal_error(dip, rv, "completing transaction");
+ xvdi_fatal_error(dip, rv, "connect transaction commit");
goto fail_trans;
}
- ASSERT(mutex_owned(&vdp->xdf_cb_lk));
+ ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
mutex_enter(&vdp->xdf_dev_lk);
- vdp->xdf_status = XD_INIT;
+ xdf_set_state(vdp, XD_INIT);
mutex_exit(&vdp->xdf_dev_lk);
return (DDI_SUCCESS);
-abort_trans:
- (void) xenbus_transaction_end(xbt, 1);
fail_trans:
xvdi_free_ring(vdp->xdf_xb_ring);
errout2:
@@ -1800,28 +1614,14 @@ errout2:
#endif /* !XPV_HVM_DRIVER */
errout1:
xvdi_free_evtchn(dip);
+ vdp->xdf_evtchn = INVALID_EVTCHN;
errout:
- cmn_err(CE_WARN, "xdf@%s: fail to kick-off connecting",
- ddi_get_name_addr(dip));
+ xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
+ cmn_err(CE_WARN, "xdf@%s: failed to start connection to backend",
+ vdp->xdf_addr);
return (DDI_FAILURE);
}
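+
+/*
+ * The trans_retry label above implements the usual xenbus idiom: if
+ * xenbus_transaction_end() fails with EAGAIN, the transaction raced
+ * with another xenstore writer and must be replayed from the start.
+ * Stripped to its skeleton (informal):
+ *
+ *	trans_retry:
+ *		xenbus_transaction_start(&xbt);
+ *		... write ring-ref, event-channel, protocol ...
+ *		if (xenbus_transaction_end(xbt, 0) == EAGAIN)
+ *			goto trans_retry;
+ */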
-/*
- * Kick-off disconnect process
- * Status won't be changed
- */
-static int
-xdf_start_disconnect(xdf_t *vdp)
-{
- if (xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed) > 0) {
- cmn_err(CE_WARN, "xdf@%s: fail to kick-off disconnecting",
- ddi_get_name_addr(vdp->xdf_dip));
- return (DDI_FAILURE);
- }
-
- return (DDI_SUCCESS);
-}
-
int
xdf_get_flush_block(xdf_t *vdp)
{
@@ -1837,92 +1637,12 @@ xdf_get_flush_block(xdf_t *vdp)
return (DDI_SUCCESS);
}
-/*
- * Finish other initialization after we've connected to backend
- * Status should be XD_INIT before calling this routine
- * On success, status should be changed to XD_READY
- * On error, status should stay XD_INIT
- */
-static int
-xdf_post_connect(xdf_t *vdp)
+static void
+xdf_setstate_ready(void *arg)
{
- int rv;
- uint_t len;
- char *type;
- char *barrier;
- dev_info_t *devi = vdp->xdf_dip;
+ xdf_t *vdp = (xdf_t *)arg;
- /*
- * Determine if feature barrier is supported by backend
- */
- if (xenbus_read(XBT_NULL, xvdi_get_oename(devi),
- "feature-barrier", (void **)&barrier, &len) == 0) {
- vdp->xdf_feature_barrier = 1;
- kmem_free(barrier, len);
- } else {
- cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier",
- ddi_get_name_addr(vdp->xdf_dip));
- vdp->xdf_feature_barrier = 0;
- }
-
- /* probe backend */
- if (rv = xenbus_gather(XBT_NULL, xvdi_get_oename(devi),
- "sectors", "%"SCNu64, &vdp->xdf_xdev_nblocks,
- "info", "%u", &vdp->xdf_xdev_info, NULL)) {
- cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: "
- "cannot read backend info", ddi_get_name_addr(devi));
- xvdi_fatal_error(devi, rv, "reading backend info");
- return (DDI_FAILURE);
- }
-
- /*
- * Make sure that the device we're connecting isn't smaller than
- * the old connected device.
- */
- if (vdp->xdf_xdev_nblocks < vdp->xdf_pgeom.g_capacity) {
- cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: "
- "backend disk device shrank", ddi_get_name_addr(devi));
- /* XXX: call xvdi_fatal_error() here? */
- xvdi_fatal_error(devi, rv, "reading backend info");
- return (DDI_FAILURE);
- }
-
-#ifdef _ILP32
- if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) {
- cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: "
- "backend disk device too large with %llu blocks for"
- " 32-bit kernel", ddi_get_name_addr(devi),
- vdp->xdf_xdev_nblocks);
- xvdi_fatal_error(devi, rv, "reading backend info");
- return (DDI_FAILURE);
- }
-#endif
-
-
- /*
- * Only update the physical geometry to reflect the new device
- * size if this is the first time we're connecting to the backend
- * device. Once we assign a physical geometry to a device it stays
- * fixed until:
- * - we get detach and re-attached (at which point we
- * automatically assign a new physical geometry).
- * - someone calls TG_SETPHYGEOM to explicity set the
- * physical geometry.
- */
- if (vdp->xdf_pgeom.g_capacity == 0)
- xdf_synthetic_pgeom(devi, &vdp->xdf_pgeom);
-
- /* fix disk type */
- if (xenbus_read(XBT_NULL, xvdi_get_xsname(devi), "device-type",
- (void **)&type, &len) != 0) {
- cmn_err(CE_WARN, "xdf_post_connect: xdf@%s: "
- "cannot read device-type", ddi_get_name_addr(devi));
- xvdi_fatal_error(devi, rv, "reading device-type");
- return (DDI_FAILURE);
- }
- if (strcmp(type, "cdrom") == 0)
- vdp->xdf_xdev_info |= VDISK_CDROM;
- kmem_free(type, len);
+ vdp->xdf_ready_tq_thread = curthread;
/*
* We've created all the minor nodes via cmlb_attach() using default
@@ -1930,50 +1650,41 @@ xdf_post_connect(xdf_t *vdp)
* in case there's anyone (say, booting thread) ever trying to open
* it before connected to backend. We will refresh all those minor
* nodes w/ latest info we've got now when we are almost connected.
- *
- * Don't do this when xdf is already opened by someone (could happen
- * during resume), for that cmlb_attach() will invalid the label info
- * and confuse those who has already opened the node, which is bad.
*/
- if (!xdf_isopen(vdp, -1) && (XD_IS_CD(vdp) || XD_IS_RM(vdp))) {
- /* re-init cmlb w/ latest info we got from backend */
- if (cmlb_attach(devi, &xdf_lb_ops,
- XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT,
- XD_IS_RM(vdp), 1,
- XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD,
-#if defined(XPV_HVM_DRIVER)
- CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT |
- CMLB_INTERNAL_MINOR_NODES,
-#else /* !XPV_HVM_DRIVER */
- CMLB_FAKE_LABEL_ONE_PARTITION,
-#endif /* !XPV_HVM_DRIVER */
- vdp->xdf_vd_lbl, NULL) != 0) {
- cmn_err(CE_WARN, "xdf@%s: cmlb attach failed",
- ddi_get_name_addr(devi));
- return (DDI_FAILURE);
+ mutex_enter(&vdp->xdf_dev_lk);
+ if (vdp->xdf_cmbl_reattach) {
+ vdp->xdf_cmbl_reattach = B_FALSE;
+
+ mutex_exit(&vdp->xdf_dev_lk);
+ if (xdf_cmlb_attach(vdp) != 0) {
+ xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
+ return;
}
+ mutex_enter(&vdp->xdf_dev_lk);
}
- /* mark vbd is ready for I/O */
- ASSERT(mutex_owned(&vdp->xdf_cb_lk));
- mutex_enter(&vdp->xdf_dev_lk);
- vdp->xdf_status = XD_READY;
+ /* If we're not still trying to get to the ready state, then bail. */
+ if (vdp->xdf_state != XD_CONNECTED) {
+ mutex_exit(&vdp->xdf_dev_lk);
+ return;
+ }
mutex_exit(&vdp->xdf_dev_lk);
+
/*
* If backend has feature-barrier, see if it supports disk
* cache flush op.
*/
- vdp->xdf_flush_supported = 0;
+ vdp->xdf_flush_supported = B_FALSE;
if (vdp->xdf_feature_barrier) {
/*
* Pretend we already know flush is supported so probe
* will attempt the correct op.
*/
- vdp->xdf_flush_supported = 1;
+ vdp->xdf_flush_supported = B_TRUE;
if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) {
- vdp->xdf_flush_supported = 1;
+ vdp->xdf_flush_supported = B_TRUE;
} else {
- vdp->xdf_flush_supported = 0;
+ vdp->xdf_flush_supported = B_FALSE;
/*
* If the other end does not support the cache flush op
* then we must use a barrier-write to force disk
@@ -1985,39 +1696,182 @@ xdf_post_connect(xdf_t *vdp)
* (512 bytes) from whatever write we did last
* and rewrite that block?
*/
- if (xdf_get_flush_block(vdp) != DDI_SUCCESS)
- return (DDI_FAILURE);
+ if (xdf_get_flush_block(vdp) != DDI_SUCCESS) {
+ xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
+ return;
+ }
}
}
- cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", ddi_get_name_addr(devi),
- (uint64_t)vdp->xdf_xdev_nblocks);
+ mutex_enter(&vdp->xdf_cb_lk);
+ mutex_enter(&vdp->xdf_dev_lk);
+ if (vdp->xdf_state == XD_CONNECTED)
+ xdf_set_state(vdp, XD_READY);
+ mutex_exit(&vdp->xdf_dev_lk);
- return (DDI_SUCCESS);
+ /* Restart any currently queued up io */
+ xdf_io_start(vdp);
+
+ mutex_exit(&vdp->xdf_cb_lk);
}
/*
- * Finish other uninitialization after we've disconnected from backend
- * when status is XD_CLOSING or XD_INIT. After returns, status is XD_CLOSED
+ * synthetic geometry
*/
+#define XDF_NSECTS 256
+#define XDF_NHEADS 16
+
static void
-xdf_post_disconnect(xdf_t *vdp)
+xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp)
{
-#ifdef XPV_HVM_DRIVER
- ec_unbind_evtchn(vdp->xdf_evtchn);
-#else /* !XPV_HVM_DRIVER */
- (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
-#endif /* !XPV_HVM_DRIVER */
- xvdi_free_evtchn(vdp->xdf_dip);
- xvdi_free_ring(vdp->xdf_xb_ring);
- vdp->xdf_xb_ring = NULL;
- vdp->xdf_xb_ring_hdl = NULL;
- vdp->xdf_peer = (domid_t)-1;
+ xdf_t *vdp;
+ uint_t ncyl;
+
+ vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
+
+ ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS);
- ASSERT(mutex_owned(&vdp->xdf_cb_lk));
+ bzero(geomp, sizeof (*geomp));
+ geomp->g_ncyl = ncyl == 0 ? 1 : ncyl;
+ geomp->g_acyl = 0;
+ geomp->g_nhead = XDF_NHEADS;
+ geomp->g_nsect = XDF_NSECTS;
+ geomp->g_secsize = XB_BSIZE;
+ geomp->g_capacity = vdp->xdf_xdev_nblocks;
+ geomp->g_intrlv = 0;
+ geomp->g_rpm = 7200;
+}
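+
+/*
+ * For example: a 1GB backend device with 512-byte sectors has
+ * xdf_xdev_nblocks = 2097152, giving ncyl = 2097152 / (16 * 256) = 512.
+ * Note that g_capacity is taken directly from xdf_xdev_nblocks, so any
+ * blocks that don't fill out a complete cylinder are still counted in
+ * the capacity.
+ */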
+
+/*
+ * Finish the remaining initialization after we've connected to the backend.
+ * The state should be XD_INIT before calling this routine.
+ * On success, the state is changed to XD_CONNECTED.
+ * On error, the state stays XD_INIT.
+ */
+static int
+xdf_setstate_connected(xdf_t *vdp)
+{
+ dev_info_t *dip = vdp->xdf_dip;
+ cmlb_geom_t pgeom;
+ diskaddr_t nblocks = 0;
+ char *oename, *xsname, *str;
+ uint_t dinfo;
+
+ ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
+ ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
+ ASSERT(vdp->xdf_state == XD_INIT);
+
+ if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
+ ((oename = xvdi_get_oename(dip)) == NULL))
+ return (DDI_FAILURE);
+
+ /* Determine if feature barrier is supported by backend */
+ if (!(vdp->xdf_feature_barrier = xenbus_exists(oename, XBP_FB)))
+ cmn_err(CE_NOTE, "xdf@%s: failed to read feature-barrier",
+ vdp->xdf_addr);
+
+ /*
+ * Probe backend. Read the device size into xdf_xdev_nblocks
+ * and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE
+ * flags in xdf_dinfo. If the emulated device type is "cdrom",
+ * we always set VDISK_CDROM, regardless of whether it's present in
+ * the xenbus info parameter.
+ */
+ if (xenbus_gather(XBT_NULL, oename,
+ XBP_SECTORS, "%"SCNu64, &nblocks,
+ XBP_INFO, "%u", &dinfo,
+ NULL) != 0) {
+ cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: "
+ "cannot read backend info", vdp->xdf_addr);
+ return (DDI_FAILURE);
+ }
+ if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) {
+ cmn_err(CE_WARN, "xdf@%s: cannot read device-type",
+ vdp->xdf_addr);
+ return (DDI_FAILURE);
+ }
+ if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
+ dinfo |= VDISK_CDROM;
+ strfree(str);
+
+ vdp->xdf_xdev_nblocks = nblocks;
+#ifdef _ILP32
+ if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) {
+ cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: "
+ "backend disk device too large with %llu blocks for"
+ " 32-bit kernel", vdp->xdf_addr, vdp->xdf_xdev_nblocks);
+ xvdi_fatal_error(dip, EFBIG, "reading backend info");
+ return (DDI_FAILURE);
+ }
+#endif
+
+ /*
+ * If the physical geometry for a fixed disk has been explicitly
+ * set then make sure that the specified physical geometry isn't
+ * larger than the device we connected to.
+ */
+ if (vdp->xdf_pgeom_fixed &&
+ (vdp->xdf_pgeom.g_capacity > vdp->xdf_xdev_nblocks)) {
+ cmn_err(CE_WARN,
+ "xdf@%s: connect failed, fixed geometry too large",
+ vdp->xdf_addr);
+ return (DDI_FAILURE);
+ }
+
+ vdp->xdf_media_req_supported = xenbus_exists(oename, XBP_MEDIA_REQ_SUP);
+
+ /* mark the vbd as connected */
mutex_enter(&vdp->xdf_dev_lk);
- vdp->xdf_status = XD_CLOSED;
+ xdf_set_state(vdp, XD_CONNECTED);
+
+ /* check if the cmlb label should be updated */
+ xdf_synthetic_pgeom(dip, &pgeom);
+ if ((vdp->xdf_dinfo != dinfo) ||
+ (!vdp->xdf_pgeom_fixed &&
+ (memcmp(&vdp->xdf_pgeom, &pgeom, sizeof (pgeom)) != 0))) {
+ vdp->xdf_cmbl_reattach = B_TRUE;
+
+ vdp->xdf_dinfo = dinfo;
+ if (!vdp->xdf_pgeom_fixed)
+ vdp->xdf_pgeom = pgeom;
+ }
+
+ if (XD_IS_CD(vdp) || XD_IS_RM(vdp)) {
+ if (vdp->xdf_xdev_nblocks == 0)
+ vdp->xdf_mstate = DKIO_EJECTED;
+ else
+ vdp->xdf_mstate = DKIO_INSERTED;
+ cv_broadcast(&vdp->xdf_mstate_cv);
+ } else if (vdp->xdf_mstate != DKIO_NONE) {
+ vdp->xdf_mstate = DKIO_NONE;
+ cv_broadcast(&vdp->xdf_mstate_cv);
+ }
+
mutex_exit(&vdp->xdf_dev_lk);
+
+ cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", vdp->xdf_addr,
+ (uint64_t)vdp->xdf_xdev_nblocks);
+
+ /* Restart any currently queued up io */
+ xdf_io_start(vdp);
+
+ /*
+ * To get to the ready state we have to do IO to the backend device,
+ * but we can't initiate IO from the other end change callback thread
+ * (which is the current context we're executing in). This is because
+ * if the other end disconnects while we're doing IO from the callback
+ * thread, then we can't receive that disconnect event and we hang
+ * waiting for an IO that can never complete.
+ */
+ (void) ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp,
+ DDI_SLEEP);
+
+ (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
+ return (DDI_SUCCESS);
}
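
/*
 * A rough sketch of the soft state machine implied by the routines above
 * and below:
 *
 *	XD_UNKNOWN --xdf_setstate_init()-----------> XD_INIT
 *	XD_INIT ----xdf_setstate_connected()-------> XD_CONNECTED
 *	XD_CONNECTED --xdf_setstate_ready() (tq)---> XD_READY
 *	any state --xdf_disconnect()---------------> XD_UNKNOWN or XD_CLOSED
 *
 * XD_SUSPEND marks the suspended state; xdf_resume() resets the state to
 * XD_UNKNOWN and re-runs xdf_setstate_init() before reconnecting.
 */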
/*ARGSUSED*/
@@ -2026,881 +1880,1646 @@ xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data)
{
XenbusState new_state = *(XenbusState *)impl_data;
xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
- boolean_t unexpect_die = B_FALSE;
- int status;
DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n",
- ddi_get_name_addr(dip), new_state));
+ vdp->xdf_addr, new_state));
mutex_enter(&vdp->xdf_cb_lk);
- if (xdf_check_state_transition(vdp, new_state) == DDI_FAILURE) {
+ /* We assume that this callback is single threaded */
+ ASSERT(vdp->xdf_oe_change_thread == NULL);
+ DEBUG_EVAL(vdp->xdf_oe_change_thread = curthread);
+
+ /* ignore any backend state changes if we're suspending/suspended */
+ if (vdp->xdf_suspending || (vdp->xdf_state == XD_SUSPEND)) {
+ DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL);
mutex_exit(&vdp->xdf_cb_lk);
return;
}
switch (new_state) {
+ case XenbusStateUnknown:
case XenbusStateInitialising:
- ASSERT(vdp->xdf_status == XD_CLOSED);
- /*
- * backend recovered from a previous failure,
- * kick-off connect process again
- */
- if (xdf_start_connect(vdp) != DDI_SUCCESS) {
- cmn_err(CE_WARN, "xdf@%s:"
- " failed to start reconnecting to backend",
- ddi_get_name_addr(dip));
- }
+ case XenbusStateInitWait:
+ case XenbusStateInitialised:
+ if (vdp->xdf_state == XD_INIT)
+ break;
+
+ xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
+ if (xdf_setstate_init(vdp) != DDI_SUCCESS)
+ break;
+ ASSERT(vdp->xdf_state == XD_INIT);
break;
+
case XenbusStateConnected:
- ASSERT(vdp->xdf_status == XD_INIT);
- (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
- /* finish final init after connect */
- if (xdf_post_connect(vdp) != DDI_SUCCESS)
- (void) xdf_start_disconnect(vdp);
+ if ((vdp->xdf_state == XD_CONNECTED) ||
+ (vdp->xdf_state == XD_READY))
+ break;
+
+ if (vdp->xdf_state != XD_INIT) {
+ xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
+ if (xdf_setstate_init(vdp) != DDI_SUCCESS)
+ break;
+ ASSERT(vdp->xdf_state == XD_INIT);
+ }
+
+ if (xdf_setstate_connected(vdp) != DDI_SUCCESS) {
+ xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
+ break;
+ }
+ ASSERT(vdp->xdf_state == XD_CONNECTED);
break;
+
case XenbusStateClosing:
- mutex_enter(&vdp->xdf_dev_lk);
if (xdf_isopen(vdp, -1)) {
- cmn_err(CE_NOTE, "xdf@%s: hot-unplug failed, "
- "still in use", ddi_get_name_addr(dip));
- } else {
- if ((vdp->xdf_status == XD_READY) ||
- (vdp->xdf_status == XD_INIT))
- vdp->xdf_status = XD_CLOSING;
- (void) xdf_start_disconnect(vdp);
+ cmn_err(CE_NOTE,
+ "xdf@%s: hot-unplug failed, still in use",
+ vdp->xdf_addr);
+ break;
}
- mutex_exit(&vdp->xdf_dev_lk);
- break;
+ /*FALLTHROUGH*/
case XenbusStateClosed:
- /* first check if BE closed unexpectedly */
- mutex_enter(&vdp->xdf_dev_lk);
- if (xdf_isopen(vdp, -1)) {
- unexpect_die = B_TRUE;
- unexpectedie(vdp);
- cmn_err(CE_WARN, "xdf@%s: backend closed, "
- "reconnecting...", ddi_get_name_addr(dip));
- }
- mutex_exit(&vdp->xdf_dev_lk);
+ xdf_disconnect(vdp, XD_CLOSED, B_FALSE);
+ break;
+ }
- if (vdp->xdf_status == XD_READY) {
- mutex_enter(&vdp->xdf_dev_lk);
- vdp->xdf_status = XD_CLOSING;
- mutex_exit(&vdp->xdf_dev_lk);
+ /* notify anybody waiting for oe state change */
+ cv_broadcast(&vdp->xdf_dev_cv);
+ DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL);
+ mutex_exit(&vdp->xdf_cb_lk);
+}
-#ifdef DOMU_BACKEND
- (void) xvdi_post_event(dip, XEN_HP_REMOVE);
-#endif
+static int
+xdf_connect_locked(xdf_t *vdp, boolean_t wait)
+{
+ int rv;
- xdf_post_disconnect(vdp);
- (void) xvdi_switch_state(dip, XBT_NULL,
- XenbusStateClosed);
- } else if ((vdp->xdf_status == XD_INIT) ||
- (vdp->xdf_status == XD_CLOSING)) {
- xdf_post_disconnect(vdp);
- } else {
- mutex_enter(&vdp->xdf_dev_lk);
- vdp->xdf_status = XD_CLOSED;
- mutex_exit(&vdp->xdf_dev_lk);
- }
+ ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
+
+ /* we can't connect once we're in the closed state */
+ if (vdp->xdf_state == XD_CLOSED)
+ return (XD_CLOSED);
+
+ vdp->xdf_connect_req++;
+ while (vdp->xdf_state != XD_READY) {
+ mutex_exit(&vdp->xdf_dev_lk);
+ if (vdp->xdf_state == XD_UNKNOWN)
+ (void) xdf_setstate_init(vdp);
+ mutex_enter(&vdp->xdf_dev_lk);
+
+ if (!wait || (vdp->xdf_state == XD_READY))
+ goto out;
+
+ mutex_exit(&vdp->xdf_cb_lk);
+ rv = cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk);
+ mutex_exit(&vdp->xdf_dev_lk);
+ mutex_enter(&vdp->xdf_cb_lk);
+ mutex_enter(&vdp->xdf_dev_lk);
+ if (rv == 0)
+ goto out;
}
- /* notify anybody waiting for oe state change */
+out:
+ ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
+
+ /* Try to lock the media */
+ (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
+
+ vdp->xdf_connect_req--;
+ return (vdp->xdf_state);
+}
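+
+/*
+ * Sketch of the expected calling pattern (mirrors xdf_open() and
+ * xdf_ioctl_eject() below): the caller takes xdf_cb_lk and then
+ * xdf_dev_lk, and treats anything other than XD_READY as a failure:
+ *
+ *	mutex_enter(&vdp->xdf_cb_lk);
+ *	mutex_enter(&vdp->xdf_dev_lk);
+ *	if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) {
+ *		mutex_exit(&vdp->xdf_dev_lk);
+ *		mutex_exit(&vdp->xdf_cb_lk);
+ *		return (ENXIO);
+ *	}
+ *	mutex_exit(&vdp->xdf_dev_lk);
+ *	mutex_exit(&vdp->xdf_cb_lk);
+ */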
+
+static uint_t
+xdf_iorestart(caddr_t arg)
+{
+ xdf_t *vdp = (xdf_t *)arg;
+
+ ASSERT(vdp != NULL);
+
mutex_enter(&vdp->xdf_dev_lk);
- cv_broadcast(&vdp->xdf_dev_cv);
+ ASSERT(ISDMACBON(vdp));
+ SETDMACBOFF(vdp);
mutex_exit(&vdp->xdf_dev_lk);
- status = vdp->xdf_status;
- mutex_exit(&vdp->xdf_cb_lk);
+ xdf_io_start(vdp);
+
+ return (DDI_INTR_CLAIMED);
+}
+
+#if defined(XPV_HVM_DRIVER)
+
+typedef struct xdf_hvm_entry {
+ list_node_t xdf_he_list;
+ char *xdf_he_path;
+ dev_info_t *xdf_he_dip;
+} xdf_hvm_entry_t;
+
+static list_t xdf_hvm_list;
+static kmutex_t xdf_hvm_list_lock;
- if (status == XD_READY) {
- xdf_iostart(vdp);
- } else if ((status == XD_CLOSED) && !unexpect_die) {
- /* interface is closed successfully, remove all minor nodes */
- if (vdp->xdf_vd_lbl != NULL) {
- cmlb_detach(vdp->xdf_vd_lbl, NULL);
- cmlb_free_handle(&vdp->xdf_vd_lbl);
- vdp->xdf_vd_lbl = NULL;
+static xdf_hvm_entry_t *
+i_xdf_hvm_find(const char *path, dev_info_t *dip)
+{
+ xdf_hvm_entry_t *i;
+
+ ASSERT((path != NULL) || (dip != NULL));
+ ASSERT(MUTEX_HELD(&xdf_hvm_list_lock));
+
+ i = list_head(&xdf_hvm_list);
+ while (i != NULL) {
+ if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) {
+ i = list_next(&xdf_hvm_list, i);
+ continue;
}
+ if ((dip != NULL) && (i->xdf_he_dip != dip)) {
+ i = list_next(&xdf_hvm_list, i);
+ continue;
+ }
+ break;
}
+ return (i);
}
-/* check if partition is open, -1 - check all partitions on the disk */
-static boolean_t
-xdf_isopen(xdf_t *vdp, int partition)
+dev_info_t *
+xdf_hvm_hold(const char *path)
{
- int i;
- ulong_t parbit;
- boolean_t rval = B_FALSE;
+ xdf_hvm_entry_t *i;
+ dev_info_t *dip;
- ASSERT((partition == -1) ||
- ((partition >= 0) || (partition < XDF_PEXT)));
+ mutex_enter(&xdf_hvm_list_lock);
+ i = i_xdf_hvm_find(path, NULL);
+ if (i == NULL) {
+ mutex_exit(&xdf_hvm_list_lock);
+ return (NULL);
+ }
+ ndi_hold_devi(dip = i->xdf_he_dip);
+ mutex_exit(&xdf_hvm_list_lock);
+ return (dip);
+}
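+
+/*
+ * Usage sketch: an HVM shell driver can translate the device path of a
+ * PV disk into a held xdf dip (the path below is hypothetical):
+ *
+ *	dev_info_t *dip;
+ *
+ *	if ((dip = xdf_hvm_hold("/xpvd/xdf@768")) != NULL) {
+ *		... use the held dip ...
+ *		ndi_rele_devi(dip);
+ *	}
+ */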
- if (partition == -1)
- parbit = (ulong_t)-1;
- else
- parbit = 1 << partition;
+static void
+xdf_hvm_add(dev_info_t *dip)
+{
+ xdf_hvm_entry_t *i;
+ char *path;
- for (i = 0; i < OTYPCNT; i++) {
- if (vdp->xdf_vd_open[i] & parbit)
- rval = B_TRUE;
- }
+ /* figure out the path for the dip */
+ path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
+ (void) ddi_pathname(dip, path);
- return (rval);
+ i = kmem_alloc(sizeof (*i), KM_SLEEP);
+ i->xdf_he_dip = dip;
+ i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP);
+
+ mutex_enter(&xdf_hvm_list_lock);
+ ASSERT(i_xdf_hvm_find(path, NULL) == NULL);
+ ASSERT(i_xdf_hvm_find(NULL, dip) == NULL);
+ list_insert_head(&xdf_hvm_list, i);
+ mutex_exit(&xdf_hvm_list_lock);
+
+ kmem_free(path, MAXPATHLEN);
}
-/*
- * Xdf_check_state_transition will check the XenbusState change to see
- * if the change is a valid transition or not.
- * The new state is written by backend domain, or by running xenstore-write
- * to change it manually in dom0
- */
-static int
-xdf_check_state_transition(xdf_t *vdp, XenbusState oestate)
-{
- int status;
- int stcheck;
-#define STOK 0 /* need further process */
-#define STNOP 1 /* no action need taking */
-#define STBUG 2 /* unexpected state change, could be a bug */
-
- status = vdp->xdf_status;
- stcheck = STOK;
-
- switch (status) {
- case XD_UNKNOWN:
- if ((oestate == XenbusStateUnknown) ||
- (oestate == XenbusStateConnected))
- stcheck = STBUG;
- else if ((oestate == XenbusStateInitialising) ||
- (oestate == XenbusStateInitWait) ||
- (oestate == XenbusStateInitialised))
- stcheck = STNOP;
- break;
- case XD_INIT:
- if (oestate == XenbusStateUnknown)
- stcheck = STBUG;
- else if ((oestate == XenbusStateInitialising) ||
- (oestate == XenbusStateInitWait) ||
- (oestate == XenbusStateInitialised))
- stcheck = STNOP;
- break;
- case XD_READY:
- if ((oestate == XenbusStateUnknown) ||
- (oestate == XenbusStateInitialising) ||
- (oestate == XenbusStateInitWait) ||
- (oestate == XenbusStateInitialised))
- stcheck = STBUG;
- else if (oestate == XenbusStateConnected)
- stcheck = STNOP;
- break;
- case XD_CLOSING:
- if ((oestate == XenbusStateUnknown) ||
- (oestate == XenbusStateInitialising) ||
- (oestate == XenbusStateInitWait) ||
- (oestate == XenbusStateInitialised) ||
- (oestate == XenbusStateConnected))
- stcheck = STBUG;
- else if (oestate == XenbusStateClosing)
- stcheck = STNOP;
- break;
- case XD_CLOSED:
- if ((oestate == XenbusStateUnknown) ||
- (oestate == XenbusStateConnected))
- stcheck = STBUG;
- else if ((oestate == XenbusStateInitWait) ||
- (oestate == XenbusStateInitialised) ||
- (oestate == XenbusStateClosing) ||
- (oestate == XenbusStateClosed))
- stcheck = STNOP;
- break;
- case XD_SUSPEND:
- default:
- stcheck = STBUG;
- }
+static void
+xdf_hvm_rm(dev_info_t *dip)
+{
+ xdf_hvm_entry_t *i;
- if (stcheck == STOK)
- return (DDI_SUCCESS);
+ mutex_enter(&xdf_hvm_list_lock);
+ VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL);
+ list_remove(&xdf_hvm_list, i);
+ mutex_exit(&xdf_hvm_list_lock);
- if (stcheck == STBUG)
- cmn_err(CE_NOTE, "xdf@%s: unexpected otherend "
- "state change to %d!, when status is %d",
- ddi_get_name_addr(vdp->xdf_dip), oestate, status);
+ kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1);
+ kmem_free(i, sizeof (*i));
+}
- return (DDI_FAILURE);
+static void
+xdf_hvm_init(void)
+{
+ list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t),
+ offsetof(xdf_hvm_entry_t, xdf_he_list));
+ mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL);
}
-static int
-xdf_connect(xdf_t *vdp, boolean_t wait)
+static void
+xdf_hvm_fini(void)
{
- ASSERT(mutex_owned(&vdp->xdf_dev_lk));
- while (vdp->xdf_status != XD_READY) {
- if (!wait || (vdp->xdf_status > XD_READY))
- break;
+ ASSERT(list_head(&xdf_hvm_list) == NULL);
+ list_destroy(&xdf_hvm_list);
+ mutex_destroy(&xdf_hvm_list_lock);
+}
+
+boolean_t
+xdf_hvm_connect(dev_info_t *dip)
+{
+ xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
+ char *oename, *str;
+ int rv;
+
+ mutex_enter(&vdp->xdf_cb_lk);
+ mutex_enter(&vdp->xdf_dev_lk);
+
+ /*
+ * Before trying to establish a connection we need to wait for the
+ * backend hotplug scripts to have run. Once they are run the
+ * "<oename>/hotplug-status" property will be set to "connected".
+ */
+ for (;;) {
+ ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
+
+ /*
+ * Get the xenbus path to the backend device. Note that
+ * we can't cache this path (and we look it up on each pass
+ * through this loop) because it could change during
+ * suspend, resume, and migration operations.
+ */
+ if ((oename = xvdi_get_oename(dip)) == NULL) {
+ mutex_exit(&vdp->xdf_dev_lk);
+ mutex_exit(&vdp->xdf_cb_lk);
+ return (B_FALSE);
+ }
- if (cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk) == 0)
+ str = NULL;
+ if ((xenbus_read_str(oename, XBP_HP_STATUS, &str) == 0) &&
+ (strcmp(str, XBV_HP_STATUS_CONN) == 0))
break;
+
+ if (str != NULL)
+ strfree(str);
+
+ /* wait for an update to "<oename>/hotplug-status" */
+ mutex_exit(&vdp->xdf_dev_lk);
+ if (cv_wait_sig(&vdp->xdf_hp_status_cv, &vdp->xdf_cb_lk) == 0) {
+ /* we got interrupted by a signal */
+ mutex_exit(&vdp->xdf_cb_lk);
+ return (B_FALSE);
+ }
+ mutex_enter(&vdp->xdf_dev_lk);
+ }
+
+ /* Good news. The backend hotplug scripts have been run. */
+ ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
+ ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
+ ASSERT(strcmp(str, XBV_HP_STATUS_CONN) == 0);
+ strfree(str);
+
+ /*
+ * If we're emulating a cd device and if the backend doesn't support
+ * media request operations, then we're not going to bother trying
+ * to establish a connection, for a couple of reasons. First, media
+ * request support is required to support operations like eject and
+ * media locking. Second, other backend platforms like Linux don't
+ * support hvm pv cdrom access. They don't even have a backend pv
+ * driver for cdrom device nodes, so we don't want to block forever
+ * waiting for a connection to a backend driver that doesn't exist.
+ */
+ if (XD_IS_CD(vdp) && !xenbus_exists(oename, XBP_MEDIA_REQ_SUP)) {
+ mutex_exit(&vdp->xdf_dev_lk);
+ mutex_exit(&vdp->xdf_cb_lk);
+ return (B_FALSE);
}
- return (vdp->xdf_status);
+ rv = xdf_connect_locked(vdp, B_TRUE);
+ mutex_exit(&vdp->xdf_dev_lk);
+ mutex_exit(&vdp->xdf_cb_lk);
+
+ return ((rv == XD_READY) ? B_TRUE : B_FALSE);
}
-/*
- * callback func when DMA/GTE resources is available
- *
- * Note: we only register one callback function to grant table subsystem
- * since we only have one 'struct gnttab_free_callback' in xdf_t.
- */
-static int
-xdf_dmacallback(caddr_t arg)
+int
+xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
{
- xdf_t *vdp = (xdf_t *)arg;
- ASSERT(vdp != NULL);
+ xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
- DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n",
- ddi_get_name_addr(vdp->xdf_dip)));
+ /* sanity check the requested physical geometry */
+ mutex_enter(&vdp->xdf_dev_lk);
+ if ((geomp->g_secsize != XB_BSIZE) ||
+ (geomp->g_capacity == 0)) {
+ mutex_exit(&vdp->xdf_dev_lk);
+ return (EINVAL);
+ }
- ddi_trigger_softintr(vdp->xdf_softintr_id);
- return (DDI_DMA_CALLBACK_DONE);
+ /*
+ * If we've already connected to the backend device then make sure
+ * we're not defining a physical geometry larger than our backend
+ * device.
+ */
+ if ((vdp->xdf_xdev_nblocks != 0) &&
+ (geomp->g_capacity > vdp->xdf_xdev_nblocks)) {
+ mutex_exit(&vdp->xdf_dev_lk);
+ return (EINVAL);
+ }
+
+ bzero(&vdp->xdf_pgeom, sizeof (vdp->xdf_pgeom));
+ vdp->xdf_pgeom.g_ncyl = geomp->g_ncyl;
+ vdp->xdf_pgeom.g_acyl = geomp->g_acyl;
+ vdp->xdf_pgeom.g_nhead = geomp->g_nhead;
+ vdp->xdf_pgeom.g_nsect = geomp->g_nsect;
+ vdp->xdf_pgeom.g_secsize = geomp->g_secsize;
+ vdp->xdf_pgeom.g_capacity = geomp->g_capacity;
+ vdp->xdf_pgeom.g_intrlv = geomp->g_intrlv;
+ vdp->xdf_pgeom.g_rpm = geomp->g_rpm;
+
+ vdp->xdf_pgeom_fixed = B_TRUE;
+ mutex_exit(&vdp->xdf_dev_lk);
+
+ /* force a re-validation */
+ cmlb_invalidate(vdp->xdf_vd_lbl, NULL);
+
+ return (0);
}
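
/*
 * Usage sketch (hypothetical values): a shell driver that learned the
 * geometry of the emulated device could pin it on the PV node like so:
 *
 *	cmlb_geom_t geom;
 *
 *	bzero(&geom, sizeof (geom));
 *	geom.g_secsize = XB_BSIZE;	(must match, per the check above)
 *	geom.g_nhead = 16;
 *	geom.g_nsect = 63;
 *	geom.g_ncyl = 1024;
 *	geom.g_capacity = (diskaddr_t)1024 * 16 * 63;	(must be non-zero)
 *	if (xdf_hvm_setpgeom(dip, &geom) != 0)
 *		... geometry rejected ...
 */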
-static uint_t
-xdf_iorestart(caddr_t arg)
+boolean_t
+xdf_is_cd(dev_info_t *dip)
{
- xdf_t *vdp = (xdf_t *)arg;
+ xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
+ boolean_t rv;
- ASSERT(vdp != NULL);
+ mutex_enter(&vdp->xdf_cb_lk);
+ rv = XD_IS_CD(vdp);
+ mutex_exit(&vdp->xdf_cb_lk);
+ return (rv);
+}
- mutex_enter(&vdp->xdf_dev_lk);
- ASSERT(ISDMACBON(vdp));
- SETDMACBOFF(vdp);
- mutex_exit(&vdp->xdf_dev_lk);
+boolean_t
+xdf_is_rm(dev_info_t *dip)
+{
+ xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
+ boolean_t rv;
- xdf_iostart(vdp);
+ mutex_enter(&vdp->xdf_cb_lk);
+ rv = XD_IS_RM(vdp);
+ mutex_exit(&vdp->xdf_cb_lk);
+ return (rv);
+}
- return (DDI_INTR_CLAIMED);
+boolean_t
+xdf_media_req_supported(dev_info_t *dip)
+{
+ xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
+ boolean_t rv;
+
+ mutex_enter(&vdp->xdf_cb_lk);
+ rv = vdp->xdf_media_req_supported;
+ mutex_exit(&vdp->xdf_cb_lk);
+ return (rv);
}
-static void
-xdf_timeout_handler(void *arg)
+#endif /* XPV_HVM_DRIVER */
+
+static int
+xdf_lb_getcap(dev_info_t *dip, diskaddr_t *capp)
{
- xdf_t *vdp = arg;
+ xdf_t *vdp;
+ vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
+
+ if (vdp == NULL)
+ return (ENXIO);
mutex_enter(&vdp->xdf_dev_lk);
- vdp->xdf_timeout_id = 0;
+ *capp = vdp->xdf_pgeom.g_capacity;
+ DPRINTF(LBL_DBG, ("xdf@%s:capacity %llu\n", vdp->xdf_addr, *capp));
mutex_exit(&vdp->xdf_dev_lk);
+ return (0);
+}
- /* new timeout thread could be re-scheduled */
- xdf_iostart(vdp);
+static int
+xdf_lb_getpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
+{
+ xdf_t *vdp;
+
+ if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL)
+ return (ENXIO);
+ *geomp = vdp->xdf_pgeom;
+ return (0);
}
/*
- * Alloc a vreq for this bp
- * bp->av_back contains the pointer to the vreq upon return
+ * No real HBA, no geometry available from it
*/
-static v_req_t *
-vreq_get(xdf_t *vdp, buf_t *bp)
+/*ARGSUSED*/
+static int
+xdf_lb_getvgeom(dev_info_t *dip, cmlb_geom_t *geomp)
{
- v_req_t *vreq = NULL;
+ return (EINVAL);
+}
- ASSERT(BP2VREQ(bp) == NULL);
+static int
+xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep)
+{
+ xdf_t *vdp;
- vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
- if (vreq == NULL) {
- if (vdp->xdf_timeout_id == 0)
- /* restart I/O after one second */
- vdp->xdf_timeout_id =
- timeout(xdf_timeout_handler, vdp, hz);
- return (NULL);
+ if (!(vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))))
+ return (ENXIO);
+
+ if (XD_IS_RO(vdp))
+ tgattributep->media_is_writable = 0;
+ else
+ tgattributep->media_is_writable = 1;
+ return (0);
+}
+
+/* ARGSUSED3 */
+int
+xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
+{
+ switch (cmd) {
+ case TG_GETPHYGEOM:
+ return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg));
+ case TG_GETVIRTGEOM:
+ return (xdf_lb_getvgeom(dip, (cmlb_geom_t *)arg));
+ case TG_GETCAPACITY:
+ return (xdf_lb_getcap(dip, (diskaddr_t *)arg));
+ case TG_GETBLOCKSIZE:
+ *(uint32_t *)arg = XB_BSIZE;
+ return (0);
+ case TG_GETATTR:
+ return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg));
+ default:
+ return (ENOTTY);
}
- bzero(vreq, sizeof (v_req_t));
+}
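+
+/*
+ * For example, cmlb reaches this entry point through the registered
+ * tg_ops vector when it needs to size the disk:
+ *
+ *	diskaddr_t cap;
+ *
+ *	if (xdf_lb_getinfo(dip, TG_GETCAPACITY, &cap, NULL) == 0)
+ *		... cap now holds the capacity in XB_BSIZE blocks ...
+ */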
- list_insert_head(&vdp->xdf_vreq_act, (void *)vreq);
- bp->av_back = (buf_t *)vreq;
- vreq->v_buf = bp;
- vreq->v_status = VREQ_INIT;
- /* init of other fields in vreq is up to the caller */
+/* ARGSUSED5 */
+int
+xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp,
+ diskaddr_t start, size_t reqlen, void *tg_cookie)
+{
+ xdf_t *vdp;
+ struct buf *bp;
+ int err = 0;
- return (vreq);
+ vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
+
+ /* We don't allow IO from the oe_change callback thread */
+ ASSERT(curthread != vdp->xdf_oe_change_thread);
+
+ if ((start + (reqlen >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity)
+ return (EINVAL);
+
+ bp = getrbuf(KM_SLEEP);
+ if (cmd == TG_READ)
+ bp->b_flags = B_BUSY | B_READ;
+ else
+ bp->b_flags = B_BUSY | B_WRITE;
+ bp->b_un.b_addr = bufp;
+ bp->b_bcount = reqlen;
+ bp->b_blkno = start;
+ bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */
+
+ mutex_enter(&vdp->xdf_dev_lk);
+ xdf_bp_push(vdp, bp);
+ mutex_exit(&vdp->xdf_dev_lk);
+ xdf_io_start(vdp);
+ if (curthread == vdp->xdf_ready_tq_thread)
+ (void) xdf_ring_drain(vdp);
+ err = biowait(bp);
+ ASSERT(bp->b_flags & B_DONE);
+ freerbuf(bp);
+ return (err);
}
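
/*
 * For example, a synchronous one-block read through this routine might
 * look like the sketch below (note that 'start' is a block number while
 * 'reqlen' is in bytes, as the capacity check above implies):
 *
 *	char blk[DEV_BSIZE];
 *
 *	if (xdf_lb_rdwr(dip, TG_READ, blk, 0, DEV_BSIZE, NULL) != 0)
 *		... read of block 0 failed ...
 */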
-static void
-vreq_free(xdf_t *vdp, v_req_t *vreq)
+/*
+ * Lock the current media. Set the media state to "lock".
+ * (Media locks are only respected by the backend driver.)
+ */
+static int
+xdf_ioctl_mlock(xdf_t *vdp)
{
- buf_t *bp = vreq->v_buf;
+ int rv;
+ mutex_enter(&vdp->xdf_cb_lk);
+ rv = xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
+ mutex_exit(&vdp->xdf_cb_lk);
+ return (rv);
+}
- list_remove(&vdp->xdf_vreq_act, (void *)vreq);
+/*
+ * Release a media lock. Set the media state to "none".
+ */
+static int
+xdf_ioctl_munlock(xdf_t *vdp)
+{
+ int rv;
+ mutex_enter(&vdp->xdf_cb_lk);
+ rv = xdf_media_req(vdp, XBV_MEDIA_REQ_NONE, B_TRUE);
+ mutex_exit(&vdp->xdf_cb_lk);
+ return (rv);
+}
- if (vreq->v_flush_diskcache == FLUSH_DISKCACHE)
- goto done;
+/*
+ * Eject the current media. Ignores any media locks. (Media locks
+ * are only for the benefit of the backend.)
+ */
+static int
+xdf_ioctl_eject(xdf_t *vdp)
+{
+ int rv;
- switch (vreq->v_status) {
- case VREQ_DMAWIN_DONE:
- case VREQ_GS_ALLOCED:
- case VREQ_DMABUF_BOUND:
- (void) ddi_dma_unbind_handle(vreq->v_dmahdl);
- /*FALLTHRU*/
- case VREQ_DMAMEM_ALLOCED:
- if (!ALIGNED_XFER(bp)) {
- ASSERT(vreq->v_abuf != NULL);
- if (!IS_ERROR(bp) && IS_READ(bp))
- bcopy(vreq->v_abuf, bp->b_un.b_addr,
- bp->b_bcount);
- ddi_dma_mem_free(&vreq->v_align);
- }
- /*FALLTHRU*/
- case VREQ_MEMDMAHDL_ALLOCED:
- if (!ALIGNED_XFER(bp))
- ddi_dma_free_handle(&vreq->v_memdmahdl);
- /*FALLTHRU*/
- case VREQ_DMAHDL_ALLOCED:
- ddi_dma_free_handle(&vreq->v_dmahdl);
- break;
- default:
- break;
+ mutex_enter(&vdp->xdf_cb_lk);
+ if ((rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_FALSE)) != 0) {
+ mutex_exit(&vdp->xdf_cb_lk);
+ return (rv);
}
-done:
- vreq->v_buf->av_back = NULL;
- kmem_cache_free(xdf_vreq_cache, vreq);
+
+ /*
+ * We've set the media request xenbus parameter to eject, so now
+ * disconnect from the backend, wait for the backend to clear
+ * the media request xenbus parameter, and then we can reconnect
+ * to the backend.
+ */
+ (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE);
+ mutex_enter(&vdp->xdf_dev_lk);
+ if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) {
+ mutex_exit(&vdp->xdf_dev_lk);
+ mutex_exit(&vdp->xdf_cb_lk);
+ return (EIO);
+ }
+ mutex_exit(&vdp->xdf_dev_lk);
+ mutex_exit(&vdp->xdf_cb_lk);
+ return (0);
}
/*
- * Initalize the DMA and grant table resources for the buf
+ * Watch for media state changes. This can be an insertion of a device
+ * (triggered by a 'xm block-configure' request in another domain) or
+ * the ejection of a device (triggered by a local "eject" operation).
+ * For a full description of the DKIOCSTATE ioctl behavior see dkio(7I).
*/
static int
-vreq_setup(xdf_t *vdp, v_req_t *vreq)
+xdf_dkstate(xdf_t *vdp, enum dkio_state mstate)
{
- int rc;
- ddi_dma_attr_t dmaattr;
- uint_t ndcs, ndws;
- ddi_dma_handle_t dh;
- ddi_dma_handle_t mdh;
- ddi_dma_cookie_t dc;
- ddi_acc_handle_t abh;
- caddr_t aba;
- ge_slot_t *gs;
- size_t bufsz;
- off_t off;
- size_t sz;
- buf_t *bp = vreq->v_buf;
- int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) |
- DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
+ enum dkio_state prev_state;
- switch (vreq->v_status) {
- case VREQ_INIT:
- if (IS_FLUSH_DISKCACHE(bp)) {
- if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
- DPRINTF(DMA_DBG, (
- "xdf@%s: get ge_slotfailed\n",
- ddi_get_name_addr(vdp->xdf_dip)));
- return (DDI_FAILURE);
+ mutex_enter(&vdp->xdf_cb_lk);
+ prev_state = vdp->xdf_mstate;
+
+ if (vdp->xdf_mstate == mstate) {
+ while (vdp->xdf_mstate == prev_state) {
+ if (cv_wait_sig(&vdp->xdf_mstate_cv,
+ &vdp->xdf_cb_lk) == 0) {
+ mutex_exit(&vdp->xdf_cb_lk);
+ return (EINTR);
}
- vreq->v_blkno = 0;
- vreq->v_nslots = 1;
- vreq->v_gs = gs;
- vreq->v_flush_diskcache = FLUSH_DISKCACHE;
- vreq->v_status = VREQ_GS_ALLOCED;
- gs->vreq = vreq;
- return (DDI_SUCCESS);
}
+ }
- if (IS_WRITE_BARRIER(vdp, bp))
- vreq->v_flush_diskcache = WRITE_BARRIER;
- vreq->v_blkno = bp->b_blkno +
- (diskaddr_t)(uintptr_t)bp->b_private;
- bp->b_private = NULL;
- /* See if we wrote new data to our flush block */
- if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
- check_fbwrite(vdp, bp, vreq->v_blkno);
- vreq->v_status = VREQ_INIT_DONE;
- /*FALLTHRU*/
+ if ((prev_state != DKIO_INSERTED) &&
+ (vdp->xdf_mstate == DKIO_INSERTED)) {
+ (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
+ mutex_exit(&vdp->xdf_cb_lk);
+ return (0);
+ }
- case VREQ_INIT_DONE:
- /*
- * alloc DMA handle
- */
- rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
- xdf_dmacallback, (caddr_t)vdp, &dh);
- if (rc != DDI_SUCCESS) {
- SETDMACBON(vdp);
- DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
- ddi_get_name_addr(vdp->xdf_dip)));
- return (DDI_FAILURE);
- }
+ mutex_exit(&vdp->xdf_cb_lk);
+ return (0);
+}
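+
+/*
+ * Userland sketch: per dkio(7I), DKIOCSTATE blocks until the media state
+ * differs from the state passed in, so media monitoring is a loop:
+ *
+ *	enum dkio_state state = DKIO_NONE;
+ *
+ *	while (ioctl(fd, DKIOCSTATE, &state) == 0) {
+ *		... state now reflects the new media state,
+ *		    e.g. DKIO_INSERTED or DKIO_EJECTED ...
+ *	}
+ */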
- vreq->v_dmahdl = dh;
- vreq->v_status = VREQ_DMAHDL_ALLOCED;
- /*FALLTHRU*/
+/*ARGSUSED*/
+static int
+xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
+ int *rvalp)
+{
+ minor_t minor = getminor(dev);
+ int part = XDF_PART(minor);
+ xdf_t *vdp;
+ int rv;
- case VREQ_DMAHDL_ALLOCED:
- /*
- * alloc dma handle for 512-byte aligned buf
- */
- if (!ALIGNED_XFER(bp)) {
- /*
- * XXPV: we need to temporarily enlarge the seg
- * boundary and s/g length to work round CR6381968
- */
- dmaattr = xb_dma_attr;
- dmaattr.dma_attr_seg = (uint64_t)-1;
- dmaattr.dma_attr_sgllen = INT_MAX;
- rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
- xdf_dmacallback, (caddr_t)vdp, &mdh);
- if (rc != DDI_SUCCESS) {
- SETDMACBON(vdp);
- DPRINTF(DMA_DBG, ("xdf@%s: unaligned buf DMA"
- "handle alloc failed\n",
- ddi_get_name_addr(vdp->xdf_dip)));
- return (DDI_FAILURE);
- }
- vreq->v_memdmahdl = mdh;
- vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
- }
- /*FALLTHRU*/
+ if (((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) ||
+ (!xdf_isopen(vdp, part)))
+ return (ENXIO);
- case VREQ_MEMDMAHDL_ALLOCED:
- /*
- * alloc 512-byte aligned buf
- */
- if (!ALIGNED_XFER(bp)) {
- if (bp->b_flags & (B_PAGEIO | B_PHYS))
- bp_mapin(bp);
+ DPRINTF(IOCTL_DBG, ("xdf@%s:ioctl: cmd %d (0x%x)\n",
+ vdp->xdf_addr, cmd, cmd));
- rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
- roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
- DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
- &aba, &bufsz, &abh);
- if (rc != DDI_SUCCESS) {
- SETDMACBON(vdp);
- DPRINTF(DMA_DBG, (
- "xdf@%s: DMA mem allocation failed\n",
- ddi_get_name_addr(vdp->xdf_dip)));
- return (DDI_FAILURE);
- }
+ switch (cmd) {
+ default:
+ return (ENOTTY);
+ case DKIOCG_PHYGEOM:
+ case DKIOCG_VIRTGEOM:
+ case DKIOCGGEOM:
+ case DKIOCSGEOM:
+ case DKIOCGAPART:
+ case DKIOCSAPART:
+ case DKIOCGVTOC:
+ case DKIOCSVTOC:
+ case DKIOCPARTINFO:
+ case DKIOCGEXTVTOC:
+ case DKIOCSEXTVTOC:
+ case DKIOCEXTPARTINFO:
+ case DKIOCGMBOOT:
+ case DKIOCSMBOOT:
+ case DKIOCGETEFI:
+ case DKIOCSETEFI:
+ case DKIOCPARTITION:
+ return (cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp,
+ rvalp, NULL));
+ case FDEJECT:
+ case DKIOCEJECT:
+ case CDROMEJECT:
+ return (xdf_ioctl_eject(vdp));
+ case DKIOCLOCK:
+ return (xdf_ioctl_mlock(vdp));
+ case DKIOCUNLOCK:
+ return (xdf_ioctl_munlock(vdp));
+ case CDROMREADOFFSET: {
+ int offset = 0;
+ if (!XD_IS_CD(vdp))
+ return (ENOTTY);
+ if (ddi_copyout(&offset, (void *)arg, sizeof (int), mode))
+ return (EFAULT);
+ return (0);
+ }
+ case DKIOCGMEDIAINFO: {
+ struct dk_minfo media_info;
- vreq->v_abuf = aba;
- vreq->v_align = abh;
- vreq->v_status = VREQ_DMAMEM_ALLOCED;
+ media_info.dki_lbsize = DEV_BSIZE;
+ media_info.dki_capacity = vdp->xdf_pgeom.g_capacity;
+ if (XD_IS_CD(vdp))
+ media_info.dki_media_type = DK_CDROM;
+ else
+ media_info.dki_media_type = DK_FIXED_DISK;
- ASSERT(bufsz >= bp->b_bcount);
- if (!IS_READ(bp))
- bcopy(bp->b_un.b_addr, vreq->v_abuf,
- bp->b_bcount);
- }
- /*FALLTHRU*/
+ if (ddi_copyout(&media_info, (void *)arg,
+ sizeof (struct dk_minfo), mode))
+ return (EFAULT);
+ return (0);
+ }
+ case DKIOCINFO: {
+ struct dk_cinfo info;
- case VREQ_DMAMEM_ALLOCED:
- /*
- * dma bind
- */
- if (ALIGNED_XFER(bp)) {
- rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
- dma_flags, xdf_dmacallback, (caddr_t)vdp,
- &dc, &ndcs);
+ /* controller information */
+ if (XD_IS_CD(vdp))
+ info.dki_ctype = DKC_CDROM;
+ else
+ info.dki_ctype = DKC_VBD;
+
+ info.dki_cnum = 0;
+ (void) strncpy((char *)(&info.dki_cname), "xdf", 8);
+
+ /* unit information */
+ info.dki_unit = ddi_get_instance(vdp->xdf_dip);
+ (void) strncpy((char *)(&info.dki_dname), "xdf", 8);
+ info.dki_flags = DKI_FMTVOL;
+ info.dki_partition = part;
+ info.dki_maxtransfer = maxphys / DEV_BSIZE;
+ info.dki_addr = 0;
+ info.dki_space = 0;
+ info.dki_prio = 0;
+ info.dki_vec = 0;
+
+ if (ddi_copyout(&info, (void *)arg, sizeof (info), mode))
+ return (EFAULT);
+ return (0);
+ }
+ case DKIOCSTATE: {
+ enum dkio_state mstate;
+
+ if (ddi_copyin((void *)arg, &mstate,
+ sizeof (mstate), mode) != 0)
+ return (EFAULT);
+ if ((rv = xdf_dkstate(vdp, mstate)) != 0)
+ return (rv);
+ mstate = vdp->xdf_mstate;
+ if (ddi_copyout(&mstate, (void *)arg,
+ sizeof (mstate), mode) != 0)
+ return (EFAULT);
+ return (0);
+ }
+ case DKIOCREMOVABLE: {
+ int i = BOOLEAN2VOID(XD_IS_RM(vdp));
+ if (ddi_copyout(&i, (caddr_t)arg, sizeof (i), mode))
+ return (EFAULT);
+ return (0);
+ }
+ case DKIOCGETWCE: {
+ int i = BOOLEAN2VOID(vdp->xdf_wce);
+ if (ddi_copyout(&i, (void *)arg, sizeof (i), mode))
+ return (EFAULT);
+ return (0);
+ }
+ case DKIOCSETWCE: {
+ int i;
+ if (ddi_copyin((void *)arg, &i, sizeof (i), mode))
+ return (EFAULT);
+ vdp->xdf_wce = VOID2BOOLEAN(i);
+ return (0);
+ }
+ case DKIOCFLUSHWRITECACHE: {
+ struct dk_callback *dkc = (struct dk_callback *)arg;
+
+ if (vdp->xdf_flush_supported) {
+ rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
+ NULL, 0, 0, (void *)dev);
+ } else if (vdp->xdf_feature_barrier &&
+ !xdf_barrier_flush_disable) {
+ rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
+ vdp->xdf_cache_flush_block, xdf_flush_block,
+ DEV_BSIZE, (void *)dev);
} else {
- rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
- NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
- xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
+ return (ENOTTY);
}
- if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
- /* get num of dma windows */
- if (rc == DDI_DMA_PARTIAL_MAP) {
- rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws);
- ASSERT(rc == DDI_SUCCESS);
- } else {
- ndws = 1;
- }
- } else {
- SETDMACBON(vdp);
- DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
- ddi_get_name_addr(vdp->xdf_dip)));
- return (DDI_FAILURE);
+ if ((mode & FKIOCTL) && (dkc != NULL) &&
+ (dkc->dkc_callback != NULL)) {
+ (*dkc->dkc_callback)(dkc->dkc_cookie, rv);
+ /* need to return 0 after calling callback */
+ rv = 0;
}
+ return (rv);
+ }
+ }
+ /*NOTREACHED*/
+}
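+
+/*
+ * Userland sketch: a synchronous cache flush can pass a NULL arg, since
+ * the dk_callback above is only honored for kernel-issued (FKIOCTL)
+ * requests:
+ *
+ *	if (ioctl(fd, DKIOCFLUSHWRITECACHE, NULL) != 0)
+ *		... flush failed or is unsupported ...
+ */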
- vreq->v_dmac = dc;
- vreq->v_dmaw = 0;
- vreq->v_ndmacs = ndcs;
- vreq->v_ndmaws = ndws;
- vreq->v_nslots = ndws;
- vreq->v_status = VREQ_DMABUF_BOUND;
- /*FALLTHRU*/
+static int
+xdf_strategy(struct buf *bp)
+{
+ xdf_t *vdp;
+ minor_t minor;
+ diskaddr_t p_blkct, p_blkst;
+ ulong_t nblks;
+ int part;
- case VREQ_DMABUF_BOUND:
- /*
- * get ge_slot, callback is set upon failure from gs_get(),
- * if not set previously
- */
- if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
- DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
- ddi_get_name_addr(vdp->xdf_dip)));
- return (DDI_FAILURE);
- }
+ minor = getminor(bp->b_edev);
+ part = XDF_PART(minor);
+ vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor));
- vreq->v_gs = gs;
- gs->vreq = vreq;
- vreq->v_status = VREQ_GS_ALLOCED;
- break;
+ mutex_enter(&vdp->xdf_dev_lk);
+ if (!xdf_isopen(vdp, part)) {
+ mutex_exit(&vdp->xdf_dev_lk);
+ xdf_io_err(bp, ENXIO, 0);
+ return (0);
+ }
- case VREQ_GS_ALLOCED:
- /* nothing need to be done */
- break;
+ /* We don't allow IO from the oe_change callback thread */
+ ASSERT(curthread != vdp->xdf_oe_change_thread);
- case VREQ_DMAWIN_DONE:
- /*
- * move to the next dma window
- */
- ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);
+ /* Check for writes to a read only device */
+ if (!IS_READ(bp) && XD_IS_RO(vdp)) {
+ mutex_exit(&vdp->xdf_dev_lk);
+ xdf_io_err(bp, EROFS, 0);
+ return (0);
+ }
- /* get a ge_slot for this DMA window */
- if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
- DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
- ddi_get_name_addr(vdp->xdf_dip)));
- return (DDI_FAILURE);
+ /* Check if this I/O is accessing a partition or the entire disk */
+ if ((long)bp->b_private == XB_SLICE_NONE) {
+ /* This I/O is using an absolute offset */
+ p_blkct = vdp->xdf_xdev_nblocks;
+ p_blkst = 0;
+ } else {
+ /* This I/O is using a partition relative offset */
+ mutex_exit(&vdp->xdf_dev_lk);
+ if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
+ &p_blkst, NULL, NULL, NULL)) {
+ xdf_io_err(bp, ENXIO, 0);
+ return (0);
}
+ mutex_enter(&vdp->xdf_dev_lk);
+ }
- vreq->v_gs = gs;
- gs->vreq = vreq;
- vreq->v_dmaw++;
- rc = ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
- &vreq->v_dmac, &vreq->v_ndmacs);
- ASSERT(rc == DDI_SUCCESS);
- vreq->v_status = VREQ_GS_ALLOCED;
- break;
+ /* check for a starting block beyond the disk or partition limit */
+ if (bp->b_blkno > p_blkct) {
+ DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64,
+ vdp->xdf_addr, (longlong_t)bp->b_blkno, (uint64_t)p_blkct));
+ mutex_exit(&vdp->xdf_dev_lk);
+ xdf_io_err(bp, EINVAL, 0);
+ return (0);
+ }
- default:
- return (DDI_FAILURE);
+ /* Legacy: don't set the error flag in this case */
+ if (bp->b_blkno == p_blkct) {
+ mutex_exit(&vdp->xdf_dev_lk);
+ bp->b_resid = bp->b_bcount;
+ biodone(bp);
+ return (0);
}
- return (DDI_SUCCESS);
+ /* sanitize the input buf */
+ bioerror(bp, 0);
+ bp->b_resid = 0;
+ bp->av_back = bp->av_forw = NULL;
+
+ /* Adjust for a partial transfer; this will result in an error later */
+ nblks = bp->b_bcount >> XB_BSHIFT;
+ if ((bp->b_blkno + nblks) > p_blkct) {
+ bp->b_resid = ((bp->b_blkno + nblks) - p_blkct) << XB_BSHIFT;
+ bp->b_bcount -= bp->b_resid;
+ }
+
+ DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n",
+ vdp->xdf_addr, (longlong_t)bp->b_blkno, (ulong_t)bp->b_bcount));
+
+ /* Fix up the buf struct */
+ bp->b_flags |= B_BUSY;
+ bp->b_private = (void *)(uintptr_t)p_blkst;
+
+ xdf_bp_push(vdp, bp);
+ mutex_exit(&vdp->xdf_dev_lk);
+ xdf_io_start(vdp);
+ if (do_polled_io)
+ (void) xdf_ring_drain(vdp);
+ return (0);
}
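
/*
 * Worked example of the partial-transfer trimming above (512-byte
 * blocks): with p_blkct = 100, b_blkno = 90, and b_bcount = 8192, we get
 * nblks = 16, so the request runs 6 blocks past the partition;
 * b_resid = 6 << XB_BSHIFT = 3072 and b_bcount is trimmed to 5120.
 */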
-static ge_slot_t *
-gs_get(xdf_t *vdp, int isread)
+/*ARGSUSED*/
+static int
+xdf_read(dev_t dev, struct uio *uiop, cred_t *credp)
{
- grant_ref_t gh;
- ge_slot_t *gs;
+ xdf_t *vdp;
+ minor_t minor;
+ diskaddr_t p_blkcnt;
+ int part;
- /* try to alloc GTEs needed in this slot, first */
- if (gnttab_alloc_grant_references(
- BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
- if (vdp->xdf_gnt_callback.next == NULL) {
- SETDMACBON(vdp);
- gnttab_request_free_callback(
- &vdp->xdf_gnt_callback,
- (void (*)(void *))xdf_dmacallback,
- (void *)vdp,
- BLKIF_MAX_SEGMENTS_PER_REQUEST);
- }
- return (NULL);
- }
+ minor = getminor(dev);
+ if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
+ return (ENXIO);
- gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
- if (gs == NULL) {
- gnttab_free_grant_references(gh);
- if (vdp->xdf_timeout_id == 0)
- /* restart I/O after one second */
- vdp->xdf_timeout_id =
- timeout(xdf_timeout_handler, vdp, hz);
- return (NULL);
- }
+ DPRINTF(IO_DBG, ("xdf@%s: read offset 0x%"PRIx64"\n",
+ vdp->xdf_addr, (int64_t)uiop->uio_offset));
- /* init gs_slot */
- list_insert_head(&vdp->xdf_gs_act, (void *)gs);
- gs->oeid = vdp->xdf_peer;
- gs->isread = isread;
- gs->ghead = gh;
- gs->ngrefs = 0;
+ part = XDF_PART(minor);
+ if (!xdf_isopen(vdp, part))
+ return (ENXIO);
- return (gs);
+ if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
+ NULL, NULL, NULL, NULL))
+ return (ENXIO);
+
+ if (U_INVAL(uiop))
+ return (EINVAL);
+
+ return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop));
}
-static void
-gs_free(xdf_t *vdp, ge_slot_t *gs)
+/*ARGSUSED*/
+static int
+xdf_write(dev_t dev, struct uio *uiop, cred_t *credp)
{
- int i;
- grant_ref_t *gp = gs->ge;
- int ngrefs = gs->ngrefs;
- boolean_t isread = gs->isread;
+ xdf_t *vdp;
+ minor_t minor;
+ diskaddr_t p_blkcnt;
+ int part;
- list_remove(&vdp->xdf_gs_act, (void *)gs);
+ minor = getminor(dev);
+ if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
+ return (ENXIO);
- /* release all grant table entry resources used in this slot */
- for (i = 0; i < ngrefs; i++, gp++)
- gnttab_end_foreign_access(*gp, !isread, 0);
- gnttab_free_grant_references(gs->ghead);
+ DPRINTF(IO_DBG, ("xdf@%s: write offset 0x%"PRIx64"\n",
+ vdp->xdf_addr, (int64_t)uiop->uio_offset));
- kmem_cache_free(xdf_gs_cache, (void *)gs);
+ part = XDF_PART(minor);
+ if (!xdf_isopen(vdp, part))
+ return (ENXIO);
+
+ if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
+ NULL, NULL, NULL, NULL))
+ return (ENXIO);
+
+ if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
+ return (ENOSPC);
+
+ if (U_INVAL(uiop))
+ return (EINVAL);
+
+ return (physio(xdf_strategy, NULL, dev, B_WRITE, xdfmin, uiop));
}
-static grant_ref_t
-gs_grant(ge_slot_t *gs, mfn_t mfn)
+/*ARGSUSED*/
+static int
+xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp)
{
- grant_ref_t gr = gnttab_claim_grant_reference(&gs->ghead);
+ xdf_t *vdp;
+ minor_t minor;
+ struct uio *uiop = aiop->aio_uio;
+ diskaddr_t p_blkcnt;
+ int part;
- ASSERT(gr != -1);
- ASSERT(gs->ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
- gs->ge[gs->ngrefs++] = gr;
- gnttab_grant_foreign_access_ref(gr, gs->oeid, mfn, !gs->isread);
+ minor = getminor(dev);
+ if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
+ return (ENXIO);
- return (gr);
+ part = XDF_PART(minor);
+ if (!xdf_isopen(vdp, part))
+ return (ENXIO);
+
+ if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
+ NULL, NULL, NULL, NULL))
+ return (ENXIO);
+
+ if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
+ return (ENOSPC);
+
+ if (U_INVAL(uiop))
+ return (EINVAL);
+
+ return (aphysio(xdf_strategy, anocancel, dev, B_READ, xdfmin, aiop));
}
-static void
-unexpectedie(xdf_t *vdp)
+/*ARGSUSED*/
+static int
+xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp)
{
- /* clean up I/Os in ring that have responses */
- if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
- mutex_exit(&vdp->xdf_dev_lk);
- (void) xdf_intr((caddr_t)vdp);
- mutex_enter(&vdp->xdf_dev_lk);
- }
+ xdf_t *vdp;
+ minor_t minor;
+ struct uio *uiop = aiop->aio_uio;
+ diskaddr_t p_blkcnt;
+ int part;
- /* free up all grant table entries */
- while (!list_is_empty(&vdp->xdf_gs_act))
- gs_free(vdp, list_head(&vdp->xdf_gs_act));
+ minor = getminor(dev);
+ if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
+ return (ENXIO);
- /*
- * move bp back to active list orderly
- * vreq_busy is updated in vreq_free()
- */
- while (!list_is_empty(&vdp->xdf_vreq_act)) {
- v_req_t *vreq = list_head(&vdp->xdf_vreq_act);
- buf_t *bp = vreq->v_buf;
+ part = XDF_PART(minor);
+ if (!xdf_isopen(vdp, part))
+ return (ENXIO);
- bp->av_back = NULL;
- bp->b_resid = bp->b_bcount;
- if (vdp->xdf_f_act == NULL) {
- vdp->xdf_f_act = vdp->xdf_l_act = bp;
- } else {
- /* move to the head of list */
- bp->av_forw = vdp->xdf_f_act;
- vdp->xdf_f_act = bp;
- }
- if (vdp->xdf_xdev_iostat != NULL)
- kstat_runq_back_to_waitq(
- KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
- vreq_free(vdp, vreq);
- }
+ if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
+ NULL, NULL, NULL, NULL))
+ return (ENXIO);
+
+ if (uiop->uio_loffset >= XB_DTOB(p_blkcnt))
+ return (ENOSPC);
+
+ if (U_INVAL(uiop))
+ return (EINVAL);
+
+ return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, xdfmin, aiop));
}
-static void
-xdfmin(struct buf *bp)
+static int
+xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
- if (bp->b_bcount > xdf_maxphys)
- bp->b_bcount = xdf_maxphys;
+ struct buf dumpbuf, *dbp = &dumpbuf;
+ xdf_t *vdp;
+ minor_t minor;
+ int err = 0;
+ int part;
+ diskaddr_t p_blkcnt, p_blkst;
+
+ minor = getminor(dev);
+ if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
+ return (ENXIO);
+
+ DPRINTF(IO_DBG, ("xdf@%s: dump addr (0x%p) blk (%ld) nblks (%d)\n",
+ vdp->xdf_addr, (void *)addr, blkno, nblk));
+
+ /* We don't allow IO from the oe_change callback thread */
+ ASSERT(curthread != vdp->xdf_oe_change_thread);
+
+ part = XDF_PART(minor);
+ if (!xdf_isopen(vdp, part))
+ return (ENXIO);
+
+ if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst,
+ NULL, NULL, NULL))
+ return (ENXIO);
+
+ if ((blkno + nblk) > p_blkcnt) {
+ cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64,
+ vdp->xdf_addr, blkno + nblk, (uint64_t)p_blkcnt);
+ return (EINVAL);
+ }
+
+ bioinit(dbp);
+ dbp->b_flags = B_BUSY;
+ dbp->b_un.b_addr = addr;
+ dbp->b_bcount = nblk << DEV_BSHIFT;
+ dbp->b_blkno = blkno;
+ dbp->b_edev = dev;
+ dbp->b_private = (void *)(uintptr_t)p_blkst;
+
+ mutex_enter(&vdp->xdf_dev_lk);
+ xdf_bp_push(vdp, dbp);
+ mutex_exit(&vdp->xdf_dev_lk);
+ xdf_io_start(vdp);
+ err = xdf_ring_drain(vdp);
+ biofini(dbp);
+ return (err);
}
-void
-xdf_kstat_delete(dev_info_t *dip)
+/*ARGSUSED*/
+static int
+xdf_close(dev_t dev, int flag, int otyp, struct cred *credp)
{
- xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
- kstat_t *kstat;
+ minor_t minor;
+ xdf_t *vdp;
+ int part;
+ ulong_t parbit;
- /*
- * The locking order here is xdf_iostat_lk and then xdf_dev_lk.
- * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer
- * and the contents of the our kstat. xdf_iostat_lk is used
- * to protect the allocation and freeing of the actual kstat.
- * xdf_dev_lk can't be used for this purpose because kstat
- * readers use it to access the contents of the kstat and
- * hence it can't be held when calling kstat_delete().
- */
- mutex_enter(&vdp->xdf_iostat_lk);
- mutex_enter(&vdp->xdf_dev_lk);
+ minor = getminor(dev);
+ if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
+ return (ENXIO);
- if (vdp->xdf_xdev_iostat == NULL) {
+ mutex_enter(&vdp->xdf_dev_lk);
+ part = XDF_PART(minor);
+ if (!xdf_isopen(vdp, part)) {
mutex_exit(&vdp->xdf_dev_lk);
- mutex_exit(&vdp->xdf_iostat_lk);
- return;
+ return (ENXIO);
}
+ parbit = 1 << part;
- kstat = vdp->xdf_xdev_iostat;
- vdp->xdf_xdev_iostat = NULL;
- mutex_exit(&vdp->xdf_dev_lk);
+ ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0);
+ if (otyp == OTYP_LYR) {
+ ASSERT(vdp->xdf_vd_lyropen[part] > 0);
+ if (--vdp->xdf_vd_lyropen[part] == 0)
+ vdp->xdf_vd_open[otyp] &= ~parbit;
+ } else {
+ vdp->xdf_vd_open[otyp] &= ~parbit;
+ }
+ vdp->xdf_vd_exclopen &= ~parbit;
- kstat_delete(kstat);
- mutex_exit(&vdp->xdf_iostat_lk);
+ mutex_exit(&vdp->xdf_dev_lk);
+ return (0);
}
-int
-xdf_kstat_create(dev_info_t *dip, char *ks_module, int ks_instance)
+static int
+xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
- xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
+ minor_t minor;
+ xdf_t *vdp;
+ int part;
+ ulong_t parbit;
+ diskaddr_t p_blkct = 0;
+ boolean_t firstopen;
+ boolean_t nodelay;
- /* See comment about locking in xdf_kstat_delete(). */
- mutex_enter(&vdp->xdf_iostat_lk);
+ minor = getminor(*devp);
+ if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
+ return (ENXIO);
+
+ nodelay = (flag & (FNDELAY | FNONBLOCK));
+
+ DPRINTF(DDI_DBG, ("xdf@%s: opening\n", vdp->xdf_addr));
+
+ /* do cv_wait until connected or failed */
+ mutex_enter(&vdp->xdf_cb_lk);
mutex_enter(&vdp->xdf_dev_lk);
+ if (!nodelay && (xdf_connect_locked(vdp, B_TRUE) != XD_READY)) {
+ mutex_exit(&vdp->xdf_dev_lk);
+ mutex_exit(&vdp->xdf_cb_lk);
+ return (ENXIO);
+ }
+ mutex_exit(&vdp->xdf_cb_lk);
- if (vdp->xdf_xdev_iostat != NULL) {
+ if ((flag & FWRITE) && XD_IS_RO(vdp)) {
mutex_exit(&vdp->xdf_dev_lk);
- mutex_exit(&vdp->xdf_iostat_lk);
- return (-1);
+ return (EROFS);
}
- if ((vdp->xdf_xdev_iostat = kstat_create(
- ks_module, ks_instance, NULL, "disk",
- KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
+ part = XDF_PART(minor);
+ parbit = 1 << part;
+ if ((vdp->xdf_vd_exclopen & parbit) ||
+ ((flag & FEXCL) && xdf_isopen(vdp, part))) {
mutex_exit(&vdp->xdf_dev_lk);
- mutex_exit(&vdp->xdf_iostat_lk);
- return (-1);
+ return (EBUSY);
}
- vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk;
- kstat_install(vdp->xdf_xdev_iostat);
+ /* are we the first one to open this node? */
+ firstopen = !xdf_isopen(vdp, -1);
+
+ if (otyp == OTYP_LYR)
+ vdp->xdf_vd_lyropen[part]++;
+
+ vdp->xdf_vd_open[otyp] |= parbit;
+
+ if (flag & FEXCL)
+ vdp->xdf_vd_exclopen |= parbit;
+
mutex_exit(&vdp->xdf_dev_lk);
- mutex_exit(&vdp->xdf_iostat_lk);
+
+ /* force a re-validation */
+ if (firstopen)
+ cmlb_invalidate(vdp->xdf_vd_lbl, NULL);
+
+ /* If this is a non-blocking open then we're done */
+ if (nodelay)
+ return (0);
+
+ /*
+ * This is a blocking open, so we require:
+ * - that the disk have a valid label on it
+ * - that the size of the partition that we're opening is non-zero
+ */
+ if ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
+ NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0)) {
+ (void) xdf_close(*devp, flag, otyp, credp);
+ return (ENXIO);
+ }
return (0);
}
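
/*
 * Userland sketch: an O_NONBLOCK open skips both the wait for the
 * backend connection and the label/partition checks above, which is how
 * utilities reach removable devices with no media present (the device
 * path is hypothetical):
 *
 *	int fd = open("/dev/dsk/c2t0d0p0", O_RDONLY | O_NONBLOCK);
 */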
-#if defined(XPV_HVM_DRIVER)
+/*ARGSUSED*/
+static void
+xdf_watch_hp_status_cb(dev_info_t *dip, const char *path, void *arg)
+{
+ xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
+ cv_broadcast(&vdp->xdf_hp_status_cv);
+}
-typedef struct xdf_hvm_entry {
- list_node_t xdf_he_list;
- char *xdf_he_path;
- dev_info_t *xdf_he_dip;
-} xdf_hvm_entry_t;
+static int
+xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
+ char *name, caddr_t valuep, int *lengthp)
+{
+ xdf_t *vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
-static list_t xdf_hvm_list;
-static kmutex_t xdf_hvm_list_lock;
+ /*
+ * Sanity check that if a dev_t or dip were specified that they
+ * correspond to this device driver. On debug kernels we'll
+ * panic and on non-debug kernels we'll return failure.
+ */
+ ASSERT(ddi_driver_major(dip) == xdf_major);
+ ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdf_major));
+ if ((ddi_driver_major(dip) != xdf_major) ||
+ ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdf_major)))
+ return (DDI_PROP_NOT_FOUND);
-static xdf_hvm_entry_t *
-i_xdf_hvm_find(char *path, dev_info_t *dip)
-{
- xdf_hvm_entry_t *i;
+ if (vdp == NULL)
+ return (ddi_prop_op(dev, dip, prop_op, flags,
+ name, valuep, lengthp));
- ASSERT((path != NULL) || (dip != NULL));
- ASSERT(MUTEX_HELD(&xdf_hvm_list_lock));
+ return (cmlb_prop_op(vdp->xdf_vd_lbl,
+ dev, dip, prop_op, flags, name, valuep, lengthp,
+ XDF_PART(getminor(dev)), NULL));
+}
- i = list_head(&xdf_hvm_list);
- while (i != NULL) {
- if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) {
- i = list_next(&xdf_hvm_list, i);
- continue;
- }
- if ((dip != NULL) && (i->xdf_he_dip != dip)) {
- i = list_next(&xdf_hvm_list, i);
- continue;
+/*ARGSUSED*/
+static int
+xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp)
+{
+ int instance = XDF_INST(getminor((dev_t)arg));
+ xdf_t *vbdp;
+
+ switch (cmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ if ((vbdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) {
+ *rp = NULL;
+ return (DDI_FAILURE);
}
- break;
+ *rp = vbdp->xdf_dip;
+ return (DDI_SUCCESS);
+
+ case DDI_INFO_DEVT2INSTANCE:
+ *rp = (void *)(uintptr_t)instance;
+ return (DDI_SUCCESS);
+
+ default:
+ return (DDI_FAILURE);
}
- return (i);
}
-dev_info_t *
-xdf_hvm_hold(char *path)
+/*ARGSUSED*/
+static int
+xdf_resume(dev_info_t *dip)
{
- xdf_hvm_entry_t *i;
- dev_info_t *dip;
+ xdf_t *vdp;
+ char *oename;
- mutex_enter(&xdf_hvm_list_lock);
- i = i_xdf_hvm_find(path, NULL);
- if (i == NULL) {
- mutex_exit(&xdf_hvm_list_lock);
- return (B_FALSE);
+ if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL)
+ goto err;
+
+ if (xdf_debug & SUSRES_DBG)
+ xen_printf("xdf@%s: xdf_resume\n", vdp->xdf_addr);
+
+ mutex_enter(&vdp->xdf_cb_lk);
+
+ if (xvdi_resume(dip) != DDI_SUCCESS) {
+ mutex_exit(&vdp->xdf_cb_lk);
+ goto err;
}
- ndi_hold_devi(dip = i->xdf_he_dip);
- mutex_exit(&xdf_hvm_list_lock);
- return (dip);
-}
-static void
-xdf_hvm_add(dev_info_t *dip)
-{
- xdf_hvm_entry_t *i;
- char *path;
+ if (((oename = xvdi_get_oename(dip)) == NULL) ||
+ (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS,
+ xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)) {
+ mutex_exit(&vdp->xdf_cb_lk);
+ goto err;
+ }
- /* figure out the path for the dip */
- path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
- (void) ddi_pathname(dip, path);
+ mutex_enter(&vdp->xdf_dev_lk);
+ ASSERT(vdp->xdf_state != XD_READY);
+ xdf_set_state(vdp, XD_UNKNOWN);
+ mutex_exit(&vdp->xdf_dev_lk);
- i = kmem_alloc(sizeof (*i), KM_SLEEP);
- i->xdf_he_dip = dip;
- i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP);
+ if (xdf_setstate_init(vdp) != DDI_SUCCESS) {
+ mutex_exit(&vdp->xdf_cb_lk);
+ goto err;
+ }
- mutex_enter(&xdf_hvm_list_lock);
- ASSERT(i_xdf_hvm_find(path, NULL) == NULL);
- ASSERT(i_xdf_hvm_find(NULL, dip) == NULL);
- list_insert_head(&xdf_hvm_list, i);
- mutex_exit(&xdf_hvm_list_lock);
+ mutex_exit(&vdp->xdf_cb_lk);
- kmem_free(path, MAXPATHLEN);
+ if (xdf_debug & SUSRES_DBG)
+ xen_printf("xdf@%s: xdf_resume: done\n", vdp->xdf_addr);
+ return (DDI_SUCCESS);
+err:
+ if (xdf_debug & SUSRES_DBG)
+ xen_printf("xdf@%s: xdf_resume: fail\n", vdp->xdf_addr);
+ return (DDI_FAILURE);
}
-static void
-xdf_hvm_rm(dev_info_t *dip)
+static int
+xdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
- xdf_hvm_entry_t *i;
+ int n, instance = ddi_get_instance(dip);
+ ddi_iblock_cookie_t ibc, softibc;
+ boolean_t dev_iscd = B_FALSE;
+ xdf_t *vdp;
+ char *oename, *xsname, *str;
- mutex_enter(&xdf_hvm_list_lock);
- VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL);
- list_remove(&xdf_hvm_list, i);
- mutex_exit(&xdf_hvm_list_lock);
+ if ((n = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_NOTPROM,
+ "xdf_debug", 0)) != 0)
+ xdf_debug = n;
- kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1);
- kmem_free(i, sizeof (*i));
-}
+ switch (cmd) {
+ case DDI_RESUME:
+ return (xdf_resume(dip));
+ case DDI_ATTACH:
+ break;
+ default:
+ return (DDI_FAILURE);
+ }
+ /* DDI_ATTACH */
-static void
-xdf_hvm_init(void)
-{
- list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t),
- offsetof(xdf_hvm_entry_t, xdf_he_list));
- mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL);
-}
+ if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
+ ((oename = xvdi_get_oename(dip)) == NULL))
+ return (DDI_FAILURE);
-static void
-xdf_hvm_fini(void)
-{
- ASSERT(list_head(&xdf_hvm_list) == NULL);
- list_destroy(&xdf_hvm_list);
- mutex_destroy(&xdf_hvm_list_lock);
+ /*
+ * Disable auto-detach. This is necessary so that we don't get
+ * detached while we're disconnected from the back end.
+ */
+ if ((ddi_prop_update_int(DDI_DEV_T_NONE, dip,
+ DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS))
+ return (DDI_FAILURE);
+
+ /* driver handles kernel-issued IOCTLs */
+ if (ddi_prop_create(DDI_DEV_T_NONE, dip,
+ DDI_PROP_CANSLEEP, DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS)
+ return (DDI_FAILURE);
+
+ if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+
+ if (ddi_get_soft_iblock_cookie(dip,
+ DDI_SOFTINT_LOW, &softibc) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+
+ if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) {
+ cmn_err(CE_WARN, "xdf@%s: cannot read device-type",
+ ddi_get_name_addr(dip));
+ return (DDI_FAILURE);
+ }
+ if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
+ dev_iscd = B_TRUE;
+ strfree(str);
+
+ if (ddi_soft_state_zalloc(xdf_ssp, instance) != DDI_SUCCESS)
+ return (DDI_FAILURE);
+
+ DPRINTF(DDI_DBG, ("xdf@%s: attaching\n", ddi_get_name_addr(dip)));
+ vdp = ddi_get_soft_state(xdf_ssp, instance);
+ ddi_set_driver_private(dip, vdp);
+ vdp->xdf_dip = dip;
+ vdp->xdf_addr = ddi_get_name_addr(dip);
+ vdp->xdf_suspending = B_FALSE;
+ vdp->xdf_media_req_supported = B_FALSE;
+ vdp->xdf_peer = INVALID_DOMID;
+ vdp->xdf_evtchn = INVALID_EVTCHN;
+ list_create(&vdp->xdf_vreq_act, sizeof (v_req_t),
+ offsetof(v_req_t, v_link));
+ cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&vdp->xdf_hp_status_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&vdp->xdf_mstate_cv, NULL, CV_DEFAULT, NULL);
+ mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc);
+ mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc);
+ mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, (void *)ibc);
+ vdp->xdf_cmbl_reattach = B_TRUE;
+ if (dev_iscd) {
+ vdp->xdf_dinfo |= VDISK_CDROM;
+ vdp->xdf_mstate = DKIO_EJECTED;
+ } else {
+ vdp->xdf_mstate = DKIO_NONE;
+ }
+
+ if ((vdp->xdf_ready_tq = ddi_taskq_create(dip, "xdf_ready_tq",
+ 1, TASKQ_DEFAULTPRI, 0)) == NULL)
+ goto errout0;
+
+ if (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS,
+ xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)
+ goto errout0;
+
+ if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id,
+ &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "xdf@%s: failed to add softintr",
+ ddi_get_name_addr(dip));
+ goto errout0;
+ }
+
+ /*
+	 * Initialize the physical geometry structure. Note that currently
+ * we don't know the size of the backend device so the number
+ * of blocks on the device will be initialized to zero. Once
+ * we connect to the backend device we'll update the physical
+ * geometry to reflect the real size of the device.
+ */
+ xdf_synthetic_pgeom(dip, &vdp->xdf_pgeom);
+ vdp->xdf_pgeom_fixed = B_FALSE;
+
+ /*
+	 * Create default device minor nodes for a non-removable disk;
+	 * we will adjust the minor nodes after we are connected to the backend.
+ */
+ cmlb_alloc_handle(&vdp->xdf_vd_lbl);
+ if (xdf_cmlb_attach(vdp) != 0) {
+ cmn_err(CE_WARN,
+ "xdf@%s: attach failed, cmlb attach failed",
+ ddi_get_name_addr(dip));
+ goto errout0;
+ }
+
+ /*
+ * We ship with cache-enabled disks
+ */
+ vdp->xdf_wce = B_TRUE;
+
+ mutex_enter(&vdp->xdf_cb_lk);
+ /* Watch backend XenbusState change */
+ if (xvdi_add_event_handler(dip,
+ XS_OE_STATE, xdf_oe_change, NULL) != DDI_SUCCESS) {
+ mutex_exit(&vdp->xdf_cb_lk);
+ goto errout0;
+ }
+
+ if (xdf_setstate_init(vdp) != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "xdf@%s: start connection failed",
+ ddi_get_name_addr(dip));
+ mutex_exit(&vdp->xdf_cb_lk);
+ goto errout1;
+ }
+ mutex_exit(&vdp->xdf_cb_lk);
+
+#if defined(XPV_HVM_DRIVER)
+
+ xdf_hvm_add(dip);
+
+ /* Report our version to dom0. */
+ if (xenbus_printf(XBT_NULL, "hvmpv/xdf", "version", "%d",
+ HVMPV_XDF_VERS))
+ cmn_err(CE_WARN, "xdf: couldn't write version\n");
+
+#else /* !XPV_HVM_DRIVER */
+
+ /* create kstat for iostat(1M) */
+ if (xdf_kstat_create(dip, "xdf", instance) != 0) {
+ cmn_err(CE_WARN, "xdf@%s: failed to create kstat",
+ ddi_get_name_addr(dip));
+ goto errout1;
+ }
+
+#endif /* !XPV_HVM_DRIVER */
+
+ ddi_report_dev(dip);
+ DPRINTF(DDI_DBG, ("xdf@%s: attached\n", vdp->xdf_addr));
+ return (DDI_SUCCESS);
+
+errout1:
+ (void) xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed);
+ xvdi_remove_event_handler(dip, XS_OE_STATE);
+errout0:
+ if (vdp->xdf_vd_lbl != NULL) {
+ cmlb_detach(vdp->xdf_vd_lbl, NULL);
+ cmlb_free_handle(&vdp->xdf_vd_lbl);
+ vdp->xdf_vd_lbl = NULL;
+ }
+ if (vdp->xdf_softintr_id != NULL)
+ ddi_remove_softintr(vdp->xdf_softintr_id);
+ xvdi_remove_xb_watch_handlers(dip);
+ if (vdp->xdf_ready_tq != NULL)
+ ddi_taskq_destroy(vdp->xdf_ready_tq);
+ mutex_destroy(&vdp->xdf_cb_lk);
+ mutex_destroy(&vdp->xdf_dev_lk);
+ cv_destroy(&vdp->xdf_dev_cv);
+ cv_destroy(&vdp->xdf_hp_status_cv);
+ ddi_soft_state_free(xdf_ssp, instance);
+ ddi_set_driver_private(dip, NULL);
+ ddi_prop_remove_all(dip);
+ cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(dip));
+ return (DDI_FAILURE);
}
-int
-xdf_hvm_connect(dev_info_t *dip)
+static int
+xdf_suspend(dev_info_t *dip)
{
- xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
- int rv;
+ int instance = ddi_get_instance(dip);
+ xdf_t *vdp;
- /* do cv_wait until connected or failed */
+ if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL)
+ return (DDI_FAILURE);
+
+ if (xdf_debug & SUSRES_DBG)
+ xen_printf("xdf@%s: xdf_suspend\n", vdp->xdf_addr);
+
+ xvdi_suspend(dip);
+
+ mutex_enter(&vdp->xdf_cb_lk);
mutex_enter(&vdp->xdf_dev_lk);
- rv = xdf_connect(vdp, B_TRUE);
+
+ vdp->xdf_suspending = B_TRUE;
+ xdf_ring_destroy(vdp);
+ xdf_set_state(vdp, XD_SUSPEND);
+ vdp->xdf_suspending = B_FALSE;
+
mutex_exit(&vdp->xdf_dev_lk);
- return ((rv == XD_READY) ? 0 : -1);
+ mutex_exit(&vdp->xdf_cb_lk);
+
+ if (xdf_debug & SUSRES_DBG)
+ xen_printf("xdf@%s: xdf_suspend: done\n", vdp->xdf_addr);
+
+ return (DDI_SUCCESS);
}
-int
-xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
+static int
+xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
- xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
+ xdf_t *vdp;
+ int instance;
- /* sanity check the requested physical geometry */
- mutex_enter(&vdp->xdf_dev_lk);
- if ((geomp->g_secsize != XB_BSIZE) ||
- (geomp->g_capacity == 0)) {
- mutex_exit(&vdp->xdf_dev_lk);
- return (EINVAL);
+ switch (cmd) {
+
+ case DDI_PM_SUSPEND:
+ break;
+
+ case DDI_SUSPEND:
+ return (xdf_suspend(dip));
+
+ case DDI_DETACH:
+ break;
+
+ default:
+ return (DDI_FAILURE);
}
- /*
- * If we've already connected to the backend device then make sure
- * we're not defining a physical geometry larger than our backend
- * device.
- */
- if ((vdp->xdf_xdev_nblocks != 0) &&
- (geomp->g_capacity > vdp->xdf_xdev_nblocks)) {
- mutex_exit(&vdp->xdf_dev_lk);
+ instance = ddi_get_instance(dip);
+ DPRINTF(DDI_DBG, ("xdf@%s: detaching\n", ddi_get_name_addr(dip)));
+ vdp = ddi_get_soft_state(xdf_ssp, instance);
+
+ if (vdp == NULL)
+ return (DDI_FAILURE);
+
+ mutex_enter(&vdp->xdf_cb_lk);
+ xdf_disconnect(vdp, XD_CLOSED, B_FALSE);
+ if (vdp->xdf_state != XD_CLOSED) {
+ mutex_exit(&vdp->xdf_cb_lk);
+ return (DDI_FAILURE);
+ }
+ mutex_exit(&vdp->xdf_cb_lk);
+
+ ASSERT(!ISDMACBON(vdp));
+
+#if defined(XPV_HVM_DRIVER)
+ xdf_hvm_rm(dip);
+#endif /* XPV_HVM_DRIVER */
+
+ if (vdp->xdf_timeout_id != 0)
+ (void) untimeout(vdp->xdf_timeout_id);
+
+ xvdi_remove_event_handler(dip, XS_OE_STATE);
+ ddi_taskq_destroy(vdp->xdf_ready_tq);
+
+ cmlb_detach(vdp->xdf_vd_lbl, NULL);
+ cmlb_free_handle(&vdp->xdf_vd_lbl);
+
+ /* we'll support backend running in domU later */
+#ifdef DOMU_BACKEND
+ (void) xvdi_post_event(dip, XEN_HP_REMOVE);
+#endif
+
+ list_destroy(&vdp->xdf_vreq_act);
+ ddi_prop_remove_all(dip);
+ xdf_kstat_delete(dip);
+ ddi_remove_softintr(vdp->xdf_softintr_id);
+ xvdi_remove_xb_watch_handlers(dip);
+ ddi_set_driver_private(dip, NULL);
+ cv_destroy(&vdp->xdf_dev_cv);
+ mutex_destroy(&vdp->xdf_cb_lk);
+ mutex_destroy(&vdp->xdf_dev_lk);
+ if (vdp->xdf_cache_flush_block != NULL)
+ kmem_free(vdp->xdf_flush_mem, 2 * DEV_BSIZE);
+ ddi_soft_state_free(xdf_ssp, instance);
+ return (DDI_SUCCESS);
+}
+
+/*
+ * Driver linkage structures.
+ */
+static struct cb_ops xdf_cbops = {
+ xdf_open,
+ xdf_close,
+ xdf_strategy,
+ nodev,
+ xdf_dump,
+ xdf_read,
+ xdf_write,
+ xdf_ioctl,
+ nodev,
+ nodev,
+ nodev,
+ nochpoll,
+ xdf_prop_op,
+ NULL,
+ D_MP | D_NEW | D_64BIT,
+ CB_REV,
+ xdf_aread,
+ xdf_awrite
+};
+
+struct dev_ops xdf_devops = {
+ DEVO_REV, /* devo_rev */
+ 0, /* devo_refcnt */
+ xdf_getinfo, /* devo_getinfo */
+ nulldev, /* devo_identify */
+ nulldev, /* devo_probe */
+ xdf_attach, /* devo_attach */
+ xdf_detach, /* devo_detach */
+ nodev, /* devo_reset */
+ &xdf_cbops, /* devo_cb_ops */
+ NULL, /* devo_bus_ops */
+ NULL, /* devo_power */
+ ddi_quiesce_not_supported, /* devo_quiesce */
+};
+
+/*
+ * Module linkage structures.
+ */
+static struct modldrv modldrv = {
+ &mod_driverops, /* Type of module. This one is a driver */
+ "virtual block driver", /* short description */
+ &xdf_devops /* driver specific ops */
+};
+
+static struct modlinkage xdf_modlinkage = {
+ MODREV_1, (void *)&modldrv, NULL
+};
+
+/*
+ * standard module entry points
+ */
+int
+_init(void)
+{
+ int rc;
+
+ xdf_major = ddi_name_to_major("xdf");
+ if (xdf_major == (major_t)-1)
return (EINVAL);
+
+ if ((rc = ddi_soft_state_init(&xdf_ssp, sizeof (xdf_t), 0)) != 0)
+ return (rc);
+
+ xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache",
+ sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+ xdf_gs_cache = kmem_cache_create("xdf_gs_cache",
+ sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
+
+#if defined(XPV_HVM_DRIVER)
+ xdf_hvm_init();
+#endif /* XPV_HVM_DRIVER */
+
+ if ((rc = mod_install(&xdf_modlinkage)) != 0) {
+#if defined(XPV_HVM_DRIVER)
+ xdf_hvm_fini();
+#endif /* XPV_HVM_DRIVER */
+ kmem_cache_destroy(xdf_vreq_cache);
+ kmem_cache_destroy(xdf_gs_cache);
+ ddi_soft_state_fini(&xdf_ssp);
+ return (rc);
}
- vdp->xdf_pgeom = *geomp;
- mutex_exit(&vdp->xdf_dev_lk);
+ return (rc);
+}
- /* force a re-validation */
- cmlb_invalidate(vdp->xdf_vd_lbl, NULL);
+int
+_fini(void)
+{
+	int err;
+
+ if ((err = mod_remove(&xdf_modlinkage)) != 0)
+ return (err);
+
+#if defined(XPV_HVM_DRIVER)
+ xdf_hvm_fini();
+#endif /* XPV_HVM_DRIVER */
+
+ kmem_cache_destroy(xdf_vreq_cache);
+ kmem_cache_destroy(xdf_gs_cache);
+ ddi_soft_state_fini(&xdf_ssp);
return (0);
}
-#endif /* XPV_HVM_DRIVER */
+int
+_info(struct modinfo *modinfop)
+{
+ return (mod_info(&xdf_modlinkage, modinfop));
+}
diff --git a/usr/src/uts/common/xen/io/xdf.h b/usr/src/uts/common/xen/io/xdf.h
index 7e61824096..acf606ba6c 100644
--- a/usr/src/uts/common/xen/io/xdf.h
+++ b/usr/src/uts/common/xen/io/xdf.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -28,6 +28,14 @@
#ifndef _SYS_XDF_H
#define _SYS_XDF_H
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/cmlb.h>
+#include <sys/dkio.h>
+
+#include <sys/gnttab.h>
+#include <xen/sys/xendev.h>
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -61,43 +69,50 @@ extern "C" {
 * vdc driver, whereas here it is used as an interface between the pv_cmdk
* driver and the xdf driver.)
*/
-#define XB_SLICE_NONE 0xFF
+#define XB_SLICE_NONE 0xFF
/*
* blkif status
*/
-enum xdf_state {
+typedef enum xdf_state {
/*
* initial state
*/
- XD_UNKNOWN,
+ XD_UNKNOWN = 0,
/*
* ring and evtchn alloced, xenbus state changed to
* XenbusStateInitialised, wait for backend to connect
*/
- XD_INIT,
+ XD_INIT = 1,
+ /*
+	 * backend and frontend xenbus states have changed to
+	 * XenbusStateConnected. IO is now allowed, but we are not yet
+	 * fully initialized.
+ */
+ XD_CONNECTED = 2,
/*
- * backend's xenbus state has changed to XenbusStateConnected,
- * this is the only state allowing I/Os
+ * We're fully initialized and allowing regular IO.
*/
- XD_READY,
+ XD_READY = 3,
/*
* vbd interface close request received from backend, no more I/O
 * requests are allowed to be put into the ring buffer, while the interrupt handler
* is allowed to run to finish any outstanding I/O request, disconnect
* process is kicked off by changing xenbus state to XenbusStateClosed
*/
- XD_CLOSING,
+ XD_CLOSING = 4,
/*
* disconnection process finished, both backend and frontend's
* xenbus state has been changed to XenbusStateClosed, can be detached
*/
- XD_CLOSED,
+ XD_CLOSED = 5,
/*
- * disconnection process finished, frontend is suspended
+ * We're either being suspended or resuming from a suspend. If we're
+	 * in the process of suspending, we block all new IO, but allow
+ * existing IO to drain.
*/
- XD_SUSPEND
-};
+ XD_SUSPEND = 6
+} xdf_state_t;
/*
* 16 partitions + fdisk
@@ -117,13 +132,13 @@ enum xdf_state {
* each blkif_request_t when sent out to the ring buffer.
*/
typedef struct ge_slot {
- list_node_t link;
- domid_t oeid;
- struct v_req *vreq;
- int isread;
- grant_ref_t ghead;
- int ngrefs;
- grant_ref_t ge[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+ list_node_t gs_vreq_link;
+ struct v_req *gs_vreq;
+ domid_t gs_oeid;
+ int gs_isread;
+ grant_ref_t gs_ghead;
+ int gs_ngrefs;
+ grant_ref_t gs_ge[BLKIF_MAX_SEGMENTS_PER_REQUEST];
} ge_slot_t;
/*
@@ -148,20 +163,21 @@ typedef struct ge_slot {
*/
typedef struct v_req {
list_node_t v_link;
+ list_t v_gs;
int v_status;
buf_t *v_buf;
- ddi_dma_handle_t v_dmahdl;
- ddi_dma_cookie_t v_dmac;
uint_t v_ndmacs;
uint_t v_dmaw;
uint_t v_ndmaws;
uint_t v_nslots;
- ge_slot_t *v_gs;
uint64_t v_blkno;
+ ddi_dma_handle_t v_memdmahdl;
ddi_acc_handle_t v_align;
+ ddi_dma_handle_t v_dmahdl;
+ ddi_dma_cookie_t v_dmac;
caddr_t v_abuf;
- ddi_dma_handle_t v_memdmahdl;
uint8_t v_flush_diskcache;
+ boolean_t v_runq;
} v_req_t;
/*
@@ -184,43 +200,56 @@ typedef struct v_req {
*/
typedef struct xdf {
dev_info_t *xdf_dip;
+ char *xdf_addr;
ddi_iblock_cookie_t xdf_ibc; /* mutex iblock cookie */
domid_t xdf_peer; /* otherend's dom ID */
xendev_ring_t *xdf_xb_ring; /* I/O ring buffer */
ddi_acc_handle_t xdf_xb_ring_hdl; /* access handler for ring buffer */
list_t xdf_vreq_act; /* active vreq list */
- list_t xdf_gs_act; /* active grant table slot list */
buf_t *xdf_f_act; /* active buf list head */
buf_t *xdf_l_act; /* active buf list tail */
- enum xdf_state xdf_status; /* status of this virtual disk */
+ buf_t *xdf_i_act; /* active buf list index */
+ xdf_state_t xdf_state; /* status of this virtual disk */
+ boolean_t xdf_suspending;
ulong_t xdf_vd_open[OTYPCNT];
ulong_t xdf_vd_lyropen[XDF_PEXT];
+ ulong_t xdf_connect_req;
ulong_t xdf_vd_exclopen;
kmutex_t xdf_iostat_lk; /* muxes lock for the iostat ptr */
kmutex_t xdf_dev_lk; /* mutex lock for I/O path */
kmutex_t xdf_cb_lk; /* mutex lock for event handling path */
kcondvar_t xdf_dev_cv; /* cv used in I/O path */
- uint_t xdf_xdev_info; /* disk info from backend xenstore */
+ uint_t xdf_dinfo; /* disk info from backend xenstore */
diskaddr_t xdf_xdev_nblocks; /* total size in block */
cmlb_geom_t xdf_pgeom;
+ boolean_t xdf_pgeom_set;
+ boolean_t xdf_pgeom_fixed;
kstat_t *xdf_xdev_iostat;
cmlb_handle_t xdf_vd_lbl;
ddi_softintr_t xdf_softintr_id;
timeout_id_t xdf_timeout_id;
struct gnttab_free_callback xdf_gnt_callback;
- int xdf_feature_barrier;
- int xdf_flush_supported;
- int xdf_wce;
+ boolean_t xdf_feature_barrier;
+ boolean_t xdf_flush_supported;
+ boolean_t xdf_media_req_supported;
+ boolean_t xdf_wce;
+ boolean_t xdf_cmbl_reattach;
char *xdf_flush_mem;
char *xdf_cache_flush_block;
int xdf_evtchn;
+ enum dkio_state xdf_mstate;
+ kcondvar_t xdf_mstate_cv;
+ kcondvar_t xdf_hp_status_cv;
+ struct buf *xdf_ready_bp;
+ ddi_taskq_t *xdf_ready_tq;
+ kthread_t *xdf_ready_tq_thread;
+ struct buf *xdf_ready_tq_bp;
#ifdef DEBUG
int xdf_dmacallback_num;
+ kthread_t *xdf_oe_change_thread;
#endif
} xdf_t;
-#define BP2VREQ(bp) ((v_req_t *)((bp)->av_back))
-
/*
* VBD I/O requests must be aligned on a 512-byte boundary and specify
 * a transfer size which is a multiple of 512 bytes
@@ -235,14 +264,14 @@ typedef struct xdf {
/* wrap pa_to_ma() for xdf to run in dom0 */
#define PATOMA(addr) (DOMAIN_IS_INITDOMAIN(xen_info) ? addr : pa_to_ma(addr))
-#define XD_IS_RO(vbd) ((vbd)->xdf_xdev_info & VDISK_READONLY)
-#define XD_IS_CD(vbd) ((vbd)->xdf_xdev_info & VDISK_CDROM)
-#define XD_IS_RM(vbd) ((vbd)->xdf_xdev_info & VDISK_REMOVABLE)
-#define IS_READ(bp) ((bp)->b_flags & B_READ)
-#define IS_ERROR(bp) ((bp)->b_flags & B_ERROR)
+#define XD_IS_RO(vbd) VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_READONLY)
+#define XD_IS_CD(vbd) VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_CDROM)
+#define XD_IS_RM(vbd) VOID2BOOLEAN((vbd)->xdf_dinfo & VDISK_REMOVABLE)
+#define IS_READ(bp) VOID2BOOLEAN((bp)->b_flags & B_READ)
+#define IS_ERROR(bp) VOID2BOOLEAN((bp)->b_flags & B_ERROR)
#define XDF_UPDATE_IO_STAT(vdp, bp) \
- if ((vdp)->xdf_xdev_iostat != NULL) { \
+ { \
kstat_io_t *kip = KSTAT_IO_PTR((vdp)->xdf_xdev_iostat); \
size_t n_done = (bp)->b_bcount - (bp)->b_resid; \
if ((bp)->b_flags & B_READ) { \
@@ -254,9 +283,8 @@ typedef struct xdf {
} \
}
-extern int xdfdebug;
#ifdef DEBUG
-#define DPRINTF(flag, args) {if (xdfdebug & (flag)) prom_printf args; }
+#define DPRINTF(flag, args) {if (xdf_debug & (flag)) prom_printf args; }
#define SETDMACBON(vbd) {(vbd)->xdf_dmacallback_num++; }
#define SETDMACBOFF(vbd) {(vbd)->xdf_dmacallback_num--; }
#define ISDMACBON(vbd) ((vbd)->xdf_dmacallback_num > 0)
@@ -276,11 +304,18 @@ extern int xdfdebug;
#define LBL_DBG 0x80
#if defined(XPV_HVM_DRIVER)
-extern dev_info_t *xdf_hvm_hold(char *);
-extern int xdf_hvm_connect(dev_info_t *);
+extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
+extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
+ void *);
+extern void xdfmin(struct buf *bp);
+extern dev_info_t *xdf_hvm_hold(const char *);
+extern boolean_t xdf_hvm_connect(dev_info_t *);
extern int xdf_hvm_setpgeom(dev_info_t *, cmlb_geom_t *);
extern int xdf_kstat_create(dev_info_t *, char *, int);
extern void xdf_kstat_delete(dev_info_t *);
+extern boolean_t xdf_is_cd(dev_info_t *);
+extern boolean_t xdf_is_rm(dev_info_t *);
+extern boolean_t xdf_media_req_supported(dev_info_t *);
#endif /* XPV_HVM_DRIVER */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/xen/io/xenbus_probe.c b/usr/src/uts/common/xen/io/xenbus_probe.c
index ebf3a12a3e..050f11ad1c 100644
--- a/usr/src/uts/common/xen/io/xenbus_probe.c
+++ b/usr/src/uts/common/xen/io/xenbus_probe.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -53,8 +53,6 @@
* IN THE SOFTWARE.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#endif
@@ -76,7 +74,7 @@ read_otherend_details(struct xenbus_device *xendev,
return (err);
}
if (strlen(xendev->otherend) == 0 ||
- !xenbus_exists(XBT_NULL, xendev->otherend, "")) {
+ !xenbus_exists_dir(xendev->otherend, "")) {
xenbus_dev_fatal(xendev, X_ENOENT, "missing other end from %s",
xendev->nodename);
kmem_free((void *)xendev->otherend,
diff --git a/usr/src/uts/common/xen/io/xenbus_xs.c b/usr/src/uts/common/xen/io/xenbus_xs.c
index 39f41ecd60..12f07530c0 100644
--- a/usr/src/uts/common/xen/io/xenbus_xs.c
+++ b/usr/src/uts/common/xen/io/xenbus_xs.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -64,8 +64,6 @@
* the functions return error codes.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/errno.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
@@ -445,21 +443,34 @@ xenbus_directory(xenbus_transaction_t t,
return (split(strings, len, num));
}
-/* Check if a path exists. Return 1 if it does. */
-int
-xenbus_exists(xenbus_transaction_t t, const char *dir, const char *node)
+/* Check if a path exists. */
+boolean_t
+xenbus_exists(const char *dir, const char *node)
+{
+ void *p;
+ uint_t n;
+
+ if (xenbus_read(XBT_NULL, dir, node, &p, &n) != 0)
+ return (B_FALSE);
+ kmem_free(p, n);
+ return (B_TRUE);
+}
+
+/* Check if a directory path exists. */
+boolean_t
+xenbus_exists_dir(const char *dir, const char *node)
{
char **d;
unsigned int dir_n;
int i, len;
- d = xenbus_directory(t, dir, node, &dir_n);
+ d = xenbus_directory(XBT_NULL, dir, node, &dir_n);
if (d == NULL)
- return (0);
+ return (B_FALSE);
for (i = 0, len = 0; i < dir_n; i++)
len += strlen(d[i]) + 1 + sizeof (char *);
kmem_free(d, len);
- return (1);
+ return (B_TRUE);
}
/*
@@ -480,6 +491,34 @@ xenbus_read(xenbus_transaction_t t,
return (err);
}
+int
+xenbus_read_str(const char *dir, const char *node, char **retp)
+{
+ uint_t n;
+ int err;
+ char *str;
+
+ /*
+	 * Since we access the xenbus value immediately, we can't be
+ * part of a transaction.
+ */
+ if ((err = xenbus_read(XBT_NULL, dir, node, (void **)&str, &n)) != 0)
+ return (err);
+ ASSERT((str != NULL) && (n > 0));
+
+ /*
+	 * Why bother with this? Because xenbus is truly annoying in
+	 * that when it returns a string, it doesn't guarantee that
+	 * the memory that holds the string is of size strlen() + 1.
+	 * This forces callers to keep track of the size of the memory
+	 * containing the string. Ugh. We'll work around this by
+	 * re-allocating strings so they are always of size strlen() + 1.
+ */
+ *retp = strdup(str);
+ kmem_free(str, n);
+ return (0);
+}
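
As a usage sketch for the helper above (together with xenbus_exists() from
earlier in this hunk), here is roughly how a frontend driver consumes a
string property; it mirrors the device-type check in xdf_attach() earlier in
this patch. The xsname variable is assumed to come from xvdi_get_xsname(dip),
and the surrounding error handling is illustrative only, not part of this
change.

	char *str;
	boolean_t dev_iscd = B_FALSE;

	/* Read the optional "device-type" property, if it exists. */
	if (xenbus_exists(xsname, XBP_DEV_TYPE)) {
		if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0)
			return (DDI_FAILURE);
		if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
			dev_iscd = B_TRUE;
		/* xenbus_read_str() strdup()s the value; free with strfree(9F) */
		strfree(str);
	}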
+
/*
* Write the value of a single file.
* Returns err on failure.
diff --git a/usr/src/uts/common/xen/os/xvdi.c b/usr/src/uts/common/xen/os/xvdi.c
index 74c8ccb14c..67e0ad1e42 100644
--- a/usr/src/uts/common/xen/os/xvdi.c
+++ b/usr/src/uts/common/xen/os/xvdi.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -62,6 +62,7 @@
#include <sys/bootsvcs.h>
#include <sys/bootinfo.h>
#include <sys/note.h>
+#include <sys/sysmacros.h>
#ifdef XPV_HVM_DRIVER
#include <sys/xpv_support.h>
#include <sys/hypervisor.h>
@@ -263,6 +264,8 @@ xvdi_init_dev(dev_info_t *dip)
pdp->xd_vdevnum = vdevnum;
pdp->xd_devclass = devcls;
pdp->xd_evtchn = INVALID_EVTCHN;
+ list_create(&pdp->xd_xb_watches, sizeof (xd_xb_watches_t),
+ offsetof(xd_xb_watches_t, xxw_list));
mutex_init(&pdp->xd_evt_lk, NULL, MUTEX_DRIVER, NULL);
mutex_init(&pdp->xd_ndi_lk, NULL, MUTEX_DRIVER, NULL);
ddi_set_parent_data(dip, pdp);
@@ -1196,6 +1199,188 @@ i_xvdi_bepath_cb(struct xenbus_watch *w, const char **vec, unsigned int len)
}
}
+static void
+i_xvdi_xb_watch_free(xd_xb_watches_t *xxwp)
+{
+ ASSERT(xxwp->xxw_ref == 0);
+ strfree((char *)xxwp->xxw_watch.node);
+ kmem_free(xxwp, sizeof (*xxwp));
+}
+
+static void
+i_xvdi_xb_watch_release(xd_xb_watches_t *xxwp)
+{
+ ASSERT(MUTEX_HELD(&xxwp->xxw_xppd->xd_ndi_lk));
+ ASSERT(xxwp->xxw_ref > 0);
+ if (--xxwp->xxw_ref == 0)
+ i_xvdi_xb_watch_free(xxwp);
+}
+
+static void
+i_xvdi_xb_watch_hold(xd_xb_watches_t *xxwp)
+{
+ ASSERT(MUTEX_HELD(&xxwp->xxw_xppd->xd_ndi_lk));
+ ASSERT(xxwp->xxw_ref > 0);
+ xxwp->xxw_ref++;
+}
+
+static void
+i_xvdi_xb_watch_cb_tq(void *arg)
+{
+ xd_xb_watches_t *xxwp = (xd_xb_watches_t *)arg;
+ dev_info_t *dip = (dev_info_t *)xxwp->xxw_watch.dev;
+ struct xendev_ppd *pdp = xxwp->xxw_xppd;
+
+ xxwp->xxw_cb(dip, xxwp->xxw_watch.node, xxwp->xxw_arg);
+
+ mutex_enter(&pdp->xd_ndi_lk);
+ i_xvdi_xb_watch_release(xxwp);
+ mutex_exit(&pdp->xd_ndi_lk);
+}
+
+static void
+i_xvdi_xb_watch_cb(struct xenbus_watch *w, const char **vec, unsigned int len)
+{
+ dev_info_t *dip = (dev_info_t *)w->dev;
+ struct xendev_ppd *pdp = ddi_get_parent_data(dip);
+ xd_xb_watches_t *xxwp;
+
+ ASSERT(len > XS_WATCH_PATH);
+ ASSERT(vec[XS_WATCH_PATH] != NULL);
+
+ mutex_enter(&pdp->xd_ndi_lk);
+ for (xxwp = list_head(&pdp->xd_xb_watches); xxwp != NULL;
+ xxwp = list_next(&pdp->xd_xb_watches, xxwp)) {
+ if (w == &xxwp->xxw_watch)
+ break;
+ }
+
+ if (xxwp == NULL) {
+ mutex_exit(&pdp->xd_ndi_lk);
+ return;
+ }
+
+ i_xvdi_xb_watch_hold(xxwp);
+ (void) ddi_taskq_dispatch(pdp->xd_xb_watch_taskq,
+ i_xvdi_xb_watch_cb_tq, xxwp, DDI_SLEEP);
+ mutex_exit(&pdp->xd_ndi_lk);
+}
+
+/*
+ * Any watches registered with xvdi_add_xb_watch_handler() get torn down during
+ * a suspend operation. So if a frontend driver wants to use these interfaces,
+ * that driver is responsible for re-registering any watches it had before
+ * the suspend operation.
+ */
+int
+xvdi_add_xb_watch_handler(dev_info_t *dip, const char *dir, const char *node,
+ xvdi_xb_watch_cb_t cb, void *arg)
+{
+ struct xendev_ppd *pdp = ddi_get_parent_data(dip);
+ xd_xb_watches_t *xxw_new, *xxwp;
+ char *path;
+ int n;
+
+ ASSERT((dip != NULL) && (dir != NULL) && (node != NULL));
+ ASSERT(cb != NULL);
+
+ n = strlen(dir) + 1 + strlen(node) + 1;
+ path = kmem_zalloc(n, KM_SLEEP);
+ (void) strlcat(path, dir, n);
+ (void) strlcat(path, "/", n);
+ (void) strlcat(path, node, n);
+ ASSERT((strlen(path) + 1) == n);
+
+ xxw_new = kmem_zalloc(sizeof (*xxw_new), KM_SLEEP);
+ xxw_new->xxw_ref = 1;
+ xxw_new->xxw_watch.node = path;
+ xxw_new->xxw_watch.callback = i_xvdi_xb_watch_cb;
+ xxw_new->xxw_watch.dev = (struct xenbus_device *)dip;
+ xxw_new->xxw_xppd = pdp;
+ xxw_new->xxw_cb = cb;
+ xxw_new->xxw_arg = arg;
+
+ mutex_enter(&pdp->xd_ndi_lk);
+
+ /*
+ * If this is the first watch we're setting up, create a taskq
+ * to dispatch watch events and initialize the watch list.
+ */
+ if (pdp->xd_xb_watch_taskq == NULL) {
+ char tq_name[TASKQ_NAMELEN];
+
+ ASSERT(list_is_empty(&pdp->xd_xb_watches));
+
+ (void) snprintf(tq_name, sizeof (tq_name),
+ "%s_xb_watch_tq", ddi_get_name(dip));
+
+ if ((pdp->xd_xb_watch_taskq = ddi_taskq_create(dip, tq_name,
+ 1, TASKQ_DEFAULTPRI, 0)) == NULL) {
+ i_xvdi_xb_watch_release(xxw_new);
+ mutex_exit(&pdp->xd_ndi_lk);
+ return (DDI_FAILURE);
+ }
+ }
+
+ /* Don't allow duplicate watches to be registered */
+ for (xxwp = list_head(&pdp->xd_xb_watches); xxwp != NULL;
+ xxwp = list_next(&pdp->xd_xb_watches, xxwp)) {
+
+ ASSERT(strcmp(xxwp->xxw_watch.node, path) != 0);
+ if (strcmp(xxwp->xxw_watch.node, path) != 0)
+ continue;
+ i_xvdi_xb_watch_release(xxw_new);
+ mutex_exit(&pdp->xd_ndi_lk);
+ return (DDI_FAILURE);
+ }
+
+ if (register_xenbus_watch(&xxw_new->xxw_watch) != 0) {
+ if (list_is_empty(&pdp->xd_xb_watches)) {
+ ddi_taskq_destroy(pdp->xd_xb_watch_taskq);
+ pdp->xd_xb_watch_taskq = NULL;
+ }
+ i_xvdi_xb_watch_release(xxw_new);
+ mutex_exit(&pdp->xd_ndi_lk);
+ return (DDI_FAILURE);
+ }
+
+ list_insert_head(&pdp->xd_xb_watches, xxw_new);
+ mutex_exit(&pdp->xd_ndi_lk);
+ return (DDI_SUCCESS);
+}
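
Since watches added through this interface are torn down on suspend (per the
block comment above), a frontend's resume path has to add them back. A
minimal sketch, assuming a hypothetical my_hp_status_cb callback of type
xvdi_xb_watch_cb_t; it mirrors what xdf_resume() does earlier in this patch:

	char *oename;

	/* Re-register the hotplug-status watch lost across the suspend. */
	if (((oename = xvdi_get_oename(dip)) == NULL) ||
	    (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS,
	    my_hp_status_cb, NULL) != DDI_SUCCESS))
		return (DDI_FAILURE);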
+
+/*
+ * Tear down all xenbus watches registered by the specified dip.
+ */
+void
+xvdi_remove_xb_watch_handlers(dev_info_t *dip)
+{
+ struct xendev_ppd *pdp = ddi_get_parent_data(dip);
+ xd_xb_watches_t *xxwp;
+ ddi_taskq_t *tq;
+
+ mutex_enter(&pdp->xd_ndi_lk);
+
+ while ((xxwp = list_remove_head(&pdp->xd_xb_watches)) != NULL) {
+ unregister_xenbus_watch(&xxwp->xxw_watch);
+ i_xvdi_xb_watch_release(xxwp);
+ }
+ ASSERT(list_is_empty(&pdp->xd_xb_watches));
+
+ /*
+ * We can't hold xd_ndi_lk while we destroy the xd_xb_watch_taskq.
+ * This is because if there are currently any executing taskq threads,
+ * we will block until they are finished, and to finish they need
+ * to acquire xd_ndi_lk in i_xvdi_xb_watch_cb_tq() so they can release
+ * their reference on their corresponding xxwp structure.
+ */
+ tq = pdp->xd_xb_watch_taskq;
+ pdp->xd_xb_watch_taskq = NULL;
+ mutex_exit(&pdp->xd_ndi_lk);
+ if (tq != NULL)
+ ddi_taskq_destroy(tq);
+}
+
static int
i_xvdi_add_watch_oestate(dev_info_t *dip)
{
@@ -1417,6 +1602,8 @@ i_xvdi_rem_watches(dev_info_t *dip)
i_xvdi_rem_watch_hpstate(dip);
mutex_exit(&pdp->xd_ndi_lk);
+
+ xvdi_remove_xb_watch_handlers(dip);
}
static int
diff --git a/usr/src/uts/common/xen/sys/xenbus_impl.h b/usr/src/uts/common/xen/sys/xenbus_impl.h
index 0042b1bc99..b633a529f9 100644
--- a/usr/src/uts/common/xen/sys/xenbus_impl.h
+++ b/usr/src/uts/common/xen/sys/xenbus_impl.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -57,8 +57,6 @@
#ifndef _SYS_XENBUS_H
#define _SYS_XENBUS_H
-#pragma ident "%Z%%M% %I% %E% SMI"
-
#include <sys/mutex.h>
#include <sys/list.h>
@@ -71,13 +69,14 @@ extern "C" {
typedef uint32_t xenbus_transaction_t;
/* Register callback to watch this node. */
-struct xenbus_watch
-{
+struct xenbus_watch;
+typedef void (*xenbus_watch_cb_t)(struct xenbus_watch *,
+ const char **vec, unsigned int len);
+struct xenbus_watch {
list_t list;
- const char *node; /* path being watched */
- void (*callback)(struct xenbus_watch *,
- const char **vec, unsigned int len);
- struct xenbus_device *dev;
+ const char *node; /* path being watched */
+ xenbus_watch_cb_t callback;
+ struct xenbus_device *dev;
};
/*
@@ -103,17 +102,29 @@ struct xenbus_device {
void *data;
};
+typedef void (*xvdi_xb_watch_cb_t)(dev_info_t *dip, const char *path,
+ void *arg);
+
+typedef struct xd_xb_watches {
+ list_node_t xxw_list;
+ int xxw_ref;
+ struct xenbus_watch xxw_watch;
+ struct xendev_ppd *xxw_xppd;
+ xvdi_xb_watch_cb_t xxw_cb;
+ void *xxw_arg;
+} xd_xb_watches_t;
extern char **xenbus_directory(xenbus_transaction_t t, const char *dir,
const char *node, unsigned int *num);
extern int xenbus_read(xenbus_transaction_t t, const char *dir,
const char *node, void **rstr, unsigned int *len);
+extern int xenbus_read_str(const char *dir, const char *node, char **rstr);
extern int xenbus_write(xenbus_transaction_t t, const char *dir,
const char *node, const char *string);
extern int xenbus_mkdir(xenbus_transaction_t t, const char *dir,
const char *node);
-extern int xenbus_exists(xenbus_transaction_t t, const char *dir,
- const char *node);
+extern boolean_t xenbus_exists(const char *dir, const char *node);
+extern boolean_t xenbus_exists_dir(const char *dir, const char *node);
extern int xenbus_rm(xenbus_transaction_t t, const char *dir,
const char *node);
extern int xenbus_transaction_start(xenbus_transaction_t *t);
diff --git a/usr/src/uts/common/xen/sys/xendev.h b/usr/src/uts/common/xen/sys/xendev.h
index 1f3df3c1ba..8e5921dc3f 100644
--- a/usr/src/uts/common/xen/sys/xendev.h
+++ b/usr/src/uts/common/xen/sys/xendev.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -42,6 +42,75 @@ extern "C" {
#endif
/*
+ * Xenbus property interfaces, initialized by framework
+ */
+#define XBP_HP_STATUS "hotplug-status" /* backend prop: str */
+#define XBV_HP_STATUS_CONN "connected" /* backend prop val */
+#define XBP_DEV_TYPE "device-type" /* backend prop: str */
+#define XBV_DEV_TYPE_CD "cdrom" /* backend prop val */
+
+/*
+ * Xenbus property interfaces, initialized by backend disk driver
+ */
+#define XBP_SECTORS "sectors" /* backend prop: uint64 */
+#define XBP_INFO "info" /* backend prop: uint */
+#define XBP_FB "feature-barrier" /* backend prop: boolean int */
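
To make the backend-initialized properties concrete, here is a hedged sketch
of a frontend reading XBP_SECTORS via the xenbus_read_str() helper added by
this patch. The oename variable is assumed to come from xvdi_get_oename(dip),
and parsing into a long with ddi_strtol(9F) is a simplification; the real
driver tracks the block count as a 64-bit value.

	char *str;
	long nblocks;

	if (xenbus_read_str(oename, XBP_SECTORS, &str) != 0)
		return (DDI_FAILURE);
	if (ddi_strtol(str, NULL, 10, &nblocks) != 0)
		nblocks = 0;	/* unparsable: treat the size as unknown */
	strfree(str);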
+
+/*
+ * Xenbus property interfaces, initialized by frontend disk driver
+ */
+#define XBP_RING_REF "ring-ref" /* frontend prop: long */
+#define XBP_EVENT_CHAN "event-channel" /* frontend prop: long */
+#define XBP_PROTOCOL "protocol" /* frontend prop: string */
+
+/*
+ * Xenbus CDROM property interfaces, used by backend and frontend
+ *
+ * XBP_MEDIA_REQ_SUP
+ * - Backend xenbus property located at:
+ * backend/vbd/<domU_id>/<domU_dev>/media-req-sup
+ *    - Set by the backend, consumed by the frontend.
+ * - A boolean integer property indicating backend support
+ * for the XBP_MEDIA_REQ property.
+ *
+ * XBP_MEDIA_REQ
+ * - Frontend xenbus property located at:
+ * /local/domain/<domU_id>/device/vbd/<domU_dev>/media-req
+ * - Set and consumed by both the frontend and backend.
+ * - Possible values:
+ * XBV_MEDIA_REQ_NONE, XBV_MEDIA_REQ_LOCK, and XBV_MEDIA_REQ_EJECT
+ * - Only applies to CDROM devices.
+ *
+ * XBV_MEDIA_REQ_NONE
+ *    - XBP_MEDIA_REQ property value
+ *    - Set and consumed by both the frontend and backend.
+ *    - Indicates that there are currently no outstanding media request
+ *      operations.
+ *
+ * XBV_MEDIA_REQ_LOCK
+ *    - XBP_MEDIA_REQ property value
+ *    - Set by the frontend, consumed by the backend.
+ *    - Indicates to the backend that the current media is locked
+ * and changes to the media (via xm block-configure for example)
+ * should not be allowed.
+ *
+ * XBV_MEDIA_REQ_EJECT
+ *    - XBP_MEDIA_REQ property value
+ *    - Set by the frontend, consumed by the backend.
+ *    - Indicates to the backend that the current media should be ejected.
+ *      This means that the backend should close its connection to
+ *      the frontend device, close its current backing store device/file,
+ *      and then set the media-req property to XBV_MEDIA_REQ_NONE (to
+ *      indicate that the eject operation is complete).
+ */
+#define XBP_MEDIA_REQ_SUP "media-req-sup" /* backend prop: boolean int */
+#define XBP_MEDIA_REQ "media-req" /* frontend prop: str */
+#define XBV_MEDIA_REQ_NONE "none" /* frontend prop val */
+#define XBV_MEDIA_REQ_LOCK "lock" /* frontend prop val */
+#define XBV_MEDIA_REQ_EJECT "eject" /* frontend prop val */
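
The eject handshake described above reduces to a short sequence; the sketch
below is illustrative only. The fename variable (the frontend's xenstore
path) and the polling loop are assumptions for brevity; an actual frontend
would wait on a xenbus watch for the property change rather than poll.

	char *str;

	/* Ask the backend to eject the current media. */
	if (xenbus_write(XBT_NULL, fename, XBP_MEDIA_REQ,
	    XBV_MEDIA_REQ_EJECT) != 0)
		return (EIO);

	/* The backend resets the property to "none" when the eject is done. */
	for (;;) {
		if (xenbus_read_str(fename, XBP_MEDIA_REQ, &str) != 0)
			return (EIO);
		if (strcmp(str, XBV_MEDIA_REQ_NONE) == 0) {
			strfree(str);
			break;
		}
		strfree(str);
		delay(drv_usectohz(100000));	/* check again in 100ms */
	}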
+
+/*
* Xen device class codes
*/
typedef enum {
@@ -95,6 +164,8 @@ struct xendev_ppd {
ddi_callback_id_t xd_hp_ehid;
ddi_taskq_t *xd_oe_taskq;
ddi_taskq_t *xd_hp_taskq;
+ ddi_taskq_t *xd_xb_watch_taskq;
+ list_t xd_xb_watches;
};
#define XS_OE_STATE "SUNW,xendev:otherend_state"
@@ -137,6 +208,10 @@ int xvdi_init_dev(dev_info_t *);
void xvdi_uninit_dev(dev_info_t *);
dev_info_t *xvdi_find_dev(dev_info_t *, xendev_devclass_t, domid_t, int);
+extern int xvdi_add_xb_watch_handler(dev_info_t *, const char *,
+ const char *, xvdi_xb_watch_cb_t cb, void *);
+extern void xvdi_remove_xb_watch_handlers(dev_info_t *);
+
/*
* common ring interfaces
*/
diff --git a/usr/src/uts/i86pc/i86hvm/Makefile.files b/usr/src/uts/i86pc/i86hvm/Makefile.files
index 03ff880f7c..e912c36f7c 100644
--- a/usr/src/uts/i86pc/i86hvm/Makefile.files
+++ b/usr/src/uts/i86pc/i86hvm/Makefile.files
@@ -20,11 +20,9 @@
#
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
# This Makefile defines file modules in the directory uts/i86pc/i86hvm
# and its children. These are the source files which are i86pc/i86hvm
# "implementation architecture" dependent.
@@ -33,9 +31,12 @@
#
# Define objects
#
-PV_CMDK_OBJS += pv_cmdk.o
-PV_RTLS_OBJS += pv_rtls.o
HVM_BOOTSTRAP_OBJS += hvm_bootstrap.o
+HVM_CMDK_OBJS += cmdk.o
+HVM_SD_OBJS += sd.o sd_xbuf.o
+PV_CMDK_OBJS += pv_cmdk.o xdf_shell.o
+PV_RTLS_OBJS += pv_rtls.o
+PV_SD_OBJS += pv_sd.o xdf_shell.o
XDF_OBJS += xdf.o
XNF_OBJS += xnf.o
XPV_OBJS += xpv_support.o xvdi.o gnttab.o evtchn.o \
diff --git a/usr/src/uts/i86pc/i86hvm/Makefile.i86hvm b/usr/src/uts/i86pc/i86hvm/Makefile.i86hvm
index 0e414c5fb1..721950a871 100644
--- a/usr/src/uts/i86pc/i86hvm/Makefile.i86hvm
+++ b/usr/src/uts/i86pc/i86hvm/Makefile.i86hvm
@@ -21,11 +21,9 @@
#
# uts/i86pc/Makefile.hvm
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
-#
# This makefile provides support for building PV drivers that run
# in an HVM environment.
#
@@ -45,8 +43,8 @@ HVM_LINT_LIB_DIR= $(UTSBASE)/$(PLATFORM)/i86hvm/lint-libs/$(OBJS_DIR)
#
# Define modules.
#
-HVM_DRV_KMODS = pv_cmdk pv_rtls xdf xnf xpv xpvd
-HVM_MISC_KMODS = hvm_bootstrap
+HVM_DRV_KMODS = pv_cmdk pv_sd pv_rtls xdf xnf xpv xpvd
+HVM_MISC_KMODS = hvm_bootstrap hvm_cmdk hvm_sd
HVM_KMODS = $(HVM_DRV_KMODS) $(HVM_MISC_KMODS)
include $(UTSBASE)/i86pc/i86hvm/Makefile.files
diff --git a/usr/src/uts/i86pc/i86hvm/hvm_cmdk/Makefile b/usr/src/uts/i86pc/i86hvm/hvm_cmdk/Makefile
new file mode 100644
index 0000000000..a4b0995bed
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/hvm_cmdk/Makefile
@@ -0,0 +1,100 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/i86pc/i86hvm/hvm_cmdk/Makefile
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the hvm_cmdk driver.
+#
+# i86pc implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = hvm_cmdk
+OBJECTS = $(HVM_CMDK_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(HVM_CMDK_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_HVM_MISC_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+#
+# Overrides.
+#
+DEBUG_FLGS =
+DEBUG_DEFS += $(DEBUG_FLGS)
+LDFLAGS += -dy -Nmisc/dadk -Nmisc/strategy -Nmisc/cmlb
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+LINTTAGS += -erroff=E_STATIC_UNUSED
+
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.targ
diff --git a/usr/src/uts/i86pc/i86hvm/hvm_sd/Makefile b/usr/src/uts/i86pc/i86hvm/hvm_sd/Makefile
new file mode 100644
index 0000000000..f6b3802cbd
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/hvm_sd/Makefile
@@ -0,0 +1,98 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/i86pc/i86hvm/hvm_sd/Makefile
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the hvm_sd driver.
+#
+# i86pc implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../../..
+
+#
+# Define the module and object file sets.
+#
+# Normally when compiling sd there are .conf file definitions and
+# definitions for warlock, but we don't bother with those here.
+#
+MODULE = hvm_sd
+OBJECTS = $(HVM_SD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(HVM_SD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_HVM_MISC_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+INC_PATH += -I$(UTSBASE)/intel/io/scsi/targets
+
+#
+# For now, disable these lint checks; maintainers should endeavor
+# to investigate and remove these for maximum lint coverage.
+# Please do not carry these forward to new Makefiles.
+#
+LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS += -erroff=E_STATIC_UNUSED
+LINTTAGS += -erroff=E_PTRDIFF_OVERFLOW
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.targ
diff --git a/usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c b/usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c
index efa30c35e1..35dc9afa2d 100644
--- a/usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c
+++ b/usr/src/uts/i86pc/i86hvm/io/pv_cmdk.c
@@ -18,152 +18,42 @@
*
* CDDL HEADER END
*/
-
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-#include <sys/scsi/scsi_types.h>
-#include <sys/modctl.h>
-#include <sys/cmlb.h>
-#include <sys/types.h>
-#include <sys/xpv_support.h>
-#include <sys/xendev.h>
-#include <sys/gnttab.h>
-#include <public/xen.h>
-#include <public/grant_table.h>
-#include <io/xdf.h>
-#include <sys/vtoc.h>
-#include <sys/dkio.h>
-#include <sys/dktp/dadev.h>
-#include <sys/dktp/dadkio.h>
-#include <sys/dktp/tgdk.h>
-#include <sys/dktp/bbh.h>
-#include <sys/dktp/cmdk.h>
-#include <sys/dktp/altsctr.h>
+#include <io/xdf_shell.h>
/*
- * General Notes
- *
- * We don't support disks with bad block mappins. We have this
- * limitation because the underlying xdf driver doesn't support
- * bad block remapping. If there is a need to support this feature
- * it should be added directly to the xdf driver and we should just
- * pass requests strait on through and let it handle the remapping.
- * Also, it's probably worth pointing out that most modern disks do bad
- * block remapping internally in the hardware so there's actually less
- * of a chance of us ever discovering bad blocks. Also, in most cases
- * this driver (and the xdf driver) will only be used with virtualized
- * devices, so one might wonder why a virtual device would ever actually
- * experience bad blocks. To wrap this up, you might be wondering how
- * these bad block mappings get created and how they are managed. Well,
- * there are two tools for managing bad block mappings, format(1M) and
- * addbadsec(1M). Format(1M) can be used to do a surface scan of a disk
- * to attempt to find bad block and create mappings for them. Format(1M)
- * and addbadsec(1M) can also be used to edit existing mappings that may
- * be saved on the disk.
- *
- * The underlying PV driver that this driver passes on requests to is the
- * xdf driver. Since in most cases the xdf driver doesn't deal with
- * physical disks it has it's own algorithm for assigning a physical
- * geometry to a virtual disk (ie, cylinder count, head count, etc.)
- * The default values chosen by the xdf driver may not match those
- * assigned to a disk by a hardware disk emulator in an HVM environment.
- * This is a problem since these physical geometry attributes affect
- * things like the partition table, backup label location, etc. So
- * to emulate disk devices correctly we need to know the physical geometry
- * that was assigned to a disk at the time of it's initalization.
- * Normally in an HVM environment this information will passed to
- * the BIOS and operating system from the hardware emulator that is
- * emulating the disk devices. In the case of a solaris dom0+xvm
- * this would be qemu. So to work around this issue, this driver will
- * query the emulated hardware to get the assigned physical geometry
- * and then pass this geometry onto the xdf driver so that it can use it.
- * But really, this information is essentially metadata about the disk
- * that should be kept with the disk image itself. (Assuming or course
- * that a disk image is the actual backingstore for this emulated device.)
- * This metadata should also be made available to PV drivers via a common
- * mechamisn, probably the xenstore. The fact that this metadata isn't
- * available outside of HVM domains means that it's difficult to move
- * disks between HVM and PV domains, since a fully PV domain will have no
- * way of knowing what the correct geometry of the target device is.
- * (Short of reading the disk, looking for things like partition tables
- * and labels, and taking a best guess at what the geometry was when
- * the disk was initialized. Unsuprisingly, qemu actually does this.)
- *
- * This driver has to map cmdk device instances into their corresponding
- * xdf device instances. We have to do this to ensure that when a user
- * accesses a emulated cmdk device we map those accesses to the proper
- * paravirtualized device. Basically what we need to know is how multiple
- * 'disk' entries in a domU configuration file get mapped to emulated
- * cmdk devices and to xdf devices. The 'disk' entry to xdf instance
- * mappings we know because those are done within the Solaris xvdi code
- * and the xpvd nexus driver. But the config to emulated devices mappings
- * are handled entirely within the xen management tool chain and the
- * hardware emulator. Since all the tools that establish these mappings
- * live in dom0, dom0 should really supply us with this information,
- * probably via the xenstore. Unfortunatly it doesn't so, since there's
- * no good way to determine this mapping dynamically, this driver uses
- * a hard coded set of static mappings. These mappings are hardware
- * emulator specific because each different hardware emulator could have
- * a different device tree with different cmdk device paths. This
- * means that if we want to continue to use this static mapping approach
- * to allow Solaris to run on different hardware emulators we'll have
- * to analyze each of those emulators to determine what paths they
- * use and hard code those paths into this driver. yech. This metadata
- * really needs to be supplied to us by dom0.
- *
- * This driver access underlying xdf nodes. Unfortunatly, devices
- * must create minor nodes during attach, and for disk devices to create
- * minor nodes, they have to look at the label on the disk, so this means
- * that disk drivers must be able to access a disk contents during
- * attach. That means that this disk driver must be able to access
- * underlying xdf nodes during attach. Unfortunatly, due to device tree
- * locking restrictions, we cannot have an attach operation occuring on
- * this device and then attempt to access another device which may
- * cause another attach to occur in a different device tree branch
- * since this could result in deadlock. Hence, this driver can only
- * access xdf device nodes that we know are attached, and it can't use
- * any ddi interfaces to access those nodes if those interfaces could
- * trigger an attach of the xdf device. So this driver works around
- * these restrictions by talking directly to xdf devices via
- * xdf_hvm_hold(). This interface takes a pathname to an xdf device,
- * and if that device is already attached then it returns the a held dip
- * pointer for that device node. This prevents us from getting into
- * deadlock situations, but now we need a mechanism to ensure that all
- * the xdf device nodes this driver might access are attached before
- * this driver tries to access them. This is accomplished via the
- * hvmboot_rootconf() callback which is invoked just before root is
- * mounted. hvmboot_rootconf() will attach xpvd and tell it to configure
- * all xdf device visible to the system. All these xdf device nodes
- * will also be marked with the "ddi-no-autodetach" property so that
- * once they are configured, the will not be automatically unconfigured.
- * The only way that they could be unconfigured is if the administrator
- * explicitly attempts to unload required modules via rem_drv(1M)
- * or modunload(1M).
+ * We're emulating (and possibly layering on top of) cmdk devices, so xdf
+ * disk unit mappings must match up with cmdk disk unit mappings.
*/
+#if !defined(XDF_PSHIFT)
+#error "can't find definition for xdf unit mappings - XDF_PSHIFT"
+#endif /* XDF_PSHIFT */
-/*
- * 16 paritions + fdisk (see xdf.h)
- */
-#define XDF_DEV2UNIT(dev) XDF_INST((getminor((dev))))
-#define XDF_DEV2PART(dev) XDF_PART((getminor((dev))))
-
-#define OTYP_VALID(otyp) ((otyp == OTYP_BLK) || \
- (otyp == OTYP_CHR) || \
- (otyp == OTYP_LYR))
+#if !defined(CMDK_UNITSHF)
+#error "can't find definition for cmdk unit mappings - CMDK_UNITSHF"
+#endif /* CMDK_UNITSHF */
-#define PV_CMDK_NODES 4
+#if ((XDF_PSHIFT - CMDK_UNITSHF) != 0)
+#error "cmdk and xdf unit mappings don't match."
+#endif /* ((XDF_PSHIFT - CMDK_UNITSHF) != 0) */
-typedef struct hvm_to_pv {
- char *h2p_hvm_path;
- char *h2p_pv_path;
-} hvm_to_pv_t;
+extern const struct dev_ops cmdk_ops;
+extern void *cmdk_state;
/*
+ * Globals required by xdf_shell.c
*/
-static hvm_to_pv_t pv_cmdk_h2p_xen_qemu[] = {
+const char *xdfs_c_name = "cmdk";
+const char *xdfs_c_linkinfo = "PV Common Direct Access Disk";
+void **xdfs_c_hvm_ss = &cmdk_state;
+const size_t xdfs_c_hvm_ss_size = sizeof (struct cmdk);
+const struct dev_ops *xdfs_c_hvm_dev_ops = &cmdk_ops;
+
+const xdfs_h2p_map_t xdfs_c_h2p_map[] = {
/*
* The paths mapping here are very specific to xen and qemu. When a
* domU is booted under xen in HVM mode, qemu is normally used to
@@ -217,132 +107,16 @@ static hvm_to_pv_t pv_cmdk_h2p_xen_qemu[] = {
{ NULL, 0 }
};
-typedef struct pv_cmdk {
- dev_info_t *dk_dip;
- cmlb_handle_t dk_cmlbhandle;
- ddi_devid_t dk_devid;
- kmutex_t dk_mutex;
- dev_info_t *dk_xdf_dip;
- dev_t dk_xdf_dev;
- int dk_xdf_otyp_count[OTYPCNT][XDF_PEXT];
- ldi_handle_t dk_xdf_lh[XDF_PEXT];
-} pv_cmdk_t;
-
/*
- * Globals
+ * Private functions
*/
-static void *pv_cmdk_state;
-static major_t pv_cmdk_major;
-static hvm_to_pv_t *pv_cmdk_h2p;
-
-/*
- * Function prototypes for xdf callback functions
- */
-extern int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
-extern int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
- void *);
-
-static boolean_t
-pv_cmdk_isopen_part(struct pv_cmdk *dkp, int part)
-{
- int otyp;
-
- ASSERT(MUTEX_HELD(&dkp->dk_mutex));
-
- for (otyp = 0; (otyp < OTYPCNT); otyp++) {
- if (dkp->dk_xdf_otyp_count[otyp][part] != 0)
- return (B_TRUE);
- }
- return (B_FALSE);
-}
-
/*
- * Cmlb ops vectors, allows the cmlb module to directly access the entire
- * pv_cmdk disk device without going through any partitioning layers.
- */
-/*ARGSUSED*/
-static int
-pv_cmdk_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr,
- diskaddr_t start, size_t count, void *tg_cookie)
-{
- int instance = ddi_get_instance(dip);
- struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
-
- if (dkp == NULL)
- return (ENXIO);
-
- return (xdf_lb_rdwr(dkp->dk_xdf_dip, cmd, bufaddr, start, count,
- tg_cookie));
-}
-
-/*ARGSUSED*/
-static int
-pv_cmdk_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
-{
- int instance = ddi_get_instance(dip);
- struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
- int err;
-
- if (dkp == NULL)
- return (ENXIO);
-
- if (cmd == TG_GETVIRTGEOM) {
- cmlb_geom_t pgeom, *vgeomp;
- diskaddr_t capacity;
-
- /*
- * The native xdf driver doesn't support this ioctl.
- * Intead of passing it on, emulate it here so that the
- * results look the same as what we get for a real cmdk
- * device.
- *
- * Get the real size of the device
- */
- if ((err = xdf_lb_getinfo(dkp->dk_xdf_dip,
- TG_GETPHYGEOM, &pgeom, tg_cookie)) != 0)
- return (err);
- capacity = pgeom.g_capacity;
-
- /*
- * If the controller returned us something that doesn't
- * really fit into an Int 13/function 8 geometry
- * result, just fail the ioctl. See PSARC 1998/313.
- */
- if (capacity >= (63 * 254 * 1024))
- return (EINVAL);
-
- vgeomp = (cmlb_geom_t *)arg;
- vgeomp->g_capacity = capacity;
- vgeomp->g_nsect = 63;
- vgeomp->g_nhead = 254;
- vgeomp->g_ncyl = capacity / (63 * 254);
- vgeomp->g_acyl = 0;
- vgeomp->g_secsize = 512;
- vgeomp->g_intrlv = 1;
- vgeomp->g_rpm = 3600;
- return (0);
- }
-
- return (xdf_lb_getinfo(dkp->dk_xdf_dip, cmd, arg, tg_cookie));
-}
-
-static cmlb_tg_ops_t pv_cmdk_lb_ops = {
- TG_DK_OPS_VERSION_1,
- pv_cmdk_lb_rdwr,
- pv_cmdk_lb_getinfo
-};
-
-/*
- * devid management functions
- */
-
-/*
- * pv_cmdk_get_modser() is basically a local copy of
+ * xdfs_get_modser() is basically a local copy of
* cmdk_get_modser() modified to work without the dadk layer.
* (which the non-pv version of the cmdk driver uses.)
*/
static int
-pv_cmdk_get_modser(struct pv_cmdk *dkp, int ioccmd, char *buf, int len)
+xdfs_get_modser(xdfs_state_t *xsp, int ioccmd, char *buf, int len)
{
struct scsi_device *scsi_device;
opaque_t ctlobjp;
@@ -355,7 +129,7 @@ pv_cmdk_get_modser(struct pv_cmdk *dkp, int ioccmd, char *buf, int len)
strarg.is_buf = buf;
strarg.is_size = len;
- scsi_device = ddi_get_driver_private(dkp->dk_dip);
+ scsi_device = ddi_get_driver_private(xsp->xdfss_dip);
ctlobjp = scsi_device->sd_address.a_hba_tran;
if (CTL_IOCTL(ctlobjp,
ioccmd, (uintptr_t)&strarg, FNATIVE | FKIOCTL) != 0)
@@ -382,14 +156,14 @@ pv_cmdk_get_modser(struct pv_cmdk *dkp, int ioccmd, char *buf, int len)
}
/*
- * pv_cmdk_devid_modser() is basically a copy of cmdk_devid_modser()
+ * xdfs_devid_modser() is basically a copy of cmdk_devid_modser()
* that has been modified to use local pv cmdk driver functions.
*
* Build a devid from the model and serial number
* Return DDI_SUCCESS or DDI_FAILURE.
*/
static int
-pv_cmdk_devid_modser(struct pv_cmdk *dkp)
+xdfs_devid_modser(xdfs_state_t *xsp)
{
int rc = DDI_FAILURE;
char *hwid;
@@ -400,12 +174,12 @@ pv_cmdk_devid_modser(struct pv_cmdk *dkp)
* device ID is a concatenation of model number, '=', serial number.
*/
hwid = kmem_alloc(CMDK_HWIDLEN, KM_SLEEP);
- modlen = pv_cmdk_get_modser(dkp, DIOCTL_GETMODEL, hwid, CMDK_HWIDLEN);
+ modlen = xdfs_get_modser(xsp, DIOCTL_GETMODEL, hwid, CMDK_HWIDLEN);
if (modlen == 0)
goto err;
hwid[modlen++] = '=';
- serlen = pv_cmdk_get_modser(dkp, DIOCTL_GETSERIAL,
+ serlen = xdfs_get_modser(xsp, DIOCTL_GETSERIAL,
hwid + modlen, CMDK_HWIDLEN - modlen);
if (serlen == 0)
goto err;
@@ -413,8 +187,8 @@ pv_cmdk_devid_modser(struct pv_cmdk *dkp)
hwid[modlen + serlen] = 0;
/* Initialize the device ID, trailing NULL not included */
- rc = ddi_devid_init(dkp->dk_dip, DEVID_ATA_SERIAL, modlen + serlen,
- hwid, (ddi_devid_t *)&dkp->dk_devid);
+ rc = ddi_devid_init(xsp->xdfss_dip, DEVID_ATA_SERIAL, modlen + serlen,
+ hwid, (ddi_devid_t *)&xsp->xdfss_tgt_devid);
if (rc != DDI_SUCCESS)
goto err;
@@ -427,7 +201,7 @@ err:
}
/*
- * pv_cmdk_devid_read() is basically a local copy of
+ * xdfs_devid_read() is basically a local copy of
* cmdk_devid_read() modified to work without the dadk layer.
* (which the non-pv version of the cmdk driver uses.)
*
@@ -436,18 +210,18 @@ err:
* Return DDI_SUCCESS or DDI_FAILURE.
*/
static int
-pv_cmdk_devid_read(struct pv_cmdk *dkp)
+xdfs_devid_read(xdfs_state_t *xsp)
{
diskaddr_t blk;
struct dk_devid *dkdevidp;
uint_t *ip, chksum;
int i;
- if (cmlb_get_devid_block(dkp->dk_cmlbhandle, &blk, 0) != 0)
+ if (cmlb_get_devid_block(xsp->xdfss_cmlbhandle, &blk, 0) != 0)
return (DDI_FAILURE);
dkdevidp = kmem_zalloc(NBPSCTR, KM_SLEEP);
- if (pv_cmdk_lb_rdwr(dkp->dk_dip,
+ if (xdfs_lb_rdwr(xsp->xdfss_dip,
TG_READ, dkdevidp, blk, NBPSCTR, NULL) != 0)
goto err;
@@ -470,8 +244,8 @@ pv_cmdk_devid_read(struct pv_cmdk *dkp)
/* keep a copy of the device id */
i = ddi_devid_sizeof((ddi_devid_t)dkdevidp->dkd_devid);
- dkp->dk_devid = kmem_alloc(i, KM_SLEEP);
- bcopy(dkdevidp->dkd_devid, dkp->dk_devid, i);
+ xsp->xdfss_tgt_devid = kmem_alloc(i, KM_SLEEP);
+ bcopy(dkdevidp->dkd_devid, xsp->xdfss_tgt_devid, i);
kmem_free(dkdevidp, NBPSCTR);
return (DDI_SUCCESS);
@@ -481,7 +255,7 @@ err:
}
/*
- * pv_cmdk_devid_fabricate() is basically a local copy of
+ * xdfs_devid_fabricate() is basically a local copy of
* cmdk_devid_fabricate() modified to work without the dadk layer.
* (which the non-pv version of the cmdk driver uses.)
*
@@ -490,7 +264,7 @@ err:
* Return DDI_SUCCESS or DDI_FAILURE.
*/
static int
-pv_cmdk_devid_fabricate(struct pv_cmdk *dkp)
+xdfs_devid_fabricate(xdfs_state_t *xsp)
{
ddi_devid_t devid = NULL; /* devid made by ddi_devid_init */
struct dk_devid *dkdevidp = NULL; /* devid struct stored on disk */
@@ -498,10 +272,10 @@ pv_cmdk_devid_fabricate(struct pv_cmdk *dkp)
uint_t *ip, chksum;
int i;
- if (cmlb_get_devid_block(dkp->dk_cmlbhandle, &blk, 0) != 0)
+ if (cmlb_get_devid_block(xsp->xdfss_cmlbhandle, &blk, 0) != 0)
return (DDI_FAILURE);
- if (ddi_devid_init(dkp->dk_dip, DEVID_FAB, 0, NULL, &devid) !=
+ if (ddi_devid_init(xsp->xdfss_dip, DEVID_FAB, 0, NULL, &devid) !=
DDI_SUCCESS)
return (DDI_FAILURE);
@@ -527,13 +301,13 @@ pv_cmdk_devid_fabricate(struct pv_cmdk *dkp)
/* Fill in the checksum */
DKD_FORMCHKSUM(chksum, dkdevidp);
- if (pv_cmdk_lb_rdwr(dkp->dk_dip,
+ if (xdfs_lb_rdwr(xsp->xdfss_dip,
TG_WRITE, dkdevidp, blk, NBPSCTR, NULL) != 0)
goto err;
kmem_free(dkdevidp, NBPSCTR);
- dkp->dk_devid = devid;
+ xsp->xdfss_tgt_devid = devid;
return (DDI_SUCCESS);
err:
@@ -545,180 +319,10 @@ err:
}
/*
- * pv_cmdk_devid_setup() is basically a local copy of cmdk_devid_setup()
- * that has been modified to use local pv cmdk driver functions.
- *
- * Create and register the devid.
- * There are 4 different ways we can get a device id:
- * 1. Already have one - nothing to do
- * 2. Build one from the drive's model and serial numbers
- * 3. Read one from the disk (first sector of last track)
- * 4. Fabricate one and write it on the disk.
- * If any of these succeeds, register the deviceid
- */
-static void
-pv_cmdk_devid_setup(struct pv_cmdk *dkp)
-{
- int rc;
-
- /* Try options until one succeeds, or all have failed */
-
- /* 1. All done if already registered */
-
- if (dkp->dk_devid != NULL)
- return;
-
- /* 2. Build a devid from the model and serial number */
- rc = pv_cmdk_devid_modser(dkp);
- if (rc != DDI_SUCCESS) {
- /* 3. Read devid from the disk, if present */
- rc = pv_cmdk_devid_read(dkp);
-
- /* 4. otherwise make one up and write it on the disk */
- if (rc != DDI_SUCCESS)
- rc = pv_cmdk_devid_fabricate(dkp);
- }
-
- /* If we managed to get a devid any of the above ways, register it */
- if (rc == DDI_SUCCESS)
- (void) ddi_devid_register(dkp->dk_dip, dkp->dk_devid);
-}
-
-/*
- * Local Functions
+ * xdfs_rwcmd_copyin() is a duplicate of rwcmd_copyin().
*/
static int
-pv_cmdk_iodone(struct buf *bp)
-{
- struct buf *bp_orig = bp->b_chain;
-
-	/* Propagate back the io results */
- bp_orig->b_resid = bp->b_resid;
- bioerror(bp_orig, geterror(bp));
- biodone(bp_orig);
-
- freerbuf(bp);
- return (0);
-}
-
-static int
-pv_cmdkstrategy(struct buf *bp)
-{
- dev_t dev = bp->b_edev;
- int instance = XDF_DEV2UNIT(dev);
- int part = XDF_DEV2PART(dev);
- struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
- dev_t xdf_devt;
- struct buf *bp_clone;
-
- /*
- * Sanity checks that the dev_t associated with the buf we were
- * passed actually corresponds us and that the partition we're
- * trying to access is actually open. On debug kernels we'll
- * panic and on non-debug kernels we'll return failure.
- */
- ASSERT(getmajor(dev) == pv_cmdk_major);
- if (getmajor(dev) != pv_cmdk_major)
- goto err;
-
- mutex_enter(&dkp->dk_mutex);
- ASSERT(pv_cmdk_isopen_part(dkp, part));
- if (!pv_cmdk_isopen_part(dkp, part)) {
- mutex_exit(&dkp->dk_mutex);
- goto err;
- }
- mutex_exit(&dkp->dk_mutex);
-
- /* clone this buffer */
- xdf_devt = dkp->dk_xdf_dev | part;
- bp_clone = bioclone(bp, 0, bp->b_bcount, xdf_devt, bp->b_blkno,
- pv_cmdk_iodone, NULL, KM_SLEEP);
- bp_clone->b_chain = bp;
-
- /*
- * If we're being invoked on behalf of the physio() call in
- * pv_cmdk_dioctl_rwcmd() then b_private will be set to
- * XB_SLICE_NONE and we need to propagate this flag into the
- * cloned buffer so that the xdf driver will see it.
- */
- if (bp->b_private == (void *)XB_SLICE_NONE)
- bp_clone->b_private = (void *)XB_SLICE_NONE;
-
- /*
- * Pass on the cloned buffer. Note that we don't bother to check
- * for failure because the xdf strategy routine will have to
- * invoke biodone() if it wants to return an error, which means
- * that the pv_cmdk_iodone() callback will get invoked and it
- * will propagate the error back up the stack and free the cloned
- * buffer.
- */
- ASSERT(dkp->dk_xdf_lh[part] != NULL);
- return (ldi_strategy(dkp->dk_xdf_lh[part], bp_clone));
-
-err:
- bioerror(bp, ENXIO);
- bp->b_resid = bp->b_bcount;
- biodone(bp);
- return (0);
-}
-
-/*ARGSUSED*/
-static int
-pv_cmdkread(dev_t dev, struct uio *uio, cred_t *credp)
-{
- int instance = XDF_DEV2UNIT(dev);
- int part = XDF_DEV2PART(dev);
- struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
-
- return (ldi_read(dkp->dk_xdf_lh[part], uio, credp));
-}
-
-/*ARGSUSED*/
-static int
-pv_cmdkwrite(dev_t dev, struct uio *uio, cred_t *credp)
-{
- int instance = XDF_DEV2UNIT(dev);
- int part = XDF_DEV2PART(dev);
- struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
-
- return (ldi_write(dkp->dk_xdf_lh[part], uio, credp));
-}
-
-/*ARGSUSED*/
-static int
-pv_cmdkaread(dev_t dev, struct aio_req *aio, cred_t *credp)
-{
- int instance = XDF_DEV2UNIT(dev);
- int part = XDF_DEV2PART(dev);
- struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
- return (ldi_aread(dkp->dk_xdf_lh[part], aio, credp));
-}
-
-/*ARGSUSED*/
-static int
-pv_cmdkawrite(dev_t dev, struct aio_req *aio, cred_t *credp)
-{
- int instance = XDF_DEV2UNIT(dev);
- int part = XDF_DEV2PART(dev);
- struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
- return (ldi_awrite(dkp->dk_xdf_lh[part], aio, credp));
-}
-
-static int
-pv_cmdkdump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
-{
- int instance = XDF_DEV2UNIT(dev);
- int part = XDF_DEV2PART(dev);
- struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
-
- return (ldi_dump(dkp->dk_xdf_lh[part], addr, blkno, nblk));
-}
-
-/*
- * pv_rwcmd_copyin() is a duplicate of rwcmd_copyin().
- */
-static int
-pv_rwcmd_copyin(struct dadkio_rwcmd *rwcmdp, caddr_t inaddr, int flag)
+xdfs_rwcmd_copyin(struct dadkio_rwcmd *rwcmdp, caddr_t inaddr, int flag)
{
switch (ddi_model_convert_from(flag)) {
case DDI_MODEL_ILP32: {
@@ -753,10 +357,10 @@ pv_rwcmd_copyin(struct dadkio_rwcmd *rwcmdp, caddr_t inaddr, int flag)
}
/*
- * pv_rwcmd_copyout() is a duplicate of rwcmd_copyout().
+ * xdfs_rwcmd_copyout() is a duplicate of rwcmd_copyout().
*/
static int
-pv_rwcmd_copyout(struct dadkio_rwcmd *rwcmdp, caddr_t outaddr, int flag)
+xdfs_rwcmd_copyout(struct dadkio_rwcmd *rwcmdp, caddr_t outaddr, int flag)
{
switch (ddi_model_convert_from(flag)) {
case DDI_MODEL_ILP32: {
@@ -795,15 +399,8 @@ pv_rwcmd_copyout(struct dadkio_rwcmd *rwcmdp, caddr_t outaddr, int flag)
return (0);
}
-static void
-pv_cmdkmin(struct buf *bp)
-{
- if (bp->b_bcount > DK_MAXRECSIZE)
- bp->b_bcount = DK_MAXRECSIZE;
-}
-
static int
-pv_cmdk_dioctl_rwcmd(dev_t dev, intptr_t arg, int flag)
+xdfs_dioctl_rwcmd(dev_t dev, intptr_t arg, int flag)
{
struct dadkio_rwcmd *rwcmdp;
struct iovec aiov;
@@ -812,7 +409,7 @@ pv_cmdk_dioctl_rwcmd(dev_t dev, intptr_t arg, int flag)
int rw, status;
rwcmdp = kmem_alloc(sizeof (struct dadkio_rwcmd), KM_SLEEP);
- status = pv_rwcmd_copyin(rwcmdp, (caddr_t)arg, flag);
+ status = xdfs_rwcmd_copyin(rwcmdp, (caddr_t)arg, flag);
if (status != 0)
goto out;
@@ -845,35 +442,46 @@ pv_cmdk_dioctl_rwcmd(dev_t dev, intptr_t arg, int flag)
bp->b_private = (void *)XB_SLICE_NONE;
rw = ((rwcmdp->cmd == DADKIO_RWCMD_WRITE) ? B_WRITE : B_READ);
- status = physio(pv_cmdkstrategy, bp, dev, rw, pv_cmdkmin, &auio);
+ status = physio(xdfs_strategy, bp, dev, rw, xdfs_minphys, &auio);
biofini(bp);
kmem_free(bp, sizeof (buf_t));
if (status == 0)
- status = pv_rwcmd_copyout(rwcmdp, (caddr_t)arg, flag);
+ status = xdfs_rwcmd_copyout(rwcmdp, (caddr_t)arg, flag);
out:
kmem_free(rwcmdp, sizeof (struct dadkio_rwcmd));
return (status);
}
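/*
 * A summary of the DIOCTL_RWCMD flow above (a sketch, not text from
 * the original change): the dadkio_rwcmd is copied in, wrapped in a
 * uio/buf pair tagged with XB_SLICE_NONE so that xdf treats it as a
 * whole-disk access, pushed through physio(xdfs_strategy, ...), and
 * on success the results are copied back out to the caller.
 */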
-static int
-pv_cmdkioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
- int *rvalp)
-{
- int instance = XDF_DEV2UNIT(dev);
- int part = XDF_DEV2PART(dev);
- struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
- int err;
+/*
+ * xdf_shell callback functions
+ */
+/*ARGSUSED*/
+int
+xdfs_c_ioctl(xdfs_state_t *xsp, dev_t dev, int part,
+ int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp, boolean_t *done)
+{
+ *done = B_TRUE;
switch (cmd) {
default:
- return (ldi_ioctl(dkp->dk_xdf_lh[part],
- cmd, arg, flag, credp, rvalp));
+ *done = B_FALSE;
+ return (0);
+ case DKIOCLOCK:
+ case DKIOCUNLOCK:
+ case FDEJECT:
+ case DKIOCEJECT:
+ case CDROMEJECT: {
+ /* we don't support ejectable devices */
+ return (ENOTTY);
+ }
case DKIOCGETWCE:
- case DKIOCSETWCE:
+ case DKIOCSETWCE: {
+ /* we don't support write cache get/set */
return (EIO);
+ }
case DKIOCADDBAD: {
/*
* This is for ata/ide bad block handling. It is supposed
@@ -889,7 +497,7 @@ pv_cmdkioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
* I can't actually find any code that utilizes this ioctl,
* hence we're leaving it explicitly unimplemented.
*/
- ASSERT("ioctl cmd unsupported by pv_cmdk: DKIOCGETDEF");
+ ASSERT("ioctl cmd unsupported by xdf shell: DKIOCGETDEF");
return (EIO);
}
case DIOCTL_RWCMD: {
@@ -898,16 +506,18 @@ pv_cmdkioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
* reading and writing the disk. Great, another way to
* do the same thing...
*/
- return (pv_cmdk_dioctl_rwcmd(dev, arg, flag));
+ return (xdfs_dioctl_rwcmd(dev, arg, flag));
}
case DKIOCINFO: {
- dev_info_t *dip = dkp->dk_dip;
+ int instance = ddi_get_instance(xsp->xdfss_dip);
+ dev_info_t *dip = xsp->xdfss_dip;
struct dk_cinfo info;
+ int rv;
/* Pass on the ioctl request, save the response */
- if ((err = ldi_ioctl(dkp->dk_xdf_lh[part],
+ if ((rv = ldi_ioctl(xsp->xdfss_tgt_lh[part],
cmd, (intptr_t)&info, FKIOCTL, credp, rvalp)) != 0)
- return (err);
+ return (rv);
/* Update controller info */
info.dki_cnum = ddi_get_instance(ddi_get_parent(dip));
@@ -930,129 +540,47 @@ pv_cmdkioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
/*NOTREACHED*/
}
-/*ARGSUSED*/
-static int
-pv_cmdkopen(dev_t *dev_p, int flag, int otyp, cred_t *credp)
+/*
+ * xdfs_c_devid_setup() is a slightly modified copy of cmdk_devid_setup().
+ *
+ * Create and register the devid.
+ * There are 4 different ways we can get a device id:
+ * 1. Already have one - nothing to do
+ * 2. Build one from the drive's model and serial numbers
+ * 3. Read one from the disk (first sector of last track)
+ * 4. Fabricate one and write it on the disk.
+ * If any of these succeeds, register the deviceid
+ */
+void
+xdfs_c_devid_setup(xdfs_state_t *xsp)
{
- ldi_ident_t li;
- dev_t dev = *dev_p;
- int instance = XDF_DEV2UNIT(dev);
- int part = XDF_DEV2PART(dev);
- struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
- dev_t xdf_devt = dkp->dk_xdf_dev | part;
- int err = 0;
-
- if ((otyp < 0) || (otyp >= OTYPCNT))
- return (EINVAL);
-
- /* allocate an ldi handle */
- VERIFY(ldi_ident_from_dev(*dev_p, &li) == 0);
-
- mutex_enter(&dkp->dk_mutex);
-
- /*
- * We translate all device opens (chr, blk, and lyr) into
- * block device opens. Why? Because for all the opens that
- * come through this driver, we only keep around one LDI handle.
- * So that handle can only be of one open type. The reason
- * that we choose the block interface for this is that to use
- * the block interfaces for a device the system needs to allocate
- * buf_ts, which are associated with system memory which can act
- * as a cache for device data. So normally when a block device
- * is closed the system will ensure that all these pages get
- * flushed out of memory. But if we were to open the device
- * as a character device, then when we went to close the underlying
- * device (even if we had invoked the block interfaces) any data
- * remaining in memory wouldn't necessairly be flushed out
- * before the device was closed.
- */
- if (dkp->dk_xdf_lh[part] == NULL) {
- ASSERT(!pv_cmdk_isopen_part(dkp, part));
-
- err = ldi_open_by_dev(&xdf_devt, OTYP_BLK, flag, credp,
- &dkp->dk_xdf_lh[part], li);
-
- if (err != 0) {
- mutex_exit(&dkp->dk_mutex);
- ldi_ident_release(li);
- return (err);
- }
-
- /* Disk devices really shouldn't clone */
- ASSERT(xdf_devt == (dkp->dk_xdf_dev | part));
- } else {
- ldi_handle_t lh_tmp;
-
- ASSERT(pv_cmdk_isopen_part(dkp, part));
-
- /* do ldi open/close to get flags and cred check */
- err = ldi_open_by_dev(&xdf_devt, OTYP_BLK, flag, credp,
- &lh_tmp, li);
- if (err != 0) {
- mutex_exit(&dkp->dk_mutex);
- ldi_ident_release(li);
- return (err);
- }
-
- /* Disk devices really shouldn't clone */
- ASSERT(xdf_devt == (dkp->dk_xdf_dev | part));
- (void) ldi_close(lh_tmp, flag, credp);
- }
- ldi_ident_release(li);
-
- dkp->dk_xdf_otyp_count[otyp][part]++;
-
- mutex_exit(&dkp->dk_mutex);
- return (0);
-}
+ int rc;
-/*ARGSUSED*/
-static int
-pv_cmdkclose(dev_t dev, int flag, int otyp, cred_t *credp)
-{
- int instance = XDF_DEV2UNIT(dev);
- int part = XDF_DEV2PART(dev);
- struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
- int err = 0;
+ /* Try options until one succeeds, or all have failed */
- ASSERT((otyp >= 0) && otyp < OTYPCNT);
+ /* 1. All done if already registered */
- /*
- * Sanity check that the dev_t specified corresponds to this
- * driver and that the device is actually open. On debug kernels we'll
- * panic and on non-debug kernels we'll return failure.
- */
- ASSERT(getmajor(dev) == pv_cmdk_major);
- if (getmajor(dev) != pv_cmdk_major)
- return (ENXIO);
-
- mutex_enter(&dkp->dk_mutex);
- ASSERT(pv_cmdk_isopen_part(dkp, part));
- if (!pv_cmdk_isopen_part(dkp, part)) {
- mutex_exit(&dkp->dk_mutex);
- return (ENXIO);
- }
+ if (xsp->xdfss_tgt_devid != NULL)
+ return;
- ASSERT(dkp->dk_xdf_lh[part] != NULL);
- ASSERT(dkp->dk_xdf_otyp_count[otyp][part] > 0);
- if (otyp == OTYP_LYR) {
- dkp->dk_xdf_otyp_count[otyp][part]--;
- } else {
- dkp->dk_xdf_otyp_count[otyp][part] = 0;
- }
+ /* 2. Build a devid from the model and serial number */
+ rc = xdfs_devid_modser(xsp);
+ if (rc != DDI_SUCCESS) {
+ /* 3. Read devid from the disk, if present */
+ rc = xdfs_devid_read(xsp);
- if (!pv_cmdk_isopen_part(dkp, part)) {
- err = ldi_close(dkp->dk_xdf_lh[part], flag, credp);
- dkp->dk_xdf_lh[part] = NULL;
+ /* 4. otherwise make one up and write it on the disk */
+ if (rc != DDI_SUCCESS)
+ rc = xdfs_devid_fabricate(xsp);
}
- mutex_exit(&dkp->dk_mutex);
-
- return (err);
+ /* If we managed to get a devid any of the above ways, register it */
+ if (rc == DDI_SUCCESS)
+ (void) ddi_devid_register(xsp->xdfss_dip, xsp->xdfss_tgt_devid);
}
-static int
-pv_cmdk_getpgeom(dev_info_t *dip, cmlb_geom_t *pgeom)
+int
+xdfs_c_getpgeom(dev_info_t *dip, cmlb_geom_t *pgeom)
{
struct scsi_device *scsi_device;
struct tgdk_geom tgdk_geom;
@@ -1079,13 +607,8 @@ pv_cmdk_getpgeom(dev_info_t *dip, cmlb_geom_t *pgeom)
return (0);
}
-/*
- * pv_cmdk_bb_check() checks for the existence of bad block mappings in
- * the alternate partition/slice. Returns B_FALSE if there are no bad
- * block mappings found, and B_TRUE if there are bad block mappings found.
- */
-static boolean_t
-pv_cmdk_bb_check(struct pv_cmdk *dkp)
+boolean_t
+xdfs_c_bb_check(xdfs_state_t *xsp)
{
struct alts_parttbl *ap;
diskaddr_t nblocks, blk;
@@ -1096,7 +619,7 @@ pv_cmdk_bb_check(struct pv_cmdk *dkp)
/* find slice with V_ALTSCTR tag */
for (alts = 0; alts < NDKMAP; alts++) {
- if (cmlb_partinfo(dkp->dk_cmlbhandle, alts,
+ if (cmlb_partinfo(xsp->xdfss_cmlbhandle, alts,
&nblocks, &blk, NULL, &vtoctag, 0) != 0) {
/* no partition table exists */
return (B_FALSE);
@@ -1110,8 +633,7 @@ pv_cmdk_bb_check(struct pv_cmdk *dkp)
/* read in ALTS label block */
ap = (struct alts_parttbl *)kmem_zalloc(NBPSCTR, KM_SLEEP);
- if (pv_cmdk_lb_rdwr(dkp->dk_dip,
- TG_READ, ap, blk, NBPSCTR, NULL) != 0)
+ if (xdfs_lb_rdwr(xsp->xdfss_dip, TG_READ, ap, blk, NBPSCTR, NULL) != 0)
goto err;
altused = ap->alts_ent_used; /* number of BB entries */
@@ -1131,400 +653,22 @@ err:
return (B_FALSE);
}
-/*
- * Autoconfiguration Routines
- */
-static int
-pv_cmdkattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
-{
- int instance = ddi_get_instance(dip);
- dev_info_t *xdf_dip = NULL;
- struct pv_cmdk *dkp;
- cmlb_geom_t pgeom;
- char *path;
- int i;
-
- if (cmd != DDI_ATTACH)
- return (DDI_FAILURE);
-
- /*
- * This cmdk device layers on top of an xdf device. So the first
- * thing we need to do is determine which xdf device instance this
- * cmdk instance should be layered on top of.
- */
- path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- (void) ddi_pathname(dip, path);
- for (i = 0; pv_cmdk_h2p[i].h2p_hvm_path != NULL; i++) {
- if (strcmp(pv_cmdk_h2p[i].h2p_hvm_path, path) == 0)
- break;
- }
- kmem_free(path, MAXPATHLEN);
-
- if (pv_cmdk_h2p[i].h2p_hvm_path == NULL) {
- /*
- * UhOh. We don't know what xdf instance this cmdk device
- * should be mapped to.
- */
- return (DDI_FAILURE);
- }
-
- /* Check if this device exists */
- xdf_dip = xdf_hvm_hold(pv_cmdk_h2p[i].h2p_pv_path);
- if (xdf_dip == NULL)
- return (DDI_FAILURE);
-
- /* allocate and initialize our state structure */
- (void) ddi_soft_state_zalloc(pv_cmdk_state, instance);
- dkp = ddi_get_soft_state(pv_cmdk_state, instance);
- mutex_init(&dkp->dk_mutex, NULL, MUTEX_DRIVER, NULL);
- dkp->dk_dip = dip;
- dkp->dk_xdf_dip = xdf_dip;
- dkp->dk_xdf_dev = makedevice(ddi_driver_major(xdf_dip),
- XDF_MINOR(ddi_get_instance(xdf_dip), 0));
-
- ASSERT((dkp->dk_xdf_dev & XDF_PMASK) == 0);
-
- /*
- * GROSS HACK ALERT! GROSS HACK ALERT!
- *
- * Before we can initialize the cmlb layer, we have to tell the
- * underlying xdf device what its physical geometry should be.
- * See the block comments at the top of this file for more info.
- */
- if ((pv_cmdk_getpgeom(dip, &pgeom) != 0) ||
- (xdf_hvm_setpgeom(dkp->dk_xdf_dip, &pgeom) != 0)) {
- ddi_release_devi(dkp->dk_xdf_dip);
- mutex_destroy(&dkp->dk_mutex);
- ddi_soft_state_free(pv_cmdk_state, instance);
- return (DDI_FAILURE);
- }
-
- /* create kstat for iostat(1M) */
- if (xdf_kstat_create(dkp->dk_xdf_dip, "cmdk", instance) != 0) {
- ddi_release_devi(dkp->dk_xdf_dip);
- mutex_destroy(&dkp->dk_mutex);
- ddi_soft_state_free(pv_cmdk_state, instance);
- return (DDI_FAILURE);
- }
-
- /*
- * Force the xdf front end driver to connect to the backend. From
- * the solaris device tree perspective, the xdf driver devinfo node
- * is already in the ATTACHED state. (Otherwise xdf_hvm_hold()
- * would not have returned a dip.) But this doesn't mean that the
- * xdf device has actually established a connection to its back
- * end driver. For us to be able to access the xdf device it needs
- * to be connected. There are two ways to force the xdf driver to
- * connect to the backend device.
- */
- if (xdf_hvm_connect(dkp->dk_xdf_dip) != 0) {
- cmn_err(CE_WARN,
- "pv driver failed to connect: %s",
- pv_cmdk_h2p[i].h2p_pv_path);
- xdf_kstat_delete(dkp->dk_xdf_dip);
- ddi_release_devi(dkp->dk_xdf_dip);
- mutex_destroy(&dkp->dk_mutex);
- ddi_soft_state_free(pv_cmdk_state, instance);
- return (DDI_FAILURE);
- }
-
- /*
- * Initialize cmlb. Note that for partition information cmlb
- * will access the underlying xdf disk device directly via
- * pv_cmdk_lb_rdwr() and pv_cmdk_lb_getinfo(). There are no
- * layered driver handles associated with this access because
- * it is a direct disk access that doesn't go through
- * any of the device nodes exported by the xdf device (since
- * all exported device nodes only reflect the portion of
- * the device visible via the partition/slice that the node
- * is associated with.) So while not observable via the LDI,
- * this direct disk access is ok since we're actually holding
- * the target device.
- */
- cmlb_alloc_handle((cmlb_handle_t *)&dkp->dk_cmlbhandle);
- if (cmlb_attach(dkp->dk_dip, &pv_cmdk_lb_ops,
- DTYPE_DIRECT, /* device_type */
- 0, /* not removable */
- 0, /* not hot pluggable */
- DDI_NT_BLOCK,
- CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT, /* mimic cmdk */
- dkp->dk_cmlbhandle, 0) != 0) {
- cmlb_free_handle(&dkp->dk_cmlbhandle);
- xdf_kstat_delete(dkp->dk_xdf_dip);
- ddi_release_devi(dkp->dk_xdf_dip);
- mutex_destroy(&dkp->dk_mutex);
- ddi_soft_state_free(pv_cmdk_state, instance);
- return (DDI_FAILURE);
- }
-
- if (pv_cmdk_bb_check(dkp)) {
- cmn_err(CE_WARN,
- "pv cmdk disks with bad blocks are unsupported: %s",
- pv_cmdk_h2p[i].h2p_hvm_path);
-
- cmlb_detach(dkp->dk_cmlbhandle, 0);
- cmlb_free_handle(&dkp->dk_cmlbhandle);
- xdf_kstat_delete(dkp->dk_xdf_dip);
- ddi_release_devi(dkp->dk_xdf_dip);
- mutex_destroy(&dkp->dk_mutex);
- ddi_soft_state_free(pv_cmdk_state, instance);
- return (DDI_FAILURE);
- }
-
- /* setup devid string */
- pv_cmdk_devid_setup(dkp);
-
- /* Calling validate will create minor nodes according to disk label */
- (void) cmlb_validate(dkp->dk_cmlbhandle, 0, 0);
-
- /*
- * Add a zero-length attribute to tell the world we support
- * kernel ioctls (for layered drivers).
- */
- (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
- DDI_KERNEL_IOCTL, NULL, 0);
-
- /* Have the system report any newly created device nodes */
- ddi_report_dev(dip);
-
- return (DDI_SUCCESS);
-}
-
-static int
-pv_cmdkdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+char *
+xdfs_c_cmlb_node_type(xdfs_state_t *xsp)
{
- int instance = ddi_get_instance(dip);
- struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
-
- if (cmd != DDI_DETACH)
- return (DDI_FAILURE);
-
- ASSERT(MUTEX_NOT_HELD(&dkp->dk_mutex));
-
- ddi_devid_unregister(dip);
- if (dkp->dk_devid)
- ddi_devid_free(dkp->dk_devid);
- cmlb_detach(dkp->dk_cmlbhandle, 0);
- cmlb_free_handle(&dkp->dk_cmlbhandle);
- mutex_destroy(&dkp->dk_mutex);
- xdf_kstat_delete(dkp->dk_xdf_dip);
- ddi_release_devi(dkp->dk_xdf_dip);
- ddi_soft_state_free(pv_cmdk_state, instance);
- ddi_prop_remove_all(dip);
-
- return (DDI_SUCCESS);
+ return (xsp->xdfss_tgt_is_cd ? DDI_NT_CD : DDI_NT_BLOCK);
}
/*ARGSUSED*/
-static int
-pv_cmdk_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
- void **result)
-{
- dev_t dev = (dev_t)arg;
- int instance = XDF_DEV2UNIT(dev);
- struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
-
- switch (infocmd) {
- case DDI_INFO_DEVT2DEVINFO:
- if (dkp == NULL)
- return (DDI_FAILURE);
- *result = (void *)dkp->dk_dip;
- break;
- case DDI_INFO_DEVT2INSTANCE:
- *result = (void *)(intptr_t)instance;
- break;
- default:
- return (DDI_FAILURE);
- }
- return (DDI_SUCCESS);
-}
-
-static int
-pv_cmdk_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
- int flags, char *name, caddr_t valuep, int *lengthp)
-{
- int instance = ddi_get_instance(dip);
- struct pv_cmdk *dkp = ddi_get_soft_state(pv_cmdk_state, instance);
- dev_info_t *xdf_dip;
- dev_t xdf_devt;
- int err;
-
- /*
- * Sanity check that if a dev_t or dip were specified that they
- * correspond to this device driver. On debug kernels we'll
- * panic and on non-debug kernels we'll return failure.
- */
- ASSERT(ddi_driver_major(dip) == pv_cmdk_major);
- ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == pv_cmdk_major));
- if ((ddi_driver_major(dip) != pv_cmdk_major) ||
- ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != pv_cmdk_major)))
- return (DDI_PROP_NOT_FOUND);
-
- /*
- * This property lookup might be associated with a device node
- * that is not yet attached, if so pass it onto ddi_prop_op().
- */
- if (dkp == NULL)
- return (ddi_prop_op(dev, dip, prop_op, flags,
- name, valuep, lengthp));
-
- /*
- * Make sure we only lookup static properties.
- *
- * If there are static properties of the underlying xdf driver
- * that we want to mirror, then we'll have to explicitly look them
- * up and define them during attach. There are a few reasons
- * for this. Most importantly, most static properties are typed
- * and all dynamic properties are untyped, ie, for dynamic
- * properties the caller must know the type of the property and
- * how to interpret the value of the property. The prop_op driver
- * entry point is only designed for returning dynamic/untyped
- * properties, so if we were to attempt to lookup and pass back
- * static properties of the underlying device here then we would
- * be losing the type information for those properties. Another
- * reason we don't want to pass on static property requests is that
- * static properties are enumerable in the device tree, whereas
- * dynamic ones are not.
- */
- flags |= DDI_PROP_DYNAMIC;
-
- /*
- * We can't use the ldi here to access the underlying device because
- * the ldi actually opens the device, and that open might fail if the
- * device has already been opened with the FEXCL flag. If we used
- * the ldi here, it would also be possible for some other caller
- * to try open the device with the FEXCL flag and get a failure
- * back because we have it open to do a property query.
- *
- * Instead we'll grab a hold on the target dip and query the
- * property directly.
- */
- mutex_enter(&dkp->dk_mutex);
-
- if ((xdf_dip = dkp->dk_xdf_dip) == NULL) {
- mutex_exit(&dkp->dk_mutex);
- return (DDI_PROP_NOT_FOUND);
- }
- e_ddi_hold_devi(xdf_dip);
-
- /* figure out the dev_t we're going to pass on down */
- if (dev == DDI_DEV_T_ANY) {
- xdf_devt = DDI_DEV_T_ANY;
- } else {
- xdf_devt = dkp->dk_xdf_dev | XDF_DEV2PART(dev);
- }
-
- mutex_exit(&dkp->dk_mutex);
-
- /*
- * Cdev_prop_op() is not a public interface, and normally the caller
- * is required to make sure that the target driver actually implements
- * this interface before trying to invoke it. In this case we know
- * that we're always accessing the xdf driver and it does have this
- * interface defined, so we can skip the check.
- */
- err = cdev_prop_op(xdf_devt, xdf_dip,
- prop_op, flags, name, valuep, lengthp);
- ddi_release_devi(xdf_dip);
- return (err);
-}
-
-/*
- * Device driver ops vector
- */
-static struct cb_ops pv_cmdk_cb_ops = {
- pv_cmdkopen, /* open */
- pv_cmdkclose, /* close */
- pv_cmdkstrategy, /* strategy */
- nodev, /* print */
- pv_cmdkdump, /* dump */
- pv_cmdkread, /* read */
- pv_cmdkwrite, /* write */
- pv_cmdkioctl, /* ioctl */
- nodev, /* devmap */
- nodev, /* mmap */
- nodev, /* segmap */
- nochpoll, /* poll */
- pv_cmdk_prop_op, /* cb_prop_op */
- 0, /* streamtab */
-	D_64BIT | D_MP | D_NEW,	/* Driver compatibility flag */
- CB_REV, /* cb_rev */
- pv_cmdkaread, /* async read */
- pv_cmdkawrite /* async write */
-};
-
-struct dev_ops pv_cmdk_ops = {
- DEVO_REV, /* devo_rev, */
- 0, /* refcnt */
- pv_cmdk_getinfo, /* info */
- nulldev, /* identify */
- nulldev, /* probe */
- pv_cmdkattach, /* attach */
- pv_cmdkdetach, /* detach */
- nodev, /* reset */
- &pv_cmdk_cb_ops, /* driver operations */
- (struct bus_ops *)0, /* bus operations */
- NULL, /* power */
- ddi_quiesce_not_supported, /* devo_quiesce */
-};
-
-/*
- * Module linkage information for the kernel.
- */
-static struct modldrv modldrv = {
- &mod_driverops, /* Type of module. This one is a driver */
- "PV Common Direct Access Disk",
- &pv_cmdk_ops, /* driver ops */
-};
-
-static struct modlinkage modlinkage = {
- MODREV_1, (void *)&modldrv, NULL
-};
-
-int
-_init(void)
-{
- int rval;
-
- if ((pv_cmdk_major = ddi_name_to_major("cmdk")) == (major_t)-1)
- return (EINVAL);
-
- /*
-	 * In general ide usually supports 4 disk devices. This same
-	 * limitation also applies to software emulating ide devices,
-	 * so by default we pre-allocate 4 cmdk soft state structures.
- */
- if ((rval = ddi_soft_state_init(&pv_cmdk_state,
- sizeof (struct pv_cmdk), PV_CMDK_NODES)) != 0)
- return (rval);
-
- /*
- * Currently we only support qemu as the backing hardware emulator
- * for cmdk devices.
- */
- pv_cmdk_h2p = pv_cmdk_h2p_xen_qemu;
-
- /* Install our module */
- if ((rval = mod_install(&modlinkage)) != 0) {
- ddi_soft_state_fini(&pv_cmdk_state);
- return (rval);
- }
-
- return (0);
-}
-
int
-_info(struct modinfo *modinfop)
+xdfs_c_cmlb_alter_behavior(xdfs_state_t *xsp)
{
- return (mod_info(&modlinkage, modinfop));
+ return (xsp->xdfss_tgt_is_cd ?
+ 0 : CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT);
}
-int
-_fini(void)
+/*ARGSUSED*/
+void
+xdfs_c_attach(xdfs_state_t *xsp)
{
- int rval;
- if ((rval = mod_remove(&modlinkage)) != 0)
- return (rval);
- ddi_soft_state_fini(&pv_cmdk_state);
- return (0);
}
diff --git a/usr/src/uts/i86pc/i86hvm/io/pv_sd.c b/usr/src/uts/i86pc/i86hvm/io/pv_sd.c
new file mode 100644
index 0000000000..74edb42907
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/io/pv_sd.c
@@ -0,0 +1,187 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <io/xdf_shell.h>
+
+#include <sys/scsi/targets/sddef.h>
+
+/*
+ * We're emulating (and possibly layering on top of) sd devices, so xdf
+ * disk unit mappings must match up with sd disk unit mappings.
+ */
+#if !defined(XDF_PSHIFT)
+#error "can't find definition for xdf unit mappings - XDF_PSHIFT"
+#endif /* XDF_PSHIFT */
+
+#if !defined(SDUNIT_SHIFT)
+#error "can't find definition for cmdk unit mappings - SDUNIT_SHIFT"
+#endif /* SDUNIT_SHIFT */
+
+#if ((XDF_PSHIFT - SDUNIT_SHIFT) != 0)
+#error "sd and xdf unit mappings don't match."
+#endif /* ((XDF_PSHIFT - SDUNIT_SHIFT) != 0) */
+
+extern const struct dev_ops sd_ops;
+extern void *sd_state;
+
+/*
+ * Globals required by xdf_shell.c
+ */
+const char *xdfs_c_name = "sd";
+const char *xdfs_c_linkinfo = "PV SCSI Disk Driver";
+void **xdfs_c_hvm_ss = &sd_state;
+const size_t xdfs_c_hvm_ss_size = sizeof (struct sd_lun);
+const struct dev_ops *xdfs_c_hvm_dev_ops = &sd_ops;
+
+const xdfs_h2p_map_t xdfs_c_h2p_map[] = {
+ { "/pci@0,0/pci-ide@1,1/ide@0/sd@0,0", "/xpvd/xdf@768" },
+ { "/pci@0,0/pci-ide@1,1/ide@0/sd@1,0", "/xpvd/xdf@832" },
+ { "/pci@0,0/pci-ide@1,1/ide@1/sd@0,0", "/xpvd/xdf@5632" },
+ { "/pci@0,0/pci-ide@1,1/ide@1/sd@1,0", "/xpvd/xdf@5696" },
+ { NULL, 0 }
+};
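+
+/*
+ * A sketch of how the xdf unit addresses above decode, on the
+ * assumption (not stated by this change) that they follow Xen's
+ * Linux-style virtual block device numbering of (major << 8) | minor:
+ *
+ *	768  == (3 << 8) | 0  (hda)	5632 == (22 << 8) | 0  (hdc)
+ *	832  == (3 << 8) | 64 (hdb)	5696 == (22 << 8) | 64 (hdd)
+ */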
+
+/*ARGSUSED*/
+int
+xdfs_c_ioctl(xdfs_state_t *xsp, dev_t dev, int part,
+ int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp, boolean_t *done)
+{
+ dev_info_t *dip = xsp->xdfss_dip;
+ int instance = ddi_get_instance(dip);
+ int rv;
+
+ *done = B_TRUE;
+ switch (cmd) {
+ case DKIOCINFO: {
+ struct dk_cinfo info;
+
+ /* Pass on the ioctl request, save the response */
+ if ((rv = ldi_ioctl(xsp->xdfss_tgt_lh[part],
+ cmd, (intptr_t)&info, FKIOCTL, credp, rvalp)) != 0)
+ return (rv);
+
+ /* Update controller info */
+ info.dki_cnum = ddi_get_instance(ddi_get_parent(dip));
+ (void) strlcpy(info.dki_cname,
+ ddi_get_name(ddi_get_parent(dip)), sizeof (info.dki_cname));
+
+ /* Update unit info. */
+ if (info.dki_ctype == DKC_VBD) {
+ /*
+ * Normally a real scsi device would report the
+ * controller type as DKC_SCSI_CCS. But we don't
+ * emulate a real scsi controller. (Which becomes
+ * apparent if anyone tries to issue us a uscsi(7i)
+ * command.) So instead of reporting DKC_SCSI_CCS,
+ * we report DKC_UNKNOWN.
+ */
+ info.dki_ctype = DKC_UNKNOWN;
+ }
+ info.dki_unit = instance;
+ (void) strlcpy(info.dki_dname,
+ ddi_driver_name(dip), sizeof (info.dki_dname));
+ info.dki_addr = 1;
+
+ if (ddi_copyout(&info, (void *)arg, sizeof (info), flag))
+ return (EFAULT);
+
+ return (0);
+ }
+ default:
+ *done = B_FALSE;
+ return (0);
+ } /* switch (cmd) */
+ /*NOTREACHED*/
+}
+
+/*ARGSUSED*/
+void
+xdfs_c_devid_setup(xdfs_state_t *xsp)
+{
+ /*
+ * Currently we only support cdrom devices, which don't have
+ * devids associated with them.
+ */
+ ASSERT("cdrom devices don't have a devid");
+}
+
+/*ARGSUSED*/
+int
+xdfs_c_getpgeom(dev_info_t *dip, cmlb_geom_t *pgeom)
+{
+ /*
+ * Currently we only support cdrom devices, which don't have
+ * a physical geometry, so this routine should never get
+ * invoked.
+ */
+ ASSERT("cdrom devices don't have any physical geometry");
+ return (-1);
+}
+
+/*ARGSUSED*/
+boolean_t
+xdfs_c_bb_check(xdfs_state_t *xsp)
+{
+ /*
+ * Currently we only support cdrom devices, which don't have
+ * bad blocks, so this routine should never get invoked.
+ */
+ ASSERT("cdrom devices don't support bad block mappings");
+ return (B_TRUE);
+}
+
+char *
+xdfs_c_cmlb_node_type(xdfs_state_t *xsp)
+{
+ return (xsp->xdfss_tgt_is_cd ? DDI_NT_CD_CHAN : DDI_NT_BLOCK_CHAN);
+}
+
+/*ARGSUSED*/
+int
+xdfs_c_cmlb_alter_behavior(xdfs_state_t *xsp)
+{
+ return (0);
+}
+
+void
+xdfs_c_attach(xdfs_state_t *xsp)
+{
+ dev_info_t *dip = xsp->xdfss_dip;
+ int dtype = DTYPE_DIRECT;
+
+ if (xsp->xdfss_tgt_is_cd) {
+ dtype = DTYPE_RODIRECT;
+ (void) ddi_prop_create(DDI_DEV_T_NONE, dip,
+ DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
+ }
+
+ /*
+ * We use ndi_* instead of ddi_* because it will result in
+ * INQUIRY_DEVICE_TYPE being a hardware property instead
+	 * of a driver property.
+ */
+ (void) ndi_prop_update_int(DDI_DEV_T_NONE, dip,
+ INQUIRY_DEVICE_TYPE, dtype);
+}
diff --git a/usr/src/uts/i86pc/i86hvm/io/xdf_shell.c b/usr/src/uts/i86pc/i86hvm/io/xdf_shell.c
new file mode 100644
index 0000000000..5162cb52ae
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/io/xdf_shell.c
@@ -0,0 +1,1278 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <io/xdf_shell.h>
+#include <sys/dkio.h>
+#include <sys/scsi/scsi_types.h>
+
+/*
+ * General Notes
+ *
+ * We don't support disks with bad block mappings. We have this
+ * limitation because the underlying xdf driver doesn't support
+ * bad block remapping. If there is a need to support this feature
+ * it should be added directly to the xdf driver and we should just
+ * pass requests straight on through and let it handle the remapping.
+ * Also, it's probably worth pointing out that most modern disks do bad
+ * block remapping internally in the hardware so there's actually less
+ * of a chance of us ever discovering bad blocks. Also, in most cases
+ * this driver (and the xdf driver) will only be used with virtualized
+ * devices, so one might wonder why a virtual device would ever actually
+ * experience bad blocks. To wrap this up, you might be wondering how
+ * these bad block mappings get created and how they are managed. Well,
+ * there are two tools for managing bad block mappings, format(1M) and
+ * addbadsec(1M). Format(1M) can be used to do a surface scan of a disk
+ * to attempt to find bad blocks and create mappings for them. Format(1M)
+ * and addbadsec(1M) can also be used to edit existing mappings that may
+ * be saved on the disk.
+ *
+ * The underlying PV driver that this driver passes on requests to is the
+ * xdf driver. Since in most cases the xdf driver doesn't deal with
+ * physical disks, it has its own algorithm for assigning a physical
+ * geometry to a virtual disk (ie, cylinder count, head count, etc.)
+ * The default values chosen by the xdf driver may not match those
+ * assigned to a disk by a hardware disk emulator in an HVM environment.
+ * This is a problem since these physical geometry attributes affect
+ * things like the partition table, backup label location, etc. So
+ * to emulate disk devices correctly we need to know the physical geometry
+ * that was assigned to a disk at the time of its initialization.
+ * Normally in an HVM environment this information will be passed to
+ * the BIOS and operating system from the hardware emulator that is
+ * emulating the disk devices. In the case of a solaris dom0+xvm
+ * this would be qemu. So to work around this issue, this driver will
+ * query the emulated hardware to get the assigned physical geometry
+ * and then pass this geometry onto the xdf driver so that it can use it.
+ * But really, this information is essentially metadata about the disk
+ * that should be kept with the disk image itself. (Assuming of course
+ * that a disk image is the actual backingstore for this emulated device.)
+ * This metadata should also be made available to PV drivers via a common
+ * mechanism, probably the xenstore. The fact that this metadata isn't
+ * available outside of HVM domains means that it's difficult to move
+ * disks between HVM and PV domains, since a fully PV domain will have no
+ * way of knowing what the correct geometry of the target device is.
+ * (Short of reading the disk, looking for things like partition tables
+ * and labels, and taking a best guess at what the geometry was when
+ * the disk was initialized. Unsurprisingly, qemu actually does this.)
+ *
+ * This driver has to map xdf shell device instances into their corresponding
+ * xdf device instances. We have to do this to ensure that when a user
+ * accesses an emulated xdf shell device we map those accesses to the proper
+ * paravirtualized device. Basically what we need to know is how multiple
+ * 'disk' entries in a domU configuration file get mapped to emulated
+ * xdf shell devices and to xdf devices. The 'disk' entry to xdf instance
+ * mappings we know because those are done within the Solaris xvdi code
+ * and the xpvd nexus driver. But the config to emulated devices mappings
+ * are handled entirely within the xen management tool chain and the
+ * hardware emulator. Since all the tools that establish these mappings
+ * live in dom0, dom0 should really supply us with this information,
+ * probably via the xenstore. Unfortunately it doesn't, so since there's
+ * no good way to determine this mapping dynamically, this driver uses
+ * a hard-coded set of static mappings. These mappings are hardware
+ * emulator specific because each different hardware emulator could have
+ * a different device tree with different xdf shell device paths. This
+ * means that if we want to continue to use this static mapping approach
+ * to allow Solaris to run on different hardware emulators we'll have
+ * to analyze each of those emulators to determine what paths they
+ * use and hard code those paths into this driver. yech. This metadata
+ * really needs to be supplied to us by dom0.
+ *
+ * This driver accesses underlying xdf nodes. Unfortunately, devices
+ * must create minor nodes during attach, and for disk devices to create
+ * minor nodes, they have to look at the label on the disk, so this means
+ * that disk drivers must be able to access a disk's contents during
+ * attach. That means that this disk driver must be able to access
+ * underlying xdf nodes during attach. Unfortunately, due to device tree
+ * locking restrictions, we cannot have an attach operation occurring on
+ * this device and then attempt to access another device which may
+ * cause another attach to occur in a different device tree branch
+ * since this could result in deadlock. Hence, this driver can only
+ * access xdf device nodes that we know are attached, and it can't use
+ * any ddi interfaces to access those nodes if those interfaces could
+ * trigger an attach of the xdf device. So this driver works around
+ * these restrictions by talking directly to xdf devices via
+ * xdf_hvm_hold(). This interface takes a pathname to an xdf device,
+ * and if that device is already attached then it returns a held dip
+ * pointer for that device node. This prevents us from getting into
+ * deadlock situations, but now we need a mechanism to ensure that all
+ * the xdf device nodes this driver might access are attached before
+ * this driver tries to access them. This is accomplished via the
+ * hvmboot_rootconf() callback which is invoked just before root is
+ * mounted. hvmboot_rootconf() will attach xpvd and tell it to configure
+ * all xdf devices visible to the system. All these xdf device nodes
+ * will also be marked with the "ddi-no-autodetach" property so that
+ * once they are configured, they will not be automatically unconfigured.
+ * The only way that they could be unconfigured is if the administrator
+ * explicitly attempts to unload required modules via rem_drv(1M)
+ * or modunload(1M).
+ */
+
+/*
+ * 16 partitions + fdisk (see xdf.h)
+ */
+#define XDFS_DEV2UNIT(dev) XDF_INST((getminor((dev))))
+#define XDFS_DEV2PART(dev) XDF_PART((getminor((dev))))
+
+#define OTYP_VALID(otyp) ((otyp == OTYP_BLK) || \
+ (otyp == OTYP_CHR) || \
+ (otyp == OTYP_LYR))
+
+#define XDFS_NODES 4
+
+#define XDFS_HVM_MODE(sp) (XDFS_HVM_STATE(sp)->xdfs_hs_mode)
+#define XDFS_HVM_DIP(sp) (XDFS_HVM_STATE(sp)->xdfs_hs_dip)
+#define XDFS_HVM_PATH(sp) (XDFS_HVM_STATE(sp)->xdfs_hs_path)
+#define XDFS_HVM_STATE(sp) \
+ ((xdfs_hvm_state_t *)(&((char *)(sp))[XDFS_HVM_STATE_OFFSET]))
+#define XDFS_HVM_STATE_OFFSET (xdfs_ss_size - sizeof (xdfs_hvm_state_t))
+#define XDFS_HVM_SANE(sp) \
+ ASSERT(XDFS_HVM_MODE(sp)); \
+ ASSERT(XDFS_HVM_DIP(sp) != NULL); \
+ ASSERT(XDFS_HVM_PATH(sp) != NULL);
+
+
+typedef struct xdfs_hvm_state {
+ boolean_t xdfs_hs_mode;
+ dev_info_t *xdfs_hs_dip;
+ char *xdfs_hs_path;
+} xdfs_hvm_state_t;
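+
+/*
+ * A minimal sketch of the overlay arithmetic used by XDFS_HVM_STATE()
+ * above (the sizes here are illustrative assumptions, not values from
+ * this change): the hvm state is carved out of the tail of each soft
+ * state buffer, so with xdfs_ss_size == 512 and
+ * sizeof (xdfs_hvm_state_t) == 24, XDFS_HVM_STATE(sp) would resolve
+ * to &((char *)sp)[488].
+ */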
+
+/* local function and structure prototypes */
+static int xdfs_iodone(struct buf *);
+static boolean_t xdfs_isopen_part(xdfs_state_t *, int);
+static boolean_t xdfs_isopen(xdfs_state_t *);
+static cmlb_tg_ops_t xdfs_lb_ops;
+
+/*
+ * Globals
+ */
+major_t xdfs_major;
+#define xdfs_hvm_dev_ops (xdfs_c_hvm_dev_ops)
+#define xdfs_hvm_cb_ops (xdfs_hvm_dev_ops->devo_cb_ops)
+
+/*
+ * Private globals
+ */
+volatile boolean_t xdfs_pv_disable = B_FALSE;
+static void *xdfs_ssp;
+static size_t xdfs_ss_size;
+
+/*
+ * Private helper functions
+ */
+static boolean_t
+xdfs_tgt_hold(xdfs_state_t *xsp)
+{
+ mutex_enter(&xsp->xdfss_mutex);
+ ASSERT(xsp->xdfss_tgt_holds >= 0);
+ if (!xsp->xdfss_tgt_attached) {
+ mutex_exit(&xsp->xdfss_mutex);
+ return (B_FALSE);
+ }
+ xsp->xdfss_tgt_holds++;
+ mutex_exit(&xsp->xdfss_mutex);
+ return (B_TRUE);
+}
+
+static void
+xdfs_tgt_release(xdfs_state_t *xsp)
+{
+ mutex_enter(&xsp->xdfss_mutex);
+ ASSERT(xsp->xdfss_tgt_attached);
+ ASSERT(xsp->xdfss_tgt_holds > 0);
+ if (--xsp->xdfss_tgt_holds == 0)
+ cv_broadcast(&xsp->xdfss_cv);
+ mutex_exit(&xsp->xdfss_mutex);
+}
+
+/*ARGSUSED*/
+static int
+xdfs_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
+{
+ int instance = ddi_get_instance(dip);
+ xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+ int rv;
+
+ if (xsp == NULL)
+ return (ENXIO);
+
+ if (!xdfs_tgt_hold(xsp))
+ return (ENXIO);
+
+ if (cmd == TG_GETVIRTGEOM) {
+ cmlb_geom_t pgeom, *vgeomp;
+ diskaddr_t capacity;
+
+ /*
+ * The native xdf driver doesn't support this ioctl.
+		 * Instead of passing it on, emulate it here so that the
+ * results look the same as what we get for a real xdf
+ * shell device.
+ *
+ * Get the real size of the device
+ */
+ if ((rv = xdf_lb_getinfo(xsp->xdfss_tgt_dip,
+ TG_GETPHYGEOM, &pgeom, tg_cookie)) != 0)
+ goto out;
+ capacity = pgeom.g_capacity;
+
+ /*
+ * If the controller returned us something that doesn't
+ * really fit into an Int 13/function 8 geometry
+ * result, just fail the ioctl. See PSARC 1998/313.
+ */
+ if (capacity >= (63 * 254 * 1024)) {
+ rv = EINVAL;
+ goto out;
+ }
+
+ vgeomp = (cmlb_geom_t *)arg;
+ vgeomp->g_capacity = capacity;
+ vgeomp->g_nsect = 63;
+ vgeomp->g_nhead = 254;
+ vgeomp->g_ncyl = capacity / (63 * 254);
+ vgeomp->g_acyl = 0;
+ vgeomp->g_secsize = 512;
+ vgeomp->g_intrlv = 1;
+ vgeomp->g_rpm = 3600;
+ rv = 0;
+ goto out;
+ }
+
+ rv = xdf_lb_getinfo(xsp->xdfss_tgt_dip, cmd, arg, tg_cookie);
+
+out:
+ xdfs_tgt_release(xsp);
+ return (rv);
+}
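+
+/*
+ * For reference, the Int 13/function 8 limit enforced above works out
+ * to 63 * 254 * 1024 == 16,386,048 blocks, i.e. roughly 7.8 GB given
+ * the 512 byte sectors reported here (simple arithmetic, not a value
+ * taken from this change).
+ */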
+
+static boolean_t
+xdfs_isopen_part(xdfs_state_t *xsp, int part)
+{
+ int otyp;
+
+ ASSERT(MUTEX_HELD(&xsp->xdfss_mutex));
+ for (otyp = 0; (otyp < OTYPCNT); otyp++) {
+ if (xsp->xdfss_otyp_count[otyp][part] != 0) {
+ ASSERT(xsp->xdfss_tgt_attached);
+ ASSERT(xsp->xdfss_tgt_holds >= 0);
+ return (B_TRUE);
+ }
+ }
+ return (B_FALSE);
+}
+
+static boolean_t
+xdfs_isopen(xdfs_state_t *xsp)
+{
+ int part;
+
+ ASSERT(MUTEX_HELD(&xsp->xdfss_mutex));
+ for (part = 0; part < XDF_PEXT; part++) {
+ if (xdfs_isopen_part(xsp, part))
+ return (B_TRUE);
+ }
+ return (B_FALSE);
+}
+
+static int
+xdfs_iodone(struct buf *bp)
+{
+ struct buf *bp_orig = bp->b_chain;
+
+	/* Propagate back the io results */
+ bp_orig->b_resid = bp->b_resid;
+ bioerror(bp_orig, geterror(bp));
+ biodone(bp_orig);
+
+ freerbuf(bp);
+ return (0);
+}
+
+static int
+xdfs_cmlb_attach(xdfs_state_t *xsp)
+{
+ return (cmlb_attach(xsp->xdfss_dip, &xdfs_lb_ops,
+ xsp->xdfss_tgt_is_cd ? DTYPE_RODIRECT : DTYPE_DIRECT,
+ xdf_is_rm(xsp->xdfss_tgt_dip),
+ B_TRUE,
+ xdfs_c_cmlb_node_type(xsp),
+ xdfs_c_cmlb_alter_behavior(xsp),
+ xsp->xdfss_cmlbhandle, 0));
+}
+
+static boolean_t
+xdfs_tgt_probe(xdfs_state_t *xsp, dev_info_t *tgt_dip)
+{
+ cmlb_geom_t pgeom;
+ int tgt_instance = ddi_get_instance(tgt_dip);
+
+ ASSERT(MUTEX_HELD(&xsp->xdfss_mutex));
+ ASSERT(!xdfs_isopen(xsp));
+ ASSERT(!xsp->xdfss_tgt_attached);
+
+ xsp->xdfss_tgt_dip = tgt_dip;
+ xsp->xdfss_tgt_holds = 0;
+ xsp->xdfss_tgt_dev = makedevice(ddi_driver_major(tgt_dip),
+ XDF_MINOR(tgt_instance, 0));
+ ASSERT((xsp->xdfss_tgt_dev & XDF_PMASK) == 0);
+ xsp->xdfss_tgt_is_cd = xdf_is_cd(tgt_dip);
+
+ /*
+ * GROSS HACK ALERT! GROSS HACK ALERT!
+ *
+ * Before we can initialize the cmlb layer, we have to tell the
+	 * underlying xdf device what its physical geometry should be.
+ * See the block comments at the top of this file for more info.
+ */
+ if (!xsp->xdfss_tgt_is_cd &&
+ ((xdfs_c_getpgeom(xsp->xdfss_dip, &pgeom) != 0) ||
+ (xdf_hvm_setpgeom(xsp->xdfss_tgt_dip, &pgeom) != 0)))
+ return (B_FALSE);
+
+ /*
+ * Force the xdf front end driver to connect to the backend. From
+ * the solaris device tree perspective, the xdf driver devinfo node
+ * is already in the ATTACHED state. (Otherwise xdf_hvm_hold()
+ * would not have returned a dip.) But this doesn't mean that the
+	 * xdf device has actually established a connection to its back
+ * end driver. For us to be able to access the xdf device it needs
+ * to be connected.
+ */
+ if (!xdf_hvm_connect(xsp->xdfss_tgt_dip)) {
+ cmn_err(CE_WARN, "pv driver failed to connect: %s",
+ xsp->xdfss_pv);
+ return (B_FALSE);
+ }
+
+ if (xsp->xdfss_tgt_is_cd && !xdf_media_req_supported(tgt_dip)) {
+ /*
+		 * Unfortunately, the dom0 backend driver doesn't support
+ * important media request operations like eject, so fail
+ * the probe (this should cause us to fall back to emulated
+ * hvm device access, which does support things like eject).
+ */
+ return (B_FALSE);
+ }
+
+ /* create kstat for iostat(1M) */
+ if (xdf_kstat_create(xsp->xdfss_tgt_dip, (char *)xdfs_c_name,
+ tgt_instance) != 0)
+ return (B_FALSE);
+
+ /*
+ * Now we need to mark ourselves as attached and drop xdfss_mutex.
+ * We do this because the final steps in the attach process will
+ * need to access the underlying disk to read the label and
+ * possibly the devid.
+ */
+ xsp->xdfss_tgt_attached = B_TRUE;
+ mutex_exit(&xsp->xdfss_mutex);
+
+ if (!xsp->xdfss_tgt_is_cd && xdfs_c_bb_check(xsp)) {
+ cmn_err(CE_WARN, "pv disks with bad blocks are unsupported: %s",
+ xsp->xdfss_hvm);
+ mutex_enter(&xsp->xdfss_mutex);
+ xdf_kstat_delete(xsp->xdfss_tgt_dip);
+ xsp->xdfss_tgt_attached = B_FALSE;
+ return (B_FALSE);
+ }
+
+ /*
+	 * Initialize cmlb. Note that for partition information cmlb
+	 * will access the underlying xdf disk device directly via
+ * xdfs_lb_rdwr() and xdfs_lb_getinfo(). There are no
+ * layered driver handles associated with this access because
+ * it is a direct disk access that doesn't go through
+ * any of the device nodes exported by the xdf device (since
+ * all exported device nodes only reflect the portion of
+ * the device visible via the partition/slice that the node
+ * is associated with.) So while not observable via the LDI,
+ * this direct disk access is ok since we're actually holding
+ * the target device.
+ */
+ if (xdfs_cmlb_attach(xsp) != 0) {
+ mutex_enter(&xsp->xdfss_mutex);
+ xdf_kstat_delete(xsp->xdfss_tgt_dip);
+ xsp->xdfss_tgt_attached = B_FALSE;
+ return (B_FALSE);
+ }
+
+ /* setup devid string */
+ xsp->xdfss_tgt_devid = NULL;
+ if (!xsp->xdfss_tgt_is_cd)
+ xdfs_c_devid_setup(xsp);
+
+ (void) cmlb_validate(xsp->xdfss_cmlbhandle, 0, 0);
+
+ /* Have the system report any newly created device nodes */
+ ddi_report_dev(xsp->xdfss_dip);
+
+ mutex_enter(&xsp->xdfss_mutex);
+ return (B_TRUE);
+}
+
+static boolean_t
+xdfs_tgt_detach(xdfs_state_t *xsp)
+{
+ ASSERT(MUTEX_HELD(&xsp->xdfss_mutex));
+ ASSERT(xsp->xdfss_tgt_attached);
+ ASSERT(xsp->xdfss_tgt_holds >= 0);
+
+ if ((xdfs_isopen(xsp)) || (xsp->xdfss_tgt_holds != 0))
+ return (B_FALSE);
+
+ ddi_devid_unregister(xsp->xdfss_dip);
+ if (xsp->xdfss_tgt_devid != NULL)
+ ddi_devid_free(xsp->xdfss_tgt_devid);
+
+ xdf_kstat_delete(xsp->xdfss_tgt_dip);
+ xsp->xdfss_tgt_attached = B_FALSE;
+ return (B_TRUE);
+}
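+
+/*
+ * A note on the hold protocol above (a summary sketch, not text from
+ * this change): I/O paths bracket target access with xdfs_tgt_hold()
+ * and xdfs_tgt_release(), and xdfs_tgt_detach() refuses to proceed
+ * while any holds or opens remain; the cv_broadcast() in the release
+ * path presumably wakes a detach-side waiter on xdfss_cv so that it
+ * can re-check the hold count.
+ */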
+
+/*
+ * Xdf_shell interfaces that may be called from outside this file.
+ */
+void
+xdfs_minphys(struct buf *bp)
+{
+ xdfmin(bp);
+}
+
+/*
+ * Cmlb ops vector, allows the cmlb module to directly access the entire
+ * xdf disk device without going through any partitioning layers.
+ */
+int
+xdfs_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr,
+ diskaddr_t start, size_t count, void *tg_cookie)
+{
+ int instance = ddi_get_instance(dip);
+ xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+ int rv;
+
+ if (xsp == NULL)
+ return (ENXIO);
+
+ if (!xdfs_tgt_hold(xsp))
+ return (ENXIO);
+
+ rv = xdf_lb_rdwr(xsp->xdfss_tgt_dip,
+ cmd, bufaddr, start, count, tg_cookie);
+
+ xdfs_tgt_release(xsp);
+ return (rv);
+}
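+
+/*
+ * A hypothetical use of xdfs_lb_rdwr() (illustration only, not part
+ * of this change): a caller such as cmlb could read the first sector
+ * of the disk directly, without any LDI handle, via something like:
+ *
+ *	char sector[NBPSCTR];
+ *	if (xdfs_lb_rdwr(dip, TG_READ, sector, 0, NBPSCTR, NULL) == 0)
+ *		(the label can then be parsed from sector)
+ */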
+
+/*
+ * Driver PV and HVM cb_ops entry points
+ */
+/*ARGSUSED*/
+static int
+xdfs_open(dev_t *dev_p, int flag, int otyp, cred_t *credp)
+{
+ ldi_ident_t li;
+ dev_t dev = *dev_p;
+ int instance = XDFS_DEV2UNIT(dev);
+ int part = XDFS_DEV2PART(dev);
+ xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+ dev_t tgt_devt = xsp->xdfss_tgt_dev | part;
+ int err = 0;
+
+ if ((otyp < 0) || (otyp >= OTYPCNT))
+ return (EINVAL);
+
+ if (XDFS_HVM_MODE(xsp)) {
+ if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL))
+ return (ENOTSUP);
+ return (xdfs_hvm_cb_ops->cb_open(dev_p, flag, otyp, credp));
+ }
+
+ /* allocate an ldi handle */
+ VERIFY(ldi_ident_from_dev(*dev_p, &li) == 0);
+
+ mutex_enter(&xsp->xdfss_mutex);
+
+ /*
+ * We translate all device opens (chr, blk, and lyr) into
+ * block device opens. Why? Because for all the opens that
+ * come through this driver, we only keep around one LDI handle.
+ * So that handle can only be of one open type. The reason
+ * that we choose the block interface for this is that to use
+ * the block interfaces for a device the system needs to allocate
+ * buf_ts, which are associated with system memory which can act
+ * as a cache for device data. So normally when a block device
+ * is closed the system will ensure that all these pages get
+ * flushed out of memory. But if we were to open the device
+ * as a character device, then when we went to close the underlying
+ * device (even if we had invoked the block interfaces) any data
+	 * remaining in memory wouldn't necessarily be flushed out
+ * before the device was closed.
+ */
+ if (xsp->xdfss_tgt_lh[part] == NULL) {
+ ASSERT(!xdfs_isopen_part(xsp, part));
+
+ err = ldi_open_by_dev(&tgt_devt, OTYP_BLK, flag, credp,
+ &xsp->xdfss_tgt_lh[part], li);
+
+ if (err != 0) {
+ mutex_exit(&xsp->xdfss_mutex);
+ ldi_ident_release(li);
+ return (err);
+ }
+
+ /* Disk devices really shouldn't clone */
+ ASSERT(tgt_devt == (xsp->xdfss_tgt_dev | part));
+ } else {
+ ldi_handle_t lh_tmp;
+
+ ASSERT(xdfs_isopen_part(xsp, part));
+
+ /* do ldi open/close to get flags and cred check */
+ err = ldi_open_by_dev(&tgt_devt, OTYP_BLK, flag, credp,
+ &lh_tmp, li);
+ if (err != 0) {
+ mutex_exit(&xsp->xdfss_mutex);
+ ldi_ident_release(li);
+ return (err);
+ }
+
+ /* Disk devices really shouldn't clone */
+ ASSERT(tgt_devt == (xsp->xdfss_tgt_dev | part));
+ (void) ldi_close(lh_tmp, flag, credp);
+ }
+ ldi_ident_release(li);
+
+ xsp->xdfss_otyp_count[otyp][part]++;
+
+ mutex_exit(&xsp->xdfss_mutex);
+ return (0);
+}
+
+/*ARGSUSED*/
+static int
+xdfs_close(dev_t dev, int flag, int otyp, cred_t *credp)
+{
+ int instance = XDFS_DEV2UNIT(dev);
+ int part = XDFS_DEV2PART(dev);
+ xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+ int err = 0;
+
+ ASSERT((otyp >= 0) && otyp < OTYPCNT);
+
+ /* Sanity check the dev_t associated with this request. */
+ ASSERT(getmajor(dev) == xdfs_major);
+ if (getmajor(dev) != xdfs_major)
+ return (ENXIO);
+
+ if (XDFS_HVM_MODE(xsp)) {
+ if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL))
+ return (ENOTSUP);
+ return (xdfs_hvm_cb_ops->cb_close(dev, flag, otyp, credp));
+ }
+
+ /*
+	 * Sanity check that the device is actually open. On debug
+ * kernels we'll panic and on non-debug kernels we'll return failure.
+ */
+ mutex_enter(&xsp->xdfss_mutex);
+ ASSERT(xdfs_isopen_part(xsp, part));
+ if (!xdfs_isopen_part(xsp, part)) {
+ mutex_exit(&xsp->xdfss_mutex);
+ return (ENXIO);
+ }
+
+ ASSERT(xsp->xdfss_tgt_lh[part] != NULL);
+ ASSERT(xsp->xdfss_otyp_count[otyp][part] > 0);
+ if (otyp == OTYP_LYR) {
+ xsp->xdfss_otyp_count[otyp][part]--;
+ } else {
+ xsp->xdfss_otyp_count[otyp][part] = 0;
+ }
+
+ if (!xdfs_isopen_part(xsp, part)) {
+ err = ldi_close(xsp->xdfss_tgt_lh[part], flag, credp);
+ xsp->xdfss_tgt_lh[part] = NULL;
+ }
+
+ mutex_exit(&xsp->xdfss_mutex);
+
+ return (err);
+}
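+
+/*
+ * A note on the open accounting above (reasoning, not text from this
+ * change): layered (OTYP_LYR) opens arrive as balanced open/close
+ * pairs, so they can be reference counted, while block and character
+ * closes are only delivered on the last close of a device node, so
+ * those counts are simply reset to zero.
+ */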
+
+int
+xdfs_strategy(struct buf *bp)
+{
+ dev_t dev = bp->b_edev;
+ int instance = XDFS_DEV2UNIT(dev);
+ int part = XDFS_DEV2PART(dev);
+ xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+ dev_t tgt_devt;
+ struct buf *bp_clone;
+
+ /* Sanity check the dev_t associated with this request. */
+ ASSERT(getmajor(dev) == xdfs_major);
+ if (getmajor(dev) != xdfs_major)
+ goto err;
+
+ if (XDFS_HVM_MODE(xsp)) {
+ if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL))
+ return (ENOTSUP);
+ return (xdfs_hvm_cb_ops->cb_strategy(bp));
+ }
+
+ /*
+ * Sanity checks that the dev_t associated with the buf we were
+ * passed corresponds to an open partition. On debug kernels we'll
+ * panic and on non-debug kernels we'll return failure.
+ */
+ mutex_enter(&xsp->xdfss_mutex);
+ ASSERT(xdfs_isopen_part(xsp, part));
+ if (!xdfs_isopen_part(xsp, part)) {
+ mutex_exit(&xsp->xdfss_mutex);
+ goto err;
+ }
+ mutex_exit(&xsp->xdfss_mutex);
+
+ /* clone this buffer */
+ tgt_devt = xsp->xdfss_tgt_dev | part;
+ bp_clone = bioclone(bp, 0, bp->b_bcount, tgt_devt, bp->b_blkno,
+ xdfs_iodone, NULL, KM_SLEEP);
+ bp_clone->b_chain = bp;
+
+ /*
+ * If we're being invoked on behalf of the physio() call in
+ * xdfs_dioctl_rwcmd() then b_private will be set to
+	 * XB_SLICE_NONE and we need to propagate this flag into the
+ * cloned buffer so that the xdf driver will see it.
+ */
+ if (bp->b_private == (void *)XB_SLICE_NONE)
+ bp_clone->b_private = (void *)XB_SLICE_NONE;
+
+ /*
+ * Pass on the cloned buffer. Note that we don't bother to check
+ * for failure because the xdf strategy routine will have to
+ * invoke biodone() if it wants to return an error, which means
+ * that the xdfs_iodone() callback will get invoked and it
+	 * will propagate the error back up the stack and free the cloned
+ * buffer.
+ */
+ ASSERT(xsp->xdfss_tgt_lh[part] != NULL);
+ return (ldi_strategy(xsp->xdfss_tgt_lh[part], bp_clone));
+
+err:
+ bioerror(bp, ENXIO);
+ bp->b_resid = bp->b_bcount;
+ biodone(bp);
+ return (0);
+}
+
+static int
+xdfs_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
+{
+ int instance = XDFS_DEV2UNIT(dev);
+ int part = XDFS_DEV2PART(dev);
+ xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+
+ if (!XDFS_HVM_MODE(xsp))
+ return (ldi_dump(xsp->xdfss_tgt_lh[part], addr, blkno, nblk));
+
+ if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL))
+ return (ENOTSUP);
+ return (xdfs_hvm_cb_ops->cb_dump(dev, addr, blkno, nblk));
+}
+
+/*ARGSUSED*/
+static int
+xdfs_read(dev_t dev, struct uio *uio, cred_t *credp)
+{
+ int instance = XDFS_DEV2UNIT(dev);
+ int part = XDFS_DEV2PART(dev);
+ xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+
+ if (!XDFS_HVM_MODE(xsp))
+ return (ldi_read(xsp->xdfss_tgt_lh[part], uio, credp));
+
+ if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL))
+ return (ENOTSUP);
+ return (xdfs_hvm_cb_ops->cb_read(dev, uio, credp));
+}
+
+/*ARGSUSED*/
+static int
+xdfs_write(dev_t dev, struct uio *uio, cred_t *credp)
+{
+ int instance = XDFS_DEV2UNIT(dev);
+ int part = XDFS_DEV2PART(dev);
+ xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+
+ if (!XDFS_HVM_MODE(xsp))
+ return (ldi_write(xsp->xdfss_tgt_lh[part], uio, credp));
+
+ if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL))
+ return (ENOTSUP);
+ return (xdfs_hvm_cb_ops->cb_write(dev, uio, credp));
+}
+
+/*ARGSUSED*/
+static int
+xdfs_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
+{
+ int instance = XDFS_DEV2UNIT(dev);
+ int part = XDFS_DEV2PART(dev);
+ xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+
+ if (!XDFS_HVM_MODE(xsp))
+ return (ldi_aread(xsp->xdfss_tgt_lh[part], aio, credp));
+
+ if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL) ||
+ (xdfs_hvm_cb_ops->cb_strategy == NULL) ||
+ (xdfs_hvm_cb_ops->cb_strategy == nodev) ||
+ (xdfs_hvm_cb_ops->cb_aread == NULL))
+ return (ENOTSUP);
+ return (xdfs_hvm_cb_ops->cb_aread(dev, aio, credp));
+}
+
+/*ARGSUSED*/
+static int
+xdfs_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
+{
+ int instance = XDFS_DEV2UNIT(dev);
+ int part = XDFS_DEV2PART(dev);
+ xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+
+ if (!XDFS_HVM_MODE(xsp))
+ return (ldi_awrite(xsp->xdfss_tgt_lh[part], aio, credp));
+
+ if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL) ||
+ (xdfs_hvm_cb_ops->cb_strategy == NULL) ||
+ (xdfs_hvm_cb_ops->cb_strategy == nodev) ||
+ (xdfs_hvm_cb_ops->cb_awrite == NULL))
+ return (ENOTSUP);
+ return (xdfs_hvm_cb_ops->cb_awrite(dev, aio, credp));
+}
+
+static int
+xdfs_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp,
+ int *rvalp)
+{
+ int instance = XDFS_DEV2UNIT(dev);
+ int part = XDFS_DEV2PART(dev);
+ xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+ int rv;
+ boolean_t done;
+
+ if (XDFS_HVM_MODE(xsp)) {
+ if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL))
+ return (ENOTSUP);
+ return (xdfs_hvm_cb_ops->cb_ioctl(
+ dev, cmd, arg, flag, credp, rvalp));
+ }
+
+ rv = xdfs_c_ioctl(xsp, dev, part, cmd, arg, flag, credp, rvalp, &done);
+ if (done)
+ return (rv);
+ return (ldi_ioctl(xsp->xdfss_tgt_lh[part],
+ cmd, arg, flag, credp, rvalp));
+}
+
+static int
+xdfs_hvm_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
+ int flags, char *name, caddr_t valuep, int *lengthp)
+{
+ int instance = ddi_get_instance(dip);
+ void *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+
+ ASSERT(XDFS_HVM_MODE(xsp));
+
+ if ((xdfs_hvm_dev_ops == NULL) || (xdfs_hvm_cb_ops == NULL) ||
+ (xdfs_hvm_cb_ops->cb_prop_op == NULL) ||
+ (xdfs_hvm_cb_ops->cb_prop_op == nodev) ||
+ (xdfs_hvm_cb_ops->cb_prop_op == nulldev))
+ return (DDI_PROP_NOT_FOUND);
+
+ return (xdfs_hvm_cb_ops->cb_prop_op(dev, dip, prop_op,
+ flags, name, valuep, lengthp));
+}
+
+static int
+xdfs_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
+ int flags, char *name, caddr_t valuep, int *lengthp)
+{
+ int instance = ddi_get_instance(dip);
+ xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+ int rv;
+ dev_info_t *tgt_dip;
+ dev_t tgt_devt;
+
+ /*
+ * Sanity check that if a dev_t or dip were specified, they
+ * correspond to this device driver. On debug kernels we'll
+ * panic and on non-debug kernels we'll return failure.
+ */
+ ASSERT(ddi_driver_major(dip) == xdfs_major);
+ ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdfs_major));
+ if ((ddi_driver_major(dip) != xdfs_major) ||
+ ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdfs_major)))
+ return (DDI_PROP_NOT_FOUND);
+
+ /*
+ * This property lookup might be associated with a device node
+ * that is not yet attached; if so, pass it on to ddi_prop_op().
+ */
+ if (xsp == NULL)
+ return (ddi_prop_op(dev, dip, prop_op, flags,
+ name, valuep, lengthp));
+
+ /* If we're accessing the device in hvm mode, pass this request on */
+ if (XDFS_HVM_MODE(xsp))
+ return (xdfs_hvm_prop_op(dev, dip, prop_op,
+ flags, name, valuep, lengthp));
+
+ /*
+ * Make sure we only lookup static properties.
+ *
+ * If there are static properties of the underlying xdf driver
+ * that we want to mirror, then we'll have to explicitly look them
+ * up and define them during attach. There are a few reasons
+ * for this. Most importantly, most static properties are typed
+ * and all dynamic properties are untyped, i.e., for dynamic
+ * properties the caller must know the type of the property and
+ * how to interpret the value of the property. The prop_op driver
+ * entry point is only designed for returning dynamic/untyped
+ * properties, so if we were to attempt to lookup and pass back
+ * static properties of the underlying device here then we would
+ * be losing the type information for those properties. Another
+ * reason we don't want to pass on static property requests is that
+ * static properties are enumerable in the device tree, whereas
+ * dynamic ones are not.
+ */
+ flags |= DDI_PROP_DYNAMIC;
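+
+ /*
+ * (For example, "Nblocks" is a classic dynamic disk property; a
+ * caller must already know that it is a 64-bit integer and fetch
+ * it along the lines of the following illustrative snippet:
+ *
+ *	uint64_t nblocks;
+ *	int len = sizeof (nblocks);
+ *	(void) cdev_prop_op(dev, dip, PROP_LEN_AND_VAL_BUF,
+ *	    DDI_PROP_DYNAMIC, "Nblocks", (caddr_t)&nblocks, &len);
+ *
+ * whereas a static property carries its type with it.)
+ */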
+
+ /*
+ * We can't use the ldi here to access the underlying device because
+ * the ldi actually opens the device, and that open might fail if the
+ * device has already been opened with the FEXCL flag. If we used
+ * the ldi here, it would also be possible for some other caller to
+ * try to open the device with the FEXCL flag and get a failure back
+ * because we have it open to do a property query. Instead we'll
+ * grab a hold on the target dip.
+ */
+ if (!xdfs_tgt_hold(xsp))
+ return (DDI_PROP_NOT_FOUND);
+
+ /* figure out the dip and dev_t we're going to pass on down */
+ tgt_dip = xsp->xdfss_tgt_dip;
+ if (dev == DDI_DEV_T_ANY) {
+ tgt_devt = DDI_DEV_T_ANY;
+ } else {
+ tgt_devt = xsp->xdfss_tgt_dev | XDFS_DEV2PART(dev);
+ }
+
+ /*
+ * cdev_prop_op() is not a public interface, and normally the caller
+ * is required to make sure that the target driver actually implements
+ * this interface before trying to invoke it. In this case we know
+ * that we're always accessing the xdf driver and it does have this
+ * interface defined, so we can skip the check.
+ */
+ rv = cdev_prop_op(tgt_devt, tgt_dip,
+ prop_op, flags, name, valuep, lengthp);
+
+ xdfs_tgt_release(xsp);
+ return (rv);
+}
+
+/*
+ * Driver PV and HVM dev_ops entry points
+ */
+/*ARGSUSED*/
+static int
+xdfs_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
+ void **result)
+{
+ dev_t dev = (dev_t)arg;
+ int instance = XDFS_DEV2UNIT(dev);
+ xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+
+ switch (infocmd) {
+ case DDI_INFO_DEVT2DEVINFO:
+ if (xsp == NULL)
+ return (DDI_FAILURE);
+ if (XDFS_HVM_MODE(xsp))
+ *result = XDFS_HVM_DIP(xsp);
+ else
+ *result = (void *)xsp->xdfss_dip;
+ break;
+ case DDI_INFO_DEVT2INSTANCE:
+ *result = (void *)(intptr_t)instance;
+ break;
+ default:
+ return (DDI_FAILURE);
+ }
+ return (DDI_SUCCESS);
+}
+
+static int
+xdfs_hvm_probe(dev_info_t *dip, char *path)
+{
+ int instance = ddi_get_instance(dip);
+ int rv = DDI_PROBE_SUCCESS;
+ void *xsp;
+
+ ASSERT(path != NULL);
+ cmn_err(CE_WARN, "PV access to device disabled: %s", path);
+
+ (void) ddi_soft_state_zalloc(xdfs_ssp, instance);
+ VERIFY((xsp = ddi_get_soft_state(xdfs_ssp, instance)) != NULL);
+
+ if ((xdfs_hvm_dev_ops == NULL) ||
+ (xdfs_hvm_dev_ops->devo_probe == NULL) ||
+ ((rv = xdfs_hvm_dev_ops->devo_probe(dip)) == DDI_PROBE_FAILURE)) {
+ ddi_soft_state_free(xdfs_ssp, instance);
+ cmn_err(CE_WARN, "HVM probe of device failed: %s", path);
+ kmem_free(path, MAXPATHLEN);
+ return (DDI_PROBE_FAILURE);
+ }
+
+ XDFS_HVM_MODE(xsp) = B_TRUE;
+ XDFS_HVM_DIP(xsp) = dip;
+ XDFS_HVM_PATH(xsp) = path;
+
+ return (rv);
+}
+
+static int
+xdfs_probe(dev_info_t *dip)
+{
+ int instance = ddi_get_instance(dip);
+ xdfs_state_t *xsp;
+ dev_info_t *tgt_dip;
+ char *path;
+ int i, pv_disable;
+
+ /* if we've already probed the device then there's nothing to do */
+ if (ddi_get_soft_state(xdfs_ssp, instance))
+ return (DDI_PROBE_PARTIAL);
+
+ /* Figure out our pathname */
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ (void) ddi_pathname(dip, path);
+
+ /* see if we should disable pv access mode */
+ pv_disable = ddi_prop_get_int(DDI_DEV_T_ANY,
+ dip, DDI_PROP_NOTPROM, "pv_disable", 0);
+
+ if (xdfs_pv_disable || pv_disable)
+ return (xdfs_hvm_probe(dip, path));
+
+ /*
+ * This xdf shell device layers on top of an xdf device. So the first
+ * thing we need to do is determine which xdf device instance this
+ * xdf shell instance should be layered on top of.
+ */
+ for (i = 0; xdfs_c_h2p_map[i].xdfs_h2p_hvm != NULL; i++) {
+ if (strcmp(xdfs_c_h2p_map[i].xdfs_h2p_hvm, path) == 0)
+ break;
+ }
+
+ if ((xdfs_c_h2p_map[i].xdfs_h2p_hvm == NULL) ||
+ ((tgt_dip = xdf_hvm_hold(xdfs_c_h2p_map[i].xdfs_h2p_pv)) == NULL)) {
+ /*
+ * UhOh. We either don't know what xdf instance this xdf
+ * shell device should be mapped to or the xdf node associated
+ * with this instance isn't attached. In either case fall
+ * back to hvm access.
+ */
+ return (xdfs_hvm_probe(dip, path));
+ }
+
+ /* allocate and initialize our state structure */
+ (void) ddi_soft_state_zalloc(xdfs_ssp, instance);
+ xsp = ddi_get_soft_state(xdfs_ssp, instance);
+ mutex_init(&xsp->xdfss_mutex, NULL, MUTEX_DRIVER, NULL);
+ cv_init(&xsp->xdfss_cv, NULL, CV_DEFAULT, NULL);
+ mutex_enter(&xsp->xdfss_mutex);
+
+ xsp->xdfss_dip = dip;
+ xsp->xdfss_pv = xdfs_c_h2p_map[i].xdfs_h2p_pv;
+ xsp->xdfss_hvm = xdfs_c_h2p_map[i].xdfs_h2p_hvm;
+ xsp->xdfss_tgt_attached = B_FALSE;
+ cmlb_alloc_handle((cmlb_handle_t *)&xsp->xdfss_cmlbhandle);
+
+ if (!xdfs_tgt_probe(xsp, tgt_dip)) {
+ mutex_exit(&xsp->xdfss_mutex);
+ cmlb_free_handle(&xsp->xdfss_cmlbhandle);
+ ddi_soft_state_free(xdfs_ssp, instance);
+ ddi_release_devi(tgt_dip);
+ return (xdfs_hvm_probe(dip, path));
+ }
+ mutex_exit(&xsp->xdfss_mutex);
+
+ /*
+ * Add a zero-length attribute to tell the world we support
+ * kernel ioctls (for layered drivers).
+ */
+ (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
+ DDI_KERNEL_IOCTL, NULL, 0);
+
+ /* we're done with the pathname once we've probed in pv mode */
+ kmem_free(path, MAXPATHLEN);
+
+ return (DDI_PROBE_SUCCESS);
+}
+
+static int
+xdfs_hvm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int instance = ddi_get_instance(dip);
+ void *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+ int rv = DDI_FAILURE;
+
+ XDFS_HVM_SANE(xsp);
+
+ if ((xdfs_hvm_dev_ops == NULL) ||
+ (xdfs_hvm_dev_ops->devo_attach == NULL) ||
+ ((rv = xdfs_hvm_dev_ops->devo_attach(dip, cmd)) != DDI_SUCCESS)) {
+ cmn_err(CE_WARN, "HVM attach of device failed: %s",
+ XDFS_HVM_PATH(xsp));
+ kmem_free(XDFS_HVM_PATH(xsp), MAXPATHLEN);
+ ddi_soft_state_free(xdfs_ssp, instance);
+ return (rv);
+ }
+
+ return (DDI_SUCCESS);
+}
+
+/*
+ * Autoconfiguration Routines
+ */
+static int
+xdfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
+{
+ int instance = ddi_get_instance(dip);
+ xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+
+ if (xsp == NULL)
+ return (DDI_FAILURE);
+ if (XDFS_HVM_MODE(xsp))
+ return (xdfs_hvm_attach(dip, cmd));
+ if (cmd != DDI_ATTACH)
+ return (DDI_FAILURE);
+
+ xdfs_c_attach(xsp);
+ return (DDI_SUCCESS);
+}
+
+static int
+xdfs_hvm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int instance = ddi_get_instance(dip);
+ void *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+ int rv;
+
+ XDFS_HVM_SANE(xsp);
+
+ if ((xdfs_hvm_dev_ops == NULL) ||
+ (xdfs_hvm_dev_ops->devo_detach == NULL))
+ return (DDI_FAILURE);
+
+ if ((rv = xdfs_hvm_dev_ops->devo_detach(dip, cmd)) != DDI_SUCCESS)
+ return (rv);
+
+ kmem_free(XDFS_HVM_PATH(xsp), MAXPATHLEN);
+ ddi_soft_state_free(xdfs_ssp, instance);
+ return (DDI_SUCCESS);
+}
+
+static int
+xdfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
+{
+ int instance = ddi_get_instance(dip);
+ xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+
+ if (XDFS_HVM_MODE(xsp))
+ return (xdfs_hvm_detach(dip, cmd));
+ if (cmd != DDI_DETACH)
+ return (DDI_FAILURE);
+
+ mutex_enter(&xsp->xdfss_mutex);
+ if (!xdfs_tgt_detach(xsp)) {
+ mutex_exit(&xsp->xdfss_mutex);
+ return (DDI_FAILURE);
+ }
+ mutex_exit(&xsp->xdfss_mutex);
+
+ cmlb_detach(xsp->xdfss_cmlbhandle, 0);
+ cmlb_free_handle(&xsp->xdfss_cmlbhandle);
+ ddi_release_devi(xsp->xdfss_tgt_dip);
+ ddi_soft_state_free(xdfs_ssp, instance);
+ ddi_prop_remove_all(dip);
+ return (DDI_SUCCESS);
+}
+
+static int
+xdfs_hvm_power(dev_info_t *dip, int component, int level)
+{
+ int instance = ddi_get_instance(dip);
+ void *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+
+ XDFS_HVM_SANE(xsp);
+
+ if ((xdfs_hvm_dev_ops == NULL) ||
+ (xdfs_hvm_dev_ops->devo_power == NULL))
+ return (DDI_FAILURE);
+ return (xdfs_hvm_dev_ops->devo_power(dip, component, level));
+}
+
+static int
+xdfs_power(dev_info_t *dip, int component, int level)
+{
+ int instance = ddi_get_instance(dip);
+ xdfs_state_t *xsp = ddi_get_soft_state(xdfs_ssp, instance);
+
+ if (XDFS_HVM_MODE(xsp))
+ return (xdfs_hvm_power(dip, component, level));
+ return (nodev());
+}
+
+/*
+ * Cmlb ops vector
+ */
+static cmlb_tg_ops_t xdfs_lb_ops = {
+ TG_DK_OPS_VERSION_1,
+ xdfs_lb_rdwr,
+ xdfs_lb_getinfo
+};
+
+/*
+ * Device driver ops vector
+ */
+static struct cb_ops xdfs_cb_ops = {
+ xdfs_open, /* open */
+ xdfs_close, /* close */
+ xdfs_strategy, /* strategy */
+ nodev, /* print */
+ xdfs_dump, /* dump */
+ xdfs_read, /* read */
+ xdfs_write, /* write */
+ xdfs_ioctl, /* ioctl */
+ nodev, /* devmap */
+ nodev, /* mmap */
+ nodev, /* segmap */
+ nochpoll, /* poll */
+ xdfs_prop_op, /* cb_prop_op */
+ 0, /* streamtab */
+ D_64BIT | D_MP | D_NEW, /* Driver compatibility flag */
+ CB_REV, /* cb_rev */
+ xdfs_aread, /* async read */
+ xdfs_awrite /* async write */
+};
+
+struct dev_ops xdfs_ops = {
+ DEVO_REV, /* devo_rev, */
+ 0, /* refcnt */
+ xdfs_getinfo, /* info */
+ nulldev, /* identify */
+ xdfs_probe, /* probe */
+ xdfs_attach, /* attach */
+ xdfs_detach, /* detach */
+ nodev, /* reset */
+ &xdfs_cb_ops, /* driver operations */
+ NULL, /* bus operations */
+ xdfs_power, /* power */
+ ddi_quiesce_not_supported, /* devo_quiesce */
+};
+
+/*
+ * Module linkage information for the kernel.
+ */
+static struct modldrv modldrv = {
+ &mod_driverops, /* Type of module. This one is a driver. */
+ NULL, /* Module description. Set by _init() */
+ &xdfs_ops, /* Driver ops. */
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modldrv, NULL
+};
+
+int
+_init(void)
+{
+ int rval;
+
+ xdfs_major = ddi_name_to_major((char *)xdfs_c_name);
+ if (xdfs_major == (major_t)-1)
+ return (EINVAL);
+
+ /*
+ * Determine the size of our soft state structure. The base
+ * size of the structure is the larger of the hvm client's state
+ * structure, or our shell state structure. Then we'll align
+ * the end of the structure to a pointer boundary and append
+ * an xdfs_hvm_state_t structure. This way the xdfs_hvm_state_t
+ * structure is always present and we can use it to determine the
+ * current device access mode (hvm or shell).
+ */
+ xdfs_ss_size = MAX(xdfs_c_hvm_ss_size, sizeof (xdfs_state_t));
+ xdfs_ss_size = P2ROUNDUP(xdfs_ss_size, sizeof (uintptr_t));
+ xdfs_ss_size += sizeof (xdfs_hvm_state_t);
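+
+ /*
+ * Worked example (sizes are illustrative): given a 100 byte hvm
+ * client state structure, a 96 byte xdfs_state_t, and 8 byte
+ * pointers, this works out to MAX(100, 96) == 100, then
+ * P2ROUNDUP(100, 8) == 104, so the appended xdfs_hvm_state_t
+ * always starts at the same aligned offset regardless of which
+ * access mode the device ends up in.
+ */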
+
+ /*
+ * In general ide usually supports 4 disk devices, and this same
+ * limitation also applies to software emulating ide devices, so
+ * by default we pre-allocate 4 xdf shell soft state structures.
+ */
+ if ((rval = ddi_soft_state_init(&xdfs_ssp,
+ xdfs_ss_size, XDFS_NODES)) != 0)
+ return (rval);
+ *xdfs_c_hvm_ss = xdfs_ssp;
+
+ /* Install our module */
+ if (modldrv.drv_linkinfo == NULL)
+ modldrv.drv_linkinfo = (char *)xdfs_c_linkinfo;
+ if ((rval = mod_install(&modlinkage)) != 0) {
+ ddi_soft_state_fini(&xdfs_ssp);
+ return (rval);
+ }
+
+ return (0);
+}
+
+int
+_info(struct modinfo *modinfop)
+{
+ if (modldrv.drv_linkinfo == NULL)
+ modldrv.drv_linkinfo = (char *)xdfs_c_linkinfo;
+ return (mod_info(&modlinkage, modinfop));
+}
+
+int
+_fini(void)
+{
+ int rval;
+ if ((rval = mod_remove(&modlinkage)) != 0)
+ return (rval);
+ ddi_soft_state_fini(&xdfs_ssp);
+ return (0);
+}
diff --git a/usr/src/uts/i86pc/i86hvm/io/xdf_shell.h b/usr/src/uts/i86pc/i86hvm/io/xdf_shell.h
new file mode 100644
index 0000000000..a7dd983e7b
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/io/xdf_shell.h
@@ -0,0 +1,161 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _XDF_SHELL_H
+#define _XDF_SHELL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* These interfaces are all dependent upon xdf */
+#include <io/xdf.h>
+
+/* Include files required for this header file. */
+#include <sys/vtoc.h>
+
+/*
+ * These include files are not strictly required by this header
+ * file, but pretty much every xdf_shell client will need to include
+ * them, so just include them here.
+ */
+#include <sys/cdio.h>
+#include <sys/dklabel.h>
+#include <sys/dktp/altsctr.h>
+#include <sys/dktp/bbh.h>
+#include <sys/dktp/cmdk.h>
+#include <sys/dktp/dadev.h>
+#include <sys/dktp/dadkio.h>
+#include <sys/fdio.h>
+
+/*
+ * XDF Shell driver state structures
+ */
+typedef struct xdfs_state {
+ dev_info_t *xdfss_dip;
+ const char *xdfss_pv;
+ const char *xdfss_hvm;
+
+ /* Members below are protected by xdfss_mutex */
+ kmutex_t xdfss_mutex;
+ kcondvar_t xdfss_cv;
+ cmlb_handle_t xdfss_cmlbhandle;
+ int xdfss_otyp_count[OTYPCNT][XDF_PEXT];
+
+ /* Members below are only valid when xdfss_tgt_attached is true */
+ dev_info_t *xdfss_tgt_dip;
+ boolean_t xdfss_tgt_attached;
+ int xdfss_tgt_holds;
+ dev_t xdfss_tgt_dev;
+ ddi_devid_t xdfss_tgt_devid;
+ boolean_t xdfss_tgt_locked;
+ boolean_t xdfss_tgt_is_cd;
+ ldi_handle_t xdfss_tgt_lh[XDF_PEXT];
+} xdfs_state_t;
+
+typedef struct xdfs_h2p_map {
+ const char *xdfs_h2p_hvm;
+ const char *xdfs_h2p_pv;
+} xdfs_h2p_map_t;
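+
+/*
+ * For example, a client might define its hvm-to-pv path map as follows
+ * (a minimal sketch; the device paths here are purely illustrative):
+ *
+ *	const xdfs_h2p_map_t xdfs_c_h2p_map[] = {
+ *		{ "/pci@0,0/pci-ide@1,1/ide@0/cmdk@0,0", "/xpvd/xdf@0" },
+ *		{ "/pci@0,0/pci-ide@1,1/ide@0/cmdk@1,0", "/xpvd/xdf@1" },
+ *		{ NULL, NULL }	/* the map is NULL terminated */
+ *	};
+ */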
+
+/*
+ * Globals defined by xdf_shell.c
+ */
+extern major_t xdfs_major;
+
+/*
+ * Functions defined by xdf_shell.c
+ */
+extern int xdfs_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
+ void *);
+extern int xdfs_strategy(struct buf *);
+extern void xdfs_minphys(struct buf *);
+
+/*
+ * Globals that must be defined by xdf_shell.c clients
+ */
+extern const char *xdfs_c_name;
+extern const char *xdfs_c_linkinfo;
+extern void **xdfs_c_hvm_ss;
+extern const size_t xdfs_c_hvm_ss_size;
+extern const struct dev_ops *xdfs_c_hvm_dev_ops;
+extern const xdfs_h2p_map_t xdfs_c_h2p_map[];
+
+/*
+ * Functions that must be implemented by xdf_shell.c clients
+ */
+
+/*
+ * xdfs_c_devid_setup() is invoked during device probe. If possible, it
+ * should create a devid for the associated disk device. This routine will
+ * not be invoked for cdrom devices.
+ */
+extern void xdfs_c_devid_setup(xdfs_state_t *);
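+
+/*
+ * A minimal sketch of a client implementation, assuming the client can
+ * extract some unique identity bytes (e.g. a serial number) from the
+ * device ("id" and "idlen" below are illustrative):
+ *
+ *	void
+ *	xdfs_c_devid_setup(xdfs_state_t *xsp)
+ *	{
+ *		if (ddi_devid_init(xsp->xdfss_dip, DEVID_ATA_SERIAL,
+ *		    idlen, id, &xsp->xdfss_tgt_devid) != DDI_SUCCESS)
+ *			xsp->xdfss_tgt_devid = NULL;
+ *	}
+ */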
+
+/*
+ * xdfs_c_bb_check() is invoked during device probe. It should check for
+ * the existence of bad block mappings in an alternate partition/slice and
+ * return B_FALSE if there are no bad block mappings found and return B_TRUE
+ * if there are bad block mappings found. The presence of bad block
+ * mappings will cause the device attach to fail. This routine will not be
+ * invoked for cdrom devices.
+ */
+extern boolean_t xdfs_c_bb_check(xdfs_state_t *);
+
+/*
+ * xdfs_c_getpgeom() is invoked during device probe. It should return the
+ * physical geometry of a disk device that is being attached. The failure
+ * of this routine will cause the device attach to fail. This routine will
+ * not be invoked for cdrom devices.
+ */
+extern int xdfs_c_getpgeom(dev_info_t *, cmlb_geom_t *);
+
+/*
+ * xdfs_c_cmlb_node_type() and xdfs_c_cmlb_alter_behavior() are invoked
+ * during device probe while initializing the cmlb module for the device
+ * node being probed. They should return a cmlb node type and cmlb alter
+ * behavior flag value that can be passed to cmlb_attach().
+ */
+extern char *xdfs_c_cmlb_node_type(xdfs_state_t *);
+extern int xdfs_c_cmlb_alter_behavior(xdfs_state_t *);
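+
+/*
+ * For example, a fixed disk client might mirror what cmdk passes to
+ * cmlb_attach() (a sketch; the cd case is illustrative):
+ *
+ *	char *
+ *	xdfs_c_cmlb_node_type(xdfs_state_t *xsp)
+ *	{
+ *		return (xsp->xdfss_tgt_is_cd ?
+ *		    DDI_NT_CD_CHAN : DDI_NT_BLOCK_CHAN);
+ *	}
+ *
+ *	int
+ *	xdfs_c_cmlb_alter_behavior(xdfs_state_t *xsp)
+ *	{
+ *		return (xsp->xdfss_tgt_is_cd ?
+ *		    0 : CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT);
+ *	}
+ */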
+
+/*
+ * xdfs_c_attach() is invoked during device attach. It provides an
+ * opportunity for the client to create properties or do anything else
+ * necessary for attach.
+ */
+extern void xdfs_c_attach(xdfs_state_t *);
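+
+/*
+ * A minimal sketch of a client implementation (the property name is
+ * purely illustrative):
+ *
+ *	void
+ *	xdfs_c_attach(xdfs_state_t *xsp)
+ *	{
+ *		(void) ddi_prop_update_int(DDI_DEV_T_NONE,
+ *		    xsp->xdfss_dip, "example-prop", 1);
+ *	}
+ */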
+
+/*
+ * xdfs_c_ioctl() is invoked to handle ioctl operations. It should set
+ * its boolean "done" output parameter to indicate whether it fully
+ * handled the ioctl; if not, the request is passed on to the
+ * underlying xdf device.
+ */
+extern int xdfs_c_ioctl(xdfs_state_t *, dev_t, int,
+ int, intptr_t, int, cred_t *, int *, boolean_t *);
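+
+/*
+ * A minimal sketch of a client implementation (the DKIOCREMOVABLE case
+ * is illustrative; unhandled ioctls fall through to the underlying xdf
+ * device):
+ *
+ *	int
+ *	xdfs_c_ioctl(xdfs_state_t *xsp, dev_t dev, int part, int cmd,
+ *	    intptr_t arg, int flag, cred_t *credp, int *rvalp,
+ *	    boolean_t *done)
+ *	{
+ *		switch (cmd) {
+ *		case DKIOCREMOVABLE: {
+ *			int i = 0;
+ *			*done = B_TRUE;
+ *			if (ddi_copyout(&i, (void *)arg, sizeof (i), flag))
+ *				return (EFAULT);
+ *			return (0);
+ *		}
+ *		default:
+ *			*done = B_FALSE;	/* pass through to xdf */
+ *			return (0);
+ *		}
+ *	}
+ */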
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _XDF_SHELL_H */
diff --git a/usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile b/usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile
index 0c206a192d..030564df78 100644
--- a/usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile
+++ b/usr/src/uts/i86pc/i86hvm/pv_cmdk/Makefile
@@ -21,7 +21,7 @@
#
# uts/i86pc/pv_cmdk/Makefile
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# This makefile drives the production of the xdc driver.
@@ -61,8 +61,8 @@ ALL_TARGET = $(BINARY)
LINT_TARGET = $(LINT_MODULE).lint
INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
-LDFLAGS += -dy -Nmisc/strategy -Nmisc/cmlb
-LDFLAGS += -Ndrv/xpvd -Ndrv/xdf
+LDFLAGS += -dy -Nmisc/strategy -Nmisc/cmlb -Ndrv/xpvd -Ndrv/xdf
+LDFLAGS += -Nmisc/hvm_cmdk
CPPFLAGS += -D_EXTVTOC
diff --git a/usr/src/uts/i86pc/i86hvm/pv_sd/Makefile b/usr/src/uts/i86pc/i86hvm/pv_sd/Makefile
new file mode 100644
index 0000000000..9eab704747
--- /dev/null
+++ b/usr/src/uts/i86pc/i86hvm/pv_sd/Makefile
@@ -0,0 +1,100 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+#
+# uts/i86pc/i86hvm/pv_sd/Makefile
+#
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+# Use is subject to license terms.
+#
+# This makefile drives the production of the pv_sd driver.
+#
+# i86pc implementation architecture dependent
+#
+
+#
+# Path to the base of the uts directory tree (usually /usr/src/uts).
+#
+UTSBASE = ../../..
+
+#
+# Define the module and object file sets.
+#
+MODULE = sd
+OBJECTS = $(PV_SD_OBJS:%=$(OBJS_DIR)/%)
+LINTS = $(PV_SD_OBJS:%.o=$(LINTS_DIR)/%.ln)
+ROOTMODULE = $(ROOT_HVM_DRV_DIR)/$(MODULE)
+
+#
+# Include common rules.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.i86hvm
+
+#
+# When generating lint libraries, we want the name of the lint module
+# that will be generated to be pv_sd and not sd, so override the
+# default lint module name here.
+#
+LINT_MODULE = pv_sd
+
+#
+# Define targets
+#
+ALL_TARGET = $(BINARY)
+LINT_TARGET = $(LINT_MODULE).lint
+INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
+
+LDFLAGS += -dy -Nmisc/strategy -Nmisc/cmlb -Ndrv/xpvd -Ndrv/xdf
+LDFLAGS += -Nmisc/hvm_sd
+
+#
+# The Xen header files do not lint cleanly. Since the troublesome
+# structures form part of the externally defined interface to the
+# hypervisor, we're stuck with the noise.
+#
+LINTTAGS += -erroff=E_BAD_PTR_CAST_ALIGN
+LINTTAGS += -erroff=E_SUPPRESSION_DIRECTIVE_UNUSED
+LINTTAGS += -erroff=E_ASSIGN_NARROW_CONV
+
+#
+# Default build targets.
+#
+.KEEP_STATE:
+
+def: $(DEF_DEPS)
+
+all: $(ALL_DEPS)
+
+clean: $(CLEAN_DEPS)
+
+clobber: $(CLOBBER_DEPS)
+
+lint: $(LINT_DEPS)
+
+modlintlib: $(MODLINTLIB_DEPS)
+
+clean.lint: $(CLEAN_LINT_DEPS)
+
+install: $(INSTALL_DEPS)
+
+#
+# Include common targets.
+#
+include $(UTSBASE)/i86pc/i86hvm/Makefile.targ
diff --git a/usr/src/uts/i86pc/i86hvm/xdf/Makefile b/usr/src/uts/i86pc/i86hvm/xdf/Makefile
index 4b7bbe75d8..bef6a685f6 100644
--- a/usr/src/uts/i86pc/i86hvm/xdf/Makefile
+++ b/usr/src/uts/i86pc/i86hvm/xdf/Makefile
@@ -22,11 +22,9 @@
#
# uts/i86pc/xdf/Makefile
#
-# Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2009 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-# ident "%Z%%M% %I% %E% SMI"
-#
# i86pc architecture dependent
#
#
@@ -55,7 +53,7 @@ LINT_TARGET = $(MODULE).lint
INSTALL_TARGET = $(BINARY) $(ROOTMODULE)
# Overrides
-CPPFLAGS += -DHVMPV_XDF_VERS=1
+CPPFLAGS += -DHVMPV_XDF_VERS=2
LDFLAGS += -dy -Nmisc/cmlb -Ndrv/xpvd -Ndrv/xpv
LINTTAGS += -erroff=E_SUSPICIOUS_COMPARISON
diff --git a/usr/src/uts/intel/io/dktp/disk/cmdk.c b/usr/src/uts/intel/io/dktp/disk/cmdk.c
index 99b56bab67..36dddd4a7b 100644
--- a/usr/src/uts/intel/io/dktp/disk/cmdk.c
+++ b/usr/src/uts/intel/io/dktp/disk/cmdk.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -73,7 +73,7 @@ static int cmdk_debug = DIO;
#define DKTP_DATA (dkp->dk_tgobjp)->tg_data
#define DKTP_EXT (dkp->dk_tgobjp)->tg_ext
-static void *cmdk_state;
+void *cmdk_state;
/*
* the cmdk_attach_mutex protects cmdk_max_instance in multi-threaded
@@ -187,18 +187,30 @@ struct dev_ops cmdk_ops = {
*/
#include <sys/modctl.h>
-extern struct mod_ops mod_driverops;
-
+#ifndef XPV_HVM_DRIVER
static struct modldrv modldrv = {
- &mod_driverops, /* Type of module. This one is a driver */
+ &mod_driverops, /* Type of module. This one is a driver */
"Common Direct Access Disk",
- &cmdk_ops, /* driver ops */
+ &cmdk_ops, /* driver ops */
};
static struct modlinkage modlinkage = {
MODREV_1, (void *)&modldrv, NULL
};
+
+#else /* XPV_HVM_DRIVER */
+static struct modlmisc modlmisc = {
+ &mod_miscops, /* Type of module. This one is a misc */
+ "HVM Common Direct Access Disk",
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, (void *)&modlmisc, NULL
+};
+
+#endif /* XPV_HVM_DRIVER */
+
/* Function prototypes for cmlb callbacks */
static int cmdk_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr,
@@ -244,13 +256,17 @@ _init(void)
{
int rval;
+#ifndef XPV_HVM_DRIVER
if (rval = ddi_soft_state_init(&cmdk_state, sizeof (struct cmdk), 7))
return (rval);
+#endif /* !XPV_HVM_DRIVER */
mutex_init(&cmdk_attach_mutex, NULL, MUTEX_DRIVER, NULL);
if ((rval = mod_install(&modlinkage)) != 0) {
mutex_destroy(&cmdk_attach_mutex);
+#ifndef XPV_HVM_DRIVER
ddi_soft_state_fini(&cmdk_state);
+#endif /* !XPV_HVM_DRIVER */
}
return (rval);
}
@@ -259,25 +275,6 @@ int
_fini(void)
{
return (EBUSY);
-
- /*
- * This has been commented out until cmdk is a true
- * unloadable module. Right now x86's are panicking on
- * a diskless reconfig boot.
- */
-
-#if 0 /* bugid 1186679 */
- int rval;
-
- rval = mod_remove(&modlinkage);
- if (rval != 0)
- return (rval);
-
- mutex_destroy(&cmdk_attach_mutex);
- ddi_soft_state_fini(&cmdk_state);
-
- return (0);
-#endif
}
int
@@ -298,11 +295,15 @@ cmdkprobe(dev_info_t *dip)
instance = ddi_get_instance(dip);
+#ifndef XPV_HVM_DRIVER
if (ddi_get_soft_state(cmdk_state, instance))
return (DDI_PROBE_PARTIAL);
- if ((ddi_soft_state_zalloc(cmdk_state, instance) != DDI_SUCCESS) ||
- ((dkp = ddi_get_soft_state(cmdk_state, instance)) == NULL))
+ if (ddi_soft_state_zalloc(cmdk_state, instance) != DDI_SUCCESS)
+ return (DDI_PROBE_PARTIAL);
+#endif /* !XPV_HVM_DRIVER */
+
+ if ((dkp = ddi_get_soft_state(cmdk_state, instance)) == NULL)
return (DDI_PROBE_PARTIAL);
mutex_init(&dkp->dk_mutex, NULL, MUTEX_DRIVER, NULL);
@@ -318,7 +319,9 @@ cmdkprobe(dev_info_t *dip)
mutex_exit(&dkp->dk_mutex);
mutex_destroy(&dkp->dk_mutex);
rw_destroy(&dkp->dk_bbh_mutex);
+#ifndef XPV_HVM_DRIVER
ddi_soft_state_free(cmdk_state, instance);
+#endif /* !XPV_HVM_DRIVER */
return (DDI_PROBE_PARTIAL);
}
@@ -328,7 +331,9 @@ cmdkprobe(dev_info_t *dip)
mutex_exit(&dkp->dk_mutex);
mutex_destroy(&dkp->dk_mutex);
rw_destroy(&dkp->dk_bbh_mutex);
+#ifndef XPV_HVM_DRIVER
ddi_soft_state_free(cmdk_state, instance);
+#endif /* !XPV_HVM_DRIVER */
return (status);
}
@@ -401,8 +406,8 @@ cmdkattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
if (cmlb_attach(dip,
&cmdk_lb_ops,
DTYPE_DIRECT, /* device_type */
- 0, /* removable */
- 0, /* hot pluggable XXX */
+ B_FALSE, /* removable */
+ B_FALSE, /* hot pluggable XXX */
node_type,
CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT, /* alter_behaviour */
dkp->dk_cmlbhandle,
@@ -450,7 +455,9 @@ fail2:
rw_destroy(&dkp->dk_bbh_mutex);
mutex_exit(&dkp->dk_mutex);
mutex_destroy(&dkp->dk_mutex);
+#ifndef XPV_HVM_DRIVER
ddi_soft_state_free(cmdk_state, instance);
+#endif /* !XPV_HVM_DRIVER */
return (DDI_FAILURE);
}
@@ -516,7 +523,9 @@ cmdkdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
rw_destroy(&dkp->dk_bbh_mutex);
mutex_destroy(&dkp->dk_pm_mutex);
cv_destroy(&dkp->dk_suspend_cv);
+#ifndef XPV_HVM_DRIVER
ddi_soft_state_free(cmdk_state, instance);
+#endif /* !XPV_HVM_DRIVER */
return (DDI_SUCCESS);
}
diff --git a/usr/src/uts/sun/io/dada/targets/dad.c b/usr/src/uts/sun/io/dada/targets/dad.c
index 72abdf4800..1d71904da5 100644
--- a/usr/src/uts/sun/io/dada/targets/dad.c
+++ b/usr/src/uts/sun/io/dada/targets/dad.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -587,8 +587,8 @@ dcdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
if (cmlb_attach(devi,
&dcd_lb_ops,
0,
- 0,
- 0,
+ B_FALSE,
+ B_FALSE,
DDI_NT_BLOCK_CHAN,
CMLB_FAKE_GEOM_LABEL_IOCTLS_VTOC8,
un->un_dklbhandle,