summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorvikram <none@none>2007-08-09 21:43:47 -0700
committervikram <none@none>2007-08-09 21:43:47 -0700
commit25e8c5aa2b496d9026e958ac731a610167574f59 (patch)
tree48d445f55e23f769f3981231d5b06b0b35505b33
parentffcd51f34e6cd303b9745909c4632da63426be17 (diff)
downloadillumos-gate-25e8c5aa2b496d9026e958ac731a610167574f59.tar.gz
PSARC 2007/290 Retire Agent for I/O Devices
6464720 Deliver a FMA I/O retire agent --HG-- rename : usr/src/cmd/fm/modules/common/io-retire/ior_main.c => deleted_files/usr/src/cmd/fm/modules/common/io-retire/ior_main.c
-rw-r--r--deleted_files/usr/src/cmd/fm/modules/common/io-retire/ior_main.c (renamed from usr/src/cmd/fm/modules/common/io-retire/ior_main.c)0
-rw-r--r--usr/src/cmd/boot/bootadm/filelist.ramdisk1
-rw-r--r--usr/src/cmd/ctwatch/ctwatch.c8
-rw-r--r--usr/src/cmd/fm/modules/common/io-retire/Makefile13
-rw-r--r--usr/src/cmd/fm/modules/common/io-retire/io-retire.conf12
-rw-r--r--usr/src/cmd/fm/modules/common/io-retire/rio_main.c290
-rw-r--r--usr/src/cmd/prtconf/pdevinfo.c2
-rw-r--r--usr/src/cmd/rcm_daemon/common/filesys_rcm.c263
-rw-r--r--usr/src/cmd/rcm_daemon/common/rcm_impl.c17
-rw-r--r--usr/src/cmd/rcm_daemon/common/rcm_subr.c50
-rw-r--r--usr/src/lib/cfgadm_plugins/scsi/common/cfga_list.c28
-rw-r--r--usr/src/lib/fm/topo/libtopo/common/dev.c5
-rw-r--r--usr/src/lib/libcontract/Makefile8
-rw-r--r--usr/src/lib/libcontract/Makefile.com6
-rw-r--r--usr/src/lib/libcontract/common/device.c177
-rw-r--r--usr/src/lib/libcontract/common/device_dump.c103
-rw-r--r--usr/src/lib/libcontract/common/device_dump.h43
-rw-r--r--usr/src/lib/libcontract/common/libcontract.c31
-rw-r--r--usr/src/lib/libcontract/common/libcontract.h25
-rw-r--r--usr/src/lib/libcontract/common/libcontract_impl.h10
-rw-r--r--usr/src/lib/libcontract/common/libcontract_priv.c32
-rw-r--r--usr/src/lib/libcontract/common/libcontract_priv.h8
-rw-r--r--usr/src/lib/libcontract/common/mapfile-vers14
-rw-r--r--usr/src/lib/libdevinfo/Makefile.com4
-rw-r--r--usr/src/lib/libdevinfo/devinfo.c6
-rw-r--r--usr/src/lib/libdevinfo/devinfo_retire.c785
-rw-r--r--usr/src/lib/libdevinfo/libdevinfo.h18
-rw-r--r--usr/src/lib/libdevinfo/mapfile-vers3
-rw-r--r--usr/src/lib/librcm/librcm.h5
-rw-r--r--usr/src/lib/librcm/librcm_impl.h11
-rw-r--r--usr/src/pkgdefs/SUNWhea/prototype_com2
-rw-r--r--usr/src/uts/common/Makefile.files2
-rw-r--r--usr/src/uts/common/contract/device.c2207
-rw-r--r--usr/src/uts/common/contract/process.c35
-rw-r--r--usr/src/uts/common/fs/ctfs/ctfs_ctl.c9
-rw-r--r--usr/src/uts/common/fs/ctfs/ctfs_tmpl.c7
-rw-r--r--usr/src/uts/common/fs/specfs/specsubr.c161
-rw-r--r--usr/src/uts/common/fs/specfs/specvnops.c104
-rw-r--r--usr/src/uts/common/os/contract.c166
-rw-r--r--usr/src/uts/common/os/devcache.c13
-rw-r--r--usr/src/uts/common/os/devcfg.c727
-rw-r--r--usr/src/uts/common/os/driver_lyr.c936
-rw-r--r--usr/src/uts/common/os/modctl.c225
-rw-r--r--usr/src/uts/common/os/retire_store.c457
-rw-r--r--usr/src/uts/common/os/sunmdi.c286
-rw-r--r--usr/src/uts/common/os/sunndi.c17
-rw-r--r--usr/src/uts/common/sys/Makefile4
-rw-r--r--usr/src/uts/common/sys/autoconf.h24
-rw-r--r--usr/src/uts/common/sys/contract.h8
-rw-r--r--usr/src/uts/common/sys/contract/device.h76
-rw-r--r--usr/src/uts/common/sys/contract/device_impl.h93
-rw-r--r--usr/src/uts/common/sys/contract_impl.h50
-rw-r--r--usr/src/uts/common/sys/ctfs.h8
-rw-r--r--usr/src/uts/common/sys/ddi_impldefs.h24
-rw-r--r--usr/src/uts/common/sys/ddi_implfuncs.h11
-rw-r--r--usr/src/uts/common/sys/ddi_obsolete.h12
-rw-r--r--usr/src/uts/common/sys/fs/snode.h9
-rw-r--r--usr/src/uts/common/sys/modctl.h6
-rw-r--r--usr/src/uts/common/sys/sunldi.h48
-rw-r--r--usr/src/uts/common/sys/sunldi_impl.h52
-rw-r--r--usr/src/uts/common/sys/sunmdi.h10
-rw-r--r--usr/src/uts/intel/ia32/ml/modstubs.s2
-rw-r--r--usr/src/uts/sparc/ml/modstubs.s2
-rw-r--r--usr/src/uts/sun4/os/ddi_impl.c154
-rw-r--r--usr/src/uts/sun4u/io/sbd.c15
-rw-r--r--usr/src/uts/sun4u/io/sbd_io.c8
-rw-r--r--usr/src/uts/sun4u/ngdr/io/dr_io.c44
67 files changed, 7783 insertions, 209 deletions
diff --git a/usr/src/cmd/fm/modules/common/io-retire/ior_main.c b/deleted_files/usr/src/cmd/fm/modules/common/io-retire/ior_main.c
index 6c1afdac81..6c1afdac81 100644
--- a/usr/src/cmd/fm/modules/common/io-retire/ior_main.c
+++ b/deleted_files/usr/src/cmd/fm/modules/common/io-retire/ior_main.c
diff --git a/usr/src/cmd/boot/bootadm/filelist.ramdisk b/usr/src/cmd/boot/bootadm/filelist.ramdisk
index 58e885f6ca..cb2b543633 100644
--- a/usr/src/cmd/boot/bootadm/filelist.ramdisk
+++ b/usr/src/cmd/boot/bootadm/filelist.ramdisk
@@ -8,6 +8,7 @@ etc/driver_classes
etc/path_to_inst
etc/mach
etc/devices/devid_cache
+etc/devices/retire_store
etc/devices/mdi_scsi_vhci_cache
etc/devices/mdi_ib_cache
etc/cluster/nodeid
diff --git a/usr/src/cmd/ctwatch/ctwatch.c b/usr/src/cmd/ctwatch/ctwatch.c
index 6247ccaf34..a42d109983 100644
--- a/usr/src/cmd/ctwatch/ctwatch.c
+++ b/usr/src/cmd/ctwatch/ctwatch.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -54,6 +53,7 @@ struct {
int found;
} types[] = {
{ "process", 0 },
+ { "device", 0 },
{ NULL }
};
diff --git a/usr/src/cmd/fm/modules/common/io-retire/Makefile b/usr/src/cmd/fm/modules/common/io-retire/Makefile
index a1f8b20e36..6ad3386fee 100644
--- a/usr/src/cmd/fm/modules/common/io-retire/Makefile
+++ b/usr/src/cmd/fm/modules/common/io-retire/Makefile
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,13 +19,15 @@
# CDDL HEADER END
#
#
-# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
+#pragma ident "%Z%%M% %I% %E% SMI"
MODULE = io-retire
CLASS = common
-SRCS = ior_main.c
+SRCS = rio_main.c
include ../../Makefile.plugin
+
+LDLIBS += -ldevinfo
diff --git a/usr/src/cmd/fm/modules/common/io-retire/io-retire.conf b/usr/src/cmd/fm/modules/common/io-retire/io-retire.conf
index b7fdbd04b5..cb89b6b72c 100644
--- a/usr/src/cmd/fm/modules/common/io-retire/io-retire.conf
+++ b/usr/src/cmd/fm/modules/common/io-retire/io-retire.conf
@@ -2,9 +2,8 @@
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
-# Common Development and Distribution License, Version 1.0 only
-# (the "License"). You may not use this file except in compliance
-# with the License.
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
@@ -20,14 +19,15 @@
# CDDL HEADER END
#
#
-# Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
-#ident "%Z%%M% %I% %E% SMI"
+#pragma ident "%Z%%M% %I% %E% SMI"
#
# fmd configuration file for the io-retire.so agent.
#
-setprop autoclose false
+setprop global-disable false
subscribe fault.io.*
+subscribe list.repaired
subscribe defect.io.*
subscribe defect.ultraSPARC-II.memory.nodiag
diff --git a/usr/src/cmd/fm/modules/common/io-retire/rio_main.c b/usr/src/cmd/fm/modules/common/io-retire/rio_main.c
new file mode 100644
index 0000000000..7277eca7fd
--- /dev/null
+++ b/usr/src/cmd/fm/modules/common/io-retire/rio_main.c
@@ -0,0 +1,290 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/fm/protocol.h>
+#include <fm/fmd_api.h>
+#include <strings.h>
+#include <libdevinfo.h>
+#include <sys/modctl.h>
+
+static int global_disable; /* "global-disable" property value, read in _fmd_init() */
+
+struct except_list { /* one node per fault class exempted from retire */
+ char *el_fault; /* class pattern handed to fmd_nvl_class_match() */
+ struct except_list *el_next; /* next node; NULL at the end of the list */
+};
+
+static struct except_list *except_list; /* list head; new nodes are pushed here */
+
+/*
+ * parse_exception_string()
+ *
+ * Split the ':'-separated string estr into its non-empty components and
+ * push a private copy of each component onto the head of except_list.
+ * Each separator is overwritten with '\0' while its component is being
+ * copied and is put back afterwards, so estr is unchanged on return.
+ * As a sanity check the module aborts if the length of estr differs
+ * from the length it had on entry.
+ */
+static void
+parse_exception_string(fmd_hdl_t *hdl, char *estr)
+{
+	size_t origlen = strlen(estr);
+	char *tok = estr;
+	char *sep;
+	struct except_list *node;
+
+	while (tok != NULL) {
+		/* Step over any run of separators in front of the token */
+		for (; *tok == ':'; tok++)
+			continue;
+		if (*tok == '\0')
+			break;
+
+		/* Temporarily terminate the token at the next separator */
+		sep = strchr(tok, ':');
+		if (sep != NULL)
+			*sep = '\0';
+
+		node = fmd_hdl_alloc(hdl, sizeof (struct except_list),
+		    FMD_SLEEP);
+		node->el_fault = fmd_hdl_strdup(hdl, tok, FMD_SLEEP);
+		node->el_next = except_list;
+		except_list = node;
+
+		if (sep == NULL) {
+			/* That was the last token */
+			tok = NULL;
+		} else {
+			*sep = ':';
+			tok = sep + 1;
+		}
+	}
+
+	if (strlen(estr) != origlen) {
+		fmd_hdl_abort(hdl, "Error parsing exception list: %s\n", estr);
+	}
+}
+
+/*
+ * fault_exception()
+ *
+ * Walk except_list and test the fault's class against every entry with
+ * fmd_nvl_class_match().
+ *
+ * Returns
+ *	1 if the fault matches an entry on the exception list (a debug
+ *	  message naming the entry is logged)
+ *	0 otherwise
+ */
+static int
+fault_exception(fmd_hdl_t *hdl, nvlist_t *fault)
+{
+	struct except_list *cur = except_list;
+
+	while (cur != NULL) {
+		if (fmd_nvl_class_match(hdl, fault, cur->el_fault)) {
+			fmd_hdl_debug(hdl, "rio_recv: Skipping fault "
+			    "on exception list (%s)\n", cur->el_fault);
+			return (1);
+		}
+		cur = cur->el_next;
+	}
+
+	return (0);
+}
+
+/*
+ * free_exception_list()
+ *
+ * Release every node of except_list together with its fault string and
+ * leave the list empty.
+ */
+static void
+free_exception_list(fmd_hdl_t *hdl)
+{
+	struct except_list *cur;
+	struct except_list *nxt;
+
+	for (cur = except_list; cur != NULL; cur = nxt) {
+		/* Pick up the successor before the node is freed */
+		nxt = cur->el_next;
+		fmd_hdl_strfree(hdl, cur->el_fault);
+		fmd_hdl_free(hdl, cur, sizeof (struct except_list));
+	}
+
+	except_list = NULL;
+}
+
+
+/*
+ * rio_recv()
+ *
+ * fmdo_recv entry point.  A list.suspect event requests a retire and a
+ * list.repaired event requests an unretire; every other class is logged
+ * and ignored.  Only faults whose ASRU uses the "dev" FMRI scheme and
+ * carries a non-empty device path are acted upon.
+ *
+ * Retire: skipped entirely when global_disable is set.  Faults on the
+ * exception list are ignored, and the device is retired only when all
+ * remaining faults name the same device path.  If the retire succeeds,
+ * the case named by the event's UUID is closed unless it already is.
+ *
+ * Unretire: di_unretire_device() is called for each qualifying fault;
+ * failures are logged and the remaining faults are still processed.
+ */
+/*ARGSUSED*/
+static void
+rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
+{
+	char		*io_fault = FM_FAULT_CLASS"."FM_ERROR_IO".";
+	nvlist_t	**suspects = NULL;
+	nvlist_t	*asru;
+	uint_t		nsuspects = 0;
+	char		target[PATH_MAX];
+	char		*devp;
+	char		*uuid;
+	char		*scheme;
+	di_retire_t	drt = {0};
+	int		retire;
+	int		err;
+	int		i;
+
+	/* Work out what the event asks for; bail out on anything else */
+	if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) {
+		/*
+		 * If disabled, we don't do retire. We still do unretires
+		 * though.
+		 */
+		if (global_disable) {
+			fmd_hdl_debug(hdl, "rio_recv: retire disabled\n");
+			return;
+		}
+		retire = 1;
+	} else if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) {
+		retire = 0;
+	} else {
+		if (strncmp(class, io_fault, strlen(io_fault)) == 0) {
+			fmd_hdl_debug(hdl, "rio_recv: single fault: %s\n",
+			    class);
+		} else {
+			fmd_hdl_debug(hdl, "rio_recv: not list.* class: %s\n",
+			    class);
+		}
+		return;
+	}
+
+	/* Callbacks and handle used by the libdevinfo retire routines */
+	drt.rt_hdl = hdl;
+	drt.rt_debug = (void (*)(void *, const char *, ...))fmd_hdl_debug;
+	drt.rt_abort = (void (*)(void *, const char *, ...))fmd_hdl_abort;
+
+	if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
+	    &suspects, &nsuspects) != 0) {
+		fmd_hdl_debug(hdl, "rio_recv: no fault list");
+		return;
+	}
+
+	target[0] = '\0';
+	for (i = 0; i < nsuspects; i++) {
+		if (nvlist_lookup_nvlist(suspects[i], FM_FAULT_ASRU,
+		    &asru) != 0) {
+			fmd_hdl_debug(hdl, "rio_recv: no asru in fault");
+			continue;
+		}
+
+		scheme = NULL;
+		if (nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 ||
+		    strcmp(scheme, FM_FMRI_SCHEME_DEV) != 0) {
+			fmd_hdl_debug(hdl, "rio_recv: not \"dev\" scheme: %s",
+			    scheme ? scheme : "<NULL>");
+			continue;
+		}
+
+		if (retire && fault_exception(hdl, suspects[i]))
+			continue;
+
+		if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH,
+		    &devp) != 0 || devp[0] == '\0') {
+			fmd_hdl_debug(hdl, "rio_recv: no dev path in asru");
+			continue;
+		}
+
+		if (!retire) {
+			err = di_unretire_device(devp, &drt);
+			if (err != 0) {
+				fmd_hdl_debug(hdl, "rio_recv: "
+				    "di_unretire_device failed: error: %d %s",
+				    err, devp);
+			}
+			continue;
+		}
+
+		/*
+		 * Automatic retire happens only when the event pinpoints a
+		 * single ASRU.  Remember the first path seen and give up as
+		 * soon as a different one shows up.
+		 */
+		if (target[0] == '\0') {
+			(void) strlcpy(target, devp, sizeof (target));
+		} else if (strcmp(devp, target) != 0) {
+			fmd_hdl_debug(hdl,
+			    "rio_recv: Skipping: multiple ASRU");
+			return;
+		}
+	}
+
+	/* Unretires were handled inside the loop; nothing qualified if empty */
+	if (!retire || target[0] == '\0')
+		return;
+
+	err = di_retire_device(target, &drt, 0);
+	if (err != 0) {
+		fmd_hdl_debug(hdl, "rio_recv: di_retire_device "
+		    "failed: error: %d %s", err, target);
+		return;
+	}
+
+	/*
+	 * The fmd framework takes care of moving a case to the repaired
+	 * state.  Moving it to the closed state is up to the retire agent,
+	 * which does so with fmd_case_uuclose() after a successful retire.
+	 */
+	if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 &&
+	    !fmd_case_uuclosed(hdl, uuid)) {
+		fmd_case_uuclose(hdl, uuid);
+	}
+}
+
+static const fmd_hdl_ops_t fmd_ops = { /* entry points, referenced from fmd_info */
+ rio_recv, /* fmdo_recv: handles list.suspect and list.repaired events */
+ NULL, /* fmdo_timeout */
+ NULL, /* fmdo_close */
+ NULL, /* fmdo_stats */
+ NULL, /* fmdo_gc */
+};
+
+static const fmd_prop_t rio_props[] = { /* properties read in _fmd_init() */
+ { "global-disable", FMD_TYPE_BOOL, "false" }, /* when set, rio_recv() skips retires */
+ { "fault-exceptions", FMD_TYPE_STRING, NULL }, /* ':'-separated fault classes never retired */
+ { NULL, 0, NULL } /* terminator */
+};
+
+static const fmd_hdl_info_t fmd_info = { /* passed to fmd_hdl_register() */
+ "I/O Retire Agent", "2.0", &fmd_ops, rio_props
+};
+
+/*
+ * _fmd_init()
+ *
+ * Register the module with fmd, cache the "global-disable" property in
+ * global_disable and, when a "fault-exceptions" property is set, build
+ * except_list from it.  If registration fails, a debug message is logged
+ * and nothing else is done.
+ */
+void
+_fmd_init(fmd_hdl_t *hdl)
+{
+	char *prop;
+	char *copy;
+
+	if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) {
+		fmd_hdl_debug(hdl, "failed to register handle\n");
+		return;
+	}
+
+	global_disable = fmd_prop_get_int32(hdl, "global-disable");
+
+	prop = fmd_prop_get_string(hdl, "fault-exceptions");
+	if (prop == NULL)
+		return;
+
+	/*
+	 * The parser writes into its argument while splitting it, so it is
+	 * given a private copy instead of the property string itself.
+	 */
+	copy = fmd_hdl_strdup(hdl, prop, FMD_SLEEP);
+	fmd_prop_free_string(hdl, prop);
+	parse_exception_string(hdl, copy);
+	fmd_hdl_strfree(hdl, copy);
+}
+
+void
+_fmd_fini(fmd_hdl_t *hdl)
+{
+ free_exception_list(hdl); /* release the except_list nodes built in _fmd_init() */
+}
diff --git a/usr/src/cmd/prtconf/pdevinfo.c b/usr/src/cmd/prtconf/pdevinfo.c
index 445e02bc29..72b34de1f1 100644
--- a/usr/src/cmd/prtconf/pdevinfo.c
+++ b/usr/src/cmd/prtconf/pdevinfo.c
@@ -673,6 +673,8 @@ dump_devs(di_node_t node, void *arg)
driver_name = di_driver_name(node);
if (driver_name != NULL)
(void) printf(" (driver name: %s)", driver_name);
+ } else if (di_retired(node)) {
+ (void) printf(" (retired)");
} else if (di_state(node) & DI_DRIVER_DETACHED)
(void) printf(" (driver not attached)");
diff --git a/usr/src/cmd/rcm_daemon/common/filesys_rcm.c b/usr/src/cmd/rcm_daemon/common/filesys_rcm.c
index 2103ba99ae..c7a5ce3765 100644
--- a/usr/src/cmd/rcm_daemon/common/filesys_rcm.c
+++ b/usr/src/cmd/rcm_daemon/common/filesys_rcm.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -48,6 +47,8 @@
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/utssys.h>
+#include <unistd.h>
+#include <limits.h>
#include "rcm_module.h"
@@ -71,6 +72,7 @@
typedef struct hashentry {
int n_mounts;
char *special;
+ char *fstype;
char **mountps;
struct hashentry *next;
} hashentry_t;
@@ -252,7 +254,11 @@ mnt_unregister(rcm_handle_t *hd)
/*
* mnt_offline()
*
- * Filesystem resources cannot be offlined. Always returns failure.
+ * Filesystem resources cannot be offlined. They can, however, be retired
+ * if they don't provide a critical service. The offline entry point
+ * checks whether this is a retire operation; if it is, and the filesystem
+ * doesn't provide a critical service, the entry point returns success.
+ * For all other cases, failure is returned.
* Since no real action is taken, QUERY or not doesn't matter.
*/
int
@@ -260,17 +266,58 @@ mnt_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
char **errorp, rcm_info_t **dependent_info)
{
char **dependents;
+ hashentry_t *entry;
+ int retval;
+ int i;
assert(hd != NULL);
assert(rsrc != NULL);
assert(id == (id_t)0);
assert(errorp != NULL);
+ *errorp = NULL;
+
rcm_log_message(RCM_TRACE1, "FILESYS: offline(%s)\n", rsrc);
/* Retrieve necessary info from the cache */
- if (use_cache(rsrc, errorp, &dependents) < 0)
- return (RCM_FAILURE);
+ if (use_cache(rsrc, errorp, &dependents) < 0) {
+ if (flags & RCM_RETIRE_REQUEST)
+ return (RCM_NO_CONSTRAINT);
+ else
+ return (RCM_FAILURE);
+ }
+
+ if (flags & RCM_RETIRE_REQUEST) {
+ (void) mutex_lock(&cache_lock);
+ if ((entry = cache_lookup(mnt_cache, rsrc)) == NULL) {
+ rcm_log_message(RCM_ERROR, "FILESYS: "
+ "failed to look up \"%s\" in cache (%s).\n",
+ rsrc, strerror(errno));
+ (void) mutex_unlock(&cache_lock);
+ retval = RCM_NO_CONSTRAINT;
+ goto out;
+ }
+
+ if (strcmp(entry->fstype, "zfs") == 0) {
+ retval = RCM_NO_CONSTRAINT;
+ rcm_log_message(RCM_TRACE1,
+ "FILESYS: zfs: NO_CONSTRAINT: %s\n", rsrc);
+ } else {
+ retval = RCM_SUCCESS;
+ for (i = 0; dependents[i] != NULL; i++) {
+ if (is_critical(dependents[i])) {
+ retval = RCM_FAILURE;
+ rcm_log_message(RCM_TRACE1, "FILESYS: "
+ "CRITICAL %s\n", rsrc);
+ break;
+ }
+ }
+ }
+ (void) mutex_unlock(&cache_lock);
+ goto out;
+ }
+
+ retval = RCM_FAILURE;
/* Convert the gathered dependents into an error message */
*errorp = create_message(MSG_HDR_STD, MSG_HDR_STD_MULTI, dependents);
@@ -279,9 +326,10 @@ mnt_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
"FILESYS: failed to construct offline message (%s).\n",
strerror(errno));
}
- free_list(dependents);
- return (RCM_FAILURE);
+out:
+ free_list(dependents);
+ return (retval);
}
/*
@@ -441,13 +489,167 @@ mnt_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **errorp,
return (rv);
}
+/*
+ * get_spec()
+ *
+ * Copy the first whitespace-delimited field of line into spec, a buffer
+ * of ssz bytes.  Leading blanks and tabs are skipped, and the field ends
+ * at the first blank, tab, newline or end of string.
+ *
+ * Returns 0 with the field in spec, or -1 if line does not fit in spec
+ * (an error is logged), starts with '#' after leading whitespace, or is
+ * blank.  The contents of spec are not meaningful when -1 is returned.
+ */
+static int
+get_spec(char *line, char *spec, size_t ssz)
+{
+	char *cp;
+	char *start;
+
+	if (strlcpy(spec, line, ssz) >= ssz) {
+		rcm_log_message(RCM_ERROR, "FILESYS: get_spec() failed: "
+		    "line: %s\n", line);
+		return (-1);
+	}
+
+	cp = spec;
+	while (*cp == ' ' || *cp == '\t')
+		cp++;
+
+	/* Comments and blank lines carry no field to extract */
+	if (*cp == '#' || *cp == '\n' || *cp == '\0')
+		return (-1);
+
+	start = cp;
+
+	/*
+	 * fgets() leaves the newline in the line, so it has to end the
+	 * field too; otherwise a field that is last on its line would be
+	 * returned with the newline attached.
+	 */
+	while (*cp != ' ' && *cp != '\t' && *cp != '\n' && *cp != '\0')
+		cp++;
+	*cp = '\0';
+
+	/* Shift the field to the front of the buffer (regions overlap) */
+	(void) memmove(spec, start, strlen(start) + 1);
+
+	return (0);
+}
+
+/*
+ * path_match()
+ *
+ * Resolve rsrc and spec with realpath() and compare the results.  Both
+ * resolved paths must begin with "/devices/".  They match when the
+ * resolved spec is the resolved rsrc, optionally followed by a ':'
+ * suffix.
+ *
+ * Returns
+ *	 0 if the paths match
+ *	 1 if they do not match
+ *	-1 if either path cannot be resolved or does not lie under
+ *	   /devices (errno is set to ENXIO in the latter case); a debug
+ *	   message is logged
+ */
+static int
+path_match(char *rsrc, char *spec)
+{
+	char rpath[PATH_MAX];
+	char spath[PATH_MAX];
+	size_t plen = strlen("/devices/");
+	size_t rlen;
+	int resolved;
+
+	/* spec is only resolved once rsrc has resolved successfully */
+	resolved = (realpath(rsrc, rpath) != NULL &&
+	    realpath(spec, spath) != NULL);
+
+	if (resolved && (strncmp(rpath, "/devices/", plen) != 0 ||
+	    strncmp(spath, "/devices/", plen) != 0)) {
+		errno = ENXIO;
+		resolved = 0;
+	}
+
+	if (!resolved) {
+		rcm_log_message(RCM_DEBUG, "FILESYS: path_match() failed "
+		    "rsrc=%s spec=%s: %s\n", rsrc, spec, strerror(errno));
+		return (-1);
+	}
+
+	rlen = strlen(rpath);
+	if (strncmp(rpath, spath, rlen) != 0)
+		return (1);
+
+	return ((spath[rlen] == '\0' || spath[rlen] == ':') ? 0 : 1);
+}
+
+#define	VFSTAB		"/etc/vfstab"
+#define	RETIRED_PREFIX	"## RETIRED ##"
+
+/*
+ * disable_vfstab_entry()
+ *
+ * Comment out every /etc/vfstab line whose first field refers to rsrc
+ * (as decided by get_spec() and path_match()) by prefixing it with
+ * RETIRED_PREFIX.  The new contents are written to a temporary file in
+ * /etc, which is renamed over /etc/vfstab only if at least one line was
+ * disabled and the whole file was read and written without error.  The
+ * temporary file is removed in every other case.
+ *
+ * Returns RCM_SUCCESS if vfstab was updated or needed no update, and
+ * RCM_FAILURE on any open, read, write or rename error (each is logged).
+ */
+static int
+disable_vfstab_entry(char *rsrc)
+{
+	FILE *vfp;
+	FILE *tfp;
+	int retval;
+	int update;
+	int match;
+	char tmp[PATH_MAX];
+	char line[MNT_LINE_MAX + 1];
+	char spec[MNT_LINE_MAX + 1];
+
+	vfp = fopen(VFSTAB, "r");
+	if (vfp == NULL) {
+		rcm_log_message(RCM_ERROR, "FILESYS: failed to open /etc/vfstab"
+		    " for reading: %s\n", strerror(errno));
+		return (RCM_FAILURE);
+	}
+
+	/* pid_t is not necessarily unsigned long; cast to match %lu */
+	(void) snprintf(tmp, sizeof (tmp), "/etc/vfstab.retire.%lu",
+	    (unsigned long)getpid());
+
+	tfp = fopen(tmp, "w");
+	if (tfp == NULL) {
+		rcm_log_message(RCM_ERROR, "FILESYS: failed to open "
+		    "/etc/vfstab.retire for writing: %s\n", strerror(errno));
+		(void) fclose(vfp);
+		return (RCM_FAILURE);
+	}
+
+	retval = RCM_SUCCESS;
+	update = 0;
+	while (fgets(line, sizeof (line), vfp) != NULL) {
+
+		match = (get_spec(line, spec, sizeof (spec)) == 0 &&
+		    path_match(rsrc, spec) == 0);
+
+		if (match) {
+			/* Paths match. Disable this entry */
+			rcm_log_message(RCM_TRACE1,
+			    "FILESYS: disabling line\n\t%s\n", line);
+		}
+
+		/*
+		 * The prefix and the line are written separately so that a
+		 * line close to MNT_LINE_MAX is never truncated (which would
+		 * drop its newline and merge it with the next entry).
+		 */
+		if ((match && fputs(RETIRED_PREFIX " ", tfp) == EOF) ||
+		    fputs(line, tfp) == EOF) {
+			rcm_log_message(RCM_ERROR, "FILESYS: failed to write "
+			    "new vfstab: %s\n", strerror(errno));
+			update = 0;
+			retval = RCM_FAILURE;
+			break;
+		}
+
+		if (match)
+			update = 1;
+	}
+
+	/* A read error means the copy is incomplete; never install it */
+	if (retval == RCM_SUCCESS && ferror(vfp)) {
+		rcm_log_message(RCM_ERROR, "FILESYS: failed to read "
+		    "/etc/vfstab: %s\n", strerror(errno));
+		update = 0;
+		retval = RCM_FAILURE;
+	}
+	(void) fclose(vfp);
+
+	/* fclose() flushes buffered output, so its failure is a write error */
+	if (fclose(tfp) == EOF && retval == RCM_SUCCESS) {
+		rcm_log_message(RCM_ERROR, "FILESYS: failed to write "
+		    "new vfstab: %s\n", strerror(errno));
+		update = 0;
+		retval = RCM_FAILURE;
+	}
+
+	if (update) {
+		if (rename(tmp, VFSTAB) != 0) {
+			rcm_log_message(RCM_ERROR, "FILESYS: vfstab rename "
+			    "failed: %s\n", strerror(errno));
+			retval = RCM_FAILURE;
+		}
+	}
+
+	/* Harmless if the rename already consumed the temporary file */
+	(void) unlink(tmp);
+
+	return (retval);
+}
+
/*
* mnt_remove()
*
- * Remove should never be called since offline always fails.
+ * Remove will only be called in the retire case i.e. if RCM_RETIRE_NOTIFY
+ * flag is set.
*
- * Return failure and log the mistake if a remove is ever received for a
- * mounted filesystem resource.
+ * If the flag is not set, then return failure and log the mistake if a
+ * remove is ever received for a mounted filesystem resource.
*/
int
mnt_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **errorp,
@@ -460,11 +662,15 @@ mnt_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **errorp,
rcm_log_message(RCM_TRACE1, "FILESYS: remove(%s)\n", rsrc);
- /* Log the mistake */
- rcm_log_message(RCM_ERROR, "FILESYS: invalid remove of \"%s\"\n", rsrc);
- *errorp = strdup(MSG_FAIL_REMOVE);
+ if (!(flag & RCM_RETIRE_NOTIFY)) {
+ /* Log the mistake */
+ rcm_log_message(RCM_ERROR, "FILESYS: invalid remove of "
+ "\"%s\"\n", rsrc);
+ *errorp = strdup(MSG_FAIL_REMOVE);
+ return (RCM_FAILURE);
+ }
- return (RCM_FAILURE);
+ return (disable_vfstab_entry(rsrc));
}
/*
@@ -617,6 +823,8 @@ free_entry(hashentry_t **entryp)
if (*entryp) {
if ((*entryp)->special)
free((*entryp)->special);
+ if ((*entryp)->fstype)
+ free((*entryp)->fstype);
free_list((*entryp)->mountps);
free(*entryp);
}
@@ -731,9 +939,10 @@ cache_sync(rcm_handle_t *hd, cache_t **cachep)
* cache_insert()
*
* Given a cache and a mnttab entry, this routine inserts that entry in
- * the cache. The mnttab entry's special device is added to the 'mounts'
- * hashtable of the cache, and the entry's mountp value is added to the
- * list of associated mountpoints for the corresponding hashtable entry.
+ * the cache. The mnttab entry's special device and filesystem type
+ * is added to the 'mounts' hashtable of the cache, and the entry's
+ * mountp value is added to the list of associated mountpoints for the
+ * corresponding hashtable entry.
*
* Locking: the cache must be locked before calling this function.
*
@@ -751,7 +960,8 @@ cache_insert(cache_t *cache, struct mnttab *mt)
(cache->mounts == NULL) ||
(mt == NULL) ||
(mt->mnt_special == NULL) ||
- (mt->mnt_mountp == NULL)) {
+ (mt->mnt_mountp == NULL) ||
+ (mt->mnt_fstype == NULL)) {
errno = EINVAL;
return (-1);
}
@@ -776,10 +986,11 @@ cache_insert(cache_t *cache, struct mnttab *mt)
if (entry == NULL) {
entry = (hashentry_t *)calloc(1, sizeof (hashentry_t));
if ((entry == NULL) ||
- ((entry->special = strdup(mt->mnt_special)) == NULL)) {
+ ((entry->special = strdup(mt->mnt_special)) == NULL) ||
+ ((entry->fstype = strdup(mt->mnt_fstype)) == NULL)) {
rcm_log_message(RCM_ERROR,
"FILESYS: failed to allocate special device name "
- "(%s).\n", strerror(errno));
+ "or filesystem type: (%s).\n", strerror(errno));
free_entry(&entry);
errno = ENOMEM;
return (-1);
@@ -1124,19 +1335,25 @@ is_critical(char *rsrc)
if ((strcmp(rsrc, "/") == 0) ||
(strcmp(rsrc, "/usr") == 0) ||
+ (strcmp(rsrc, "/lib") == 0) ||
(strcmp(rsrc, "/usr/lib") == 0) ||
+ (strcmp(rsrc, "/bin") == 0) ||
(strcmp(rsrc, "/usr/bin") == 0) ||
(strcmp(rsrc, "/tmp") == 0) ||
(strcmp(rsrc, "/var") == 0) ||
(strcmp(rsrc, "/var/run") == 0) ||
(strcmp(rsrc, "/etc") == 0) ||
(strcmp(rsrc, "/etc/mnttab") == 0) ||
- (strcmp(rsrc, "/sbin") == 0))
+ (strcmp(rsrc, "/platform") == 0) ||
+ (strcmp(rsrc, "/usr/platform") == 0) ||
+ (strcmp(rsrc, "/sbin") == 0) ||
+ (strcmp(rsrc, "/usr/sbin") == 0))
return (1);
return (0);
}
+
/*
* use_cache()
*
diff --git a/usr/src/cmd/rcm_daemon/common/rcm_impl.c b/usr/src/cmd/rcm_daemon/common/rcm_impl.c
index e6f6e65868..395a9231f2 100644
--- a/usr/src/cmd/rcm_daemon/common/rcm_impl.c
+++ b/usr/src/cmd/rcm_daemon/common/rcm_impl.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,7 +18,7 @@
*
* CDDL HEADER END
*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -89,7 +88,17 @@ common_resource_op(int cmd, char *rsrcname, pid_t pid, uint_t flag, int seq_num,
} else {
error = rsrc_tree_action(node, cmd, &arg);
}
+ } else if ((error == RCM_SUCCESS) && (flag & RCM_RETIRE_REQUEST)) {
+ /*
+ * No matching node, so no client. This means there
+ * is no constraint (RCM wise) on this retire. Return
+ * RCM_NO_CONSTRAINT to indicate this
+ */
+ rcm_log_message(RCM_TRACE1, "No client. Returning "
+ "RCM_NO_CONSTRAINT: %s\n", rsrcname);
+ error = RCM_NO_CONSTRAINT;
}
+
return (error);
}
diff --git a/usr/src/cmd/rcm_daemon/common/rcm_subr.c b/usr/src/cmd/rcm_daemon/common/rcm_subr.c
index c6e6c52afa..82234e1894 100644
--- a/usr/src/cmd/rcm_daemon/common/rcm_subr.c
+++ b/usr/src/cmd/rcm_daemon/common/rcm_subr.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -19,7 +18,7 @@
*
* CDDL HEADER END
*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -797,6 +796,21 @@ rsrc_client_action(client_t *client, int cmd, void *arg)
rval = ops->rcmop_request_offline(hdl, client->alias,
client->pid, targ->flag, &error, &depend_info);
+ /*
+ * If this is a retire operation and we managed to call
+ * into at least one client, set retcode to RCM_SUCCESS to
+ * indicate that retire has been subject to constraints.
+ * This retcode will be further modified by actual return
+ * code.
+ */
+ if ((targ->flag & RCM_RETIRE_REQUEST) &&
+ (targ->retcode == RCM_NO_CONSTRAINT)) {
+ rcm_log_message(RCM_DEBUG,
+ "at least 1 client, constraint applied: %s\n",
+ client->alias);
+ targ->retcode = RCM_SUCCESS;
+ }
+
/* Update the client's state after the operation. */
if ((targ->flag & RCM_QUERY) == 0) {
if (rval == RCM_SUCCESS) {
@@ -920,11 +934,23 @@ int
rsrc_client_action_list(client_t *list, int cmd, void *arg)
{
int error, rval = RCM_SUCCESS;
+ tree_walk_arg_t *targ = (tree_walk_arg_t *)arg;
while (list) {
client_t *client = list;
list = client->next;
+ /*
+ * Make offline idempotent in the retire
+ * case
+ */
+ if ((targ->flag & RCM_RETIRE_REQUEST) &&
+ client->state == RCM_STATE_REMOVE) {
+ client->state = RCM_STATE_ONLINE;
+ rcm_log_message(RCM_DEBUG, "RETIRE: idempotent client "
+ "state: REMOVE -> ONLINE: %s\n", client->alias);
+ }
+
if (client->state == RCM_STATE_REMOVE)
continue;
@@ -1408,8 +1434,20 @@ rsrc_tree_action(rsrc_node_t *root, int cmd, tree_walk_arg_t *arg)
rcm_log_message(RCM_TRACE2, "tree_action(%s, %d)\n", root->name, cmd);
arg->cmd = cmd;
- arg->retcode = RCM_SUCCESS;
- rsrc_walk(root, (void *)arg, node_action);
+
+ /*
+ * If RCM_RETIRE_REQUEST is set, just walk one node and preset
+ * retcode to NO_CONSTRAINT
+ */
+ if (arg->flag & RCM_RETIRE_REQUEST) {
+ rcm_log_message(RCM_TRACE1, "tree_action: RETIRE_REQ: walking "
+ "only root node: %s\n", root->name);
+ arg->retcode = RCM_NO_CONSTRAINT;
+ (void) node_action(root, arg);
+ } else {
+ arg->retcode = RCM_SUCCESS;
+ rsrc_walk(root, (void *)arg, node_action);
+ }
return (arg->retcode);
}
diff --git a/usr/src/lib/cfgadm_plugins/scsi/common/cfga_list.c b/usr/src/lib/cfgadm_plugins/scsi/common/cfga_list.c
index 6a5f716282..78910e04b1 100644
--- a/usr/src/lib/cfgadm_plugins/scsi/common/cfga_list.c
+++ b/usr/src/lib/cfgadm_plugins/scsi/common/cfga_list.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -383,13 +382,18 @@ out:
}
+struct bus_state { /* filled in by get_bus_state() for the walked node */
+ int b_state; /* value of di_state() for the node */
+ int b_retired; /* value of di_retired() for the node */
+};
+
static scfga_ret_t
do_stat_bus(scfga_list_t *lap, int limited_bus_stat)
{
cfga_list_data_t *clp = NULL;
ldata_list_t *listp = NULL;
int l_errno = 0;
- uint_t devinfo_state = 0;
+ struct bus_state bstate = {0};
walkarg_t u;
scfga_ret_t ret;
@@ -399,10 +403,10 @@ do_stat_bus(scfga_list_t *lap, int limited_bus_stat)
u.node_args.flags = 0;
u.node_args.fcn = get_bus_state;
- ret = walk_tree(lap->apidp->hba_phys, &devinfo_state, DINFOPROP, &u,
+ ret = walk_tree(lap->apidp->hba_phys, &bstate, DINFOPROP, &u,
SCFGA_WALK_NODE, &l_errno);
if (ret == SCFGA_OK) {
- lap->hba_rstate = bus_devinfo_to_recep_state(devinfo_state);
+ lap->hba_rstate = bus_devinfo_to_recep_state(bstate.b_state);
} else {
lap->hba_rstate = CFGA_STAT_NONE;
}
@@ -428,7 +432,8 @@ do_stat_bus(scfga_list_t *lap, int limited_bus_stat)
clp->ap_class[0] = '\0'; /* Filled by libcfgadm */
clp->ap_r_state = lap->hba_rstate;
clp->ap_o_state = CFGA_STAT_NONE; /* filled in later by the plug-in */
- clp->ap_cond = CFGA_COND_UNKNOWN;
+ clp->ap_cond =
+ (bstate.b_retired) ? CFGA_COND_FAILED : CFGA_COND_UNKNOWN;
clp->ap_busy = 0;
clp->ap_status_time = (time_t)-1;
clp->ap_info[0] = '\0';
@@ -446,9 +451,10 @@ do_stat_bus(scfga_list_t *lap, int limited_bus_stat)
static int
get_bus_state(di_node_t node, void *arg)
{
- uint_t *di_statep = (uint_t *)arg;
+ struct bus_state *bsp = (struct bus_state *)arg;
- *di_statep = di_state(node);
+ bsp->b_state = di_state(node);
+ bsp->b_retired = di_retired(node);
return (DI_WALK_TERMINATE);
}
@@ -512,7 +518,7 @@ do_stat_dev(
clp->ap_class[0] = '\0'; /* Filled in by libcfgadm */
clp->ap_r_state = lap->hba_rstate;
clp->ap_o_state = ostate;
- clp->ap_cond = CFGA_COND_UNKNOWN;
+ clp->ap_cond = di_retired(node) ? CFGA_COND_FAILED : CFGA_COND_UNKNOWN;
clp->ap_busy = 0; /* no way to determine state change */
clp->ap_status_time = (time_t)-1;
diff --git a/usr/src/lib/fm/topo/libtopo/common/dev.c b/usr/src/lib/fm/topo/libtopo/common/dev.c
index dddede8706..7a4cb4f959 100644
--- a/usr/src/lib/fm/topo/libtopo/common/dev.c
+++ b/usr/src/lib/fm/topo/libtopo/common/dev.c
@@ -445,9 +445,10 @@ dev_fmri_unusable(topo_mod_t *mod, tnode_t *node, topo_version_t version,
return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM));
unusable = 1;
} else {
+ uint_t retired = di_retired(dnode);
state = di_state(dnode);
- if (state & (DI_DEVICE_OFFLINE | DI_DEVICE_DOWN |
- DI_BUS_QUIESCED | DI_BUS_DOWN))
+ if (retired || (state & (DI_DEVICE_OFFLINE | DI_DEVICE_DOWN |
+ DI_BUS_QUIESCED | DI_BUS_DOWN)))
unusable = 1;
else
unusable = 0;
diff --git a/usr/src/lib/libcontract/Makefile b/usr/src/lib/libcontract/Makefile
index a042993bfc..8d05db4980 100644
--- a/usr/src/lib/libcontract/Makefile
+++ b/usr/src/lib/libcontract/Makefile
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
@@ -32,7 +32,8 @@ HDRDIR = common
SUBDIRS = $(MACH)
$(BUILD64)SUBDIRS += $(MACH64)
-MSGFILES = common/process_dump.c
+MSGFILES = common/process_dump.c common/device_dump.c \
+ common/libcontract_priv.c
POFILE = libcontract.po
all := TARGET = all
@@ -45,7 +46,8 @@ lint := TARGET = lint
all clean clobber install lint: $(SUBDIRS)
-$(POFILE): pofile_MSGFILES
+$(POFILE): $(MSGFILES)
+ $(BUILDPO.msgfiles)
install_h: $(ROOTHDRS)
diff --git a/usr/src/lib/libcontract/Makefile.com b/usr/src/lib/libcontract/Makefile.com
index 7d5ab8b471..050d42944e 100644
--- a/usr/src/lib/libcontract/Makefile.com
+++ b/usr/src/lib/libcontract/Makefile.com
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
@@ -32,7 +32,9 @@ OBJECTS = \
libcontract.o \
libcontract_priv.o \
process.o \
- process_dump.o
+ process_dump.o \
+ device.o \
+ device_dump.o
# include library definition
include ../../Makefile.lib
diff --git a/usr/src/lib/libcontract/common/device.c b/usr/src/lib/libcontract/common/device.c
new file mode 100644
index 0000000000..99e9bd1203
--- /dev/null
+++ b/usr/src/lib/libcontract/common/device.c
@@ -0,0 +1,177 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/ctfs.h>
+#include <sys/contract.h>
+#include <sys/contract/device.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <libnvpair.h>
+#include <limits.h>
+#include <sys/stat.h>
+#include <libcontract.h>
+#include "libcontract_impl.h"
+
+/*
+ * Device contract template routines
+ */
+
+/*
+ * Set the CTDP_MINOR term of the contract template open on fd.  The
+ * minor pointer itself is handed to ct_tmpl_set_internal() as the
+ * parameter value; that routine's return value is passed back unchanged.
+ */
+int
+ct_dev_tmpl_set_minor(int fd, char *minor)
+{
+ return (ct_tmpl_set_internal(fd, CTDP_MINOR, (uintptr_t)minor));
+}
+
+/*
+ * Set the CTDP_ACCEPT term of the contract template open on fd to aset.
+ * Returns the result of ct_tmpl_set_internal() unchanged.
+ */
+int
+ct_dev_tmpl_set_aset(int fd, uint_t aset)
+{
+ return (ct_tmpl_set_internal(fd, CTDP_ACCEPT, aset));
+}
+
+/*
+ * Set the CTDP_NONEG term of the contract template open on fd to
+ * CTDP_NONEG_SET.  Returns the result of ct_tmpl_set_internal() unchanged.
+ */
+int
+ct_dev_tmpl_set_noneg(int fd)
+{
+ return (ct_tmpl_set_internal(fd, CTDP_NONEG, CTDP_NONEG_SET));
+}
+
+/*
+ * Set the CTDP_NONEG term of the contract template open on fd to
+ * CTDP_NONEG_CLEAR.  Returns the result of ct_tmpl_set_internal()
+ * unchanged.
+ */
+int
+ct_dev_tmpl_clear_noneg(int fd)
+{
+ return (ct_tmpl_set_internal(fd, CTDP_NONEG, CTDP_NONEG_CLEAR));
+}
+
+/*
+ * Fetch the CTDP_MINOR term of the contract template open on fd and copy
+ * it into buf, whose capacity is passed in through *buflenp.
+ *
+ * Returns 0 when the whole string (including its terminator) fit, the
+ * error from ct_tmpl_get_internal_string() when the fetch failed, or
+ * EOVERFLOW when buf was too small; in the EOVERFLOW case *buflenp is
+ * updated to the size needed to hold the full string.
+ */
+int
+ct_dev_tmpl_get_minor(int fd, char *buf, size_t *buflenp)
+{
+	char minor_path[PATH_MAX];
+	size_t needed;
+	int err;
+
+	err = ct_tmpl_get_internal_string(fd, CTDP_MINOR, minor_path);
+	if (err)
+		return (err);
+
+	/* strlcpy() reports the source length, not the bytes copied. */
+	needed = strlcpy(buf, minor_path, *buflenp);
+	if (needed < *buflenp)
+		return (0);
+
+	*buflenp = needed + 1;
+	return (EOVERFLOW);
+}
+
+/*
+ * Read the CTDP_ACCEPT term of the contract template open on fd into
+ * *aset.  Returns the result of ct_tmpl_get_internal() unchanged.
+ */
+int
+ct_dev_tmpl_get_aset(int fd, uint_t *aset)
+{
+ return (ct_tmpl_get_internal(fd, CTDP_ACCEPT, aset));
+}
+
+/*
+ * Read the CTDP_NONEG term of the contract template open on fd into
+ * *negp.  Returns the result of ct_tmpl_get_internal() unchanged.
+ */
+int
+ct_dev_tmpl_get_noneg(int fd, uint_t *negp)
+{
+ return (ct_tmpl_get_internal(fd, CTDP_NONEG, negp));
+}
+
+/*
+ * Device contract event routines
+ */
+
+/*
+ * No device contract specific event routines
+ */
+
+
+/*
+ * Device contract status routines
+ */
+
+/*
+ * Look up the CTDS_ASET value in the status handle's nvlist and store it
+ * in *aset.
+ *
+ * Returns EINVAL if the status is not for a CTT_DEVICE contract, ENOENT
+ * if the handle carries no nvlist, and otherwise whatever
+ * nvlist_lookup_uint32() returns.
+ */
+int
+ct_dev_status_get_aset(ct_stathdl_t stathdl, uint_t *aset)
+{
+ struct ctlib_status_info *info = stathdl;
+
+ if (info->status.ctst_type != CTT_DEVICE)
+ return (EINVAL);
+
+ if (info->nvl == NULL)
+ return (ENOENT);
+
+ return (nvlist_lookup_uint32(info->nvl, CTDS_ASET, aset));
+}
+
+/*
+ * Look up the CTDS_NONEG value in the status handle's nvlist and store it
+ * in *negp.
+ *
+ * Returns EINVAL if the status is not for a CTT_DEVICE contract, ENOENT
+ * if the handle carries no nvlist, and otherwise whatever
+ * nvlist_lookup_uint32() returns.
+ */
+int
+ct_dev_status_get_noneg(ct_stathdl_t stathdl, uint_t *negp)
+{
+ struct ctlib_status_info *info = stathdl;
+
+ if (info->status.ctst_type != CTT_DEVICE)
+ return (EINVAL);
+
+ if (info->nvl == NULL)
+ return (ENOENT);
+
+ return (nvlist_lookup_uint32(info->nvl, CTDS_NONEG, negp));
+}
+
+/*
+ * Look up the CTDS_STATE value in the status handle's nvlist and store it
+ * in *statep.
+ *
+ * Returns EINVAL if the status is not for a CTT_DEVICE contract, ENOENT
+ * if the handle carries no nvlist, and otherwise whatever
+ * nvlist_lookup_uint32() returns.
+ */
+int
+ct_dev_status_get_dev_state(ct_stathdl_t stathdl, uint_t *statep)
+{
+ struct ctlib_status_info *info = stathdl;
+
+ if (info->status.ctst_type != CTT_DEVICE)
+ return (EINVAL);
+
+ if (info->nvl == NULL)
+ return (ENOENT);
+
+ return (nvlist_lookup_uint32(info->nvl, CTDS_STATE, statep));
+}
+
+/*
+ * Look up the CTDS_MINOR string in the status handle's nvlist and return
+ * a pointer to it through *bufp.
+ *
+ * Returns EINVAL if bufp is NULL or the status is not for a CTT_DEVICE
+ * contract, ENOENT if the handle carries no nvlist, and otherwise the
+ * result of nvlist_lookup_string() (0 on success).
+ */
+int
+ct_dev_status_get_minor(ct_stathdl_t stathdl, char **bufp)
+{
+	struct ctlib_status_info *sip = stathdl;
+
+	/* bufp is tested first so a NULL bufp never reaches the lookup. */
+	if (bufp == NULL || sip->status.ctst_type != CTT_DEVICE)
+		return (EINVAL);
+
+	if (sip->nvl == NULL)
+		return (ENOENT);
+
+	return (nvlist_lookup_string(sip->nvl, CTDS_MINOR, bufp));
+}
diff --git a/usr/src/lib/libcontract/common/device_dump.c b/usr/src/lib/libcontract/common/device_dump.c
new file mode 100644
index 0000000000..fb6d45cf10
--- /dev/null
+++ b/usr/src/lib/libcontract/common/device_dump.c
@@ -0,0 +1,103 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/contract/device.h>
+#include <sys/wait.h>
+#include <sys/ctfs.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <limits.h>
+#include <stdio.h>
+#include <assert.h>
+#include <signal.h>
+#include <libuutil.h>
+#include <libintl.h>
+#include <libcontract.h>
+#include <libcontract_priv.h>
+#include "libcontract_impl.h"
+#include "libcontract_priv.h"
+
+/*
+ * Print a one-line description of a device contract event to file.
+ *
+ * The contract's "device" status file is opened and read so the minor
+ * path can be included in the message.  If any of those steps fails a
+ * bracketed diagnostic ("[bad contract]", "[status error]" or
+ * "[bad status]") is printed instead.  CT_EV_NEGEND events are handed to
+ * contract_negend_dump().  The verbose argument is unused.
+ *
+ * The status handle and the status file descriptor are released on every
+ * path once they have been acquired.
+ */
+/*ARGSUSED*/
+void
+event_device(FILE *file, ct_evthdl_t ev, int verbose)
+{
+	uint_t type;
+	char *device;
+	char *s;
+	ctid_t ctid;
+	ct_stathdl_t stathdl;
+	int statfd;
+
+	type = ct_event_get_type(ev);
+	ctid = ct_event_get_ctid(ev);
+
+	statfd = contract_open(ctid, "device", "status", O_RDONLY);
+	if (statfd == -1) {
+		(void) fprintf(file, dgettext(TEXT_DOMAIN, "[bad contract]\n"));
+		return;
+	}
+
+	if (ct_status_read(statfd, CTD_ALL, &stathdl) != 0) {
+		(void) fprintf(file, dgettext(TEXT_DOMAIN, "[status error]\n"));
+		goto out_close;
+	}
+
+	if (ct_dev_status_get_minor(stathdl, &device) != 0) {
+		(void) fprintf(file, dgettext(TEXT_DOMAIN, "[bad status]\n"));
+		goto out_free;
+	}
+
+	switch (type) {
+	case CT_DEV_EV_OFFLINE:
+		s = dgettext(TEXT_DOMAIN, "device %s offlining\n");
+		break;
+	case CT_DEV_EV_DEGRADED:
+		s = dgettext(TEXT_DOMAIN, "device %s degrading\n");
+		break;
+	case CT_DEV_EV_ONLINE:
+		s = dgettext(TEXT_DOMAIN, "device %s online\n");
+		break;
+	case CT_EV_NEGEND:
+		contract_negend_dump(file, ev);
+		s = NULL;
+		break;
+	default:
+		s = dgettext(TEXT_DOMAIN, "device %s sent an unknown event\n");
+		break;
+	}
+
+	/*
+	 * device was looked up in the status handle, so it must be
+	 * printed before the handle is freed below.
+	 */
+	if (s) {
+		/*LINTED*/
+		(void) fprintf(file, s, device);
+	}
+
+out_free:
+	ct_status_free(stathdl);
+out_close:
+	(void) close(statfd);
+}
diff --git a/usr/src/lib/libcontract/common/device_dump.h b/usr/src/lib/libcontract/common/device_dump.h
new file mode 100644
index 0000000000..8c90400a52
--- /dev/null
+++ b/usr/src/lib/libcontract/common/device_dump.h
@@ -0,0 +1,43 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _DEVICE_DUMP_H
+#define _DEVICE_DUMP_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include "libcontract_impl.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void event_device(FILE *, ct_evthdl_t, int);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DEVICE_DUMP_H */
diff --git a/usr/src/lib/libcontract/common/libcontract.c b/usr/src/lib/libcontract/common/libcontract.c
index 7cb35c4cfe..d2739cd1cd 100644
--- a/usr/src/lib/libcontract/common/libcontract.c
+++ b/usr/src/lib/libcontract/common/libcontract.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -66,11 +65,11 @@ ct_tmpl_create(int fd, ctid_t *ctidp)
}
int
-ct_tmpl_set_internal(int fd, uint_t id, uint_t value)
+ct_tmpl_set_internal(int fd, uint_t id, uintptr_t value)
{
ct_param_t param;
param.ctpm_id = id;
- param.ctpm_value = value;
+ param.ctpm_value = (uint64_t)value;
if (ioctl(fd, CT_TSET, &param) == -1)
return (errno);
return (0);
@@ -112,6 +111,18 @@ ct_tmpl_get_internal(int fd, uint_t id, uint_t *value)
}
int
+ct_tmpl_get_internal_string(int fd, uint_t id, char *value)
+{
+ ct_param_t param;
+
+ param.ctpm_id = id;
+ param.ctpm_value = (uint64_t)(uintptr_t)value;
+ if (ioctl(fd, CT_TGET, &param) == -1)
+ return (errno);
+ return (0);
+}
+
+int
ct_tmpl_get_critical(int fd, uint_t *events)
{
return (ct_tmpl_get_internal(fd, CTP_EV_CRITICAL, events));
@@ -173,6 +184,14 @@ ct_ctl_ack(int fd, ctevid_t event)
}
int
+ct_ctl_nack(int fd, ctevid_t event)
+{
+ if (ioctl(fd, CT_CNACK, &event) == -1)
+ return (errno);
+ return (0);
+}
+
+int
ct_ctl_qack(int fd, ctevid_t event)
{
if (ioctl(fd, CT_CQREQ, &event) == -1)
diff --git a/usr/src/lib/libcontract/common/libcontract.h b/usr/src/lib/libcontract/common/libcontract.h
index 98092b7db0..27453e5c83 100644
--- a/usr/src/lib/libcontract/common/libcontract.h
+++ b/usr/src/lib/libcontract/common/libcontract.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -55,6 +54,7 @@ extern int ct_tmpl_get_informative(int, uint_t *);
extern int ct_ctl_adopt(int);
extern int ct_ctl_abandon(int);
extern int ct_ctl_ack(int, ctevid_t);
+extern int ct_ctl_nack(int, ctevid_t);
extern int ct_ctl_qack(int, ctevid_t);
extern int ct_ctl_newct(int, ctevid_t, int);
@@ -113,6 +113,23 @@ extern int ct_pr_status_get_fatal(ct_stathdl_t, uint_t *);
extern int ct_pr_status_get_members(ct_stathdl_t, pid_t **, uint_t *);
extern int ct_pr_status_get_contracts(ct_stathdl_t, ctid_t **, uint_t *);
+/*
+ * Device contract routines
+ */
+int ct_dev_tmpl_set_minor(int, char *);
+int ct_dev_tmpl_set_aset(int, uint_t);
+int ct_dev_tmpl_set_noneg(int);
+int ct_dev_tmpl_clear_noneg(int);
+int ct_dev_tmpl_get_minor(int, char *, size_t *);
+int ct_dev_tmpl_get_aset(int, uint_t *);
+int ct_dev_tmpl_get_noneg(int, uint_t *);
+int ct_dev_status_get_aset(ct_stathdl_t, uint_t *);
+int ct_dev_status_get_noneg(ct_stathdl_t, uint_t *);
+int ct_dev_status_get_dev_state(ct_stathdl_t, uint_t *);
+int ct_dev_status_get_minor(ct_stathdl_t, char **);
+
+
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/lib/libcontract/common/libcontract_impl.h b/usr/src/lib/libcontract/common/libcontract_impl.h
index d8504cb5cf..ad50cd3dcc 100644
--- a/usr/src/lib/libcontract/common/libcontract_impl.h
+++ b/usr/src/lib/libcontract/common/libcontract_impl.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -48,8 +47,9 @@ struct ctlib_event_info {
nvlist_t *nvl;
};
-extern int ct_tmpl_set_internal(int, uint_t, uint_t);
+extern int ct_tmpl_set_internal(int, uint_t, uintptr_t);
extern int ct_tmpl_get_internal(int, uint_t, uint_t *);
+extern int ct_tmpl_get_internal_string(int, uint_t, char *);
typedef struct contract_type {
const char *type_name;
diff --git a/usr/src/lib/libcontract/common/libcontract_priv.c b/usr/src/lib/libcontract/common/libcontract_priv.c
index 1db8ea2d95..d74e8409c6 100644
--- a/usr/src/lib/libcontract/common/libcontract_priv.c
+++ b/usr/src/lib/libcontract/common/libcontract_priv.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -34,16 +33,19 @@
#include <stdio.h>
#include <assert.h>
#include <libuutil.h>
+#include <libintl.h>
#include <string.h>
#include <procfs.h>
#include <libcontract.h>
#include <libcontract_priv.h>
#include "libcontract_impl.h"
#include "process_dump.h"
+#include "device_dump.h"
contract_type_t types[CTT_MAXTYPE] = {
- { "process", event_process }
+ { "process", event_process },
+ { "device", event_device }
};
static int
@@ -147,3 +149,23 @@ contract_event_dump(FILE *file, ct_evthdl_t hdl, int verbose)
type = info->event.ctev_cttype;
types[type].type_event(file, hdl, verbose);
}
+
+/*
+ * Print the outcome of a negotiation-end event to file.
+ *
+ * The negotiation is reported as succeeded when the contract id obtained
+ * from ct_event_get_newct() differs from the id of the contract that
+ * sent the event, and as failed when the two are equal.  The return
+ * values of the two lookups are ignored; on failure the zero
+ * initializers are used.
+ */
+void
+contract_negend_dump(FILE *file, ct_evthdl_t ev)
+{
+	ctid_t evt_ctid = ct_event_get_ctid(ev);
+	ctid_t repl_ctid = 0;
+	ctevid_t neg_evid = 0;
+	char *fmt;
+
+	(void) ct_event_get_nevid(ev, &neg_evid);
+	(void) ct_event_get_newct(ev, &repl_ctid);
+
+	fmt = (repl_ctid == evt_ctid) ?
+	    dgettext(TEXT_DOMAIN, "negotiation %llu failed\n") :
+	    dgettext(TEXT_DOMAIN, "negotiation %llu succeeded\n");
+
+	/*LINTED*/
+	(void) fprintf(file, fmt, (unsigned long long)neg_evid);
+}
diff --git a/usr/src/lib/libcontract/common/libcontract_priv.h b/usr/src/lib/libcontract/common/libcontract_priv.h
index a1069efb35..639f190aff 100644
--- a/usr/src/lib/libcontract/common/libcontract_priv.h
+++ b/usr/src/lib/libcontract/common/libcontract_priv.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -42,6 +41,7 @@ extern int contract_open(ctid_t, const char *, const char *, int);
extern int contract_abandon_id(ctid_t);
extern ctid_t getctid(void);
extern void contract_event_dump(FILE *, ct_evthdl_t, int);
+extern void contract_negend_dump(FILE *, ct_evthdl_t);
#ifdef __cplusplus
}
diff --git a/usr/src/lib/libcontract/common/mapfile-vers b/usr/src/lib/libcontract/common/mapfile-vers
index 2f220b60ad..a64cbfd047 100644
--- a/usr/src/lib/libcontract/common/mapfile-vers
+++ b/usr/src/lib/libcontract/common/mapfile-vers
@@ -19,7 +19,7 @@
# CDDL HEADER END
#
#
-# Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
# ident "%Z%%M% %I% %E% SMI"
@@ -29,6 +29,7 @@ SUNW_1.1 {
global:
ct_ctl_abandon;
ct_ctl_ack;
+ ct_ctl_nack;
ct_ctl_adopt;
ct_ctl_newct;
ct_ctl_qack;
@@ -85,6 +86,17 @@ SUNW_1.1 {
ct_tmpl_set_cookie;
ct_tmpl_set_critical;
ct_tmpl_set_informative;
+ ct_dev_tmpl_set_minor;
+ ct_dev_tmpl_set_aset;
+ ct_dev_tmpl_set_noneg;
+ ct_dev_tmpl_clear_noneg;
+ ct_dev_tmpl_get_minor;
+ ct_dev_tmpl_get_aset;
+ ct_dev_tmpl_get_noneg;
+ ct_dev_status_get_aset;
+ ct_dev_status_get_noneg;
+ ct_dev_status_get_dev_state;
+ ct_dev_status_get_minor;
};
SUNWprivate_1.1 {
diff --git a/usr/src/lib/libdevinfo/Makefile.com b/usr/src/lib/libdevinfo/Makefile.com
index c1db80004a..7c10f0ba47 100644
--- a/usr/src/lib/libdevinfo/Makefile.com
+++ b/usr/src/lib/libdevinfo/Makefile.com
@@ -30,7 +30,9 @@ VERS= .1
OBJECTS= devfsinfo.o devinfo.o devinfo_prop_decode.o devinfo_devlink.o \
devinfo_devperm.o devfsmap.o devinfo_devname.o \
- devinfo_finddev.o devinfo_dli.o devinfo_dim.o devinfo_realpath.o
+ devinfo_finddev.o devinfo_dli.o devinfo_dim.o \
+ devinfo_realpath.o devinfo_retire.o
+
include ../../Makefile.lib
include ../../Makefile.rootfs
diff --git a/usr/src/lib/libdevinfo/devinfo.c b/usr/src/lib/libdevinfo/devinfo.c
index c9179e0d1f..8c103d2f7a 100644
--- a/usr/src/lib/libdevinfo/devinfo.c
+++ b/usr/src/lib/libdevinfo/devinfo.c
@@ -998,6 +998,12 @@ di_flags(di_node_t node)
return (DI_NODE(node)->flags);
}
+/*
+ * Return the DEVI_RETIRED bit of the node's flags: nonzero when the bit
+ * is set, 0 otherwise.
+ */
+uint_t
+di_retired(di_node_t node)
+{
+ return (di_flags(node) & DEVI_RETIRED);
+}
+
ddi_devid_t
di_devid(di_node_t node)
{
diff --git a/usr/src/lib/libdevinfo/devinfo_retire.c b/usr/src/lib/libdevinfo/devinfo_retire.c
new file mode 100644
index 0000000000..8bcb77a730
--- /dev/null
+++ b/usr/src/lib/libdevinfo/devinfo_retire.c
@@ -0,0 +1,785 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <libdevinfo.h>
+#include <sys/modctl.h>
+#include <sys/stat.h>
+#include <string.h>
+#include <librcm.h>
+#include <dlfcn.h>
+
+#undef NDEBUG
+#include <assert.h>
+
+/* One element of a singly linked list of path strings. */
+typedef struct rio_path {
+ char rpt_path[PATH_MAX];
+ struct rio_path *rpt_next;
+} rio_path_t;
+
+/* State handed to the tree/minor walkers and RCM helpers in this file. */
+typedef struct rcm_arg {
+ char *rcm_root; /* device path printed in debug messages */
+ di_node_t rcm_node; /* node where retire_supported() starts its walk */
+ int rcm_supp; /* 1 = retire supported, 0 = vetoed */
+ rcm_handle_t *rcm_handle; /* first argument to the rcm_* calls below */
+ int rcm_retcode; /* RCM_* result recorded by the offline walkers */
+ di_retire_t *rcm_dp; /* caller's rt_hdl and rt_debug/rt_abort hooks */
+ rio_path_t *rcm_cons_nodes; /* node paths pushed by offline_one() */
+ rio_path_t *rcm_rsrc_minors; /* minor paths pushed by call_offline() */
+ /* NOTE(review): how these three pointers get bound is not visible here */
+ int (*rcm_offline)();
+ int (*rcm_online)();
+ int (*rcm_remove)();
+} rcm_arg_t;
+
+/* One entry of the supported-device table consulted by node_select(). */
+typedef struct selector {
+ char *sel_name; /* name printed when the selector matches */
+ int (*sel_selector)(di_node_t node, rcm_arg_t *rp); /* 1 = match */
+} di_selector_t;
+
+static void rio_assert(di_retire_t *dp, const char *EXstr, int line,
+ const char *file);
+
+#define LIBRCM_PATH "/usr/lib/librcm.so"
+#define RIO_ASSERT(d, x) \
+ {if (!(x)) rio_assert(d, #x, __LINE__, __FILE__); }
+
+static int disk_select(di_node_t node, rcm_arg_t *rp);
+static int nexus_select(di_node_t node, rcm_arg_t *rp);
+
+/*
+ * Device classes for which retire is supported.  node_select() tries the
+ * selectors in order and stops at the first one returning 1; the table
+ * is terminated by the entry whose sel_name is NULL.
+ */
+di_selector_t supported_devices[] = {
+ {"disk", disk_select},
+ {"nexus", nexus_select},
+ {NULL, NULL}
+};
+
+/*
+ * calloc() wrapper with a forced-failure switch.  When fail is nonzero no
+ * allocation is attempted: errno is set to ENOMEM and NULL is returned.
+ * Otherwise the result of calloc(nelem, elsize) is returned.
+ */
+void *
+s_calloc(size_t nelem, size_t elsize, int fail)
+{
+ if (fail) {
+ errno = ENOMEM;
+ return (NULL);
+ } else {
+ return (calloc(nelem, elsize));
+ }
+}
+
+/*
+ * Report a failed RIO_ASSERT().  EXstr is the text of the failed
+ * expression; line and file identify where it failed.
+ *
+ * If the caller supplied no rt_abort callback, assert(0) is used (NDEBUG
+ * is undefined before <assert.h> is included in this file).  Otherwise an
+ * "Assertion failed" message is formatted and passed to rt_abort together
+ * with rt_hdl.
+ */
+static void
+rio_assert(di_retire_t *dp, const char *EXstr, int line, const char *file)
+{
+ char buf[PATH_MAX];
+
+ if (dp->rt_abort == NULL)
+ assert(0);
+
+ (void) snprintf(buf, sizeof (buf),
+ "Assertion failed: %s, file %s, line %d\n",
+ EXstr, file, line);
+ dp->rt_abort(dp->rt_hdl, buf);
+}
+
+/*
+ * di_walk_minor() callback used by disk_select().  arg is the rcm_arg_t
+ * set up by the caller.  The first minor whose spectype is S_IFBLK marks
+ * the node as supported (rcm_supp = 1) and ends the walk; any other
+ * minor lets the walk continue.  The node argument is unused.
+ */
+/*ARGSUSED*/
+static int
+disk_minor(di_node_t node, di_minor_t minor, void *arg)
+{
+	rcm_arg_t *rargp = (rcm_arg_t *)arg;
+	di_retire_t *rtp = rargp->rcm_dp;
+
+	if (di_minor_spectype(minor) != S_IFBLK) {
+		rtp->rt_debug(rtp->rt_hdl, "[INFO]: disk_minor: Not a disk "
+		    "minor. Continuing minor walk\n");
+		return (DI_WALK_CONTINUE);
+	}
+
+	rargp->rcm_supp = 1;
+	rtp->rt_debug(rtp->rt_hdl, "[INFO]: disk_minor: is disk minor. "
+	    "IDed this node as disk\n");
+	return (DI_WALK_TERMINATE);
+}
+
+/*
+ * Selector for the "disk" entry of supported_devices[].  Returns 1 if the
+ * minor walk below found a block-special minor on node, 0 if it found
+ * none or if di_walk_minor() itself failed.
+ *
+ * A private rcm_arg_t is used for the walk; only its rcm_dp and rcm_supp
+ * members are initialized, which are the only ones disk_minor() touches.
+ */
+static int
+disk_select(di_node_t node, rcm_arg_t *rp)
+{
+ rcm_arg_t rarg;
+ di_retire_t *dp = rp->rcm_dp;
+
+ rarg.rcm_dp = dp;
+
+ /*
+ * Check if this is a disk minor. If any one minor is DDI_NT_BLOCK
+ * we assume it is a disk
+ */
+ rarg.rcm_supp = 0;
+ if (di_walk_minor(node, DDI_NT_BLOCK, 0, &rarg, disk_minor) != 0) {
+ dp->rt_debug(dp->rt_hdl, "[INFO]: disk_select: di_walk_minor "
+ "failed. Returning NOTSUP\n");
+ return (0);
+ }
+
+ return (rarg.rcm_supp);
+}
+
+/*
+ * Selector for the "nexus" entry of supported_devices[].  Returns 1 if
+ * the node's driver ops include DI_BUS_OPS, 0 if they do not or if the
+ * node's devfs path cannot be obtained.  The path is used only for the
+ * debug message and is freed before returning.
+ */
+static int
+nexus_select(di_node_t node, rcm_arg_t *rp)
+{
+	di_retire_t *rtp = rp->rcm_dp;
+	char *devpath;
+	int is_nexus;
+
+	devpath = di_devfs_path(node);
+	if (devpath == NULL) {
+		rtp->rt_debug(rtp->rt_hdl, "[INFO]: nexus_select: "
+		    "di_devfs_path() is NULL. Returning NOTSUP\n");
+		return (0);
+	}
+
+	/* A node is a nexus when its driver exports bus ops. */
+	is_nexus = (di_driver_ops(node) & DI_BUS_OPS) ? 1 : 0;
+	rtp->rt_debug(rtp->rt_hdl, is_nexus ?
+	    "[INFO]: nexus_select: is nexus %s\n" :
+	    "[INFO]: nexus_select: not nexus %s\n", devpath);
+
+	di_devfs_path_free(devpath);
+
+	return (is_nexus);
+}
+
+/*
+ * di_walk_node() callback used by retire_supported().  arg is the
+ * rcm_arg_t for the retire operation.
+ *
+ * Each visited node may veto the retire by clearing rp->rcm_supp and
+ * ending the walk (DI_WALK_TERMINATE).  That happens when:
+ *   - the node's devfs path cannot be obtained,
+ *   - the node is "/pseudo" or lies under "/pseudo/", or
+ *   - the node is attached and no entry of supported_devices[] selects it.
+ * Otherwise DI_WALK_CONTINUE is returned and rcm_supp is left alone.
+ */
+static int
+node_select(di_node_t node, void *arg)
+{
+	rcm_arg_t *rp = (rcm_arg_t *)arg;
+	di_retire_t *dp;
+	int sel;
+	int i;
+	char *path;
+	uint_t state;
+
+	dp = rp->rcm_dp;
+
+	path = di_devfs_path(node);
+	if (path == NULL) {
+		/*
+		 * The node cannot be classified without its path, and the
+		 * string comparisons below must not see NULL.  When in
+		 * doubt, retire is not supported.
+		 */
+		dp->rt_debug(dp->rt_hdl, "[INFO]: node_select: "
+		    "di_devfs_path() is NULL. Returning NOTSUP\n");
+		rp->rcm_supp = 0;
+		return (DI_WALK_TERMINATE);
+	}
+
+	/* a pseudo node in the subtree vetoes - we only retire real hardware */
+	if (strncmp(path, "/pseudo/", strlen("/pseudo/")) == 0 ||
+	    strcmp(path, "/pseudo") == 0) {
+		dp->rt_debug(dp->rt_hdl, "[INFO]: node_select: "
+		    "pseudo device in subtree - returning NOTSUP: %s\n",
+		    path);
+		rp->rcm_supp = 0;
+		di_devfs_path_free(path);
+		return (DI_WALK_TERMINATE);
+	}
+	di_devfs_path_free(path);
+
+	/*
+	 * If a device is offline/detached/down it is
+	 * retireable irrespective of the type of device,
+	 * presumably the system is able to function without
+	 * it.
+	 */
+	state = di_state(node);
+	if ((state & DI_DRIVER_DETACHED) || (state & DI_DEVICE_OFFLINE) ||
+	    (state & DI_BUS_DOWN)) {
+		dp->rt_debug(dp->rt_hdl, "[INFO]: node_select: device "
+		    "is offline/detached. Assuming retire supported\n");
+		return (DI_WALK_CONTINUE);
+	}
+
+	sel = 0;
+	for (i = 0; supported_devices[i].sel_name != NULL; i++) {
+		sel = supported_devices[i].sel_selector(node, rp);
+		if (sel == 1) {
+			dp->rt_debug(dp->rt_hdl, "[INFO]: node_select: "
+			    "found supported device: %s\n",
+			    supported_devices[i].sel_name);
+			break;
+		}
+	}
+
+	if (sel != 1) {
+		/*
+		 * This node is not a supported device. Retire cannot proceed
+		 */
+		dp->rt_debug(dp->rt_hdl, "[INFO]: node_select: found "
+		    "unsupported device. Returning NOTSUP\n");
+		rp->rcm_supp = 0;
+		return (DI_WALK_TERMINATE);
+	}
+
+	/*
+	 * This node is supported. Check other nodes in this subtree.
+	 */
+	dp->rt_debug(dp->rt_hdl, "[INFO]: node_select: This node supported. "
+	    "Checking other nodes in subtree: %s\n", rp->rcm_root);
+	return (DI_WALK_CONTINUE);
+}
+
+
+
+/*
+ * Decide whether the subtree rooted at rp->rcm_node can be retired.
+ * Returns 1 if every node visited by node_select() allowed it, 0 if any
+ * node vetoed or if di_walk_node() itself failed.  The result is also
+ * left in rp->rcm_supp.
+ *
+ * When in doubt assume that retire is not supported for this device.
+ */
+static int
+retire_supported(rcm_arg_t *rp)
+{
+ di_retire_t *dp;
+ di_node_t rnode = rp->rcm_node;
+
+ dp = rp->rcm_dp;
+
+ /*
+ * We should not be here if devinfo snapshot is NULL.
+ */
+ RIO_ASSERT(dp, rnode != DI_NODE_NIL);
+
+ /*
+ * Note: We initially set supported to 1, then walk the
+ * subtree rooted at devpath, allowing each node the
+ * opportunity to veto the support. We cannot do things
+ * the other way around i.e. assume "not supported" and
+ * let individual nodes indicate that they are supported.
+ * In the latter case, the supported flag would be set
+ * if any one node in the subtree was supported which is
+ * not what we want.
+ */
+ rp->rcm_supp = 1;
+ if (di_walk_node(rnode, DI_WALK_CLDFIRST, rp, node_select) != 0) {
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: retire_supported: "
+ "di_walk_node: failed. Returning NOTSUP\n");
+ rp->rcm_supp = 0;
+ }
+
+ if (rp->rcm_supp) {
+ dp->rt_debug(dp->rt_hdl, "[INFO]: retire IS supported\n");
+ }
+
+ return (rp->rcm_supp);
+}
+
+/*
+ * Finish an offline attempt and release the path lists hanging off rp.
+ *
+ * retcode must be 0 or -1 (asserted).  The rcm_cons_nodes list is simply
+ * freed.  For every entry on rcm_rsrc_minors, rcm_remove is called when
+ * retcode is 0 and rcm_online when it is -1, both with the
+ * RCM_RETIRE_NOTIFY flag; a result other than RCM_SUCCESS is only logged
+ * through rt_debug.  Each entry is freed after its call and both list
+ * heads are reset to NULL.
+ */
+static void
+rcm_finalize(rcm_arg_t *rp, int retcode)
+{
+ rio_path_t *p;
+ rio_path_t *tmp;
+ int flags = RCM_RETIRE_NOTIFY;
+ int retval;
+ int error;
+ di_retire_t *dp;
+
+ dp = rp->rcm_dp;
+
+ RIO_ASSERT(dp, retcode == 0 || retcode == -1);
+
+ dp->rt_debug(dp->rt_hdl, "[INFO]: rcm_finalize: retcode=%d: dev=%s\n",
+ retcode, rp->rcm_root);
+
+ /* advance p before freeing the element it came from */
+ for (p = rp->rcm_cons_nodes; p; ) {
+ tmp = p;
+ p = tmp->rpt_next;
+ free(tmp);
+ }
+ rp->rcm_cons_nodes = NULL;
+
+ dp->rt_debug(dp->rt_hdl, "[INFO]: rcm_finalize: cons_nodes NULL\n");
+
+ for (p = rp->rcm_rsrc_minors; p; ) {
+ tmp = p;
+ p = tmp->rpt_next;
+ if (retcode == 0) {
+ retval = rp->rcm_remove(rp->rcm_handle,
+ tmp->rpt_path, flags, NULL);
+ /* save errno right away; the debug call below may change it */
+ error = errno;
+ } else {
+ RIO_ASSERT(dp, retcode == -1);
+ retval = rp->rcm_online(rp->rcm_handle,
+ tmp->rpt_path, flags, NULL);
+ error = errno;
+ }
+ if (retval != RCM_SUCCESS) {
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: rcm_finalize: "
+ "rcm_%s: retval=%d: error=%s: path=%s\n",
+ retcode == 0 ? "remove" : "online", retval,
+ strerror(error), tmp->rpt_path);
+ } else {
+ dp->rt_debug(dp->rt_hdl, "[INFO]: rcm_finalize: "
+ "rcm_%s: SUCCESS: path=%s\n",
+ retcode == 0 ? "remove" : "online", tmp->rpt_path);
+ }
+ free(tmp);
+ }
+ rp->rcm_rsrc_minors = NULL;
+}
+/*
+ * di_walk_minor() callback used by offline_one().  arg is the rcm_arg_t
+ * for the retire operation; the node argument is unused.
+ *
+ * Builds the "/devices"-prefixed path of the minor, calls rcm_offline on
+ * it with RCM_RETIRE_REQUEST, and pushes the path onto
+ * rp->rcm_rsrc_minors whatever the outcome.  The result is folded into
+ * rp->rcm_retcode:
+ *   RCM_SUCCESS       - recorded, walk continues
+ *   RCM_NO_CONSTRAINT - rcm_retcode left unchanged, walk continues
+ *   anything else     - RCM_FAILURE recorded, walk terminated
+ * A failure to obtain the minor path or to allocate the list element
+ * also records RCM_FAILURE and terminates the walk.
+ */
+/*ARGSUSED*/
+static int
+call_offline(di_node_t node, di_minor_t minor, void *arg)
+{
+ rcm_arg_t *rp = (rcm_arg_t *)arg;
+ di_retire_t *dp = rp->rcm_dp;
+ char *mnp;
+ rio_path_t *rpt;
+ int retval;
+
+ mnp = di_devfs_minor_path(minor);
+ if (mnp == NULL) {
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: di_devfs_minor_path "
+ "failed. Returning RCM FAILURE: %s\n", rp->rcm_root);
+ rp->rcm_retcode = RCM_FAILURE;
+ return (DI_WALK_TERMINATE);
+ }
+
+ rpt = s_calloc(1, sizeof (rio_path_t), 0);
+ if (rpt == NULL) {
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: calloc failed. "
+ "Returning RCM FAILURE: %s\n", rp->rcm_root);
+ di_devfs_path_free(mnp);
+ rp->rcm_retcode = RCM_FAILURE;
+ return (DI_WALK_TERMINATE);
+ }
+
+ (void) snprintf(rpt->rpt_path, sizeof (rpt->rpt_path),
+ "/devices%s", mnp);
+
+ di_devfs_path_free(mnp);
+
+ retval = rp->rcm_offline(rp->rcm_handle, rpt->rpt_path,
+ RCM_RETIRE_REQUEST, NULL);
+
+ /* queued before retval is examined, so failed paths are listed too */
+ rpt->rpt_next = rp->rcm_rsrc_minors;
+ rp->rcm_rsrc_minors = rpt;
+
+ if (retval == RCM_FAILURE) {
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: RCM OFFLINE failed "
+ "for: %s\n", rpt->rpt_path);
+ rp->rcm_retcode = RCM_FAILURE;
+ return (DI_WALK_TERMINATE);
+ } else if (retval == RCM_SUCCESS) {
+ rp->rcm_retcode = RCM_SUCCESS;
+ dp->rt_debug(dp->rt_hdl, "[INFO]: RCM OFFLINE returned "
+ "RCM_SUCCESS: %s\n", rpt->rpt_path);
+ } else if (retval != RCM_NO_CONSTRAINT) {
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: RCM OFFLINE returned "
+ "invalid value for: %s\n", rpt->rpt_path);
+ rp->rcm_retcode = RCM_FAILURE;
+ return (DI_WALK_TERMINATE);
+ } else {
+ dp->rt_debug(dp->rt_hdl, "[INFO]: RCM OFFLINE returned "
+ "RCM_NO_CONSTRAINT: %s\n", rpt->rpt_path);
+ }
+
+ return (DI_WALK_CONTINUE);
+}
+
+/*
+ * di_walk_node() callback: request an RCM offline for every minor of
+ * "node" by walking its minors with call_offline().
+ *
+ * If at least one minor reported RCM_SUCCESS, the node's devfs path is
+ * pushed onto rp->rcm_cons_nodes.  When the node was processed without
+ * error rp->rcm_retcode is left at RCM_SUCCESS and DI_WALK_CONTINUE is
+ * returned; on any error rp->rcm_retcode is set to RCM_FAILURE and the
+ * walk is terminated with DI_WALK_TERMINATE.
+ */
+static int
+offline_one(di_node_t node, void *arg)
+{
+	rcm_arg_t	*rp = (rcm_arg_t *)arg;
+	rio_path_t	*rpt;
+	di_retire_t	*dp = rp->rcm_dp;
+	char		*path;
+
+	/*
+	 * We should already have terminated the walk
+	 * in case of failure
+	 */
+	RIO_ASSERT(dp, rp->rcm_retcode == RCM_SUCCESS ||
+	    rp->rcm_retcode == RCM_NO_CONSTRAINT);
+
+	dp->rt_debug(dp->rt_hdl, "[INFO]: offline_one: entered\n");
+
+	/* reset so that call_offline() results for this node are visible */
+	rp->rcm_retcode = RCM_NO_CONSTRAINT;
+
+	rpt = s_calloc(1, sizeof (rio_path_t), 0);
+	if (rpt == NULL) {
+		dp->rt_debug(dp->rt_hdl, "[ERROR]: rio_path_t calloc "
+		    "failed: error: %s\n", strerror(errno));
+		goto fail;
+	}
+
+	path = di_devfs_path(node);
+	if (path == NULL) {
+		dp->rt_debug(dp->rt_hdl, "[ERROR]: di_devfs_path "
+		    "failed: error: %s\n", strerror(errno));
+		free(rpt);
+		goto fail;
+	}
+
+	(void) strlcpy(rpt->rpt_path, path, sizeof (rpt->rpt_path));
+
+	di_devfs_path_free(path);
+
+	if (di_walk_minor(node, NULL, 0, rp, call_offline) != 0) {
+		/*
+		 * "path" was released above; report the copy kept in
+		 * rpt->rpt_path rather than the freed buffer.
+		 */
+		dp->rt_debug(dp->rt_hdl, "[ERROR]: di_walk_minor "
+		    "failed: error: %s: %s\n", strerror(errno),
+		    rpt->rpt_path);
+		free(rpt);
+		goto fail;
+	}
+
+	if (rp->rcm_retcode == RCM_FAILURE) {
+		dp->rt_debug(dp->rt_hdl, "[ERROR]: di_walk_minor "
+		    "returned: RCM_FAILURE: %s\n", rpt->rpt_path);
+		free(rpt);
+		goto fail;
+	} else if (rp->rcm_retcode == RCM_SUCCESS) {
+		dp->rt_debug(dp->rt_hdl, "[INFO]: di_walk_minor "
+		    "returned: RCM_SUCCESS: %s\n", rpt->rpt_path);
+		/* a constraint was applied: record this node's path */
+		rpt->rpt_next = rp->rcm_cons_nodes;
+		rp->rcm_cons_nodes = rpt;
+	} else if (rp->rcm_retcode != RCM_NO_CONSTRAINT) {
+		dp->rt_debug(dp->rt_hdl, "[ERROR]: di_walk_minor "
+		    "returned: unknown RCM error code: %d, %s\n",
+		    rp->rcm_retcode, rpt->rpt_path);
+		free(rpt);
+		goto fail;
+	} else {
+		dp->rt_debug(dp->rt_hdl, "[INFO]: di_walk_minor "
+		    "returned: RCM_NO_CONSTRAINT: %s\n", rpt->rpt_path);
+		free(rpt);
+	}
+
+	/*
+	 * RCM_SUCCESS or RCM_NO_CONSTRAINT.
+	 * RCM_SUCCESS implies we overcame a constraint, so keep walking.
+	 * RCM_NO_CONSTRAINT implies no constraints applied via RCM.
+	 * Continue walking in the hope that contracts or LDI will
+	 * apply constraints
+	 * set retcode to RCM_SUCCESS to show that at least 1 node
+	 * completely walked
+	 */
+	rp->rcm_retcode = RCM_SUCCESS;
+	return (DI_WALK_CONTINUE);
+
+fail:
+	rp->rcm_retcode = RCM_FAILURE;
+	return (DI_WALK_TERMINATE);
+}
+
+/*
+ * Walk the devinfo snapshot in rp->rcm_node and request an RCM offline
+ * for every minor found.
+ *
+ * On entry *pp is set to NULL and *clen to 0.
+ *
+ * Returns:
+ *	RCM_SUCCESS: RCM constraints (if any) were applied. The
+ *	device paths for which constraints were applied are passed
+ *	back via the pp argument as a sequence of NUL terminated
+ *	strings followed by an extra '\0'; *clen is the total length
+ *	of that buffer. The buffer is obtained from s_calloc().
+ *
+ *	RCM_NO_CONSTRAINT: there is no devinfo snapshot to walk, so
+ *	RCM was not consulted. *pp is NULL and *clen is 0.
+ *
+ *	RCM_FAILURE: Either RCM constraints prevent a retire or
+ *	an error occurred. rcm_finalize(rp, -1) has already been
+ *	called to undo the offline requests.
+ */
+static int
+rcm_notify(rcm_arg_t *rp, char **pp, size_t *clen)
+{
+	size_t		len;
+	rio_path_t	*p;
+	rio_path_t	*tmp;
+	char		*plistp;
+	char		*s;
+	di_retire_t	*dp;
+	di_node_t	rnode;
+
+	dp = rp->rcm_dp;
+
+	dp->rt_debug(dp->rt_hdl, "[INFO]: rcm_notify() entered\n");
+
+	RIO_ASSERT(dp, rp->rcm_root);
+
+	/* give both out-parameters a defined value on every return path */
+	*pp = NULL;
+	*clen = 0;
+
+	rnode = rp->rcm_node;
+	if (rnode == DI_NODE_NIL) {
+		dp->rt_debug(dp->rt_hdl, "[ERROR]: devinfo snapshot "
+		    "NULL. Returning no RCM constraint: %s\n", rp->rcm_root);
+		return (RCM_NO_CONSTRAINT);
+	}
+
+	rp->rcm_retcode = RCM_NO_CONSTRAINT;
+	rp->rcm_cons_nodes = NULL;
+	rp->rcm_rsrc_minors = NULL;
+	if (di_walk_node(rnode, DI_WALK_CLDFIRST, rp, offline_one) != 0) {
+		dp->rt_debug(dp->rt_hdl, "[ERROR]: di_walk_node "
+		    "failed: error: %s: %s\n", strerror(errno), rp->rcm_root);
+		/* online is idempotent - safe to online non-offlined nodes */
+		rcm_finalize(rp, -1);
+		rp->rcm_retcode = RCM_FAILURE;
+		goto out;
+	}
+
+	if (rp->rcm_retcode == RCM_FAILURE) {
+		dp->rt_debug(dp->rt_hdl, "[ERROR]: walk_node "
+		    "returned retcode of RCM_FAILURE: %s\n", rp->rcm_root);
+		rcm_finalize(rp, -1);
+		goto out;
+	}
+
+	if (rp->rcm_retcode == RCM_NO_CONSTRAINT) {
+		dp->rt_debug(dp->rt_hdl, "[ERROR]: di_walk_node "
+		    " - no nodes walked: RCM_NO_CONSTRAINT: %s\n",
+		    rp->rcm_root);
+	} else {
+		dp->rt_debug(dp->rt_hdl, "[INFO]: walk_node: RCM_SUCCESS\n");
+	}
+
+	/*
+	 * Convert to a sequence of NUL separated strings terminated by
+	 * '\0''\0'
+	 */
+	for (len = 0, p = rp->rcm_cons_nodes; p; p = p->rpt_next) {
+		RIO_ASSERT(dp, p->rpt_path);
+		RIO_ASSERT(dp, strlen(p->rpt_path) > 0);
+		len += (strlen(p->rpt_path) + 1);
+	}
+	len++;	/* list terminating '\0' */
+
+	/* size_t is not necessarily unsigned long: cast to match %lu */
+	dp->rt_debug(dp->rt_hdl, "[INFO]: len of constraint str = %lu\n",
+	    (unsigned long)len);
+
+	plistp = s_calloc(1, len, 0);
+	if (plistp == NULL) {
+		dp->rt_debug(dp->rt_hdl, "[ERROR]: fail to alloc "
+		    "constraint list: error: %s: %s\n", strerror(errno),
+		    rp->rcm_root);
+		rcm_finalize(rp, -1);
+		rp->rcm_retcode = RCM_FAILURE;
+		goto out;
+	}
+
+	/* copy each path into the buffer, freeing the list as we go */
+	for (s = plistp, p = rp->rcm_cons_nodes; p; ) {
+		tmp = p;
+		p = tmp->rpt_next;
+		(void) strcpy(s, tmp->rpt_path);
+		s += strlen(s) + 1;
+		RIO_ASSERT(dp, s - plistp < len);
+		free(tmp);
+	}
+	rp->rcm_cons_nodes = NULL;
+	RIO_ASSERT(dp, s - plistp == len - 1);
+	*s = '\0';
+
+	dp->rt_debug(dp->rt_hdl, "[INFO]: constraint str = %p\n",
+	    (void *)plistp);
+
+	*pp = plistp;
+	*clen = len;
+
+	rp->rcm_retcode = RCM_SUCCESS;
+out:
+	return (rp->rcm_retcode);
+}
+
+
+/*
+ * Issue a MODRETIRE modctl for the device at devpath after consulting RCM.
+ *
+ * devpath must be an absolute path shorter than PATH_MAX that does not
+ * start with "/devices/", does not contain "../devices/" and does not
+ * contain a ':'. flags must be 0. dp, dp->rt_debug and dp->rt_hdl must
+ * all be non-NULL; dp->rt_debug is used for all diagnostics.
+ *
+ * Returns:
+ *	0	 the modctl succeeded
+ *	EINVAL	 invalid argument
+ *	ENOSYS	 librcm could not be dlopen'ed or a required symbol is missing
+ *	ENOTSUP	 retire_supported() rejected the device
+ *	EBUSY	 rcm_notify() returned RCM_FAILURE
+ *	ESRCH	 rcm_notify() returned an unrecognised code
+ *	errno	 as left by a failed rcm_alloc_handle() or modctl()
+ */
+/*ARGSUSED*/
+int
+di_retire_device(char *devpath, di_retire_t *dp, int flags)
+{
+ char path[PATH_MAX];
+ struct stat sb;
+ int retval = EINVAL;
+ char *constraint = NULL;
+ size_t clen;
+ void *librcm_hdl;
+ rcm_arg_t rarg = {0};
+ int (*librcm_alloc_handle)();
+ int (*librcm_free_handle)();
+
+ if (dp == NULL || dp->rt_debug == NULL || dp->rt_hdl == NULL)
+ return (EINVAL);
+
+ if (devpath == NULL || devpath[0] == '\0') {
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: NULL argument(s)\n");
+ return (EINVAL);
+ }
+
+ if (devpath[0] != '/' || strlen(devpath) >= PATH_MAX ||
+ strncmp(devpath, "/devices/", strlen("/devices/")) == 0 ||
+ strstr(devpath, "../devices/") || strrchr(devpath, ':')) {
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: invalid devpath: %s\n",
+ devpath);
+ return (EINVAL);
+ }
+
+ if (flags != 0) {
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: flags should be 0: %d\n",
+ flags);
+ return (EINVAL);
+ }
+
+ /*
+ * dlopen rather than link against librcm since libdevinfo
+ * resides in / and librcm resides in /usr. The dlopen is
+ * safe to do since fmd which invokes the retire code
+ * resides on /usr and will not come here until /usr is
+ * mounted.
+ */
+ librcm_hdl = dlopen(LIBRCM_PATH, RTLD_LAZY);
+ if (librcm_hdl == NULL) {
+ char *errstr = dlerror();
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: Cannot dlopen librcm: %s\n",
+ errstr ? errstr : "Unknown error");
+ return (ENOSYS);
+ }
+
+ librcm_alloc_handle = (int (*)())dlsym(librcm_hdl, "rcm_alloc_handle");
+ rarg.rcm_offline = (int (*)())dlsym(librcm_hdl, "rcm_request_offline");
+ rarg.rcm_online = (int (*)())dlsym(librcm_hdl, "rcm_notify_online");
+ rarg.rcm_remove = (int (*)())dlsym(librcm_hdl, "rcm_notify_remove");
+ librcm_free_handle = (int (*)())dlsym(librcm_hdl, "rcm_free_handle");
+
+ if (librcm_alloc_handle == NULL ||
+ rarg.rcm_offline == NULL ||
+ rarg.rcm_online == NULL ||
+ rarg.rcm_remove == NULL ||
+ librcm_free_handle == NULL) {
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: dlsym failed\n");
+ retval = ENOSYS;
+ goto out;
+ }
+
+ /*
+ * Take a libdevinfo snapshot here because we cannot do so
+ * after device is retired. If device doesn't attach, we retire
+ * anyway i.e. it is not fatal.
+ */
+ rarg.rcm_node = di_init(devpath, DINFOCPYALL);
+ if (rarg.rcm_node == DI_NODE_NIL) {
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: device doesn't attach, "
+ "retiring anyway: %s\n", devpath);
+ }
+
+ rarg.rcm_handle = NULL;
+ if (librcm_alloc_handle(NULL, 0, NULL, &rarg.rcm_handle)
+ != RCM_SUCCESS) {
+ /*
+ * NOTE(review): if errno happens to be 0 here this function
+ * would return 0 (success) - confirm that rcm_alloc_handle()
+ * always sets errno on failure.
+ */
+ retval = errno;
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: failed to alloc "
+ "RCM handle. Returning RCM failure: %s\n", devpath);
+ rarg.rcm_handle = NULL;
+ goto out;
+ }
+
+ rarg.rcm_root = devpath;
+ rarg.rcm_dp = dp;
+
+ /*
+ * If device is already detached/nonexistent and cannot be
+ * attached, allow retire without checking device type.
+ * XXX
+ * Else, check if retire is supported for this device type.
+ */
+ (void) snprintf(path, sizeof (path), "/devices%s", devpath);
+ if (stat(path, &sb) == -1 || !S_ISDIR(sb.st_mode)) {
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: detached or nonexistent "
+ "device. Bypassing retire_supported: %s\n", devpath);
+ } else if (!retire_supported(&rarg)) {
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: retire not supported for "
+ "device type: %s\n", devpath);
+ retval = ENOTSUP;
+ goto out;
+ }
+
+ clen = 0;
+ constraint = NULL;
+ retval = rcm_notify(&rarg, &constraint, &clen);
+ if (retval == RCM_FAILURE) {
+ /* retire not permitted */
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: RCM constraints block "
+ "retire: %s\n", devpath);
+ retval = EBUSY;
+ goto out;
+ } else if (retval == RCM_SUCCESS) {
+ dp->rt_debug(dp->rt_hdl, "[INFO]: RCM constraints applied"
+ ": %s\n", devpath);
+ } else if (retval == RCM_NO_CONSTRAINT) {
+ dp->rt_debug(dp->rt_hdl, "[INFO]: No RCM constraints applied"
+ ": %s\n", devpath);
+ } else {
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: notify returned unknown "
+ "return code: %d: %s\n", retval, devpath);
+ retval = ESRCH;
+ goto out;
+ }
+
+ /*
+ * Hand the device path and the constraint list built by
+ * rcm_notify() to the kernel. If that fails, undo the RCM offline
+ * requests by finalizing with -1 (rcm_finalize() then onlines the
+ * recorded minors).
+ */
+ if (modctl(MODRETIRE, devpath, constraint, clen) != 0) {
+ retval = errno;
+ dp->rt_debug(dp->rt_hdl, "[ERROR]: retire modctl() failed: "
+ "%s: %s\n", devpath, strerror(retval));
+ rcm_finalize(&rarg, -1);
+ goto out;
+ }
+
+ dp->rt_debug(dp->rt_hdl, "[INFO]: retire modctl() succeeded: %s\n",
+ devpath);
+
+ /* success: rcm_finalize() issues the RCM remove for each minor */
+ rcm_finalize(&rarg, 0);
+
+ retval = 0;
+
+out:
+ /*
+ * rarg is zero-initialised at declaration, so rcm_handle is NULL
+ * if we arrive here before the handle was allocated. The handle
+ * must be released before librcm itself is dlclose'd below.
+ */
+ if (rarg.rcm_handle)
+ (void) librcm_free_handle(rarg.rcm_handle);
+
+ RIO_ASSERT(dp, rarg.rcm_cons_nodes == NULL);
+ RIO_ASSERT(dp, rarg.rcm_rsrc_minors == NULL);
+
+ (void) dlclose(librcm_hdl);
+
+ free(constraint);
+
+ if (rarg.rcm_node != DI_NODE_NIL)
+ di_fini(rarg.rcm_node);
+
+ return (retval);
+}
+
+/*
+ * Issue a MODUNRETIRE modctl for devpath.
+ *
+ * The same argument rules as di_retire_device() apply: dp, dp->rt_debug
+ * and dp->rt_hdl must be non-NULL, and devpath must be an absolute path
+ * shorter than PATH_MAX that neither starts with "/devices/" nor contains
+ * "../devices/" or a ':'.
+ *
+ * Returns 0 on success, EINVAL for a bad argument, otherwise the errno
+ * left by the failed modctl().
+ */
+/*ARGSUSED*/
+int
+di_unretire_device(char *devpath, di_retire_t *dp)
+{
+	int	saved_errno;
+
+	if (dp == NULL || dp->rt_hdl == NULL || dp->rt_debug == NULL)
+		return (EINVAL);
+
+	if (devpath == NULL || *devpath == '\0') {
+		dp->rt_debug(dp->rt_hdl, "[ERROR]: NULL devpath\n");
+		return (EINVAL);
+	}
+
+	if (*devpath != '/' ||
+	    strlen(devpath) >= PATH_MAX ||
+	    strncmp(devpath, "/devices/", strlen("/devices/")) == 0 ||
+	    strstr(devpath, "../devices/") != NULL ||
+	    strrchr(devpath, ':') != NULL) {
+		dp->rt_debug(dp->rt_hdl, "[ERROR]: invalid devpath: %s\n",
+		    devpath);
+		return (EINVAL);
+	}
+
+	if (modctl(MODUNRETIRE, devpath) == 0) {
+		dp->rt_debug(dp->rt_hdl,
+		    "[INFO]: unretire modctl() done: %s\n", devpath);
+		return (0);
+	}
+
+	/* capture errno before anything else can overwrite it */
+	saved_errno = errno;
+	dp->rt_debug(dp->rt_hdl, "[ERROR]: unretire modctl() failed: "
+	    "%s: %s\n", devpath, strerror(saved_errno));
+	return (saved_errno);
+}
diff --git a/usr/src/lib/libdevinfo/libdevinfo.h b/usr/src/lib/libdevinfo/libdevinfo.h
index ad08502628..bdb4fa2238 100644
--- a/usr/src/lib/libdevinfo/libdevinfo.h
+++ b/usr/src/lib/libdevinfo/libdevinfo.h
@@ -355,6 +355,11 @@ extern void *di_parent_private_data(di_node_t node);
extern void *di_driver_private_data(di_node_t node);
/*
+ * The value of the dip's devi_flags field
+ */
+uint_t di_flags(di_node_t node);
+
+/*
* Types of links for devlink lookup
*/
#define DI_PRIMARY_LINK 0x01
@@ -412,6 +417,19 @@ extern int di_devlink_cache_walk(di_devlink_handle_t hdp, const char *re,
int (*devlink_callback)(di_devlink_t, void *));
/*
+ * Private interfaces for I/O retire
+ */
+typedef struct di_retire {
+ /*
+ * Caller's handle; di_retire_device()/di_unretire_device() pass it
+ * as the first argument to rt_debug and return EINVAL if it is NULL.
+ */
+ void *rt_hdl;
+ /*
+ * printf-style callback; presumably invoked on fatal internal
+ * errors - TODO confirm against the RIO_ASSERT definition.
+ */
+ void (*rt_abort)(void *hdl, const char *format, ...);
+ /*
+ * printf-style callback used for all "[INFO]"/"[ERROR]" diagnostics;
+ * must be non-NULL or the retire entry points return EINVAL.
+ */
+ void (*rt_debug)(void *hdl, const char *format, ...);
+} di_retire_t;
+
+extern int di_retire_device(char *path, di_retire_t *dp, int flags);
+extern int di_unretire_device(char *path, di_retire_t *dp);
+extern uint_t di_retired(di_node_t node);
+
+/*
* Private interfaces for /etc/logindevperm
*/
extern int di_devperm_login(const char *, uid_t, gid_t, void (*)(char *));
diff --git a/usr/src/lib/libdevinfo/mapfile-vers b/usr/src/lib/libdevinfo/mapfile-vers
index c941cd3efe..c2d82dcb4b 100644
--- a/usr/src/lib/libdevinfo/mapfile-vers
+++ b/usr/src/lib/libdevinfo/mapfile-vers
@@ -211,6 +211,9 @@ SUNWprivate_1.1 {
finddev_close;
finddev_next;
di_flags;
+ di_retire_device;
+ di_unretire_device;
+ di_retired;
local:
*;
};
diff --git a/usr/src/lib/librcm/librcm.h b/usr/src/lib/librcm/librcm.h
index d830ea375e..be57013b0a 100644
--- a/usr/src/lib/librcm/librcm.h
+++ b/usr/src/lib/librcm/librcm.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -57,6 +57,8 @@ extern "C" {
#define RCM_REGISTER_EVENT 0x2000 /* private */
#define RCM_REGISTER_CAPACITY 0x4000 /* private */
#define RCM_SUSPENDED 0x8000 /* private */
+#define RCM_RETIRE_REQUEST 0x10000
+#define RCM_RETIRE_NOTIFY 0x20000
/*
* RCM return values
@@ -64,6 +66,7 @@ extern "C" {
#define RCM_SUCCESS 0
#define RCM_FAILURE -1
#define RCM_CONFLICT -2
+#define RCM_NO_CONSTRAINT -3
/*
* RCM resource states
diff --git a/usr/src/lib/librcm/librcm_impl.h b/usr/src/lib/librcm/librcm_impl.h
index a534d22e1e..b096ffba0d 100644
--- a/usr/src/lib/librcm/librcm_impl.h
+++ b/usr/src/lib/librcm/librcm_impl.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -69,8 +68,8 @@ extern "C" {
#define RCM_REGISTER_MASK (RCM_FILESYS|RCM_REGISTER_DR|\
RCM_REGISTER_EVENT|RCM_REGISTER_CAPACITY)
#define RCM_REQUEST_MASK (RCM_QUERY|RCM_SCOPE|RCM_FORCE|RCM_FILESYS|\
- RCM_QUERY_CANCEL)
-#define RCM_NOTIFY_MASK (RCM_FILESYS)
+ RCM_QUERY_CANCEL|RCM_RETIRE_REQUEST)
+#define RCM_NOTIFY_MASK (RCM_FILESYS|RCM_RETIRE_NOTIFY)
/* event data names */
#define RCM_CMD "rcm.cmd"
diff --git a/usr/src/pkgdefs/SUNWhea/prototype_com b/usr/src/pkgdefs/SUNWhea/prototype_com
index 65925570c6..efa640b229 100644
--- a/usr/src/pkgdefs/SUNWhea/prototype_com
+++ b/usr/src/pkgdefs/SUNWhea/prototype_com
@@ -576,6 +576,8 @@ f none usr/include/sys/contract.h 644 root bin
f none usr/include/sys/contract_impl.h 644 root bin
f none usr/include/sys/contract/process.h 644 root bin
f none usr/include/sys/contract/process_impl.h 644 root bin
+f none usr/include/sys/contract/device.h 644 root bin
+f none usr/include/sys/contract/device_impl.h 644 root bin
f none usr/include/sys/copyops.h 644 root bin
f none usr/include/sys/core.h 644 root bin
f none usr/include/sys/corectl.h 644 root bin
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index e9de4ceac8..c439bc5d39 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -120,6 +120,7 @@ GENUNIX_OBJS += \
ddi_strtol.o \
devcfg.o \
devcache.o \
+ device.o \
devid.o \
devid_cache.o \
devid_scsi.o \
@@ -244,6 +245,7 @@ GENUNIX_OBJS += \
refstr.o \
rename.o \
resolvepath.o \
+ retire_store.o \
process.o \
rlimit.o \
rmap.o \
diff --git a/usr/src/uts/common/contract/device.c b/usr/src/uts/common/contract/device.c
new file mode 100644
index 0000000000..4632cdaa9d
--- /dev/null
+++ b/usr/src/uts/common/contract/device.c
@@ -0,0 +1,2207 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/mutex.h>
+#include <sys/debug.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/kmem.h>
+#include <sys/thread.h>
+#include <sys/id_space.h>
+#include <sys/avl.h>
+#include <sys/list.h>
+#include <sys/sysmacros.h>
+#include <sys/proc.h>
+#include <sys/contract.h>
+#include <sys/contract_impl.h>
+#include <sys/contract/device.h>
+#include <sys/contract/device_impl.h>
+#include <sys/cmn_err.h>
+#include <sys/nvpair.h>
+#include <sys/policy.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/ddi_implfuncs.h>
+#include <sys/systm.h>
+#include <sys/stat.h>
+#include <sys/sunddi.h>
+#include <sys/esunddi.h>
+#include <sys/ddi.h>
+#include <sys/fs/dv_node.h>
+#include <sys/sunndi.h>
+#undef ct_lock /* needed because clnt.h defines ct_lock as a macro */
+
+/*
+ * Device Contracts
+ * -----------------
+ * This file contains the core code for the device contracts framework.
+ * A device contract is an agreement or a contract between a process and
+ * the kernel regarding the state of the device. A device contract may be
+ * created when a relationship is formed between a device and a process
+ * i.e. at open(2) time, or it may be created at some point after the device
+ * has been opened. A device contract once formed may be broken by either party.
+ * A device contract can be broken by the process by an explicit abandon of the
+ * contract or by an implicit abandon when the process exits. A device contract
+ * can be broken by the kernel either asynchronously (without negotiation) or
+ * synchronously (with negotiation). Exactly which happens depends on the device
+ * state transition. The following state diagram shows the transitions between
+ * device states. Only device state transitions currently supported by device
+ * contracts are shown.
+ *
+ *                              <-- A -->
+ *                       /-----------------> DEGRADED
+ *                       |                      |
+ *                       |                      |
+ *                       |                      | S
+ *                       |                      | |
+ *                       |                      | v
+ *                       v       S -->          v
+ *                      ONLINE ------------> OFFLINE
+ *
+ *
+ * In the figure above, the arrows indicate the direction of transition. The
+ * letter S refers to transitions which are inherently synchronous i.e.
+ * require negotiation and the letter A indicates transitions which are
+ * asynchronous i.e. are done without contract negotiations. A good example
+ * of a synchronous transition is the ONLINE -> OFFLINE transition. This
+ * transition cannot happen as long as there are consumers which have the
+ * device open. Thus some form of negotiation needs to happen between the
+ * consumers and the kernel to ensure that consumers either close devices
+ * or disallow the move to OFFLINE. Certain other transitions such as
+ * ONLINE --> DEGRADED for example, are inherently asynchronous i.e.
+ * non-negotiable. A device that suffers a fault that degrades its
+ * capabilities will become degraded irrespective of what consumers it has,
+ * so a negotiation in this case is pointless.
+ *
+ * The following device states are currently defined for device contracts:
+ *
+ * CT_DEV_EV_ONLINE
+ * The device is online and functioning normally
+ * CT_DEV_EV_DEGRADED
+ * The device is online but is functioning in a degraded capacity
+ * CT_DEV_EV_OFFLINE
+ * The device is offline and is no longer configured
+ *
+ * A typical consumer of device contracts starts out with a contract
+ * template and adds terms to that template. These include the
+ * "acceptable set" (A-set) term, which is a bitset of device states which
+ * are guaranteed by the contract. If the device moves out of a state in
+ * the A-set, the contract is broken. The breaking of the contract can
+ * be asynchronous in which case a critical contract event is sent to the
+ * contract holder but no negotiations take place. If the breaking of the
+ * contract is synchronous, negotiations are opened between the affected
+ * consumer and the kernel. The kernel does this by sending a critical
+ * event to the consumer with the CTE_NEG flag set indicating that this
+ * is a negotiation event. The consumer can accept this change by sending
+ * an ACK message to the kernel. Alternatively, if it has the necessary
+ * privileges, it can send a NACK message to the kernel which will block
+ * the device state change. To NACK a negotiable event, a process must
+ * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
+ *
+ * Other terms include the "minor path" term, specified explicitly if the
+ * contract is not being created at open(2) time or specified implicitly
+ * if the contract is being created at open time via an activated template.
+ *
+ * A contract event is sent on any state change to which the contract
+ * owner has subscribed via the informative or critical event sets. Only
+ * critical events are guaranteed to be delivered. Since all device state
+ * changes are controlled by the kernel and cannot be arbitrarily generated
+ * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not
+ * need to be asserted in a process's effective set to designate an event as
+ * critical. To ensure privacy, a process must either have the same effective
+ * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege
+ * asserted in its effective set in order to observe device contract events
+ * off the device contract type specific endpoint.
+ *
+ * Yet another term available with device contracts is the "non-negotiable"
+ * term. This term is used to pre-specify a NACK to any contract negotiation.
+ * This term is ignored for asynchronous state changes. For example, a
+ * process may have the A-set {ONLINE|DEGRADED} and make the contract
+ * non-negotiable. In this case, the device contract framework assumes a
+ * NACK for any transition to OFFLINE and blocks the offline. If the A-set
+ * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE
+ * are NACKed but transitions to DEGRADE succeed.
+ *
+ * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract)
+ * happens just before the I/O framework attempts to offline a device
+ * (i.e. detach a device and set the offline flag so that it cannot be
+ * reattached). A device contract holder is expected to either NACK the offline
+ * (if privileged) or release the device and allow the offline to proceed.
+ *
+ * The DEGRADE contract event (if DEGRADE is not in the A-set for a contract)
+ * is generated just before the I/O framework transitions the device state
+ * to "degraded" (i.e. DEVI_DEVICE_DEGRADED in I/O framework terminology).
+ *
+ * The contract holder is expected to ACK or NACK a negotiation event
+ * within a certain period of time. If the ACK/NACK is not received
+ * within the timeout period, the device contract framework will behave
+ * as if the contract does not exist and will proceed with the event.
+ *
+ * Unlike a process contract a device contract does not need to exist
+ * once it is abandoned, since it does not define a fault boundary. It
+ * merely represents an agreement between a process and the kernel
+ * regarding the state of the device. Once the process has abandoned
+ * the contract (either implicitly via a process exit or explicitly)
+ * the kernel has no reason to retain the contract. As a result
+ * device contracts are neither inheritable nor need to exist in an
+ * orphan state.
+ *
+ * A device unlike a process may exist in multiple contracts and has
+ * a "life" outside a device contract. A device unlike a process
+ * may exist without an associated contract. Unlike a process contract
+ * a device contract may be formed after a binding relationship is
+ * formed between a process and a device.
+ *
+ * IMPLEMENTATION NOTES
+ * ====================
+ * DATA STRUCTURES
+ * ----------------
+ * The heart of the device contracts implementation is the device contract
+ * private cont_device_t (or ctd for short) data structure. It encapsulates
+ * the generic contract_t data structure and has a number of private
+ * fields.
+ * These include:
+ * cond_minor: The minor device that is the subject of the contract
+ * cond_aset: The bitset of states which are guaranteed by the
+ * contract
+ * cond_noneg: If set, indicates that the result of negotiation has
+ * been predefined to be a NACK
+ * In addition, there are other device identifiers such as the devinfo node,
+ * dev_t and spec_type of the minor node. There are also a few fields that
+ * are used during negotiation to maintain state. See
+ * uts/common/sys/contract/device_impl.h
+ * for details.
+ * The ctd structure represents the device private part of a contract of
+ * type "device"
+ *
+ * Another data structure used by device contracts is ctmpl_device. It is
+ * the device contracts private part of the contract template structure. It
+ * encapsulates the generic template structure "ct_template_t" and includes
+ * the following device contract specific fields
+ * ctd_aset: The bitset of states that should be guaranteed by a
+ * contract
+ * ctd_noneg: If set, indicates that contract should NACK a
+ * negotiation
+ * ctd_minor: The devfs_path (without the /devices prefix) of the
+ * minor node that is the subject of the contract.
+ *
+ * ALGORITHMS
+ * ---------
+ * There are three sets of routines in this file
+ * Template related routines
+ * -------------------------
+ * These routines provide support for template related operations initiated
+ * via the generic template operations. These include routines that dup
+ * a template, free it, and set various terms in the template
+ * (such as the minor node path, the acceptable state set (or A-set)
+ * and the non-negotiable term) as well as a routine to query the
+ * device specific portion of the template for the abovementioned terms.
+ * There is also a routine to create (ctmpl_device_create) that is used to
+ * create a contract from a template. This routine calls (after initial
+ * setup) the common function used to create a device contract
+ * (contract_device_create).
+ *
+ * core device contract implementation
+ * ----------------------------------
+ * These routines support the generic contract framework to provide
+ * functionality that allows contracts to be created, managed and
+ * destroyed. The contract_device_create() routine is a routine used
+ * to create a contract from a template (either via an explicit create
+ * operation on a template or implicitly via an open with an
+ * activated template.). The contract_device_free() routine assists
+ * in freeing the device contract specific parts. There are routines
+ * used to abandon (contract_device_abandon) a device contract as well
+ * as a routine to destroy (which despite its name does not destroy,
+ * it only moves a contract to a dead state) a contract.
+ * There is also a routine to return status information about a
+ * contract - the level of detail depends on what is requested by the
+ * user. A value of CTD_FIXED only returns fixed length fields such
+ * as the A-set, state of device and value of the "noneg" term. If
+ * CTD_ALL is specified, the minor node path is returned as well.
+ *
+ * In addition there are interfaces (contract_device_ack/nack) which
+ * are used to support negotiation between userland processes and
+ * device contracts. These interfaces record the acknowledgement
+ * or lack thereof for negotiation events and help determine if the
+ * negotiated event should occur.
+ *
+ * "backend routines"
+ * -----------------
+ * The backend routines form the interface between the I/O framework
+ * and the device contract subsystem. These routines, allow the I/O
+ * framework to call into the device contract subsystem to notify it of
+ * impending changes to a device state as well as to inform of the
+ * final disposition of such attempted state changes. Routines in this
+ * class include contract_device_offline() that indicates an attempt to
+ * offline a device, contract_device_degrade() that indicates that
+ * a device is moving to the degraded state and contract_device_negend()
+ * that is used by the I/O framework to inform the contracts subsystem of
+ * the final disposition of an attempted operation.
+ *
+ * SUMMARY
+ * -------
+ * A contract starts its life as a template. A process allocates a device
+ * contract template and sets various terms:
+ * The A-set
+ * The device minor node
+ * Critical and informative events
+ * The noneg i.e. no negotiation term
+ * Setting of these terms in the template is done via the
+ * ctmpl_device_set() entry point in this file. A process can query a
+ * template to determine the terms already set in the template - this is
+ * facilitated by the ctmpl_device_get() routine.
+ *
+ * Once all the appropriate terms are set, the contract is instantiated via
+ * one of two methods
+ * - via an explicit create operation - this is facilitated by the
+ * ctmpl_device_create() entry point
+ * - synchronously with the open(2) system call - this is achieved via the
+ * contract_device_open() routine.
+ * The core work for both these above functions is done by
+ * contract_device_create()
+ *
+ * A contract once created can be queried for its status. Support for
+ * status info is provided by both the common contracts framework and by
+ * the "device" contract type. If the level of detail requested is
+ * CTD_COMMON, only the common contract framework data is used. Higher
+ * levels of detail result in calls to contract_device_status() to supply
+ * device contract type specific status information.
+ *
+ * A contract once created may be abandoned either explicitly or implicitly.
+ * In either case, the contract_device_abandon() function is invoked. This
+ * function merely calls contract_destroy() which moves the contract to
+ * the DEAD state. The device contract portion of destroy processing is
+ * provided by contract_device_destroy() which merely disassociates the
+ * contract from its device devinfo node. A contract in the DEAD state is
+ * not freed. It hangs around until all references to the contract are
+ * gone. When that happens, the contract is finally deallocated. The
+ * device contract specific portion of the free is done by
+ * contract_device_free() which finally frees the device contract specific
+ * data structure (cont_device_t).
+ *
+ * When a device undergoes a state change, the I/O framework calls the
+ * corresponding device contract entry point. For example, when a device
+ * is about to go OFFLINE, the routine contract_device_offline() is
+ * invoked. Similarly if a device moves to DEGRADED state, the routine
+ * contract_device_degrade() function is called. These functions call the
+ * core routine contract_device_publish(). This function determines via
+ * the function is_sync_neg() whether an event is a synchronous (i.e.
+ * negotiable) event or not. In the former case contract_device_publish()
+ * publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs
+ * and/or NACKs from contract holders. In the latter case, it simply
+ * publishes the event and does not wait. In the negotiation case, ACKs or
+ * NACKs from userland consumers result in contract_device_ack_nack()
+ * being called where the result of the negotiation is recorded in the
+ * contract data structure. Once all outstanding contract owners have
+ * responded, the device contract code in wait_for_acks() determines the
+ * final result of the negotiation. A single NACK overrides all other ACKs.
+ * If there is no NACK, then a single ACK will result in an overall ACK
+ * result. If there are no ACKs or NACKs, then the result CT_NONE is
+ * returned back to the I/O framework. Once the event is permitted or
+ * blocked, the I/O framework proceeds or aborts the state change. The
+ * I/O framework then calls contract_device_negend() with a result code
+ * indicating final disposition of the event. This call releases the
+ * barrier and other state associated with the previous negotiation,
+ * which permits the next event (if any) to come into the device contract
+ * framework.
+ *
+ * Finally, a device that has outstanding contracts may be removed from
+ * the system which results in its devinfo node being freed. The devinfo
+ * free routine in the I/O framework calls into the device contract
+ * function contract_device_remove_dip(). This routine disassociates
+ * the dip from all contracts associated with the devinfo node being
+ * freed, allowing the devinfo node to be freed.
+ *
+ * LOCKING
+ * ---------
+ * There are four sets of data that need to be protected by locks
+ *
+ * i) device contract specific portion of the contract template - This data
+ * is protected by the template lock ctmpl_lock.
+ *
+ * ii) device contract specific portion of the contract - This data is
+ * protected by the contract lock ct_lock
+ *
+ * iii) The linked list of contracts hanging off a devinfo node - This
+ * list is protected by the per-devinfo node lock devi_ct_lock
+ *
+ * iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv
+ * and devi_ct_count that controls state changes to a dip
+ *
+ * The template lock is independent in that none of the other locks in this
+ * file may be taken while holding the template lock (and vice versa).
+ *
+ * The remaining three locks have the following lock order
+ *
+ * devi_ct_lock -> ct_count barrier -> ct_lock
+ *
+ */
+
+/*
+ * Forward declaration: contract_device_create() is defined later in this
+ * file but is called earlier, from ctmpl_device_create().
+ */
+static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev,
+ int spec_type, proc_t *owner, int *errorp);
+
+/* barrier routines */
+static void ct_barrier_acquire(dev_info_t *dip);
+static void ct_barrier_release(dev_info_t *dip);
+static int ct_barrier_held(dev_info_t *dip);
+static int ct_barrier_empty(dev_info_t *dip);
+static void ct_barrier_wait_for_release(dev_info_t *dip);
+static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs);
+static void ct_barrier_decr(dev_info_t *dip);
+static void ct_barrier_incr(dev_info_t *dip);
+
+/*
+ * The device contract type. Assigned by contract_device_init() from the
+ * return value of contract_type_init().
+ */
+ct_type_t *device_type;
+
+/*
+ * Macro predicates for determining when events should be sent and how.
+ *
+ * EVSENDP(ctd, flag) is non-zero if any bit of "flag" is present in either
+ * the informative or the critical event set of the contract embedded in
+ * "ctd".
+ *
+ * EVINFOP(ctd, flag) is true if no bit of "flag" is in the critical event
+ * set of the contract embedded in "ctd".
+ *
+ * Both arguments are parenthesized in the expansion so that compound
+ * expressions (e.g. "a | b" for flag, or "&x" / "p + 1" style pointer
+ * expressions for ctd) are evaluated as a unit.
+ */
+#define	EVSENDP(ctd, flag) \
+	(((ctd)->cond_contract.ct_ev_info | \
+	    (ctd)->cond_contract.ct_ev_crit) & (flag))
+
+#define	EVINFOP(ctd, flag) \
+	(((ctd)->cond_contract.ct_ev_crit & (flag)) == 0)
+
+/*
+ * State transition table showing which transitions are synchronous and which
+ * are not.
+ *
+ * Each entry maps a transition (st_old -> st_new) to st_neg, which is 1 for
+ * a synchronous (negotiable) transition and 0 otherwise. The table is
+ * searched by is_sync_neg(), which stops at the all-zero terminating entry
+ * (st_new == 0). Transitions not listed here are reported by is_sync_neg()
+ * as unsupported.
+ */
+struct ct_dev_negtable {
+ uint_t st_old;
+ uint_t st_new;
+ uint_t st_neg;
+} ct_dev_negtable[] = {
+ {CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE, 1},
+ {CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED, 0},
+ {CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE, 0},
+ {CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE, 1},
+ {0}
+};
+
+/*
+ * Device contract template implementation
+ */
+
+/*
+ * ctmpl_device_dup
+ *
+ * Template dup entry point for device contracts. Produces a new template
+ * carrying the same generic and device-specific terms as "template".
+ *
+ * ctmpl_copy() returns with the original template's lock held. The minor
+ * path is first snapshotted into a scratch buffer under that lock; the
+ * lock is then dropped around the i_ddi_strdup() of the snapshot and
+ * re-taken before returning, so the function returns with the lock held.
+ */
+static struct ct_template *
+ctmpl_device_dup(struct ct_template *template)
+{
+	ctmpl_device_t *src = template->ctmpl_data;
+	ctmpl_device_t *dst;
+	char *scratch;
+	char *minor_copy = NULL;
+
+	dst = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
+	scratch = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+	/* generic fields; on return the source template lock is held */
+	ctmpl_copy(&dst->ctd_ctmpl, template);
+
+	/* device contract specific fields */
+	dst->ctd_ctmpl.ctmpl_data = dst;
+	dst->ctd_aset = src->ctd_aset;
+	dst->ctd_minor = NULL;
+	dst->ctd_noneg = src->ctd_noneg;
+
+	if (src->ctd_minor == NULL) {
+		/* nothing to duplicate, scratch buffer not needed */
+		kmem_free(scratch, MAXPATHLEN);
+		scratch = NULL;
+	} else {
+		ASSERT(strlen(src->ctd_minor) + 1 <= MAXPATHLEN);
+		bcopy(src->ctd_minor, scratch, strlen(src->ctd_minor) + 1);
+	}
+
+	/* duplicate the snapshot with the template lock dropped */
+	mutex_exit(&template->ctmpl_lock);
+	if (scratch != NULL) {
+		minor_copy = i_ddi_strdup(scratch, KM_SLEEP);
+		kmem_free(scratch, MAXPATHLEN);
+		scratch = NULL;
+	}
+	mutex_enter(&template->ctmpl_lock);
+
+	if (minor_copy != NULL)
+		dst->ctd_minor = minor_copy;
+
+	ASSERT(scratch == NULL);
+	return (&dst->ctd_ctmpl);
+}
+
+/*
+ * ctmpl_device_free
+ *
+ * Template free entry point for device contracts. Releases the minor
+ * path string (if one was set) and then the template structure itself.
+ */
+static void
+ctmpl_device_free(struct ct_template *template)
+{
+	ctmpl_device_t *dtmpl = template->ctmpl_data;
+	char *minor = dtmpl->ctd_minor;
+
+	/* the minor path is sized as strlen + 1 when freed */
+	if (minor != NULL) {
+		kmem_free(minor, strlen(minor) + 1);
+	}
+
+	kmem_free(dtmpl, sizeof (ctmpl_device_t));
+}
+
+/*
+ * SAFE_EV is the set of events which a non-privileged process is
+ * allowed to make critical. An unprivileged device contract owner has
+ * no control over when a device changes state, so all device events
+ * can be in the critical set.
+ *
+ * EXCESS tells us if "value", a critical event set, requires
+ * additional privilege. For device contracts EXCESS currently
+ * evaluates to 0.
+ *
+ * More precisely: because SAFE_EV is CT_DEV_ALLEVENT, EXCESS(value) is
+ * non-zero only if "value" has bits set outside CT_DEV_ALLEVENT.
+ */
+#define SAFE_EV (CT_DEV_ALLEVENT)
+#define EXCESS(value) ((value) & ~SAFE_EV)
+
+
+/*
+ * ctmpl_device_set
+ *
+ * Template set entry point for device contracts. Called with the template
+ * lock held. Handles the following terms:
+ *
+ *   CTDP_ACCEPT	- the A-set; must be a non-empty, proper subset of
+ *			  CT_DEV_ALLEVENT
+ *   CTDP_NONEG		- the non-negotiable term; setting it requires
+ *			  secpolicy_sys_devices() to succeed
+ *   CTDP_MINOR		- the minor path, copied in from the address in
+ *			  ctpm_value and validated via resolve_pathname()
+ *   CTP_EV_CRITICAL	- the critical event set
+ *
+ * Returns 0 on success, EINVAL for a bad term or value, ERANGE if the
+ * minor path cannot be resolved, or the error from copyinstr() or the
+ * privilege check.
+ */
+static int
+ctmpl_device_set(struct ct_template *tmpl, ct_param_t *param, const cred_t *cr)
+{
+	ctmpl_device_t *dtmpl = tmpl->ctmpl_data;
+	char *buf;
+	int error;
+	dev_info_t *dip;
+	int spec_type;
+
+	ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock));
+
+	switch (param->ctpm_id) {
+	case CTDP_ACCEPT:
+		/* no unknown bits, and neither the empty nor the full set */
+		if ((param->ctpm_value & ~CT_DEV_ALLEVENT) ||
+		    param->ctpm_value == 0 ||
+		    param->ctpm_value == CT_DEV_ALLEVENT)
+			return (EINVAL);
+
+		dtmpl->ctd_aset = param->ctpm_value;
+		break;
+
+	case CTDP_NONEG:
+		if (param->ctpm_value == CTDP_NONEG_SET) {
+			/*
+			 * only privileged processes can designate a
+			 * contract non-negotiable.
+			 */
+			error = secpolicy_sys_devices(cr);
+			if (error != 0)
+				return (error);
+		} else if (param->ctpm_value != CTDP_NONEG_CLEAR) {
+			return (EINVAL);
+		}
+
+		dtmpl->ctd_noneg = param->ctpm_value;
+		break;
+
+	case CTDP_MINOR:
+		if (param->ctpm_value == NULL)
+			return (EINVAL);
+
+		buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+		/* Copyin the device path */
+		error = copyinstr((char *)(uintptr_t)param->ctpm_value, buf,
+		    MAXPATHLEN, NULL);
+
+		if (error == 0) {
+			buf[MAXPATHLEN - 1] = '\0';
+
+			/*
+			 * The path must be absolute, must not begin with
+			 * or reach into "/devices/", and must name a minor
+			 * (i.e. contain a ':').
+			 */
+			if (*buf != '/' ||
+			    strncmp(buf, "/devices/",
+			    strlen("/devices/")) == 0 ||
+			    strstr(buf, "../devices/") ||
+			    strchr(buf, ':') == NULL)
+				error = EINVAL;
+		}
+
+		if (error == 0) {
+			spec_type = 0;
+			dip = NULL;
+			if (resolve_pathname(buf, &dip, NULL,
+			    &spec_type) != 0) {
+				error = ERANGE;
+			} else {
+				/* only the spec type is of interest here */
+				ddi_release_devi(dip);
+				if (spec_type != S_IFCHR &&
+				    spec_type != S_IFBLK)
+					error = EINVAL;
+			}
+		}
+
+		if (error != 0) {
+			kmem_free(buf, MAXPATHLEN);
+			return (error);
+		}
+
+		/* replace any previously set minor path */
+		if (dtmpl->ctd_minor != NULL) {
+			kmem_free(dtmpl->ctd_minor,
+			    strlen(dtmpl->ctd_minor) + 1);
+		}
+		dtmpl->ctd_minor = i_ddi_strdup(buf, KM_SLEEP);
+		kmem_free(buf, MAXPATHLEN);
+		break;
+
+	case CTP_EV_CRITICAL:
+		/*
+		 * Currently for device contracts, any event
+		 * may be added to the critical set. We retain the
+		 * following code however for future enhancements.
+		 */
+		if (EXCESS(param->ctpm_value)) {
+			error = secpolicy_contract_event(cr);
+			if (error != 0)
+				return (error);
+		}
+		tmpl->ctmpl_ev_crit = param->ctpm_value;
+		break;
+
+	default:
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * ctmpl_device_get
+ *
+ * Template get entry point for device contracts. Called with the template
+ * lock held. For CTDP_ACCEPT and CTDP_NONEG the value is stored in
+ * param->ctpm_value; for CTDP_MINOR the minor path is copied out to the
+ * address held in param->ctpm_value.
+ *
+ * Returns 0 on success, ENOENT if no minor path has been set, EINVAL for
+ * an unknown term, or the error from copyoutstr().
+ */
+static int
+ctmpl_device_get(struct ct_template *template, ct_param_t *param)
+{
+	ctmpl_device_t *dtmpl = template->ctmpl_data;
+	int error;
+
+	ASSERT(MUTEX_HELD(&template->ctmpl_lock));
+
+	switch (param->ctpm_id) {
+	case CTDP_ACCEPT:
+		param->ctpm_value = dtmpl->ctd_aset;
+		break;
+
+	case CTDP_NONEG:
+		param->ctpm_value = dtmpl->ctd_noneg;
+		break;
+
+	case CTDP_MINOR:
+		if (dtmpl->ctd_minor == NULL)
+			return (ENOENT);
+
+		error = copyoutstr(dtmpl->ctd_minor,
+		    (char *)(uintptr_t)param->ctpm_value,
+		    MAXPATHLEN, NULL);
+		if (error != 0)
+			return (error);
+		break;
+
+	default:
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * Device contract type specific portion of creating a contract using
+ * a specified template.
+ *
+ * The template's minor path is snapshotted under the template lock and
+ * resolved to a dev_t and spec type; contract_device_create() then does
+ * the actual creation on behalf of curproc. On success the new contract's
+ * id is stored in *ctidp.
+ *
+ * Returns 0 on success, EINVAL if ctidp is NULL or the template has no
+ * minor path, ERANGE if the path does not resolve to a usable block or
+ * character device, or the error reported by contract_device_create().
+ */
+/*ARGSUSED*/
+int
+ctmpl_device_create(ct_template_t *template, ctid_t *ctidp)
+{
+	ctmpl_device_t *dtmpl;
+	char *buf;
+	dev_t dev;
+	int spec_type;
+	int error;
+	cont_device_t *ctd;
+
+	if (ctidp == NULL)
+		return (EINVAL);
+
+	buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+	dtmpl = template->ctmpl_data;
+
+	mutex_enter(&template->ctmpl_lock);
+	if (dtmpl->ctd_minor == NULL) {
+		/* incomplete template */
+		mutex_exit(&template->ctmpl_lock);
+		kmem_free(buf, MAXPATHLEN);
+		return (EINVAL);
+	}
+	ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
+	bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1);
+	mutex_exit(&template->ctmpl_lock);
+
+	/* resolve the snapshot with the template lock dropped */
+	spec_type = 0;
+	dev = NODEV;
+	if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 ||
+	    dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE ||
+	    (spec_type != S_IFCHR && spec_type != S_IFBLK)) {
+		CT_DEBUG((CE_WARN,
+		    "tmpl_create: failed to find device: %s", buf));
+		kmem_free(buf, MAXPATHLEN);
+		return (ERANGE);
+	}
+	kmem_free(buf, MAXPATHLEN);
+
+	ctd = contract_device_create(template->ctmpl_data,
+	    dev, spec_type, curproc, &error);
+
+	if (ctd != NULL) {
+		mutex_enter(&ctd->cond_contract.ct_lock);
+		*ctidp = ctd->cond_contract.ct_id;
+		mutex_exit(&ctd->cond_contract.ct_lock);
+		return (0);
+	}
+
+	CT_DEBUG((CE_WARN, "Failed to create device contract for "
+	    "process (%d) with device (devt = %lu, spec_type = %s)",
+	    curproc->p_pid, dev,
+	    spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK"));
+	return (error);
+}
+
+/*
+ * Device contract specific template entry points
+ *
+ * Positional initializer: the entries must stay in the order given by the
+ * trailing comments. This table is handed to ctmpl_init() by
+ * contract_device_default().
+ */
+static ctmplops_t ctmpl_device_ops = {
+ ctmpl_device_dup, /* ctop_dup */
+ ctmpl_device_free, /* ctop_free */
+ ctmpl_device_set, /* ctop_set */
+ ctmpl_device_get, /* ctop_get */
+ ctmpl_device_create, /* ctop_create */
+ CT_DEV_ALLEVENT /* all device events bitmask */
+};
+
+
+/*
+ * Device contract implementation
+ */
+
+/*
+ * contract_device_default
+ *
+ * Default template entry point for device contracts. Builds a template
+ * with:
+ *	- A-set of ONLINE | DEGRADED
+ *	- the "noneg" term cleared
+ *	- DEGRADED as an informative event
+ *	- OFFLINE as a critical event
+ *	- no minor path (left NULL by the zeroing allocation)
+ */
+static ct_template_t *
+contract_device_default(void)
+{
+	ctmpl_device_t *dtmpl;
+	ct_template_t *tmpl;
+
+	dtmpl = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
+	tmpl = &dtmpl->ctd_ctmpl;
+	ctmpl_init(tmpl, &ctmpl_device_ops, device_type, dtmpl);
+
+	/* device contract specific terms */
+	dtmpl->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED;
+	dtmpl->ctd_noneg = 0;
+
+	/* generic event sets */
+	tmpl->ctmpl_ev_info = CT_DEV_EV_DEGRADED;
+	tmpl->ctmpl_ev_crit = CT_DEV_EV_OFFLINE;
+
+	return (tmpl);
+}
+
+/*
+ * contract_device_free
+ *
+ * Free entry point for device contracts. Releases the minor path and the
+ * cont_device_t itself. Along the way it sanity checks (on DEBUG kernels)
+ * that the contract's device-specific state is internally consistent and
+ * that the contract is no longer linked onto a devinfo node's list.
+ */
+static void
+contract_device_free(contract_t *ct)
+{
+	cont_device_t *ctd = ct->ct_data;
+
+	/* the minor path is a string sized strlen + 1 */
+	ASSERT(ctd->cond_minor);
+	ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);
+	kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1);
+
+	/* device identity must be a concrete block or character device */
+	ASSERT(ctd->cond_devt != DDI_DEV_T_ANY &&
+	    ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV);
+	ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR);
+
+	/* contract terms */
+	ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT));
+	ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1);
+
+	/* current-event bookkeeping */
+	ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT));
+	ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK)));
+	ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0));
+	ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0));
+
+	/* must already be off the per-dip contract list */
+	ASSERT(!list_link_active(&ctd->cond_next));
+
+	kmem_free(ctd, sizeof (cont_device_t));
+}
+
+/*
+ * contract_device_abandon
+ *
+ * Abandon entry point for device contracts. Called with the contract
+ * lock held.
+ *
+ * A device contract can be neither inherited nor orphaned, so abandoning
+ * one simply destroys it: contract_destroy() moves it to the dead state
+ * and it is freed once the last reference goes away.
+ */
+static void
+contract_device_abandon(contract_t *ct)
+{
+	ASSERT(MUTEX_HELD(&ct->ct_lock));
+
+	contract_destroy(ct);
+}
+
+/*
+ * contract_device_destroy
+ *
+ * The device contract destroy entry point.
+ * Called from contract_destroy() to do any type specific destroy. Note
+ * that destroy is a misnomer - this does not free the contract, it only
+ * moves it to the dead state. A contract is actually freed via
+ * contract_rele() -> contract_dtor(), contop_free()
+ *
+ * Entered and exited with ct_lock held. If the contract is still linked
+ * to a devinfo node, ct_lock is dropped and re-taken internally so that
+ * devi_ct_lock can be acquired first, as the lock order requires.
+ */
+static void
+contract_device_destroy(contract_t *ct)
+{
+ cont_device_t *ctd = ct->ct_data;
+ dev_info_t *dip = ctd->cond_dip;
+
+ ASSERT(MUTEX_HELD(&ct->ct_lock));
+
+ if (dip == NULL) {
+ /*
+ * The dip has been removed, this is a dangling contract
+ * Check that dip linkages are NULL
+ */
+ ASSERT(!list_link_active(&ctd->cond_next));
+ CT_DEBUG((CE_NOTE, "contract_device_destroy: contract has no "
+ "devinfo node. contract ctid : %d", ct->ct_id));
+ return;
+ }
+
+ /*
+ * Need to have lock order: devi_ct_lock -> ct_count barrier -> ct_lock
+ */
+ mutex_exit(&ct->ct_lock);
+
+ /*
+ * Waiting for the barrier to be released is strictly speaking not
+ * necessary. But it simplifies the implementation of
+ * contract_device_publish() by establishing the invariant that
+ * device contracts cannot go away during negotiation.
+ */
+ mutex_enter(&(DEVI(dip)->devi_ct_lock));
+ ct_barrier_wait_for_release(dip);
+ mutex_enter(&ct->ct_lock);
+
+ /* unlink from the per-dip contract list, under both locks */
+ list_remove(&(DEVI(dip)->devi_ct), ctd);
+ ctd->cond_dip = NULL; /* no longer linked to dip */
+ contract_rele(ct); /* remove hold for dip linkage */
+
+ /*
+ * Drop both locks, then re-take only ct_lock so that we return to
+ * the caller in the same locking state we were entered with.
+ */
+ mutex_exit(&ct->ct_lock);
+ mutex_exit(&(DEVI(dip)->devi_ct_lock));
+ mutex_enter(&ct->ct_lock);
+}
+
+/*
+ * contract_device_status
+ *
+ * Status entry point for device contracts; invoked for detail levels
+ * CTD_FIXED and CTD_ALL. Fills in the common status and adds the device
+ * state, A-set and noneg term to "nvl". For any detail level other than
+ * CTD_FIXED the minor path is added as well.
+ */
+static void
+contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl,
+    void *status, model_t model)
+{
+	cont_device_t *ctd = ct->ct_data;
+
+	ASSERT(detail == CTD_FIXED || detail == CTD_ALL);
+
+	mutex_enter(&ct->ct_lock);
+	contract_status_common(ct, zone, status, model);
+
+	/*
+	 * aset and noneg are static and would not need the contract lock,
+	 * but the state does, so everything is read under it.
+	 */
+	VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0);
+	VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0);
+	VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0);
+
+	if (detail != CTD_FIXED) {
+		ASSERT(ctd->cond_minor);
+		VERIFY(nvlist_add_string(nvl, CTDS_MINOR,
+		    ctd->cond_minor) == 0);
+	}
+
+	mutex_exit(&ct->ct_lock);
+}
+
+/*
+ * Maps a negotiation result code (CT_ACK, CT_NACK or CT_NONE) to its name
+ * for use in messages. Any other value yields "UNKNOWN".
+ */
+static char *
+result_str(uint_t result)
+{
+	if (result == CT_ACK)
+		return ("CT_ACK");
+	if (result == CT_NACK)
+		return ("CT_NACK");
+	if (result == CT_NONE)
+		return ("CT_NONE");
+
+	return ("UNKNOWN");
+}
+
+/*
+ * Maps a device state constant (CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED or
+ * CT_DEV_EV_OFFLINE) to its name for use in messages. Any other value
+ * yields "UNKNOWN".
+ */
+static char *
+state_str(uint_t state)
+{
+	if (state == CT_DEV_EV_ONLINE)
+		return ("ONLINE");
+	if (state == CT_DEV_EV_DEGRADED)
+		return ("DEGRADED");
+	if (state == CT_DEV_EV_OFFLINE)
+		return ("OFFLINE");
+
+	return ("UNKNOWN");
+}
+
+/*
+ * Looks up the state transition old -> new in ct_dev_negtable.
+ *
+ * Returns the table's st_neg value for a listed transition (1 if it is
+ * synchronous, 0 if not), -2 if old and new are the same state, and -1
+ * if the transition is not in the table.
+ */
+static int
+is_sync_neg(uint_t old, uint_t new)
+{
+	struct ct_dev_negtable *entry;
+
+	ASSERT(old & CT_DEV_ALLEVENT);
+	ASSERT(new & CT_DEV_ALLEVENT);
+
+	if (old == new) {
+		CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s",
+		    state_str(new)));
+		return (-2);
+	}
+
+	/* the table ends with an all-zero entry */
+	for (entry = ct_dev_negtable; entry->st_new != 0; entry++) {
+		if (entry->st_old == old && entry->st_new == new)
+			return (entry->st_neg);
+	}
+
+	CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: "
+	    "old = %s -> new = %s", state_str(old), state_str(new)));
+
+	return (-1);
+}
+
+/*
+ * Cleans up cached dv_nodes for "dip" so that, once a contract holder has
+ * released the device, its devinfo node can be detached.
+ *
+ * Returns EDEADLOCK if the node that would have to be entered (the parent,
+ * or dip itself when there is no parent) is busy owned; otherwise returns
+ * the result of devfs_clean().
+ */
+static int
+contract_device_dvclean(dev_info_t *dip)
+{
+	dev_info_t *pdip;
+	char *devnm;
+	int busy;
+	int error;
+
+	ASSERT(dip);
+
+	/* pdip can be NULL if we have contracts against the root dip */
+	pdip = ddi_get_parent(dip);
+
+	if (pdip != NULL)
+		busy = DEVI_BUSY_OWNED(pdip);
+	else
+		busy = DEVI_BUSY_OWNED(dip);
+
+	if (busy) {
+		char *path;
+
+		path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+		(void) ddi_pathname(dip, path);
+		CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, "
+		    "device=%s", path));
+		kmem_free(path, MAXPATHLEN);
+		return (EDEADLOCK);
+	}
+
+	if (pdip == NULL)
+		return (devfs_clean(dip, NULL, DV_CLEAN_FORCE));
+
+	/* clean under the parent, skipping the first character of the name */
+	devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
+	(void) ddi_deviname(dip, devnm);
+	error = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
+	kmem_free(devnm, MAXNAMELEN + 1);
+
+	return (error);
+}
+
+/*
+ * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland.
+ * Results in the ACK or NACK being recorded on the dip for one particular
+ * contract. The device contracts framework evaluates the ACK/NACKs for all
+ * contracts against a device to determine if a particular device state change
+ * should be allowed.
+ *
+ * ct	  - the contract being ACKed/NACKed
+ * evtype - the device event (state) being negotiated
+ * evid	  - id of the event the response is for
+ * cmd	  - CT_ACK or CT_NACK
+ *
+ * Returns 0 if the response was recorded, or if an ACK arrived for an
+ * event that is no longer current. Returns ETIMEDOUT if a NACK arrived
+ * for a non-current event or for a device that has been removed, and
+ * ESRCH if the device has been removed and the contract is dead.
+ *
+ * The devinfo node is held (ndi_hold_devi()) only across the window in
+ * which ct_lock is dropped for the dv_clean; that single hold is released
+ * exactly once after ct_lock is re-taken, regardless of whether the
+ * dv_clean succeeded.
+ */
+static int
+contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid,
+    uint_t cmd)
+{
+	cont_device_t *ctd = ct->ct_data;
+	dev_info_t *dip;
+	ctid_t ctid;
+	int error;
+
+	ctid = ct->ct_id;
+
+	CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid));
+
+	mutex_enter(&ct->ct_lock);
+	CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid));
+
+	dip = ctd->cond_dip;
+
+	ASSERT(ctd->cond_minor);
+	ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);
+
+	/*
+	 * Negotiation only if new state is not in A-set
+	 */
+	ASSERT(!(ctd->cond_aset & evtype));
+
+	/*
+	 * Negotiation only if transition is synchronous
+	 */
+	ASSERT(is_sync_neg(ctd->cond_state, evtype));
+
+	/*
+	 * We shouldn't be negotiating if the "noneg" flag is set
+	 */
+	ASSERT(!ctd->cond_noneg);
+
+	/* keep the dip around while ct_lock is dropped for the dv_clean */
+	if (dip)
+		ndi_hold_devi(dip);
+
+	mutex_exit(&ct->ct_lock);
+
+	/*
+	 * dv_clean only if !NACK and offline state change
+	 */
+	if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) {
+		CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid));
+		error = contract_device_dvclean(dip);
+		if (error != 0) {
+			/*
+			 * The dv_clean is best effort. Do NOT release the
+			 * dip here: the hold taken above is dropped once,
+			 * below, on every path. Releasing it here as well
+			 * would drop the reference twice.
+			 */
+			CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d",
+			    ctid));
+		}
+	}
+
+	mutex_enter(&ct->ct_lock);
+
+	/* drop the hold taken before the dv_clean */
+	if (dip)
+		ddi_release_devi(dip);
+
+	if (dip == NULL) {
+		if (ctd->cond_currev_id != evid) {
+			CT_DEBUG((CE_WARN, "%sACK for non-current event "
+			    "(type=%s, id=%llu) on removed device",
+			    cmd == CT_NACK ? "N" : "",
+			    state_str(evtype), (unsigned long long)evid));
+			CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d",
+			    ctid));
+		} else {
+			ASSERT(ctd->cond_currev_type == evtype);
+			CT_DEBUG((CE_WARN, "contract_ack: no such device: "
+			    "ctid: %d", ctid));
+		}
+		error = (ct->ct_state == CTS_DEAD) ? ESRCH :
+		    ((cmd == CT_NACK) ? ETIMEDOUT : 0);
+		mutex_exit(&ct->ct_lock);
+		return (error);
+	}
+
+	/*
+	 * Must follow lock order: devi_ct_lock -> ct_count barrier -> ct_lock
+	 */
+	mutex_exit(&ct->ct_lock);
+
+	mutex_enter(&DEVI(dip)->devi_ct_lock);
+	mutex_enter(&ct->ct_lock);
+	if (ctd->cond_currev_id != evid) {
+		char *buf;
+		mutex_exit(&ct->ct_lock);
+		mutex_exit(&DEVI(dip)->devi_ct_lock);
+		/* hold the dip only long enough to format its path */
+		ndi_hold_devi(dip);
+		buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+		(void) ddi_pathname(dip, buf);
+		ddi_release_devi(dip);
+		CT_DEBUG((CE_WARN, "%sACK for non-current event "
+		    "(type=%s, id=%llu) on device %s",
+		    cmd == CT_NACK ? "N" : "",
+		    state_str(evtype), (unsigned long long)evid, buf));
+		kmem_free(buf, MAXPATHLEN);
+		CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d",
+		    cmd == CT_NACK ? ETIMEDOUT : 0, ctid));
+		return (cmd == CT_ACK ? 0 : ETIMEDOUT);
+	}
+
+	ASSERT(ctd->cond_currev_type == evtype);
+	ASSERT(cmd == CT_ACK || cmd == CT_NACK);
+
+	CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d",
+	    cmd == CT_NACK ? "N" : "", ctid));
+
+	/* record the response, then account for it on the barrier */
+	ctd->cond_currev_ack = cmd;
+	mutex_exit(&ct->ct_lock);
+
+	ct_barrier_decr(dip);
+	mutex_exit(&DEVI(dip)->devi_ct_lock);
+
+	CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid));
+
+	return (0);
+}
+
+/*
+ * ACK entry point: a userland contract holder has approved the state
+ * change identified by evtype/evid. Thin wrapper around
+ * contract_device_ack_nack() with cmd CT_ACK.
+ */
+static int
+contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid)
+{
+	int rv;
+
+	rv = contract_device_ack_nack(ct, evtype, evid, CT_ACK);
+	return (rv);
+}
+
+/*
+ * NACK entry point: a userland contract holder has blocked the state
+ * change identified by evtype/evid. Thin wrapper around
+ * contract_device_ack_nack() with cmd CT_NACK.
+ */
+static int
+contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid)
+{
+	int rv;
+
+	rv = contract_device_ack_nack(ct, evtype, evid, CT_NACK);
+	return (rv);
+}
+
+/*
+ * newct entry point: would create a new contract synchronously with the
+ * breaking of an existing one. Device contracts do not support this, so
+ * the call always fails with ENOTSUP.
+ */
+/*ARGSUSED*/
+static int
+contract_device_newct(contract_t *ct)
+{
+	return (ENOTSUP);
+}
+
+/*
+ * Core device contract implementation entry points
+ *
+ * Positional initializer: the entries must stay in the order given by the
+ * trailing comments. Registered with the contract framework by
+ * contract_device_init().
+ */
+static contops_t contract_device_ops = {
+ contract_device_free, /* contop_free */
+ contract_device_abandon, /* contop_abandon */
+ contract_device_destroy, /* contop_destroy */
+ contract_device_status, /* contop_status */
+ contract_device_ack, /* contop_ack */
+ contract_device_nack, /* contop_nack */
+ contract_qack_notsup, /* contop_qack */
+ contract_device_newct /* contop_newct */
+};
+
+/*
+ * contract_device_init
+ *
+ * Registers the "device" contract type (CTT_DEVICE) together with its
+ * entry points and default template constructor, and records the
+ * resulting type in the global device_type.
+ */
+void
+contract_device_init(void)
+{
+	ct_type_t *type;
+
+	type = contract_type_init(CTT_DEVICE, "device",
+	    &contract_device_ops, contract_device_default);
+	device_type = type;
+}
+
+/*
+ * contract_device_create
+ *
+ * create a device contract given template "tmpl" and the "owner" process.
+ * May fail and return NULL if project.max-contracts would have been exceeded.
+ *
+ * Common device contract creation routine called for both open-time and
+ * non-open time device contract creation
+ *
+ * On failure NULL is returned and *errorp is set: ERANGE if no devinfo
+ * node can be found for the template's minor path, EAGAIN if
+ * contract_ctor() fails. On success *errorp is 0 and the new contract has
+ * been linked onto the devinfo node's contract list.
+ *
+ * The per-dip barrier is acquired for the duration of the creation and
+ * released on both the success and the contract_ctor() failure paths.
+ */
+static cont_device_t *
+contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type,
+ proc_t *owner, int *errorp)
+{
+ cont_device_t *ctd;
+ char *minor;
+ char *path;
+ dev_info_t *dip;
+
+ ASSERT(dtmpl != NULL);
+ ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE);
+ ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK);
+ ASSERT(errorp);
+
+ *errorp = 0;
+
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ /* snapshot the minor path under the template lock */
+ mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock);
+ ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
+ bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1);
+ mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
+
+ dip = e_ddi_hold_devi_by_path(path, 0);
+ if (dip == NULL) {
+ cmn_err(CE_WARN, "contract_create: Cannot find devinfo node "
+ "for device path (%s)", path);
+ kmem_free(path, MAXPATHLEN);
+ *errorp = ERANGE;
+ return (NULL);
+ }
+
+ /*
+ * Lock out any parallel contract negotiations
+ */
+ mutex_enter(&(DEVI(dip)->devi_ct_lock));
+ ct_barrier_acquire(dip);
+ mutex_exit(&(DEVI(dip)->devi_ct_lock));
+
+ minor = i_ddi_strdup(path, KM_SLEEP);
+ kmem_free(path, MAXPATHLEN);
+
+ (void) contract_type_pbundle(device_type, owner);
+
+ ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP);
+
+ /*
+ * Only we hold a reference to this contract. Safe to access
+ * the fields without a ct_lock
+ */
+ ctd->cond_minor = minor;
+ /*
+ * It is safe to set the dip pointer in the contract
+ * as the contract will always be destroyed before the dip
+ * is released
+ */
+ ctd->cond_dip = dip;
+ ctd->cond_devt = dev;
+ ctd->cond_spec = spec_type;
+
+ /*
+ * Since we are able to lookup the device, it is either
+ * online or degraded
+ */
+ ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ?
+ CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE;
+
+ mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock);
+ ctd->cond_aset = dtmpl->ctd_aset;
+ ctd->cond_noneg = dtmpl->ctd_noneg;
+
+ /*
+ * contract_ctor() initializes the common portion of a contract
+ * contract_dtor() destroys the common portion of a contract
+ */
+ if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl,
+ ctd, 0, owner, B_TRUE)) {
+ mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
+ /*
+ * contract_device_free() destroys the type specific
+ * portion of a contract and frees the contract.
+ * The "minor" path and "cred" is a part of the type specific
+ * portion of the contract and will be freed by
+ * contract_device_free()
+ */
+ contract_device_free(&ctd->cond_contract);
+
+ /* release barrier */
+ mutex_enter(&(DEVI(dip)->devi_ct_lock));
+ ct_barrier_release(dip);
+ mutex_exit(&(DEVI(dip)->devi_ct_lock));
+
+ /* drop the hold taken by e_ddi_hold_devi_by_path() */
+ ddi_release_devi(dip);
+ *errorp = EAGAIN;
+ return (NULL);
+ }
+ mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
+
+ /* set the negotiation and quantum timers for the new contract */
+ mutex_enter(&ctd->cond_contract.ct_lock);
+ ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME;
+ ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME;
+ ctd->cond_contract.ct_ntime.ctm_start = -1;
+ ctd->cond_contract.ct_qtime.ctm_start = -1;
+ mutex_exit(&ctd->cond_contract.ct_lock);
+
+ /*
+ * Insert device contract into list hanging off the dip
+ * Bump up the ref-count on the contract to reflect this
+ */
+ contract_hold(&ctd->cond_contract);
+ mutex_enter(&(DEVI(dip)->devi_ct_lock));
+ list_insert_tail(&(DEVI(dip)->devi_ct), ctd);
+
+ /* release barrier */
+ ct_barrier_release(dip);
+ mutex_exit(&(DEVI(dip)->devi_ct_lock));
+
+ /* drop the hold taken by e_ddi_hold_devi_by_path() */
+ ddi_release_devi(dip);
+
+ return (ctd);
+}
+
+/*
+ * Creates an open-time contract, i.e. one created synchronously with a
+ * successful device open.
+ *
+ * The calling lwp's active device contract template is duplicated; any
+ * minor path the user placed in it is discarded and replaced by the path
+ * derived from the (dev, spec_type) actually opened.
+ *
+ * Returns 0 if a contract was created, or if none was wanted (the caller
+ * is not in user context, or ctmpl_dup() yields no template). Returns 1 if
+ * the path could not be derived or the contract could not be created. If
+ * ctpp is non-NULL, *ctpp is set to the new contract, or to NULL when none
+ * was created.
+ */
+int
+contract_device_open(dev_t dev, int spec_type, contract_t **ctpp)
+{
+	ctmpl_device_t *dtmpl;
+	ct_template_t *tmpl;
+	cont_device_t *ctd;
+	char *path;
+	klwp_t *lwp;
+	int error;
+
+	if (ctpp != NULL)
+		*ctpp = NULL;
+
+	/*
+	 * Only threads with an lwp are in user context
+	 */
+	lwp = ttolwp(curthread);
+	if (lwp == NULL) {
+		CT_DEBUG((CE_NOTE, "contract_open: Not user-context"));
+		return (0);
+	}
+
+	tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]);
+	if (tmpl == NULL)
+		return (0);
+
+	dtmpl = tmpl->ctmpl_data;
+
+	/*
+	 * A minor path set in the template before the open is ignored in
+	 * favor of the minor actually opened. This is a private copy of
+	 * the active template, so it is safe to free the path here.
+	 */
+	mutex_enter(&tmpl->ctmpl_lock);
+	if (dtmpl->ctd_minor != NULL) {
+		CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: "
+		    "ignoring device minor path in active template: %s",
+		    curproc->p_pid, dtmpl->ctd_minor));
+		kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1);
+		dtmpl->ctd_minor = NULL;
+	}
+	mutex_exit(&tmpl->ctmpl_lock);
+
+	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+	if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) {
+		CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive "
+		    "minor path from dev_t,spec {%lu, %d} for process (%d)",
+		    dev, spec_type, curproc->p_pid));
+		ctmpl_free(tmpl);
+		kmem_free(path, MAXPATHLEN);
+		return (1);
+	}
+
+	/* lend the MAXPATHLEN buffer to the template for the create call */
+	mutex_enter(&tmpl->ctmpl_lock);
+	ASSERT(dtmpl->ctd_minor == NULL);
+	dtmpl->ctd_minor = path;
+	mutex_exit(&tmpl->ctmpl_lock);
+
+	ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error);
+
+	/*
+	 * Take the buffer back before freeing the template, so that the
+	 * template free does not see it; it is freed here with its real
+	 * allocation size.
+	 */
+	mutex_enter(&tmpl->ctmpl_lock);
+	ASSERT(dtmpl->ctd_minor);
+	dtmpl->ctd_minor = NULL;
+	mutex_exit(&tmpl->ctmpl_lock);
+	ctmpl_free(tmpl);
+	kmem_free(path, MAXPATHLEN);
+
+	if (ctd == NULL) {
+		cmn_err(CE_NOTE, "contract_device_open(): Failed to "
+		    "create device contract for process (%d) holding "
+		    "device (devt = %lu, spec_type = %d)",
+		    curproc->p_pid, dev, spec_type);
+		return (1);
+	}
+
+	if (ctpp != NULL) {
+		mutex_enter(&ctd->cond_contract.ct_lock);
+		*ctpp = &ctd->cond_contract;
+		mutex_exit(&ctd->cond_contract.ct_lock);
+	}
+
+	return (0);
+}
+
+/*
+ * Called during contract negotiation to wait for ACKs or NACKs from
+ * contract holders, giving up after CT_DEV_ACKTIME seconds. Called with
+ * devi_ct_lock held.
+ *
+ * After the wait, every negotiable contract on the dip that matches
+ * (dev, spec_type) is examined (all contracts match when dev is
+ * DDI_DEV_T_ANY):
+ *	- any NACK yields CT_NACK immediately
+ *	- otherwise at least one ACK yields CT_ACK
+ *	- otherwise the result is CT_NONE, whether or not the wait timed out
+ */
+static uint_t
+wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype)
+{
+	cont_device_t *ctd;
+	int timed_out = 0;
+	int result = CT_NONE;
+	int ack;
+	char *f = "wait_for_acks";
+
+	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+	ASSERT(dip);
+	ASSERT(evtype & CT_DEV_ALLEVENT);
+	ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
+	ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
+	    (spec_type == S_IFBLK || spec_type == S_IFCHR));
+
+	CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip));
+
+	if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) {
+		/*
+		 * some contract owner(s) didn't respond in time
+		 */
+		CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip));
+		timed_out = 1;
+	}
+
+	ack = 0;
+	ctd = list_head(&(DEVI(dip)->devi_ct));
+	while (ctd != NULL) {
+		int skip;
+
+		mutex_enter(&ctd->cond_contract.ct_lock);
+
+		ASSERT(ctd->cond_dip == dip);
+
+		/*
+		 * Contracts on a different minor, and non-negotiable
+		 * contracts, take no part in the negotiation.
+		 */
+		skip = (dev != DDI_DEV_T_ANY &&
+		    (dev != ctd->cond_devt || spec_type != ctd->cond_spec)) ||
+		    ctd->cond_noneg;
+
+		if (!skip) {
+			ASSERT(ctd->cond_currev_type == evtype);
+			if (ctd->cond_currev_ack == CT_NACK) {
+				CT_DEBUG((CE_NOTE,
+				    "%s: found a NACK,result = NACK: %p",
+				    f, (void *)dip));
+				mutex_exit(&ctd->cond_contract.ct_lock);
+				return (CT_NACK);
+			}
+			if (ctd->cond_currev_ack == CT_ACK) {
+				ack = 1;
+				CT_DEBUG((CE_NOTE, "%s: found a ACK: %p",
+				    f, (void *)dip));
+			}
+		}
+
+		mutex_exit(&ctd->cond_contract.ct_lock);
+		ctd = list_next(&(DEVI(dip)->devi_ct), ctd);
+	}
+
+	if (ack) {
+		result = CT_ACK;
+		CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip));
+	} else if (timed_out) {
+		result = CT_NONE;
+		CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p",
+		    f, (void *)dip));
+	} else {
+		CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p",
+		    f, (void *)dip));
+	}
+
+	return (result);
+}
+
+/*
+ * Determines the current state of a device (i.e. a devinfo node) as a
+ * CT_DEV_EV_* constant. Offline or down maps to OFFLINE, degraded maps to
+ * DEGRADED, and anything else is ONLINE.
+ */
+static int
+get_state(dev_info_t *dip)
+{
+	if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip)) {
+		return (CT_DEV_EV_OFFLINE);
+	}
+
+	if (DEVI_IS_DEVICE_DEGRADED(dip)) {
+		return (CT_DEV_EV_DEGRADED);
+	}
+
+	return (CT_DEV_EV_ONLINE);
+}
+
+/*
+ * Records the device's current state (as reported by get_state()) in
+ * every contract on the dip's contract list. The barrier must be held by
+ * the caller; each contract is updated under its own ct_lock.
+ */
+static void
+set_cond_state(dev_info_t *dip)
+{
+	uint_t state = get_state(dip);
+	cont_device_t *ctd;
+
+	/* verify that barrier is held */
+	ASSERT(ct_barrier_held(dip));
+
+	ctd = list_head(&(DEVI(dip)->devi_ct));
+	while (ctd != NULL) {
+		mutex_enter(&ctd->cond_contract.ct_lock);
+		ASSERT(ctd->cond_dip == dip);
+		ctd->cond_state = state;
+		mutex_exit(&ctd->cond_contract.ct_lock);
+
+		ctd = list_next(&(DEVI(dip)->devi_ct), ctd);
+	}
+}
+
+/*
+ * Core routine called by event-specific routines when an event occurs.
+ * Determines if an event should be published, and if it is to be
+ * published, whether a negotiation should take place. Also implements
+ * NEGEND events which publish the final disposition of an event after
+ * negotiations are complete.
+ *
+ * When an event occurs on a minor node, this routine walks the list of
+ * contracts hanging off a devinfo node and for each contract on the affected
+ * dip, evaluates the following cases
+ *
+ * a. an event that is synchronous, breaks the contract and NONEG not set
+ * - bumps up the outstanding negotiation counts on the dip
+ * - marks the dip as undergoing negotiation (devi_ct_neg)
+ * - event of type CTE_NEG is published
+ * b. an event that is synchronous, breaks the contract and NONEG is set
+ * - sets the final result to CT_NACK, event is blocked
+ * - does not publish an event
+ * c. event is asynchronous and breaks the contract
+ * - publishes a critical event irrespective of whether the NONEG
+ * flag is set, since the contract will be broken and contract
+ * owner needs to be informed.
+ * d. No contract breakage but the owner has subscribed to the event
+ * - publishes the event irrespective of the NONEG flag as the
+ * owner has explicitly subscribed to the event.
+ * e. NEGEND event
+ * - publishes a critical event. Should only be doing this if
+ * NONEG is not set.
+ * f. all other events
+ * - Since a contract is not broken and this event has not been
+ * subscribed to, this event does not need to be published
+ * for this contract.
+ *
+ * Once an event is published, what happens next depends on the type of
+ * event:
+ *
+ * a. NEGEND event
+ * - cleanup all state associated with the preceding negotiation
+ * and return CT_ACK to the caller of contract_device_publish()
+ * b. NACKed event
+ * - One or more contracts had the NONEG term, so the event was
+ * blocked. Return CT_NACK to the caller.
+ * c. Negotiated event
+ * - Call wait_for_acks() to wait for responses from contract
+ * holders. The end result is either CT_ACK (event is permitted),
+ * CT_NACK (event is blocked) or CT_NONE (no contract owner
+ * responded). This result is returned back to the caller.
+ * d. All other events
+ * - If the event was asynchronous (i.e. not negotiated) or
+ * a contract was not broken return CT_ACK to the caller.
+ */
+static uint_t
+contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type,
+ uint_t evtype, nvlist_t *tnvl)
+{
+ cont_device_t *ctd;
+ uint_t result = CT_NONE;
+ uint64_t evid = 0;
+ uint64_t nevid = 0;
+ char *path = NULL;
+ int negend;
+ int match;
+ int sync = 0;
+ contract_t *ct;
+ ct_kevent_t *event;
+ nvlist_t *nvl;
+ int broken = 0;
+
+ ASSERT(dip);
+ ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
+ ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
+ (spec_type == S_IFBLK || spec_type == S_IFCHR));
+ ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT));
+
+ /* Is this a synchronous state change ? */
+ if (evtype != CT_EV_NEGEND) {
+ sync = is_sync_neg(get_state(dip), evtype);
+ /* NOP if unsupported transition */
+ /*
+ * The NOP is remembered in DEVI_CT_NOP so that the NEGEND call
+ * that follows is short-circuited (see the else-if arm below).
+ * NOTE(review): -2 maps to CT_ACK and -1 to CT_NONE here; the
+ * exact meaning of these is_sync_neg() codes is defined
+ * elsewhere - confirm.
+ */
+ if (sync == -2 || sync == -1) {
+ DEVI(dip)->devi_flags |= DEVI_CT_NOP;
+ result = (sync == -2) ? CT_ACK : CT_NONE;
+ goto out;
+ }
+ CT_DEBUG((CE_NOTE, "publish: is%s sync state change",
+ sync ? "" : " not"));
+ } else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) {
+ DEVI(dip)->devi_flags &= ~DEVI_CT_NOP;
+ result = CT_ACK;
+ goto out;
+ }
+
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ (void) ddi_pathname(dip, path);
+
+ mutex_enter(&(DEVI(dip)->devi_ct_lock));
+
+ /*
+ * Negotiation end - set the state of the device in the contract
+ */
+ if (evtype == CT_EV_NEGEND) {
+ CT_DEBUG((CE_NOTE, "publish: negend: setting cond state"));
+ set_cond_state(dip);
+ }
+
+ /*
+ * If this device didn't go through negotiation, don't publish
+ * a NEGEND event - simply release the barrier to allow other
+ * device events in.
+ */
+ negend = 0;
+ if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) {
+ CT_DEBUG((CE_NOTE, "publish: no negend reqd. release barrier"));
+ ct_barrier_release(dip);
+ mutex_exit(&(DEVI(dip)->devi_ct_lock));
+ result = CT_ACK;
+ goto out;
+ } else if (evtype == CT_EV_NEGEND) {
+ /*
+ * There are negotiated contract breakages that
+ * need a NEGEND event
+ */
+ ASSERT(ct_barrier_held(dip));
+ negend = 1;
+ CT_DEBUG((CE_NOTE, "publish: setting negend flag"));
+ } else {
+ /*
+ * This is a new event, not a NEGEND event. Wait for previous
+ * contract events to complete.
+ */
+ ct_barrier_acquire(dip);
+ }
+
+
+ match = 0;
+ for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
+ ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
+
+ ctid_t ctid;
+ /* len is only consumed by the minor-path ASSERT below */
+ size_t len = strlen(path);
+
+ mutex_enter(&ctd->cond_contract.ct_lock);
+
+ ASSERT(ctd->cond_dip == dip);
+ ASSERT(ctd->cond_minor);
+ ASSERT(strncmp(ctd->cond_minor, path, len) == 0 &&
+ ctd->cond_minor[len] == ':');
+
+ if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) {
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ continue;
+ }
+ if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) {
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ continue;
+ }
+
+ /* We have a matching contract */
+ match = 1;
+ ctid = ctd->cond_contract.ct_id;
+ CT_DEBUG((CE_NOTE, "publish: found matching contract: %d",
+ ctid));
+
+ /*
+ * There are 4 possible cases
+ * 1. A contract is broken (dev not in acceptable state) and
+ * the state change is synchronous - start negotiation
+ * by sending a CTE_NEG critical event.
+ * 2. A contract is broken and the state change is
+ * asynchronous - just send a critical event and
+ * break the contract.
+ * 3. Contract is not broken, but consumer has subscribed
+ * to the event as a critical or informative event
+ * - just send the appropriate event
+ * 4. contract waiting for negend event - just send the critical
+ * NEGEND event.
+ */
+ broken = 0;
+ if (!negend && !(evtype & ctd->cond_aset)) {
+ broken = 1;
+ CT_DEBUG((CE_NOTE, "publish: Contract broken: %d",
+ ctid));
+ }
+
+ /*
+ * Don't send event if
+ * - contract is not broken AND
+ * - contract holder has not subscribed to this event AND
+ * - contract not waiting for a NEGEND event
+ */
+ if (!broken && !EVSENDP(ctd, evtype) &&
+ !ctd->cond_neg) {
+ CT_DEBUG((CE_NOTE, "contract_device_publish(): "
+ "contract (%d): no publish reqd: event %d",
+ ctd->cond_contract.ct_id, evtype));
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ continue;
+ }
+
+ /*
+ * Note: need to kmem_zalloc() the event so mutexes are
+ * initialized automatically
+ */
+ ct = &ctd->cond_contract;
+ event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
+ event->cte_type = evtype;
+
+ if (broken && sync) {
+ CT_DEBUG((CE_NOTE, "publish: broken + sync: "
+ "ctid: %d", ctid));
+ ASSERT(!negend);
+ ASSERT(ctd->cond_currev_id == 0);
+ ASSERT(ctd->cond_currev_type == 0);
+ ASSERT(ctd->cond_currev_ack == 0);
+ ASSERT(ctd->cond_neg == 0);
+ if (ctd->cond_noneg) {
+ /* Nothing to publish. Event has been blocked */
+ CT_DEBUG((CE_NOTE, "publish: sync and noneg:"
+ "not publishing blocked ev: ctid: %d",
+ ctid));
+ result = CT_NACK;
+ kmem_free(event, sizeof (ct_kevent_t));
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ continue;
+ }
+ event->cte_flags = CTE_NEG; /* critical neg. event */
+ ctd->cond_currev_type = event->cte_type;
+ ct_barrier_incr(dip);
+ DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */
+ ctd->cond_neg = 1;
+ } else if (broken && !sync) {
+ CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d",
+ ctid));
+ ASSERT(!negend);
+ ASSERT(ctd->cond_currev_id == 0);
+ ASSERT(ctd->cond_currev_type == 0);
+ ASSERT(ctd->cond_currev_ack == 0);
+ ASSERT(ctd->cond_neg == 0);
+ event->cte_flags = 0; /* critical event */
+ } else if (EVSENDP(ctd, event->cte_type)) {
+ CT_DEBUG((CE_NOTE, "publish: event suscrib: ctid: %d",
+ ctid));
+ ASSERT(!negend);
+ ASSERT(ctd->cond_currev_id == 0);
+ ASSERT(ctd->cond_currev_type == 0);
+ ASSERT(ctd->cond_currev_ack == 0);
+ ASSERT(ctd->cond_neg == 0);
+ event->cte_flags = EVINFOP(ctd, event->cte_type) ?
+ CTE_INFO : 0;
+ } else if (ctd->cond_neg) {
+ CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid));
+ ASSERT(negend);
+ ASSERT(ctd->cond_noneg == 0);
+ nevid = ctd->cond_contract.ct_nevent ?
+ ctd->cond_contract.ct_nevent->cte_id : 0;
+ ASSERT(ctd->cond_currev_id == nevid);
+ event->cte_flags = 0; /* NEGEND is always critical */
+ ctd->cond_currev_id = 0;
+ ctd->cond_currev_type = 0;
+ ctd->cond_currev_ack = 0;
+ ctd->cond_neg = 0;
+ } else {
+ CT_DEBUG((CE_NOTE, "publish: not publishing event for "
+ "ctid: %d, evtype: %d",
+ ctd->cond_contract.ct_id, event->cte_type));
+ ASSERT(!negend);
+ ASSERT(ctd->cond_currev_id == 0);
+ ASSERT(ctd->cond_currev_type == 0);
+ ASSERT(ctd->cond_currev_ack == 0);
+ ASSERT(ctd->cond_neg == 0);
+ kmem_free(event, sizeof (ct_kevent_t));
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ continue;
+ }
+
+ /*
+ * Every published event gets its own copy of the caller's
+ * nvlist; tnvl itself is freed once at the "out" label.
+ */
+ nvl = NULL;
+ if (tnvl) {
+ VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0);
+ if (negend) {
+ int32_t newct = 0;
+ ASSERT(ctd->cond_noneg == 0);
+ VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid)
+ == 0);
+ VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT,
+ &newct) == 0);
+ VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
+ newct == 1 ? 0 :
+ ctd->cond_contract.ct_id) == 0);
+ CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d "
+ "CTS_NEVID: %llu, CTS_NEWCT: %s",
+ ctid, (unsigned long long)nevid,
+ newct ? "success" : "failure"));
+
+ }
+ }
+
+ if (ctd->cond_neg) {
+ ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1);
+ ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1);
+ ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt();
+ ctd->cond_contract.ct_qtime.ctm_start =
+ ctd->cond_contract.ct_ntime.ctm_start;
+ }
+
+ /*
+ * by holding the dip's devi_ct_lock we ensure that
+ * all ACK/NACKs are held up until we have finished
+ * publishing to all contracts.
+ */
+ /* ct_lock is dropped across cte_publish_all() and retaken after */
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ evid = cte_publish_all(ct, event, nvl, NULL);
+ mutex_enter(&ctd->cond_contract.ct_lock);
+
+ if (ctd->cond_neg) {
+ ASSERT(!negend);
+ ASSERT(broken);
+ ASSERT(sync);
+ ASSERT(!ctd->cond_noneg);
+ CT_DEBUG((CE_NOTE, "publish: sync break, setting evid"
+ ": %d", ctid));
+ ctd->cond_currev_id = evid;
+ } else if (negend) {
+ ctd->cond_contract.ct_ntime.ctm_start = -1;
+ ctd->cond_contract.ct_qtime.ctm_start = -1;
+ }
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ }
+
+ /*
+ * If "negend" set counter back to initial state (-1) so that
+ * other events can be published. Also clear the negotiation flag
+ * on dip.
+ *
+ * 0 .. n are used for counting.
+ * -1 indicates counter is available for use.
+ */
+ if (negend) {
+ /*
+ * devi_ct_count not necessarily 0. We may have
+ * timed out in which case, count will be non-zero.
+ */
+ ct_barrier_release(dip);
+ DEVI(dip)->devi_ct_neg = 0;
+ CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p",
+ (void *)dip));
+ } else if (DEVI(dip)->devi_ct_neg) {
+ ASSERT(match);
+ ASSERT(!ct_barrier_empty(dip));
+ CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p",
+ DEVI(dip)->devi_ct_count, (void *)dip));
+ } else {
+ /*
+ * for non-negotiated events or subscribed events or no
+ * matching contracts
+ */
+ ASSERT(ct_barrier_empty(dip));
+ ASSERT(DEVI(dip)->devi_ct_neg == 0);
+ CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: "
+ "dip=%p", (void *)dip));
+
+ /*
+ * only this function when called from contract_device_negend()
+ * can reset the counter to READY state i.e. -1. This function
+ * is so called for every event whether a NEGEND event is needed
+ * or not, but the negend event is only published if the event
+ * whose end they signal is a negotiated event for the contract.
+ */
+ }
+
+ if (!match) {
+ /* No matching contracts */
+ CT_DEBUG((CE_NOTE, "publish: No matching contract"));
+ result = CT_NONE;
+ } else if (result == CT_NACK) {
+ /* a non-negotiable contract exists and this is a neg. event */
+ /* result stays CT_NACK; the value from wait_for_acks() is dropped */
+ CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract"));
+ (void) wait_for_acks(dip, dev, spec_type, evtype);
+ } else if (DEVI(dip)->devi_ct_neg) {
+ /* one or more contracts going through negotiations */
+ CT_DEBUG((CE_NOTE, "publish: sync contract: waiting"));
+ result = wait_for_acks(dip, dev, spec_type, evtype);
+ } else {
+ /* no negotiated contracts or no broken contracts or NEGEND */
+ CT_DEBUG((CE_NOTE, "publish: async/no-break/negend"));
+ result = CT_ACK;
+ }
+
+ /*
+ * Release the lock only now so that the only point where we
+ * drop the lock is in wait_for_acks(). This is so that we don't
+ * miss cv_signal/cv_broadcast from contract holders
+ */
+ CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock"));
+ mutex_exit(&(DEVI(dip)->devi_ct_lock));
+
+ /*
+ * Common exit: the caller's nvlist is consumed on every path,
+ * including the early exits taken before path was allocated.
+ */
+out:
+ if (tnvl)
+ nvlist_free(tnvl);
+ if (path)
+ kmem_free(path, MAXPATHLEN);
+
+
+ CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result)));
+ return (result);
+}
+
+
+/*
+ * contract_device_offline
+ *
+ * Called by the I/O framework when a device is offlined. Publishes a
+ * CT_DEV_EV_OFFLINE event and hands back the publish result.
+ */
+ct_ack_t
+contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type)
+{
+	nvlist_t *tnvl;
+	uint_t outcome;
+
+	VERIFY(nvlist_alloc(&tnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
+	outcome = contract_device_publish(dip, dev, spec_type,
+	    CT_DEV_EV_OFFLINE, tnvl);
+
+	/* anything but a NACK is passed straight back */
+	if (outcome != CT_NACK)
+		return (outcome);
+
+	/*
+	 * The offline was NACKed, so the final result is already known:
+	 * the framework expects us to issue the NEGEND ourselves.
+	 */
+	contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE);
+
+	return (outcome);
+}
+
+/*
+ * contract_device_degrade
+ *
+ * Called by the I/O framework when a device moves to the degraded
+ * state. Publishes CT_DEV_EV_DEGRADED; the publish result is ignored.
+ */
+/*ARGSUSED*/
+void
+contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type)
+{
+	nvlist_t *tnvl;
+
+	VERIFY(nvlist_alloc(&tnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
+	(void) contract_device_publish(dip, dev, spec_type,
+	    CT_DEV_EV_DEGRADED, tnvl);
+}
+
+/*
+ * contract_device_undegrade
+ *
+ * Called by the I/O framework when a device moves from the degraded
+ * state back to online. Publishes CT_DEV_EV_ONLINE; the publish result
+ * is ignored.
+ */
+/*ARGSUSED*/
+void
+contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type)
+{
+	nvlist_t *tnvl;
+
+	VERIFY(nvlist_alloc(&tnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
+	(void) contract_device_publish(dip, dev, spec_type,
+	    CT_DEV_EV_ONLINE, tnvl);
+}
+
+/*
+ * Publishes a CT_EV_NEGEND event carrying the final disposition of an
+ * event to every contract that went through a negotiation (i.e. the
+ * device left the contract's acceptable state via a synchronous state
+ * change).
+ *
+ * "result" must be CT_EV_SUCCESS or CT_EV_FAILURE. The NEGEND event is
+ * always a critical event.
+ */
+void
+contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result)
+{
+	nvlist_t *tnvl;
+	int32_t newct;
+
+	ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE);
+
+	CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, "
+	    "dip: %p", result, (void *)dip));
+
+	/* CTS_NEWCT starts out as 1 on success and 0 on failure */
+	newct = (result == CT_EV_SUCCESS) ? 1 : 0;
+
+	VERIFY(nvlist_alloc(&tnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+	VERIFY(nvlist_add_int32(tnvl, CTS_NEWCT, newct) == 0);
+
+	(void) contract_device_publish(dip, dev, spec_type, CT_EV_NEGEND,
+	    tnvl);
+
+	CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p",
+	    (void *)dip));
+}
+
+/*
+ * Entry point for other subsystems (such as LDI) to begin negotiation
+ * when a synchronous device state change occurs. Only
+ * CT_DEV_EV_OFFLINE is supported; any other event type panics.
+ * Returns CT_ACK or CT_NACK.
+ */
+ct_ack_t
+contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type,
+    uint_t evtype)
+{
+	ASSERT(dip);
+	ASSERT(dev != NODEV);
+	ASSERT(dev != DDI_DEV_T_ANY);
+	ASSERT(dev != DDI_DEV_T_NONE);
+	ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
+
+	/* offline is the one event that can be negotiated */
+	if (evtype == CT_DEV_EV_OFFLINE)
+		return (contract_device_offline(dip, dev, spec_type));
+
+	cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation "
+	    "not supported: event (%d) for dev_t (%lu) and spec (%d), "
+	    "dip (%p)", evtype, dev, spec_type, (void *)dip);
+
+	return (CT_NACK);
+}
+
+/*
+ * Entry point for other subsystems (such as the LDI) to finish event
+ * processing for a state change. An OFFLINE only needs its NEGEND
+ * published; DEGRADED and ONLINE first publish the event itself and
+ * then the NEGEND. Any other event type panics.
+ */
+void
+contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type,
+    uint_t evtype, int ct_result)
+{
+	ASSERT(dip);
+	ASSERT(dev != NODEV);
+	ASSERT(dev != DDI_DEV_T_ANY);
+	ASSERT(dev != DDI_DEV_T_NONE);
+	ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
+
+	/* step 1: publish the event itself where that is still pending */
+	switch (evtype) {
+	case CT_DEV_EV_OFFLINE:
+		break;
+	case CT_DEV_EV_DEGRADED:
+		contract_device_degrade(dip, dev, spec_type);
+		break;
+	case CT_DEV_EV_ONLINE:
+		contract_device_undegrade(dip, dev, spec_type);
+		break;
+	default:
+		cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported "
+		    "event (%d) for dev_t (%lu) and spec (%d), dip (%p)",
+		    evtype, dev, spec_type, (void *)dip);
+		return;
+	}
+
+	/* step 2: every supported event is closed off with a NEGEND */
+	contract_device_negend(dip, dev, spec_type, ct_result);
+}
+
+/*
+ * Called by the I/O framework when a devinfo node is freed. Waits for
+ * the barrier on dip to be released, then detaches every contract from
+ * the node's contract list and drops the hold taken for the linkage.
+ */
+void
+contract_device_remove_dip(dev_info_t *dip)
+{
+	cont_device_t *ctd;
+	cont_device_t *succ;
+	contract_t *ct;
+
+	mutex_enter(&(DEVI(dip)->devi_ct_lock));
+	ct_barrier_wait_for_release(dip);
+
+	ctd = list_head(&(DEVI(dip)->devi_ct));
+	while (ctd != NULL) {
+		/* pick up the successor before ctd is taken off the list */
+		succ = list_next(&(DEVI(dip)->devi_ct), ctd);
+		list_remove(&(DEVI(dip)->devi_ct), ctd);
+
+		/*
+		 * Break the contract -> dip link under the contract lock.
+		 * NOTE(review): ct is still dereferenced after
+		 * contract_rele(); this presumably relies on the dip hold
+		 * never being the last reference - confirm.
+		 */
+		ct = &ctd->cond_contract;
+		mutex_enter(&ct->ct_lock);
+		ASSERT(ctd->cond_dip == dip);
+		ctd->cond_dip = NULL;	/* no longer linked to dip */
+		contract_rele(ct);	/* remove hold for dip linkage */
+		CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: "
+		    "ctid: %d", ct->ct_id));
+		mutex_exit(&ct->ct_lock);
+
+		ctd = succ;
+	}
+
+	ASSERT(list_is_empty(&(DEVI(dip)->devi_ct)));
+	mutex_exit(&(DEVI(dip)->devi_ct_lock));
+}
+
+/*
+ * Barrier related routines
+ */
+/*
+ * Waits on devi_ct_cv until the barrier counter reads -1 (available),
+ * then claims the barrier by setting the counter to 0. The caller must
+ * hold devi_ct_lock.
+ */
+static void
+ct_barrier_acquire(dev_info_t *dip)
+{
+	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+	CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier"));
+
+	for (;;) {
+		if (DEVI(dip)->devi_ct_count == -1)
+			break;
+		cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
+	}
+
+	/* the barrier is ours: start counting from 0 */
+	DEVI(dip)->devi_ct_count = 0;
+	CT_DEBUG((CE_NOTE, "ct_barrier_acquire: thread owns barrier"));
+}
+
+/*
+ * Gives up the barrier: puts the counter back to -1 (available) and
+ * wakes every thread waiting on devi_ct_cv. The caller must hold
+ * devi_ct_lock and the barrier must currently be held.
+ */
+static void
+ct_barrier_release(dev_info_t *dip)
+{
+	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+	ASSERT(DEVI(dip)->devi_ct_count != -1);
+
+	DEVI(dip)->devi_ct_count = -1;
+
+	cv_broadcast(&(DEVI(dip)->devi_ct_cv));
+	CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier"));
+}
+
+/*
+ * Reports whether the barrier on dip is held, i.e. whether the counter
+ * is anything other than -1. The caller must hold devi_ct_lock.
+ */
+static int
+ct_barrier_held(dev_info_t *dip)
+{
+	int held;
+
+	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+
+	held = (DEVI(dip)->devi_ct_count != -1);
+	return (held);
+}
+
+/*
+ * Reports whether the held barrier's counter is 0. The caller must
+ * hold devi_ct_lock and the barrier must currently be held.
+ */
+static int
+ct_barrier_empty(dev_info_t *dip)
+{
+	int empty;
+
+	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+	ASSERT(DEVI(dip)->devi_ct_count != -1);
+
+	empty = (DEVI(dip)->devi_ct_count == 0);
+	return (empty);
+}
+
+/*
+ * Waits on devi_ct_cv until the barrier counter reads -1 (available),
+ * without claiming the barrier. The caller must hold devi_ct_lock.
+ */
+static void
+ct_barrier_wait_for_release(dev_info_t *dip)
+{
+	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+
+	for (;;) {
+		if (DEVI(dip)->devi_ct_count == -1)
+			return;
+		cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
+	}
+}
+
+/*
+ * Decrements the barrier counter by one and, once it reaches 0, wakes
+ * every thread waiting on devi_ct_cv. The caller must hold devi_ct_lock
+ * and the counter must be positive.
+ */
+static void
+ct_barrier_decr(dev_info_t *dip)
+{
+	CT_DEBUG((CE_NOTE, "barrier_decr: ct_count before decr: %d",
+	    DEVI(dip)->devi_ct_count));
+
+	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+	ASSERT(DEVI(dip)->devi_ct_count > 0);
+
+	DEVI(dip)->devi_ct_count -= 1;
+
+	/* nothing to announce while the counter is still non-zero */
+	if (DEVI(dip)->devi_ct_count != 0)
+		return;
+
+	cv_broadcast(&DEVI(dip)->devi_ct_cv);
+	CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast"));
+}
+
+/*
+ * Increments the barrier counter by one. The barrier must currently be
+ * held.
+ */
+static void
+ct_barrier_incr(dev_info_t *dip)
+{
+	ASSERT(ct_barrier_held(dip));
+
+	DEVI(dip)->devi_ct_count += 1;
+}
+
+/*
+ * Waits on devi_ct_cv until the barrier counter on dip drops to 0,
+ * giving up "secs" seconds after the call. The caller must hold
+ * devi_ct_lock.
+ *
+ * Returns 0 once the counter is 0, or -1 if cv_timedwait() returned -1
+ * before that happened.
+ */
+static int
+ct_barrier_wait_for_empty(dev_info_t *dip, int secs)
+{
+	clock_t abstime;
+
+	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+
+	/*
+	 * Widen secs to clock_t before scaling to microseconds, so the
+	 * multiplication is not carried out in int (where it would
+	 * overflow for large timeouts).
+	 */
+	abstime = ddi_get_lbolt() + drv_usectohz((clock_t)secs * 1000000);
+	while (DEVI(dip)->devi_ct_count) {
+		if (cv_timedwait(&(DEVI(dip)->devi_ct_cv),
+		    &(DEVI(dip)->devi_ct_lock), abstime) == -1) {
+			return (-1);
+		}
+	}
+	return (0);
+}
diff --git a/usr/src/uts/common/contract/process.c b/usr/src/uts/common/contract/process.c
index 8240051f00..c92ce34352 100644
--- a/usr/src/uts/common/contract/process.c
+++ b/usr/src/uts/common/contract/process.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -502,7 +501,7 @@ contract_process_adopt(contract_t *ct, proc_t *p)
}
/*
- * contract_process_status
+ * contract_process_abandon
*
* The process contract abandon entry point.
*/
@@ -632,11 +631,23 @@ contract_process_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl,
}
}
+/*
+ * contract_process_newct
+ *
+ * The process contract newct entry point. Ignores its argument and
+ * always returns 0.
+ */
+/*ARGSUSED*/
+static int
+contract_process_newct(contract_t *ct)
+{
+ return (0);
+}
+
+/* process contracts don't negotiate */
static contops_t contract_process_ops = {
contract_process_free, /* contop_free */
contract_process_abandon, /* contop_abandon */
contract_process_destroy, /* contop_destroy */
- contract_process_status /* contop_status */
+ contract_process_status, /* contop_status */
+ contract_ack_inval, /* contop_ack */
+ contract_ack_inval, /* contop_nack */
+ contract_qack_inval, /* contop_qack */
+ contract_process_newct /* contop_newct */
};
/*
@@ -774,7 +785,7 @@ contract_process_exit(cont_process_t *ctp, proc_t *p, int exitstatus)
event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
event->cte_flags = EVINFOP(ctp, CT_PR_EV_EXIT) ? CTE_INFO : 0;
event->cte_type = CT_PR_EV_EXIT;
- cte_publish_all(ct, event, nvl, NULL);
+ (void) cte_publish_all(ct, event, nvl, NULL);
mutex_enter(&ct->ct_lock);
}
if (empty) {
@@ -793,7 +804,7 @@ contract_process_exit(cont_process_t *ctp, proc_t *p, int exitstatus)
event->cte_flags = EVINFOP(ctp, CT_PR_EV_EMPTY) ?
CTE_INFO : 0;
event->cte_type = CT_PR_EV_EMPTY;
- cte_publish_all(ct, event, nvl, NULL);
+ (void) cte_publish_all(ct, event, nvl, NULL);
mutex_enter(&ct->ct_lock);
}
@@ -877,7 +888,7 @@ contract_process_fork(ctmpl_process_t *rtmpl, proc_t *cp, proc_t *pp,
event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
event->cte_flags = EVINFOP(ctp, CT_PR_EV_FORK) ? CTE_INFO : 0;
event->cte_type = CT_PR_EV_FORK;
- cte_publish_all(ct, event, nvl, NULL);
+ (void) cte_publish_all(ct, event, nvl, NULL);
}
return (ctp);
}
@@ -924,7 +935,7 @@ contract_process_core(cont_process_t *ctp, proc_t *p, int sig,
event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
event->cte_flags = EVINFOP(ctp, CT_PR_EV_CORE) ? CTE_INFO : 0;
event->cte_type = CT_PR_EV_CORE;
- cte_publish_all(ct, event, nvl, gnvl);
+ (void) cte_publish_all(ct, event, nvl, gnvl);
}
if (EVFATALP(ctp, CT_PR_EV_CORE)) {
@@ -956,7 +967,7 @@ contract_process_hwerr(cont_process_t *ctp, proc_t *p)
event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
event->cte_flags = EVINFOP(ctp, CT_PR_EV_HWERR) ? CTE_INFO : 0;
event->cte_type = CT_PR_EV_HWERR;
- cte_publish_all(ct, event, nvl, NULL);
+ (void) cte_publish_all(ct, event, nvl, NULL);
}
if (EVFATALP(ctp, CT_PR_EV_HWERR)) {
@@ -1006,7 +1017,7 @@ contract_process_sig(cont_process_t *ctp, proc_t *p, int sig, pid_t pid,
event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
event->cte_flags = EVINFOP(ctp, CT_PR_EV_SIGNAL) ? CTE_INFO : 0;
event->cte_type = CT_PR_EV_SIGNAL;
- cte_publish_all(ct, event, nvl, gnvl);
+ (void) cte_publish_all(ct, event, nvl, gnvl);
}
if (EVFATALP(ctp, CT_PR_EV_SIGNAL)) {
diff --git a/usr/src/uts/common/fs/ctfs/ctfs_ctl.c b/usr/src/uts/common/fs/ctfs/ctfs_ctl.c
index f5a0514565..da293cbb21 100644
--- a/usr/src/uts/common/fs/ctfs/ctfs_ctl.c
+++ b/usr/src/uts/common/fs/ctfs/ctfs_ctl.c
@@ -177,6 +177,7 @@ ctfs_ctl_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, cred_t *cr,
contract_t *ct = ctlnode->ctfs_ctl_contract;
int error = 0;
uint64_t event;
+ int ack;
switch (cmd) {
case CT_CABANDON:
@@ -184,15 +185,21 @@ ctfs_ctl_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, cred_t *cr,
break;
case CT_CACK:
+ case CT_CNACK:
if (copyin((void *)arg, &event, sizeof (uint64_t)))
return (EFAULT);
- error = contract_ack(ct, event);
+ ack = (cmd == CT_CACK) ? CT_ACK : CT_NACK;
+ error = contract_ack(ct, event, ack);
break;
case CT_CNEWCT:
+ error = contract_newct(ct);
break;
case CT_CQREQ:
+ if (copyin((void *)arg, &event, sizeof (uint64_t)))
+ return (EFAULT);
+ error = contract_qack(ct, event);
break;
case CT_CADOPT:
diff --git a/usr/src/uts/common/fs/ctfs/ctfs_tmpl.c b/usr/src/uts/common/fs/ctfs/ctfs_tmpl.c
index 28d0c93662..d99b8f56e8 100644
--- a/usr/src/uts/common/fs/ctfs/ctfs_tmpl.c
+++ b/usr/src/uts/common/fs/ctfs/ctfs_tmpl.c
@@ -114,6 +114,7 @@ ctfs_tmpl_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, cred_t *cr,
{
ctfs_tmplnode_t *tmplnode = vp->v_data;
ct_param_t param;
+ ctid_t ctid;
int error;
switch (cmd) {
@@ -127,7 +128,11 @@ ctfs_tmpl_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, cred_t *cr,
break;
case CT_TCREATE:
ASSERT(tmplnode->ctfs_tmn_tmpl != NULL);
- return (ctmpl_create(tmplnode->ctfs_tmn_tmpl));
+ error = ctmpl_create(tmplnode->ctfs_tmn_tmpl, &ctid);
+ if (error)
+ return (error);
+ *rvalp = ctid;
+ break;
case CT_TSET:
if (copyin((void *)arg, &param, sizeof (ct_param_t)))
return (EFAULT);
diff --git a/usr/src/uts/common/fs/specfs/specsubr.c b/usr/src/uts/common/fs/specfs/specsubr.c
index ea87c688d6..85d9089b82 100644
--- a/usr/src/uts/common/fs/specfs/specsubr.c
+++ b/usr/src/uts/common/fs/specfs/specsubr.c
@@ -70,6 +70,7 @@
struct vfs spec_vfs;
static dev_t specdev;
struct kmem_cache *snode_cache;
+int spec_debug = 0;
static struct snode *sfind(dev_t, vtype_t, struct vnode *);
static struct vnode *get_cvp(dev_t, vtype_t, struct snode *, int *);
@@ -259,6 +260,54 @@ makespecvp(dev_t dev, vtype_t type)
return (svp);
}
+
+/*
+ * This function is called from spec_assoc_vp_with_devi(). That function
+ * associates a "new" dip with a common snode, releasing (any) old dip
+ * in the process. This function (spec_assoc_fence()) looks at the "new dip"
+ * and determines whether the snode should be fenced off or not. As the table
+ * below indicates, the value of old-dip is a don't care for all cases.
+ *
+ * old-dip new-dip common-snode
+ * =========================================
+ * Don't care NULL unfence
+ * Don't care retired fence
+ * Don't care not-retired unfence
+ *
+ * Since old-dip value is a "don't care", it is not passed into this function.
+ */
+static void
+spec_assoc_fence(dev_info_t *ndip, vnode_t *vp)
+{
+	struct snode *csp;
+	int fence = 0;
+
+	ASSERT(vp);
+	ASSERT(vn_matchops(vp, spec_getvnodeops()));
+
+	/* only a non-NULL dip that is marked retired calls for a fence */
+	if (ndip != NULL) {
+		mutex_enter(&DEVI(ndip)->devi_lock);
+		fence = (DEVI(ndip)->devi_flags & DEVI_RETIRED) ? 1 : 0;
+		mutex_exit(&DEVI(ndip)->devi_lock);
+	}
+
+	csp = VTOCS(vp);
+	ASSERT(csp);
+
+	/* SFENCED flag only set on common snode */
+	mutex_enter(&csp->s_lock);
+	if (!fence)
+		csp->s_flag &= ~SFENCED;
+	else
+		csp->s_flag |= SFENCED;
+	mutex_exit(&csp->s_lock);
+
+	FENDBG((CE_NOTE, "%sfenced common snode (%p) for new dip=%p",
+	    fence ? "" : "un", (void *)csp, (void *)ndip));
+}
+
/*
* Associate the common snode with a devinfo node. This is called from:
*
@@ -322,6 +371,8 @@ spec_assoc_vp_with_devi(struct vnode *vp, dev_info_t *dip)
csp->s_flag &= ~SSIZEVALID;
mutex_exit(&csp->s_lock);
+ spec_assoc_fence(dip, vp);
+
/* release the old */
if (olddip)
ddi_release_devi(olddip);
@@ -889,3 +940,113 @@ spec_is_selfclone(vnode_t *vp)
return (0);
}
+
+/*
+ * Fences off snodes of a retired device. Nothing is done unless dip is
+ * marked DEVI_RETIRED. With a non-NULL vp only the common snode of vp
+ * is fenced; with a NULL vp every common snode in the snode table that
+ * is associated with dip is fenced. Always returns 0.
+ */
+int
+spec_fence_snode(dev_info_t *dip, struct vnode *vp)
+{
+	struct snode *sp;
+	struct snode *csp;
+	char *path;
+	int is_retired;
+	int logged = 0;
+	int i;
+
+	ASSERT(dip);
+
+	mutex_enter(&DEVI(dip)->devi_lock);
+	is_retired = (DEVI(dip)->devi_flags & DEVI_RETIRED) ? 1 : 0;
+	mutex_exit(&DEVI(dip)->devi_lock);
+
+	if (!is_retired)
+		return (0);
+
+	/* the device path is only needed for the debug messages below */
+	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+	(void) ddi_pathname(dip, path);
+
+	/* single vnode case: fence just its common snode */
+	if (vp != NULL) {
+		ASSERT(vn_matchops(vp, spec_getvnodeops()));
+		csp = VTOCS(vp);
+		ASSERT(csp);
+		mutex_enter(&csp->s_lock);
+		csp->s_flag |= SFENCED;
+		mutex_exit(&csp->s_lock);
+		FENDBG((CE_NOTE, "fenced off snode(%p) for dip: %s",
+		    (void *)csp, path));
+		kmem_free(path, MAXPATHLEN);
+		return (0);
+	}
+
+	/* NULL vp: sweep the whole snode table for this dip */
+	mutex_enter(&stable_lock);
+	for (i = 0; i < STABLESIZE; i++) {
+		for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
+			ASSERT(sp->s_commonvp);
+			csp = VTOS(sp->s_commonvp);
+			if (csp->s_dip != dip)
+				continue;
+
+			/* fence off the common snode */
+			mutex_enter(&csp->s_lock);
+			csp->s_flag |= SFENCED;
+			mutex_exit(&csp->s_lock);
+
+			/* the progress message is emitted once only */
+			if (logged)
+				continue;
+			FENDBG((CE_NOTE, "fenced 1 of N"));
+			logged = 1;
+		}
+	}
+	mutex_exit(&stable_lock);
+
+	FENDBG((CE_NOTE, "fenced off all snodes for dip: %s", path));
+	kmem_free(path, MAXPATHLEN);
+
+	return (0);
+}
+
+
+/*
+ * Clears SFENCED on every common snode in the snode table that is
+ * associated with dip. Always returns 0.
+ */
+int
+spec_unfence_snode(dev_info_t *dip)
+{
+	struct snode *sp;
+	struct snode *csp;
+	char *path;
+	int logged = 0;
+	int i;
+
+	ASSERT(dip);
+
+	/* the device path is only needed for the final debug message */
+	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+	(void) ddi_pathname(dip, path);
+
+	mutex_enter(&stable_lock);
+	for (i = 0; i < STABLESIZE; i++) {
+		for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
+			ASSERT(sp->s_commonvp);
+			csp = VTOS(sp->s_commonvp);
+			ASSERT(csp);
+			if (csp->s_dip != dip)
+				continue;
+
+			/* unfence the common snode */
+			mutex_enter(&csp->s_lock);
+			csp->s_flag &= ~SFENCED;
+			mutex_exit(&csp->s_lock);
+
+			/* the progress message is emitted once only */
+			if (logged)
+				continue;
+			FENDBG((CE_NOTE, "unfenced 1 of N"));
+			logged = 1;
+		}
+	}
+	mutex_exit(&stable_lock);
+
+	FENDBG((CE_NOTE, "unfenced all snodes for dip: %s", path));
+	kmem_free(path, MAXPATHLEN);
+
+	return (0);
+}
diff --git a/usr/src/uts/common/fs/specfs/specvnops.c b/usr/src/uts/common/fs/specfs/specvnops.c
index 1841d107fb..ffaba36a21 100644
--- a/usr/src/uts/common/fs/specfs/specvnops.c
+++ b/usr/src/uts/common/fs/specfs/specvnops.c
@@ -93,6 +93,7 @@
#include <sys/esunddi.h>
#include <sys/autoconf.h>
#include <sys/sunndi.h>
+#include <sys/contract/device_impl.h>
static int spec_open(struct vnode **, int, struct cred *);
@@ -153,8 +154,23 @@ static int spec_pathconf(struct vnode *, int, ulong_t *, struct cred *);
mutex_exit(&csp->s_lock); \
}
+/*
+ * Non-zero when SFENCED is set in the s_flag of the snode reached via
+ * VTOS() from sp's s_commonvp.
+ */
+#define S_ISFENCED(sp) ((VTOS((sp)->s_commonvp))->s_flag & SFENCED)
+
struct vnodeops *spec_vnodeops;
+/*
+ * *PLEASE NOTE*: If you add new entry points to specfs, do
+ * not forget to add support for fencing. A fenced snode
+ * is indicated by the SFENCED flag in the common snode.
+ * If a snode is fenced, determine if your entry point is
+ * a configuration operation (Example: open), a detection
+ * operation (Example: getattr), an I/O operation (Example: ioctl())
+ * or an unconfiguration operation (Example: close). If it is
+ * a configuration or detection operation, fail the operation
+ * for a fenced snode with an ENXIO or EIO as appropriate. If
+ * it is any other operation, let it through.
+ */
+
const fs_operation_def_t spec_vnodeops_template[] = {
VOPNAME_OPEN, { .vop_open = spec_open },
VOPNAME_CLOSE, { .vop_close = spec_close },
@@ -530,6 +546,7 @@ spec_open(struct vnode **vpp, int flag, struct cred *cr)
struct stdata *stp;
dev_info_t *dip;
int error, type;
+ contract_t *ct = NULL;
int open_returns_eintr;
flag &= ~FCREAT; /* paranoia */
@@ -579,6 +596,10 @@ spec_open(struct vnode **vpp, int flag, struct cred *cr)
ddi_release_devi(dip); /* from e_ddi_hold_devi_by_dev */
}
+ /* check if device fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
+
#ifdef DEBUG
/* verify attach/open exclusion guarantee */
dip = csp->s_dip;
@@ -628,6 +649,18 @@ spec_open(struct vnode **vpp, int flag, struct cred *cr)
csp = VTOS(sp->s_commonvp);
}
+ /*
+ * create contracts only for userland opens
+ * Successful open and cloning is done at this point.
+ */
+ if (error == 0 && !(flag & FKLYR)) {
+ int spec_type;
+ spec_type = (STOV(csp)->v_type == VCHR) ? S_IFCHR : S_IFBLK;
+ if (contract_device_open(newdev, spec_type, NULL) != 0) {
+ error = EIO;
+ }
+ }
+
if (error == 0) {
sp->s_size = SPEC_SIZE(csp);
@@ -729,6 +762,19 @@ streams_open:
UNLOCK_CSP(csp);
}
+ /*
+ * create contracts only for userland opens
+ * Successful open and cloning is done at this point.
+ */
+ if (error == 0 && !(flag & FKLYR)) {
+ /* STREAM is of type S_IFCHR */
+ if (contract_device_open(newdev, S_IFCHR, &ct) != 0) {
+ UNLOCK_CSP(csp);
+ (void) spec_close(vp, flag, 1, 0, cr);
+ return (EIO);
+ }
+ }
+
if (error == 0) {
/* STREAMS devices don't have a size */
sp->s_size = csp->s_size = 0;
@@ -741,6 +787,11 @@ streams_open:
return (0);
/* strctty() was interrupted by a signal */
+ if (ct) {
+ /* we only create contracts for userland opens */
+ ASSERT(ttoproc(curthread));
+ (void) contract_abandon(ct, ttoproc(curthread), 0);
+ }
(void) spec_close(vp, flag, 1, 0, cr);
return (EINTR);
}
@@ -795,6 +846,7 @@ spec_close(
if (count > 1)
return (0);
+ /* we allow close to succeed even if device is fenced off */
sp = VTOS(vp);
cvp = sp->s_commonvp;
@@ -1157,6 +1209,13 @@ spec_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, struct cred *cr,
if (vp->v_type != VCHR)
return (ENOTTY);
+
+ /*
+ * allow ioctls() to go through even for fenced snodes, as they
+ * may include unconfiguration operation - for example popping of
+ * streams modules.
+ */
+
sp = VTOS(vp);
dev = sp->s_dev;
if (STREAMSTAB(getmajor(dev))) {
@@ -1180,6 +1239,11 @@ spec_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cr)
vp = sp->s_commonvp;
}
sp = VTOS(vp);
+
+ /* we want stat() to fail with ENXIO if the device is fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
+
realvp = sp->s_realvp;
if (realvp == NULL) {
@@ -1258,6 +1322,10 @@ spec_setattr(
struct vnode *realvp;
int error;
+ /* fail with ENXIO if the device is fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
+
if (vp->v_type == VCHR && vp->v_stream && (vap->va_mask & AT_SIZE)) {
/*
* 1135080: O_TRUNC should have no effect on
@@ -1293,6 +1361,10 @@ spec_access(struct vnode *vp, int mode, int flags, struct cred *cr)
struct vnode *realvp;
struct snode *sp = VTOS(vp);
+ /* fail with ENXIO if the device is fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
+
if ((realvp = sp->s_realvp) != NULL)
return (VOP_ACCESS(realvp, mode, flags, cr));
else
@@ -1309,6 +1381,11 @@ spec_create(struct vnode *dvp, char *name, vattr_t *vap, enum vcexcl excl,
int mode, struct vnode **vpp, struct cred *cr, int flag)
{
int error;
+ struct snode *sp = VTOS(dvp);
+
+ /* fail with ENXIO if the device is fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
ASSERT(dvp && (dvp->v_flag & VROOT) && *name == '\0');
if (excl == NONEXCL) {
@@ -1333,6 +1410,8 @@ spec_fsync(struct vnode *vp, int syncflag, struct cred *cr)
struct vnode *cvp;
struct vattr va, vatmp;
+ /* allow syncing even if device is fenced off */
+
/* If times didn't change, don't flush anything. */
mutex_enter(&sp->s_lock);
if ((sp->s_flag & (SACC|SUPD|SCHG)) == 0 && vp->v_type != VBLK) {
@@ -2222,10 +2301,15 @@ spec_map(
struct cred *cred)
{
int error = 0;
+ struct snode *sp = VTOS(vp);
if (vp->v_flag & VNOMAP)
return (ENOSYS);
+ /* fail map with ENXIO if the device is fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
+
/*
* If file is locked, fail mapping attempt.
*/
@@ -2314,6 +2398,10 @@ spec_addmap(
if (vp->v_flag & VNOMAP)
return (ENOSYS);
+ /* fail with EIO if the device is fenced off */
+ if (S_ISFENCED(csp))
+ return (EIO);
+
npages = btopr(len);
LOCK_CSP(csp);
csp->s_mapcnt += npages;
@@ -2343,6 +2431,8 @@ spec_delmap(
ASSERT(vp != NULL && VTOS(vp)->s_commonvp == vp);
+ /* allow delmap to succeed even if device fenced off */
+
/*
* XXX Given the above assertion, this might not
* be a particularly sensible thing to test..
@@ -2389,6 +2479,8 @@ spec_delmap(
static int
spec_dump(struct vnode *vp, caddr_t addr, int bn, int count)
{
+ /* allow dump to succeed even if device fenced off */
+
ASSERT(vp->v_type == VBLK);
return (bdev_dump(vp->v_rdev, addr, bn, count));
}
@@ -2438,6 +2530,10 @@ spec_setsecattr(struct vnode *vp, vsecattr_t *vsap, int flag, struct cred *cr)
struct snode *sp = VTOS(vp);
int error;
+ /* fail with ENXIO if the device is fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
+
/*
* The acl(2) system calls VOP_RWLOCK on the file before setting an
* ACL, but since specfs does not serialize reads and writes, this
@@ -2464,6 +2560,10 @@ spec_getsecattr(struct vnode *vp, vsecattr_t *vsap, int flag, struct cred *cr)
struct vnode *realvp;
struct snode *sp = VTOS(vp);
+ /* fail with ENXIO if the device is fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
+
if ((realvp = sp->s_realvp) != NULL)
return (VOP_GETSECATTR(realvp, vsap, flag, cr));
else
@@ -2476,6 +2576,10 @@ spec_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr)
vnode_t *realvp;
struct snode *sp = VTOS(vp);
+ /* fail with ENXIO if the device is fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
+
if ((realvp = sp->s_realvp) != NULL)
return (VOP_PATHCONF(realvp, cmd, valp, cr));
else
diff --git a/usr/src/uts/common/os/contract.c b/usr/src/uts/common/os/contract.c
index aadfb92e62..6fde3f5714 100644
--- a/usr/src/uts/common/os/contract.c
+++ b/usr/src/uts/common/os/contract.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -174,6 +173,8 @@
#include <sys/proc.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
+#include <sys/dditypes.h>
+#include <sys/contract/device_impl.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/cmn_err.h>
@@ -181,6 +182,8 @@
#include <sys/policy.h>
#include <sys/zone.h>
#include <sys/task.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
extern rctl_hndl_t rc_project_contract;
@@ -191,6 +194,7 @@ static kmutex_t contract_lock;
int ct_ntypes = CTT_MAXTYPE;
static ct_type_t *ct_types_static[CTT_MAXTYPE];
ct_type_t **ct_types = ct_types_static;
+int ct_debug;
static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int);
static void cte_queue_destroy(ct_equeue_t *);
@@ -237,6 +241,7 @@ contract_init(void)
* Initialize contract types.
*/
contract_process_init();
+ contract_device_init();
/*
* Initialize p0/lwp0 contract state.
@@ -310,6 +315,9 @@ contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data,
ct->ct_ev_crit = tmpl->ctmpl_ev_crit;
ct->ct_cookie = tmpl->ctmpl_cookie;
ct->ct_owner = author;
+ ct->ct_ntime.ctm_total = -1;
+ ct->ct_qtime.ctm_total = -1;
+ ct->ct_nevent = NULL;
/*
* Test project.max-contracts.
@@ -570,6 +578,12 @@ contract_abandon(contract_t *ct, proc_t *p, int explicit)
return (0);
}
+/*
+ * contract_newct
+ *
+ * Invokes the contop_newct operation supplied by the contract's type
+ * and passes its return value straight back to the caller.
+ */
+int
+contract_newct(contract_t *ct)
+{
+	ct_type_t *type = ct->ct_type;
+
+	return (type->ct_type_ops->contop_newct(ct));
+}
+
/*
* contract_adopt
*
@@ -647,11 +661,15 @@ contract_adopt(contract_t *ct, proc_t *p)
* Acknowledges receipt of a critical event.
*/
int
-contract_ack(contract_t *ct, uint64_t evid)
+contract_ack(contract_t *ct, uint64_t evid, int ack)
{
ct_kevent_t *ev;
list_t *queue = &ct->ct_events.ctq_events;
int error = ESRCH;
+ int nego = 0;
+ uint_t evtype;
+
+ ASSERT(ack == CT_ACK || ack == CT_NACK);
mutex_enter(&ct->ct_lock);
mutex_enter(&ct->ct_events.ctq_lock);
@@ -660,9 +678,14 @@ contract_ack(contract_t *ct, uint64_t evid)
*/
for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
if (ev->cte_id == evid) {
+ if (ev->cte_flags & CTE_NEG)
+ nego = 1;
+ else if (ack == CT_NACK)
+ break;
if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
ev->cte_flags |= CTE_ACK;
ct->ct_evcnt--;
+ evtype = ev->cte_type;
error = 0;
}
break;
@@ -671,9 +694,86 @@ contract_ack(contract_t *ct, uint64_t evid)
mutex_exit(&ct->ct_events.ctq_lock);
mutex_exit(&ct->ct_lock);
+ /*
+ * Not all critical events are negotiation events, however
+ * every negotiation event is a critical event. NEGEND events
+ * are critical events but are not negotiation events
+ */
+ if (error || !nego)
+ return (error);
+
+ if (ack == CT_ACK)
+ error = ct->ct_type->ct_type_ops->contop_ack(ct, evtype, evid);
+ else
+ error = ct->ct_type->ct_type_ops->contop_nack(ct, evtype, evid);
+
return (error);
}
+/*
+ * contract_ack_inval
+ *
+ * ack/nack operation that must never be invoked: it calls cmn_err() at
+ * CE_PANIC level, reporting the id of the contract it was called on.
+ * evtype and evid are ignored.  ENOSYS is returned only to satisfy the
+ * operation's signature.
+ */
+/*ARGSUSED*/
+int
+contract_ack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
+{
+	ctid_t ctid = ct->ct_id;
+
+	cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u",
+	    ctid);
+	return (ENOSYS);
+}
+
+/*
+ * contract_qack_inval
+ *
+ * qack operation that must never be invoked: it calls cmn_err() at
+ * CE_PANIC level, reporting the id of the contract it was called on.
+ * evtype and evid are ignored.  ENOSYS is returned only to satisfy the
+ * operation's signature.
+ *
+ * The message names this function (not contract_ack_inval) so that a
+ * panic can be attributed to the qack path rather than the ack path.
+ */
+/*ARGSUSED*/
+int
+contract_qack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
+{
+	cmn_err(CE_PANIC, "contract_qack_inval: unsupported call: ctid: %u",
+	    ct->ct_id);
+	return (ENOSYS);
+}
+
+/*
+ * contract_qack_notsup
+ *
+ * qack operation that does no work and always returns ERANGE; all
+ * three arguments are ignored.
+ * NOTE(review): presumably installed by contract types that do not
+ * support extending a negotiation - confirm against the type ops tables.
+ */
+/*ARGSUSED*/
+int
+contract_qack_notsup(contract_t *ct, uint_t evtype, uint64_t evid)
+{
+ return (ERANGE);
+}
+
+/*
+ * contract_qack
+ *
+ * Asks that negotiations be extended by another time quantum.
+ *
+ * Looks up event evid on the contract's event queue.  If the event is
+ * a negotiation event (CTE_NEG) that has not been acknowledged yet
+ * (CTE_ACK clear), the request is handed to the contract type's
+ * contop_qack operation and that operation's result is returned.
+ * Returns ESRCH when the event is not on the queue or does not qualify.
+ */
+int
+contract_qack(contract_t *ct, uint64_t evid)
+{
+	list_t *evlist = &ct->ct_events.ctq_events;
+	ct_kevent_t *cur;
+	uint_t evtype = 0;
+	int pending_nego = 0;
+
+	mutex_enter(&ct->ct_lock);
+	mutex_enter(&ct->ct_events.ctq_lock);
+
+	/* locate the event with the requested id, if it is queued */
+	cur = list_head(evlist);
+	while (cur != NULL && cur->cte_id != evid)
+		cur = list_next(evlist, cur);
+
+	if (cur != NULL &&
+	    (cur->cte_flags & (CTE_NEG | CTE_ACK)) == CTE_NEG) {
+		evtype = cur->cte_type;
+		pending_nego = 1;
+	}
+
+	mutex_exit(&ct->ct_events.ctq_lock);
+	mutex_exit(&ct->ct_lock);
+
+	/*
+	 * Only a negotiated event (which is by definition also a critical
+	 * event) which has not yet been acknowledged can provide
+	 * time quanta to a negotiating owner process.
+	 */
+	if (pending_nego == 0)
+		return (ESRCH);
+
+	/* the type-specific operation is called with both locks dropped */
+	return (ct->ct_type->ct_type_ops->contop_qack(ct, evtype, evid));
+}
+
/*
* contract_orphan
*
@@ -840,6 +940,20 @@ contract_exit(proc_t *p)
}
}
+/*
+ * get_time_left
+ *
+ * Returns the number of seconds remaining on timer t: ctm_total minus
+ * the whole seconds that have passed since ctm_start (in lbolt ticks),
+ * clamped so that it never goes below zero.  A ctm_total of -1 marks
+ * the timer as not in use, in which case -1 is returned.
+ */
+static int
+get_time_left(struct ct_time *t)
+{
+	clock_t ticks_elapsed;
+	clock_t usecs_elapsed;
+	int secs_left;
+
+	if (t->ctm_total == -1)
+		return (-1);
+
+	ticks_elapsed = ddi_get_lbolt() - t->ctm_start;
+	usecs_elapsed = drv_hztousec(ticks_elapsed);
+	secs_left = t->ctm_total - (usecs_elapsed / MICROSEC);
+
+	if (secs_left < 0)
+		secs_left = 0;
+
+	return (secs_left);
+}
+
/*
* contract_status_common
*
@@ -897,8 +1011,8 @@ contract_status_common(contract_t *ct, zone_t *zone, void *status,
CTS_OWNED : ct->ct_state);
}
STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt);
- STRUCT_FSET(lstatus, ctst_ntime, -1);
- STRUCT_FSET(lstatus, ctst_qtime, -1);
+ STRUCT_FSET(lstatus, ctst_ntime, get_time_left(&ct->ct_ntime));
+ STRUCT_FSET(lstatus, ctst_qtime, get_time_left(&ct->ct_qtime));
STRUCT_FSET(lstatus, ctst_nevid,
ct->ct_nevent ? ct->ct_nevent->cte_id : 0);
STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit);
@@ -1469,9 +1583,9 @@ ctmpl_clear(ct_template_t *template)
* Creates a new contract using the specified template.
*/
int
-ctmpl_create(ct_template_t *template)
+ctmpl_create(ct_template_t *template, ctid_t *ctidp)
{
- return (template->ctmpl_ops->ctop_create(template));
+ return (template->ctmpl_ops->ctop_create(template, ctidp));
}
/*
@@ -1520,7 +1634,7 @@ ctmpl_copy(ct_template_t *new, ct_template_t *old)
*/
/*ARGSUSED*/
int
-ctmpl_create_inval(ct_template_t *template)
+ctmpl_create_inval(ct_template_t *template, ctid_t *ctidp)
{
return (EINVAL);
}
@@ -2046,19 +2160,34 @@ cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp)
* be zallocated by the caller, and the event's flags and type must be
* set. The rest of the event's fields are initialized here.
*/
-void
+uint64_t
cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata)
{
ct_equeue_t *q;
timespec_t ts;
+ uint64_t evid;
+ ct_kevent_t *negev;
+ int negend;
e->cte_contract = ct;
e->cte_data = data;
e->cte_gdata = gdata;
e->cte_refs = 3;
- e->cte_id = atomic_add_64_nv(&ct->ct_type->ct_type_evid, 1);
+ evid = e->cte_id = atomic_add_64_nv(&ct->ct_type->ct_type_evid, 1);
contract_hold(ct);
+ /*
+ * For a negotiation event we set the ct->ct_nevent field of the
+ * contract for the duration of the negotiation
+ */
+ negend = 0;
+ if (e->cte_flags & CTE_NEG) {
+ cte_hold(e);
+ ct->ct_nevent = e;
+ } else if (e->cte_type == CT_EV_NEGEND) {
+ negend = 1;
+ }
+
gethrestime(&ts);
/*
@@ -2111,7 +2240,17 @@ cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata)
cte_rele(e);
}
+ if (negend) {
+ mutex_enter(&ct->ct_lock);
+ negev = ct->ct_nevent;
+ ct->ct_nevent = NULL;
+ cte_rele(negev);
+ mutex_exit(&ct->ct_lock);
+ }
+
mutex_exit(&ct->ct_evtlock);
+
+ return (evid);
}
/*
@@ -2347,7 +2486,8 @@ cte_get_event(ct_listener_t *l, int nonblock, void *uaddr, const cred_t *cr,
STRUCT_FSET(ev, ctev_evid, temp->cte_id);
STRUCT_FSET(ev, ctev_cttype,
temp->cte_contract->ct_type->ct_type_index);
- STRUCT_FSET(ev, ctev_flags, temp->cte_flags & (CTE_ACK|CTE_INFO));
+ STRUCT_FSET(ev, ctev_flags, temp->cte_flags &
+ (CTE_ACK|CTE_INFO|CTE_NEG));
STRUCT_FSET(ev, ctev_type, temp->cte_type);
STRUCT_FSET(ev, ctev_nbytes, len);
STRUCT_FSET(ev, ctev_goffset, size);
diff --git a/usr/src/uts/common/os/devcache.c b/usr/src/uts/common/os/devcache.c
index 14cde49faf..8e1313d487 100644
--- a/usr/src/uts/common/os/devcache.c
+++ b/usr/src/uts/common/os/devcache.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -224,6 +224,7 @@ i_ddi_devices_init(void)
list_create(&nvf_dirty_files, sizeof (nvfd_t),
offsetof(nvfd_t, nvf_link));
mutex_init(&nvf_cache_mutex, NULL, MUTEX_DEFAULT, NULL);
+ retire_store_init();
devid_cache_init();
}
@@ -235,6 +236,16 @@ i_ddi_devices_init(void)
void
i_ddi_read_devices_files(void)
{
+ /*
+ * The retire store should be the first file read as it
+ * may need to offline devices. kfio_disable_read is not
+ * used for retire. For the rationale see the tunable
+ * ddi_retire_store_bypass and comments in:
+ * uts/common/os/retire_store.c
+ */
+
+ retire_store_read();
+
if (!kfio_disable_read) {
mdi_read_devices_files();
devid_cache_read();
diff --git a/usr/src/uts/common/os/devcfg.c b/usr/src/uts/common/os/devcfg.c
index 29150c5d8c..03f7ec89a2 100644
--- a/usr/src/uts/common/os/devcfg.c
+++ b/usr/src/uts/common/os/devcfg.c
@@ -38,6 +38,7 @@
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/modctl.h>
+#include <sys/contract/device_impl.h>
#include <sys/dacf.h>
#include <sys/promif.h>
#include <sys/cpuvar.h>
@@ -50,6 +51,9 @@
#include <sys/fs/snode.h>
#include <sys/fs/dv_node.h>
#include <sys/reboot.h>
+#include <sys/sysmacros.h>
+#include <sys/sunldi.h>
+#include <sys/sunldi_impl.h>
#ifdef DEBUG
int ddidebug = DDI_AUDIT;
@@ -192,6 +196,10 @@ static void ndi_devi_exit_and_wait(dev_info_t *dip,
int circular, clock_t end_time);
static int ndi_devi_unbind_driver(dev_info_t *dip);
+static void i_ddi_check_retire(dev_info_t *dip);
+
+
+
/*
* dev_info cache and node management
*/
@@ -324,6 +332,15 @@ i_ddi_alloc_node(dev_info_t *pdip, char *node_name, pnode_t nodeid,
mutex_init(&(devi->devi_pm_lock), NULL, MUTEX_DEFAULT, NULL);
mutex_init(&(devi->devi_pm_busy_lock), NULL, MUTEX_DEFAULT, NULL);
+ RIO_TRACE((CE_NOTE, "i_ddi_alloc_node: Initing contract fields: "
+ "dip=%p, name=%s", (void *)devi, node_name));
+
+ mutex_init(&(devi->devi_ct_lock), NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&(devi->devi_ct_cv), NULL, CV_DEFAULT, NULL);
+ devi->devi_ct_count = -1; /* counter not in use if -1 */
+ list_create(&(devi->devi_ct), sizeof (cont_device_t),
+ offsetof(cont_device_t, cond_next));
+
i_ddi_set_node_state((dev_info_t *)devi, DS_PROTO);
da_log_enter((dev_info_t *)devi);
return ((dev_info_t *)devi);
@@ -389,7 +406,6 @@ i_ddi_free_node(dev_info_t *dip)
if (devi->devi_audit) {
kmem_free(devi->devi_audit, sizeof (devinfo_audit_t));
}
- kmem_free(devi->devi_node_name, strlen(devi->devi_node_name) + 1);
if (devi->devi_device_class)
kmem_free(devi->devi_device_class,
strlen(devi->devi_device_class) + 1);
@@ -398,6 +414,20 @@ i_ddi_free_node(dev_info_t *dip)
mutex_destroy(&(devi->devi_pm_lock));
mutex_destroy(&(devi->devi_pm_busy_lock));
+ RIO_TRACE((CE_NOTE, "i_ddi_free_node: destroying contract fields: "
+ "dip=%p", (void *)dip));
+ contract_device_remove_dip(dip);
+ ASSERT(devi->devi_ct_count == -1);
+ ASSERT(list_is_empty(&(devi->devi_ct)));
+ cv_destroy(&(devi->devi_ct_cv));
+ list_destroy(&(devi->devi_ct));
+ /* free this last since contract_device_remove_dip() uses it */
+ mutex_destroy(&(devi->devi_ct_lock));
+ RIO_TRACE((CE_NOTE, "i_ddi_free_node: destroyed all contract fields: "
+ "dip=%p, name=%s", (void *)dip, devi->devi_node_name));
+
+ kmem_free(devi->devi_node_name, strlen(devi->devi_node_name) + 1);
+
kmem_cache_free(ddi_node_cache, devi);
}
@@ -1441,6 +1471,7 @@ i_ndi_config_node(dev_info_t *dip, ddi_node_state_t state, uint_t flag)
i_ddi_set_node_state(dip, DS_PROBED);
break;
case DS_PROBED:
+ i_ddi_check_retire(dip);
atomic_add_long(&devinfo_attach_detach, 1);
if ((rv = attach_node(dip)) == DDI_SUCCESS)
i_ddi_set_node_state(dip, DS_ATTACHED);
@@ -5110,6 +5141,172 @@ ndi_devi_config_obp_args(dev_info_t *parent, char *devnm,
return (error);
}
+/*
+ * Pay attention, the following is a bit tricky:
+ * There are three possible cases when constraints are applied
+ *
+ * - A constraint is applied and the offline is disallowed.
+ * Simply return failure and block the offline
+ *
+ * - A constraint is applied and the offline is allowed.
+ * Mark the dip as having passed the constraint and allow
+ * offline to proceed.
+ *
+ * - A constraint is not applied. Allow the offline to proceed for now.
+ *
+ * In the latter two cases we allow the offline to proceed. If the
+ * offline succeeds (no users) everything is fine. It is ok for an unused
+ * device to be offlined even if no constraints were imposed on the offline.
+ * If the offline fails because there are users, we look at the constraint
+ * flag on the dip. If the constraint flag is set (implying that it passed
+ * a constraint) we allow the dip to be retired. If not, we don't allow
+ * the retire. This ensures that we don't allow unconstrained retire.
+ *
+ * Returns DDI_FAILURE if the device contracts NACK the offline or an
+ * LDI callback fails; DDI_SUCCESS otherwise. The DEVI_R_BLOCKED and
+ * DEVI_R_CONSTRAINT flags are only updated when the dip is marked
+ * DEVI_RETIRING.
+ */
+int
+e_ddi_offline_notify(dev_info_t *dip)
+{
+ int retval;
+ int constraint;
+ int failure;
+
+ RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): entered: dip=%p",
+ (void *) dip));
+
+ constraint = 0;
+ failure = 0;
+
+ /*
+ * Start with userland constraints first - applied via device contracts
+ */
+ retval = contract_device_offline(dip, DDI_DEV_T_ANY, 0);
+ switch (retval) {
+ case CT_NACK:
+ /* a contract NACK skips the LDI callbacks entirely */
+ RIO_DEBUG((CE_NOTE, "Received NACK for dip=%p", (void *)dip));
+ failure = 1;
+ goto out;
+ case CT_ACK:
+ constraint = 1;
+ RIO_DEBUG((CE_NOTE, "Received ACK for dip=%p", (void *)dip));
+ break;
+ case CT_NONE:
+ /* no contracts */
+ RIO_DEBUG((CE_NOTE, "No contracts on dip=%p", (void *)dip));
+ break;
+ default:
+ ASSERT(retval == CT_NONE);
+ }
+
+ /*
+ * Next, use LDI to impose kernel constraints
+ */
+ retval = ldi_invoke_notify(dip, DDI_DEV_T_ANY, 0, LDI_EV_OFFLINE, NULL);
+ switch (retval) {
+ case LDI_EV_FAILURE:
+ /* end the contract negotiation started above with a failure result */
+ contract_device_negend(dip, DDI_DEV_T_ANY, 0, CT_EV_FAILURE);
+ RIO_DEBUG((CE_NOTE, "LDI callback failed on dip=%p",
+ (void *)dip));
+ failure = 1;
+ goto out;
+ case LDI_EV_SUCCESS:
+ constraint = 1;
+ RIO_DEBUG((CE_NOTE, "LDI callback success on dip=%p",
+ (void *)dip));
+ break;
+ case LDI_EV_NONE:
+ /* no matching LDI callbacks */
+ RIO_DEBUG((CE_NOTE, "No LDI callbacks for dip=%p",
+ (void *)dip));
+ break;
+ default:
+ ASSERT(retval == LDI_EV_NONE);
+ }
+
+out:
+ /*
+ * Record the outcome on the dip, but only if a retire is in
+ * progress: failure wins over constraint, and an unreferenced
+ * device is treated as if it had passed a constraint.
+ */
+ mutex_enter(&(DEVI(dip)->devi_lock));
+ if ((DEVI(dip)->devi_flags & DEVI_RETIRING) && failure) {
+ RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): setting "
+ "BLOCKED flag. dip=%p", (void *)dip));
+ DEVI(dip)->devi_flags |= DEVI_R_BLOCKED;
+ if (DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT) {
+ RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): "
+ "blocked. clearing RCM CONSTRAINT flag. dip=%p",
+ (void *)dip));
+ DEVI(dip)->devi_flags &= ~DEVI_R_CONSTRAINT;
+ }
+ } else if ((DEVI(dip)->devi_flags & DEVI_RETIRING) && constraint) {
+ RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): setting "
+ "CONSTRAINT flag. dip=%p", (void *)dip));
+ DEVI(dip)->devi_flags |= DEVI_R_CONSTRAINT;
+ } else if ((DEVI(dip)->devi_flags & DEVI_RETIRING) &&
+ DEVI(dip)->devi_ref == 0) {
+ /* also allow retire if device is not in use */
+ RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): device not in "
+ "use. Setting CONSTRAINT flag. dip=%p", (void *)dip));
+ DEVI(dip)->devi_flags |= DEVI_R_CONSTRAINT;
+ } else {
+ /*
+ * Note: We cannot ASSERT here that DEVI_R_CONSTRAINT is
+ * not set, since other sources (such as RCM) may have
+ * set the flag.
+ */
+ RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): not setting "
+ "constraint flag. dip=%p", (void *)dip));
+ }
+ mutex_exit(&(DEVI(dip)->devi_lock));
+
+
+ RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): exit: dip=%p",
+ (void *) dip));
+
+ return (failure ? DDI_FAILURE : DDI_SUCCESS);
+}
+
+/*
+ * Report the final outcome of an offline attempt on dip: first to the
+ * device contracts (ending their negotiation), then to the LDI
+ * finalize callbacks registered for LDI_EV_OFFLINE.  Any result other
+ * than DDI_SUCCESS is reported as a failure.
+ */
+void
+e_ddi_offline_finalize(dev_info_t *dip, int result)
+{
+	int succeeded = (result == DDI_SUCCESS);
+
+	RIO_DEBUG((CE_NOTE, "e_ddi_offline_finalize(): entry: result=%s, "
+	    "dip=%p", succeeded ? "SUCCESS" : "FAILURE",
+	    (void *)dip));
+
+	contract_device_negend(dip, DDI_DEV_T_ANY, 0,
+	    succeeded ? CT_EV_SUCCESS : CT_EV_FAILURE);
+
+	ldi_invoke_finalize(dip, DDI_DEV_T_ANY, 0, LDI_EV_OFFLINE,
+	    succeeded ? LDI_EV_SUCCESS : LDI_EV_FAILURE, NULL);
+
+	RIO_VERBOSE((CE_NOTE, "e_ddi_offline_finalize(): exit: dip=%p",
+	    (void *)dip));
+}
+
+/*
+ * Announce that dip has been degraded: notify the device contracts of
+ * the degrade, end the contract negotiation with CT_EV_SUCCESS, then
+ * run the LDI finalize callbacks for LDI_EV_DEGRADE with
+ * LDI_EV_SUCCESS. The result reported is always success.
+ */
+void
+e_ddi_degrade_finalize(dev_info_t *dip)
+{
+ RIO_DEBUG((CE_NOTE, "e_ddi_degrade_finalize(): entry: "
+ "result always = DDI_SUCCESS, dip=%p", (void *)dip));
+
+ contract_device_degrade(dip, DDI_DEV_T_ANY, 0);
+ contract_device_negend(dip, DDI_DEV_T_ANY, 0, CT_EV_SUCCESS);
+
+ ldi_invoke_finalize(dip, DDI_DEV_T_ANY, 0, LDI_EV_DEGRADE,
+ LDI_EV_SUCCESS, NULL);
+
+ RIO_VERBOSE((CE_NOTE, "e_ddi_degrade_finalize(): exit: dip=%p",
+ (void *)dip));
+}
+
+/*
+ * Announce that dip is no longer degraded: notify the device contracts
+ * of the undegrade and end the contract negotiation with CT_EV_SUCCESS.
+ * Unlike e_ddi_degrade_finalize(), no LDI finalize callback is invoked
+ * here.
+ */
+void
+e_ddi_undegrade_finalize(dev_info_t *dip)
+{
+ RIO_DEBUG((CE_NOTE, "e_ddi_undegrade_finalize(): entry: "
+ "result always = DDI_SUCCESS, dip=%p", (void *)dip));
+
+ contract_device_undegrade(dip, DDI_DEV_T_ANY, 0);
+ contract_device_negend(dip, DDI_DEV_T_ANY, 0, CT_EV_SUCCESS);
+
+ RIO_VERBOSE((CE_NOTE, "e_ddi_undegrade_finalize(): exit: dip=%p",
+ (void *)dip));
+}
/*
* detach a node with parent already held busy
@@ -5123,6 +5320,19 @@ devi_detach_node(dev_info_t *dip, uint_t flags)
ASSERT(pdip && DEVI_BUSY_OWNED(pdip));
+ /*
+ * Invoke notify if offlining
+ */
+ if (flags & NDI_DEVI_OFFLINE) {
+ RIO_DEBUG((CE_NOTE, "devi_detach_node: offlining dip=%p",
+ (void *)dip));
+ if (e_ddi_offline_notify(dip) != DDI_SUCCESS) {
+ RIO_DEBUG((CE_NOTE, "devi_detach_node: offline NACKed"
+ "dip=%p", (void *)dip));
+ return (NDI_FAILURE);
+ }
+ }
+
if (flags & NDI_POST_EVENT) {
if (i_ddi_devi_attached(pdip)) {
if (ddi_get_eventcookie(dip, DDI_DEVI_REMOVE_EVENT,
@@ -5131,8 +5341,22 @@ devi_detach_node(dev_info_t *dip, uint_t flags)
}
}
- if (i_ddi_detachchild(dip, flags) != DDI_SUCCESS)
+ if (i_ddi_detachchild(dip, flags) != DDI_SUCCESS) {
+ if (flags & NDI_DEVI_OFFLINE) {
+ RIO_DEBUG((CE_NOTE, "devi_detach_node: offline failed."
+ " Calling e_ddi_offline_finalize with result=%d. "
+ "dip=%p", DDI_FAILURE, (void *)dip));
+ e_ddi_offline_finalize(dip, DDI_FAILURE);
+ }
return (NDI_FAILURE);
+ }
+
+ if (flags & NDI_DEVI_OFFLINE) {
+ RIO_DEBUG((CE_NOTE, "devi_detach_node: offline succeeded."
+ " Calling e_ddi_offline_finalize with result=%d, "
+ "dip=%p", DDI_SUCCESS, (void *)dip));
+ e_ddi_offline_finalize(dip, DDI_SUCCESS);
+ }
if (flags & NDI_AUTODETACH)
return (NDI_SUCCESS);
@@ -7220,3 +7444,502 @@ ibt_hw_is_present()
{
return (ib_hw_status);
}
+
+
+/*
+ * ddi_walk_devs() callback that flags dip as undergoing a retire.
+ *
+ * arg is either NULL or a NULL terminated array of device paths.  The
+ * dip must not already carry DEVI_R_CONSTRAINT (ASSERTed); it is
+ * marked DEVI_RETIRING, and additionally DEVI_R_CONSTRAINT when its
+ * own path appears in the array.  For a pHCI the same array is passed
+ * on to mdi_phci_mark_retiring().  Always returns DDI_WALK_CONTINUE.
+ */
+int
+e_ddi_mark_retiring(dev_info_t *dip, void *arg)
+{
+	char **cons_array = (char **)arg;
+	char **cpp;
+	char *path;
+	int constraint = 0;
+
+	if (cons_array != NULL) {
+		path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+		(void) ddi_pathname(dip, path);
+		/* stop at the first array entry naming this node */
+		for (cpp = cons_array; *cpp != NULL && !constraint; cpp++)
+			constraint = (strcmp(path, *cpp) == 0);
+		kmem_free(path, MAXPATHLEN);
+	}
+
+	mutex_enter(&DEVI(dip)->devi_lock);
+	ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT));
+	DEVI(dip)->devi_flags |= constraint ?
+	    (DEVI_RETIRING | DEVI_R_CONSTRAINT) : DEVI_RETIRING;
+	mutex_exit(&DEVI(dip)->devi_lock);
+
+	RIO_VERBOSE((CE_NOTE, "marked dip as undergoing retire process dip=%p",
+	    (void *)dip));
+
+	if (constraint)
+		RIO_DEBUG((CE_NOTE, "marked dip as constrained, dip=%p",
+		    (void *)dip));
+
+	if (MDI_PHCI(dip))
+		mdi_phci_mark_retiring(dip, cons_array);
+
+	return (DDI_WALK_CONTINUE);
+}
+
+/*
+ * Free a NULL terminated array of strings: every string is released
+ * with a size of its length plus the terminating NUL, then the array
+ * itself is released.  A NULL array is a no-op.
+ */
+static void
+free_array(char **cons_array)
+{
+	int nelem;
+
+	if (cons_array == NULL)
+		return;
+
+	nelem = 0;
+	while (cons_array[nelem] != NULL) {
+		kmem_free(cons_array[nelem], strlen(cons_array[nelem]) + 1);
+		nelem++;
+	}
+
+	/* the size freed counts the terminating NULL slot as well */
+	kmem_free(cons_array, (nelem + 1) * sizeof (char *));
+}
+
+/*
+ * Walk *every* node in subtree and check if it blocks, allows or has no
+ * comment on a proposed retire.
+ *
+ * arg points at the caller's int verdict.  It is cleared to 0 when
+ * this dip is not marked DEVI_RETIRING, is DEVI_R_BLOCKED, or did not
+ * end up with DEVI_R_CONSTRAINT set; otherwise it is left untouched.
+ * The return value of e_ddi_offline_notify() is ignored - only the
+ * flags it leaves on the dip are examined.  Always returns
+ * DDI_WALK_CONTINUE.
+ */
+int
+e_ddi_retire_notify(dev_info_t *dip, void *arg)
+{
+	int *constraint = (int *)arg;
+
+	RIO_DEBUG((CE_NOTE, "retire notify: dip = %p", (void *)dip));
+
+	(void) e_ddi_offline_notify(dip);
+
+	mutex_enter(&(DEVI(dip)->devi_lock));
+	if (DEVI(dip)->devi_flags & DEVI_RETIRING) {
+		if (DEVI(dip)->devi_flags & DEVI_R_BLOCKED) {
+			ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT));
+			RIO_DEBUG((CE_NOTE, "retire notify: BLOCKED: dip = %p",
+			    (void *)dip));
+			*constraint = 0;
+		} else if (DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT) {
+			RIO_DEBUG((CE_NOTE, "retire notify: CONSTRAINT set: "
+			    "dip = %p", (void *)dip));
+		} else {
+			RIO_DEBUG((CE_NOTE, "retire notify: NO CONSTRAINT: "
+			    "dip = %p", (void *)dip));
+			*constraint = 0;
+		}
+	} else {
+		RIO_DEBUG((CE_WARN, "retire notify: dip in retire "
+		    "subtree is not marked: dip = %p", (void *)dip));
+		*constraint = 0;
+	}
+	mutex_exit(&DEVI(dip)->devi_lock);
+
+	if (MDI_PHCI(dip))
+		mdi_phci_retire_notify(dip, constraint);
+
+	return (DDI_WALK_CONTINUE);
+}
+
+/*
+ * Walker callback that completes (or abandons) the retire of dip.
+ *
+ * arg points at the int verdict accumulated by e_ddi_retire_notify().
+ * The parent of dip must be held busy by the caller (ASSERTed).
+ *
+ * - A dip that is not marked DEVI_RETIRING is left alone.
+ * - If the verdict is non-zero, the retire flags are replaced by
+ * DEVI_RETIRED, the dip's snodes are fenced and the offline is
+ * finalized with DDI_SUCCESS.
+ * - Otherwise the retire flags are cleared, the offline is finalized
+ * with DDI_FAILURE (unless the dip was BLOCKED, in which case notify
+ * already finalized it) and the device is marked degraded.
+ *
+ * Always returns DDI_WALK_CONTINUE.
+ */
+int
+e_ddi_retire_finalize(dev_info_t *dip, void *arg)
+{
+ int constraint = *(int *)arg;
+ int finalize;
+ int phci_only;
+
+ ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(dip)));
+
+ mutex_enter(&DEVI(dip)->devi_lock);
+ if (!(DEVI(dip)->devi_flags & DEVI_RETIRING)) {
+ RIO_DEBUG((CE_WARN,
+ "retire: unmarked dip(%p) in retire subtree",
+ (void *)dip));
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_RETIRED));
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT));
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_BLOCKED));
+ mutex_exit(&DEVI(dip)->devi_lock);
+ return (DDI_WALK_CONTINUE);
+ }
+
+ /*
+ * retire the device if constraints have been applied
+ * or if the device is not in use
+ */
+ finalize = 0;
+ if (constraint) {
+ ASSERT(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT);
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_BLOCKED));
+ DEVI(dip)->devi_flags &= ~DEVI_R_CONSTRAINT;
+ DEVI(dip)->devi_flags &= ~DEVI_RETIRING;
+ DEVI(dip)->devi_flags |= DEVI_RETIRED;
+ /* devi_lock is dropped before fencing and finalizing */
+ mutex_exit(&DEVI(dip)->devi_lock);
+ (void) spec_fence_snode(dip, NULL);
+ RIO_DEBUG((CE_NOTE, "Fenced off: dip = %p", (void *)dip));
+ e_ddi_offline_finalize(dip, DDI_SUCCESS);
+ } else {
+ if (DEVI(dip)->devi_flags & DEVI_R_BLOCKED) {
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT));
+ DEVI(dip)->devi_flags &= ~DEVI_R_BLOCKED;
+ DEVI(dip)->devi_flags &= ~DEVI_RETIRING;
+ /* we have already finalized during notify */
+ } else if (DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT) {
+ DEVI(dip)->devi_flags &= ~DEVI_R_CONSTRAINT;
+ DEVI(dip)->devi_flags &= ~DEVI_RETIRING;
+ finalize = 1;
+ } else {
+ DEVI(dip)->devi_flags &= ~DEVI_RETIRING;
+ /*
+ * even if no contracts, need to call finalize
+ * to clear the contract barrier on the dip
+ */
+ finalize = 1;
+ }
+ mutex_exit(&DEVI(dip)->devi_lock);
+ RIO_DEBUG((CE_NOTE, "finalize: NOT retired: dip = %p",
+ (void *)dip));
+ if (finalize)
+ e_ddi_offline_finalize(dip, DDI_FAILURE);
+ /* a device that could not be retired is marked degraded instead */
+ mutex_enter(&DEVI(dip)->devi_lock);
+ DEVI_SET_DEVICE_DEGRADED(dip);
+ mutex_exit(&DEVI(dip)->devi_lock);
+ }
+
+ /*
+ * phci_only variable indicates no client checking, just
+ * offline the PHCI. We set that to 0 to enable client
+ * checking
+ */
+ phci_only = 0;
+ if (MDI_PHCI(dip))
+ mdi_phci_retire_finalize(dip, phci_only);
+
+ return (DDI_WALK_CONTINUE);
+}
+
+/*
+ * Attempt to retire the device at path together with its subtree.
+ *
+ * Returns
+ * DDI_SUCCESS if constraints allow retire
+ * DDI_FAILURE if constraints don't allow retire.
+ * cons_array is a NULL terminated array of node paths for
+ * which constraints have already been applied.
+ *
+ * cons_array is consumed: it is freed via free_array() on every path
+ * through this function, so the caller must not use it afterwards.
+ *
+ * The work is done in three passes over the node and its descendants,
+ * all with the parent held busy: mark (e_ddi_mark_retiring), notify
+ * (e_ddi_retire_notify) and finalize (e_ddi_retire_finalize).
+ */
+int
+e_ddi_retire_device(char *path, char **cons_array)
+{
+ dev_info_t *dip;
+ dev_info_t *pdip;
+ int circ;
+ int circ2;
+ int constraint;
+ char *devnm;
+
+ /*
+ * First, lookup the device
+ */
+ dip = e_ddi_hold_devi_by_path(path, 0);
+ if (dip == NULL) {
+ /*
+ * device does not exist. This device cannot be
+ * a critical device since it is not in use. Thus
+ * this device is always retireable. Return DDI_SUCCESS
+ * to indicate this. If this device is ever
+ * instantiated, I/O framework will consult the
+ * persistent retire store, mark it as
+ * retired and fence it off.
+ */
+ RIO_DEBUG((CE_NOTE, "Retire device: device doesn't exist."
+ " NOP. Just returning SUCCESS. path=%s", path));
+ free_array(cons_array);
+ return (DDI_SUCCESS);
+ }
+
+ RIO_DEBUG((CE_NOTE, "Retire device: found dip = %p.", (void *)dip));
+
+ pdip = ddi_get_parent(dip);
+ ndi_hold_devi(pdip);
+
+ /*
+ * Run devfs_clean() in case dip has no constraints and is
+ * not in use, so is retireable but there are dv_nodes holding
+ * ref-count on the dip. Note that devfs_clean() always returns
+ * success.
+ */
+ devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
+ (void) ddi_deviname(dip, devnm);
+ /* devnm + 1 skips the first character of the generated name */
+ (void) devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
+ kmem_free(devnm, MAXNAMELEN + 1);
+
+ ndi_devi_enter(pdip, &circ);
+
+ /* release hold from e_ddi_hold_devi_by_path */
+ ndi_rele_devi(dip);
+
+ /*
+ * If it cannot make a determination, is_leaf_node() assumes
+ * dip is a nexus.
+ */
+ (void) e_ddi_mark_retiring(dip, cons_array);
+ if (!is_leaf_node(dip)) {
+ ndi_devi_enter(dip, &circ2);
+ ddi_walk_devs(ddi_get_child(dip), e_ddi_mark_retiring,
+ cons_array);
+ ndi_devi_exit(dip, circ2);
+ }
+ /* the constraint paths are only needed by the marking pass */
+ free_array(cons_array);
+
+ /*
+ * apply constraints
+ */
+ RIO_DEBUG((CE_NOTE, "retire: subtree retire notify: path = %s", path));
+
+ constraint = 1; /* assume constraints allow retire */
+ (void) e_ddi_retire_notify(dip, &constraint);
+ if (!is_leaf_node(dip)) {
+ ndi_devi_enter(dip, &circ2);
+ ddi_walk_devs(ddi_get_child(dip), e_ddi_retire_notify,
+ &constraint);
+ ndi_devi_exit(dip, circ2);
+ }
+
+ /*
+ * Now finalize the retire
+ */
+ (void) e_ddi_retire_finalize(dip, &constraint);
+ if (!is_leaf_node(dip)) {
+ ndi_devi_enter(dip, &circ2);
+ ddi_walk_devs(ddi_get_child(dip), e_ddi_retire_finalize,
+ &constraint);
+ ndi_devi_exit(dip, circ2);
+ }
+
+ if (!constraint) {
+ RIO_DEBUG((CE_WARN, "retire failed: path = %s", path));
+ } else {
+ RIO_DEBUG((CE_NOTE, "retire succeeded: path = %s", path));
+ }
+
+ ndi_devi_exit(pdip, circ);
+ ndi_rele_devi(pdip);
+ return (constraint ? DDI_SUCCESS : DDI_FAILURE);
+}
+
+/*
+ * Undo the retire of a single dip: clear DEVI_RETIRED, apply
+ * DEVI_SET_DEVICE_ONLINE(), call spec_unfence_snode() and, for a PHCI,
+ * mdi_phci_unretire(). It has the (dip, arg) signature taken by
+ * ddi_walk_devs() and is used both directly and as a walk callback.
+ *
+ * arg is a caller-supplied scratch buffer that ddi_pathname() fills in;
+ * the resulting path is only used in the debug messages.
+ *
+ * Always returns DDI_WALK_CONTINUE.
+ */
+static int
+unmark_and_unfence(dev_info_t *dip, void *arg)
+{
+ char *path = (char *)arg;
+
+ ASSERT(path);
+
+ (void) ddi_pathname(dip, path);
+
+ /* devi_flags and the online state are changed under devi_lock */
+ mutex_enter(&DEVI(dip)->devi_lock);
+ DEVI(dip)->devi_flags &= ~DEVI_RETIRED;
+ DEVI_SET_DEVICE_ONLINE(dip);
+ mutex_exit(&DEVI(dip)->devi_lock);
+
+ RIO_VERBOSE((CE_NOTE, "Cleared RETIRED flag: dip=%p, path=%s",
+ (void *)dip, path));
+
+ (void) spec_unfence_snode(dip);
+ RIO_DEBUG((CE_NOTE, "Unfenced device: %s", path));
+
+ if (MDI_PHCI(dip))
+ mdi_phci_unretire(dip);
+
+ return (DDI_WALK_CONTINUE);
+}
+
+/* Search state passed to find_dip_fcn() through ddi_walk_devs(). */
+struct find_dip {
+ char *fd_buf; /* scratch buffer filled in by ddi_pathname() */
+ char *fd_path; /* device path being searched for */
+ dev_info_t *fd_dip; /* matching dip (held by find_dip_fcn) or NULL */
+};
+
+/*
+ * ddi_walk_devs() callback: compare the pathname of dip against the
+ * path stored in the struct find_dip passed as arg. On a match the dip
+ * is held, recorded in fd_dip and the walk is terminated; otherwise the
+ * walk continues.
+ */
+static int
+find_dip_fcn(dev_info_t *dip, void *arg)
+{
+	struct find_dip	*search = arg;
+	int		rv = DDI_WALK_CONTINUE;
+
+	(void) ddi_pathname(dip, search->fd_buf);
+
+	if (strcmp(search->fd_buf, search->fd_path) == 0) {
+		/* the caller is responsible for releasing this hold */
+		ndi_hold_devi(dip);
+		search->fd_dip = dip;
+		rv = DDI_WALK_TERMINATE;
+	}
+
+	return (rv);
+}
+
+/*
+ * Unretire the device named by path together with its subtree: every
+ * node is passed to unmark_and_unfence().
+ *
+ * path must be an absolute devinfo path. The root node ("/") and paths
+ * that are not found in the device tree are skipped with a warning.
+ *
+ * Always returns 0.
+ */
+int
+e_ddi_unretire_device(char *path)
+{
+	int		circ;
+	int		circ2;
+	char		*path2;
+	dev_info_t	*pdip;
+	dev_info_t	*dip;
+	struct find_dip	find_dip;
+
+	ASSERT(path);
+	ASSERT(*path == '/');
+
+	if (strcmp(path, "/") == 0) {
+		cmn_err(CE_WARN, "Root node cannot be retired. Skipping "
+		    "device unretire: %s", path);
+		return (0);
+	}
+
+	/*
+	 * We can't lookup the dip (corresponding to path) via
+	 * e_ddi_hold_devi_by_path() because the dip may be offline
+	 * and may not attach. Use ddi_walk_devs() instead;
+	 */
+	find_dip.fd_buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+	find_dip.fd_path = path;
+	find_dip.fd_dip = NULL;
+
+	pdip = ddi_root_node();
+
+	ndi_devi_enter(pdip, &circ);
+	ddi_walk_devs(ddi_get_child(pdip), find_dip_fcn, &find_dip);
+	ndi_devi_exit(pdip, circ);
+
+	kmem_free(find_dip.fd_buf, MAXPATHLEN);
+
+	if (find_dip.fd_dip == NULL) {
+		cmn_err(CE_WARN, "Device not found in device tree. Skipping "
+		    "device unretire: %s", path);
+		return (0);
+	}
+
+	dip = find_dip.fd_dip;
+
+	pdip = ddi_get_parent(dip);
+
+	ndi_hold_devi(pdip);
+
+	ndi_devi_enter(pdip, &circ);
+
+	path2 = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+	(void) unmark_and_unfence(dip, path2);
+	if (!is_leaf_node(dip)) {
+		/*
+		 * Use a separate variable for the nested enter: "circ"
+		 * still holds the state returned for pdip and must be
+		 * handed back unchanged to ndi_devi_exit(pdip, ...) below.
+		 */
+		ndi_devi_enter(dip, &circ2);
+		ddi_walk_devs(ddi_get_child(dip), unmark_and_unfence, path2);
+		ndi_devi_exit(dip, circ2);
+	}
+
+	kmem_free(path2, MAXPATHLEN);
+
+	/* release hold from find_dip_fcn() */
+	ndi_rele_devi(dip);
+
+	ndi_devi_exit(pdip, circ);
+
+	ndi_rele_devi(pdip);
+
+	return (0);
+}
+
+/*
+ * Called before attach on a dip that has been retired.
+ *
+ * Sets DEVI_RETIRED on the dip. If arg is non-NULL it is used as a
+ * scratch path buffer and the dip's snodes are additionally fenced off
+ * with spec_fence_snode(); with a NULL arg the dip is only marked.
+ *
+ * Has the (dip, arg) signature taken by ddi_walk_devs() and always
+ * returns DDI_WALK_CONTINUE.
+ */
+static int
+mark_and_fence(dev_info_t *dip, void *arg)
+{
+ char *fencepath = (char *)arg;
+
+ /*
+ * We have already decided to retire this device, so none of the
+ * constraint-checking flags (DEVI_R_CONSTRAINT, DEVI_R_BLOCKED,
+ * DEVI_RETIRING) should be set.
+ * NOTE that the retire flag may already be set due to
+ * fenced -> detach -> fenced transitions.
+ */
+ mutex_enter(&DEVI(dip)->devi_lock);
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT));
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_BLOCKED));
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_RETIRING));
+ DEVI(dip)->devi_flags |= DEVI_RETIRED;
+ mutex_exit(&DEVI(dip)->devi_lock);
+ RIO_VERBOSE((CE_NOTE, "marked as RETIRED dip=%p", (void *)dip));
+
+ if (fencepath) {
+ (void) spec_fence_snode(dip, NULL);
+ /* fencepath is only written to inside the debug macro's arguments */
+ RIO_DEBUG((CE_NOTE, "Fenced: %s",
+ ddi_pathname(dip, fencepath)));
+ }
+
+ return (DDI_WALK_CONTINUE);
+}
+
+/*
+ * Checks the retire database and:
+ *
+ * - if device is present in the retire database, marks the device retired
+ * and fences it off.
+ * - if device is not in retire database, allows the device to attach normally
+ *
+ * To be called only by framework attach code on first attach attempt.
+ *
+ * The caller must own the parent of dip (DEVI_BUSY_OWNED) and dip must
+ * not yet have reached DS_ATTACHED; both are asserted below. When the
+ * device is retired, the whole subtree below dip is marked and fenced as
+ * well, and a PHCI is additionally handed to mdi_phci_retire_finalize().
+ */
+static void
+i_ddi_check_retire(dev_info_t *dip)
+{
+ char *path;
+ dev_info_t *pdip;
+ int circ;
+ int phci_only;
+
+ pdip = ddi_get_parent(dip);
+
+ /*
+ * Root dip is treated special and doesn't take this code path.
+ * Also root can never be retired.
+ */
+ ASSERT(pdip);
+ ASSERT(DEVI_BUSY_OWNED(pdip));
+ ASSERT(i_ddi_node_state(dip) < DS_ATTACHED);
+
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ (void) ddi_pathname(dip, path);
+
+ RIO_VERBOSE((CE_NOTE, "Checking if dip should attach: dip=%p, path=%s",
+ (void *)dip, path));
+
+ /*
+ * Check if this device is in the "retired" store i.e. should
+ * be retired. If not, we have nothing to do.
+ */
+ if (e_ddi_device_retired(path) == 0) {
+ RIO_VERBOSE((CE_NOTE, "device is NOT retired: path=%s", path));
+ kmem_free(path, MAXPATHLEN);
+ return;
+ }
+
+ RIO_DEBUG((CE_NOTE, "attach: device is retired: path=%s", path));
+
+ /*
+ * Mark dips and fence off snodes (if any). The path buffer is
+ * passed as a non-NULL argument so that mark_and_fence() also
+ * fences; mark_and_fence() reuses it as scratch space.
+ */
+ RIO_DEBUG((CE_NOTE, "attach: Mark and fence subtree: path=%s", path));
+ (void) mark_and_fence(dip, path);
+ if (!is_leaf_node(dip)) {
+ ndi_devi_enter(dip, &circ);
+ ddi_walk_devs(ddi_get_child(dip), mark_and_fence, path);
+ ndi_devi_exit(dip, circ);
+ }
+
+ kmem_free(path, MAXPATHLEN);
+
+ /*
+ * We don't want to check the client. We just want to
+ * offline the PHCI
+ */
+ phci_only = 1;
+ if (MDI_PHCI(dip))
+ mdi_phci_retire_finalize(dip, phci_only);
+}
diff --git a/usr/src/uts/common/os/driver_lyr.c b/usr/src/uts/common/os/driver_lyr.c
index f2dea074c1..266e3cbb79 100644
--- a/usr/src/uts/common/os/driver_lyr.c
+++ b/usr/src/uts/common/os/driver_lyr.c
@@ -69,6 +69,11 @@
#include <sys/socketvar.h>
#include <sys/kstr.h>
+/*
+ * Device contract related
+ */
+#include <sys/contract_impl.h>
+#include <sys/contract/device_impl.h>
/*
* Define macros to manipulate snode, vnode, and open device flags
@@ -97,11 +102,23 @@
#define LH_CBDEV (0x2) /* handle to a char/block device */
/*
- * Define marco for devid property lookups
+ * Define macro for devid property lookups
*/
#define DEVID_PROP_FLAGS (DDI_PROP_DONTPASS | \
DDI_PROP_TYPE_STRING|DDI_PROP_CANSLEEP)
+/*
+ * Dummy string for NDI events
+ */
+#define NDI_EVENT_SERVICE "NDI_EVENT_SERVICE"
+
+static void ldi_ev_lock(void);
+static void ldi_ev_unlock(void);
+
+#ifdef LDI_OBSOLETE_EVENT
+int ldi_remove_event_handler(ldi_handle_t lh, ldi_callback_id_t id);
+#endif
+
/*
* globals
@@ -113,6 +130,22 @@ static kmutex_t ldi_handle_hash_lock[LH_HASH_SZ];
static struct ldi_handle *ldi_handle_hash[LH_HASH_SZ];
static size_t ldi_handle_hash_count;
+/* List of registered LDI event callbacks; guarded by ldi_ev_lock() */
+static struct ldi_ev_callback_list ldi_ev_callback_list;
+
+/* Source of callback ids for native LDI events; bumped under ldi_ev_lock() */
+static uint32_t ldi_ev_id_pool = 0;
+
+/*
+ * Description of one native LDI event. The address of an entry in
+ * ldi_ev_cookies[] doubles as the opaque ldi_ev_cookie_t for that event.
+ */
+struct ldi_ev_cookie {
+ char *ck_evname; /* LDI event name */
+ uint_t ck_sync; /* non-zero if ldi_ev_notify() may be used on it */
+ uint_t ck_ctype; /* contract event type matching this LDI event */
+};
+
+static struct ldi_ev_cookie ldi_ev_cookies[] = {
+ { LDI_EV_OFFLINE, 1, CT_DEV_EV_OFFLINE},
+ { LDI_EV_DEGRADE, 0, CT_DEV_EV_DEGRADED},
+ { NULL} /* must terminate list */
+};
+
void
ldi_init(void)
{
@@ -127,6 +160,17 @@ ldi_init(void)
mutex_init(&ldi_ident_hash_lock[i], NULL, MUTEX_DEFAULT, NULL);
ldi_ident_hash[i] = NULL;
}
+
+ /*
+ * Initialize the LDI event subsystem
+ */
+ mutex_init(&ldi_ev_callback_list.le_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&ldi_ev_callback_list.le_cv, NULL, CV_DEFAULT, NULL);
+ ldi_ev_callback_list.le_busy = 0;
+ ldi_ev_callback_list.le_thread = NULL;
+ list_create(&ldi_ev_callback_list.le_head,
+ sizeof (ldi_ev_callback_impl_t),
+ offsetof(ldi_ev_callback_impl_t, lec_list));
}
/*
@@ -334,7 +378,9 @@ handle_alloc(vnode_t *vp, struct ldi_ident *ident)
lhp->lh_ref = 1;
lhp->lh_vp = vp;
lhp->lh_ident = ident;
+#ifdef LDI_OBSOLETE_EVENT
mutex_init(lhp->lh_lock, NULL, MUTEX_DEFAULT, NULL);
+#endif
/* set the device type for this handle */
lhp->lh_type = 0;
@@ -398,10 +444,13 @@ handle_release(struct ldi_handle *lhp)
VN_RELE(lhp->lh_vp);
ident_release(lhp->lh_ident);
+#ifdef LDI_OBSOLETE_EVENT
mutex_destroy(lhp->lh_lock);
+#endif
kmem_free(lhp, sizeof (struct ldi_handle));
}
+#ifdef LDI_OBSOLETE_EVENT
/*
* LDI event manipulation functions
*/
@@ -457,6 +506,7 @@ i_ldi_callback(dev_info_t *dip, ddi_eventcookie_t event_cookie,
lep->le_handler(lep->le_lhp, event_cookie, lep->le_arg, bus_impldata);
}
+#endif
/*
* LDI open helper functions
@@ -1629,6 +1679,9 @@ ldi_close(ldi_handle_t lh, int flag, cred_t *cr)
struct ldi_handle *handlep = (struct ldi_handle *)lh;
struct ldi_event *lep;
int err = 0;
+ int notify = 0;
+ list_t *listp;
+ ldi_ev_callback_impl_t *lecp;
if (lh == NULL)
return (EINVAL);
@@ -1644,6 +1697,8 @@ ldi_close(ldi_handle_t lh, int flag, cred_t *cr)
bflush(dev);
}
+#ifdef LDI_OBSOLETE_EVENT
+
/*
* Any event handlers should have been unregistered by the
* time ldi_close() is called. If they haven't then it's a
@@ -1669,6 +1724,7 @@ ldi_close(ldi_handle_t lh, int flag, cred_t *cr)
"failed to unregister layered event handlers before "
"closing devices", lip->li_modname);
}
+#endif
/* do a layered close on the device */
err = VOP_CLOSE(handlep->lh_vp, flag | FKLYR, 1, (offset_t)0, cr);
@@ -1676,6 +1732,40 @@ ldi_close(ldi_handle_t lh, int flag, cred_t *cr)
LDI_OPENCLOSE((CE_WARN, "%s: lh=0x%p", "ldi close", (void *)lh));
/*
+ * Search the event callback list for callbacks with this
+ * handle. There are 2 cases
+ * 1. Called in the context of a notify. The handle consumer
+ * is releasing its hold on the device to allow a reconfiguration
+ * of the device. Simply NULL out the handle and the notify callback.
+ * The finalize callback is still available so that the consumer
+ * knows of the final disposition of the device.
+ * 2. Not called in the context of notify. NULL out the handle as well
+ * as the notify and finalize callbacks. Since the consumer has
+ * closed the handle, we assume it is not interested in the
+ * notify and finalize callbacks.
+ */
+ ldi_ev_lock();
+
+ if (handlep->lh_flags & LH_FLAGS_NOTIFY)
+ notify = 1;
+ listp = &ldi_ev_callback_list.le_head;
+ for (lecp = list_head(listp); lecp; lecp = list_next(listp, lecp)) {
+ if (lecp->lec_lhp != handlep)
+ continue;
+ lecp->lec_lhp = NULL;
+ lecp->lec_notify = NULL;
+ LDI_EVDBG((CE_NOTE, "ldi_close: NULLed lh and notify"));
+ if (!notify) {
+ LDI_EVDBG((CE_NOTE, "ldi_close: NULLed finalize"));
+ lecp->lec_finalize = NULL;
+ }
+ }
+
+ if (notify)
+ handlep->lh_flags &= ~LH_FLAGS_NOTIFY;
+ ldi_ev_unlock();
+
+ /*
* Free the handle even if the device close failed. why?
*
* If the device close failed we can't really make assumptions
@@ -2678,6 +2768,8 @@ ldi_prop_exists(ldi_handle_t lh, uint_t flags, char *name)
return (res);
}
+#ifdef LDI_OBSOLETE_EVENT
+
int
ldi_get_eventcookie(ldi_handle_t lh, char *name, ddi_eventcookie_t *ecp)
{
@@ -2794,3 +2886,845 @@ ldi_remove_event_handler(ldi_handle_t lh, ldi_callback_id_t id)
kmem_free(lep, sizeof (struct ldi_event));
return (res);
}
+
+#endif
+
+/*
+ * Here are some definitions of terms used in the following LDI events
+ * code:
+ *
+ * "LDI events" AKA "native events": These are events defined by the
+ * "new" LDI event framework. These events are serviced by the LDI event
+ * framework itself and thus are native to it.
+ *
+ * "LDI contract events": These are contract events that correspond to the
+ * LDI events. This mapping of LDI events to contract events is defined by
+ * the ldi_ev_cookies[] array above.
+ *
+ * NDI events: These are events which are serviced by the NDI event subsystem.
+ * The LDI subsystem just provides a thin wrapper around the NDI event
+ * interfaces. These events are therefore *not* native events.
+ */
+
+/*
+ * Return 1 if evname is the name of one of the native LDI events
+ * listed in ldi_ev_cookies[], 0 otherwise.
+ */
+static int
+ldi_native_event(const char *evname)
+{
+	struct ldi_ev_cookie	*ckp = ldi_ev_cookies;
+
+	LDI_EVTRC((CE_NOTE, "ldi_native_event: entered: ev=%s", evname));
+
+	/* the table is terminated by an entry with a NULL name */
+	while (ckp->ck_evname != NULL) {
+		if (strcmp(ckp->ck_evname, evname) == 0)
+			return (1);
+		ckp++;
+	}
+
+	return (0);
+}
+
+/*
+ * Return the ck_sync attribute of the native LDI event called evname.
+ * evname must name a native event; an unknown name panics the system.
+ */
+static uint_t
+ldi_ev_sync_event(const char *evname)
+{
+	struct ldi_ev_cookie	*ckp;
+
+	ASSERT(ldi_native_event(evname));
+
+	LDI_EVTRC((CE_NOTE, "ldi_ev_sync_event: entered: %s", evname));
+
+	for (ckp = ldi_ev_cookies; ckp->ck_evname != NULL; ckp++) {
+		if (strcmp(ckp->ck_evname, evname) != 0)
+			continue;
+		return (ckp->ck_sync);
+	}
+
+	/*
+	 * Reaching this point is not expected as long as every LDI
+	 * event is contract based. Should non-contract LDI events be
+	 * introduced, a "special" token will indicate that no
+	 * contracts correspond to the event.
+	 */
+	cmn_err(CE_PANIC, "Unknown LDI event: %s", evname);
+
+	return (0);
+}
+
+/*
+ * Return the contract event type (ck_ctype) that corresponds to the
+ * native LDI event called evname. evname must name a native event; an
+ * unknown name panics the system.
+ */
+static uint_t
+ldi_contract_event(const char *evname)
+{
+	struct ldi_ev_cookie	*ckp;
+
+	ASSERT(ldi_native_event(evname));
+
+	LDI_EVTRC((CE_NOTE, "ldi_contract_event: entered: %s", evname));
+
+	for (ckp = ldi_ev_cookies; ckp->ck_evname != NULL; ckp++) {
+		if (strcmp(ckp->ck_evname, evname) != 0)
+			continue;
+		return (ckp->ck_ctype);
+	}
+
+	/*
+	 * Reaching this point is not expected as long as every LDI
+	 * event is contract based. Should non-contract LDI events be
+	 * introduced, a "special" token will indicate that no
+	 * contracts correspond to the event.
+	 */
+	cmn_err(CE_PANIC, "Unknown LDI event: %s", evname);
+
+	return (0);
+}
+
+/*
+ * Map an event cookie to an event name. A cookie that is the address
+ * of an ldi_ev_cookies[] entry yields that entry's name; any other
+ * cookie is taken to belong to the NDI event service and yields the
+ * generic NDI_EVENT_SERVICE string.
+ */
+char *
+ldi_ev_get_type(ldi_ev_cookie_t cookie)
+{
+	struct ldi_ev_cookie	*wanted = (struct ldi_ev_cookie *)cookie;
+	struct ldi_ev_cookie	*ckp;
+
+	for (ckp = ldi_ev_cookies; ckp->ck_evname != NULL; ckp++) {
+		if (ckp != wanted)
+			continue;
+		LDI_EVTRC((CE_NOTE, "ldi_ev_get_type: LDI: %s",
+		    ckp->ck_evname));
+		return (ckp->ck_evname);
+	}
+
+	/* no table entry matched: not a native LDI event */
+	LDI_EVTRC((CE_NOTE, "ldi_ev_get_type: is NDI"));
+	return (NDI_EVENT_SERVICE);
+}
+
+/*
+ * Return 1 if cookie is the address of an ldi_ev_cookies[] entry, i.e.
+ * identifies a native LDI event, and 0 otherwise (NDI event cookie).
+ */
+static int
+ldi_native_cookie(ldi_ev_cookie_t cookie)
+{
+	struct ldi_ev_cookie	*wanted = (struct ldi_ev_cookie *)cookie;
+	struct ldi_ev_cookie	*ckp = ldi_ev_cookies;
+
+	while (ckp->ck_evname != NULL) {
+		if (ckp == wanted) {
+			LDI_EVTRC((CE_NOTE, "ldi_native_cookie: native LDI"));
+			return (1);
+		}
+		ckp++;
+	}
+
+	LDI_EVTRC((CE_NOTE, "ldi_native_cookie: is NDI"));
+	return (0);
+}
+
+/*
+ * Look up the native LDI event called evname and return its cookie
+ * (the address of its ldi_ev_cookies[] entry), or NULL if evname is
+ * not a native LDI event.
+ */
+static ldi_ev_cookie_t
+ldi_get_native_cookie(const char *evname)
+{
+	struct ldi_ev_cookie	*ckp;
+
+	for (ckp = ldi_ev_cookies; ckp->ck_evname != NULL; ckp++) {
+		if (strcmp(ckp->ck_evname, evname) != 0)
+			continue;
+		LDI_EVTRC((CE_NOTE, "ldi_get_native_cookie: found"));
+		return ((ldi_ev_cookie_t)ckp);
+	}
+
+	LDI_EVTRC((CE_NOTE, "ldi_get_native_cookie: NOT found"));
+	return (NULL);
+}
+
+/*
+ * ldi_ev_lock() needs to be recursive, since layered drivers may call
+ * other LDI interfaces (such as ldi_close()) from within the context of
+ * a notify callback. Since the notify callback is called with the
+ * ldi_ev_lock() held and ldi_close() also grabs ldi_ev_lock, the lock needs
+ * to be recursive.
+ *
+ * The lock is built from le_lock/le_cv: le_thread records the owning
+ * thread and le_busy its recursion depth. Other threads wait on le_cv
+ * until le_busy drops to zero. Every call must be paired with a call
+ * to ldi_ev_unlock() by the same thread.
+ */
+static void
+ldi_ev_lock(void)
+{
+ LDI_EVTRC((CE_NOTE, "ldi_ev_lock: entered"));
+
+ mutex_enter(&ldi_ev_callback_list.le_lock);
+ if (ldi_ev_callback_list.le_thread == curthread) {
+ /* recursive acquisition by the current owner */
+ ASSERT(ldi_ev_callback_list.le_busy >= 1);
+ ldi_ev_callback_list.le_busy++;
+ } else {
+ /* wait for the current owner (if any) to fully release */
+ while (ldi_ev_callback_list.le_busy)
+ cv_wait(&ldi_ev_callback_list.le_cv,
+ &ldi_ev_callback_list.le_lock);
+ ASSERT(ldi_ev_callback_list.le_thread == NULL);
+ ldi_ev_callback_list.le_busy = 1;
+ ldi_ev_callback_list.le_thread = curthread;
+ }
+ mutex_exit(&ldi_ev_callback_list.le_lock);
+
+ LDI_EVTRC((CE_NOTE, "ldi_ev_lock: exit"));
+}
+
+/*
+ * Drop one level of the recursive lock taken by ldi_ev_lock(). Must be
+ * called by the owning thread. When the recursion count reaches zero the
+ * lock becomes free and one waiter on le_cv is signalled.
+ */
+static void
+ldi_ev_unlock(void)
+{
+ LDI_EVTRC((CE_NOTE, "ldi_ev_unlock: entered"));
+ mutex_enter(&ldi_ev_callback_list.le_lock);
+ ASSERT(ldi_ev_callback_list.le_thread == curthread);
+ ASSERT(ldi_ev_callback_list.le_busy >= 1);
+
+ ldi_ev_callback_list.le_busy--;
+ if (ldi_ev_callback_list.le_busy == 0) {
+ ldi_ev_callback_list.le_thread = NULL;
+ cv_signal(&ldi_ev_callback_list.le_cv);
+ }
+ mutex_exit(&ldi_ev_callback_list.le_lock);
+ LDI_EVTRC((CE_NOTE, "ldi_ev_unlock: exit"));
+}
+
+/*
+ * Translate an event name into an event cookie for the device behind
+ * the LDI handle lh.
+ *
+ * Native LDI event names are resolved from ldi_ev_cookies[] without
+ * looking at the device. Any other name is handed to
+ * ddi_get_eventcookie() on the handle's devinfo node, which is held
+ * only for the duration of that call.
+ *
+ * Returns LDI_EV_SUCCESS with *cookiep set, or LDI_EV_FAILURE on invalid
+ * arguments, when no devinfo node can be found for the handle, or when
+ * the NDI lookup fails. Once the arguments have been validated,
+ * *cookiep is NULL on every failure return.
+ */
+int
+ldi_ev_get_cookie(ldi_handle_t lh, char *evname, ldi_ev_cookie_t *cookiep)
+{
+ struct ldi_handle *handlep = (struct ldi_handle *)lh;
+ dev_info_t *dip;
+ dev_t dev;
+ int res;
+ struct snode *csp;
+ ddi_eventcookie_t ddi_cookie;
+ ldi_ev_cookie_t tcookie;
+
+ LDI_EVDBG((CE_NOTE, "ldi_ev_get_cookie: entered: evname=%s",
+ evname ? evname : "<NULL>"));
+
+ if (lh == NULL || evname == NULL ||
+ strlen(evname) == 0 || cookiep == NULL) {
+ LDI_EVDBG((CE_NOTE, "ldi_ev_get_cookie: invalid args"));
+ return (LDI_EV_FAILURE);
+ }
+
+ *cookiep = NULL;
+
+ /*
+ * First check if it is a LDI native event
+ */
+ tcookie = ldi_get_native_cookie(evname);
+ if (tcookie) {
+ LDI_EVDBG((CE_NOTE, "ldi_ev_get_cookie: got native cookie"));
+ *cookiep = tcookie;
+ return (LDI_EV_SUCCESS);
+ }
+
+ /*
+ * Not a LDI native event. Try NDI event services
+ */
+
+ dev = handlep->lh_vp->v_rdev;
+
+ /*
+ * Prefer the dip cached in the common snode (sampled and held
+ * under s_lock); fall back to a lookup by dev_t.
+ */
+ csp = VTOCS(handlep->lh_vp);
+ mutex_enter(&csp->s_lock);
+ if ((dip = csp->s_dip) != NULL)
+ e_ddi_hold_devi(dip);
+ mutex_exit(&csp->s_lock);
+ if (dip == NULL)
+ dip = e_ddi_hold_devi_by_dev(dev, 0);
+
+ if (dip == NULL) {
+ cmn_err(CE_WARN, "ldi_ev_get_cookie: No devinfo node for LDI "
+ "handle: %p", (void *)handlep);
+ return (LDI_EV_FAILURE);
+ }
+
+ LDI_EVDBG((CE_NOTE, "Calling ddi_get_eventcookie: dip=%p, ev=%s",
+ (void *)dip, evname));
+
+ res = ddi_get_eventcookie(dip, evname, &ddi_cookie);
+
+ ddi_release_devi(dip);
+
+ if (res == DDI_SUCCESS) {
+ LDI_EVDBG((CE_NOTE, "ldi_ev_get_cookie: NDI cookie found"));
+ *cookiep = (ldi_ev_cookie_t)ddi_cookie;
+ return (LDI_EV_SUCCESS);
+ } else {
+ LDI_EVDBG((CE_WARN, "ldi_ev_get_cookie: NDI cookie: failed"));
+ return (LDI_EV_FAILURE);
+ }
+}
+
+/*
+ * NDI event handler installed by ldi_ev_register_callbacks() for
+ * non-native (NDI) event cookies. arg is the ldi_ev_callback_impl_t
+ * that was registered; the event is forwarded to its finalize callback.
+ *
+ * The finalize callback is invoked with the same five-argument form
+ * used by ldi_invoke_notify() and ldi_invoke_finalize(): handle, cookie,
+ * result, consumer argument, event data. An NDI event carries no
+ * result of its own, so LDI_EV_SUCCESS is passed.
+ */
+/*ARGSUSED*/
+static void
+i_ldi_ev_callback(dev_info_t *dip, ddi_eventcookie_t event_cookie,
+    void *arg, void *ev_data)
+{
+	ldi_ev_callback_impl_t	*lecp = (ldi_ev_callback_impl_t *)arg;
+
+	ASSERT(lecp != NULL);
+	ASSERT(!ldi_native_cookie(lecp->lec_cookie));
+	ASSERT(lecp->lec_lhp);
+	ASSERT(lecp->lec_notify == NULL);
+	ASSERT(lecp->lec_finalize);
+
+	LDI_EVDBG((CE_NOTE, "i_ldi_ev_callback: ldh=%p, cookie=%p, arg=%p, "
+	    "ev_data=%p", (void *)lecp->lec_lhp, (void *)event_cookie,
+	    (void *)lecp->lec_arg, (void *)ev_data));
+
+	lecp->lec_finalize(lecp->lec_lhp, (ldi_ev_cookie_t)event_cookie,
+	    LDI_EV_SUCCESS, lecp->lec_arg, ev_data);
+}
+
+/*
+ * Register notify/finalize callbacks for the event identified by cookie
+ * on the device behind the LDI handle lh.
+ *
+ *	lh	open LDI handle
+ *	cookie	event cookie obtained from ldi_ev_get_cookie()
+ *	callb	callback vector; cb_vers must be LDI_EV_CB_VERS and at
+ *		least one of cb_notify/cb_finalize must be set. For an NDI
+ *		event cookie only cb_finalize is accepted.
+ *	arg	opaque argument handed back to the callbacks
+ *	id	on success, receives the id to pass to
+ *		ldi_ev_remove_callbacks(); set to 0 on failure once the
+ *		arguments have been validated
+ *
+ * Returns LDI_EV_SUCCESS or LDI_EV_FAILURE. Must not be called from
+ * interrupt context.
+ */
+int
+ldi_ev_register_callbacks(ldi_handle_t lh, ldi_ev_cookie_t cookie,
+    ldi_ev_callback_t *callb, void *arg, ldi_callback_id_t *id)
+{
+	struct ldi_handle	*lhp = (struct ldi_handle *)lh;
+	ldi_ev_callback_impl_t	*lecp;
+	dev_t			dev;
+	struct snode		*csp;
+	dev_info_t		*dip;
+	int			ddi_event;
+
+	ASSERT(!servicing_interrupt());
+
+	if (lh == NULL || cookie == NULL || callb == NULL || id == NULL) {
+		LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks: Invalid args"));
+		return (LDI_EV_FAILURE);
+	}
+
+	if (callb->cb_vers != LDI_EV_CB_VERS) {
+		LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks: Invalid vers"));
+		return (LDI_EV_FAILURE);
+	}
+
+	if (callb->cb_notify == NULL && callb->cb_finalize == NULL) {
+		LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks: NULL callb"));
+		return (LDI_EV_FAILURE);
+	}
+
+	*id = 0;
+
+	/*
+	 * Find and hold the devinfo node behind the handle: prefer the
+	 * dip cached in the common snode, else look it up by dev_t.
+	 */
+	dev = lhp->lh_vp->v_rdev;
+	csp = VTOCS(lhp->lh_vp);
+	mutex_enter(&csp->s_lock);
+	if ((dip = csp->s_dip) != NULL)
+		e_ddi_hold_devi(dip);
+	mutex_exit(&csp->s_lock);
+	if (dip == NULL)
+		dip = e_ddi_hold_devi_by_dev(dev, 0);
+
+	if (dip == NULL) {
+		cmn_err(CE_WARN, "ldi_ev_register: No devinfo node for "
+		    "LDI handle: %p", (void *)lhp);
+		return (LDI_EV_FAILURE);
+	}
+
+	ddi_event = !ldi_native_cookie(cookie);
+
+	if (ddi_event && (callb->cb_notify || callb->cb_finalize == NULL)) {
+		/*
+		 * NDI event services only accept finalize
+		 */
+		cmn_err(CE_WARN, "%s: module: %s: NDI event cookie. "
+		    "Only finalize"
+		    " callback supported with this cookie",
+		    "ldi_ev_register_callbacks",
+		    lhp->lh_ident->li_modname);
+		ddi_release_devi(dip);
+		return (LDI_EV_FAILURE);
+	}
+
+	lecp = kmem_zalloc(sizeof (ldi_ev_callback_impl_t), KM_SLEEP);
+
+	/*
+	 * Fill in the entry completely before it is made visible to
+	 * anyone else. For an NDI cookie the entry becomes reachable as
+	 * soon as ddi_add_event_handler() returns, and i_ldi_ev_callback()
+	 * dereferences lec_finalize, lec_lhp and lec_arg; registering a
+	 * still-zeroed entry would let an early event call through a
+	 * NULL pointer.
+	 */
+	lecp->lec_lhp = lhp;
+	lecp->lec_dev = dev;
+	lecp->lec_spec = (lhp->lh_vp->v_type == VCHR) ?
+	    S_IFCHR : S_IFBLK;
+	lecp->lec_notify = callb->cb_notify;
+	lecp->lec_finalize = callb->cb_finalize;
+	lecp->lec_arg = arg;
+	lecp->lec_cookie = cookie;
+	lecp->lec_dip = dip;
+
+	if (ddi_event) {
+		/* the NDI framework supplies the callback id */
+		if (ddi_add_event_handler(dip, (ddi_eventcookie_t)cookie,
+		    i_ldi_ev_callback, (void *)lecp,
+		    (ddi_callback_id_t *)&lecp->lec_id)
+		    != DDI_SUCCESS) {
+			kmem_free(lecp, sizeof (ldi_ev_callback_impl_t));
+			ddi_release_devi(dip);
+			LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks(): "
+			    "ddi_add_event_handler failed"));
+			return (LDI_EV_FAILURE);
+		}
+		LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks(): "
+		    "ddi_add_event_handler success"));
+	}
+
+	ldi_ev_lock();
+
+	/*
+	 * Add the notify/finalize callback to the LDI's list of callbacks.
+	 * Ids for native events are generated here, under the lock.
+	 */
+	if (!ddi_event)
+		lecp->lec_id = (void *)(uintptr_t)(++ldi_ev_id_pool);
+	else
+		ASSERT(lecp->lec_id);
+	list_insert_tail(&ldi_ev_callback_list.le_head, lecp);
+
+	*id = (ldi_callback_id_t)lecp->lec_id;
+
+	ldi_ev_unlock();
+
+	ddi_release_devi(dip);
+
+	LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks: registered "
+	    "notify/finalize"));
+
+	return (LDI_EV_SUCCESS);
+}
+
+/*
+ * Decide whether the registered callback lecp applies to the device
+ * described by (dip, dev, spec_type). The dip must always be the same.
+ * With dev == DDI_DEV_T_ANY every minor of the dip matches; otherwise
+ * both the dev_t and the spec type must be equal as well.
+ *
+ * Returns 1 on a match and 0 otherwise.
+ */
+static int
+ldi_ev_device_match(ldi_ev_callback_impl_t *lecp, dev_info_t *dip,
+    dev_t dev, int spec_type)
+{
+	ASSERT(lecp);
+	ASSERT(dip);
+	ASSERT(dev != DDI_DEV_T_NONE);
+	ASSERT(dev != NODEV);
+	ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
+	    (spec_type == S_IFCHR || spec_type == S_IFBLK));
+	ASSERT(lecp->lec_dip);
+	ASSERT(lecp->lec_spec == S_IFCHR || lecp->lec_spec == S_IFBLK);
+	ASSERT(lecp->lec_dev != DDI_DEV_T_ANY);
+	ASSERT(lecp->lec_dev != DDI_DEV_T_NONE);
+	ASSERT(lecp->lec_dev != NODEV);
+
+	if (lecp->lec_dip != dip)
+		return (0);
+
+	/* a specific minor was requested: dev_t and spec type must agree */
+	if (dev != DDI_DEV_T_ANY &&
+	    (lecp->lec_dev != dev || lecp->lec_spec != spec_type))
+		return (0);
+
+	LDI_EVTRC((CE_NOTE, "ldi_ev_device_match: MATCH dip=%p", (void *)dip));
+
+	return (1);
+}
+
+/*
+ * LDI framework function to post a "notify" event to all layered drivers
+ * that have registered for that event
+ *
+ * Returns:
+ * LDI_EV_SUCCESS - registered callbacks allow event
+ * LDI_EV_FAILURE - registered callbacks block event
+ * LDI_EV_NONE - No matching LDI callbacks
+ *
+ * This function is *not* to be called by layered drivers. It is for I/O
+ * framework code in Solaris, such as the I/O retire code and DR code
+ * to call while servicing a device event such as offline or degraded.
+ *
+ * The callback list is walked from the head with ldi_ev_lock() held. The
+ * walk stops at the first notify callback that does not return
+ * LDI_EV_SUCCESS; the list is then walked backwards from the entry
+ * preceding the failing one and the finalize callback of every matching
+ * entry is called with LDI_EV_FAILURE.
+ *
+ * NOTE(review): ldi_ev_lock() is recursive, so a callback can reach
+ * ldi_ev_remove_callbacks(), which unlinks and frees list entries. The
+ * walks below keep using lecp after the callback returns - confirm that
+ * consumers never remove callbacks from within notify/finalize.
+ */
+int
+ldi_invoke_notify(dev_info_t *dip, dev_t dev, int spec_type, char *event,
+ void *ev_data)
+{
+ ldi_ev_callback_impl_t *lecp;
+ list_t *listp;
+ int ret;
+ char *lec_event;
+
+ ASSERT(dip);
+ ASSERT(dev != DDI_DEV_T_NONE);
+ ASSERT(dev != NODEV);
+ ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
+ (spec_type == S_IFCHR || spec_type == S_IFBLK));
+ ASSERT(event);
+ ASSERT(ldi_native_event(event));
+ ASSERT(ldi_ev_sync_event(event));
+
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): entered: dip=%p, ev=%s",
+ (void *)dip, event));
+
+ ret = LDI_EV_NONE;
+ ldi_ev_lock();
+ listp = &ldi_ev_callback_list.le_head;
+ for (lecp = list_head(listp); lecp; lecp = list_next(listp, lecp)) {
+
+ /* Check if matching device */
+ if (!ldi_ev_device_match(lecp, dip, dev, spec_type))
+ continue;
+
+ if (lecp->lec_lhp == NULL) {
+ /*
+ * Consumer has unregistered the handle and so
+ * is no longer interested in notify events.
+ */
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): No LDI "
+ "handle, skipping"));
+ continue;
+ }
+
+ if (lecp->lec_notify == NULL) {
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): No notify "
+ "callback. skipping"));
+ continue; /* not interested in notify */
+ }
+
+ /*
+ * Check if matching event
+ */
+ lec_event = ldi_ev_get_type(lecp->lec_cookie);
+ if (strcmp(event, lec_event) != 0) {
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): Not matching"
+ " event {%s,%s}. skipping", event, lec_event));
+ continue;
+ }
+
+ /* lets ldi_close() detect that it runs in notify context */
+ lecp->lec_lhp->lh_flags |= LH_FLAGS_NOTIFY;
+ if (lecp->lec_notify(lecp->lec_lhp, lecp->lec_cookie,
+ lecp->lec_arg, ev_data) != LDI_EV_SUCCESS) {
+ ret = LDI_EV_FAILURE;
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): notify"
+ " FAILURE"));
+ break;
+ }
+
+ /* We have a matching callback that allows the event to occur */
+ ret = LDI_EV_SUCCESS;
+
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): 1 consumer success"));
+ }
+
+ if (ret != LDI_EV_FAILURE)
+ goto out;
+
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): undoing notify"));
+
+ /*
+ * Undo notifies already sent. lecp is the entry whose notify
+ * failed; start with its predecessor.
+ */
+ lecp = list_prev(listp, lecp);
+ for (; lecp; lecp = list_prev(listp, lecp)) {
+
+ /*
+ * Check if matching device
+ */
+ if (!ldi_ev_device_match(lecp, dip, dev, spec_type))
+ continue;
+
+
+ if (lecp->lec_finalize == NULL) {
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): no finalize, "
+ "skipping"));
+ continue; /* not interested in finalize */
+ }
+
+ /*
+ * it is possible that in response to a notify event a
+ * layered driver closed its LDI handle so it is ok
+ * to have a NULL LDI handle for finalize. The layered
+ * driver is expected to maintain state in its "arg"
+ * parameter to keep track of the closed device.
+ */
+
+ /* Check if matching event */
+ lec_event = ldi_ev_get_type(lecp->lec_cookie);
+ if (strcmp(event, lec_event) != 0) {
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): not matching "
+ "event: %s,%s, skipping", event, lec_event));
+ continue;
+ }
+
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): calling finalize"));
+
+ lecp->lec_finalize(lecp->lec_lhp, lecp->lec_cookie,
+ LDI_EV_FAILURE, lecp->lec_arg, ev_data);
+
+ /*
+ * If LDI native event and LDI handle closed in context
+ * of notify, NULL out the finalize callback as we have
+ * already called the 1 finalize above allowed in this situation
+ */
+ if (lecp->lec_lhp == NULL &&
+ ldi_native_cookie(lecp->lec_cookie)) {
+ LDI_EVDBG((CE_NOTE,
+ "ldi_invoke_notify(): NULL-ing finalize after "
+ "calling 1 finalize following ldi_close"));
+ lecp->lec_finalize = NULL;
+ }
+ }
+
+out:
+ ldi_ev_unlock();
+
+ if (ret == LDI_EV_NONE) {
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): no matching "
+ "LDI callbacks"));
+ }
+
+ return (ret);
+}
+
+/*
+ * Framework function to be called from a layered driver to propagate
+ * LDI "notify" events to exported minors.
+ *
+ * This function is a public interface exported by the LDI framework
+ * for use by layered drivers to propagate device events up the software
+ * stack.
+ *
+ * dip/minor/spec_type identify the exported minor; the dev_t is built
+ * from the driver major of dip and minor. cookie must be a native LDI
+ * event cookie for an event whose ck_sync attribute is set; any other
+ * native event panics. Contract negotiation is run first, then the LDI
+ * notify callbacks.
+ *
+ * Returns LDI_EV_FAILURE if the major number cannot be derived, if the
+ * contract negotiation is NACKed or if an LDI notify callback blocks
+ * the event; LDI_EV_SUCCESS otherwise.
+ */
+int
+ldi_ev_notify(dev_info_t *dip, minor_t minor, int spec_type,
+ ldi_ev_cookie_t cookie, void *ev_data)
+{
+ char *evname = ldi_ev_get_type(cookie);
+ uint_t ct_evtype;
+ dev_t dev;
+ major_t major;
+ int retc;
+ int retl;
+
+ ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
+ ASSERT(dip);
+ ASSERT(ldi_native_cookie(cookie));
+
+ LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): entered: event=%s, dip=%p",
+ evname, (void *)dip));
+
+ if (!ldi_ev_sync_event(evname)) {
+ cmn_err(CE_PANIC, "ldi_ev_notify(): %s not a "
+ "negotiatable event", evname);
+ return (LDI_EV_SUCCESS);
+ }
+
+ major = ddi_driver_major(dip);
+ if (major == (major_t)-1) {
+ char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ (void) ddi_pathname(dip, path);
+ cmn_err(CE_WARN, "ldi_ev_notify: cannot derive major number "
+ "for device %s", path);
+ kmem_free(path, MAXPATHLEN);
+ return (LDI_EV_FAILURE);
+ }
+ dev = makedevice(major, minor);
+
+ /*
+ * Generate negotiation contract events on contracts (if any) associated
+ * with this minor.
+ */
+ LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): calling contract nego."));
+ ct_evtype = ldi_contract_event(evname);
+ retc = contract_device_negotiate(dip, dev, spec_type, ct_evtype);
+ if (retc == CT_NACK) {
+ LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): contract neg. NACK"));
+ return (LDI_EV_FAILURE);
+ }
+
+ LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): LDI invoke notify"));
+ retl = ldi_invoke_notify(dip, dev, spec_type, evname, ev_data);
+ if (retl == LDI_EV_FAILURE) {
+ LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): ldi_invoke_notify "
+ "returned FAILURE. Calling contract negend"));
+ /* end the contract negotiation started above with a failure */
+ contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE);
+ return (LDI_EV_FAILURE);
+ }
+
+ /*
+ * The very fact that we are here indicates that there is a
+ * LDI callback (and hence a constraint) for the retire of the
+ * HW device. So we just return success even if there are no
+ * contracts or LDI callbacks against the minors layered on top
+ * of the HW minors
+ */
+ LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): returning SUCCESS"));
+ return (LDI_EV_SUCCESS);
+}
+
+/*
+ * LDI framework function to invoke "finalize" callbacks for all layered
+ * drivers that have registered callbacks for that event.
+ *
+ * This function is *not* to be called by layered drivers. It is for I/O
+ * framework code in Solaris, such as the I/O retire code and DR code
+ * to call while servicing a device event such as offline or degraded.
+ *
+ * ldi_result (LDI_EV_SUCCESS or LDI_EV_FAILURE) is passed through to
+ * every matching finalize callback. The callback list is walked with
+ * ldi_ev_lock() held.
+ *
+ * NOTE(review): as ldi_ev_lock() is recursive, a finalize callback that
+ * calls ldi_ev_remove_callbacks() would free the entry this walk is
+ * still using - confirm that consumers do not do so.
+ */
+void
+ldi_invoke_finalize(dev_info_t *dip, dev_t dev, int spec_type, char *event,
+ int ldi_result, void *ev_data)
+{
+ ldi_ev_callback_impl_t *lecp;
+ list_t *listp;
+ char *lec_event;
+ int found = 0;
+
+ ASSERT(dip);
+ ASSERT(dev != DDI_DEV_T_NONE);
+ ASSERT(dev != NODEV);
+ ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
+ (spec_type == S_IFCHR || spec_type == S_IFBLK));
+ ASSERT(event);
+ ASSERT(ldi_native_event(event));
+ ASSERT(ldi_result == LDI_EV_SUCCESS || ldi_result == LDI_EV_FAILURE);
+
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): entered: dip=%p, result=%d"
+ " event=%s", (void *)dip, ldi_result, event));
+
+ ldi_ev_lock();
+ listp = &ldi_ev_callback_list.le_head;
+ for (lecp = list_head(listp); lecp; lecp = list_next(listp, lecp)) {
+
+ if (lecp->lec_finalize == NULL) {
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): No "
+ "finalize. Skipping"));
+ continue; /* Not interested in finalize */
+ }
+
+ /*
+ * Check if matching device
+ */
+ if (!ldi_ev_device_match(lecp, dip, dev, spec_type))
+ continue;
+
+ /*
+ * It is valid for the LDI handle to be NULL during finalize.
+ * The layered driver may have done an LDI close in the notify
+ * callback.
+ */
+
+ /*
+ * Check if matching event
+ */
+ lec_event = ldi_ev_get_type(lecp->lec_cookie);
+ if (strcmp(event, lec_event) != 0) {
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): Not "
+ "matching event {%s,%s}. Skipping",
+ event, lec_event));
+ continue;
+ }
+
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): calling finalize"));
+
+ found = 1;
+
+ lecp->lec_finalize(lecp->lec_lhp, lecp->lec_cookie,
+ ldi_result, lecp->lec_arg, ev_data);
+
+ /*
+ * If LDI native event and LDI handle closed in context
+ * of notify, NULL out the finalize callback as we have
+ * already called the 1 finalize above allowed in this situation
+ */
+ if (lecp->lec_lhp == NULL &&
+ ldi_native_cookie(lecp->lec_cookie)) {
+ LDI_EVDBG((CE_NOTE,
+ "ldi_invoke_finalize(): NULLing finalize after "
+ "calling 1 finalize following ldi_close"));
+ lecp->lec_finalize = NULL;
+ }
+ }
+ ldi_ev_unlock();
+
+ /* "found" only decides whether the debug message below is emitted */
+ if (found)
+ return;
+
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): no matching callbacks"));
+}
+
+/*
+ * Framework function to be called from a layered driver to propagate
+ * LDI "finalize" events to exported minors.
+ *
+ * This function is a public interface exported by the LDI framework
+ * for use by layered drivers to propagate device events up the software
+ * stack.
+ *
+ * dip/minor/spec_type identify the exported minor; the dev_t is built
+ * from the driver major of dip and minor. ldi_result is LDI_EV_SUCCESS
+ * or LDI_EV_FAILURE and is translated to CT_EV_SUCCESS/CT_EV_FAILURE for
+ * the contract subsystem. cookie must be a native LDI event cookie.
+ * Contracts are finalized first, then the LDI finalize callbacks run.
+ * If the major number cannot be derived a warning is logged and nothing
+ * is finalized.
+ */
+void
+ldi_ev_finalize(dev_info_t *dip, minor_t minor, int spec_type, int ldi_result,
+ ldi_ev_cookie_t cookie, void *ev_data)
+{
+ dev_t dev;
+ major_t major;
+ char *evname;
+ int ct_result = (ldi_result == LDI_EV_SUCCESS) ?
+ CT_EV_SUCCESS : CT_EV_FAILURE;
+ uint_t ct_evtype;
+
+ ASSERT(dip);
+ ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
+ ASSERT(ldi_result == LDI_EV_SUCCESS || ldi_result == LDI_EV_FAILURE);
+ ASSERT(ldi_native_cookie(cookie));
+
+ LDI_EVDBG((CE_NOTE, "ldi_ev_finalize: entered: dip=%p", (void *)dip));
+
+ major = ddi_driver_major(dip);
+ if (major == (major_t)-1) {
+ char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ (void) ddi_pathname(dip, path);
+ cmn_err(CE_WARN, "ldi_ev_finalize: cannot derive major number "
+ "for device %s", path);
+ kmem_free(path, MAXPATHLEN);
+ return;
+ }
+ dev = makedevice(major, minor);
+
+ evname = ldi_ev_get_type(cookie);
+
+ LDI_EVDBG((CE_NOTE, "ldi_ev_finalize: calling contracts"));
+ ct_evtype = ldi_contract_event(evname);
+ contract_device_finalize(dip, dev, spec_type, ct_evtype, ct_result);
+
+ LDI_EVDBG((CE_NOTE, "ldi_ev_finalize: calling ldi_invoke_finalize"));
+ ldi_invoke_finalize(dip, dev, spec_type, evname, ldi_result, ev_data);
+}
+
+/*
+ * Remove the callbacks registered under id by
+ * ldi_ev_register_callbacks() and free the list entry. For an NDI
+ * event cookie the underlying NDI event handler is removed as well; if
+ * that fails the entry is put back at the tail of the list.
+ *
+ * Returns LDI_EV_FAILURE for id 0 or when the NDI handler cannot be
+ * removed, LDI_EV_SUCCESS otherwise. Must not be called from interrupt
+ * context.
+ *
+ * NOTE(review): an id that is not on the list logs a warning but still
+ * returns LDI_EV_SUCCESS - confirm this is intended.
+ */
+int
+ldi_ev_remove_callbacks(ldi_callback_id_t id)
+{
+ ldi_ev_callback_impl_t *lecp;
+ ldi_ev_callback_impl_t *next;
+ ldi_ev_callback_impl_t *found;
+ list_t *listp;
+
+ ASSERT(!servicing_interrupt());
+
+ if (id == 0) {
+ cmn_err(CE_WARN, "ldi_ev_remove_callbacks: Invalid ID 0");
+ return (LDI_EV_FAILURE);
+ }
+
+ LDI_EVDBG((CE_NOTE, "ldi_ev_remove_callbacks: entered: id=%p",
+ (void *)id));
+
+ ldi_ev_lock();
+
+ listp = &ldi_ev_callback_list.le_head;
+ next = found = NULL;
+ /* "next" is fetched before list_remove() unlinks the current entry */
+ for (lecp = list_head(listp); lecp; lecp = next) {
+ next = list_next(listp, lecp);
+ if (lecp->lec_id == id) {
+ ASSERT(found == NULL);
+ list_remove(listp, lecp);
+ found = lecp;
+ }
+ }
+ ldi_ev_unlock();
+
+ if (found == NULL) {
+ cmn_err(CE_WARN, "No LDI event handler for id (%p)",
+ (void *)id);
+ return (LDI_EV_SUCCESS);
+ }
+
+ /* the NDI handler is removed without ldi_ev_lock() held */
+ if (!ldi_native_cookie(found->lec_cookie)) {
+ ASSERT(found->lec_notify == NULL);
+ if (ddi_remove_event_handler((ddi_callback_id_t)id)
+ != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "failed to remove NDI event handler "
+ "for id (%p)", (void *)id);
+ ldi_ev_lock();
+ list_insert_tail(listp, found);
+ ldi_ev_unlock();
+ return (LDI_EV_FAILURE);
+ }
+ LDI_EVDBG((CE_NOTE, "ldi_ev_remove_callbacks: NDI event "
+ "service removal succeeded"));
+ } else {
+ LDI_EVDBG((CE_NOTE, "ldi_ev_remove_callbacks: removed "
+ "LDI native callbacks"));
+ }
+ kmem_free(found, sizeof (ldi_ev_callback_impl_t));
+
+ return (LDI_EV_SUCCESS);
+}
diff --git a/usr/src/uts/common/os/modctl.c b/usr/src/uts/common/os/modctl.c
index 31108c215b..1f821fef85 100644
--- a/usr/src/uts/common/os/modctl.c
+++ b/usr/src/uts/common/os/modctl.c
@@ -161,8 +161,6 @@ extern int make_mbind(char *, int, char *, struct bind **);
static int minorperm_loaded = 0;
-
-
void
mod_setup(void)
{
@@ -798,6 +796,217 @@ modctl_getmaj(char *uname, uint_t ulen, int *umajorp)
return (0);
}
+/*
+ * Convert a packed constraint buffer into a NULL-terminated array of
+ * individually allocated strings.
+ *
+ * "constraints" is a kmem-allocated buffer of "len" bytes that is expected
+ * to hold a sequence of NUL-terminated strings ended by an empty string.
+ * Each string is duplicated with i_ddi_strdup(). The buffer itself is always
+ * freed here, so the caller must not touch it after this call.
+ *
+ * The buffer is filled by copyin() from a user address in modctl_retire(),
+ * so its contents are untrusted. The scan is therefore bounded by "len":
+ * it stops at the first empty string or at the end of the buffer, and a
+ * trailing fragment with no NUL inside the buffer is ignored rather than
+ * read past the allocation.
+ *
+ * Returns the array (n strings followed by a NULL slot), or NULL when the
+ * buffer contains no complete, non-empty string.
+ */
+static char **
+convert_constraint_string(char *constraints, size_t len)
+{
+	int	i;
+	int	n;
+	char	*p;
+	char	*q;
+	char	*end;
+	char	**array;
+
+	ASSERT(constraints != NULL);
+	ASSERT(len > 0);
+
+	end = constraints + len;
+
+	/*
+	 * Count the complete strings without ever dereferencing a
+	 * pointer at or beyond "end".
+	 */
+	n = 0;
+	p = constraints;
+	while (p < end && *p != '\0') {
+		for (q = p; q < end && *q != '\0'; q++)
+			;
+		if (q == end) {
+			/* unterminated tail: not a valid string, drop it */
+			break;
+		}
+		n++;
+		p = q + 1;
+	}
+
+	if (n == 0) {
+		kmem_free(constraints, len);
+		return (NULL);
+	}
+
+	array = kmem_alloc((n + 1) * sizeof (char *), KM_SLEEP);
+
+	/*
+	 * The first n strings were verified above to be NUL-terminated
+	 * inside the buffer, so strlen() is safe here.
+	 */
+	for (i = 0, p = constraints; i < n; i++, p += strlen(p) + 1) {
+		array[i] = i_ddi_strdup(p, KM_SLEEP);
+	}
+	array[n] = NULL;
+
+	kmem_free(constraints, len);
+
+	return (array);
+}
+/*
+ * MODRETIRE: retire the device named by the user-supplied physical path.
+ *
+ * path		user address of the NUL-terminated device path
+ * uconstraints	user address of the packed constraint strings, or NULL
+ * ulen		size in bytes of the constraint buffer, 0 when there is none
+ *
+ * Returns 0 on success or when the device is already retired, EINVAL for
+ * inconsistent arguments, the copyinstr() error or EFAULT when user data
+ * cannot be copied in, ENOTSUP when e_ddi_retire_device() refuses the
+ * retire, or the error from e_ddi_retire_persist().
+ */
+/*ARGSUSED*/
+static int
+modctl_retire(char *path, char *uconstraints, size_t ulen)
+{
+	char	*kpath;
+	char	*devpath;
+	char	*kcons;
+	char	**cons_array = NULL;
+	size_t	copied;
+	int	rv;
+
+	if (path == NULL)
+		return (EINVAL);
+
+	/* a constraint pointer and a non-zero length must come together */
+	if ((uconstraints == NULL) != (ulen == 0))
+		return (EINVAL);
+
+	kpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+	rv = copyinstr(path, kpath, MAXPATHLEN, &copied);
+	if (rv != 0) {
+		kmem_free(kpath, MAXPATHLEN);
+		return (rv);
+	}
+	/* keep a right-sized copy and release the MAXPATHLEN scratch buffer */
+	devpath = i_ddi_strdup(kpath, KM_SLEEP);
+	kmem_free(kpath, MAXPATHLEN);
+
+	/*
+	 * Retiring a device that is already retired is a NOP.
+	 */
+	if (e_ddi_device_retired(devpath)) {
+		cmn_err(CE_NOTE, "Device: already retired: %s", devpath);
+		rv = 0;
+		goto out;
+	}
+
+	if (uconstraints != NULL) {
+		kcons = kmem_alloc(ulen, KM_SLEEP);
+		if (copyin(uconstraints, kcons, ulen)) {
+			kmem_free(kcons, ulen);
+			rv = EFAULT;
+			goto out;
+		}
+		/* convert_constraint_string() frees kcons */
+		cons_array = convert_constraint_string(kcons, ulen);
+	}
+
+	/*
+	 * Attempt the retire before persisting it. The routine below
+	 * fails only when the device is not retireable, i.e. when the
+	 * retire constraints forbid it.
+	 * NOTE(review): cons_array is not freed here; presumably
+	 * e_ddi_retire_device() takes ownership - confirm.
+	 */
+	if (e_ddi_retire_device(devpath, cons_array) != DDI_SUCCESS) {
+		cmn_err(CE_WARN, "constraints forbid retire: %s", devpath);
+		rv = ENOTSUP;
+		goto out;
+	}
+
+	/*
+	 * The retire succeeded, so record it in the retire store. For a
+	 * nexus only the nexus itself is persisted; its children are
+	 * covered automatically by the retire store code.
+	 */
+	rv = e_ddi_retire_persist(devpath);
+	if (rv != 0) {
+		cmn_err(CE_WARN, "Failed to persist device retire: error %d: "
+		    "%s", rv, devpath);
+	} else if (moddebug & MODDEBUG_RETIRE) {
+		cmn_err(CE_NOTE, "Persisted retire of device: %s", devpath);
+	}
+
+out:
+	kmem_free(devpath, strlen(devpath) + 1);
+	return (rv);
+}
+
+/*
+ * MODISRETIRED: report whether the device named by the user-supplied path
+ * is retired.
+ *
+ * path		user address of the NUL-terminated device path
+ * statep	user address that receives 1 (retired) or 0 (not retired)
+ *
+ * Returns 0 on success, EINVAL if either argument is NULL, the copyinstr()
+ * error if the path cannot be copied in, or EFAULT if the result cannot be
+ * copied out.
+ */
+static int
+modctl_is_retired(char *path, int *statep)
+{
+	char	*kpath;
+	char	*devpath;
+	size_t	copied;
+	int	rv;
+	int	is_retired;
+
+	if (path == NULL || statep == NULL)
+		return (EINVAL);
+
+	kpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+	rv = copyinstr(path, kpath, MAXPATHLEN, &copied);
+	if (rv != 0) {
+		kmem_free(kpath, MAXPATHLEN);
+		return (rv);
+	}
+	devpath = i_ddi_strdup(kpath, KM_SLEEP);
+	kmem_free(kpath, MAXPATHLEN);
+
+	/* normalize the lookup result to exactly 0 or 1 for userland */
+	is_retired = e_ddi_device_retired(devpath) ? 1 : 0;
+	kmem_free(devpath, strlen(devpath) + 1);
+
+	if (copyout(&is_retired, statep, sizeof (is_retired)))
+		return (EFAULT);
+
+	return (0);
+}
+
+/*
+ * MODUNRETIRE: unretire the device named by the user-supplied path.
+ *
+ * The persistent retire record is removed unconditionally; the device
+ * itself is unretired only if it was retired on entry.
+ *
+ * Returns 0 on success or when the device was not retired, EINVAL for a
+ * NULL path, the copyinstr() error if the path cannot be copied in, or the
+ * error from e_ddi_unretire_device().
+ */
+static int
+modctl_unretire(char *path)
+{
+	char	*kpath;
+	char	*devpath;
+	size_t	copied;
+	int	was_retired;
+	int	rv;
+
+	if (path == NULL)
+		return (EINVAL);
+
+	kpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+	rv = copyinstr(path, kpath, MAXPATHLEN, &copied);
+	if (rv != 0) {
+		kmem_free(kpath, MAXPATHLEN);
+		return (rv);
+	}
+	devpath = i_ddi_strdup(kpath, KM_SLEEP);
+	kmem_free(kpath, MAXPATHLEN);
+
+	/*
+	 * Sample the retired state before touching the retire store: the
+	 * store is what e_ddi_device_retired() consults, so once the
+	 * entry is unpersisted the device would always look unretired.
+	 */
+	was_retired = e_ddi_device_retired(devpath);
+
+	/*
+	 * Unpersist regardless of the lookup result. The lookup skips
+	 * "bypassed" entries, and we still want to be able to remove
+	 * those from the persistent store.
+	 *
+	 * e_ddi_retire_unpersist() returns 1 if it found and cleared an
+	 * entry from the retire store, 0 otherwise.
+	 */
+	if (e_ddi_retire_unpersist(devpath) && (moddebug & MODDEBUG_RETIRE)) {
+		cmn_err(CE_NOTE, "Unpersisted retire of device: %s",
+		    devpath);
+	}
+
+	if (was_retired) {
+		rv = e_ddi_unretire_device(devpath);
+		if (rv != 0) {
+			cmn_err(CE_WARN,
+			    "cannot unretire device: error %d, path %s\n",
+			    rv, devpath);
+		}
+	} else {
+		/* already unretired: the unretire itself is a NOP */
+		cmn_err(CE_NOTE, "Not retired: %s", devpath);
+		rv = 0;
+	}
+
+	kmem_free(devpath, strlen(devpath) + 1);
+
+	return (rv);
+}
+
static int
modctl_getname(char *uname, uint_t ulen, int *umajorp)
{
@@ -2069,6 +2278,18 @@ modctl(int cmd, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4,
error = modctl_moddevname((int)a1, a2, a3);
break;
+ case MODRETIRE: /* retire device named by physpath a1 */
+ error = modctl_retire((char *)a1, (char *)a2, (size_t)a3);
+ break;
+
+ case MODISRETIRED: /* check if a device is retired. */
+ error = modctl_is_retired((char *)a1, (int *)a2);
+ break;
+
+ case MODUNRETIRE: /* unretire device named by physpath a1 */
+ error = modctl_unretire((char *)a1);
+ break;
+
default:
error = EINVAL;
break;
diff --git a/usr/src/uts/common/os/retire_store.c b/usr/src/uts/common/os/retire_store.c
new file mode 100644
index 0000000000..f1c3db9445
--- /dev/null
+++ b/usr/src/uts/common/os/retire_store.c
@@ -0,0 +1,457 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/ddi_implfuncs.h>
+#include <sys/list.h>
+#include <sys/reboot.h>
+#include <sys/sysmacros.h>
+#include <sys/console.h>
+#include <sys/devcache.h>
+
+/*
+ * The nvpair names used in the I/O retire specific sub-nvlist
+ */
+#define RIO_STORE_VERSION_STR "rio-store-version"
+#define RIO_STORE_MAGIC_STR "rio-store-magic"
+#define RIO_STORE_FLAGS_STR "rio-store-flags"
+
+#define RIO_STORE_VERSION_1 1
+#define RIO_STORE_VERSION RIO_STORE_VERSION_1
+
+/*
+ * Flags carried by a decoded retire list element.
+ *
+ * RIO_STORE_F_INVAL	zero value; rio_store_decode() rejects any on-disk
+ *			flags value other than RIO_STORE_F_RETIRED
+ * RIO_STORE_F_RETIRED	set on every list element (asserted when an
+ *			element is freed); the only flag written to disk
+ * RIO_STORE_F_BYPASS	in-core only; added by rio_store_decode() when
+ *			ddi_retire_store_bypass is set, and makes
+ *			e_ddi_device_retired() skip the element
+ */
+
+typedef enum rio_store_flags {
+ RIO_STORE_F_INVAL = 0,
+ RIO_STORE_F_RETIRED = 1,
+ RIO_STORE_F_BYPASS = 2
+} rio_store_flags_t;
+
+/*
+ * decoded retire list element
+ */
+typedef struct rio_store {
+ char *rst_devpath; /* allocated device path, freed with the element */
+ rio_store_flags_t rst_flags; /* RIO_STORE_F_* bits */
+ list_node_t rst_next; /* linkage on the nvf_list() of the store */
+} rio_store_t;
+
+#define RIO_STORE_MAGIC 0x601fcace /* retire */
+
+static int rio_store_decode(nvf_handle_t nvfh, nvlist_t *line_nvl, char *name);
+static int rio_store_encode(nvf_handle_t nvfh, nvlist_t **ret_nvl);
+static void retire_list_free(nvf_handle_t nvfh);
+
+
+/*
+ * Retire I/O persistent store registration info, handed to
+ * nvf_register_file() by retire_store_init(). The store path below is the
+ * default; retire_store_init() may replace it with a path entered on the
+ * console when booted with RB_ASKNAME.
+ */
+static nvf_ops_t rio_store_ops = {
+ "/etc/devices/retire_store", /* path to store */
+ rio_store_decode, /* decode nvlist into retire_list */
+ rio_store_encode, /* encode retire_list into nvlist */
+ retire_list_free, /* free retire_list */
+ NULL /* write complete callback */
+};
+
+/* handle returned by nvf_register_file() for the retire store */
+static nvf_handle_t rio_store_handle;
+/* buffer for a store path entered on the console in retire_store_init() */
+static char store_path[MAXPATHLEN];
+/* bitmask of STORE_DEBUG / STORE_TRACE selecting the messages below */
+static int store_debug = 0;
+/* set once the "Bypassing retire store" warning has been issued */
+static int bypass_msg = 0;
+/* set once the "devices have been retired" note has been issued */
+static int retire_msg = 0;
+
+#define STORE_DEBUG 0x0001
+#define STORE_TRACE 0x0002
+
+/*
+ * Conditional cmn_err() wrappers. "args" is the complete parenthesized
+ * cmn_err() argument list. Each macro expands to a bare "if" statement, so
+ * use it only as a full statement, never directly before an "else".
+ */
+#define STORE_DBG(args) if (store_debug & STORE_DEBUG) cmn_err args
+#define STORE_TRC(args) if (store_debug & STORE_TRACE) cmn_err args
+
+/*
+ * We don't use the simple read disable offered by the
+ * caching framework (see devcache.c) as it will not
+ * have the desired effect of bypassing the persistent
+ * store. A simple read disable will
+ *
+ * 1. cause any additions to the cache to destroy the
+ * existing on-disk cache
+ *
+ * 2. prevent deletions from the existing on-disk
+ * cache which is needed for recovery from bad
+ * retire decisions.
+ *
+ * Use the following tunable instead.
+ *
+ * When it is non-zero, rio_store_decode() still loads every entry from
+ * the store but marks it RIO_STORE_F_BYPASS, so e_ddi_device_retired()
+ * does not report it as retired. retire_store_init() sets the tunable
+ * when "/dev/null" is entered as the store path at the boot prompt.
+ */
+int ddi_retire_store_bypass = 0;
+
+
+
+/*
+ * Initialize the retire store data structures.
+ *
+ * When booted with RB_ASKNAME the operator is asked for the store path:
+ * "/dev/null" turns on ddi_retire_store_bypass, any other absolute path
+ * replaces the default, an empty reply keeps the default, and anything
+ * else is rejected with a message. The store is then registered with the
+ * nvf framework and its in-core list is created.
+ */
+void
+retire_store_init(void)
+{
+	if (boothowto & RB_ASKNAME) {
+
+		printf("Retire store [%s] (/dev/null to bypass): ",
+		    rio_store_ops.nvfr_cache_path);
+		console_gets(store_path, sizeof (store_path) - 1);
+		store_path[sizeof (store_path) - 1] = '\0';
+
+		if (store_path[0] == '/') {
+			/* absolute path: the bypass keyword or a new store */
+			if (strcmp(store_path, "/dev/null") == 0)
+				ddi_retire_store_bypass = 1;
+			else
+				rio_store_ops.nvfr_cache_path = store_path;
+		} else if (store_path[0] != '\0') {
+			printf("Invalid store path: %s. Using default"
+			    "\n", store_path);
+		}
+	}
+
+	rio_store_handle = nvf_register_file(&rio_store_ops);
+
+	list_create(nvf_list(rio_store_handle), sizeof (rio_store_t),
+	    offsetof(rio_store_t, rst_next));
+}
+
+/*
+ * Read and populate the in-core retire store.
+ *
+ * The store's writer lock is held across the read. The in-core list is
+ * expected to be empty on entry (asserted), and the result of
+ * nvf_read_file() is deliberately ignored.
+ */
+void
+retire_store_read(void)
+{
+ rw_enter(nvf_lock(rio_store_handle), RW_WRITER);
+ ASSERT(list_head(nvf_list(rio_store_handle)) == NULL);
+ (void) nvf_read_file(rio_store_handle);
+ rw_exit(nvf_lock(rio_store_handle));
+ STORE_DBG((CE_NOTE, "Read on-disk retire store"));
+}
+
+/*
+ * Free one retire list element together with its device path string.
+ * The element must already be off the list and must carry the RETIRED
+ * flag and nothing beyond RETIRED|BYPASS.
+ */
+static void
+rio_store_free(rio_store_t *rsp)
+{
+	size_t	pathsz;
+
+	ASSERT(rsp);
+	ASSERT(rsp->rst_devpath);
+	ASSERT(rsp->rst_flags & RIO_STORE_F_RETIRED);
+	ASSERT(!(rsp->rst_flags & ~(RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS)));
+
+	STORE_TRC((CE_NOTE, "store: freed path: %s", rsp->rst_devpath));
+
+	/* the path was allocated as a string: length plus the NUL */
+	pathsz = strlen(rsp->rst_devpath) + 1;
+	kmem_free(rsp->rst_devpath, pathsz);
+	kmem_free(rsp, sizeof (rio_store_t));
+}
+
+/*
+ * nvf "free list" callback: empty the in-core retire list and free every
+ * element. The caller must hold the store lock as writer.
+ */
+static void
+retire_list_free(nvf_handle_t nvfh)
+{
+	list_t		*store;
+	rio_store_t	*victim;
+
+	ASSERT(nvfh == rio_store_handle);
+	ASSERT(RW_WRITE_HELD(nvf_lock(nvfh)));
+
+	store = nvf_list(nvfh);
+
+	/* keep taking the head until the list is empty */
+	for (victim = list_head(store); victim != NULL;
+	    victim = list_head(store)) {
+		list_remove(store, victim);
+		rio_store_free(victim);
+	}
+
+	STORE_DBG((CE_NOTE, "store: freed retire list"));
+}
+
+/*
+ * nvf "decode" callback: validate one per-device sub-nvlist read from the
+ * store and append a matching element to the in-core retire list.
+ *
+ * nvfh		the retire store handle; its lock must be held as writer
+ * line_nvl	sub-nvlist holding the version, magic and flags pairs
+ * name		device path the sub-nvlist was stored under
+ *
+ * Returns 0 on success, or EINVAL if any of the three pairs is missing or
+ * holds an unexpected value (nothing is added in that case).
+ */
+static int
+rio_store_decode(nvf_handle_t nvfh, nvlist_t *line_nvl, char *name)
+{
+	rio_store_t	*entry;
+	int32_t		version = 0;
+	int32_t		magic = 0;
+	int32_t		flags = 0;
+
+	ASSERT(nvfh == rio_store_handle);
+	ASSERT(RW_WRITE_HELD(nvf_lock(nvfh)));
+	ASSERT(name);
+
+	if (nvlist_lookup_int32(line_nvl, RIO_STORE_VERSION_STR,
+	    &version) != 0 || version != RIO_STORE_VERSION)
+		return (EINVAL);
+
+	if (nvlist_lookup_int32(line_nvl, RIO_STORE_MAGIC_STR,
+	    &magic) != 0 || magic != RIO_STORE_MAGIC)
+		return (EINVAL);
+
+	/* the on-disk flags must be exactly RETIRED; bypass is never saved */
+	if (nvlist_lookup_int32(line_nvl, RIO_STORE_FLAGS_STR,
+	    &flags) != 0 || flags != RIO_STORE_F_RETIRED)
+		return (EINVAL);
+
+	if (ddi_retire_store_bypass) {
+		/* load the entry anyway, but mark it as bypassed */
+		flags |= RIO_STORE_F_BYPASS;
+		if (!bypass_msg) {
+			bypass_msg = 1;
+			cmn_err(CE_WARN,
+			    "Bypassing retire store /etc/devices/retire_store");
+		}
+	}
+
+	entry = kmem_zalloc(sizeof (rio_store_t), KM_SLEEP);
+	entry->rst_devpath = i_ddi_strdup(name, KM_SLEEP);
+	entry->rst_flags = flags;
+	list_insert_tail(nvf_list(nvfh), entry);
+
+	STORE_TRC((CE_NOTE, "store: added to retire list: %s", name));
+
+	/* announce only once, however many entries are decoded */
+	if (!retire_msg) {
+		retire_msg = 1;
+		cmn_err(CE_NOTE, "One or more I/O devices have been retired");
+	}
+
+	return (0);
+}
+
+/*
+ * nvf "encode" callback: pack the in-core retire list into an nvlist.
+ *
+ * The result holds one sub-nvlist per list element, named by the
+ * element's device path and containing the store version, magic and flags.
+ * The bypass flag is never saved; RIO_STORE_F_RETIRED is always written.
+ *
+ * nvfh		the retire store handle; its lock must be held as writer
+ * ret_nvl	receives the new nvlist on success, NULL on failure
+ *
+ * Returns DDI_SUCCESS or DDI_FAILURE. On failure everything allocated
+ * here has been freed.
+ */
+static int
+rio_store_encode(nvf_handle_t nvfh, nvlist_t **ret_nvl)
+{
+	nvlist_t	*top = NULL;
+	nvlist_t	*entry_nvl = NULL;
+	list_t		*store;
+	rio_store_t	*cur;
+
+	ASSERT(nvfh == rio_store_handle);
+	ASSERT(RW_WRITE_HELD(nvf_lock(nvfh)));
+
+	*ret_nvl = NULL;
+
+	if (nvlist_alloc(&top, NV_UNIQUE_NAME, KM_SLEEP) != 0)
+		return (DDI_FAILURE);
+
+	store = nvf_list(nvfh);
+	for (cur = list_head(store); cur != NULL;
+	    cur = list_next(store, cur)) {
+		ASSERT(cur->rst_devpath);
+		ASSERT(!(cur->rst_flags &
+		    ~(RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS)));
+
+		entry_nvl = NULL;
+		if (nvlist_alloc(&entry_nvl, NV_UNIQUE_NAME, KM_SLEEP) != 0) {
+			entry_nvl = NULL;
+			goto error;
+		}
+
+		/*
+		 * Build the sub-nvlist and attach it under the device
+		 * path; the first failing step aborts the encode. The
+		 * bypass flag is not saved.
+		 */
+		if (nvlist_add_int32(entry_nvl, RIO_STORE_VERSION_STR,
+		    RIO_STORE_VERSION) != 0 ||
+		    nvlist_add_int32(entry_nvl, RIO_STORE_MAGIC_STR,
+		    RIO_STORE_MAGIC) != 0 ||
+		    nvlist_add_int32(entry_nvl, RIO_STORE_FLAGS_STR,
+		    RIO_STORE_F_RETIRED) != 0 ||
+		    nvlist_add_nvlist(top, cur->rst_devpath, entry_nvl) != 0)
+			goto error;
+
+		/* release the local sub-nvlist once it has been added */
+		nvlist_free(entry_nvl);
+		entry_nvl = NULL;
+	}
+
+	*ret_nvl = top;
+	STORE_DBG((CE_NOTE, "packed retire list into nvlist"));
+	return (DDI_SUCCESS);
+
+error:
+	if (entry_nvl != NULL)
+		nvlist_free(entry_nvl);
+	ASSERT(top);
+	nvlist_free(top);
+	return (DDI_FAILURE);
+}
+
+/*
+ * Record "devpath" as retired in the in-core retire store.
+ *
+ * If the path is already on the list, its bypass flag (if any) is cleared
+ * and nothing else changes. Otherwise a new element is appended, the store
+ * is marked dirty and the nvf daemon is woken.
+ *
+ * Always returns 0.
+ */
+int
+e_ddi_retire_persist(char *devpath)
+{
+	list_t		*store;
+	rio_store_t	*cur;
+	rio_store_t	*fresh;
+	char		*path_copy;
+
+	STORE_DBG((CE_NOTE, "e_ddi_retire_persist: entered: %s", devpath));
+
+	/* build the candidate element before taking the store lock */
+	fresh = kmem_zalloc(sizeof (rio_store_t), KM_SLEEP);
+	path_copy = i_ddi_strdup(devpath, KM_SLEEP);
+	fresh->rst_devpath = path_copy;
+	fresh->rst_flags = RIO_STORE_F_RETIRED;
+
+	rw_enter(nvf_lock(rio_store_handle), RW_WRITER);
+
+	store = nvf_list(rio_store_handle);
+	for (cur = list_head(store); cur != NULL;
+	    cur = list_next(store, cur)) {
+		ASSERT(!(cur->rst_flags &
+		    ~(RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS)));
+
+		if (strcmp(devpath, cur->rst_devpath) == 0)
+			break;
+	}
+
+	if (cur != NULL) {
+		/* already there: explicit retire, clear bypass flag (if any) */
+		cur->rst_flags &= ~RIO_STORE_F_BYPASS;
+		ASSERT(cur->rst_flags == RIO_STORE_F_RETIRED);
+		rw_exit(nvf_lock(rio_store_handle));
+
+		/* the candidate element is not needed after all */
+		kmem_free(path_copy, strlen(path_copy) + 1);
+		kmem_free(fresh, sizeof (rio_store_t));
+		STORE_DBG((CE_NOTE, "store: already in. Clear bypass "
+		    ": %s", devpath));
+		return (0);
+	}
+
+	list_insert_tail(store, fresh);
+
+	nvf_mark_dirty(rio_store_handle);
+
+	rw_exit(nvf_lock(rio_store_handle));
+
+	nvf_wake_daemon();
+
+	STORE_DBG((CE_NOTE, "store: New, added to list, dirty: %s", devpath));
+
+	return (0);
+}
+
+/*
+ * Remove every element whose path equals "devpath" from the in-core
+ * retire store, whatever its flags. If anything was removed the store is
+ * marked dirty and the nvf daemon is woken.
+ *
+ * Returns 1 if at least one element was removed, 0 otherwise.
+ */
+int
+e_ddi_retire_unpersist(char *devpath)
+{
+	list_t		*store;
+	rio_store_t	*cur;
+	rio_store_t	*nxt;
+	int		removed = 0;
+
+	STORE_DBG((CE_NOTE, "e_ddi_retire_unpersist: entered: %s", devpath));
+
+	rw_enter(nvf_lock(rio_store_handle), RW_WRITER);
+
+	store = nvf_list(rio_store_handle);
+	cur = list_head(store);
+	while (cur != NULL) {
+		/* fetch the successor first: cur may be freed below */
+		nxt = list_next(store, cur);
+
+		if (strcmp(devpath, cur->rst_devpath) == 0) {
+			list_remove(store, cur);
+			rio_store_free(cur);
+
+			STORE_DBG((CE_NOTE, "store: found in list. Freed: %s",
+			    devpath));
+
+			nvf_mark_dirty(rio_store_handle);
+			removed = 1;
+		}
+
+		cur = nxt;
+	}
+
+	rw_exit(nvf_lock(rio_store_handle));
+
+	if (removed)
+		nvf_wake_daemon();
+
+	return (removed);
+}
+
+/*
+ * Report whether "devpath" is retired according to the in-core retire
+ * store. A device counts as retired when the store holds a non-bypassed
+ * element for the device itself or for one of its ancestors.
+ *
+ * Returns 1 if retired, 0 otherwise.
+ */
+int
+e_ddi_device_retired(char *devpath)
+{
+	list_t		*store;
+	rio_store_t	*cur;
+	size_t		plen;
+	int		found = 0;
+
+	rw_enter(nvf_lock(rio_store_handle), RW_READER);
+
+	store = nvf_list(rio_store_handle);
+	for (cur = list_head(store); cur != NULL;
+	    cur = list_next(store, cur)) {
+		ASSERT(!(cur->rst_flags &
+		    ~(RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS)));
+
+		/*
+		 * A "bypass" element means the store was read but its
+		 * devices were not retired for the current boot, so the
+		 * element does not make the device retired. See the
+		 * comments for the tunable ddi_retire_store_bypass for
+		 * why the store is read at all in that case.
+		 */
+		if (cur->rst_flags & RIO_STORE_F_BYPASS) {
+			STORE_TRC((CE_NOTE, "store: found & bypassed: %s",
+			    cur->rst_devpath));
+			continue;
+		}
+
+		/*
+		 * The stored path must be a prefix of devpath that ends
+		 * on a component boundary: either the whole of devpath
+		 * (exact match) or followed by '/' (devpath is a child).
+		 */
+		plen = strlen(cur->rst_devpath);
+		if (strncmp(devpath, cur->rst_devpath, plen) == 0 &&
+		    (devpath[plen] == '\0' || devpath[plen] == '/')) {
+			found = 1;
+			STORE_TRC((CE_NOTE, "store: found & !bypassed: %s",
+			    devpath));
+			break;
+		}
+	}
+
+	rw_exit(nvf_lock(rio_store_handle));
+
+	return (found);
+}
diff --git a/usr/src/uts/common/os/sunmdi.c b/usr/src/uts/common/os/sunmdi.c
index 0c6b1e3055..cec7a252b6 100644
--- a/usr/src/uts/common/os/sunmdi.c
+++ b/usr/src/uts/common/os/sunmdi.c
@@ -4777,6 +4777,292 @@ i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
return (rv);
}
+/*
+ * For each path hanging off the pHCI "dip", mark the path's client device
+ * as retiring when the code judges this pHCI to carry the client's last
+ * path, i.e. the client dip exists, is at least DS_INITIALIZED and
+ * i_mdi_client_compute_state() reports MDI_CLIENT_STATE_FAILED.
+ * "cons_array" is passed through to e_ddi_mark_retiring() unchanged.
+ *
+ * Does nothing if "dip" is not a pHCI, has no mdi_phci_t, or the pHCI is
+ * already offline.
+ */
+void
+mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
+{
+ mdi_phci_t *ph;
+ mdi_client_t *ct;
+ mdi_pathinfo_t *pip;
+ mdi_pathinfo_t *next;
+ dev_info_t *cdip;
+
+ if (!MDI_PHCI(dip))
+ return;
+
+ ph = i_devi_get_phci(dip);
+ if (ph == NULL) {
+ return;
+ }
+
+ MDI_PHCI_LOCK(ph);
+
+ if (MDI_PHCI_IS_OFFLINE(ph)) {
+ /* has no last path */
+ MDI_PHCI_UNLOCK(ph);
+ return;
+ }
+
+ pip = ph->ph_path_head;
+ while (pip != NULL) {
+ MDI_PI_LOCK(pip);
+ /* remember the successor now; the pHCI lock may be dropped below */
+ next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
+
+ ct = MDI_PI(pip)->pi_client;
+ i_mdi_client_lock(ct, pip);
+ MDI_PI_UNLOCK(pip);
+
+ cdip = ct->ct_dip;
+ if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
+ (i_mdi_client_compute_state(ct, ph) ==
+ MDI_CLIENT_STATE_FAILED)) {
+ /* Last path. Mark client dip as retiring */
+ /*
+ * Both the client and pHCI locks are released around the
+ * call and the pHCI lock is retaken afterwards.
+ * NOTE(review): "next" was sampled before the pHCI lock
+ * was dropped - confirm the path list cannot change here.
+ */
+ i_mdi_client_unlock(ct);
+ MDI_PHCI_UNLOCK(ph);
+ (void) e_ddi_mark_retiring(cdip, cons_array);
+ MDI_PHCI_LOCK(ph);
+ pip = next;
+ } else {
+ i_mdi_client_unlock(ct);
+ pip = next;
+ }
+ }
+
+ MDI_PHCI_UNLOCK(ph);
+
+ return;
+}
+
+/*
+ * Retire-notify pass for the pHCI "dip": for every path whose client
+ * would be left failed (see the last-path check below), call
+ * e_ddi_retire_notify() on the client dip, passing "constraint" through.
+ *
+ * *constraint is set to 0 here when the pHCI or one of its clients is
+ * unstable or has a failover in progress, because constraints cannot be
+ * checked in that state. Otherwise it is only modified by whatever
+ * e_ddi_retire_notify() does with it.
+ *
+ * Does nothing if "dip" is not a pHCI, has no mdi_phci_t, or the pHCI is
+ * already offline.
+ */
+void
+mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
+{
+ mdi_phci_t *ph;
+ mdi_client_t *ct;
+ mdi_pathinfo_t *pip;
+ mdi_pathinfo_t *next;
+ dev_info_t *cdip;
+
+ if (!MDI_PHCI(dip))
+ return;
+
+ ph = i_devi_get_phci(dip);
+ if (ph == NULL)
+ return;
+
+ MDI_PHCI_LOCK(ph);
+
+ if (MDI_PHCI_IS_OFFLINE(ph)) {
+ MDI_PHCI_UNLOCK(ph);
+ /* not last path */
+ return;
+ }
+
+ if (ph->ph_unstable) {
+ MDI_PHCI_UNLOCK(ph);
+ /* can't check for constraints */
+ *constraint = 0;
+ return;
+ }
+
+ pip = ph->ph_path_head;
+ while (pip != NULL) {
+ MDI_PI_LOCK(pip);
+ /* remember the successor now; the pHCI lock may be dropped below */
+ next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
+
+ /*
+ * The mdi_pathinfo state is OK. Check the client state.
+ * If failover in progress fail the pHCI from offlining
+ */
+ ct = MDI_PI(pip)->pi_client;
+ i_mdi_client_lock(ct, pip);
+ if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
+ (ct->ct_unstable)) {
+ /*
+ * Failover is in progress, can't check for constraints
+ */
+ MDI_PI_UNLOCK(pip);
+ i_mdi_client_unlock(ct);
+ MDI_PHCI_UNLOCK(ph);
+ *constraint = 0;
+ return;
+ }
+ MDI_PI_UNLOCK(pip);
+
+ /*
+ * Check to see if we are retiring the last path of this
+ * client device...
+ */
+ cdip = ct->ct_dip;
+ if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
+ (i_mdi_client_compute_state(ct, ph) ==
+ MDI_CLIENT_STATE_FAILED)) {
+ /* drop both locks around the notify, then retake the pHCI lock */
+ i_mdi_client_unlock(ct);
+ MDI_PHCI_UNLOCK(ph);
+ (void) e_ddi_retire_notify(cdip, constraint);
+ MDI_PHCI_LOCK(ph);
+ pip = next;
+ } else {
+ i_mdi_client_unlock(ct);
+ pip = next;
+ }
+ }
+
+ MDI_PHCI_UNLOCK(ph);
+
+ return;
+}
+
+/*
+ * offline the path(s) hanging off the PHCI. If the
+ * last path to any client, check that constraints
+ * have been applied.
+ *
+ * The work is done in three passes over the pHCI's path list:
+ * 1. note whether the pHCI or any client is unstable or failing over,
+ * and (unless "phci_only" is set) call e_ddi_retire_finalize() with a
+ * constraint of 0 on each client that would lose its last path;
+ * 2. if nothing was unstable, mark the pHCI offline and every path
+ * as offlining;
+ * 3. after a one-tick delay, offline each path with i_mdi_pi_offline().
+ *
+ * If anything was unstable a warning is logged and nothing is offlined.
+ * If a path is still not offline after pass 3, a warning is logged, the
+ * pHCI is set back online and the function returns at that point.
+ *
+ * Does nothing if "dip" is not a pHCI, has no mdi_phci_t, or the pHCI is
+ * already offline.
+ */
+void
+mdi_phci_retire_finalize(dev_info_t *dip, int phci_only)
+{
+ mdi_phci_t *ph;
+ mdi_client_t *ct;
+ mdi_pathinfo_t *pip;
+ mdi_pathinfo_t *next;
+ dev_info_t *cdip;
+ int unstable = 0;
+ int constraint;
+
+ if (!MDI_PHCI(dip))
+ return;
+
+ ph = i_devi_get_phci(dip);
+ if (ph == NULL) {
+ /* no last path and no pips */
+ return;
+ }
+
+ MDI_PHCI_LOCK(ph);
+
+ if (MDI_PHCI_IS_OFFLINE(ph)) {
+ MDI_PHCI_UNLOCK(ph);
+ /* no last path and no pips */
+ return;
+ }
+
+ /*
+ * Check to see if the pHCI can be offlined
+ */
+ if (ph->ph_unstable) {
+ unstable = 1;
+ }
+
+ pip = ph->ph_path_head;
+ while (pip != NULL) {
+ MDI_PI_LOCK(pip);
+ /* remember the successor now; the pHCI lock may be dropped below */
+ next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
+
+ /*
+ * if failover in progress fail the pHCI from offlining
+ */
+ ct = MDI_PI(pip)->pi_client;
+ i_mdi_client_lock(ct, pip);
+ if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
+ (ct->ct_unstable)) {
+ unstable = 1;
+ }
+ MDI_PI_UNLOCK(pip);
+
+ /*
+ * Check to see if we are removing the last path of this
+ * client device...
+ */
+ cdip = ct->ct_dip;
+ if (!phci_only && cdip &&
+ (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
+ (i_mdi_client_compute_state(ct, ph) ==
+ MDI_CLIENT_STATE_FAILED)) {
+ i_mdi_client_unlock(ct);
+ MDI_PHCI_UNLOCK(ph);
+ /*
+ * We don't retire clients we just retire the
+ * path to a client. If it is the last path
+ * to a client, constraints are checked and
+ * if we pass the last path is offlined. MPXIO will
+ * then fail all I/Os to the client. Since we don't
+ * want to retire the client on a path error
+ * set constraint = 0 so that the client dip
+ * is not retired.
+ */
+ constraint = 0;
+ (void) e_ddi_retire_finalize(cdip, &constraint);
+ MDI_PHCI_LOCK(ph);
+ pip = next;
+ } else {
+ i_mdi_client_unlock(ct);
+ pip = next;
+ }
+ }
+
+ /*
+ * Cannot offline pip(s)
+ */
+ if (unstable) {
+ cmn_err(CE_WARN, "PHCI in transient state, cannot "
+ "retire, dip = %p", (void *)dip);
+ MDI_PHCI_UNLOCK(ph);
+ return;
+ }
+
+ /*
+ * Mark the pHCI as offline
+ */
+ MDI_PHCI_SET_OFFLINE(ph);
+
+ /*
+ * Mark the child mdi_pathinfo nodes as transient
+ */
+ pip = ph->ph_path_head;
+ while (pip != NULL) {
+ MDI_PI_LOCK(pip);
+ next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
+ MDI_PI_SET_OFFLINING(pip);
+ MDI_PI_UNLOCK(pip);
+ pip = next;
+ }
+ MDI_PHCI_UNLOCK(ph);
+ /*
+ * Give a chance for any pending commands to execute
+ */
+ delay(1);
+ MDI_PHCI_LOCK(ph);
+ pip = ph->ph_path_head;
+ while (pip != NULL) {
+ next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
+ (void) i_mdi_pi_offline(pip, 0);
+ MDI_PI_LOCK(pip);
+ /* NOTE(review): ct is assigned here but not used in this loop */
+ ct = MDI_PI(pip)->pi_client;
+ if (!MDI_PI_IS_OFFLINE(pip)) {
+ cmn_err(CE_WARN, "PHCI busy, cannot offline path: "
+ "PHCI dip = %p", (void *)dip);
+ MDI_PI_UNLOCK(pip);
+ /*
+ * Back out the pHCI offline state only.
+ * NOTE(review): paths already marked offlining or taken
+ * offline earlier in this pass are not restored here -
+ * confirm this is intended.
+ */
+ MDI_PHCI_SET_ONLINE(ph);
+ MDI_PHCI_UNLOCK(ph);
+ return;
+ }
+ MDI_PI_UNLOCK(pip);
+ pip = next;
+ }
+ MDI_PHCI_UNLOCK(ph);
+
+ return;
+}
+
+/*
+ * Undo a pHCI retire by bringing the pHCI back online through
+ * i_mdi_phci_online(). "dip" must be a pHCI (asserted).
+ */
+void
+mdi_phci_unretire(dev_info_t *dip)
+{
+ ASSERT(MDI_PHCI(dip));
+
+ /*
+ * Online the phci
+ */
+ i_mdi_phci_online(dip);
+}
+
/*ARGSUSED*/
static int
i_mdi_client_offline(dev_info_t *dip, uint_t flags)
diff --git a/usr/src/uts/common/os/sunndi.c b/usr/src/uts/common/os/sunndi.c
index 58d76dbd69..627f8fe6c6 100644
--- a/usr/src/uts/common/os/sunndi.c
+++ b/usr/src/uts/common/os/sunndi.c
@@ -68,6 +68,7 @@
#include <sys/nvpair.h>
#include <sys/sunmdi.h>
#include <sys/fs/dv_node.h>
+#include <sys/sunldi_impl.h>
#ifdef __sparc
#include <sys/archsystm.h> /* getpil/setpil */
@@ -853,6 +854,20 @@ ndi_dc_devi_create(struct devctl_iocdata *dcp, dev_info_t *pdip, int flags,
*/
if (dcp->flags & DEVCTL_OFFLINE) {
/*
+ * In the unlikely event that the dip was somehow attached by
+ * the userland process (and device contracts or LDI opens
+ * were registered against the dip) after it was created by
+ * a previous DEVCTL_CONSTRUCT call, we start notify
+ * proceedings on this dip. Note that we don't need to
+ * return the dip after a failure of the notify since
+ * for a contract or LDI handle to be created the dip was
+ * already available to the user.
+ */
+ if (e_ddi_offline_notify(cdip) == DDI_FAILURE) {
+ return (EBUSY);
+ }
+
+ /*
* hand set the OFFLINE flag to prevent any asynchronous
* autoconfiguration operations from attaching this node.
*/
@@ -860,6 +875,8 @@ ndi_dc_devi_create(struct devctl_iocdata *dcp, dev_info_t *pdip, int flags,
DEVI_SET_DEVICE_OFFLINE(cdip);
mutex_exit(&(DEVI(cdip)->devi_lock));
+ e_ddi_offline_finalize(cdip, DDI_SUCCESS);
+
rv = ndi_devi_bind_driver(cdip, flags);
if (rv != NDI_SUCCESS) {
(void) ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index 2c4defc38d..b4591f05d9 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -845,7 +845,9 @@ SYSEVENTHDRS= \
CONTRACTHDRS= \
process.h \
- process_impl.h
+ process_impl.h \
+ device.h \
+ device_impl.h
USBHDRS= \
usba.h \
diff --git a/usr/src/uts/common/sys/autoconf.h b/usr/src/uts/common/sys/autoconf.h
index 3b10e97c89..e7fbd33267 100644
--- a/usr/src/uts/common/sys/autoconf.h
+++ b/usr/src/uts/common/sys/autoconf.h
@@ -104,6 +104,11 @@ struct devnames {
#define DDI_INTR_API 0x0200 /* interrupt interface messages */
#define DDI_INTR_IMPL 0x0400 /* interrupt implementation msgs */
#define DDI_INTR_NEXUS 0x0800 /* interrupt messages from nexuses */
+#define DDI_DBG_RETIRE 0x1000 /* Retire related messages */
+#define DDI_DBG_RTR_VRBOSE 0x2000 /* Verbose Retire messages */
+#define DDI_DBG_RTR_TRACE 0x4000 /* Trace Retire messages */
+#define LDI_EV_DEBUG 0x8000 /* LDI events debug messages */
+#define LDI_EV_TRACE 0x10000 /* LDI events trace messages */
extern int ddidebug;
@@ -118,6 +123,11 @@ extern int ddidebug;
#define DDI_INTR_APIDBG(args) if (ddidebug & DDI_INTR_API) cmn_err args
#define DDI_INTR_IMPLDBG(args) if (ddidebug & DDI_INTR_IMPL) cmn_err args
#define DDI_INTR_NEXDBG(args) if (ddidebug & DDI_INTR_NEXUS) cmn_err args
+#define RIO_DEBUG(args) if (ddidebug & DDI_DBG_RETIRE) cmn_err args
+#define RIO_VERBOSE(args) if (ddidebug & DDI_DBG_RTR_VRBOSE) cmn_err args
+#define RIO_TRACE(args) if (ddidebug & DDI_DBG_RTR_TRACE) cmn_err args
+#define LDI_EVDBG(args) if (ddidebug & LDI_EV_DEBUG) cmn_err args
+#define LDI_EVTRC(args) if (ddidebug & LDI_EV_TRACE) cmn_err args
#else
#define NDI_CONFIG_DEBUG(args)
#define BMDPRINTF(args)
@@ -129,6 +139,11 @@ extern int ddidebug;
#define DDI_INTR_APIDBG(args)
#define DDI_INTR_IMPLDBG(args)
#define DDI_INTR_NEXDBG(args)
+/*
+ * The retire and LDI event macros are defined identically in both branches
+ * of this conditional, so unlike the empty macros above they stay active
+ * (gated only by the ddidebug bits) whichever branch is compiled.
+ */
+#define RIO_DEBUG(args) if (ddidebug & DDI_DBG_RETIRE) cmn_err args
+#define RIO_VERBOSE(args) if (ddidebug & DDI_DBG_RTR_VRBOSE) cmn_err args
+#define RIO_TRACE(args) if (ddidebug & DDI_DBG_RTR_TRACE) cmn_err args
+#define LDI_EVDBG(args) if (ddidebug & LDI_EV_DEBUG) cmn_err args
+#define LDI_EVTRC(args) if (ddidebug & LDI_EV_TRACE) cmn_err args
#endif
@@ -256,6 +271,15 @@ extern int i_ddi_reconfig(void);
extern void i_ddi_set_sysavail(void);
extern void i_ddi_set_reconfig(void);
+/* I/O retire related */
+extern int e_ddi_retire_device(char *path, char **cons_array);
+extern int e_ddi_unretire_device(char *path);
+extern int e_ddi_mark_retiring(dev_info_t *dip, void *arg);
+extern int e_ddi_retire_notify(dev_info_t *dip, void *arg);
+extern int e_ddi_retire_finalize(dev_info_t *dip, void *arg);
+extern void e_ddi_degrade_finalize(dev_info_t *dip);
+extern void e_ddi_undegrade_finalize(dev_info_t *dip);
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/sys/contract.h b/usr/src/uts/common/sys/contract.h
index 163f90cbfa..0bef407b98 100644
--- a/usr/src/uts/common/sys/contract.h
+++ b/usr/src/uts/common/sys/contract.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -72,6 +71,7 @@ typedef enum ctstate {
typedef enum ct_typeid {
CTT_PROCESS, /* process contract */
+ CTT_DEVICE, /* device contract */
CTT_MAXTYPE
} ct_typeid_t;
diff --git a/usr/src/uts/common/sys/contract/device.h b/usr/src/uts/common/sys/contract/device.h
new file mode 100644
index 0000000000..252cce3165
--- /dev/null
+++ b/usr/src/uts/common/sys/contract/device.h
@@ -0,0 +1,76 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_CONTRACT_DEVICE_H
+#define _SYS_CONTRACT_DEVICE_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/contract.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct ctmpl_device ctmpl_device_t;
+typedef struct cont_device cont_device_t;
+
+/*
+ * ct_ev_* flags
+ */
+#define CT_DEV_EV_ONLINE 0x1 /* device is moving to online state */
+#define CT_DEV_EV_DEGRADED 0x2 /* device is moving to degraded state */
+#define CT_DEV_EV_OFFLINE 0x4 /* device is moving to offline state */
+#define CT_DEV_ALLEVENT 0x7
+
+/*
+ * ctp_id values
+ */
+#define CTDP_ACCEPT 0x1 /* the acceptable set term */
+#define CTDP_NONEG 0x2 /* the non-negotiable term */
+#define CTDP_MINOR 0x4 /* the minor path term */
+#define CTDP_ALLPARAMS 0x7
+
+#define CTDP_NONEG_CLEAR 0x0 /* clear the noneg flag */
+#define CTDP_NONEG_SET 0x1 /* set noneg */
+
+/*
+ * Status fields
+ */
+#define CTDS_STATE "ctds_state"
+#define CTDS_ASET "ctds_aset"
+#define CTDS_NONEG "ctds_noneg"
+#define CTDS_MINOR "ctds_minor"
+
+/*
+ * Max Time allowed for synchronous acknowledgement of a negotiation event
+ */
+#define CT_DEV_ACKTIME 60 /* 60 seconds */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CONTRACT_DEVICE_H */
diff --git a/usr/src/uts/common/sys/contract/device_impl.h b/usr/src/uts/common/sys/contract/device_impl.h
new file mode 100644
index 0000000000..1bc27c454d
--- /dev/null
+++ b/usr/src/uts/common/sys/contract/device_impl.h
@@ -0,0 +1,93 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_CONTRACT_DEVICE_IMPL_H
+#define _SYS_CONTRACT_DEVICE_IMPL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/contract_impl.h>
+#include <sys/dditypes.h>
+#include <sys/contract/device.h>
+#include <sys/fs/snode.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Result of negotiation end: event successfully occurred or not
+ */
+#define CT_EV_SUCCESS 150
+#define CT_EV_FAILURE 151
+
+struct ctmpl_device {
+ ct_template_t ctd_ctmpl;
+ uint_t ctd_aset;
+ uint_t ctd_noneg;
+ char *ctd_minor;
+};
+
+struct cont_device {
+ contract_t cond_contract; /* common contract data */
+ char *cond_minor; /* minor node resource in contract */
+ dev_info_t *cond_dip; /* dip for minor node */
+ dev_t cond_devt; /* dev_t of minor node */
+ uint_t cond_spec; /* spec type of minor node */
+ uint_t cond_aset; /* acceptable state set */
+ uint_t cond_noneg; /* no negotiation if set */
+ uint_t cond_state; /* current state of device */
+ uint_t cond_neg; /* contract undergoing negotiation */
+ uint64_t cond_currev_id; /* id of event being negotiated */
+ uint_t cond_currev_type; /* type of event being negotiated */
+ uint_t cond_currev_ack; /* ack/nack status of ev negotiation */
+ list_node_t cond_next; /* linkage - devinfo's contracts */
+};
+
+/*
+ * Kernel APIs
+ */
+extern ct_type_t *device_type;
+/*
+ * struct proc;
+ */
+void contract_device_init(void);
+ct_ack_t contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type);
+void contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type);
+void contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type);
+int contract_device_open(dev_t dev, int spec_type, contract_t **ctpp);
+void contract_device_remove_dip(dev_info_t *dip);
+ct_ack_t contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type,
+ uint_t evtype);
+void contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type,
+ uint_t evtype, int ct_result);
+void contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type,
+ int result);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CONTRACT_DEVICE_IMPL_H */
diff --git a/usr/src/uts/common/sys/contract_impl.h b/usr/src/uts/common/sys/contract_impl.h
index c45cf06e60..7523de5bf0 100644
--- a/usr/src/uts/common/sys/contract_impl.h
+++ b/usr/src/uts/common/sys/contract_impl.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -50,6 +49,10 @@
extern "C" {
#endif
+extern int ct_debug;
+
+#define CT_DEBUG(args) do { if (ct_debug) cmn_err args; } while (0)
+
#ifdef _SYSCALL32
/*
@@ -110,7 +113,7 @@ typedef struct ctmplops {
int (*ctop_set)(struct ct_template *, ct_param_t *,
const cred_t *);
int (*ctop_get)(struct ct_template *, ct_param_t *);
- int (*ctop_create)(struct ct_template *);
+ int (*ctop_create)(struct ct_template *, ctid_t *);
uint_t allevents;
} ctmplops_t;
@@ -127,6 +130,7 @@ typedef struct ct_template {
uint_t ctmpl_ev_info; /* term: informative events */
} ct_template_t;
+
typedef enum ct_listnum {
CTEL_CONTRACT, /* ../contracts/type/<id>/events */
CTEL_BUNDLE, /* ../contracts/type/bundle */
@@ -139,6 +143,12 @@ typedef enum ctqflags {
CTQ_REFFED = 2 /* queue is reference counted */
} ctqflags_t;
+typedef enum ct_ack {
+ CT_ACK = 1, /* accept break */
+ CT_NACK, /* disallow break */
+ CT_NONE /* no matching contracts */
+} ct_ack_t;
+
/*
* Contract event queue
*/
@@ -198,6 +208,12 @@ typedef struct contops {
void (*contop_destroy)(struct contract *);
void (*contop_status)(struct contract *, zone_t *, int, nvlist_t *,
void *, model_t);
+ int (*contop_ack)(struct contract *, uint_t evtype,
+ uint64_t evid);
+ int (*contop_nack)(struct contract *, uint_t evtype,
+ uint64_t evid);
+ int (*contop_qack)(struct contract *, uint_t, uint64_t);
+ int (*contop_newct)(struct contract *);
} contops_t;
typedef ct_template_t *(ct_f_default_t)(void);
@@ -221,6 +237,11 @@ typedef enum ctflags {
CTF_INHERIT = 0x1
} ctflags_t;
+typedef struct ct_time {
+ long ctm_total; /* Total time allowed for event */
+ clock_t ctm_start; /* starting lbolt for event */
+} ct_time_t;
+
/*
* Contract
*/
@@ -257,6 +278,8 @@ typedef struct contract {
struct contract *ct_regent; /* [prospective] regent contract */
int ct_evcnt; /* number of critical events */
ct_kevent_t *ct_nevent; /* negotiation event */
+ ct_time_t ct_ntime; /* negotiation time tracker */
+ ct_time_t ct_qtime; /* quantum time tracker */
} contract_t;
#define CTLF_COPYOUT 0x1 /* performing copyout */
@@ -284,7 +307,7 @@ int ctmpl_get(ct_template_t *, ct_param_t *);
ct_template_t *ctmpl_dup(ct_template_t *);
void ctmpl_activate(ct_template_t *);
void ctmpl_clear(ct_template_t *);
-int ctmpl_create(ct_template_t *);
+int ctmpl_create(ct_template_t *, ctid_t *);
/*
* Contract functions
@@ -294,12 +317,14 @@ int contract_abandon(contract_t *, struct proc *, int);
int contract_adopt(contract_t *, struct proc *);
void contract_destroy(contract_t *);
void contract_exit(struct proc *);
-int contract_ack(contract_t *, uint64_t);
+int contract_ack(contract_t *ct, uint64_t evid, int cmd);
+int contract_qack(contract_t *ct, uint64_t evid);
+int contract_newct(contract_t *ct);
/*
* Event interfaces
*/
-void cte_publish_all(contract_t *, ct_kevent_t *, nvlist_t *, nvlist_t *);
+uint64_t cte_publish_all(contract_t *, ct_kevent_t *, nvlist_t *, nvlist_t *);
void cte_add_listener(ct_equeue_t *, ct_listener_t *);
void cte_remove_listener(ct_listener_t *);
void cte_reset_listener(ct_listener_t *);
@@ -313,7 +338,7 @@ int cte_set_reliable(ct_listener_t *, const cred_t *);
int contract_compar(const void *, const void *);
void ctmpl_init(ct_template_t *, ctmplops_t *, ct_type_t *, void *);
void ctmpl_copy(ct_template_t *, ct_template_t *);
-int ctmpl_create_inval(ct_template_t *);
+int ctmpl_create_inval(ct_template_t *, ctid_t *);
int contract_ctor(contract_t *, ct_type_t *, ct_template_t *, void *, ctflags_t,
struct proc *, int);
void contract_hold(contract_t *);
@@ -352,6 +377,13 @@ vnode_t *contract_vnode_get(contract_t *, vfs_t *);
void contract_vnode_set(contract_t *, contract_vnode_t *, vnode_t *);
int contract_vnode_clear(contract_t *, contract_vnode_t *);
+/*
+ * Negotiation stubs
+ */
+int contract_ack_inval(contract_t *, uint_t, uint64_t);
+int contract_qack_inval(contract_t *, uint_t, uint64_t);
+int contract_qack_notsup(contract_t *, uint_t, uint64_t);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/ctfs.h b/usr/src/uts/common/sys/ctfs.h
index e6702044d1..b46a517f2c 100644
--- a/usr/src/uts/common/sys/ctfs.h
+++ b/usr/src/uts/common/sys/ctfs.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -65,6 +64,7 @@ extern "C" {
#define CT_CQREQ CTFS_CTL(2) /* Request an additional quantum */
#define CT_CADOPT CTFS_CTL(3) /* Adopt a contract */
#define CT_CNEWCT CTFS_CTL(4) /* Define new contract */
+#define CT_CNACK CTFS_CTL(5) /* nack a negotiation */
/*
* Control codes for messages written to status files.
diff --git a/usr/src/uts/common/sys/ddi_impldefs.h b/usr/src/uts/common/sys/ddi_impldefs.h
index 3b99c60997..f5c227e5aa 100644
--- a/usr/src/uts/common/sys/ddi_impldefs.h
+++ b/usr/src/uts/common/sys/ddi_impldefs.h
@@ -194,6 +194,12 @@ struct dev_info {
char *devi_addr_buf; /* buffer for devi_addr */
char *devi_rebinding_name; /* binding_name of rebind */
+ /* For device contracts that have this dip's minor node as resource */
+ kmutex_t devi_ct_lock; /* contract lock */
+ kcondvar_t devi_ct_cv; /* contract cv */
+ int devi_ct_count; /* # of outstanding responses */
+ int devi_ct_neg; /* neg. occurred on dip */
+ list_t devi_ct;
};
#define DEVI(dev_info_type) ((struct dev_info *)(dev_info_type))
@@ -271,6 +277,11 @@ struct dev_info {
#define DEVI_SET_DEVICE_ONLINE(dip) { \
ASSERT(mutex_owned(&DEVI(dip)->devi_lock)); \
+ if (DEVI(dip)->devi_state & DEVI_DEVICE_DEGRADED) { \
+ mutex_exit(&DEVI(dip)->devi_lock); \
+ e_ddi_undegrade_finalize(dip); \
+ mutex_enter(&DEVI(dip)->devi_lock); \
+ } \
/* setting ONLINE clears DOWN, DEGRADED, OFFLINE */ \
DEVI(dip)->devi_state &= ~(DEVI_DEVICE_DOWN | \
DEVI_DEVICE_DEGRADED | DEVI_DEVICE_OFFLINE); \
@@ -297,12 +308,20 @@ struct dev_info {
#define DEVI_SET_DEVICE_DEGRADED(dip) { \
ASSERT(mutex_owned(&DEVI(dip)->devi_lock)); \
ASSERT(!DEVI_IS_DEVICE_OFFLINE(dip)); \
+ mutex_exit(&DEVI(dip)->devi_lock); \
+ e_ddi_degrade_finalize(dip); \
+ mutex_enter(&DEVI(dip)->devi_lock); \
DEVI(dip)->devi_state |= (DEVI_DEVICE_DEGRADED | DEVI_S_REPORT); \
}
#define DEVI_SET_DEVICE_UP(dip) { \
ASSERT(mutex_owned(&DEVI(dip)->devi_lock)); \
ASSERT(!DEVI_IS_DEVICE_OFFLINE(dip)); \
+ if (DEVI(dip)->devi_state & DEVI_DEVICE_DEGRADED) { \
+ mutex_exit(&DEVI(dip)->devi_lock); \
+ e_ddi_undegrade_finalize(dip); \
+ mutex_enter(&DEVI(dip)->devi_lock); \
+ } \
DEVI(dip)->devi_state &= ~(DEVI_DEVICE_DEGRADED | DEVI_DEVICE_DOWN); \
DEVI(dip)->devi_state |= DEVI_S_REPORT; \
}
@@ -503,6 +522,11 @@ void i_devi_exit(dev_info_t *, uint_t c_mask, int has_lock);
#define DEVI_REGISTERED_DEVID 0x00000020 /* device registered a devid */
#define DEVI_PHCI_SIGNALS_VHCI 0x00000040 /* pHCI ndi_devi_exit signals vHCI */
#define DEVI_REBIND 0x00000080 /* post initchild driver rebind */
+#define DEVI_RETIRED 0x00000100 /* device is retired */
+#define DEVI_RETIRING 0x00000200 /* being evaluated for retire */
+#define DEVI_R_CONSTRAINT 0x00000400 /* constraints have been applied */
+#define DEVI_R_BLOCKED 0x00000800 /* constraints block retire */
+#define DEVI_CT_NOP 0x00001000 /* NOP contract event occurred */
#define DEVI_BUSY_CHANGING(dip) (DEVI(dip)->devi_flags & DEVI_BUSY)
#define DEVI_BUSY_OWNED(dip) (DEVI_BUSY_CHANGING(dip) && \
diff --git a/usr/src/uts/common/sys/ddi_implfuncs.h b/usr/src/uts/common/sys/ddi_implfuncs.h
index 5105c4ce18..4aa213c1b2 100644
--- a/usr/src/uts/common/sys/ddi_implfuncs.h
+++ b/usr/src/uts/common/sys/ddi_implfuncs.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -274,6 +274,15 @@ int e_devid_cache_to_devt_list(ddi_devid_t, char *, int *, dev_t **);
void e_devid_cache_free_devt_list(int, dev_t *);
/*
+ * I/O retire persistent store
+ */
+void retire_store_init(void);
+void retire_store_read(void);
+int e_ddi_retire_persist(char *devpath);
+int e_ddi_retire_unpersist(char *devpath);
+int e_ddi_device_retired(char *devpath);
+
+/*
* Resource control functions to lock down device memory.
*/
extern int i_ddi_incr_locked_memory(proc_t *, rctl_qty_t);
diff --git a/usr/src/uts/common/sys/ddi_obsolete.h b/usr/src/uts/common/sys/ddi_obsolete.h
index c6a44c78de..84970dbb54 100644
--- a/usr/src/uts/common/sys/ddi_obsolete.h
+++ b/usr/src/uts/common/sys/ddi_obsolete.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -14,6 +14,7 @@
#include <sys/types.h>
#include <sys/dditypes.h>
+#include <sys/sunldi.h>
#ifdef __cplusplus
@@ -192,6 +193,15 @@ extern void repoutsw(int port, uint16_t *addr, int count);
extern void repoutsd(int port, uint32_t *addr, int count);
#endif
+/* Obsolete LDI event interfaces */
+extern int ldi_get_eventcookie(ldi_handle_t, char *,
+ ddi_eventcookie_t *);
+extern int ldi_add_event_handler(ldi_handle_t, ddi_eventcookie_t,
+ void (*handler)(ldi_handle_t, ddi_eventcookie_t, void *, void *),
+ void *, ldi_callback_id_t *);
+extern int ldi_remove_event_handler(ldi_handle_t, ldi_callback_id_t);
+
+
#endif /* not _DDI_STRICT */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/sys/fs/snode.h b/usr/src/uts/common/sys/fs/snode.h
index ecef85390c..cd572d545c 100644
--- a/usr/src/uts/common/sys/fs/snode.h
+++ b/usr/src/uts/common/sys/fs/snode.h
@@ -119,6 +119,7 @@ struct snode {
#define SSELFCLONE 0x2000 /* represents a self cloning device */
#define SNOFLUSH 0x4000 /* do not flush device on fsync */
#define SCLOSING 0x8000 /* in last close(9E) */
+#define SFENCED 0x10000 /* snode fenced off for I/O retire */
#ifdef _KERNEL
/*
@@ -128,6 +129,12 @@ struct snode {
#define VTOCS(vp) (VTOS(VTOS(vp)->s_commonvp))
#define STOV(sp) ((sp)->s_vnode)
+extern int spec_debug;
+
+#define SPEC_FENCE_DEBUG 0x0001 /* emit fence related debug messages */
+
+#define FENDBG(args) do { if (spec_debug & SPEC_FENCE_DEBUG) cmn_err args; } while (0)
+
/*
* Forward declarations
@@ -167,6 +174,8 @@ void spec_snode_walk(int (*callback)(struct snode *, void *), void *);
int spec_devi_open_count(struct snode *, dev_info_t **);
int spec_is_clone(struct vnode *);
int spec_is_selfclone(struct vnode *);
+int spec_fence_snode(dev_info_t *dip, struct vnode *vp);
+int spec_unfence_snode(dev_info_t *dip);
/*
diff --git a/usr/src/uts/common/sys/modctl.h b/usr/src/uts/common/sys/modctl.h
index 255d02d7b2..9eab8025da 100644
--- a/usr/src/uts/common/sys/modctl.h
+++ b/usr/src/uts/common/sys/modctl.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -258,6 +258,9 @@ struct modlinkage {
#define MODDEVNAME 37
#define MODGETDEVFSPATH_MI_LEN 38
#define MODGETDEVFSPATH_MI 39
+#define MODRETIRE 40
+#define MODUNRETIRE 41
+#define MODISRETIRED 42
/*
* sub cmds for MODEVENTS
@@ -641,6 +644,7 @@ extern int modctl(int, ...);
#define MODDEBUG_LOADMSG 0x80000000 /* print "[un]loading..." msg */
#define MODDEBUG_ERRMSG 0x40000000 /* print detailed error msgs */
#define MODDEBUG_LOADMSG2 0x20000000 /* print 2nd level msgs */
+#define MODDEBUG_RETIRE 0x10000000 /* print retire msgs */
#define MODDEBUG_FINI_EBUSY 0x00020000 /* pretend fini returns EBUSY */
#define MODDEBUG_NOAUL_IPP 0x00010000 /* no Autounloading ipp mods */
#define MODDEBUG_NOAUL_DACF 0x00008000 /* no Autounloading dacf mods */
diff --git a/usr/src/uts/common/sys/sunldi.h b/usr/src/uts/common/sys/sunldi.h
index f80cc44f8f..71e9d9a7da 100644
--- a/usr/src/uts/common/sys/sunldi.h
+++ b/usr/src/uts/common/sys/sunldi.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -60,6 +59,26 @@ typedef struct __ldi_handle *ldi_handle_t;
typedef struct __ldi_callback_id *ldi_callback_id_t;
+typedef struct __ldi_ev_cookie *ldi_ev_cookie_t;
+
+/*
+ * LDI event interface related
+ */
+#define LDI_EV_SUCCESS 0
+#define LDI_EV_FAILURE (-1)
+#define LDI_EV_NONE (-2) /* no matching callbacks registered */
+#define LDI_EV_OFFLINE "LDI:EVENT:OFFLINE"
+#define LDI_EV_DEGRADE "LDI:EVENT:DEGRADE"
+
+#define LDI_EV_CB_VERS_1 1
+#define LDI_EV_CB_VERS LDI_EV_CB_VERS_1
+
+typedef struct ldi_ev_callback {
+ uint_t cb_vers;
+ int (*cb_notify)(ldi_handle_t, ldi_ev_cookie_t, void *, void *);
+ void (*cb_finalize)(ldi_handle_t, ldi_ev_cookie_t, int, void *, void *);
+} ldi_ev_callback_t;
+
/*
* LDI Ident manipulation functions
*/
@@ -93,13 +112,6 @@ extern int ldi_get_size(ldi_handle_t, uint64_t *);
extern int ldi_prop_op(ldi_handle_t, ddi_prop_op_t, int,
char *, caddr_t, int *);
-extern int ldi_get_eventcookie(ldi_handle_t, char *,
- ddi_eventcookie_t *);
-extern int ldi_add_event_handler(ldi_handle_t, ddi_eventcookie_t,
- void (*handler)(ldi_handle_t, ddi_eventcookie_t, void *, void *),
- void *, ldi_callback_id_t *);
-extern int ldi_remove_event_handler(ldi_handle_t, ldi_callback_id_t);
-
extern int ldi_strategy(ldi_handle_t, struct buf *);
extern int ldi_dump(ldi_handle_t, caddr_t, daddr_t, int);
extern int ldi_devmap(ldi_handle_t, devmap_cookie_t, offset_t,
@@ -132,6 +144,20 @@ extern int ldi_get_otyp(ldi_handle_t, int *);
extern int ldi_get_devid(ldi_handle_t, ddi_devid_t *);
extern int ldi_get_minor_name(ldi_handle_t, char **);
+/*
+ * LDI events related declarations
+ */
+extern int ldi_ev_get_cookie(ldi_handle_t lh, char *evname,
+ ldi_ev_cookie_t *cookiep);
+extern char *ldi_ev_get_type(ldi_ev_cookie_t cookie);
+extern int ldi_ev_register_callbacks(ldi_handle_t lh,
+ ldi_ev_cookie_t cookie, ldi_ev_callback_t *callb,
+ void *arg, ldi_callback_id_t *id);
+extern int ldi_ev_notify(dev_info_t *dip, minor_t minor, int spec_type,
+ ldi_ev_cookie_t cookie, void *ev_data);
+extern void ldi_ev_finalize(dev_info_t *dip, minor_t minor, int spec_type,
+ int ldi_result, ldi_ev_cookie_t cookie, void *ev_data);
+extern int ldi_ev_remove_callbacks(ldi_callback_id_t id);
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/sys/sunldi_impl.h b/usr/src/uts/common/sys/sunldi_impl.h
index 9cbffc3ed2..1156fe2c41 100644
--- a/usr/src/uts/common/sys/sunldi_impl.h
+++ b/usr/src/uts/common/sys/sunldi_impl.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -53,6 +52,17 @@ extern "C" {
#define LI_HASH_SZ 32
/*
+ * Obsolete LDI event interfaces are available for now but are deprecated and a
+ * warning will be issued to consumers.
+ */
+#define LDI_OBSOLETE_EVENT 1
+
+/*
+ * Flag for LDI handle's lh_flags field
+ */
+#define LH_FLAGS_NOTIFY 0x0001 /* invoked in context of a notify */
+
+/*
* LDI initialization function
*/
void ldi_init(void);
@@ -87,20 +97,24 @@ struct ldi_handle {
/* protected by ldi_handle_hash_lock */
struct ldi_handle *lh_next;
uint_t lh_ref;
+ uint_t lh_flags;
/* unique/static fields in the handle */
uint_t lh_type;
struct ldi_ident *lh_ident;
vnode_t *lh_vp;
+#ifdef LDI_OBSOLETE_EVENT
/* fields protected by lh_lock */
kmutex_t lh_lock[1];
struct ldi_event *lh_events;
+#endif
};
/*
* LDI event information
*/
+#ifdef LDI_OBSOLETE_EVENT
typedef struct ldi_event {
/* fields protected by le_lhp->lh_lock */
struct ldi_event *le_next;
@@ -112,6 +126,36 @@ typedef struct ldi_event {
void *le_arg;
ddi_callback_id_t le_id;
} ldi_event_t;
+#endif
+
+typedef struct ldi_ev_callback_impl {
+ struct ldi_handle *lec_lhp;
+ dev_info_t *lec_dip;
+ dev_t lec_dev;
+ int lec_spec;
+ int (*lec_notify)();
+ void (*lec_finalize)();
+ void *lec_arg;
+ void *lec_cookie;
+ void *lec_id;
+ list_node_t lec_list;
+} ldi_ev_callback_impl_t;
+
+struct ldi_ev_callback_list {
+ kmutex_t le_lock;
+ kcondvar_t le_cv;
+ int le_busy;
+ void *le_thread;
+ list_t le_head;
+};
+
+int ldi_invoke_notify(dev_info_t *dip, dev_t dev, int spec_type, char *event,
+ void *ev_data);
+void ldi_invoke_finalize(dev_info_t *dip, dev_t dev, int spec_type, char *event,
+ int ldi_result, void *ev_data);
+int e_ddi_offline_notify(dev_info_t *dip);
+void e_ddi_offline_finalize(dev_info_t *dip, int result);
+
/*
* LDI device usage interfaces
diff --git a/usr/src/uts/common/sys/sunmdi.h b/usr/src/uts/common/sys/sunmdi.h
index 75b4f83ef7..c4a42633be 100644
--- a/usr/src/uts/common/sys/sunmdi.h
+++ b/usr/src/uts/common/sys/sunmdi.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -133,6 +133,14 @@ int mdi_devi_online(dev_info_t *, uint_t);
int mdi_devi_offline(dev_info_t *, uint_t);
/*
+ * MDI path retire interfaces
+ */
+void mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array);
+void mdi_phci_retire_notify(dev_info_t *dip, int *constraint);
+void mdi_phci_retire_finalize(dev_info_t *dip, int phci_only);
+void mdi_phci_unretire(dev_info_t *dip);
+
+/*
* MDI devinfo locking functions.
*/
void mdi_devi_enter(dev_info_t *, int *);
diff --git a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s
index 2d16849c3d..889a6e350a 100644
--- a/usr/src/uts/intel/ia32/ml/modstubs.s
+++ b/usr/src/uts/intel/ia32/ml/modstubs.s
@@ -457,6 +457,8 @@ fcnname/**/_info: \
NO_UNLOAD_STUB(specfs, spec_devi_open_count, nomod_minus_one);
NO_UNLOAD_STUB(specfs, spec_is_clone, nomod_zero);
NO_UNLOAD_STUB(specfs, spec_is_selfclone, nomod_zero);
+ NO_UNLOAD_STUB(specfs, spec_fence_snode, nomod_minus_one);
+ NO_UNLOAD_STUB(specfs, spec_unfence_snode, nomod_minus_one);
END_MODULE(specfs);
#endif
diff --git a/usr/src/uts/sparc/ml/modstubs.s b/usr/src/uts/sparc/ml/modstubs.s
index 5eade06f7b..a22853aba3 100644
--- a/usr/src/uts/sparc/ml/modstubs.s
+++ b/usr/src/uts/sparc/ml/modstubs.s
@@ -345,6 +345,8 @@ stubs_base:
NO_UNLOAD_STUB(specfs, spec_devi_open_count, nomod_minus_one);
NO_UNLOAD_STUB(specfs, spec_is_clone, nomod_zero);
NO_UNLOAD_STUB(specfs, spec_is_selfclone, nomod_zero);
+ NO_UNLOAD_STUB(specfs, spec_fence_snode, nomod_minus_one);
+ NO_UNLOAD_STUB(specfs, spec_unfence_snode, nomod_minus_one);
END_MODULE(specfs);
#endif
diff --git a/usr/src/uts/sun4/os/ddi_impl.c b/usr/src/uts/sun4/os/ddi_impl.c
index 954ae51bf5..eb068ac4e9 100644
--- a/usr/src/uts/sun4/os/ddi_impl.c
+++ b/usr/src/uts/sun4/os/ddi_impl.c
@@ -54,6 +54,8 @@
#include <sys/fs/snode.h>
#include <sys/ddi_isa.h>
#include <sys/modhash.h>
+#include <sys/modctl.h>
+#include <sys/sunldi_impl.h>
dev_info_t *get_intr_parent(dev_info_t *, dev_info_t *,
ddi_intr_handle_impl_t *);
@@ -2036,31 +2038,127 @@ visit_node(pnode_t nodeid, struct pta *ap)
}
}
-/*ARGSUSED*/
+/*
+ * NOTE: The caller of this function must check for device contracts
+ * or LDI callbacks against this dip before setting the dip offline.
+ */
static int
-set_dip_offline(dev_info_t *dip, void *arg)
+set_infant_dip_offline(dev_info_t *dip, void *arg)
{
+ char *path = (char *)arg;
+
ASSERT(dip);
+ ASSERT(arg);
+
+ if (i_ddi_node_state(dip) >= DS_ATTACHED) {
+ (void) ddi_pathname(dip, path);
+ cmn_err(CE_WARN, "Attempt to set offline flag on attached "
+ "node: %s", path);
+ return (DDI_FAILURE);
+ }
mutex_enter(&(DEVI(dip)->devi_lock));
if (!DEVI_IS_DEVICE_OFFLINE(dip))
DEVI_SET_DEVICE_OFFLINE(dip);
mutex_exit(&(DEVI(dip)->devi_lock));
+ return (DDI_SUCCESS);
+}
+
+typedef struct result {
+ char *path;
+ int result;
+} result_t;
+
+static int
+dip_set_offline(dev_info_t *dip, void *arg)
+{
+ int end;
+ result_t *resp = (result_t *)arg;
+
+ ASSERT(dip);
+ ASSERT(resp);
+
+ /*
+ * We stop the walk if e_ddi_offline_notify() returns
+ * failure, because this implies that one or more consumers
+ * (either LDI or contract based) has blocked the offline.
+ * So there is no point in continuing the walk.
+ */
+ if (e_ddi_offline_notify(dip) == DDI_FAILURE) {
+ resp->result = DDI_FAILURE;
+ return (DDI_WALK_TERMINATE);
+ }
+
+ /*
+ * If set_infant_dip_offline() returns failure, it implies
+ * that we failed to set a particular dip offline. This
+ * does not imply that the offline as a whole should fail.
+ * We want to do the best we can, so we continue the walk.
+ */
+ if (set_infant_dip_offline(dip, resp->path) == DDI_SUCCESS)
+ end = DDI_SUCCESS;
+ else
+ end = DDI_FAILURE;
+
+ e_ddi_offline_finalize(dip, end);
+
return (DDI_WALK_CONTINUE);
}
+/*
+ * The call to e_ddi_offline_notify() exists for the
+ * unlikely error case that a branch we are trying to
+ * create already exists and has device contracts or LDI
+ * event callbacks against it.
+ *
+ * We allow create to succeed for such branches only if
+ * no constraints block the offline.
+ */
+static int
+branch_set_offline(dev_info_t *dip, char *path)
+{
+ int circ;
+ int end;
+ result_t res;
+
+
+ if (e_ddi_offline_notify(dip) == DDI_FAILURE) {
+ return (DDI_FAILURE);
+ }
+
+ if (set_infant_dip_offline(dip, path) == DDI_SUCCESS)
+ end = DDI_SUCCESS;
+ else
+ end = DDI_FAILURE;
+
+ e_ddi_offline_finalize(dip, end);
+
+ if (end == DDI_FAILURE)
+ return (DDI_FAILURE);
+
+ res.result = DDI_SUCCESS;
+ res.path = path;
+
+ ndi_devi_enter(dip, &circ);
+ ddi_walk_devs(ddi_get_child(dip), dip_set_offline, &res);
+ ndi_devi_exit(dip, circ);
+
+ return (res.result);
+}
+
/*ARGSUSED*/
static int
create_prom_branch(void *arg, int has_changed)
{
- int circ, c;
+ int circ;
int exists, rv;
pnode_t nodeid;
struct ptnode *tnp;
dev_info_t *dip;
struct pta *ap = arg;
devi_branch_t *bp;
+ char *path;
ASSERT(ap);
ASSERT(ap->fdip == NULL);
@@ -2086,6 +2184,7 @@ create_prom_branch(void *arg, int has_changed)
if (ap->head == NULL)
return (ENODEV);
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
rv = 0;
while ((tnp = ap->head) != NULL) {
ap->head = tnp->next;
@@ -2112,32 +2211,28 @@ create_prom_branch(void *arg, int has_changed)
kmem_free(tnp, sizeof (struct ptnode));
- if (dip == NULL) {
- ndi_devi_exit(ap->pdip, circ);
- rv = EIO;
- continue;
- }
-
- ASSERT(ddi_get_parent(dip) == ap->pdip);
-
/*
* Hold the branch if it is not already held
*/
- if (!exists)
+ if (dip && !exists) {
e_ddi_branch_hold(dip);
+ }
- ASSERT(e_ddi_branch_held(dip));
+ ASSERT(dip == NULL || e_ddi_branch_held(dip));
/*
- * Set all dips in the branch offline so that
+ * Set all dips in the newly created branch offline so that
* only a "configure" operation can attach
* the branch
*/
- (void) set_dip_offline(dip, NULL);
+ if (dip == NULL || branch_set_offline(dip, path)
+ == DDI_FAILURE) {
+ ndi_devi_exit(ap->pdip, circ);
+ rv = EIO;
+ continue;
+ }
- ndi_devi_enter(dip, &c);
- ddi_walk_devs(ddi_get_child(dip), set_dip_offline, NULL);
- ndi_devi_exit(dip, c);
+ ASSERT(ddi_get_parent(dip) == ap->pdip);
ndi_devi_exit(ap->pdip, circ);
@@ -2155,6 +2250,8 @@ create_prom_branch(void *arg, int has_changed)
bp->devi_branch_callback(dip, bp->arg, 0);
}
+ kmem_free(path, MAXPATHLEN);
+
return (rv);
}
@@ -2162,9 +2259,10 @@ static int
sid_node_create(dev_info_t *pdip, devi_branch_t *bp, dev_info_t **rdipp)
{
int rv, circ, len;
- int i, flags;
+ int i, flags, ret;
dev_info_t *dip;
char *nbuf;
+ char *path;
static const char *noname = "<none>";
ASSERT(pdip);
@@ -2258,9 +2356,23 @@ sid_node_create(dev_info_t *pdip, devi_branch_t *bp, dev_info_t **rdipp)
*rdipp = dip;
/*
- * Set device offline - only the "configure" op should cause an attach
+ * Set device offline - only the "configure" op should cause an attach.
+ * Note that it is safe to set the dip offline without checking
+ * for either device contract or layered driver (LDI) based constraints
+ * since there cannot be any contracts or LDI opens of this device.
+ * This is because this node is a newly created dip with the parent busy
+ * held, so no other thread can come in and attach this dip. A dip that
+ * has never been attached cannot have contracts since by definition
+ * a device contract (an agreement between a process and a device minor
+ * node) can only be created against a device that has minor nodes
+ * i.e. is attached. Similarly an LDI open will only succeed if the
+ * dip is attached. We assert below that the dip is not attached.
*/
- (void) set_dip_offline(dip, NULL);
+ ASSERT(i_ddi_node_state(dip) < DS_ATTACHED);
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ ret = set_infant_dip_offline(dip, path);
+ ASSERT(ret == DDI_SUCCESS);
+ kmem_free(path, MAXPATHLEN);
return (rv);
fail:
diff --git a/usr/src/uts/sun4u/io/sbd.c b/usr/src/uts/sun4u/io/sbd.c
index 8e41b8ea1f..ec1b7096ba 100644
--- a/usr/src/uts/sun4u/io/sbd.c
+++ b/usr/src/uts/sun4u/io/sbd.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -5124,6 +5123,14 @@ sbd_get_comp_cond(dev_info_t *dip)
return (SBD_COND_UNKNOWN);
}
+ /*
+ * If retired, return FAILED
+ */
+ if (DEVI(dip)->devi_flags & DEVI_RETIRED) {
+ PR_CPU("dip is retired\n");
+ return (SBD_COND_FAILED);
+ }
+
if (ddi_getproplen(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
(char *)status, &len) != DDI_PROP_SUCCESS) {
PR_CPU("status in sbd is ok\n");
diff --git a/usr/src/uts/sun4u/io/sbd_io.c b/usr/src/uts/sun4u/io/sbd_io.c
index 4f07908874..c8a9ea27f7 100644
--- a/usr/src/uts/sun4u/io/sbd_io.c
+++ b/usr/src/uts/sun4u/io/sbd_io.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -253,6 +252,7 @@ sbd_io_status(sbd_handle_t *hp, sbd_devset_t devset, sbd_dev_stat_t *dsp)
isp->is_ostate = ostate_cvt(dstate);
isp->is_type = SBD_COMP_IO;
ip = SBD_GET_BOARD_IOUNIT(sbp, unit);
+ ip->sbi_cm.sbdev_cond = sbd_get_comp_cond(dip);
isp->is_cm.c_cond = ip->sbi_cm.sbdev_cond;
isp->is_cm.c_busy = ip->sbi_cm.sbdev_busy;
isp->is_cm.c_time = ip->sbi_cm.sbdev_time;
diff --git a/usr/src/uts/sun4u/ngdr/io/dr_io.c b/usr/src/uts/sun4u/ngdr/io/dr_io.c
index ac36e971d0..49a9866602 100644
--- a/usr/src/uts/sun4u/ngdr/io/dr_io.c
+++ b/usr/src/uts/sun4u/ngdr/io/dr_io.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -281,6 +281,26 @@ dr_post_detach_io(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
return (rv);
}
+/*
+ * Update the condition stored in ip->sbi_cm.sbdev_cond for I/O unit `ip',
+ * based on its devinfo node `dip':
+ *
+ *	dip is NULL			-> SBD_COND_UNKNOWN
+ *	DEVI_RETIRED set in devi_flags	-> SBD_COND_FAILED
+ *	unit is attached or present	-> SBD_COND_OK
+ *
+ * If none of the above applies, the previously stored condition is
+ * left untouched.  Nothing is returned; the caller reads the result
+ * back from ip->sbi_cm.sbdev_cond.
+ */
+static void
+dr_get_comp_cond(dr_io_unit_t *ip, dev_info_t *dip)
+{
+ /* without a devinfo node there is nothing to inspect */
+ if (dip == NULL) {
+ ip->sbi_cm.sbdev_cond = SBD_COND_UNKNOWN;
+ return;
+ }
+
+ /* the retired check comes first, so it wins over attached/present */
+ if (DEVI(dip)->devi_flags & DEVI_RETIRED) {
+ ip->sbi_cm.sbdev_cond = SBD_COND_FAILED;
+ return;
+ }
+
+ /*
+ * Attached and present-only units both report OK; the two branches
+ * assign the same value.
+ */
+ if (DR_DEV_IS_ATTACHED(&ip->sbi_cm)) {
+ ip->sbi_cm.sbdev_cond = SBD_COND_OK;
+ } else if (DR_DEV_IS_PRESENT(&ip->sbi_cm)) {
+ ip->sbi_cm.sbdev_cond = SBD_COND_OK;
+ }
+}
+
int
dr_io_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
{
@@ -322,6 +342,16 @@ dr_io_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
return (-1);
}
+ dip = NULL;
+ err = drmach_get_dip(id, &dip);
+ if (err) {
+ /* catch this in debug kernels */
+ ASSERT(0);
+
+ sbd_err_clear(&err);
+ continue;
+ }
+
isp = &dsp->d_io;
bzero((caddr_t)isp, sizeof (*isp));
@@ -329,21 +359,15 @@ dr_io_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
isp->is_cm.c_id.c_unit = ip->sbi_cm.sbdev_unum;
strncpy(isp->is_cm.c_id.c_name, pstat.type,
sizeof (isp->is_cm.c_id.c_name));
+
+ dr_get_comp_cond(ip, dip);
isp->is_cm.c_cond = ip->sbi_cm.sbdev_cond;
isp->is_cm.c_busy = ip->sbi_cm.sbdev_busy | pstat.busy;
isp->is_cm.c_time = ip->sbi_cm.sbdev_time;
isp->is_cm.c_ostate = ip->sbi_cm.sbdev_ostate;
isp->is_cm.c_sflags = 0;
- dip = NULL;
- err = drmach_get_dip(id, &dip);
- if (err) {
- /* catch this in debug kernels */
- ASSERT(0);
-
- sbd_err_clear(&err);
- continue;
- } else if (dip == NULL) {
+ if (dip == NULL) {
isp->is_pathname[0] = '\0';
isp->is_referenced = 0;
isp->is_unsafe_count = 0;