author | vikram <none@none> | 2007-08-09 21:43:47 -0700
---|---|---
committer | vikram <none@none> | 2007-08-09 21:43:47 -0700
commit | 25e8c5aa2b496d9026e958ac731a610167574f59 |
tree | 48d445f55e23f769f3981231d5b06b0b35505b33 |
parent | ffcd51f34e6cd303b9745909c4632da63426be17 |
download | illumos-gate-25e8c5aa2b496d9026e958ac731a610167574f59.tar.gz |
PSARC 2007/290 Retire Agent for I/O Devices
6464720 Deliver a FMA I/O retire agent
--HG--
rename : usr/src/cmd/fm/modules/common/io-retire/ior_main.c => deleted_files/usr/src/cmd/fm/modules/common/io-retire/ior_main.c
67 files changed, 7783 insertions, 209 deletions
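Besides the fmd I/O retire agent itself, this change adds device contracts to libcontract (ct_dev_tmpl_set_minor(), ct_dev_tmpl_set_aset(), ct_dev_tmpl_set_noneg(), ct_ctl_nack() and the ct_dev_status_*() routines), which let a consumer subscribe to and negotiate device state changes such as a retire-driven offline. The fragment below is a minimal illustrative sketch of creating such a contract with the new interfaces; it is not part of the diff, and the ctfs template path under CTFS_ROOT and the example minor-node path are assumptions made for the sketch.

```c
/*
 * Minimal sketch (not from this changeset): create a device contract
 * over one minor node using the ct_dev_tmpl_*() interfaces added here.
 * The template path layout and the example minor path are assumptions.
 */
#include <sys/contract.h>
#include <sys/contract/device.h>
#include <sys/ctfs.h>
#include <libcontract.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

int
main(void)
{
	char minor[] = "/devices/pci@0,0/disk@0,0:a";	/* placeholder path */
	ctid_t ctid;
	int tfd;

	/* Device contract templates are assumed to live under the "device" ctfs type. */
	tfd = open(CTFS_ROOT "/device/template", O_RDWR);
	if (tfd == -1) {
		perror("open template");
		return (1);
	}

	/* Watch this minor node; accept offline and degraded transitions. */
	(void) ct_dev_tmpl_set_minor(tfd, minor);
	(void) ct_dev_tmpl_set_aset(tfd, CT_DEV_EV_OFFLINE | CT_DEV_EV_DEGRADED);

	/* Deliver offline attempts as critical events so the holder can respond. */
	(void) ct_tmpl_set_critical(tfd, CT_DEV_EV_OFFLINE);

	if (ct_tmpl_create(tfd, &ctid) == 0)
		(void) printf("device contract %d created\n", (int)ctid);

	(void) close(tfd);
	return (0);
}
```

A contract holder that later receives a critical CT_DEV_EV_OFFLINE event would call ct_ctl_ack() to allow the offline to proceed, or the new ct_ctl_nack() to block it.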
diff --git a/usr/src/cmd/fm/modules/common/io-retire/ior_main.c b/deleted_files/usr/src/cmd/fm/modules/common/io-retire/ior_main.c index 6c1afdac81..6c1afdac81 100644 --- a/usr/src/cmd/fm/modules/common/io-retire/ior_main.c +++ b/deleted_files/usr/src/cmd/fm/modules/common/io-retire/ior_main.c diff --git a/usr/src/cmd/boot/bootadm/filelist.ramdisk b/usr/src/cmd/boot/bootadm/filelist.ramdisk index 58e885f6ca..cb2b543633 100644 --- a/usr/src/cmd/boot/bootadm/filelist.ramdisk +++ b/usr/src/cmd/boot/bootadm/filelist.ramdisk @@ -8,6 +8,7 @@ etc/driver_classes etc/path_to_inst etc/mach etc/devices/devid_cache +etc/devices/retire_store etc/devices/mdi_scsi_vhci_cache etc/devices/mdi_ib_cache etc/cluster/nodeid diff --git a/usr/src/cmd/ctwatch/ctwatch.c b/usr/src/cmd/ctwatch/ctwatch.c index 6247ccaf34..a42d109983 100644 --- a/usr/src/cmd/ctwatch/ctwatch.c +++ b/usr/src/cmd/ctwatch/ctwatch.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -54,6 +53,7 @@ struct { int found; } types[] = { { "process", 0 }, + { "device", 0 }, { NULL } }; diff --git a/usr/src/cmd/fm/modules/common/io-retire/Makefile b/usr/src/cmd/fm/modules/common/io-retire/Makefile index a1f8b20e36..6ad3386fee 100644 --- a/usr/src/cmd/fm/modules/common/io-retire/Makefile +++ b/usr/src/cmd/fm/modules/common/io-retire/Makefile @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. # # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,13 +19,15 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" +#pragma ident "%Z%%M% %I% %E% SMI" MODULE = io-retire CLASS = common -SRCS = ior_main.c +SRCS = rio_main.c include ../../Makefile.plugin + +LDLIBS += -ldevinfo diff --git a/usr/src/cmd/fm/modules/common/io-retire/io-retire.conf b/usr/src/cmd/fm/modules/common/io-retire/io-retire.conf index b7fdbd04b5..cb89b6b72c 100644 --- a/usr/src/cmd/fm/modules/common/io-retire/io-retire.conf +++ b/usr/src/cmd/fm/modules/common/io-retire/io-retire.conf @@ -2,9 +2,8 @@ # CDDL HEADER START # # The contents of this file are subject to the terms of the -# Common Development and Distribution License, Version 1.0 only -# (the "License"). You may not use this file except in compliance -# with the License. +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. 
# # You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE # or http://www.opensolaris.org/os/licensing. @@ -20,14 +19,15 @@ # CDDL HEADER END # # -# Copyright 2004 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # -#ident "%Z%%M% %I% %E% SMI" +#pragma ident "%Z%%M% %I% %E% SMI" # # fmd configuration file for the io-retire.so agent. # -setprop autoclose false +setprop global-disable false subscribe fault.io.* +subscribe list.repaired subscribe defect.io.* subscribe defect.ultraSPARC-II.memory.nodiag diff --git a/usr/src/cmd/fm/modules/common/io-retire/rio_main.c b/usr/src/cmd/fm/modules/common/io-retire/rio_main.c new file mode 100644 index 0000000000..7277eca7fd --- /dev/null +++ b/usr/src/cmd/fm/modules/common/io-retire/rio_main.c @@ -0,0 +1,290 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/fm/protocol.h> +#include <fm/fmd_api.h> +#include <strings.h> +#include <libdevinfo.h> +#include <sys/modctl.h> + +static int global_disable; + +struct except_list { + char *el_fault; + struct except_list *el_next; +}; + +static struct except_list *except_list; + +static void +parse_exception_string(fmd_hdl_t *hdl, char *estr) +{ + char *p; + char *next; + size_t len; + struct except_list *elem; + + len = strlen(estr); + + p = estr; + for (;;) { + /* Remove leading ':' */ + while (*p == ':') + p++; + if (*p == '\0') + break; + + next = strchr(p, ':'); + + if (next) + *next = '\0'; + + elem = fmd_hdl_alloc(hdl, + sizeof (struct except_list), FMD_SLEEP); + elem->el_fault = fmd_hdl_strdup(hdl, p, FMD_SLEEP); + elem->el_next = except_list; + except_list = elem; + + if (next) { + *next = ':'; + p = next + 1; + } else { + break; + } + } + + if (len != strlen(estr)) { + fmd_hdl_abort(hdl, "Error parsing exception list: %s\n", estr); + } +} + +/* + * Returns + * 1 if fault on exception list + * 0 otherwise + */ +static int +fault_exception(fmd_hdl_t *hdl, nvlist_t *fault) +{ + struct except_list *elem; + + for (elem = except_list; elem; elem = elem->el_next) { + if (fmd_nvl_class_match(hdl, fault, elem->el_fault)) { + fmd_hdl_debug(hdl, "rio_recv: Skipping fault " + "on exception list (%s)\n", elem->el_fault); + return (1); + } + } + + return (0); +} + +static void +free_exception_list(fmd_hdl_t *hdl) +{ + struct except_list *elem; + + while (except_list) { + elem = except_list; + except_list = elem->el_next; + fmd_hdl_strfree(hdl, elem->el_fault); + fmd_hdl_free(hdl, elem, sizeof (*elem)); + } +} + + +/*ARGSUSED*/ +static void +rio_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class) +{ + nvlist_t **faults; + nvlist_t *asru; + uint_t nfaults; + int f; + char devpath[PATH_MAX]; + char *path; + char *uuid; + char *scheme; + di_retire_t drt = {0}; + int retire; + int rval; + int error; + char *snglfault = FM_FAULT_CLASS"."FM_ERROR_IO"."; + + + /* + * If disabled, we don't do retire. We still do unretires though + */ + if (global_disable && strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) { + fmd_hdl_debug(hdl, "rio_recv: retire disabled\n"); + return; + } + + drt.rt_abort = (void (*)(void *, const char *, ...))fmd_hdl_abort; + drt.rt_debug = (void (*)(void *, const char *, ...))fmd_hdl_debug; + drt.rt_hdl = hdl; + + if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0) { + retire = 1; + } else if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) { + retire = 0; + } else if (strncmp(class, snglfault, strlen(snglfault)) == 0) { + fmd_hdl_debug(hdl, "rio_recv: single fault: %s\n", class); + return; + } else { + fmd_hdl_debug(hdl, "rio_recv: not list.* class: %s\n", class); + return; + } + + faults = NULL; + nfaults = 0; + if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, + &faults, &nfaults) != 0) { + fmd_hdl_debug(hdl, "rio_recv: no fault list"); + return; + } + + devpath[0] = '\0'; + rval = 0; + for (f = 0; f < nfaults; f++) { + if (nvlist_lookup_nvlist(faults[f], FM_FAULT_ASRU, + &asru) != 0) { + fmd_hdl_debug(hdl, "rio_recv: no asru in fault"); + continue; + } + + scheme = NULL; + if (nvlist_lookup_string(asru, FM_FMRI_SCHEME, &scheme) != 0 || + strcmp(scheme, FM_FMRI_SCHEME_DEV) != 0) { + fmd_hdl_debug(hdl, "rio_recv: not \"dev\" scheme: %s", + scheme ? 
scheme : "<NULL>"); + continue; + } + + if (retire && fault_exception(hdl, faults[f])) + continue; + + if (nvlist_lookup_string(asru, FM_FMRI_DEV_PATH, + &path) != 0 || path[0] == '\0') { + fmd_hdl_debug(hdl, "rio_recv: no dev path in asru"); + continue; + } + + /* + * If retire, we retire only if a single ASRU is pinpointed. + * We don't do automatic retires if a fault event pinpoints + * more than one ASRU. + */ + if (retire) { + if (devpath[0] != '\0' && strcmp(path, devpath) != 0) { + fmd_hdl_debug(hdl, + "rio_recv: Skipping: multiple ASRU"); + return; + } else if (devpath[0] == '\0') { + (void) strlcpy(devpath, path, sizeof (devpath)); + } + } else { + error = di_unretire_device(path, &drt); + if (error != 0) { + fmd_hdl_debug(hdl, "rio_recv: " + "di_unretire_device failed: error: %d %s", + error, path); + rval = -1; + } + } + } + + if (retire) { + if (devpath[0] == '\0') + return; + error = di_retire_device(devpath, &drt, 0); + if (error != 0) { + fmd_hdl_debug(hdl, "rio_recv: di_retire_device " + "failed: error: %d %s", error, devpath); + rval = -1; + } + } + + /* + * The fmd framework takes care of moving a case to the repaired + * state. To move the case to the closed state however, we (the + * retire agent) need to call fmd_case_uuclose() + */ + if (retire && rval == 0) { + if (nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0 && + !fmd_case_uuclosed(hdl, uuid)) { + fmd_case_uuclose(hdl, uuid); + } + } +} + +static const fmd_hdl_ops_t fmd_ops = { + rio_recv, /* fmdo_recv */ + NULL, /* fmdo_timeout */ + NULL, /* fmdo_close */ + NULL, /* fmdo_stats */ + NULL, /* fmdo_gc */ +}; + +static const fmd_prop_t rio_props[] = { + { "global-disable", FMD_TYPE_BOOL, "false" }, + { "fault-exceptions", FMD_TYPE_STRING, NULL }, + { NULL, 0, NULL } +}; + +static const fmd_hdl_info_t fmd_info = { + "I/O Retire Agent", "2.0", &fmd_ops, rio_props +}; + +void +_fmd_init(fmd_hdl_t *hdl) +{ + char *estr; + char *estrdup; + + if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) { + fmd_hdl_debug(hdl, "failed to register handle\n"); + return; + } + + global_disable = fmd_prop_get_int32(hdl, "global-disable"); + + estrdup = NULL; + if (estr = fmd_prop_get_string(hdl, "fault-exceptions")) { + estrdup = fmd_hdl_strdup(hdl, estr, FMD_SLEEP); + fmd_prop_free_string(hdl, estr); + parse_exception_string(hdl, estrdup); + fmd_hdl_strfree(hdl, estrdup); + } +} + +void +_fmd_fini(fmd_hdl_t *hdl) +{ + free_exception_list(hdl); +} diff --git a/usr/src/cmd/prtconf/pdevinfo.c b/usr/src/cmd/prtconf/pdevinfo.c index 445e02bc29..72b34de1f1 100644 --- a/usr/src/cmd/prtconf/pdevinfo.c +++ b/usr/src/cmd/prtconf/pdevinfo.c @@ -673,6 +673,8 @@ dump_devs(di_node_t node, void *arg) driver_name = di_driver_name(node); if (driver_name != NULL) (void) printf(" (driver name: %s)", driver_name); + } else if (di_retired(node)) { + (void) printf(" (retired)"); } else if (di_state(node) & DI_DRIVER_DETACHED) (void) printf(" (driver not attached)"); diff --git a/usr/src/cmd/rcm_daemon/common/filesys_rcm.c b/usr/src/cmd/rcm_daemon/common/filesys_rcm.c index 2103ba99ae..c7a5ce3765 100644 --- a/usr/src/cmd/rcm_daemon/common/filesys_rcm.c +++ b/usr/src/cmd/rcm_daemon/common/filesys_rcm.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -48,6 +47,8 @@ #include <sys/param.h> #include <sys/stat.h> #include <sys/utssys.h> +#include <unistd.h> +#include <limits.h> #include "rcm_module.h" @@ -71,6 +72,7 @@ typedef struct hashentry { int n_mounts; char *special; + char *fstype; char **mountps; struct hashentry *next; } hashentry_t; @@ -252,7 +254,11 @@ mnt_unregister(rcm_handle_t *hd) /* * mnt_offline() * - * Filesystem resources cannot be offlined. Always returns failure. + * Filesystem resources cannot be offlined. They can however be retired + * if they don't provide a critical service. The offline entry point + * checks if this is a retire operation and if it is and the filesystem + * doesn't provide a critical service, the entry point returns success + * For all other cases, failure is returned. * Since no real action is taken, QUERY or not doesn't matter. */ int @@ -260,17 +266,58 @@ mnt_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, char **errorp, rcm_info_t **dependent_info) { char **dependents; + hashentry_t *entry; + int retval; + int i; assert(hd != NULL); assert(rsrc != NULL); assert(id == (id_t)0); assert(errorp != NULL); + *errorp = NULL; + rcm_log_message(RCM_TRACE1, "FILESYS: offline(%s)\n", rsrc); /* Retrieve necessary info from the cache */ - if (use_cache(rsrc, errorp, &dependents) < 0) - return (RCM_FAILURE); + if (use_cache(rsrc, errorp, &dependents) < 0) { + if (flags & RCM_RETIRE_REQUEST) + return (RCM_NO_CONSTRAINT); + else + return (RCM_FAILURE); + } + + if (flags & RCM_RETIRE_REQUEST) { + (void) mutex_lock(&cache_lock); + if ((entry = cache_lookup(mnt_cache, rsrc)) == NULL) { + rcm_log_message(RCM_ERROR, "FILESYS: " + "failed to look up \"%s\" in cache (%s).\n", + rsrc, strerror(errno)); + (void) mutex_unlock(&cache_lock); + retval = RCM_NO_CONSTRAINT; + goto out; + } + + if (strcmp(entry->fstype, "zfs") == 0) { + retval = RCM_NO_CONSTRAINT; + rcm_log_message(RCM_TRACE1, + "FILESYS: zfs: NO_CONSTRAINT: %s\n", rsrc); + } else { + retval = RCM_SUCCESS; + for (i = 0; dependents[i] != NULL; i++) { + if (is_critical(dependents[i])) { + retval = RCM_FAILURE; + rcm_log_message(RCM_TRACE1, "FILESYS: " + "CRITICAL %s\n", rsrc); + break; + } + } + } + (void) mutex_unlock(&cache_lock); + goto out; + } + + retval = RCM_FAILURE; /* Convert the gathered dependents into an error message */ *errorp = create_message(MSG_HDR_STD, MSG_HDR_STD_MULTI, dependents); @@ -279,9 +326,10 @@ mnt_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, "FILESYS: failed to construct offline message (%s).\n", strerror(errno)); } - free_list(dependents); - return (RCM_FAILURE); +out: + free_list(dependents); + return (retval); } /* @@ -441,13 +489,167 @@ mnt_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **errorp, return (rv); } +static int +get_spec(char *line, char *spec, size_t ssz) +{ + char *cp; + char *start; + + if (strlcpy(spec, line, ssz) >= ssz) { + rcm_log_message(RCM_ERROR, "FILESYS: get_spec() failed: " + "line: %s\n", line); + return (-1); + } + + cp = spec; + while (*cp == ' ' || *cp == '\t') + cp++; + + if (*cp == '#') + return (-1); + + start = cp; + + while (*cp != ' ' && *cp != '\t' && *cp != 
'\0') + cp++; + *cp = '\0'; + + (void) memmove(spec, start, strlen(start) + 1); + + return (0); +} + +static int +path_match(char *rsrc, char *spec) +{ + char r[PATH_MAX]; + char s[PATH_MAX]; + size_t len; + + if (realpath(rsrc, r) == NULL) + goto error; + + if (realpath(spec, s) == NULL) + goto error; + + len = strlen("/devices/"); + + if (strncmp(r, "/devices/", len) != 0) { + errno = ENXIO; + goto error; + } + + if (strncmp(s, "/devices/", len) != 0) { + errno = ENXIO; + goto error; + } + + len = strlen(r); + if (strncmp(r, s, len) == 0 && (s[len] == '\0' || s[len] == ':')) + return (0); + else + return (1); + +error: + rcm_log_message(RCM_DEBUG, "FILESYS: path_match() failed " + "rsrc=%s spec=%s: %s\n", rsrc, spec, strerror(errno)); + return (-1); +} + +#define VFSTAB "/etc/vfstab" +#define RETIRED_PREFIX "## RETIRED ##" + +static int +disable_vfstab_entry(char *rsrc) +{ + FILE *vfp; + FILE *tfp; + int retval; + int update; + char tmp[PATH_MAX]; + char line[MNT_LINE_MAX + 1]; + + vfp = fopen(VFSTAB, "r"); + if (vfp == NULL) { + rcm_log_message(RCM_ERROR, "FILESYS: failed to open /etc/vfstab" + " for reading: %s\n", strerror(errno)); + return (RCM_FAILURE); + } + + (void) snprintf(tmp, sizeof (tmp), "/etc/vfstab.retire.%lu", getpid()); + + tfp = fopen(tmp, "w"); + if (tfp == NULL) { + rcm_log_message(RCM_ERROR, "FILESYS: failed to open " + "/etc/vfstab.retire for writing: %s\n", strerror(errno)); + (void) fclose(vfp); + return (RCM_FAILURE); + } + + retval = RCM_SUCCESS; + update = 0; + while (fgets(line, sizeof (line), vfp)) { + + char spec[MNT_LINE_MAX + 1]; + char newline[MNT_LINE_MAX + 1]; + char *l; + + if (get_spec(line, spec, sizeof (spec)) == -1) { + l = line; + goto foot; + } + + if (path_match(rsrc, spec) != 0) { + l = line; + goto foot; + } + + update = 1; + + /* Paths match. Disable this entry */ + (void) snprintf(newline, sizeof (newline), "%s %s", + RETIRED_PREFIX, line); + + rcm_log_message(RCM_TRACE1, "FILESYS: disabling line\n\t%s\n", + line); + + l = newline; +foot: + if (fputs(l, tfp) == EOF) { + rcm_log_message(RCM_ERROR, "FILESYS: failed to write " + "new vfstab: %s\n", strerror(errno)); + update = 0; + retval = RCM_FAILURE; + break; + } + } + + if (vfp) + (void) fclose(vfp); + if (tfp) + (void) fclose(tfp); + + if (update) { + if (rename(tmp, VFSTAB) != 0) { + rcm_log_message(RCM_ERROR, "FILESYS: vfstab rename " + "failed: %s\n", strerror(errno)); + retval = RCM_FAILURE; + } + } + + (void) unlink(tmp); + + return (retval); +} + /* * mnt_remove() * - * Remove should never be called since offline always fails. + * Remove will only be called in the retire case i.e. if RCM_RETIRE_NOTIFY + * flag is set. * - * Return failure and log the mistake if a remove is ever received for a - * mounted filesystem resource. + * If the flag is not set, then return failure and log the mistake if a + * remove is ever received for a mounted filesystem resource. 
*/ int mnt_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **errorp, @@ -460,11 +662,15 @@ mnt_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flag, char **errorp, rcm_log_message(RCM_TRACE1, "FILESYS: remove(%s)\n", rsrc); - /* Log the mistake */ - rcm_log_message(RCM_ERROR, "FILESYS: invalid remove of \"%s\"\n", rsrc); - *errorp = strdup(MSG_FAIL_REMOVE); + if (!(flag & RCM_RETIRE_NOTIFY)) { + /* Log the mistake */ + rcm_log_message(RCM_ERROR, "FILESYS: invalid remove of " + "\"%s\"\n", rsrc); + *errorp = strdup(MSG_FAIL_REMOVE); + return (RCM_FAILURE); + } - return (RCM_FAILURE); + return (disable_vfstab_entry(rsrc)); } /* @@ -617,6 +823,8 @@ free_entry(hashentry_t **entryp) if (*entryp) { if ((*entryp)->special) free((*entryp)->special); + if ((*entryp)->fstype) + free((*entryp)->fstype); free_list((*entryp)->mountps); free(*entryp); } @@ -731,9 +939,10 @@ cache_sync(rcm_handle_t *hd, cache_t **cachep) * cache_insert() * * Given a cache and a mnttab entry, this routine inserts that entry in - * the cache. The mnttab entry's special device is added to the 'mounts' - * hashtable of the cache, and the entry's mountp value is added to the - * list of associated mountpoints for the corresponding hashtable entry. + * the cache. The mnttab entry's special device and filesystem type + * is added to the 'mounts' hashtable of the cache, and the entry's + * mountp value is added to the list of associated mountpoints for the + * corresponding hashtable entry. * * Locking: the cache must be locked before calling this function. * @@ -751,7 +960,8 @@ cache_insert(cache_t *cache, struct mnttab *mt) (cache->mounts == NULL) || (mt == NULL) || (mt->mnt_special == NULL) || - (mt->mnt_mountp == NULL)) { + (mt->mnt_mountp == NULL) || + (mt->mnt_fstype == NULL)) { errno = EINVAL; return (-1); } @@ -776,10 +986,11 @@ cache_insert(cache_t *cache, struct mnttab *mt) if (entry == NULL) { entry = (hashentry_t *)calloc(1, sizeof (hashentry_t)); if ((entry == NULL) || - ((entry->special = strdup(mt->mnt_special)) == NULL)) { + ((entry->special = strdup(mt->mnt_special)) == NULL) || + ((entry->fstype = strdup(mt->mnt_fstype)) == NULL)) { rcm_log_message(RCM_ERROR, "FILESYS: failed to allocate special device name " - "(%s).\n", strerror(errno)); + "or filesystem type: (%s).\n", strerror(errno)); free_entry(&entry); errno = ENOMEM; return (-1); @@ -1124,19 +1335,25 @@ is_critical(char *rsrc) if ((strcmp(rsrc, "/") == 0) || (strcmp(rsrc, "/usr") == 0) || + (strcmp(rsrc, "/lib") == 0) || (strcmp(rsrc, "/usr/lib") == 0) || + (strcmp(rsrc, "/bin") == 0) || (strcmp(rsrc, "/usr/bin") == 0) || (strcmp(rsrc, "/tmp") == 0) || (strcmp(rsrc, "/var") == 0) || (strcmp(rsrc, "/var/run") == 0) || (strcmp(rsrc, "/etc") == 0) || (strcmp(rsrc, "/etc/mnttab") == 0) || - (strcmp(rsrc, "/sbin") == 0)) + (strcmp(rsrc, "/platform") == 0) || + (strcmp(rsrc, "/usr/platform") == 0) || + (strcmp(rsrc, "/sbin") == 0) || + (strcmp(rsrc, "/usr/sbin") == 0)) return (1); return (0); } + /* * use_cache() * diff --git a/usr/src/cmd/rcm_daemon/common/rcm_impl.c b/usr/src/cmd/rcm_daemon/common/rcm_impl.c index e6f6e65868..395a9231f2 100644 --- a/usr/src/cmd/rcm_daemon/common/rcm_impl.c +++ b/usr/src/cmd/rcm_daemon/common/rcm_impl.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. 
+ * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,7 +18,7 @@ * * CDDL HEADER END * - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -89,7 +88,17 @@ common_resource_op(int cmd, char *rsrcname, pid_t pid, uint_t flag, int seq_num, } else { error = rsrc_tree_action(node, cmd, &arg); } + } else if ((error == RCM_SUCCESS) && (flag & RCM_RETIRE_REQUEST)) { + /* + * No matching node, so no client. This means there + * is no constraint (RCM wise) on this retire. Return + * RCM_NO_CONSTRAINT to indicate this + */ + rcm_log_message(RCM_TRACE1, "No client. Returning " + "RCM_NO_CONSTRAINT: %s\n", rsrcname); + error = RCM_NO_CONSTRAINT; } + return (error); } diff --git a/usr/src/cmd/rcm_daemon/common/rcm_subr.c b/usr/src/cmd/rcm_daemon/common/rcm_subr.c index c6e6c52afa..82234e1894 100644 --- a/usr/src/cmd/rcm_daemon/common/rcm_subr.c +++ b/usr/src/cmd/rcm_daemon/common/rcm_subr.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -19,7 +18,7 @@ * * CDDL HEADER END * - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -797,6 +796,21 @@ rsrc_client_action(client_t *client, int cmd, void *arg) rval = ops->rcmop_request_offline(hdl, client->alias, client->pid, targ->flag, &error, &depend_info); + /* + * If this is a retire operation and we managed to call + * into at least one client, set retcode to RCM_SUCCESS to + * indicate that retire has been subject to constraints + * This retcode will be further modified by actual return + * code. + */ + if ((targ->flag & RCM_RETIRE_REQUEST) && + (targ->retcode == RCM_NO_CONSTRAINT)) { + rcm_log_message(RCM_DEBUG, + "at least 1 client, constraint applied: %s\n", + client->alias); + targ->retcode = RCM_SUCCESS; + } + /* Update the client's state after the operation. 
*/ if ((targ->flag & RCM_QUERY) == 0) { if (rval == RCM_SUCCESS) { @@ -920,11 +934,23 @@ int rsrc_client_action_list(client_t *list, int cmd, void *arg) { int error, rval = RCM_SUCCESS; + tree_walk_arg_t *targ = (tree_walk_arg_t *)arg; while (list) { client_t *client = list; list = client->next; + /* + * Make offline idempotent in the retire + * case + */ + if ((targ->flag & RCM_RETIRE_REQUEST) && + client->state == RCM_STATE_REMOVE) { + client->state = RCM_STATE_ONLINE; + rcm_log_message(RCM_DEBUG, "RETIRE: idempotent client " + "state: REMOVE -> ONLINE: %s\n", client->alias); + } + if (client->state == RCM_STATE_REMOVE) continue; @@ -1408,8 +1434,20 @@ rsrc_tree_action(rsrc_node_t *root, int cmd, tree_walk_arg_t *arg) rcm_log_message(RCM_TRACE2, "tree_action(%s, %d)\n", root->name, cmd); arg->cmd = cmd; - arg->retcode = RCM_SUCCESS; - rsrc_walk(root, (void *)arg, node_action); + + /* + * If RCM_RETIRE_REQUEST is set, just walk one node and preset + * retcode to NO_CONSTRAINT + */ + if (arg->flag & RCM_RETIRE_REQUEST) { + rcm_log_message(RCM_TRACE1, "tree_action: RETIRE_REQ: walking " + "only root node: %s\n", root->name); + arg->retcode = RCM_NO_CONSTRAINT; + (void) node_action(root, arg); + } else { + arg->retcode = RCM_SUCCESS; + rsrc_walk(root, (void *)arg, node_action); + } return (arg->retcode); } diff --git a/usr/src/lib/cfgadm_plugins/scsi/common/cfga_list.c b/usr/src/lib/cfgadm_plugins/scsi/common/cfga_list.c index 6a5f716282..78910e04b1 100644 --- a/usr/src/lib/cfgadm_plugins/scsi/common/cfga_list.c +++ b/usr/src/lib/cfgadm_plugins/scsi/common/cfga_list.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -383,13 +382,18 @@ out: } +struct bus_state { + int b_state; + int b_retired; +}; + static scfga_ret_t do_stat_bus(scfga_list_t *lap, int limited_bus_stat) { cfga_list_data_t *clp = NULL; ldata_list_t *listp = NULL; int l_errno = 0; - uint_t devinfo_state = 0; + struct bus_state bstate = {0}; walkarg_t u; scfga_ret_t ret; @@ -399,10 +403,10 @@ do_stat_bus(scfga_list_t *lap, int limited_bus_stat) u.node_args.flags = 0; u.node_args.fcn = get_bus_state; - ret = walk_tree(lap->apidp->hba_phys, &devinfo_state, DINFOPROP, &u, + ret = walk_tree(lap->apidp->hba_phys, &bstate, DINFOPROP, &u, SCFGA_WALK_NODE, &l_errno); if (ret == SCFGA_OK) { - lap->hba_rstate = bus_devinfo_to_recep_state(devinfo_state); + lap->hba_rstate = bus_devinfo_to_recep_state(bstate.b_state); } else { lap->hba_rstate = CFGA_STAT_NONE; } @@ -428,7 +432,8 @@ do_stat_bus(scfga_list_t *lap, int limited_bus_stat) clp->ap_class[0] = '\0'; /* Filled by libcfgadm */ clp->ap_r_state = lap->hba_rstate; clp->ap_o_state = CFGA_STAT_NONE; /* filled in later by the plug-in */ - clp->ap_cond = CFGA_COND_UNKNOWN; + clp->ap_cond = + (bstate.b_retired) ? 
CFGA_COND_FAILED : CFGA_COND_UNKNOWN; clp->ap_busy = 0; clp->ap_status_time = (time_t)-1; clp->ap_info[0] = '\0'; @@ -446,9 +451,10 @@ do_stat_bus(scfga_list_t *lap, int limited_bus_stat) static int get_bus_state(di_node_t node, void *arg) { - uint_t *di_statep = (uint_t *)arg; + struct bus_state *bsp = (struct bus_state *)arg; - *di_statep = di_state(node); + bsp->b_state = di_state(node); + bsp->b_retired = di_retired(node); return (DI_WALK_TERMINATE); } @@ -512,7 +518,7 @@ do_stat_dev( clp->ap_class[0] = '\0'; /* Filled in by libcfgadm */ clp->ap_r_state = lap->hba_rstate; clp->ap_o_state = ostate; - clp->ap_cond = CFGA_COND_UNKNOWN; + clp->ap_cond = di_retired(node) ? CFGA_COND_FAILED : CFGA_COND_UNKNOWN; clp->ap_busy = 0; /* no way to determine state change */ clp->ap_status_time = (time_t)-1; diff --git a/usr/src/lib/fm/topo/libtopo/common/dev.c b/usr/src/lib/fm/topo/libtopo/common/dev.c index dddede8706..7a4cb4f959 100644 --- a/usr/src/lib/fm/topo/libtopo/common/dev.c +++ b/usr/src/lib/fm/topo/libtopo/common/dev.c @@ -445,9 +445,10 @@ dev_fmri_unusable(topo_mod_t *mod, tnode_t *node, topo_version_t version, return (topo_mod_seterrno(mod, EMOD_UKNOWN_ENUM)); unusable = 1; } else { + uint_t retired = di_retired(dnode); state = di_state(dnode); - if (state & (DI_DEVICE_OFFLINE | DI_DEVICE_DOWN | - DI_BUS_QUIESCED | DI_BUS_DOWN)) + if (retired || (state & (DI_DEVICE_OFFLINE | DI_DEVICE_DOWN | + DI_BUS_QUIESCED | DI_BUS_DOWN))) unusable = 1; else unusable = 0; diff --git a/usr/src/lib/libcontract/Makefile b/usr/src/lib/libcontract/Makefile index a042993bfc..8d05db4980 100644 --- a/usr/src/lib/libcontract/Makefile +++ b/usr/src/lib/libcontract/Makefile @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -32,7 +32,8 @@ HDRDIR = common SUBDIRS = $(MACH) $(BUILD64)SUBDIRS += $(MACH64) -MSGFILES = common/process_dump.c +MSGFILES = common/process_dump.c common/device_dump.c \ + common/libcontract_priv.c POFILE = libcontract.po all := TARGET = all @@ -45,7 +46,8 @@ lint := TARGET = lint all clean clobber install lint: $(SUBDIRS) -$(POFILE): pofile_MSGFILES +$(POFILE): $(MSGFILES) + $(BUILDPO.msgfiles) install_h: $(ROOTHDRS) diff --git a/usr/src/lib/libcontract/Makefile.com b/usr/src/lib/libcontract/Makefile.com index 7d5ab8b471..050d42944e 100644 --- a/usr/src/lib/libcontract/Makefile.com +++ b/usr/src/lib/libcontract/Makefile.com @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. # # ident "%Z%%M% %I% %E% SMI" @@ -32,7 +32,9 @@ OBJECTS = \ libcontract.o \ libcontract_priv.o \ process.o \ - process_dump.o + process_dump.o \ + device.o \ + device_dump.o # include library definition include ../../Makefile.lib diff --git a/usr/src/lib/libcontract/common/device.c b/usr/src/lib/libcontract/common/device.c new file mode 100644 index 0000000000..99e9bd1203 --- /dev/null +++ b/usr/src/lib/libcontract/common/device.c @@ -0,0 +1,177 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/ctfs.h> +#include <sys/contract.h> +#include <sys/contract/device.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> +#include <libnvpair.h> +#include <limits.h> +#include <sys/stat.h> +#include <libcontract.h> +#include "libcontract_impl.h" + +/* + * Device contract template routines + */ + +int +ct_dev_tmpl_set_minor(int fd, char *minor) +{ + return (ct_tmpl_set_internal(fd, CTDP_MINOR, (uintptr_t)minor)); +} + +int +ct_dev_tmpl_set_aset(int fd, uint_t aset) +{ + return (ct_tmpl_set_internal(fd, CTDP_ACCEPT, aset)); +} + +int +ct_dev_tmpl_set_noneg(int fd) +{ + return (ct_tmpl_set_internal(fd, CTDP_NONEG, CTDP_NONEG_SET)); +} + +int +ct_dev_tmpl_clear_noneg(int fd) +{ + return (ct_tmpl_set_internal(fd, CTDP_NONEG, CTDP_NONEG_CLEAR)); +} + +int +ct_dev_tmpl_get_minor(int fd, char *buf, size_t *buflenp) +{ + char path[PATH_MAX]; + int error; + size_t len; + + error = ct_tmpl_get_internal_string(fd, CTDP_MINOR, path); + if (error) { + return (error); + } + + len = strlcpy(buf, path, *buflenp); + if (len >= *buflenp) { + *buflenp = len + 1; + return (EOVERFLOW); + } + + return (0); +} + +int +ct_dev_tmpl_get_aset(int fd, uint_t *aset) +{ + return (ct_tmpl_get_internal(fd, CTDP_ACCEPT, aset)); +} + +int +ct_dev_tmpl_get_noneg(int fd, uint_t *negp) +{ + return (ct_tmpl_get_internal(fd, CTDP_NONEG, negp)); +} + +/* + * Device contract event routines + */ + +/* + * No device contract specific event routines + */ + + +/* + * Device contract status routines + */ + +int +ct_dev_status_get_aset(ct_stathdl_t stathdl, uint_t *aset) +{ + struct ctlib_status_info *info = stathdl; + + if (info->status.ctst_type != CTT_DEVICE) + return (EINVAL); + + if (info->nvl == NULL) + return (ENOENT); + + return (nvlist_lookup_uint32(info->nvl, CTDS_ASET, aset)); +} + +int +ct_dev_status_get_noneg(ct_stathdl_t stathdl, uint_t *negp) +{ + struct ctlib_status_info *info = stathdl; + + if (info->status.ctst_type != CTT_DEVICE) + return (EINVAL); + + if (info->nvl == NULL) + return (ENOENT); + + return (nvlist_lookup_uint32(info->nvl, CTDS_NONEG, negp)); +} + +int +ct_dev_status_get_dev_state(ct_stathdl_t stathdl, uint_t *statep) +{ + struct ctlib_status_info *info = stathdl; + + if (info->status.ctst_type != CTT_DEVICE) + return (EINVAL); + + if (info->nvl == NULL) + return (ENOENT); + + return (nvlist_lookup_uint32(info->nvl, CTDS_STATE, statep)); +} + +int +ct_dev_status_get_minor(ct_stathdl_t stathdl, char **bufp) +{ + int error; + struct ctlib_status_info *info = stathdl; + + if (bufp == NULL) + return (EINVAL); + + if (info->status.ctst_type != CTT_DEVICE) + return (EINVAL); + + if (info->nvl == NULL) + return (ENOENT); + + error = nvlist_lookup_string(info->nvl, CTDS_MINOR, bufp); + if (error != 0) { + return (error); + } + + return (0); +} diff --git a/usr/src/lib/libcontract/common/device_dump.c b/usr/src/lib/libcontract/common/device_dump.c new file mode 
100644 index 0000000000..fb6d45cf10 --- /dev/null +++ b/usr/src/lib/libcontract/common/device_dump.c @@ -0,0 +1,103 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/contract/device.h> +#include <sys/wait.h> +#include <sys/ctfs.h> +#include <unistd.h> +#include <fcntl.h> +#include <errno.h> +#include <string.h> +#include <limits.h> +#include <stdio.h> +#include <assert.h> +#include <signal.h> +#include <libuutil.h> +#include <libintl.h> +#include <libcontract.h> +#include <libcontract_priv.h> +#include "libcontract_impl.h" +#include "libcontract_priv.h" + +/*ARGSUSED*/ +void +event_device(FILE *file, ct_evthdl_t ev, int verbose) +{ + uint_t type; + char *device; + char *s; + ctid_t ctid; + ct_stathdl_t stathdl; + int statfd; + + type = ct_event_get_type(ev); + ctid = ct_event_get_ctid(ev); + + statfd = contract_open(ctid, "device", "status", O_RDONLY); + if (statfd == -1) { + (void) fprintf(file, dgettext(TEXT_DOMAIN, "[bad contract]\n")); + return; + } + + if (ct_status_read(statfd, CTD_ALL, &stathdl) != 0) { + (void) fprintf(file, dgettext(TEXT_DOMAIN, "[status error]\n")); + return; + } + + if (ct_dev_status_get_minor(stathdl, &device) != 0) { + (void) fprintf(file, dgettext(TEXT_DOMAIN, "[bad status]\n")); + return; + } + + + switch (type) { + case CT_DEV_EV_OFFLINE: + s = dgettext(TEXT_DOMAIN, "device %s offlining\n"); + break; + case CT_DEV_EV_DEGRADED: + s = dgettext(TEXT_DOMAIN, "device %s degrading\n"); + break; + case CT_DEV_EV_ONLINE: + s = dgettext(TEXT_DOMAIN, "device %s online\n"); + break; + case CT_EV_NEGEND: + contract_negend_dump(file, ev); + s = NULL; + break; + default: + s = dgettext(TEXT_DOMAIN, "device %s sent an unknown event\n"); + break; + } + + if (s) { + /*LINTED*/ + (void) fprintf(file, s, device); + } + + ct_status_free(stathdl); + (void) close(statfd); +} diff --git a/usr/src/lib/libcontract/common/device_dump.h b/usr/src/lib/libcontract/common/device_dump.h new file mode 100644 index 0000000000..8c90400a52 --- /dev/null +++ b/usr/src/lib/libcontract/common/device_dump.h @@ -0,0 +1,43 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#ifndef _DEVICE_DUMP_H +#define _DEVICE_DUMP_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include "libcontract_impl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern void event_device(FILE *, ct_evthdl_t, int); + +#ifdef __cplusplus +} +#endif + +#endif /* _DEVICE_DUMP_H */ diff --git a/usr/src/lib/libcontract/common/libcontract.c b/usr/src/lib/libcontract/common/libcontract.c index 7cb35c4cfe..d2739cd1cd 100644 --- a/usr/src/lib/libcontract/common/libcontract.c +++ b/usr/src/lib/libcontract/common/libcontract.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -66,11 +65,11 @@ ct_tmpl_create(int fd, ctid_t *ctidp) } int -ct_tmpl_set_internal(int fd, uint_t id, uint_t value) +ct_tmpl_set_internal(int fd, uint_t id, uintptr_t value) { ct_param_t param; param.ctpm_id = id; - param.ctpm_value = value; + param.ctpm_value = (uint64_t)value; if (ioctl(fd, CT_TSET, ¶m) == -1) return (errno); return (0); @@ -112,6 +111,18 @@ ct_tmpl_get_internal(int fd, uint_t id, uint_t *value) } int +ct_tmpl_get_internal_string(int fd, uint_t id, char *value) +{ + ct_param_t param; + + param.ctpm_id = id; + param.ctpm_value = (uint64_t)(uintptr_t)value; + if (ioctl(fd, CT_TGET, ¶m) == -1) + return (errno); + return (0); +} + +int ct_tmpl_get_critical(int fd, uint_t *events) { return (ct_tmpl_get_internal(fd, CTP_EV_CRITICAL, events)); @@ -173,6 +184,14 @@ ct_ctl_ack(int fd, ctevid_t event) } int +ct_ctl_nack(int fd, ctevid_t event) +{ + if (ioctl(fd, CT_CNACK, &event) == -1) + return (errno); + return (0); +} + +int ct_ctl_qack(int fd, ctevid_t event) { if (ioctl(fd, CT_CQREQ, &event) == -1) diff --git a/usr/src/lib/libcontract/common/libcontract.h b/usr/src/lib/libcontract/common/libcontract.h index 98092b7db0..27453e5c83 100644 --- a/usr/src/lib/libcontract/common/libcontract.h +++ b/usr/src/lib/libcontract/common/libcontract.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. 
@@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -55,6 +54,7 @@ extern int ct_tmpl_get_informative(int, uint_t *); extern int ct_ctl_adopt(int); extern int ct_ctl_abandon(int); extern int ct_ctl_ack(int, ctevid_t); +extern int ct_ctl_nack(int, ctevid_t); extern int ct_ctl_qack(int, ctevid_t); extern int ct_ctl_newct(int, ctevid_t, int); @@ -113,6 +113,23 @@ extern int ct_pr_status_get_fatal(ct_stathdl_t, uint_t *); extern int ct_pr_status_get_members(ct_stathdl_t, pid_t **, uint_t *); extern int ct_pr_status_get_contracts(ct_stathdl_t, ctid_t **, uint_t *); +/* + * Device contract routines + */ +int ct_dev_tmpl_set_minor(int, char *); +int ct_dev_tmpl_set_aset(int, uint_t); +int ct_dev_tmpl_set_noneg(int); +int ct_dev_tmpl_clear_noneg(int); +int ct_dev_tmpl_get_minor(int, char *, size_t *); +int ct_dev_tmpl_get_aset(int, uint_t *); +int ct_dev_tmpl_get_noneg(int, uint_t *); +int ct_dev_status_get_aset(ct_stathdl_t, uint_t *); +int ct_dev_status_get_noneg(ct_stathdl_t, uint_t *); +int ct_dev_status_get_dev_state(ct_stathdl_t, uint_t *); +int ct_dev_status_get_minor(ct_stathdl_t, char **); + + + #ifdef __cplusplus } #endif diff --git a/usr/src/lib/libcontract/common/libcontract_impl.h b/usr/src/lib/libcontract/common/libcontract_impl.h index d8504cb5cf..ad50cd3dcc 100644 --- a/usr/src/lib/libcontract/common/libcontract_impl.h +++ b/usr/src/lib/libcontract/common/libcontract_impl.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -48,8 +47,9 @@ struct ctlib_event_info { nvlist_t *nvl; }; -extern int ct_tmpl_set_internal(int, uint_t, uint_t); +extern int ct_tmpl_set_internal(int, uint_t, uintptr_t); extern int ct_tmpl_get_internal(int, uint_t, uint_t *); +extern int ct_tmpl_get_internal_string(int, uint_t, char *); typedef struct contract_type { const char *type_name; diff --git a/usr/src/lib/libcontract/common/libcontract_priv.c b/usr/src/lib/libcontract/common/libcontract_priv.c index 1db8ea2d95..d74e8409c6 100644 --- a/usr/src/lib/libcontract/common/libcontract_priv.c +++ b/usr/src/lib/libcontract/common/libcontract_priv.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. 
All rights reserved. * Use is subject to license terms. */ @@ -34,16 +33,19 @@ #include <stdio.h> #include <assert.h> #include <libuutil.h> +#include <libintl.h> #include <string.h> #include <procfs.h> #include <libcontract.h> #include <libcontract_priv.h> #include "libcontract_impl.h" #include "process_dump.h" +#include "device_dump.h" contract_type_t types[CTT_MAXTYPE] = { - { "process", event_process } + { "process", event_process }, + { "device", event_device } }; static int @@ -147,3 +149,23 @@ contract_event_dump(FILE *file, ct_evthdl_t hdl, int verbose) type = info->event.ctev_cttype; types[type].type_event(file, hdl, verbose); } + +void +contract_negend_dump(FILE *file, ct_evthdl_t ev) +{ + ctevid_t nevid = 0; + ctid_t my_ctid = ct_event_get_ctid(ev); + ctid_t new_ctid = 0; + char *s; + + (void) ct_event_get_nevid(ev, &nevid); + (void) ct_event_get_newct(ev, &new_ctid); + + if (new_ctid != my_ctid) { + s = dgettext(TEXT_DOMAIN, "negotiation %llu succeeded\n"); + } else { + s = dgettext(TEXT_DOMAIN, "negotiation %llu failed\n"); + } + /*LINTED*/ + (void) fprintf(file, s, (unsigned long long)nevid); +} diff --git a/usr/src/lib/libcontract/common/libcontract_priv.h b/usr/src/lib/libcontract/common/libcontract_priv.h index a1069efb35..639f190aff 100644 --- a/usr/src/lib/libcontract/common/libcontract_priv.h +++ b/usr/src/lib/libcontract/common/libcontract_priv.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -42,6 +41,7 @@ extern int contract_open(ctid_t, const char *, const char *, int); extern int contract_abandon_id(ctid_t); extern ctid_t getctid(void); extern void contract_event_dump(FILE *, ct_evthdl_t, int); +extern void contract_negend_dump(FILE *, ct_evthdl_t); #ifdef __cplusplus } diff --git a/usr/src/lib/libcontract/common/mapfile-vers b/usr/src/lib/libcontract/common/mapfile-vers index 2f220b60ad..a64cbfd047 100644 --- a/usr/src/lib/libcontract/common/mapfile-vers +++ b/usr/src/lib/libcontract/common/mapfile-vers @@ -19,7 +19,7 @@ # CDDL HEADER END # # -# Copyright 2006 Sun Microsystems, Inc. All rights reserved. +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. # Use is subject to license terms. 
# # ident "%Z%%M% %I% %E% SMI" @@ -29,6 +29,7 @@ SUNW_1.1 { global: ct_ctl_abandon; ct_ctl_ack; + ct_ctl_nack; ct_ctl_adopt; ct_ctl_newct; ct_ctl_qack; @@ -85,6 +86,17 @@ SUNW_1.1 { ct_tmpl_set_cookie; ct_tmpl_set_critical; ct_tmpl_set_informative; + ct_dev_tmpl_set_minor; + ct_dev_tmpl_set_aset; + ct_dev_tmpl_set_noneg; + ct_dev_tmpl_clear_noneg; + ct_dev_tmpl_get_minor; + ct_dev_tmpl_get_aset; + ct_dev_tmpl_get_noneg; + ct_dev_status_get_aset; + ct_dev_status_get_noneg; + ct_dev_status_get_dev_state; + ct_dev_status_get_minor; }; SUNWprivate_1.1 { diff --git a/usr/src/lib/libdevinfo/Makefile.com b/usr/src/lib/libdevinfo/Makefile.com index c1db80004a..7c10f0ba47 100644 --- a/usr/src/lib/libdevinfo/Makefile.com +++ b/usr/src/lib/libdevinfo/Makefile.com @@ -30,7 +30,9 @@ VERS= .1 OBJECTS= devfsinfo.o devinfo.o devinfo_prop_decode.o devinfo_devlink.o \ devinfo_devperm.o devfsmap.o devinfo_devname.o \ - devinfo_finddev.o devinfo_dli.o devinfo_dim.o devinfo_realpath.o + devinfo_finddev.o devinfo_dli.o devinfo_dim.o \ + devinfo_realpath.o devinfo_retire.o + include ../../Makefile.lib include ../../Makefile.rootfs diff --git a/usr/src/lib/libdevinfo/devinfo.c b/usr/src/lib/libdevinfo/devinfo.c index c9179e0d1f..8c103d2f7a 100644 --- a/usr/src/lib/libdevinfo/devinfo.c +++ b/usr/src/lib/libdevinfo/devinfo.c @@ -998,6 +998,12 @@ di_flags(di_node_t node) return (DI_NODE(node)->flags); } +uint_t +di_retired(di_node_t node) +{ + return (di_flags(node) & DEVI_RETIRED); +} + ddi_devid_t di_devid(di_node_t node) { diff --git a/usr/src/lib/libdevinfo/devinfo_retire.c b/usr/src/lib/libdevinfo/devinfo_retire.c new file mode 100644 index 0000000000..8bcb77a730 --- /dev/null +++ b/usr/src/lib/libdevinfo/devinfo_retire.c @@ -0,0 +1,785 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <libdevinfo.h> +#include <sys/modctl.h> +#include <sys/stat.h> +#include <string.h> +#include <librcm.h> +#include <dlfcn.h> + +#undef NDEBUG +#include <assert.h> + +typedef struct rio_path { + char rpt_path[PATH_MAX]; + struct rio_path *rpt_next; +} rio_path_t; + +typedef struct rcm_arg { + char *rcm_root; + di_node_t rcm_node; + int rcm_supp; + rcm_handle_t *rcm_handle; + int rcm_retcode; + di_retire_t *rcm_dp; + rio_path_t *rcm_cons_nodes; + rio_path_t *rcm_rsrc_minors; + int (*rcm_offline)(); + int (*rcm_online)(); + int (*rcm_remove)(); +} rcm_arg_t; + +typedef struct selector { + char *sel_name; + int (*sel_selector)(di_node_t node, rcm_arg_t *rp); +} di_selector_t; + +static void rio_assert(di_retire_t *dp, const char *EXstr, int line, + const char *file); + +#define LIBRCM_PATH "/usr/lib/librcm.so" +#define RIO_ASSERT(d, x) \ + {if (!(x)) rio_assert(d, #x, __LINE__, __FILE__); } + +static int disk_select(di_node_t node, rcm_arg_t *rp); +static int nexus_select(di_node_t node, rcm_arg_t *rp); + +di_selector_t supported_devices[] = { + {"disk", disk_select}, + {"nexus", nexus_select}, + {NULL, NULL} +}; + +void * +s_calloc(size_t nelem, size_t elsize, int fail) +{ + if (fail) { + errno = ENOMEM; + return (NULL); + } else { + return (calloc(nelem, elsize)); + } +} + +static void +rio_assert(di_retire_t *dp, const char *EXstr, int line, const char *file) +{ + char buf[PATH_MAX]; + + if (dp->rt_abort == NULL) + assert(0); + + (void) snprintf(buf, sizeof (buf), + "Assertion failed: %s, file %s, line %d\n", + EXstr, file, line); + dp->rt_abort(dp->rt_hdl, buf); +} + +/*ARGSUSED*/ +static int +disk_minor(di_node_t node, di_minor_t minor, void *arg) +{ + rcm_arg_t *rp = (rcm_arg_t *)arg; + di_retire_t *dp = rp->rcm_dp; + + if (di_minor_spectype(minor) == S_IFBLK) { + rp->rcm_supp = 1; + dp->rt_debug(dp->rt_hdl, "[INFO]: disk_minor: is disk minor. " + "IDed this node as disk\n"); + return (DI_WALK_TERMINATE); + } + + dp->rt_debug(dp->rt_hdl, "[INFO]: disk_minor: Not a disk minor. " + "Continuing minor walk\n"); + return (DI_WALK_CONTINUE); +} + +static int +disk_select(di_node_t node, rcm_arg_t *rp) +{ + rcm_arg_t rarg; + di_retire_t *dp = rp->rcm_dp; + + rarg.rcm_dp = dp; + + /* + * Check if this is a disk minor. If any one minor is DDI_NT_BLOCK + * we assume it is a disk + */ + rarg.rcm_supp = 0; + if (di_walk_minor(node, DDI_NT_BLOCK, 0, &rarg, disk_minor) != 0) { + dp->rt_debug(dp->rt_hdl, "[INFO]: disk_select: di_walk_minor " + "failed. Returning NOTSUP\n"); + return (0); + } + + return (rarg.rcm_supp); +} + +static int +nexus_select(di_node_t node, rcm_arg_t *rp) +{ + int select; + char *path; + + di_retire_t *dp = rp->rcm_dp; + + path = di_devfs_path(node); + if (path == NULL) { + dp->rt_debug(dp->rt_hdl, "[INFO]: nexus_select: " + "di_devfs_path() is NULL. 
Returning NOTSUP\n"); + return (0); + } + + /* + * Check if it is a nexus + */ + if (di_driver_ops(node) & DI_BUS_OPS) { + dp->rt_debug(dp->rt_hdl, "[INFO]: nexus_select: is nexus %s\n", + path); + select = 1; + } else { + dp->rt_debug(dp->rt_hdl, "[INFO]: nexus_select: not nexus %s\n", + path); + select = 0; + } + + di_devfs_path_free(path); + + return (select); +} + +static int +node_select(di_node_t node, void *arg) +{ + rcm_arg_t *rp = (rcm_arg_t *)arg; + di_retire_t *dp; + int sel; + int i; + char *path; + uint_t state; + + dp = rp->rcm_dp; + + /* skip pseudo nodes - we only retire real hardware */ + path = di_devfs_path(node); + if (strncmp(path, "/pseudo/", strlen("/pseudo/")) == 0 || + strcmp(path, "/pseudo") == 0) { + dp->rt_debug(dp->rt_hdl, "[INFO]: node_select: " + "pseudo device in subtree - returning NOTSUP: %s\n", + path); + rp->rcm_supp = 0; + di_devfs_path_free(path); + return (DI_WALK_TERMINATE); + } + di_devfs_path_free(path); + + /* + * If a device is offline/detached/down it is + * retireable irrespective of the type of device, + * presumably the system is able to function without + * it. + */ + state = di_state(node); + if ((state & DI_DRIVER_DETACHED) || (state & DI_DEVICE_OFFLINE) || + (state & DI_BUS_DOWN)) { + dp->rt_debug(dp->rt_hdl, "[INFO]: node_select: device " + "is offline/detached. Assuming retire supported\n"); + return (DI_WALK_CONTINUE); + } + + sel = 0; + for (i = 0; supported_devices[i].sel_name != NULL; i++) { + sel = supported_devices[i].sel_selector(node, rp); + if (sel == 1) { + dp->rt_debug(dp->rt_hdl, "[INFO]: node_select: " + "found supported device: %s\n", + supported_devices[i].sel_name); + break; + } + } + + if (sel != 1) { + /* + * This node is not a supported device. Retire cannot proceed + */ + dp->rt_debug(dp->rt_hdl, "[INFO]: node_select: found " + "unsupported device. Returning NOTSUP\n"); + rp->rcm_supp = 0; + return (DI_WALK_TERMINATE); + } + + /* + * This node is supported. Check other nodes in this subtree. + */ + dp->rt_debug(dp->rt_hdl, "[INFO]: node_select: This node supported. " + "Checking other nodes in subtree: %s\n", rp->rcm_root); + return (DI_WALK_CONTINUE); +} + + + +/* + * when in doubt assume that retire is not supported for this device. + */ +static int +retire_supported(rcm_arg_t *rp) +{ + di_retire_t *dp; + di_node_t rnode = rp->rcm_node; + + dp = rp->rcm_dp; + + /* + * We should not be here if devinfo snapshot is NULL. + */ + RIO_ASSERT(dp, rnode != DI_NODE_NIL); + + /* + * Note: We initally set supported to 1, then walk the + * subtree rooted at devpath, allowing each node the + * opportunity to veto the support. We cannot do things + * the other way around i.e. assume "not supported" and + * let individual nodes indicate that they are supported. + * In the latter case, the supported flag would be set + * if any one node in the subtree was supported which is + * not what we want. + */ + rp->rcm_supp = 1; + if (di_walk_node(rnode, DI_WALK_CLDFIRST, rp, node_select) != 0) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: retire_supported: " + "di_walk_node: failed. 
Returning NOTSUP\n"); + rp->rcm_supp = 0; + } + + if (rp->rcm_supp) { + dp->rt_debug(dp->rt_hdl, "[INFO]: retire IS supported\n"); + } + + return (rp->rcm_supp); +} + +static void +rcm_finalize(rcm_arg_t *rp, int retcode) +{ + rio_path_t *p; + rio_path_t *tmp; + int flags = RCM_RETIRE_NOTIFY; + int retval; + int error; + di_retire_t *dp; + + dp = rp->rcm_dp; + + RIO_ASSERT(dp, retcode == 0 || retcode == -1); + + dp->rt_debug(dp->rt_hdl, "[INFO]: rcm_finalize: retcode=%d: dev=%s\n", + retcode, rp->rcm_root); + + for (p = rp->rcm_cons_nodes; p; ) { + tmp = p; + p = tmp->rpt_next; + free(tmp); + } + rp->rcm_cons_nodes = NULL; + + dp->rt_debug(dp->rt_hdl, "[INFO]: rcm_finalize: cons_nodes NULL\n"); + + for (p = rp->rcm_rsrc_minors; p; ) { + tmp = p; + p = tmp->rpt_next; + if (retcode == 0) { + retval = rp->rcm_remove(rp->rcm_handle, + tmp->rpt_path, flags, NULL); + error = errno; + } else { + RIO_ASSERT(dp, retcode == -1); + retval = rp->rcm_online(rp->rcm_handle, + tmp->rpt_path, flags, NULL); + error = errno; + } + if (retval != RCM_SUCCESS) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: rcm_finalize: " + "rcm_%s: retval=%d: error=%s: path=%s\n", + retcode == 0 ? "remove" : "online", retval, + strerror(error), tmp->rpt_path); + } else { + dp->rt_debug(dp->rt_hdl, "[INFO]: rcm_finalize: " + "rcm_%s: SUCCESS: path=%s\n", + retcode == 0 ? "remove" : "online", tmp->rpt_path); + } + free(tmp); + } + rp->rcm_rsrc_minors = NULL; +} +/*ARGSUSED*/ +static int +call_offline(di_node_t node, di_minor_t minor, void *arg) +{ + rcm_arg_t *rp = (rcm_arg_t *)arg; + di_retire_t *dp = rp->rcm_dp; + char *mnp; + rio_path_t *rpt; + int retval; + + mnp = di_devfs_minor_path(minor); + if (mnp == NULL) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: di_devfs_minor_path " + "failed. Returning RCM FAILURE: %s\n", rp->rcm_root); + rp->rcm_retcode = RCM_FAILURE; + return (DI_WALK_TERMINATE); + } + + rpt = s_calloc(1, sizeof (rio_path_t), 0); + if (rpt == NULL) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: calloc failed. 
" + "Returning RCM FAILURE: %s\n", rp->rcm_root); + di_devfs_path_free(mnp); + rp->rcm_retcode = RCM_FAILURE; + return (DI_WALK_TERMINATE); + } + + (void) snprintf(rpt->rpt_path, sizeof (rpt->rpt_path), + "/devices%s", mnp); + + di_devfs_path_free(mnp); + + retval = rp->rcm_offline(rp->rcm_handle, rpt->rpt_path, + RCM_RETIRE_REQUEST, NULL); + + rpt->rpt_next = rp->rcm_rsrc_minors; + rp->rcm_rsrc_minors = rpt; + + if (retval == RCM_FAILURE) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: RCM OFFLINE failed " + "for: %s\n", rpt->rpt_path); + rp->rcm_retcode = RCM_FAILURE; + return (DI_WALK_TERMINATE); + } else if (retval == RCM_SUCCESS) { + rp->rcm_retcode = RCM_SUCCESS; + dp->rt_debug(dp->rt_hdl, "[INFO]: RCM OFFLINE returned " + "RCM_SUCCESS: %s\n", rpt->rpt_path); + } else if (retval != RCM_NO_CONSTRAINT) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: RCM OFFLINE returned " + "invalid value for: %s\n", rpt->rpt_path); + rp->rcm_retcode = RCM_FAILURE; + return (DI_WALK_TERMINATE); + } else { + dp->rt_debug(dp->rt_hdl, "[INFO]: RCM OFFLINE returned " + "RCM_NO_CONSTRAINT: %s\n", rpt->rpt_path); + } + + return (DI_WALK_CONTINUE); +} + +static int +offline_one(di_node_t node, void *arg) +{ + rcm_arg_t *rp = (rcm_arg_t *)arg; + rio_path_t *rpt; + di_retire_t *dp = rp->rcm_dp; + char *path; + + /* + * We should already have terminated the walk + * in case of failure + */ + RIO_ASSERT(dp, rp->rcm_retcode == RCM_SUCCESS || + rp->rcm_retcode == RCM_NO_CONSTRAINT); + + dp->rt_debug(dp->rt_hdl, "[INFO]: offline_one: entered\n"); + + rp->rcm_retcode = RCM_NO_CONSTRAINT; + + rpt = s_calloc(1, sizeof (rio_path_t), 0); + if (rpt == NULL) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: rio_path_t calloc " + "failed: error: %s\n", strerror(errno)); + goto fail; + } + + path = di_devfs_path(node); + if (path == NULL) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: di_devfs_path " + "failed: error: %s\n", strerror(errno)); + free(rpt); + goto fail; + } + + (void) strlcpy(rpt->rpt_path, path, sizeof (rpt->rpt_path)); + + di_devfs_path_free(path); + + if (di_walk_minor(node, NULL, 0, rp, call_offline) != 0) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: di_walk_minor " + "failed: error: %s: %s\n", strerror(errno), path); + free(rpt); + goto fail; + } + + if (rp->rcm_retcode == RCM_FAILURE) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: di_walk_minor " + "returned: RCM_FAILURE: %s\n", rpt->rpt_path); + free(rpt); + goto fail; + } else if (rp->rcm_retcode == RCM_SUCCESS) { + dp->rt_debug(dp->rt_hdl, "[INFO]: di_walk_minor " + "returned: RCM_SUCCESS: %s\n", rpt->rpt_path); + rpt->rpt_next = rp->rcm_cons_nodes; + rp->rcm_cons_nodes = rpt; + } else if (rp->rcm_retcode != RCM_NO_CONSTRAINT) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: di_walk_minor " + "returned: unknown RCM error code: %d, %s\n", + rp->rcm_retcode, rpt->rpt_path); + free(rpt); + goto fail; + } else { + dp->rt_debug(dp->rt_hdl, "[INFO]: di_walk_minor " + "returned: RCM_NO_CONSTRAINT: %s\n", rpt->rpt_path); + free(rpt); + } + + /* + * RCM_SUCCESS or RCM_NO_CONSTRAINT. + * RCM_SUCCESS implies we overcame a constraint, so keep walking. + * RCM_NO_CONSTRAINT implies no constraints applied via RCM. + * Continue walking in the hope that contracts or LDI will + * apply constraints + * set retcode to RCM_SUCCESS to show that at least 1 node + * completely walked + */ + rp->rcm_retcode = RCM_SUCCESS; + return (DI_WALK_CONTINUE); + +fail: + rp->rcm_retcode = RCM_FAILURE; + return (DI_WALK_TERMINATE); +} + +/* + * Returns: + * RCM_SUCCESS: RCM constraints (if any) were applied. 
The + * device paths for which constraints were applied is passed + * back via the pp argument + * + * RCM_FAILURE: Either RCM constraints prevent a retire or + * an error occurred + */ +static int +rcm_notify(rcm_arg_t *rp, char **pp, size_t *clen) +{ + size_t len; + rio_path_t *p; + rio_path_t *tmp; + char *plistp; + char *s; + di_retire_t *dp; + di_node_t rnode; + + dp = rp->rcm_dp; + + dp->rt_debug(dp->rt_hdl, "[INFO]: rcm_notify() entered\n"); + + RIO_ASSERT(dp, rp->rcm_root); + + *pp = NULL; + + rnode = rp->rcm_node; + if (rnode == DI_NODE_NIL) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: devinfo snapshot " + "NULL. Returning no RCM constraint: %s\n", rp->rcm_root); + return (RCM_NO_CONSTRAINT); + } + + rp->rcm_retcode = RCM_NO_CONSTRAINT; + rp->rcm_cons_nodes = NULL; + rp->rcm_rsrc_minors = NULL; + if (di_walk_node(rnode, DI_WALK_CLDFIRST, rp, offline_one) != 0) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: di_walk_node " + "failed: error: %s: %s\n", strerror(errno), rp->rcm_root); + /* online is idempotent - safe to online non-offlined nodes */ + rcm_finalize(rp, -1); + rp->rcm_retcode = RCM_FAILURE; + goto out; + } + + if (rp->rcm_retcode == RCM_FAILURE) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: walk_node " + "returned retcode of RCM_FAILURE: %s\n", rp->rcm_root); + rcm_finalize(rp, -1); + goto out; + } + + if (rp->rcm_retcode == RCM_NO_CONSTRAINT) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: di_walk_node " + " - no nodes walked: RCM_NO_CONSTRAINT: %s\n", + rp->rcm_root); + } else { + dp->rt_debug(dp->rt_hdl, "[INFO]: walk_node: RCM_SUCCESS\n"); + } + + /* + * Convert to a sequence of NUL separated strings terminated by '\0'\0' + */ + for (len = 0, p = rp->rcm_cons_nodes; p; p = p->rpt_next) { + RIO_ASSERT(dp, p->rpt_path); + RIO_ASSERT(dp, strlen(p->rpt_path) > 0); + len += (strlen(p->rpt_path) + 1); + } + len++; /* list terminating '\0' */ + + dp->rt_debug(dp->rt_hdl, "[INFO]: len of constraint str = %lu\n", len); + + plistp = s_calloc(1, len, 0); + if (plistp == NULL) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: fail to alloc " + "constraint list: error: %s: %s\n", strerror(errno), + rp->rcm_root); + rcm_finalize(rp, -1); + rp->rcm_retcode = RCM_FAILURE; + goto out; + } + + for (s = plistp, p = rp->rcm_cons_nodes; p; ) { + tmp = p; + p = tmp->rpt_next; + (void) strcpy(s, tmp->rpt_path); + s += strlen(s) + 1; + RIO_ASSERT(dp, s - plistp < len); + free(tmp); + } + rp->rcm_cons_nodes = NULL; + RIO_ASSERT(dp, s - plistp == len - 1); + *s = '\0'; + + dp->rt_debug(dp->rt_hdl, "[INFO]: constraint str = %p\n", plistp); + + *pp = plistp; + *clen = len; + + rp->rcm_retcode = RCM_SUCCESS; +out: + return (rp->rcm_retcode); +} + + +/*ARGSUSED*/ +int +di_retire_device(char *devpath, di_retire_t *dp, int flags) +{ + char path[PATH_MAX]; + struct stat sb; + int retval = EINVAL; + char *constraint = NULL; + size_t clen; + void *librcm_hdl; + rcm_arg_t rarg = {0}; + int (*librcm_alloc_handle)(); + int (*librcm_free_handle)(); + + if (dp == NULL || dp->rt_debug == NULL || dp->rt_hdl == NULL) + return (EINVAL); + + if (devpath == NULL || devpath[0] == '\0') { + dp->rt_debug(dp->rt_hdl, "[ERROR]: NULL argument(s)\n"); + return (EINVAL); + } + + if (devpath[0] != '/' || strlen(devpath) >= PATH_MAX || + strncmp(devpath, "/devices/", strlen("/devices/")) == 0 || + strstr(devpath, "../devices/") || strrchr(devpath, ':')) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: invalid devpath: %s\n", + devpath); + return (EINVAL); + } + + if (flags != 0) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: flags should be 0: %d\n", + flags); + return (EINVAL); 
+ } + + /* + * dlopen rather than link against librcm since libdevinfo + * resides in / and librcm resides in /usr. The dlopen is + * safe to do since fmd which invokes the retire code + * resides on /usr and will not come here until /usr is + * mounted. + */ + librcm_hdl = dlopen(LIBRCM_PATH, RTLD_LAZY); + if (librcm_hdl == NULL) { + char *errstr = dlerror(); + dp->rt_debug(dp->rt_hdl, "[ERROR]: Cannot dlopen librcm: %s\n", + errstr ? errstr : "Unknown error"); + return (ENOSYS); + } + + librcm_alloc_handle = (int (*)())dlsym(librcm_hdl, "rcm_alloc_handle"); + rarg.rcm_offline = (int (*)())dlsym(librcm_hdl, "rcm_request_offline"); + rarg.rcm_online = (int (*)())dlsym(librcm_hdl, "rcm_notify_online"); + rarg.rcm_remove = (int (*)())dlsym(librcm_hdl, "rcm_notify_remove"); + librcm_free_handle = (int (*)())dlsym(librcm_hdl, "rcm_free_handle"); + + if (librcm_alloc_handle == NULL || + rarg.rcm_offline == NULL || + rarg.rcm_online == NULL || + rarg.rcm_remove == NULL || + librcm_free_handle == NULL) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: dlsym failed\n"); + retval = ENOSYS; + goto out; + } + + /* + * Take a libdevinfo snapshot here because we cannot do so + * after device is retired. If device doesn't attach, we retire + * anyway i.e. it is not fatal. + */ + rarg.rcm_node = di_init(devpath, DINFOCPYALL); + if (rarg.rcm_node == DI_NODE_NIL) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: device doesn't attach, " + "retiring anyway: %s\n", devpath); + } + + rarg.rcm_handle = NULL; + if (librcm_alloc_handle(NULL, 0, NULL, &rarg.rcm_handle) + != RCM_SUCCESS) { + retval = errno; + dp->rt_debug(dp->rt_hdl, "[ERROR]: failed to alloc " + "RCM handle. Returning RCM failure: %s\n", devpath); + rarg.rcm_handle = NULL; + goto out; + } + + rarg.rcm_root = devpath; + rarg.rcm_dp = dp; + + /* + * If device is already detached/nonexistent and cannot be + * attached, allow retire without checking device type. + * XXX + * Else, check if retire is supported for this device type. + */ + (void) snprintf(path, sizeof (path), "/devices%s", devpath); + if (stat(path, &sb) == -1 || !S_ISDIR(sb.st_mode)) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: detached or nonexistent " + "device. 
Bypassing retire_supported: %s\n", devpath); + } else if (!retire_supported(&rarg)) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: retire not supported for " + "device type: %s\n", devpath); + retval = ENOTSUP; + goto out; + } + + clen = 0; + constraint = NULL; + retval = rcm_notify(&rarg, &constraint, &clen); + if (retval == RCM_FAILURE) { + /* retire not permitted */ + dp->rt_debug(dp->rt_hdl, "[ERROR]: RCM constraints block " + "retire: %s\n", devpath); + retval = EBUSY; + goto out; + } else if (retval == RCM_SUCCESS) { + dp->rt_debug(dp->rt_hdl, "[INFO]: RCM constraints applied" + ": %s\n", devpath); + } else if (retval == RCM_NO_CONSTRAINT) { + dp->rt_debug(dp->rt_hdl, "[INFO]: No RCM constraints applied" + ": %s\n", devpath); + } else { + dp->rt_debug(dp->rt_hdl, "[ERROR]: notify returned unknown " + "return code: %d: %s\n", retval, devpath); + retval = ESRCH; + goto out; + } + + if (modctl(MODRETIRE, devpath, constraint, clen) != 0) { + retval = errno; + dp->rt_debug(dp->rt_hdl, "[ERROR]: retire modctl() failed: " + "%s: %s\n", devpath, strerror(retval)); + rcm_finalize(&rarg, -1); + goto out; + } + + dp->rt_debug(dp->rt_hdl, "[INFO]: retire modctl() succeeded: %s\n", + devpath); + + rcm_finalize(&rarg, 0); + + retval = 0; + +out: + if (rarg.rcm_handle) + (void) librcm_free_handle(rarg.rcm_handle); + + RIO_ASSERT(dp, rarg.rcm_cons_nodes == NULL); + RIO_ASSERT(dp, rarg.rcm_rsrc_minors == NULL); + + (void) dlclose(librcm_hdl); + + free(constraint); + + if (rarg.rcm_node != DI_NODE_NIL) + di_fini(rarg.rcm_node); + + return (retval); +} + +/*ARGSUSED*/ +int +di_unretire_device(char *devpath, di_retire_t *dp) +{ + if (dp == NULL || dp->rt_debug == NULL || dp->rt_hdl == NULL) + return (EINVAL); + + if (devpath == NULL || devpath[0] == '\0') { + dp->rt_debug(dp->rt_hdl, "[ERROR]: NULL devpath\n"); + return (EINVAL); + } + + if (devpath[0] != '/' || strlen(devpath) >= PATH_MAX || + strncmp(devpath, "/devices/", strlen("/devices/")) == 0 || + strstr(devpath, "../devices/") || strrchr(devpath, ':')) { + dp->rt_debug(dp->rt_hdl, "[ERROR]: invalid devpath: %s\n", + devpath); + return (EINVAL); + } + + if (modctl(MODUNRETIRE, devpath) != 0) { + int err = errno; + dp->rt_debug(dp->rt_hdl, "[ERROR]: unretire modctl() failed: " + "%s: %s\n", devpath, strerror(err)); + return (err); + } + + dp->rt_debug(dp->rt_hdl, "[INFO]: unretire modctl() done: %s\n", + devpath); + + return (0); +} diff --git a/usr/src/lib/libdevinfo/libdevinfo.h b/usr/src/lib/libdevinfo/libdevinfo.h index ad08502628..bdb4fa2238 100644 --- a/usr/src/lib/libdevinfo/libdevinfo.h +++ b/usr/src/lib/libdevinfo/libdevinfo.h @@ -355,6 +355,11 @@ extern void *di_parent_private_data(di_node_t node); extern void *di_driver_private_data(di_node_t node); /* + * The value of the dip's devi_flags field + */ +uint_t di_flags(di_node_t node); + +/* * Types of links for devlink lookup */ #define DI_PRIMARY_LINK 0x01 @@ -412,6 +417,19 @@ extern int di_devlink_cache_walk(di_devlink_handle_t hdp, const char *re, int (*devlink_callback)(di_devlink_t, void *)); /* + * Private interfaces for I/O retire + */ +typedef struct di_retire { + void *rt_hdl; + void (*rt_abort)(void *hdl, const char *format, ...); + void (*rt_debug)(void *hdl, const char *format, ...); +} di_retire_t; + +extern int di_retire_device(char *path, di_retire_t *dp, int flags); +extern int di_unretire_device(char *path, di_retire_t *dp); +extern uint_t di_retired(di_node_t node); + +/* * Private interfaces for /etc/logindevperm */ extern int di_devperm_login(const char *, uid_t, gid_t, void 
(*)(char *)); diff --git a/usr/src/lib/libdevinfo/mapfile-vers b/usr/src/lib/libdevinfo/mapfile-vers index c941cd3efe..c2d82dcb4b 100644 --- a/usr/src/lib/libdevinfo/mapfile-vers +++ b/usr/src/lib/libdevinfo/mapfile-vers @@ -211,6 +211,9 @@ SUNWprivate_1.1 { finddev_close; finddev_next; di_flags; + di_retire_device; + di_unretire_device; + di_retired; local: *; }; diff --git a/usr/src/lib/librcm/librcm.h b/usr/src/lib/librcm/librcm.h index d830ea375e..be57013b0a 100644 --- a/usr/src/lib/librcm/librcm.h +++ b/usr/src/lib/librcm/librcm.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -57,6 +57,8 @@ extern "C" { #define RCM_REGISTER_EVENT 0x2000 /* private */ #define RCM_REGISTER_CAPACITY 0x4000 /* private */ #define RCM_SUSPENDED 0x8000 /* private */ +#define RCM_RETIRE_REQUEST 0x10000 +#define RCM_RETIRE_NOTIFY 0x20000 /* * RCM return values @@ -64,6 +66,7 @@ extern "C" { #define RCM_SUCCESS 0 #define RCM_FAILURE -1 #define RCM_CONFLICT -2 +#define RCM_NO_CONSTRAINT -3 /* * RCM resource states diff --git a/usr/src/lib/librcm/librcm_impl.h b/usr/src/lib/librcm/librcm_impl.h index a534d22e1e..b096ffba0d 100644 --- a/usr/src/lib/librcm/librcm_impl.h +++ b/usr/src/lib/librcm/librcm_impl.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -69,8 +68,8 @@ extern "C" { #define RCM_REGISTER_MASK (RCM_FILESYS|RCM_REGISTER_DR|\ RCM_REGISTER_EVENT|RCM_REGISTER_CAPACITY) #define RCM_REQUEST_MASK (RCM_QUERY|RCM_SCOPE|RCM_FORCE|RCM_FILESYS|\ - RCM_QUERY_CANCEL) -#define RCM_NOTIFY_MASK (RCM_FILESYS) + RCM_QUERY_CANCEL|RCM_RETIRE_REQUEST) +#define RCM_NOTIFY_MASK (RCM_FILESYS|RCM_RETIRE_NOTIFY) /* event data names */ #define RCM_CMD "rcm.cmd" diff --git a/usr/src/pkgdefs/SUNWhea/prototype_com b/usr/src/pkgdefs/SUNWhea/prototype_com index 65925570c6..efa640b229 100644 --- a/usr/src/pkgdefs/SUNWhea/prototype_com +++ b/usr/src/pkgdefs/SUNWhea/prototype_com @@ -576,6 +576,8 @@ f none usr/include/sys/contract.h 644 root bin f none usr/include/sys/contract_impl.h 644 root bin f none usr/include/sys/contract/process.h 644 root bin f none usr/include/sys/contract/process_impl.h 644 root bin +f none usr/include/sys/contract/device.h 644 root bin +f none usr/include/sys/contract/device_impl.h 644 root bin f none usr/include/sys/copyops.h 644 root bin f none usr/include/sys/core.h 644 root bin f none usr/include/sys/corectl.h 644 root bin diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files index e9de4ceac8..c439bc5d39 100644 --- a/usr/src/uts/common/Makefile.files +++ b/usr/src/uts/common/Makefile.files @@ -120,6 +120,7 @@ GENUNIX_OBJS += \ ddi_strtol.o \ devcfg.o \ devcache.o \ + device.o \ devid.o \ devid_cache.o \ devid_scsi.o \ @@ -244,6 +245,7 @@ GENUNIX_OBJS += \ refstr.o \ rename.o \ resolvepath.o \ + retire_store.o \ process.o \ rlimit.o \ rmap.o \ diff --git a/usr/src/uts/common/contract/device.c b/usr/src/uts/common/contract/device.c new file mode 100644 index 0000000000..4632cdaa9d --- /dev/null +++ b/usr/src/uts/common/contract/device.c @@ -0,0 +1,2207 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/mutex.h> +#include <sys/debug.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/kmem.h> +#include <sys/thread.h> +#include <sys/id_space.h> +#include <sys/avl.h> +#include <sys/list.h> +#include <sys/sysmacros.h> +#include <sys/proc.h> +#include <sys/contract.h> +#include <sys/contract_impl.h> +#include <sys/contract/device.h> +#include <sys/contract/device_impl.h> +#include <sys/cmn_err.h> +#include <sys/nvpair.h> +#include <sys/policy.h> +#include <sys/ddi_impldefs.h> +#include <sys/ddi_implfuncs.h> +#include <sys/systm.h> +#include <sys/stat.h> +#include <sys/sunddi.h> +#include <sys/esunddi.h> +#include <sys/ddi.h> +#include <sys/fs/dv_node.h> +#include <sys/sunndi.h> +#undef ct_lock /* needed because clnt.h defines ct_lock as a macro */ + +/* + * Device Contracts + * ----------------- + * This file contains the core code for the device contracts framework. + * A device contract is an agreement or a contract between a process and + * the kernel regarding the state of the device. A device contract may be + * created when a relationship is formed between a device and a process + * i.e. at open(2) time, or it may be created at some point after the device + * has been opened. A device contract once formed may be broken by either party. + * A device contract can be broken by the process by an explicit abandon of the + * contract or by an implicit abandon when the process exits. A device contract + * can be broken by the kernel either asynchronously (without negotiation) or + * synchronously (with negotiation). Exactly which happens depends on the device + * state transition. The following state diagram shows the transitions between + * device states. Only device state transitions currently supported by device + * contracts is shown. + * + * <-- A --> + * /-----------------> DEGRADED + * | | + * | | + * | | S + * | | | + * | | v + * v S --> v + * ONLINE ------------> OFFLINE + * + * + * In the figure above, the arrows indicate the direction of transition. The + * letter S refers to transitions which are inherently synchronous i.e. + * require negotiation and the letter A indicates transitions which are + * asynchronous i.e. are done without contract negotiations. A good example + * of a synchronous transition is the ONLINE -> OFFLINE transition. This + * transition cannot happen as long as there are consumers which have the + * device open. Thus some form of negotiation needs to happen between the + * consumers and the kernel to ensure that consumers either close devices + * or disallow the move to OFFLINE. Certain other transitions such as + * ONLINE --> DEGRADED for example, are inherently asynchronous i.e. + * non-negotiable. A device that suffers a fault that degrades its + * capabilities will become degraded irrespective of what consumers it has, + * so a negotiation in this case is pointless. + * + * The following device states are currently defined for device contracts: + * + * CT_DEV_EV_ONLINE + * The device is online and functioning normally + * CT_DEV_EV_DEGRADED + * The device is online but is functioning in a degraded capacity + * CT_DEV_EV_OFFLINE + * The device is offline and is no longer configured + * + * A typical consumer of device contracts starts out with a contract + * template and adds terms to that template. These include the + * "acceptable set" (A-set) term, which is a bitset of device states which + * are guaranteed by the contract. 
If the device moves out of a state in + * the A-set, the contract is broken. The breaking of the contract can + * be asynchronous in which case a critical contract event is sent to the + * contract holder but no negotiations take place. If the breaking of the + * contract is synchronous, negotations are opened between the affected + * consumer and the kernel. The kernel does this by sending a critical + * event to the consumer with the CTE_NEG flag set indicating that this + * is a negotiation event. The consumer can accept this change by sending + * a ACK message to the kernel. Alternatively, if it has the necessary + * privileges, it can send a NACK message to the kernel which will block + * the device state change. To NACK a negotiable event, a process must + * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set. + * + * Other terms include the "minor path" term, specified explicitly if the + * contract is not being created at open(2) time or specified implicitly + * if the contract is being created at open time via an activated template. + * + * A contract event is sent on any state change to which the contract + * owner has subscribed via the informative or critical event sets. Only + * critical events are guaranteed to be delivered. Since all device state + * changes are controlled by the kernel and cannot be arbitrarily generated + * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not + * need to be asserted in a process's effective set to designate an event as + * critical. To ensure privacy, a process must either have the same effective + * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege + * asserted in its effective set in order to observe device contract events + * off the device contract type specific endpoint. + * + * Yet another term available with device contracts is the "non-negotiable" + * term. This term is used to pre-specify a NACK to any contract negotiation. + * This term is ignored for asynchronous state changes. For example, a + * provcess may have the A-set {ONLINE|DEGRADED} and make the contract + * non-negotiable. In this case, the device contract framework assumes a + * NACK for any transition to OFFLINE and blocks the offline. If the A-set + * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE + * are NACKed but transitions to DEGRADE succeed. + * + * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract) + * happens just before the I/O framework attempts to offline a device + * (i.e. detach a device and set the offline flag so that it cannot be + * reattached). A device contract holder is expected to either NACK the offline + * (if privileged) or release the device and allow the offline to proceed. + * + * The DEGRADE contract event (if DEGRADE is not in the A-set for a contract) + * is generated just before the I/O framework transitions the device state + * to "degraded" (i.e. DEVI_DEVICE_DEGRADED in I/O framework terminology). + * + * The contract holder is expected to ACK or NACK a negotiation event + * within a certain period of time. If the ACK/NACK is not received + * within the timeout period, the device contract framework will behave + * as if the contract does not exist and will proceed with the event. + * + * Unlike a process contract a device contract does not need to exist + * once it is abandoned, since it does not define a fault boundary. 
It + * merely represents an agreement between a process and the kernel + * regarding the state of the device. Once the process has abandoned + * the contract (either implicitly via a process exit or explicitly) + * the kernel has no reason to retain the contract. As a result + * device contracts are neither inheritable nor need to exist in an + * orphan state. + * + * A device unlike a process may exist in multiple contracts and has + * a "life" outside a device contract. A device unlike a process + * may exist without an associated contract. Unlike a process contract + * a device contract may be formed after a binding relationship is + * formed between a process and a device. + * + * IMPLEMENTATION NOTES + * ==================== + * DATA STRUCTURES + * ---------------- + * The heart of the device contracts implementation is the device contract + * private cont_device_t (or ctd for short) data structure. It encapsulates + * the generic contract_t data structure and has a number of private + * fields. + * These include: + * cond_minor: The minor device that is the subject of the contract + * cond_aset: The bitset of states which are guaranteed by the + * contract + * cond_noneg: If set, indicates that the result of negotiation has + * been predefined to be a NACK + * In addition, there are other device identifiers such the devinfo node, + * dev_t and spec_type of the minor node. There are also a few fields that + * are used during negotiation to maintain state. See + * uts/common/sys/contract/device_impl.h + * for details. + * The ctd structure represents the device private part of a contract of + * type "device" + * + * Another data structure used by device contracts is ctmpl_device. It is + * the device contracts private part of the contract template structure. It + * encapsulates the generic template structure "ct_template_t" and includes + * the following device contract specific fields + * ctd_aset: The bitset of states that should be guaranteed by a + * contract + * ctd_noneg: If set, indicates that contract should NACK a + * negotiation + * ctd_minor: The devfs_path (without the /devices prefix) of the + * minor node that is the subject of the contract. + * + * ALGORITHMS + * --------- + * There are three sets of routines in this file + * Template related routines + * ------------------------- + * These routines provide support for template related operations initated + * via the generic template operations. These include routines that dup + * a template, free it, and set various terms in the template + * (such as the minor node path, the acceptable state set (or A-set) + * and the non-negotiable term) as well as a routine to query the + * device specific portion of the template for the abovementioned terms. + * There is also a routine to create (ctmpl_device_create) that is used to + * create a contract from a template. This routine calls (after initial + * setup) the common function used to create a device contract + * (contract_device_create). + * + * core device contract implementation + * ---------------------------------- + * These routines support the generic contract framework to provide + * functionality that allows contracts to be created, managed and + * destroyed. The contract_device_create() routine is a routine used + * to create a contract from a template (either via an explicit create + * operation on a template or implicitly via an open with an + * activated template.). The contract_device_free() routine assists + * in freeing the device contract specific parts. 
There are routines + * used to abandon (contract_device_abandon) a device contract as well + * as a routine to destroy (which despite its name does not destroy, + * it only moves a contract to a dead state) a contract. + * There is also a routine to return status information about a + * contract - the level of detail depends on what is requested by the + * user. A value of CTD_FIXED only returns fixed length fields such + * as the A-set, state of device and value of the "noneg" term. If + * CTD_ALL is specified, the minor node path is returned as well. + * + * In addition there are interfaces (contract_device_ack/nack) which + * are used to support negotiation between userland processes and + * device contracts. These interfaces record the acknowledgement + * or lack thereof for negotiation events and help determine if the + * negotiated event should occur. + * + * "backend routines" + * ----------------- + * The backend routines form the interface between the I/O framework + * and the device contract subsystem. These routines, allow the I/O + * framework to call into the device contract subsystem to notify it of + * impending changes to a device state as well as to inform of the + * final disposition of such attempted state changes. Routines in this + * class include contract_device_offline() that indicates an attempt to + * offline a device, contract_device_degrade() that indicates that + * a device is moving to the degraded state and contract_device_negend() + * that is used by the I/O framework to inform the contracts subsystem of + * the final disposition of an attempted operation. + * + * SUMMARY + * ------- + * A contract starts its life as a template. A process allocates a device + * contract template and sets various terms: + * The A-set + * The device minor node + * Critical and informative events + * The noneg i.e. no negotition term + * Setting of these terms in the template is done via the + * ctmpl_device_set() entry point in this file. A process can query a + * template to determine the terms already set in the template - this is + * facilitated by the ctmpl_device_get() routine. + * + * Once all the appropriate terms are set, the contract is instantiated via + * one of two methods + * - via an explicit create operation - this is facilitated by the + * ctmpl_device_create() entry point + * - synchronously with the open(2) system call - this is achieved via the + * contract_device_open() routine. + * The core work for both these above functions is done by + * contract_device_create() + * + * A contract once created can be queried for its status. Support for + * status info is provided by both the common contracts framework and by + * the "device" contract type. If the level of detail requested is + * CTD_COMMON, only the common contract framework data is used. Higher + * levels of detail result in calls to contract_device_status() to supply + * device contract type specific status information. + * + * A contract once created may be abandoned either explicitly or implictly. + * In either case, the contract_device_abandon() function is invoked. This + * function merely calls contract_destroy() which moves the contract to + * the DEAD state. The device contract portion of destroy processing is + * provided by contract_device_destroy() which merely disassociates the + * contract from its device devinfo node. A contract in the DEAD state is + * not freed. It hanbgs around until all references to the contract are + * gone. When that happens, the contract is finally deallocated. 
The + * device contract specific portion of the free is done by + * contract_device_free() which finally frees the device contract specific + * data structure (cont_device_t). + * + * When a device undergoes a state change, the I/O framework calls the + * corresponding device contract entry point. For example, when a device + * is about to go OFFLINE, the routine contract_device_offline() is + * invoked. Similarly if a device moves to DEGRADED state, the routine + * contract_device_degrade() function is called. These functions call the + * core routine contract_device_publish(). This function determines via + * the function is_sync_neg() whether an event is a synchronous (i.e. + * negotiable) event or not. In the former case contract_device_publish() + * publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs + * and/or NACKs from contract holders. In the latter case, it simply + * publishes the event and does not wait. In the negotiation case, ACKs or + * NACKs from userland consumers results in contract_device_ack_nack() + * being called where the result of the negotiation is recorded in the + * contract data structure. Once all outstanding contract owners have + * responded, the device contract code in wait_for_acks() determines the + * final result of the negotiation. A single NACK overrides all other ACKs + * If there is no NACK, then a single ACK will result in an overall ACK + * result. If there are no ACKs or NACKs, then the result CT_NONE is + * returned back to the I/O framework. Once the event is permitted or + * blocked, the I/O framework proceeds or aborts the state change. The + * I/O framework then calls contract_device_negend() with a result code + * indicating final disposition of the event. This call releases the + * barrier and other state associated with the previous negotiation, + * which permits the next event (if any) to come into the device contract + * framework. + * + * Finally, a device that has outstanding contracts may be removed from + * the system which results in its devinfo node being freed. The devinfo + * free routine in the I/O framework, calls into the device contract + * function - contract_device_remove_dip(). This routine, disassociates + * the dip from all contracts associated with the contract being freed, + * allowing the devinfo node to be freed. + * + * LOCKING + * --------- + * There are four sets of data that need to be protected by locks + * + * i) device contract specific portion of the contract template - This data + * is protected by the template lock ctmpl_lock. + * + * ii) device contract specific portion of the contract - This data is + * protected by the contract lock ct_lock + * + * iii) The linked list of contracts hanging off a devinfo node - This + * list is protected by the per-devinfo node lock devi_ct_lock + * + * iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv + * and devi_ct_count that controls state changes to a dip + * + * The template lock is independent in that none of the other locks in this + * file may be taken while holding the template lock (and vice versa). 
+ * + * The remaining three locks have the following lock order + * + * devi_ct_lock -> ct_count barrier -> ct_lock + * + */ + +static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, + int spec_type, proc_t *owner, int *errorp); + +/* barrier routines */ +static void ct_barrier_acquire(dev_info_t *dip); +static void ct_barrier_release(dev_info_t *dip); +static int ct_barrier_held(dev_info_t *dip); +static int ct_barrier_empty(dev_info_t *dip); +static void ct_barrier_wait_for_release(dev_info_t *dip); +static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs); +static void ct_barrier_decr(dev_info_t *dip); +static void ct_barrier_incr(dev_info_t *dip); + +ct_type_t *device_type; + +/* + * Macro predicates for determining when events should be sent and how. + */ +#define EVSENDP(ctd, flag) \ + ((ctd->cond_contract.ct_ev_info | ctd->cond_contract.ct_ev_crit) & flag) + +#define EVINFOP(ctd, flag) \ + ((ctd->cond_contract.ct_ev_crit & flag) == 0) + +/* + * State transition table showing which transitions are synchronous and which + * are not. + */ +struct ct_dev_negtable { + uint_t st_old; + uint_t st_new; + uint_t st_neg; +} ct_dev_negtable[] = { + {CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE, 1}, + {CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED, 0}, + {CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE, 0}, + {CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE, 1}, + {0} +}; + +/* + * Device contract template implementation + */ + +/* + * ctmpl_device_dup + * + * The device contract template dup entry point. + * This simply copies all the fields (generic as well as device contract + * specific) fields of the original. + */ +static struct ct_template * +ctmpl_device_dup(struct ct_template *template) +{ + ctmpl_device_t *new; + ctmpl_device_t *old = template->ctmpl_data; + char *buf; + char *minor; + + new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); + buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + /* + * copy generic fields. + * ctmpl_copy returns with old template lock held + */ + ctmpl_copy(&new->ctd_ctmpl, template); + + new->ctd_ctmpl.ctmpl_data = new; + new->ctd_aset = old->ctd_aset; + new->ctd_minor = NULL; + new->ctd_noneg = old->ctd_noneg; + + if (old->ctd_minor) { + ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN); + bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1); + } else { + kmem_free(buf, MAXPATHLEN); + buf = NULL; + } + + mutex_exit(&template->ctmpl_lock); + if (buf) { + minor = i_ddi_strdup(buf, KM_SLEEP); + kmem_free(buf, MAXPATHLEN); + buf = NULL; + } else { + minor = NULL; + } + mutex_enter(&template->ctmpl_lock); + + if (minor) { + new->ctd_minor = minor; + } + + ASSERT(buf == NULL); + return (&new->ctd_ctmpl); +} + +/* + * ctmpl_device_free + * + * The device contract template free entry point. Just + * frees the template. + */ +static void +ctmpl_device_free(struct ct_template *template) +{ + ctmpl_device_t *dtmpl = template->ctmpl_data; + + if (dtmpl->ctd_minor) + kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); + + kmem_free(dtmpl, sizeof (ctmpl_device_t)); +} + +/* + * SAFE_EV is the set of events which a non-privileged process is + * allowed to make critical. An unprivileged device contract owner has + * no control over when a device changes state, so all device events + * can be in the critical set. + * + * EXCESS tells us if "value", a critical event set, requires + * additional privilege. For device contracts EXCESS currently + * evaluates to 0. 
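+ *
+ * To make the intent concrete, a sketch of a hypothetical, narrower
+ * policy (not the one used here) under which EXCESS() would matter:
+ *
+ *	#define	SAFE_EV		(CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED)
+ *	#define	EXCESS(value)	((value) & ~SAFE_EV)
+ *
+ * With such a definition, EXCESS(CT_DEV_EV_OFFLINE) would be non-zero
+ * and the secpolicy_contract_event() check in ctmpl_device_set() would
+ * demand privilege before OFFLINE events could be made critical.  The
+ * actual definitions follow below.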
+ */ +#define SAFE_EV (CT_DEV_ALLEVENT) +#define EXCESS(value) ((value) & ~SAFE_EV) + + +/* + * ctmpl_device_set + * + * The device contract template set entry point. Sets various terms in the + * template. The non-negotiable term can only be set if the process has + * the {PRIV_SYS_DEVICES} privilege asserted in its effective set. + */ +static int +ctmpl_device_set(struct ct_template *tmpl, ct_param_t *param, const cred_t *cr) +{ + ctmpl_device_t *dtmpl = tmpl->ctmpl_data; + char *buf; + int error; + dev_info_t *dip; + int spec_type; + + ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock)); + + switch (param->ctpm_id) { + case CTDP_ACCEPT: + if (param->ctpm_value & ~CT_DEV_ALLEVENT) + return (EINVAL); + if (param->ctpm_value == 0) + return (EINVAL); + if (param->ctpm_value == CT_DEV_ALLEVENT) + return (EINVAL); + + dtmpl->ctd_aset = param->ctpm_value; + break; + case CTDP_NONEG: + if (param->ctpm_value != CTDP_NONEG_SET && + param->ctpm_value != CTDP_NONEG_CLEAR) + return (EINVAL); + + /* + * only privileged processes can designate a contract + * non-negotiatble. + */ + if (param->ctpm_value == CTDP_NONEG_SET && + (error = secpolicy_sys_devices(cr)) != 0) { + return (error); + } + + dtmpl->ctd_noneg = param->ctpm_value; + break; + + case CTDP_MINOR: + if (param->ctpm_value == NULL) + return (EINVAL); + + buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + /* + * Copyin the device path + */ + error = copyinstr((char *)(uintptr_t)param->ctpm_value, buf, + MAXPATHLEN, NULL); + if (error != 0) { + kmem_free(buf, MAXPATHLEN); + return (error); + } + buf[MAXPATHLEN - 1] = '\0'; + + if (*buf != '/' || + strncmp(buf, "/devices/", strlen("/devices/")) == 0 || + strstr(buf, "../devices/") || strchr(buf, ':') == NULL) { + kmem_free(buf, MAXPATHLEN); + return (EINVAL); + } + + spec_type = 0; + dip = NULL; + if (resolve_pathname(buf, &dip, NULL, &spec_type) != 0) { + kmem_free(buf, MAXPATHLEN); + return (ERANGE); + } + ddi_release_devi(dip); + + if (spec_type != S_IFCHR && spec_type != S_IFBLK) { + kmem_free(buf, MAXPATHLEN); + return (EINVAL); + } + + if (dtmpl->ctd_minor != NULL) { + kmem_free(dtmpl->ctd_minor, + strlen(dtmpl->ctd_minor) + 1); + } + dtmpl->ctd_minor = i_ddi_strdup(buf, KM_SLEEP); + kmem_free(buf, MAXPATHLEN); + break; + case CTP_EV_CRITICAL: + /* + * Currently for device contracts, any event + * may be added to the critical set. We retain the + * following code however for future enhancements. + */ + if (EXCESS(param->ctpm_value) && + (error = secpolicy_contract_event(cr)) != 0) + return (error); + tmpl->ctmpl_ev_crit = param->ctpm_value; + break; + default: + return (EINVAL); + } + + return (0); +} + +/* + * ctmpl_device_get + * + * The device contract template get entry point. Simply fetches and + * returns the value of the requested term. 
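+ *
+ * For context, a hypothetical userland sequence that exercises
+ * ctmpl_device_set() above and this entry point.  The ct_dev_tmpl_*()
+ * wrapper names, the template node path and the device path are
+ * assumptions made for illustration; only the CTDP_* parameter ids
+ * and CT_DEV_EV_* states come from the code here:
+ *
+ *	int tfd = open("/system/contract/device/template", O_RDWR);
+ *	uint_t aset;
+ *
+ *	(void) ct_dev_tmpl_set_aset(tfd,
+ *	    CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED);	sets CTDP_ACCEPT
+ *	(void) ct_dev_tmpl_set_minor(tfd,
+ *	    "/pci@0,0/pci1028,1f0a@1f,2/disk@1,0:a");	sets CTDP_MINOR
+ *	(void) ct_dev_tmpl_set_noneg(tfd);		sets CTDP_NONEG
+ *	(void) ct_dev_tmpl_get_aset(tfd, &aset);	reads CTDP_ACCEPT back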
+ */ +static int +ctmpl_device_get(struct ct_template *template, ct_param_t *param) +{ + ctmpl_device_t *dtmpl = template->ctmpl_data; + int error; + + ASSERT(MUTEX_HELD(&template->ctmpl_lock)); + + switch (param->ctpm_id) { + case CTDP_ACCEPT: + param->ctpm_value = dtmpl->ctd_aset; + break; + case CTDP_NONEG: + param->ctpm_value = dtmpl->ctd_noneg; + break; + case CTDP_MINOR: + if (dtmpl->ctd_minor) { + error = copyoutstr(dtmpl->ctd_minor, + (char *)(uintptr_t)param->ctpm_value, + MAXPATHLEN, NULL); + if (error != 0) + return (error); + } else { + return (ENOENT); + } + break; + default: + return (EINVAL); + } + + return (0); +} + +/* + * Device contract type specific portion of creating a contract using + * a specified template + */ +/*ARGSUSED*/ +int +ctmpl_device_create(ct_template_t *template, ctid_t *ctidp) +{ + ctmpl_device_t *dtmpl; + char *buf; + dev_t dev; + int spec_type; + int error; + cont_device_t *ctd; + + if (ctidp == NULL) + return (EINVAL); + + buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + dtmpl = template->ctmpl_data; + + mutex_enter(&template->ctmpl_lock); + if (dtmpl->ctd_minor == NULL) { + /* incomplete template */ + mutex_exit(&template->ctmpl_lock); + kmem_free(buf, MAXPATHLEN); + return (EINVAL); + } else { + ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); + bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1); + } + mutex_exit(&template->ctmpl_lock); + + spec_type = 0; + dev = NODEV; + if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 || + dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE || + (spec_type != S_IFCHR && spec_type != S_IFBLK)) { + CT_DEBUG((CE_WARN, + "tmpl_create: failed to find device: %s", buf)); + kmem_free(buf, MAXPATHLEN); + return (ERANGE); + } + kmem_free(buf, MAXPATHLEN); + + ctd = contract_device_create(template->ctmpl_data, + dev, spec_type, curproc, &error); + + if (ctd == NULL) { + CT_DEBUG((CE_WARN, "Failed to create device contract for " + "process (%d) with device (devt = %lu, spec_type = %s)", + curproc->p_pid, dev, + spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK")); + return (error); + } + + mutex_enter(&ctd->cond_contract.ct_lock); + *ctidp = ctd->cond_contract.ct_id; + mutex_exit(&ctd->cond_contract.ct_lock); + + return (0); +} + +/* + * Device contract specific template entry points + */ +static ctmplops_t ctmpl_device_ops = { + ctmpl_device_dup, /* ctop_dup */ + ctmpl_device_free, /* ctop_free */ + ctmpl_device_set, /* ctop_set */ + ctmpl_device_get, /* ctop_get */ + ctmpl_device_create, /* ctop_create */ + CT_DEV_ALLEVENT /* all device events bitmask */ +}; + + +/* + * Device contract implementation + */ + +/* + * contract_device_default + * + * The device contract default template entry point. Creates a + * device contract template with a default A-set and no "noneg" , + * with informative degrade events and critical offline events. + * There is no default minor path. + */ +static ct_template_t * +contract_device_default(void) +{ + ctmpl_device_t *new; + + new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); + ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new); + + new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED; + new->ctd_noneg = 0; + new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED; + new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE; + + return (&new->ctd_ctmpl); +} + +/* + * contract_device_free + * + * Destroys the device contract specific portion of a contract and + * frees the contract. 
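+ *
+ * This routine is also the final stop of the simplest userland
+ * lifecycle.  A hypothetical sketch, continuing the template example
+ * above (ct_tmpl_create() and ct_ctl_abandon() are generic libcontract
+ * calls; the /system/contract/device ctl path is an assumption):
+ *
+ *	ctid_t ctid;
+ *	int cfd;
+ *
+ *	if (ct_tmpl_create(tfd, &ctid) != 0)	invokes ctmpl_device_create();
+ *		return;				ERANGE if the minor does not
+ *						resolve, EAGAIN if the contract
+ *						cannot be created
+ *
+ *	cfd = open("/system/contract/device/<ctid>/ctl", O_WRONLY);
+ *	...
+ *	(void) ct_ctl_abandon(cfd);		contract_device_abandon(), then
+ *						this routine once the last
+ *						reference is dropped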
+ */ +static void +contract_device_free(contract_t *ct) +{ + cont_device_t *ctd = ct->ct_data; + + ASSERT(ctd->cond_minor); + ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); + kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1); + + ASSERT(ctd->cond_devt != DDI_DEV_T_ANY && + ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV); + + ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR); + + ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT)); + ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1); + + ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT)); + ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK))); + + ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0)); + ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0)); + + ASSERT(!list_link_active(&ctd->cond_next)); + + kmem_free(ctd, sizeof (cont_device_t)); +} + +/* + * contract_device_abandon + * + * The device contract abandon entry point. + */ +static void +contract_device_abandon(contract_t *ct) +{ + ASSERT(MUTEX_HELD(&ct->ct_lock)); + + /* + * device contracts cannot be inherited or orphaned. + * Move the contract to the DEAD_STATE. It will be freed + * once all references to it are gone. + */ + contract_destroy(ct); +} + +/* + * contract_device_destroy + * + * The device contract destroy entry point. + * Called from contract_destroy() to do any type specific destroy. Note + * that destroy is a misnomer - this does not free the contract, it only + * moves it to the dead state. A contract is actually freed via + * contract_rele() -> contract_dtor(), contop_free() + */ +static void +contract_device_destroy(contract_t *ct) +{ + cont_device_t *ctd = ct->ct_data; + dev_info_t *dip = ctd->cond_dip; + + ASSERT(MUTEX_HELD(&ct->ct_lock)); + + if (dip == NULL) { + /* + * The dip has been removed, this is a dangling contract + * Check that dip linkages are NULL + */ + ASSERT(!list_link_active(&ctd->cond_next)); + CT_DEBUG((CE_NOTE, "contract_device_destroy: contract has no " + "devinfo node. contract ctid : %d", ct->ct_id)); + return; + } + + /* + * Need to have lock order: devi_ct_lock -> ct_count barrier -> ct_lock + */ + mutex_exit(&ct->ct_lock); + + /* + * Waiting for the barrier to be released is strictly speaking not + * necessary. But it simplifies the implementation of + * contract_device_publish() by establishing the invariant that + * device contracts cannot go away during negotiation. + */ + mutex_enter(&(DEVI(dip)->devi_ct_lock)); + ct_barrier_wait_for_release(dip); + mutex_enter(&ct->ct_lock); + + list_remove(&(DEVI(dip)->devi_ct), ctd); + ctd->cond_dip = NULL; /* no longer linked to dip */ + contract_rele(ct); /* remove hold for dip linkage */ + + mutex_exit(&ct->ct_lock); + mutex_exit(&(DEVI(dip)->devi_ct_lock)); + mutex_enter(&ct->ct_lock); +} + +/* + * contract_device_status + * + * The device contract status entry point. Called when level of "detail" + * is either CTD_FIXED or CTD_ALL + * + */ +static void +contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl, + void *status, model_t model) +{ + cont_device_t *ctd = ct->ct_data; + + ASSERT(detail == CTD_FIXED || detail == CTD_ALL); + + mutex_enter(&ct->ct_lock); + contract_status_common(ct, zone, status, model); + + /* + * There's no need to hold the contract lock while accessing static + * data like aset or noneg. But since we need the lock to access other + * data like state, we hold it anyway. 
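+ *
+ * The values added below surface in userland through ct_status_read().
+ * A hypothetical decode, for illustration only (the
+ * ct_dev_status_get_*() accessor names are assumed; the CTDS_* keys
+ * come from the code below):
+ *
+ *	ct_stathdl_t st;
+ *	uint_t state, aset, noneg;
+ *	char *minor;
+ *
+ *	(void) ct_status_read(stfd, CTD_ALL, &st);
+ *	(void) ct_dev_status_get_dev_state(st, &state);		CTDS_STATE
+ *	(void) ct_dev_status_get_aset(st, &aset);		CTDS_ASET
+ *	(void) ct_dev_status_get_noneg(st, &noneg);		CTDS_NONEG
+ *	(void) ct_dev_status_get_minor(st, &minor);		CTDS_MINOR
+ *	ct_status_free(st);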
+ */ + VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0); + VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0); + VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0); + + if (detail == CTD_FIXED) { + mutex_exit(&ct->ct_lock); + return; + } + + ASSERT(ctd->cond_minor); + VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0); + + mutex_exit(&ct->ct_lock); +} + +/* + * Converts a result integer into the corresponding string. Used for printing + * messages + */ +static char * +result_str(uint_t result) +{ + switch (result) { + case CT_ACK: + return ("CT_ACK"); + case CT_NACK: + return ("CT_NACK"); + case CT_NONE: + return ("CT_NONE"); + default: + return ("UNKNOWN"); + } +} + +/* + * Converts a device state integer constant into the corresponding string. + * Used to print messages. + */ +static char * +state_str(uint_t state) +{ + switch (state) { + case CT_DEV_EV_ONLINE: + return ("ONLINE"); + case CT_DEV_EV_DEGRADED: + return ("DEGRADED"); + case CT_DEV_EV_OFFLINE: + return ("OFFLINE"); + default: + return ("UNKNOWN"); + } +} + +/* + * Routine that determines if a particular CT_DEV_EV_? event corresponds to a + * synchronous state change or not. + */ +static int +is_sync_neg(uint_t old, uint_t new) +{ + int i; + + ASSERT(old & CT_DEV_ALLEVENT); + ASSERT(new & CT_DEV_ALLEVENT); + + if (old == new) { + CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s", + state_str(new))); + return (-2); + } + + for (i = 0; ct_dev_negtable[i].st_new != 0; i++) { + if (old == ct_dev_negtable[i].st_old && + new == ct_dev_negtable[i].st_new) { + return (ct_dev_negtable[i].st_neg); + } + } + + CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: " + "old = %s -> new = %s", state_str(old), state_str(new))); + + return (-1); +} + +/* + * Used to cleanup cached dv_nodes so that when a device is released by + * a contract holder, its devinfo node can be successfully detached. + */ +static int +contract_device_dvclean(dev_info_t *dip) +{ + char *devnm; + dev_info_t *pdip; + int error; + + ASSERT(dip); + + /* pdip can be NULL if we have contracts against the root dip */ + pdip = ddi_get_parent(dip); + + if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) { + char *path; + + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, path); + CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, " + "device=%s", path)); + kmem_free(path, MAXPATHLEN); + return (EDEADLOCK); + } + + if (pdip) { + devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP); + (void) ddi_deviname(dip, devnm); + error = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE); + kmem_free(devnm, MAXNAMELEN + 1); + } else { + error = devfs_clean(dip, NULL, DV_CLEAN_FORCE); + } + + return (error); +} + +/* + * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland. + * Results in the ACK or NACK being recorded on the dip for one particular + * contract. The device contracts framework evaluates the ACK/NACKs for all + * contracts against a device to determine if a particular device state change + * should be allowed. 
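+ *
+ * A hypothetical holder-side loop that ends up at this endpoint, shown
+ * for illustration (efd and ctlfd are assumed to be open on the
+ * contract's event and ctl nodes; the ct_event_*() and ct_ctl_*()
+ * calls are the generic libcontract interfaces):
+ *
+ *	ct_evthdl_t ev;
+ *
+ *	while (ct_event_read_critical(efd, &ev) == 0) {
+ *		if (ct_event_get_flags(ev) & CTE_NEG) {
+ *			ctevid_t evid = ct_event_get_evid(ev);
+ *			... close or release the device, then ...
+ *			(void) ct_ctl_ack(ctlfd, evid);
+ *			... or, if privileged and the state change must
+ *			    be blocked: (void) ct_ctl_nack(ctlfd, evid);
+ *		}
+ *		ct_event_free(ev);
+ *	}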
+ */ +static int +contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid, + uint_t cmd) +{ + cont_device_t *ctd = ct->ct_data; + dev_info_t *dip; + ctid_t ctid; + int error; + + ctid = ct->ct_id; + + CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid)); + + mutex_enter(&ct->ct_lock); + CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid)); + + dip = ctd->cond_dip; + + ASSERT(ctd->cond_minor); + ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); + + /* + * Negotiation only if new state is not in A-set + */ + ASSERT(!(ctd->cond_aset & evtype)); + + /* + * Negotiation only if transition is synchronous + */ + ASSERT(is_sync_neg(ctd->cond_state, evtype)); + + /* + * We shouldn't be negotiating if the "noneg" flag is set + */ + ASSERT(!ctd->cond_noneg); + + if (dip) + ndi_hold_devi(dip); + + mutex_exit(&ct->ct_lock); + + /* + * dv_clean only if !NACK and offline state change + */ + if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) { + CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid)); + error = contract_device_dvclean(dip); + if (error != 0) { + CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d", + ctid)); + ddi_release_devi(dip); + } + } + + mutex_enter(&ct->ct_lock); + + if (dip) + ddi_release_devi(dip); + + if (dip == NULL) { + if (ctd->cond_currev_id != evid) { + CT_DEBUG((CE_WARN, "%sACK for non-current event " + "(type=%s, id=%llu) on removed device", + cmd == CT_NACK ? "N" : "", + state_str(evtype), (unsigned long long)evid)); + CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d", + ctid)); + } else { + ASSERT(ctd->cond_currev_type == evtype); + CT_DEBUG((CE_WARN, "contract_ack: no such device: " + "ctid: %d", ctid)); + } + error = (ct->ct_state == CTS_DEAD) ? ESRCH : + ((cmd == CT_NACK) ? ETIMEDOUT : 0); + mutex_exit(&ct->ct_lock); + return (error); + } + + /* + * Must follow lock order: devi_ct_lock -> ct_count barrier - >ct_lock + */ + mutex_exit(&ct->ct_lock); + + mutex_enter(&DEVI(dip)->devi_ct_lock); + mutex_enter(&ct->ct_lock); + if (ctd->cond_currev_id != evid) { + char *buf; + mutex_exit(&ct->ct_lock); + mutex_exit(&DEVI(dip)->devi_ct_lock); + ndi_hold_devi(dip); + buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, buf); + ddi_release_devi(dip); + CT_DEBUG((CE_WARN, "%sACK for non-current event" + "(type=%s, id=%llu) on device %s", + cmd == CT_NACK ? "N" : "", + state_str(evtype), (unsigned long long)evid, buf)); + kmem_free(buf, MAXPATHLEN); + CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d", + cmd == CT_NACK ? ETIMEDOUT : 0, ctid)); + return (cmd == CT_ACK ? 0 : ETIMEDOUT); + } + + ASSERT(ctd->cond_currev_type == evtype); + ASSERT(cmd == CT_ACK || cmd == CT_NACK); + + CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d", + cmd == CT_NACK ? "N" : "", ctid)); + + ctd->cond_currev_ack = cmd; + mutex_exit(&ct->ct_lock); + + ct_barrier_decr(dip); + mutex_exit(&DEVI(dip)->devi_ct_lock); + + CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid)); + + return (0); +} + +/* + * Invoked when a userland contract holder approves (i.e. ACKs) a state change + */ +static int +contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid) +{ + return (contract_device_ack_nack(ct, evtype, evid, CT_ACK)); +} + +/* + * Invoked when a userland contract holder blocks (i.e. 
NACKs) a state change + */ +static int +contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid) +{ + return (contract_device_ack_nack(ct, evtype, evid, CT_NACK)); +} + +/* + * Creates a new contract synchronously with the breaking of an existing + * contract. Currently not supported. + */ +/*ARGSUSED*/ +static int +contract_device_newct(contract_t *ct) +{ + return (ENOTSUP); +} + +/* + * Core device contract implementation entry points + */ +static contops_t contract_device_ops = { + contract_device_free, /* contop_free */ + contract_device_abandon, /* contop_abandon */ + contract_device_destroy, /* contop_destroy */ + contract_device_status, /* contop_status */ + contract_device_ack, /* contop_ack */ + contract_device_nack, /* contop_nack */ + contract_qack_notsup, /* contop_qack */ + contract_device_newct /* contop_newct */ +}; + +/* + * contract_device_init + * + * Initializes the device contract type. + */ +void +contract_device_init(void) +{ + device_type = contract_type_init(CTT_DEVICE, "device", + &contract_device_ops, contract_device_default); +} + +/* + * contract_device_create + * + * create a device contract given template "tmpl" and the "owner" process. + * May fail and return NULL if project.max-contracts would have been exceeded. + * + * Common device contract creation routine called for both open-time and + * non-open time device contract creation + */ +static cont_device_t * +contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type, + proc_t *owner, int *errorp) +{ + cont_device_t *ctd; + char *minor; + char *path; + dev_info_t *dip; + + ASSERT(dtmpl != NULL); + ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE); + ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK); + ASSERT(errorp); + + *errorp = 0; + + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); + ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); + bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1); + mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); + + dip = e_ddi_hold_devi_by_path(path, 0); + if (dip == NULL) { + cmn_err(CE_WARN, "contract_create: Cannot find devinfo node " + "for device path (%s)", path); + kmem_free(path, MAXPATHLEN); + *errorp = ERANGE; + return (NULL); + } + + /* + * Lock out any parallel contract negotiations + */ + mutex_enter(&(DEVI(dip)->devi_ct_lock)); + ct_barrier_acquire(dip); + mutex_exit(&(DEVI(dip)->devi_ct_lock)); + + minor = i_ddi_strdup(path, KM_SLEEP); + kmem_free(path, MAXPATHLEN); + + (void) contract_type_pbundle(device_type, owner); + + ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP); + + /* + * Only we hold a refernce to this contract. Safe to access + * the fields without a ct_lock + */ + ctd->cond_minor = minor; + /* + * It is safe to set the dip pointer in the contract + * as the contract will always be destroyed before the dip + * is released + */ + ctd->cond_dip = dip; + ctd->cond_devt = dev; + ctd->cond_spec = spec_type; + + /* + * Since we are able to lookup the device, it is either + * online or degraded + */ + ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ? 
+ CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE; + + mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); + ctd->cond_aset = dtmpl->ctd_aset; + ctd->cond_noneg = dtmpl->ctd_noneg; + + /* + * contract_ctor() initailizes the common portion of a contract + * contract_dtor() destroys the common portion of a contract + */ + if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl, + ctd, 0, owner, B_TRUE)) { + mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); + /* + * contract_device_free() destroys the type specific + * portion of a contract and frees the contract. + * The "minor" path and "cred" is a part of the type specific + * portion of the contract and will be freed by + * contract_device_free() + */ + contract_device_free(&ctd->cond_contract); + + /* release barrier */ + mutex_enter(&(DEVI(dip)->devi_ct_lock)); + ct_barrier_release(dip); + mutex_exit(&(DEVI(dip)->devi_ct_lock)); + + ddi_release_devi(dip); + *errorp = EAGAIN; + return (NULL); + } + mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); + + mutex_enter(&ctd->cond_contract.ct_lock); + ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME; + ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME; + ctd->cond_contract.ct_ntime.ctm_start = -1; + ctd->cond_contract.ct_qtime.ctm_start = -1; + mutex_exit(&ctd->cond_contract.ct_lock); + + /* + * Insert device contract into list hanging off the dip + * Bump up the ref-count on the contract to reflect this + */ + contract_hold(&ctd->cond_contract); + mutex_enter(&(DEVI(dip)->devi_ct_lock)); + list_insert_tail(&(DEVI(dip)->devi_ct), ctd); + + /* release barrier */ + ct_barrier_release(dip); + mutex_exit(&(DEVI(dip)->devi_ct_lock)); + + ddi_release_devi(dip); + + return (ctd); +} + +/* + * Called when a device is successfully opened to create an open-time contract + * i.e. synchronously with a device open. + */ +int +contract_device_open(dev_t dev, int spec_type, contract_t **ctpp) +{ + ctmpl_device_t *dtmpl; + ct_template_t *tmpl; + cont_device_t *ctd; + char *path; + klwp_t *lwp; + int error; + + if (ctpp) + *ctpp = NULL; + + /* + * Check if we are in user-context i.e. if we have an lwp + */ + lwp = ttolwp(curthread); + if (lwp == NULL) { + CT_DEBUG((CE_NOTE, "contract_open: Not user-context")); + return (0); + } + + tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]); + if (tmpl == NULL) { + return (0); + } + dtmpl = tmpl->ctmpl_data; + + /* + * If the user set a minor path in the template before an open, + * ignore it. We use the minor path of the actual minor opened. + */ + mutex_enter(&tmpl->ctmpl_lock); + if (dtmpl->ctd_minor != NULL) { + CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: " + "ignoring device minor path in active template: %s", + curproc->p_pid, dtmpl->ctd_minor)); + /* + * This is a copy of the actual activated template. + * Safe to make changes such as freeing the minor + * path in the template. 
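The userland counterpart of this open-time path can be illustrated with a minimal sketch: a process activates a device contract template and then opens a minor node, at which point contract_device_open() creates a contract for the minor actually opened. This is not part of the patch; ct_dev_tmpl_set_aset(), the <sys/contract/device.h> header and the /system/contract/device/template path are assumptions based on the usual ctfs layout and the "device" type name registered by contract_device_init(), and the device path is purely hypothetical.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <libcontract.h>
#include <sys/contract/device.h>   /* assumed to provide CT_DEV_EV_* */

int
main(void)
{
        int tfd, dfd;

        /* Template endpoint for the "device" contract type (assumed path) */
        tfd = open("/system/contract/device/template", O_RDWR);
        if (tfd < 0) {
                perror("template");
                return (1);
        }

        /*
         * Acceptable states: online or degraded. ct_dev_tmpl_set_aset()
         * is assumed to be the libcontract wrapper for the ctd_aset term.
         */
        if (ct_dev_tmpl_set_aset(tfd,
            CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED) != 0 ||
            ct_tmpl_activate(tfd) != 0) {
                (void) fprintf(stderr, "failed to activate template\n");
                return (1);
        }

        /*
         * A plain userland open now creates an open-time device contract
         * covering the minor actually opened; any minor path stored in
         * the template is ignored, as the comment above notes.
         */
        dfd = open("/dev/dsk/c0t0d0s0", O_RDONLY);      /* hypothetical minor */
        if (dfd >= 0)
                (void) close(dfd);

        (void) ct_tmpl_clear(tfd);
        (void) close(tfd);
        return (0);
}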
+ */ + kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); + dtmpl->ctd_minor = NULL; + } + mutex_exit(&tmpl->ctmpl_lock); + + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) { + CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive " + "minor path from dev_t,spec {%lu, %d} for process (%d)", + dev, spec_type, curproc->p_pid)); + ctmpl_free(tmpl); + kmem_free(path, MAXPATHLEN); + return (1); + } + + mutex_enter(&tmpl->ctmpl_lock); + ASSERT(dtmpl->ctd_minor == NULL); + dtmpl->ctd_minor = path; + mutex_exit(&tmpl->ctmpl_lock); + + ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error); + + mutex_enter(&tmpl->ctmpl_lock); + ASSERT(dtmpl->ctd_minor); + dtmpl->ctd_minor = NULL; + mutex_exit(&tmpl->ctmpl_lock); + ctmpl_free(tmpl); + kmem_free(path, MAXPATHLEN); + + if (ctd == NULL) { + cmn_err(CE_NOTE, "contract_device_open(): Failed to " + "create device contract for process (%d) holding " + "device (devt = %lu, spec_type = %d)", + curproc->p_pid, dev, spec_type); + return (1); + } + + if (ctpp) { + mutex_enter(&ctd->cond_contract.ct_lock); + *ctpp = &ctd->cond_contract; + mutex_exit(&ctd->cond_contract.ct_lock); + } + return (0); +} + +/* + * Called during contract negotiation by the device contract framework to wait + * for ACKs or NACKs from contract holders. If all responses are not received + * before a specified timeout, this routine times out. + */ +static uint_t +wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype) +{ + cont_device_t *ctd; + int timed_out = 0; + int result = CT_NONE; + int ack; + char *f = "wait_for_acks"; + + ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); + ASSERT(dip); + ASSERT(evtype & CT_DEV_ALLEVENT); + ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); + ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || + (spec_type == S_IFBLK || spec_type == S_IFCHR)); + + CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip)); + + if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) { + /* + * some contract owner(s) didn't respond in time + */ + CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip)); + timed_out = 1; + } + + ack = 0; + for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; + ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { + + mutex_enter(&ctd->cond_contract.ct_lock); + + ASSERT(ctd->cond_dip == dip); + + if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) { + mutex_exit(&ctd->cond_contract.ct_lock); + continue; + } + if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { + mutex_exit(&ctd->cond_contract.ct_lock); + continue; + } + + /* skip if non-negotiable contract */ + if (ctd->cond_noneg) { + mutex_exit(&ctd->cond_contract.ct_lock); + continue; + } + + ASSERT(ctd->cond_currev_type == evtype); + if (ctd->cond_currev_ack == CT_NACK) { + CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p", + f, (void *)dip)); + mutex_exit(&ctd->cond_contract.ct_lock); + return (CT_NACK); + } else if (ctd->cond_currev_ack == CT_ACK) { + ack = 1; + CT_DEBUG((CE_NOTE, "%s: found a ACK: %p", + f, (void *)dip)); + } + mutex_exit(&ctd->cond_contract.ct_lock); + } + + if (ack) { + result = CT_ACK; + CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip)); + } else if (timed_out) { + result = CT_NONE; + CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p", + f, (void *)dip)); + } else { + CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p", + f, (void *)dip)); + } + + + return (result); +} + +/* + * Determines the current state of a device (i.e a devinfo 
node + */ +static int +get_state(dev_info_t *dip) +{ + if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip)) + return (CT_DEV_EV_OFFLINE); + else if (DEVI_IS_DEVICE_DEGRADED(dip)) + return (CT_DEV_EV_DEGRADED); + else + return (CT_DEV_EV_ONLINE); +} + +/* + * Sets the current state of a device in a device contract + */ +static void +set_cond_state(dev_info_t *dip) +{ + uint_t state = get_state(dip); + cont_device_t *ctd; + + /* verify that barrier is held */ + ASSERT(ct_barrier_held(dip)); + + for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; + ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { + mutex_enter(&ctd->cond_contract.ct_lock); + ASSERT(ctd->cond_dip == dip); + ctd->cond_state = state; + mutex_exit(&ctd->cond_contract.ct_lock); + } +} + +/* + * Core routine called by event-specific routines when an event occurs. + * Determines if an event should be be published, and if it is to be + * published, whether a negotiation should take place. Also implements + * NEGEND events which publish the final disposition of an event after + * negotiations are complete. + * + * When an event occurs on a minor node, this routine walks the list of + * contracts hanging off a devinfo node and for each contract on the affected + * dip, evaluates the following cases + * + * a. an event that is synchronous, breaks the contract and NONEG not set + * - bumps up the outstanding negotiation counts on the dip + * - marks the dip as undergoing negotiation (devi_ct_neg) + * - event of type CTE_NEG is published + * b. an event that is synchronous, breaks the contract and NONEG is set + * - sets the final result to CT_NACK, event is blocked + * - does not publish an event + * c. event is asynchronous and breaks the contract + * - publishes a critical event irrespect of whether the NONEG + * flag is set, since the contract will be broken and contract + * owner needs to be informed. + * d. No contract breakage but the owner has subscribed to the event + * - publishes the event irrespective of the NONEG event as the + * owner has explicitly subscribed to the event. + * e. NEGEND event + * - publishes a critical event. Should only be doing this if + * if NONEG is not set. + * f. all other events + * - Since a contract is not broken and this event has not been + * subscribed to, this event does not need to be published for + * for this contract. + * + * Once an event is published, what happens next depends on the type of + * event: + * + * a. NEGEND event + * - cleanup all state associated with the preceding negotiation + * and return CT_ACK to the caller of contract_device_publish() + * b. NACKed event + * - One or more contracts had the NONEG term, so the event was + * blocked. Return CT_NACK to the caller. + * c. Negotiated event + * - Call wait_for_acks() to wait for responses from contract + * holders. The end result is either CT_ACK (event is permitted), + * CT_NACK (event is blocked) or CT_NONE (no contract owner) + * responded. This result is returned back to the caller. + * d. All other events + * - If the event was asynchronous (i.e. not negotiated) or + * a contract was not broken return CT_ACK to the caller. 
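The case analysis above can be condensed into a small, self-contained model. The sketch below is plain user-space C with illustrative names only (it is not the kernel code); it maps the per-contract inputs to the action contract_device_publish() takes for cases a through f.

#include <stdio.h>

/* Inputs the kernel derives per contract (names are illustrative only) */
struct ev_ctx {
        int breaks_contract;    /* event not in the contract's A-set */
        int synchronous;        /* state change requires negotiation */
        int noneg;              /* contract carries the NONEG term */
        int subscribed;         /* holder asked for this event anyway */
        int negend;             /* this is the CT_EV_NEGEND wrap-up */
};

enum action {
        START_NEGOTIATION,      /* case a: publish CTE_NEG critical event */
        BLOCK_EVENT,            /* case b: NONEG set, result becomes CT_NACK */
        PUBLISH_CRITICAL,       /* case c: async breakage, inform the holder */
        PUBLISH_SUBSCRIBED,     /* case d: no breakage, but holder subscribed */
        PUBLISH_NEGEND,         /* case e: final disposition of a negotiation */
        SKIP                    /* case f: nothing to publish for this contract */
};

static enum action
publish_action(const struct ev_ctx *c)
{
        if (c->negend)
                return (PUBLISH_NEGEND);
        if (c->breaks_contract && c->synchronous)
                return (c->noneg ? BLOCK_EVENT : START_NEGOTIATION);
        if (c->breaks_contract)
                return (PUBLISH_CRITICAL);
        if (c->subscribed)
                return (PUBLISH_SUBSCRIBED);
        return (SKIP);
}

int
main(void)
{
        struct ev_ctx offline_sync = { 1, 1, 0, 0, 0 };
        struct ev_ctx offline_noneg = { 1, 1, 1, 0, 0 };

        printf("sync offline, negotiable:     action %d\n",
            publish_action(&offline_sync));     /* START_NEGOTIATION */
        printf("sync offline, non-negotiable: action %d\n",
            publish_action(&offline_noneg));    /* BLOCK_EVENT */
        return (0);
}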
+ */ +static uint_t +contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type, + uint_t evtype, nvlist_t *tnvl) +{ + cont_device_t *ctd; + uint_t result = CT_NONE; + uint64_t evid = 0; + uint64_t nevid = 0; + char *path = NULL; + int negend; + int match; + int sync = 0; + contract_t *ct; + ct_kevent_t *event; + nvlist_t *nvl; + int broken = 0; + + ASSERT(dip); + ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); + ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || + (spec_type == S_IFBLK || spec_type == S_IFCHR)); + ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT)); + + /* Is this a synchronous state change ? */ + if (evtype != CT_EV_NEGEND) { + sync = is_sync_neg(get_state(dip), evtype); + /* NOP if unsupported transition */ + if (sync == -2 || sync == -1) { + DEVI(dip)->devi_flags |= DEVI_CT_NOP; + result = (sync == -2) ? CT_ACK : CT_NONE; + goto out; + } + CT_DEBUG((CE_NOTE, "publish: is%s sync state change", + sync ? "" : " not")); + } else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) { + DEVI(dip)->devi_flags &= ~DEVI_CT_NOP; + result = CT_ACK; + goto out; + } + + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, path); + + mutex_enter(&(DEVI(dip)->devi_ct_lock)); + + /* + * Negotiation end - set the state of the device in the contract + */ + if (evtype == CT_EV_NEGEND) { + CT_DEBUG((CE_NOTE, "publish: negend: setting cond state")); + set_cond_state(dip); + } + + /* + * If this device didn't go through negotiation, don't publish + * a NEGEND event - simply release the barrier to allow other + * device events in. + */ + negend = 0; + if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) { + CT_DEBUG((CE_NOTE, "publish: no negend reqd. release barrier")); + ct_barrier_release(dip); + mutex_exit(&(DEVI(dip)->devi_ct_lock)); + result = CT_ACK; + goto out; + } else if (evtype == CT_EV_NEGEND) { + /* + * There are negotiated contract breakages that + * need a NEGEND event + */ + ASSERT(ct_barrier_held(dip)); + negend = 1; + CT_DEBUG((CE_NOTE, "publish: setting negend flag")); + } else { + /* + * This is a new event, not a NEGEND event. Wait for previous + * contract events to complete. + */ + ct_barrier_acquire(dip); + } + + + match = 0; + for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; + ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { + + ctid_t ctid; + size_t len = strlen(path); + + mutex_enter(&ctd->cond_contract.ct_lock); + + ASSERT(ctd->cond_dip == dip); + ASSERT(ctd->cond_minor); + ASSERT(strncmp(ctd->cond_minor, path, len) == 0 && + ctd->cond_minor[len] == ':'); + + if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) { + mutex_exit(&ctd->cond_contract.ct_lock); + continue; + } + if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { + mutex_exit(&ctd->cond_contract.ct_lock); + continue; + } + + /* We have a matching contract */ + match = 1; + ctid = ctd->cond_contract.ct_id; + CT_DEBUG((CE_NOTE, "publish: found matching contract: %d", + ctid)); + + /* + * There are 4 possible cases + * 1. A contract is broken (dev not in acceptable state) and + * the state change is synchronous - start negotiation + * by sending a CTE_NEG critical event. + * 2. A contract is broken and the state change is + * asynchronous - just send a critical event and + * break the contract. + * 3. Contract is not broken, but consumer has subscribed + * to the event as a critical or informative event + * - just send the appropriate event + * 4. contract waiting for negend event - just send the critical + * NEGEND event. 
+ */ + broken = 0; + if (!negend && !(evtype & ctd->cond_aset)) { + broken = 1; + CT_DEBUG((CE_NOTE, "publish: Contract broken: %d", + ctid)); + } + + /* + * Don't send event if + * - contract is not broken AND + * - contract holder has not subscribed to this event AND + * - contract not waiting for a NEGEND event + */ + if (!broken && !EVSENDP(ctd, evtype) && + !ctd->cond_neg) { + CT_DEBUG((CE_NOTE, "contract_device_publish(): " + "contract (%d): no publish reqd: event %d", + ctd->cond_contract.ct_id, evtype)); + mutex_exit(&ctd->cond_contract.ct_lock); + continue; + } + + /* + * Note: need to kmem_zalloc() the event so mutexes are + * initialized automatically + */ + ct = &ctd->cond_contract; + event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP); + event->cte_type = evtype; + + if (broken && sync) { + CT_DEBUG((CE_NOTE, "publish: broken + sync: " + "ctid: %d", ctid)); + ASSERT(!negend); + ASSERT(ctd->cond_currev_id == 0); + ASSERT(ctd->cond_currev_type == 0); + ASSERT(ctd->cond_currev_ack == 0); + ASSERT(ctd->cond_neg == 0); + if (ctd->cond_noneg) { + /* Nothing to publish. Event has been blocked */ + CT_DEBUG((CE_NOTE, "publish: sync and noneg:" + "not publishing blocked ev: ctid: %d", + ctid)); + result = CT_NACK; + kmem_free(event, sizeof (ct_kevent_t)); + mutex_exit(&ctd->cond_contract.ct_lock); + continue; + } + event->cte_flags = CTE_NEG; /* critical neg. event */ + ctd->cond_currev_type = event->cte_type; + ct_barrier_incr(dip); + DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */ + ctd->cond_neg = 1; + } else if (broken && !sync) { + CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d", + ctid)); + ASSERT(!negend); + ASSERT(ctd->cond_currev_id == 0); + ASSERT(ctd->cond_currev_type == 0); + ASSERT(ctd->cond_currev_ack == 0); + ASSERT(ctd->cond_neg == 0); + event->cte_flags = 0; /* critical event */ + } else if (EVSENDP(ctd, event->cte_type)) { + CT_DEBUG((CE_NOTE, "publish: event suscrib: ctid: %d", + ctid)); + ASSERT(!negend); + ASSERT(ctd->cond_currev_id == 0); + ASSERT(ctd->cond_currev_type == 0); + ASSERT(ctd->cond_currev_ack == 0); + ASSERT(ctd->cond_neg == 0); + event->cte_flags = EVINFOP(ctd, event->cte_type) ? + CTE_INFO : 0; + } else if (ctd->cond_neg) { + CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid)); + ASSERT(negend); + ASSERT(ctd->cond_noneg == 0); + nevid = ctd->cond_contract.ct_nevent ? + ctd->cond_contract.ct_nevent->cte_id : 0; + ASSERT(ctd->cond_currev_id == nevid); + event->cte_flags = 0; /* NEGEND is always critical */ + ctd->cond_currev_id = 0; + ctd->cond_currev_type = 0; + ctd->cond_currev_ack = 0; + ctd->cond_neg = 0; + } else { + CT_DEBUG((CE_NOTE, "publish: not publishing event for " + "ctid: %d, evtype: %d", + ctd->cond_contract.ct_id, event->cte_type)); + ASSERT(!negend); + ASSERT(ctd->cond_currev_id == 0); + ASSERT(ctd->cond_currev_type == 0); + ASSERT(ctd->cond_currev_ack == 0); + ASSERT(ctd->cond_neg == 0); + kmem_free(event, sizeof (ct_kevent_t)); + mutex_exit(&ctd->cond_contract.ct_lock); + continue; + } + + nvl = NULL; + if (tnvl) { + VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0); + if (negend) { + int32_t newct = 0; + ASSERT(ctd->cond_noneg == 0); + VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid) + == 0); + VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT, + &newct) == 0); + VERIFY(nvlist_add_int32(nvl, CTS_NEWCT, + newct == 1 ? 0 : + ctd->cond_contract.ct_id) == 0); + CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d " + "CTS_NEVID: %llu, CTS_NEWCT: %s", + ctid, (unsigned long long)nevid, + newct ? 
"success" : "failure")); + + } + } + + if (ctd->cond_neg) { + ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1); + ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1); + ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt(); + ctd->cond_contract.ct_qtime.ctm_start = + ctd->cond_contract.ct_ntime.ctm_start; + } + + /* + * by holding the dip's devi_ct_lock we ensure that + * all ACK/NACKs are held up until we have finished + * publishing to all contracts. + */ + mutex_exit(&ctd->cond_contract.ct_lock); + evid = cte_publish_all(ct, event, nvl, NULL); + mutex_enter(&ctd->cond_contract.ct_lock); + + if (ctd->cond_neg) { + ASSERT(!negend); + ASSERT(broken); + ASSERT(sync); + ASSERT(!ctd->cond_noneg); + CT_DEBUG((CE_NOTE, "publish: sync break, setting evid" + ": %d", ctid)); + ctd->cond_currev_id = evid; + } else if (negend) { + ctd->cond_contract.ct_ntime.ctm_start = -1; + ctd->cond_contract.ct_qtime.ctm_start = -1; + } + mutex_exit(&ctd->cond_contract.ct_lock); + } + + /* + * If "negend" set counter back to initial state (-1) so that + * other events can be published. Also clear the negotiation flag + * on dip. + * + * 0 .. n are used for counting. + * -1 indicates counter is available for use. + */ + if (negend) { + /* + * devi_ct_count not necessarily 0. We may have + * timed out in which case, count will be non-zero. + */ + ct_barrier_release(dip); + DEVI(dip)->devi_ct_neg = 0; + CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p", + (void *)dip)); + } else if (DEVI(dip)->devi_ct_neg) { + ASSERT(match); + ASSERT(!ct_barrier_empty(dip)); + CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p", + DEVI(dip)->devi_ct_count, (void *)dip)); + } else { + /* + * for non-negotiated events or subscribed events or no + * matching contracts + */ + ASSERT(ct_barrier_empty(dip)); + ASSERT(DEVI(dip)->devi_ct_neg == 0); + CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: " + "dip=%p", (void *)dip)); + + /* + * only this function when called from contract_device_negend() + * can reset the counter to READY state i.e. -1. This function + * is so called for every event whether a NEGEND event is needed + * or not, but the negend event is only published if the event + * whose end they signal is a negotiated event for the contract. + */ + } + + if (!match) { + /* No matching contracts */ + CT_DEBUG((CE_NOTE, "publish: No matching contract")); + result = CT_NONE; + } else if (result == CT_NACK) { + /* a non-negotiable contract exists and this is a neg. event */ + CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract")); + (void) wait_for_acks(dip, dev, spec_type, evtype); + } else if (DEVI(dip)->devi_ct_neg) { + /* one or more contracts going through negotations */ + CT_DEBUG((CE_NOTE, "publish: sync contract: waiting")); + result = wait_for_acks(dip, dev, spec_type, evtype); + } else { + /* no negotiated contracts or no broken contracts or NEGEND */ + CT_DEBUG((CE_NOTE, "publish: async/no-break/negend")); + result = CT_ACK; + } + + /* + * Release the lock only now so that the only point where we + * drop the lock is in wait_for_acks(). 
This is so that we don't + * miss cv_signal/cv_broadcast from contract holders + */ + CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock")); + mutex_exit(&(DEVI(dip)->devi_ct_lock)); + +out: + if (tnvl) + nvlist_free(tnvl); + if (path) + kmem_free(path, MAXPATHLEN); + + + CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result))); + return (result); +} + + +/* + * contract_device_offline + * + * Event publishing routine called by I/O framework when a device is offlined. + */ +ct_ack_t +contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type) +{ + nvlist_t *nvl; + uint_t result; + uint_t evtype; + + VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + evtype = CT_DEV_EV_OFFLINE; + result = contract_device_publish(dip, dev, spec_type, evtype, nvl); + + /* + * If a contract offline is NACKED, the framework expects us to call + * NEGEND ourselves, since we know the final result + */ + if (result == CT_NACK) { + contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE); + } + + return (result); +} + +/* + * contract_device_degrade + * + * Event publishing routine called by I/O framework when a device + * moves to degrade state. + */ +/*ARGSUSED*/ +void +contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type) +{ + nvlist_t *nvl; + uint_t evtype; + + VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + evtype = CT_DEV_EV_DEGRADED; + (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); +} + +/* + * contract_device_undegrade + * + * Event publishing routine called by I/O framework when a device + * moves from degraded state to online state. + */ +/*ARGSUSED*/ +void +contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type) +{ + nvlist_t *nvl; + uint_t evtype; + + VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + evtype = CT_DEV_EV_ONLINE; + (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); +} + +/* + * For all contracts which have undergone a negotiation (because the device + * moved out of the acceptable state for that contract and the state + * change is synchronous i.e. requires negotiation) this routine publishes + * a CT_EV_NEGEND event with the final disposition of the event. + * + * This event is always a critical event. + */ +void +contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result) +{ + nvlist_t *nvl; + uint_t evtype; + + ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE); + + CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, " + "dip: %p", result, (void *)dip)); + + VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_int32(nvl, CTS_NEWCT, + result == CT_EV_SUCCESS ? 1 : 0) == 0); + + evtype = CT_EV_NEGEND; + (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); + + CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p", + (void *)dip)); +} + +/* + * Wrapper routine called by other subsystems (such as LDI) to start + * negotiations when a synchronous device state change occurs. + * Returns CT_ACK or CT_NACK. 
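The calling convention this wrapper implies (negotiate first, perform the state change only on CT_ACK, then report the final disposition) can be sketched in a few lines. The functions below are user-space stand-ins so the pattern compiles on its own; they are not the kernel interfaces themselves.

#include <stdio.h>

#define CT_ACK          1
#define CT_NACK         2
#define CT_EV_SUCCESS   0
#define CT_EV_FAILURE   1

/* Stand-in: pretend every contract holder ACKed the offline */
static int
negotiate_offline_stub(const char *minor)
{
        printf("negotiating offline of %s\n", minor);
        return (CT_ACK);
}

/* Stand-in: report the final disposition of the state change */
static void
finalize_offline_stub(const char *minor, int result)
{
        printf("finalize %s: %s\n", minor,
            result == CT_EV_SUCCESS ? "success" : "failure");
}

int
main(void)
{
        const char *minor = "/devices/pci@0/disk@0:a";  /* hypothetical */
        int err;

        if (negotiate_offline_stub(minor) != CT_ACK) {
                /* a holder blocked the change; never attempt the offline */
                return (1);
        }

        err = 0;        /* ... attempt the actual offline here ... */

        finalize_offline_stub(minor,
            err == 0 ? CT_EV_SUCCESS : CT_EV_FAILURE);
        return (0);
}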
+ */ +ct_ack_t +contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type, + uint_t evtype) +{ + int result; + + ASSERT(dip); + ASSERT(dev != NODEV); + ASSERT(dev != DDI_DEV_T_ANY); + ASSERT(dev != DDI_DEV_T_NONE); + ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); + + switch (evtype) { + case CT_DEV_EV_OFFLINE: + result = contract_device_offline(dip, dev, spec_type); + break; + default: + cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation " + "not supported: event (%d) for dev_t (%lu) and spec (%d), " + "dip (%p)", evtype, dev, spec_type, (void *)dip); + result = CT_NACK; + break; + } + + return (result); +} + +/* + * A wrapper routine called by other subsystems (such as the LDI) to + * finalize event processing for a state change event. For synchronous + * state changes, this publishes NEGEND events. For asynchronous i.e. + * non-negotiable events this publishes the event. + */ +void +contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type, + uint_t evtype, int ct_result) +{ + ASSERT(dip); + ASSERT(dev != NODEV); + ASSERT(dev != DDI_DEV_T_ANY); + ASSERT(dev != DDI_DEV_T_NONE); + ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); + + switch (evtype) { + case CT_DEV_EV_OFFLINE: + contract_device_negend(dip, dev, spec_type, ct_result); + break; + case CT_DEV_EV_DEGRADED: + contract_device_degrade(dip, dev, spec_type); + contract_device_negend(dip, dev, spec_type, ct_result); + break; + case CT_DEV_EV_ONLINE: + contract_device_undegrade(dip, dev, spec_type); + contract_device_negend(dip, dev, spec_type, ct_result); + break; + default: + cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported " + "event (%d) for dev_t (%lu) and spec (%d), dip (%p)", + evtype, dev, spec_type, (void *)dip); + break; + } +} + +/* + * Called by I/O framework when a devinfo node is freed to remove the + * association between a devinfo node and its contracts. 
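The ct_barrier_*() helpers further below use devi_ct_count as a combined ownership flag and outstanding-event counter: -1 means the barrier is free, 0..n counts pending ACK/NACKs while one thread owns it. A rough user-space analogue with POSIX threads may make the sentinel convention easier to follow; unlike the kernel code, the sketch takes its own lock in incr/decr instead of relying on a caller-held devi_ct_lock.

#include <pthread.h>

struct ct_barrier {
        pthread_mutex_t lock;
        pthread_cond_t  cv;
        int             count;          /* initialize to -1 (barrier free) */
};

void
barrier_acquire(struct ct_barrier *b)
{
        pthread_mutex_lock(&b->lock);
        while (b->count != -1)
                pthread_cond_wait(&b->cv, &b->lock);
        b->count = 0;                   /* this thread now owns the barrier */
        pthread_mutex_unlock(&b->lock);
}

void
barrier_incr(struct ct_barrier *b)
{
        pthread_mutex_lock(&b->lock);
        b->count++;                     /* one more ACK/NACK to wait for */
        pthread_mutex_unlock(&b->lock);
}

void
barrier_decr(struct ct_barrier *b)
{
        pthread_mutex_lock(&b->lock);
        if (--b->count == 0)
                pthread_cond_broadcast(&b->cv);   /* all responses arrived */
        pthread_mutex_unlock(&b->lock);
}

void
barrier_release(struct ct_barrier *b)
{
        pthread_mutex_lock(&b->lock);
        b->count = -1;                  /* barrier available again */
        pthread_cond_broadcast(&b->cv);
        pthread_mutex_unlock(&b->lock);
}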
+ */ +void +contract_device_remove_dip(dev_info_t *dip) +{ + cont_device_t *ctd; + cont_device_t *next; + contract_t *ct; + + mutex_enter(&(DEVI(dip)->devi_ct_lock)); + ct_barrier_wait_for_release(dip); + + for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; ctd = next) { + next = list_next(&(DEVI(dip)->devi_ct), ctd); + list_remove(&(DEVI(dip)->devi_ct), ctd); + ct = &ctd->cond_contract; + /* + * Unlink the dip associated with this contract + */ + mutex_enter(&ct->ct_lock); + ASSERT(ctd->cond_dip == dip); + ctd->cond_dip = NULL; /* no longer linked to dip */ + contract_rele(ct); /* remove hold for dip linkage */ + CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: " + "ctid: %d", ct->ct_id)); + mutex_exit(&ct->ct_lock); + } + ASSERT(list_is_empty(&(DEVI(dip)->devi_ct))); + mutex_exit(&(DEVI(dip)->devi_ct_lock)); +} + +/* + * Barrier related routines + */ +static void +ct_barrier_acquire(dev_info_t *dip) +{ + ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); + CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier")); + while (DEVI(dip)->devi_ct_count != -1) + cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock)); + DEVI(dip)->devi_ct_count = 0; + CT_DEBUG((CE_NOTE, "ct_barrier_acquire: thread owns barrier")); +} + +static void +ct_barrier_release(dev_info_t *dip) +{ + ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); + ASSERT(DEVI(dip)->devi_ct_count != -1); + DEVI(dip)->devi_ct_count = -1; + cv_broadcast(&(DEVI(dip)->devi_ct_cv)); + CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier")); +} + +static int +ct_barrier_held(dev_info_t *dip) +{ + ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); + return (DEVI(dip)->devi_ct_count != -1); +} + +static int +ct_barrier_empty(dev_info_t *dip) +{ + ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); + ASSERT(DEVI(dip)->devi_ct_count != -1); + return (DEVI(dip)->devi_ct_count == 0); +} + +static void +ct_barrier_wait_for_release(dev_info_t *dip) +{ + ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); + while (DEVI(dip)->devi_ct_count != -1) + cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock)); +} + +static void +ct_barrier_decr(dev_info_t *dip) +{ + CT_DEBUG((CE_NOTE, "barrier_decr: ct_count before decr: %d", + DEVI(dip)->devi_ct_count)); + + ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); + ASSERT(DEVI(dip)->devi_ct_count > 0); + + DEVI(dip)->devi_ct_count--; + if (DEVI(dip)->devi_ct_count == 0) { + cv_broadcast(&DEVI(dip)->devi_ct_cv); + CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast")); + } +} + +static void +ct_barrier_incr(dev_info_t *dip) +{ + ASSERT(ct_barrier_held(dip)); + DEVI(dip)->devi_ct_count++; +} + +static int +ct_barrier_wait_for_empty(dev_info_t *dip, int secs) +{ + clock_t abstime; + + ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); + + abstime = ddi_get_lbolt() + drv_usectohz(secs*1000000); + while (DEVI(dip)->devi_ct_count) { + if (cv_timedwait(&(DEVI(dip)->devi_ct_cv), + &(DEVI(dip)->devi_ct_lock), abstime) == -1) { + return (-1); + } + } + return (0); +} diff --git a/usr/src/uts/common/contract/process.c b/usr/src/uts/common/contract/process.c index 8240051f00..c92ce34352 100644 --- a/usr/src/uts/common/contract/process.c +++ b/usr/src/uts/common/contract/process.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -502,7 +501,7 @@ contract_process_adopt(contract_t *ct, proc_t *p) } /* - * contract_process_status + * contract_process_abandon * * The process contract abandon entry point. */ @@ -632,11 +631,23 @@ contract_process_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl, } } +/*ARGSUSED*/ +static int +contract_process_newct(contract_t *ct) +{ + return (0); +} + +/* process contracts don't negotiate */ static contops_t contract_process_ops = { contract_process_free, /* contop_free */ contract_process_abandon, /* contop_abandon */ contract_process_destroy, /* contop_destroy */ - contract_process_status /* contop_status */ + contract_process_status, /* contop_status */ + contract_ack_inval, /* contop_ack */ + contract_ack_inval, /* contop_nack */ + contract_qack_inval, /* contop_qack */ + contract_process_newct /* contop_newct */ }; /* @@ -774,7 +785,7 @@ contract_process_exit(cont_process_t *ctp, proc_t *p, int exitstatus) event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP); event->cte_flags = EVINFOP(ctp, CT_PR_EV_EXIT) ? CTE_INFO : 0; event->cte_type = CT_PR_EV_EXIT; - cte_publish_all(ct, event, nvl, NULL); + (void) cte_publish_all(ct, event, nvl, NULL); mutex_enter(&ct->ct_lock); } if (empty) { @@ -793,7 +804,7 @@ contract_process_exit(cont_process_t *ctp, proc_t *p, int exitstatus) event->cte_flags = EVINFOP(ctp, CT_PR_EV_EMPTY) ? CTE_INFO : 0; event->cte_type = CT_PR_EV_EMPTY; - cte_publish_all(ct, event, nvl, NULL); + (void) cte_publish_all(ct, event, nvl, NULL); mutex_enter(&ct->ct_lock); } @@ -877,7 +888,7 @@ contract_process_fork(ctmpl_process_t *rtmpl, proc_t *cp, proc_t *pp, event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP); event->cte_flags = EVINFOP(ctp, CT_PR_EV_FORK) ? CTE_INFO : 0; event->cte_type = CT_PR_EV_FORK; - cte_publish_all(ct, event, nvl, NULL); + (void) cte_publish_all(ct, event, nvl, NULL); } return (ctp); } @@ -924,7 +935,7 @@ contract_process_core(cont_process_t *ctp, proc_t *p, int sig, event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP); event->cte_flags = EVINFOP(ctp, CT_PR_EV_CORE) ? CTE_INFO : 0; event->cte_type = CT_PR_EV_CORE; - cte_publish_all(ct, event, nvl, gnvl); + (void) cte_publish_all(ct, event, nvl, gnvl); } if (EVFATALP(ctp, CT_PR_EV_CORE)) { @@ -956,7 +967,7 @@ contract_process_hwerr(cont_process_t *ctp, proc_t *p) event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP); event->cte_flags = EVINFOP(ctp, CT_PR_EV_HWERR) ? CTE_INFO : 0; event->cte_type = CT_PR_EV_HWERR; - cte_publish_all(ct, event, nvl, NULL); + (void) cte_publish_all(ct, event, nvl, NULL); } if (EVFATALP(ctp, CT_PR_EV_HWERR)) { @@ -1006,7 +1017,7 @@ contract_process_sig(cont_process_t *ctp, proc_t *p, int sig, pid_t pid, event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP); event->cte_flags = EVINFOP(ctp, CT_PR_EV_SIGNAL) ? 
CTE_INFO : 0; event->cte_type = CT_PR_EV_SIGNAL; - cte_publish_all(ct, event, nvl, gnvl); + (void) cte_publish_all(ct, event, nvl, gnvl); } if (EVFATALP(ctp, CT_PR_EV_SIGNAL)) { diff --git a/usr/src/uts/common/fs/ctfs/ctfs_ctl.c b/usr/src/uts/common/fs/ctfs/ctfs_ctl.c index f5a0514565..da293cbb21 100644 --- a/usr/src/uts/common/fs/ctfs/ctfs_ctl.c +++ b/usr/src/uts/common/fs/ctfs/ctfs_ctl.c @@ -177,6 +177,7 @@ ctfs_ctl_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, cred_t *cr, contract_t *ct = ctlnode->ctfs_ctl_contract; int error = 0; uint64_t event; + int ack; switch (cmd) { case CT_CABANDON: @@ -184,15 +185,21 @@ ctfs_ctl_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, cred_t *cr, break; case CT_CACK: + case CT_CNACK: if (copyin((void *)arg, &event, sizeof (uint64_t))) return (EFAULT); - error = contract_ack(ct, event); + ack = (cmd == CT_CACK) ? CT_ACK : CT_NACK; + error = contract_ack(ct, event, ack); break; case CT_CNEWCT: + error = contract_newct(ct); break; case CT_CQREQ: + if (copyin((void *)arg, &event, sizeof (uint64_t))) + return (EFAULT); + error = contract_qack(ct, event); break; case CT_CADOPT: diff --git a/usr/src/uts/common/fs/ctfs/ctfs_tmpl.c b/usr/src/uts/common/fs/ctfs/ctfs_tmpl.c index 28d0c93662..d99b8f56e8 100644 --- a/usr/src/uts/common/fs/ctfs/ctfs_tmpl.c +++ b/usr/src/uts/common/fs/ctfs/ctfs_tmpl.c @@ -114,6 +114,7 @@ ctfs_tmpl_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, cred_t *cr, { ctfs_tmplnode_t *tmplnode = vp->v_data; ct_param_t param; + ctid_t ctid; int error; switch (cmd) { @@ -127,7 +128,11 @@ ctfs_tmpl_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, cred_t *cr, break; case CT_TCREATE: ASSERT(tmplnode->ctfs_tmn_tmpl != NULL); - return (ctmpl_create(tmplnode->ctfs_tmn_tmpl)); + error = ctmpl_create(tmplnode->ctfs_tmn_tmpl, &ctid); + if (error) + return (error); + *rvalp = ctid; + break; case CT_TSET: if (copyin((void *)arg, ¶m, sizeof (ct_param_t))) return (EFAULT); diff --git a/usr/src/uts/common/fs/specfs/specsubr.c b/usr/src/uts/common/fs/specfs/specsubr.c index ea87c688d6..85d9089b82 100644 --- a/usr/src/uts/common/fs/specfs/specsubr.c +++ b/usr/src/uts/common/fs/specfs/specsubr.c @@ -70,6 +70,7 @@ struct vfs spec_vfs; static dev_t specdev; struct kmem_cache *snode_cache; +int spec_debug = 0; static struct snode *sfind(dev_t, vtype_t, struct vnode *); static struct vnode *get_cvp(dev_t, vtype_t, struct snode *, int *); @@ -259,6 +260,54 @@ makespecvp(dev_t dev, vtype_t type) return (svp); } + +/* + * This function is called from spec_assoc_vp_with_devi(). That function + * associates a "new" dip with a common snode, releasing (any) old dip + * in the process. This function (spec_assoc_fence()) looks at the "new dip" + * and determines whether the snode should be fenced of or not. As the table + * below indicates, the value of old-dip is a don't care for all cases. + * + * old-dip new-dip common-snode + * ========================================= + * Don't care NULL unfence + * Don't care retired fence + * Don't care not-retired unfence + * + * Since old-dip value is a "don't care", it is not passed into this function. 
+ */ +static void +spec_assoc_fence(dev_info_t *ndip, vnode_t *vp) +{ + int fence; + struct snode *csp; + + ASSERT(vp); + ASSERT(vn_matchops(vp, spec_getvnodeops())); + + fence = 0; + if (ndip != NULL) { + mutex_enter(&DEVI(ndip)->devi_lock); + if (DEVI(ndip)->devi_flags & DEVI_RETIRED) + fence = 1; + mutex_exit(&DEVI(ndip)->devi_lock); + } + + csp = VTOCS(vp); + ASSERT(csp); + + /* SFENCED flag only set on common snode */ + mutex_enter(&csp->s_lock); + if (fence) + csp->s_flag |= SFENCED; + else + csp->s_flag &= ~SFENCED; + mutex_exit(&csp->s_lock); + + FENDBG((CE_NOTE, "%sfenced common snode (%p) for new dip=%p", + fence ? "" : "un", (void *)csp, (void *)ndip)); +} + /* * Associate the common snode with a devinfo node. This is called from: * @@ -322,6 +371,8 @@ spec_assoc_vp_with_devi(struct vnode *vp, dev_info_t *dip) csp->s_flag &= ~SSIZEVALID; mutex_exit(&csp->s_lock); + spec_assoc_fence(dip, vp); + /* release the old */ if (olddip) ddi_release_devi(olddip); @@ -889,3 +940,113 @@ spec_is_selfclone(vnode_t *vp) return (0); } + +/* + * We may be invoked with a NULL vp in which case we fence off + * all snodes associated with dip + */ +int +spec_fence_snode(dev_info_t *dip, struct vnode *vp) +{ + struct snode *sp; + struct snode *csp; + int retired; + int i; + char *path; + int emitted; + + ASSERT(dip); + + retired = 0; + mutex_enter(&DEVI(dip)->devi_lock); + if (DEVI(dip)->devi_flags & DEVI_RETIRED) + retired = 1; + mutex_exit(&DEVI(dip)->devi_lock); + + if (!retired) + return (0); + + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, path); + + + if (vp != NULL) { + ASSERT(vn_matchops(vp, spec_getvnodeops())); + csp = VTOCS(vp); + ASSERT(csp); + mutex_enter(&csp->s_lock); + csp->s_flag |= SFENCED; + mutex_exit(&csp->s_lock); + FENDBG((CE_NOTE, "fenced off snode(%p) for dip: %s", + (void *)csp, path)); + kmem_free(path, MAXPATHLEN); + return (0); + } + + emitted = 0; + mutex_enter(&stable_lock); + for (i = 0; i < STABLESIZE; i++) { + for (sp = stable[i]; sp != NULL; sp = sp->s_next) { + ASSERT(sp->s_commonvp); + csp = VTOS(sp->s_commonvp); + if (csp->s_dip == dip) { + /* fence off the common snode */ + mutex_enter(&csp->s_lock); + csp->s_flag |= SFENCED; + mutex_exit(&csp->s_lock); + if (!emitted) { + FENDBG((CE_NOTE, "fenced 1 of N")); + emitted++; + } + } + } + } + mutex_exit(&stable_lock); + + FENDBG((CE_NOTE, "fenced off all snodes for dip: %s", path)); + kmem_free(path, MAXPATHLEN); + + return (0); +} + + +int +spec_unfence_snode(dev_info_t *dip) +{ + struct snode *sp; + struct snode *csp; + int i; + char *path; + int emitted; + + ASSERT(dip); + + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, path); + + emitted = 0; + mutex_enter(&stable_lock); + for (i = 0; i < STABLESIZE; i++) { + for (sp = stable[i]; sp != NULL; sp = sp->s_next) { + ASSERT(sp->s_commonvp); + csp = VTOS(sp->s_commonvp); + ASSERT(csp); + if (csp->s_dip == dip) { + /* unfence the common snode */ + mutex_enter(&csp->s_lock); + csp->s_flag &= ~SFENCED; + mutex_exit(&csp->s_lock); + if (!emitted) { + FENDBG((CE_NOTE, "unfenced 1 of N")); + emitted++; + } + } + } + } + mutex_exit(&stable_lock); + + FENDBG((CE_NOTE, "unfenced all snodes for dip: %s", path)); + kmem_free(path, MAXPATHLEN); + + return (0); +} diff --git a/usr/src/uts/common/fs/specfs/specvnops.c b/usr/src/uts/common/fs/specfs/specvnops.c index 1841d107fb..ffaba36a21 100644 --- a/usr/src/uts/common/fs/specfs/specvnops.c +++ b/usr/src/uts/common/fs/specfs/specvnops.c @@ -93,6 +93,7 @@ #include <sys/esunddi.h> #include 
<sys/autoconf.h> #include <sys/sunndi.h> +#include <sys/contract/device_impl.h> static int spec_open(struct vnode **, int, struct cred *); @@ -153,8 +154,23 @@ static int spec_pathconf(struct vnode *, int, ulong_t *, struct cred *); mutex_exit(&csp->s_lock); \ } +#define S_ISFENCED(sp) ((VTOS((sp)->s_commonvp))->s_flag & SFENCED) + struct vnodeops *spec_vnodeops; +/* + * *PLEASE NOTE*: If you add new entry points to specfs, do + * not forget to add support for fencing. A fenced snode + * is indicated by the SFENCED flag in the common snode. + * If a snode is fenced, determine if your entry point is + * a configuration operation (Example: open), a detection + * operation (Example: gettattr), an I/O operation (Example: ioctl()) + * or an unconfiguration operation (Example: close). If it is + * a configuration or detection operation, fail the operation + * for a fenced snode with an ENXIO or EIO as appropriate. If + * it is any other operation, let it through. + */ + const fs_operation_def_t spec_vnodeops_template[] = { VOPNAME_OPEN, { .vop_open = spec_open }, VOPNAME_CLOSE, { .vop_close = spec_close }, @@ -530,6 +546,7 @@ spec_open(struct vnode **vpp, int flag, struct cred *cr) struct stdata *stp; dev_info_t *dip; int error, type; + contract_t *ct = NULL; int open_returns_eintr; flag &= ~FCREAT; /* paranoia */ @@ -579,6 +596,10 @@ spec_open(struct vnode **vpp, int flag, struct cred *cr) ddi_release_devi(dip); /* from e_ddi_hold_devi_by_dev */ } + /* check if device fenced off */ + if (S_ISFENCED(sp)) + return (ENXIO); + #ifdef DEBUG /* verify attach/open exclusion guarantee */ dip = csp->s_dip; @@ -628,6 +649,18 @@ spec_open(struct vnode **vpp, int flag, struct cred *cr) csp = VTOS(sp->s_commonvp); } + /* + * create contracts only for userland opens + * Successful open and cloning is done at this point. + */ + if (error == 0 && !(flag & FKLYR)) { + int spec_type; + spec_type = (STOV(csp)->v_type == VCHR) ? S_IFCHR : S_IFBLK; + if (contract_device_open(newdev, spec_type, NULL) != 0) { + error = EIO; + } + } + if (error == 0) { sp->s_size = SPEC_SIZE(csp); @@ -729,6 +762,19 @@ streams_open: UNLOCK_CSP(csp); } + /* + * create contracts only for userland opens + * Successful open and cloning is done at this point. + */ + if (error == 0 && !(flag & FKLYR)) { + /* STREAM is of type S_IFCHR */ + if (contract_device_open(newdev, S_IFCHR, &ct) != 0) { + UNLOCK_CSP(csp); + (void) spec_close(vp, flag, 1, 0, cr); + return (EIO); + } + } + if (error == 0) { /* STREAMS devices don't have a size */ sp->s_size = csp->s_size = 0; @@ -741,6 +787,11 @@ streams_open: return (0); /* strctty() was interrupted by a signal */ + if (ct) { + /* we only create contracts for userland opens */ + ASSERT(ttoproc(curthread)); + (void) contract_abandon(ct, ttoproc(curthread), 0); + } (void) spec_close(vp, flag, 1, 0, cr); return (EINTR); } @@ -795,6 +846,7 @@ spec_close( if (count > 1) return (0); + /* we allow close to succeed even if device is fenced off */ sp = VTOS(vp); cvp = sp->s_commonvp; @@ -1157,6 +1209,13 @@ spec_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, struct cred *cr, if (vp->v_type != VCHR) return (ENOTTY); + + /* + * allow ioctls() to go through even for fenced snodes, as they + * may include unconfiguration operation - for example popping of + * streams modules. 
+ */ + sp = VTOS(vp); dev = sp->s_dev; if (STREAMSTAB(getmajor(dev))) { @@ -1180,6 +1239,11 @@ spec_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cr) vp = sp->s_commonvp; } sp = VTOS(vp); + + /* we want stat() to fail with ENXIO if the device is fenced off */ + if (S_ISFENCED(sp)) + return (ENXIO); + realvp = sp->s_realvp; if (realvp == NULL) { @@ -1258,6 +1322,10 @@ spec_setattr( struct vnode *realvp; int error; + /* fail with ENXIO if the device is fenced off */ + if (S_ISFENCED(sp)) + return (ENXIO); + if (vp->v_type == VCHR && vp->v_stream && (vap->va_mask & AT_SIZE)) { /* * 1135080: O_TRUNC should have no effect on @@ -1293,6 +1361,10 @@ spec_access(struct vnode *vp, int mode, int flags, struct cred *cr) struct vnode *realvp; struct snode *sp = VTOS(vp); + /* fail with ENXIO if the device is fenced off */ + if (S_ISFENCED(sp)) + return (ENXIO); + if ((realvp = sp->s_realvp) != NULL) return (VOP_ACCESS(realvp, mode, flags, cr)); else @@ -1309,6 +1381,11 @@ spec_create(struct vnode *dvp, char *name, vattr_t *vap, enum vcexcl excl, int mode, struct vnode **vpp, struct cred *cr, int flag) { int error; + struct snode *sp = VTOS(dvp); + + /* fail with ENXIO if the device is fenced off */ + if (S_ISFENCED(sp)) + return (ENXIO); ASSERT(dvp && (dvp->v_flag & VROOT) && *name == '\0'); if (excl == NONEXCL) { @@ -1333,6 +1410,8 @@ spec_fsync(struct vnode *vp, int syncflag, struct cred *cr) struct vnode *cvp; struct vattr va, vatmp; + /* allow syncing even if device is fenced off */ + /* If times didn't change, don't flush anything. */ mutex_enter(&sp->s_lock); if ((sp->s_flag & (SACC|SUPD|SCHG)) == 0 && vp->v_type != VBLK) { @@ -2222,10 +2301,15 @@ spec_map( struct cred *cred) { int error = 0; + struct snode *sp = VTOS(vp); if (vp->v_flag & VNOMAP) return (ENOSYS); + /* fail map with ENXIO if the device is fenced off */ + if (S_ISFENCED(sp)) + return (ENXIO); + /* * If file is locked, fail mapping attempt. */ @@ -2314,6 +2398,10 @@ spec_addmap( if (vp->v_flag & VNOMAP) return (ENOSYS); + /* fail with EIO if the device is fenced off */ + if (S_ISFENCED(csp)) + return (EIO); + npages = btopr(len); LOCK_CSP(csp); csp->s_mapcnt += npages; @@ -2343,6 +2431,8 @@ spec_delmap( ASSERT(vp != NULL && VTOS(vp)->s_commonvp == vp); + /* allow delmap to succeed even if device fenced off */ + /* * XXX Given the above assertion, this might not * be a particularly sensible thing to test.. 
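From userland, fencing of a retired device's snodes shows up as ENXIO on configuration and detection operations (open, stat and the like) while teardown paths such as close still succeed. The short probe below illustrates the expected behaviour; the device path is hypothetical, and the errors only appear once the underlying dip has actually been retired and fenced.

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
        const char *path = argc > 1 ? argv[1] : "/dev/dsk/c0t0d0s0";
        struct stat st;
        int fd;

        /* On a fenced (retired) device node both calls should fail with ENXIO */
        fd = open(path, O_RDONLY);
        if (fd < 0)
                printf("open: %s\n", strerror(errno));
        else
                (void) close(fd);       /* close is allowed even when fenced */

        if (stat(path, &st) < 0)
                printf("stat: %s\n", strerror(errno));

        return (0);
}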
@@ -2389,6 +2479,8 @@ spec_delmap( static int spec_dump(struct vnode *vp, caddr_t addr, int bn, int count) { + /* allow dump to succeed even if device fenced off */ + ASSERT(vp->v_type == VBLK); return (bdev_dump(vp->v_rdev, addr, bn, count)); } @@ -2438,6 +2530,10 @@ spec_setsecattr(struct vnode *vp, vsecattr_t *vsap, int flag, struct cred *cr) struct snode *sp = VTOS(vp); int error; + /* fail with ENXIO if the device is fenced off */ + if (S_ISFENCED(sp)) + return (ENXIO); + /* * The acl(2) system calls VOP_RWLOCK on the file before setting an * ACL, but since specfs does not serialize reads and writes, this @@ -2464,6 +2560,10 @@ spec_getsecattr(struct vnode *vp, vsecattr_t *vsap, int flag, struct cred *cr) struct vnode *realvp; struct snode *sp = VTOS(vp); + /* fail with ENXIO if the device is fenced off */ + if (S_ISFENCED(sp)) + return (ENXIO); + if ((realvp = sp->s_realvp) != NULL) return (VOP_GETSECATTR(realvp, vsap, flag, cr)); else @@ -2476,6 +2576,10 @@ spec_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr) vnode_t *realvp; struct snode *sp = VTOS(vp); + /* fail with ENXIO if the device is fenced off */ + if (S_ISFENCED(sp)) + return (ENXIO); + if ((realvp = sp->s_realvp) != NULL) return (VOP_PATHCONF(realvp, cmd, valp, cr)); else diff --git a/usr/src/uts/common/os/contract.c b/usr/src/uts/common/os/contract.c index aadfb92e62..6fde3f5714 100644 --- a/usr/src/uts/common/os/contract.c +++ b/usr/src/uts/common/os/contract.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -174,6 +173,8 @@ #include <sys/proc.h> #include <sys/contract_impl.h> #include <sys/contract/process_impl.h> +#include <sys/dditypes.h> +#include <sys/contract/device_impl.h> #include <sys/systm.h> #include <sys/atomic.h> #include <sys/cmn_err.h> @@ -181,6 +182,8 @@ #include <sys/policy.h> #include <sys/zone.h> #include <sys/task.h> +#include <sys/ddi.h> +#include <sys/sunddi.h> extern rctl_hndl_t rc_project_contract; @@ -191,6 +194,7 @@ static kmutex_t contract_lock; int ct_ntypes = CTT_MAXTYPE; static ct_type_t *ct_types_static[CTT_MAXTYPE]; ct_type_t **ct_types = ct_types_static; +int ct_debug; static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int); static void cte_queue_destroy(ct_equeue_t *); @@ -237,6 +241,7 @@ contract_init(void) * Initialize contract types. */ contract_process_init(); + contract_device_init(); /* * Initialize p0/lwp0 contract state. @@ -310,6 +315,9 @@ contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data, ct->ct_ev_crit = tmpl->ctmpl_ev_crit; ct->ct_cookie = tmpl->ctmpl_cookie; ct->ct_owner = author; + ct->ct_ntime.ctm_total = -1; + ct->ct_qtime.ctm_total = -1; + ct->ct_nevent = NULL; /* * Test project.max-contracts. 
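On the userland side, the CT_CACK/CT_CNACK handling added to ctfs_ctl.c is driven by passing an event id to a contract's ctl node, which then reaches the contract_ack() dispatch below. A minimal sketch follows; ct_ctl_ack() is existing libcontract, while ct_ctl_nack() and the /system/contract/device/<ctid>/ctl path are assumptions (the type directory name comes from the "device" string registered in contract_device_init()).

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <libcontract.h>

/*
 * Acknowledge or block one negotiated device-contract event.
 * Usage: ack <ctid> <evid> ack|nack
 */
int
main(int argc, char **argv)
{
        char path[64];
        uint64_t evid;
        int fd;

        if (argc != 4)
                return (2);

        evid = strtoull(argv[2], NULL, 0);
        (void) snprintf(path, sizeof (path),
            "/system/contract/device/%s/ctl", argv[1]);  /* assumed layout */

        if ((fd = open(path, O_WRONLY)) < 0) {
                perror(path);
                return (1);
        }

        if (strcmp(argv[3], "nack") == 0)
                (void) ct_ctl_nack(fd, evid);   /* assumed wrapper for CT_CNACK */
        else
                (void) ct_ctl_ack(fd, evid);

        (void) close(fd);
        return (0);
}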
@@ -570,6 +578,12 @@ contract_abandon(contract_t *ct, proc_t *p, int explicit) return (0); } +int +contract_newct(contract_t *ct) +{ + return (ct->ct_type->ct_type_ops->contop_newct(ct)); +} + /* * contract_adopt * @@ -647,11 +661,15 @@ contract_adopt(contract_t *ct, proc_t *p) * Acknowledges receipt of a critical event. */ int -contract_ack(contract_t *ct, uint64_t evid) +contract_ack(contract_t *ct, uint64_t evid, int ack) { ct_kevent_t *ev; list_t *queue = &ct->ct_events.ctq_events; int error = ESRCH; + int nego = 0; + uint_t evtype; + + ASSERT(ack == CT_ACK || ack == CT_NACK); mutex_enter(&ct->ct_lock); mutex_enter(&ct->ct_events.ctq_lock); @@ -660,9 +678,14 @@ contract_ack(contract_t *ct, uint64_t evid) */ for (ev = list_head(queue); ev; ev = list_next(queue, ev)) { if (ev->cte_id == evid) { + if (ev->cte_flags & CTE_NEG) + nego = 1; + else if (ack == CT_NACK) + break; if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) { ev->cte_flags |= CTE_ACK; ct->ct_evcnt--; + evtype = ev->cte_type; error = 0; } break; @@ -671,9 +694,86 @@ contract_ack(contract_t *ct, uint64_t evid) mutex_exit(&ct->ct_events.ctq_lock); mutex_exit(&ct->ct_lock); + /* + * Not all critical events are negotiation events, however + * every negotiation event is a critical event. NEGEND events + * are critical events but are not negotiation events + */ + if (error || !nego) + return (error); + + if (ack == CT_ACK) + error = ct->ct_type->ct_type_ops->contop_ack(ct, evtype, evid); + else + error = ct->ct_type->ct_type_ops->contop_nack(ct, evtype, evid); + return (error); } +/*ARGSUSED*/ +int +contract_ack_inval(contract_t *ct, uint_t evtype, uint64_t evid) +{ + cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u", + ct->ct_id); + return (ENOSYS); +} + +/*ARGSUSED*/ +int +contract_qack_inval(contract_t *ct, uint_t evtype, uint64_t evid) +{ + cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u", + ct->ct_id); + return (ENOSYS); +} + +/*ARGSUSED*/ +int +contract_qack_notsup(contract_t *ct, uint_t evtype, uint64_t evid) +{ + return (ERANGE); +} + +/* + * contract_qack + * + * Asks that negotiations be extended by another time quantum + */ +int +contract_qack(contract_t *ct, uint64_t evid) +{ + ct_kevent_t *ev; + list_t *queue = &ct->ct_events.ctq_events; + int nego = 0; + uint_t evtype; + + mutex_enter(&ct->ct_lock); + mutex_enter(&ct->ct_events.ctq_lock); + + for (ev = list_head(queue); ev; ev = list_next(queue, ev)) { + if (ev->cte_id == evid) { + if ((ev->cte_flags & (CTE_NEG | CTE_ACK)) == CTE_NEG) { + evtype = ev->cte_type; + nego = 1; + } + break; + } + } + mutex_exit(&ct->ct_events.ctq_lock); + mutex_exit(&ct->ct_lock); + + /* + * Only a negotiated event (which is by definition also a critical + * event) which has not yet been acknowledged can provide + * time quanta to a negotiating owner process. + */ + if (!nego) + return (ESRCH); + + return (ct->ct_type->ct_type_ops->contop_qack(ct, evtype, evid)); +} + /* * contract_orphan * @@ -840,6 +940,20 @@ contract_exit(proc_t *p) } } +static int +get_time_left(struct ct_time *t) +{ + clock_t ticks_elapsed; + int secs_elapsed; + + if (t->ctm_total == -1) + return (-1); + + ticks_elapsed = ddi_get_lbolt() - t->ctm_start; + secs_elapsed = t->ctm_total - (drv_hztousec(ticks_elapsed)/MICROSEC); + return (secs_elapsed > 0 ? 
secs_elapsed : 0); +} + /* * contract_status_common * @@ -897,8 +1011,8 @@ contract_status_common(contract_t *ct, zone_t *zone, void *status, CTS_OWNED : ct->ct_state); } STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt); - STRUCT_FSET(lstatus, ctst_ntime, -1); - STRUCT_FSET(lstatus, ctst_qtime, -1); + STRUCT_FSET(lstatus, ctst_ntime, get_time_left(&ct->ct_ntime)); + STRUCT_FSET(lstatus, ctst_qtime, get_time_left(&ct->ct_qtime)); STRUCT_FSET(lstatus, ctst_nevid, ct->ct_nevent ? ct->ct_nevent->cte_id : 0); STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit); @@ -1469,9 +1583,9 @@ ctmpl_clear(ct_template_t *template) * Creates a new contract using the specified template. */ int -ctmpl_create(ct_template_t *template) +ctmpl_create(ct_template_t *template, ctid_t *ctidp) { - return (template->ctmpl_ops->ctop_create(template)); + return (template->ctmpl_ops->ctop_create(template, ctidp)); } /* @@ -1520,7 +1634,7 @@ ctmpl_copy(ct_template_t *new, ct_template_t *old) */ /*ARGSUSED*/ int -ctmpl_create_inval(ct_template_t *template) +ctmpl_create_inval(ct_template_t *template, ctid_t *ctidp) { return (EINVAL); } @@ -2046,19 +2160,34 @@ cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp) * be zallocated by the caller, and the event's flags and type must be * set. The rest of the event's fields are initialized here. */ -void +uint64_t cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata) { ct_equeue_t *q; timespec_t ts; + uint64_t evid; + ct_kevent_t *negev; + int negend; e->cte_contract = ct; e->cte_data = data; e->cte_gdata = gdata; e->cte_refs = 3; - e->cte_id = atomic_add_64_nv(&ct->ct_type->ct_type_evid, 1); + evid = e->cte_id = atomic_add_64_nv(&ct->ct_type->ct_type_evid, 1); contract_hold(ct); + /* + * For a negotiation event we set the ct->ct_nevent field of the + * contract for the duration of the negotiation + */ + negend = 0; + if (e->cte_flags & CTE_NEG) { + cte_hold(e); + ct->ct_nevent = e; + } else if (e->cte_type == CT_EV_NEGEND) { + negend = 1; + } + gethrestime(&ts); /* @@ -2111,7 +2240,17 @@ cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata) cte_rele(e); } + if (negend) { + mutex_enter(&ct->ct_lock); + negev = ct->ct_nevent; + ct->ct_nevent = NULL; + cte_rele(negev); + mutex_exit(&ct->ct_lock); + } + mutex_exit(&ct->ct_evtlock); + + return (evid); } /* @@ -2347,7 +2486,8 @@ cte_get_event(ct_listener_t *l, int nonblock, void *uaddr, const cred_t *cr, STRUCT_FSET(ev, ctev_evid, temp->cte_id); STRUCT_FSET(ev, ctev_cttype, temp->cte_contract->ct_type->ct_type_index); - STRUCT_FSET(ev, ctev_flags, temp->cte_flags & (CTE_ACK|CTE_INFO)); + STRUCT_FSET(ev, ctev_flags, temp->cte_flags & + (CTE_ACK|CTE_INFO|CTE_NEG)); STRUCT_FSET(ev, ctev_type, temp->cte_type); STRUCT_FSET(ev, ctev_nbytes, len); STRUCT_FSET(ev, ctev_goffset, size); diff --git a/usr/src/uts/common/os/devcache.c b/usr/src/uts/common/os/devcache.c index 14cde49faf..8e1313d487 100644 --- a/usr/src/uts/common/os/devcache.c +++ b/usr/src/uts/common/os/devcache.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -224,6 +224,7 @@ i_ddi_devices_init(void) list_create(&nvf_dirty_files, sizeof (nvfd_t), offsetof(nvfd_t, nvf_link)); mutex_init(&nvf_cache_mutex, NULL, MUTEX_DEFAULT, NULL); + retire_store_init(); devid_cache_init(); } @@ -235,6 +236,16 @@ i_ddi_devices_init(void) void i_ddi_read_devices_files(void) { + /* + * The retire store should be the first file read as it + * may need to offline devices. kfio_disable_read is not + * used for retire. For the rationale see the tunable + * ddi_retire_store_bypass and comments in: + * uts/common/os/retire_store.c + */ + + retire_store_read(); + if (!kfio_disable_read) { mdi_read_devices_files(); devid_cache_read(); diff --git a/usr/src/uts/common/os/devcfg.c b/usr/src/uts/common/os/devcfg.c index 29150c5d8c..03f7ec89a2 100644 --- a/usr/src/uts/common/os/devcfg.c +++ b/usr/src/uts/common/os/devcfg.c @@ -38,6 +38,7 @@ #include <sys/ddi_impldefs.h> #include <sys/ndi_impldefs.h> #include <sys/modctl.h> +#include <sys/contract/device_impl.h> #include <sys/dacf.h> #include <sys/promif.h> #include <sys/cpuvar.h> @@ -50,6 +51,9 @@ #include <sys/fs/snode.h> #include <sys/fs/dv_node.h> #include <sys/reboot.h> +#include <sys/sysmacros.h> +#include <sys/sunldi.h> +#include <sys/sunldi_impl.h> #ifdef DEBUG int ddidebug = DDI_AUDIT; @@ -192,6 +196,10 @@ static void ndi_devi_exit_and_wait(dev_info_t *dip, int circular, clock_t end_time); static int ndi_devi_unbind_driver(dev_info_t *dip); +static void i_ddi_check_retire(dev_info_t *dip); + + + /* * dev_info cache and node management */ @@ -324,6 +332,15 @@ i_ddi_alloc_node(dev_info_t *pdip, char *node_name, pnode_t nodeid, mutex_init(&(devi->devi_pm_lock), NULL, MUTEX_DEFAULT, NULL); mutex_init(&(devi->devi_pm_busy_lock), NULL, MUTEX_DEFAULT, NULL); + RIO_TRACE((CE_NOTE, "i_ddi_alloc_node: Initing contract fields: " + "dip=%p, name=%s", (void *)devi, node_name)); + + mutex_init(&(devi->devi_ct_lock), NULL, MUTEX_DEFAULT, NULL); + cv_init(&(devi->devi_ct_cv), NULL, CV_DEFAULT, NULL); + devi->devi_ct_count = -1; /* counter not in use if -1 */ + list_create(&(devi->devi_ct), sizeof (cont_device_t), + offsetof(cont_device_t, cond_next)); + i_ddi_set_node_state((dev_info_t *)devi, DS_PROTO); da_log_enter((dev_info_t *)devi); return ((dev_info_t *)devi); @@ -389,7 +406,6 @@ i_ddi_free_node(dev_info_t *dip) if (devi->devi_audit) { kmem_free(devi->devi_audit, sizeof (devinfo_audit_t)); } - kmem_free(devi->devi_node_name, strlen(devi->devi_node_name) + 1); if (devi->devi_device_class) kmem_free(devi->devi_device_class, strlen(devi->devi_device_class) + 1); @@ -398,6 +414,20 @@ i_ddi_free_node(dev_info_t *dip) mutex_destroy(&(devi->devi_pm_lock)); mutex_destroy(&(devi->devi_pm_busy_lock)); + RIO_TRACE((CE_NOTE, "i_ddi_free_node: destroying contract fields: " + "dip=%p", (void *)dip)); + contract_device_remove_dip(dip); + ASSERT(devi->devi_ct_count == -1); + ASSERT(list_is_empty(&(devi->devi_ct))); + cv_destroy(&(devi->devi_ct_cv)); + list_destroy(&(devi->devi_ct)); + /* free this last since contract_device_remove_dip() uses it */ + mutex_destroy(&(devi->devi_ct_lock)); + RIO_TRACE((CE_NOTE, "i_ddi_free_node: destroyed all contract fields: " + "dip=%p, name=%s", (void *)dip, devi->devi_node_name)); + + kmem_free(devi->devi_node_name, strlen(devi->devi_node_name) + 1); + kmem_cache_free(ddi_node_cache, devi); } @@ -1441,6 +1471,7 @@ i_ndi_config_node(dev_info_t *dip, ddi_node_state_t state, uint_t flag) i_ddi_set_node_state(dip, DS_PROBED); break; case DS_PROBED: + i_ddi_check_retire(dip); 
atomic_add_long(&devinfo_attach_detach, 1); if ((rv = attach_node(dip)) == DDI_SUCCESS) i_ddi_set_node_state(dip, DS_ATTACHED); @@ -5110,6 +5141,172 @@ ndi_devi_config_obp_args(dev_info_t *parent, char *devnm, return (error); } +/* + * Pay attention, the following is a bit tricky: + * There are three possible cases when constraints are applied + * + * - A constraint is applied and the offline is disallowed. + * Simply return failure and block the offline + * + * - A constraint is applied and the offline is allowed. + * Mark the dip as having passed the constraint and allow + * offline to proceed. + * + * - A constraint is not applied. Allow the offline to proceed for now. + * + * In the latter two cases we allow the offline to proceed. If the + * offline succeeds (no users) everything is fine. It is ok for an unused + * device to be offlined even if no constraints were imposed on the offline. + * If the offline fails because there are users, we look at the constraint + * flag on the dip. If the constraint flag is set (implying that it passed + * a constraint) we allow the dip to be retired. If not, we don't allow + * the retire. This ensures that we don't allow unconstrained retire. + */ +int +e_ddi_offline_notify(dev_info_t *dip) +{ + int retval; + int constraint; + int failure; + + RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): entered: dip=%p", + (void *) dip)); + + constraint = 0; + failure = 0; + + /* + * Start with userland constraints first - applied via device contracts + */ + retval = contract_device_offline(dip, DDI_DEV_T_ANY, 0); + switch (retval) { + case CT_NACK: + RIO_DEBUG((CE_NOTE, "Received NACK for dip=%p", (void *)dip)); + failure = 1; + goto out; + case CT_ACK: + constraint = 1; + RIO_DEBUG((CE_NOTE, "Received ACK for dip=%p", (void *)dip)); + break; + case CT_NONE: + /* no contracts */ + RIO_DEBUG((CE_NOTE, "No contracts on dip=%p", (void *)dip)); + break; + default: + ASSERT(retval == CT_NONE); + } + + /* + * Next, use LDI to impose kernel constraints + */ + retval = ldi_invoke_notify(dip, DDI_DEV_T_ANY, 0, LDI_EV_OFFLINE, NULL); + switch (retval) { + case LDI_EV_FAILURE: + contract_device_negend(dip, DDI_DEV_T_ANY, 0, CT_EV_FAILURE); + RIO_DEBUG((CE_NOTE, "LDI callback failed on dip=%p", + (void *)dip)); + failure = 1; + goto out; + case LDI_EV_SUCCESS: + constraint = 1; + RIO_DEBUG((CE_NOTE, "LDI callback success on dip=%p", + (void *)dip)); + break; + case LDI_EV_NONE: + /* no matching LDI callbacks */ + RIO_DEBUG((CE_NOTE, "No LDI callbacks for dip=%p", + (void *)dip)); + break; + default: + ASSERT(retval == LDI_EV_NONE); + } + +out: + mutex_enter(&(DEVI(dip)->devi_lock)); + if ((DEVI(dip)->devi_flags & DEVI_RETIRING) && failure) { + RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): setting " + "BLOCKED flag. dip=%p", (void *)dip)); + DEVI(dip)->devi_flags |= DEVI_R_BLOCKED; + if (DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT) { + RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): " + "blocked. clearing RCM CONSTRAINT flag. dip=%p", + (void *)dip)); + DEVI(dip)->devi_flags &= ~DEVI_R_CONSTRAINT; + } + } else if ((DEVI(dip)->devi_flags & DEVI_RETIRING) && constraint) { + RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): setting " + "CONSTRAINT flag. dip=%p", (void *)dip)); + DEVI(dip)->devi_flags |= DEVI_R_CONSTRAINT; + } else if ((DEVI(dip)->devi_flags & DEVI_RETIRING) && + DEVI(dip)->devi_ref == 0) { + /* also allow retire if device is not in use */ + RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): device not in " + "use. Setting CONSTRAINT flag. 
dip=%p", (void *)dip)); + DEVI(dip)->devi_flags |= DEVI_R_CONSTRAINT; + } else { + /* + * Note: We cannot ASSERT here that DEVI_R_CONSTRAINT is + * not set, since other sources (such as RCM) may have + * set the flag. + */ + RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): not setting " + "constraint flag. dip=%p", (void *)dip)); + } + mutex_exit(&(DEVI(dip)->devi_lock)); + + + RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): exit: dip=%p", + (void *) dip)); + + return (failure ? DDI_FAILURE : DDI_SUCCESS); +} + +void +e_ddi_offline_finalize(dev_info_t *dip, int result) +{ + RIO_DEBUG((CE_NOTE, "e_ddi_offline_finalize(): entry: result=%s, " + "dip=%p", result == DDI_SUCCESS ? "SUCCESS" : "FAILURE", + (void *)dip)); + + contract_device_negend(dip, DDI_DEV_T_ANY, 0, result == DDI_SUCCESS ? + CT_EV_SUCCESS : CT_EV_FAILURE); + + ldi_invoke_finalize(dip, DDI_DEV_T_ANY, 0, + LDI_EV_OFFLINE, result == DDI_SUCCESS ? + LDI_EV_SUCCESS : LDI_EV_FAILURE, NULL); + + RIO_VERBOSE((CE_NOTE, "e_ddi_offline_finalize(): exit: dip=%p", + (void *)dip)); +} + +void +e_ddi_degrade_finalize(dev_info_t *dip) +{ + RIO_DEBUG((CE_NOTE, "e_ddi_degrade_finalize(): entry: " + "result always = DDI_SUCCESS, dip=%p", (void *)dip)); + + contract_device_degrade(dip, DDI_DEV_T_ANY, 0); + contract_device_negend(dip, DDI_DEV_T_ANY, 0, CT_EV_SUCCESS); + + ldi_invoke_finalize(dip, DDI_DEV_T_ANY, 0, LDI_EV_DEGRADE, + LDI_EV_SUCCESS, NULL); + + RIO_VERBOSE((CE_NOTE, "e_ddi_degrade_finalize(): exit: dip=%p", + (void *)dip)); +} + +void +e_ddi_undegrade_finalize(dev_info_t *dip) +{ + RIO_DEBUG((CE_NOTE, "e_ddi_undegrade_finalize(): entry: " + "result always = DDI_SUCCESS, dip=%p", (void *)dip)); + + contract_device_undegrade(dip, DDI_DEV_T_ANY, 0); + contract_device_negend(dip, DDI_DEV_T_ANY, 0, CT_EV_SUCCESS); + + RIO_VERBOSE((CE_NOTE, "e_ddi_undegrade_finalize(): exit: dip=%p", + (void *)dip)); +} /* * detach a node with parent already held busy @@ -5123,6 +5320,19 @@ devi_detach_node(dev_info_t *dip, uint_t flags) ASSERT(pdip && DEVI_BUSY_OWNED(pdip)); + /* + * Invoke notify if offlining + */ + if (flags & NDI_DEVI_OFFLINE) { + RIO_DEBUG((CE_NOTE, "devi_detach_node: offlining dip=%p", + (void *)dip)); + if (e_ddi_offline_notify(dip) != DDI_SUCCESS) { + RIO_DEBUG((CE_NOTE, "devi_detach_node: offline NACKed" + "dip=%p", (void *)dip)); + return (NDI_FAILURE); + } + } + if (flags & NDI_POST_EVENT) { if (i_ddi_devi_attached(pdip)) { if (ddi_get_eventcookie(dip, DDI_DEVI_REMOVE_EVENT, @@ -5131,8 +5341,22 @@ devi_detach_node(dev_info_t *dip, uint_t flags) } } - if (i_ddi_detachchild(dip, flags) != DDI_SUCCESS) + if (i_ddi_detachchild(dip, flags) != DDI_SUCCESS) { + if (flags & NDI_DEVI_OFFLINE) { + RIO_DEBUG((CE_NOTE, "devi_detach_node: offline failed." + " Calling e_ddi_offline_finalize with result=%d. " + "dip=%p", DDI_FAILURE, (void *)dip)); + e_ddi_offline_finalize(dip, DDI_FAILURE); + } return (NDI_FAILURE); + } + + if (flags & NDI_DEVI_OFFLINE) { + RIO_DEBUG((CE_NOTE, "devi_detach_node: offline succeeded." + " Calling e_ddi_offline_finalize with result=%d, " + "dip=%p", DDI_SUCCESS, (void *)dip)); + e_ddi_offline_finalize(dip, DDI_SUCCESS); + } if (flags & NDI_AUTODETACH) return (NDI_SUCCESS); @@ -7220,3 +7444,502 @@ ibt_hw_is_present() { return (ib_hw_status); } + +/* + * ASSERT that constraint flag is not set and then set the "retire attempt" + * flag. 
+ */ +int +e_ddi_mark_retiring(dev_info_t *dip, void *arg) +{ + char **cons_array = (char **)arg; + char *path; + int constraint; + int i; + + constraint = 0; + if (cons_array) { + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, path); + for (i = 0; cons_array[i] != NULL; i++) { + if (strcmp(path, cons_array[i]) == 0) { + constraint = 1; + break; + } + } + kmem_free(path, MAXPATHLEN); + } + + mutex_enter(&DEVI(dip)->devi_lock); + ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT)); + DEVI(dip)->devi_flags |= DEVI_RETIRING; + if (constraint) + DEVI(dip)->devi_flags |= DEVI_R_CONSTRAINT; + mutex_exit(&DEVI(dip)->devi_lock); + + RIO_VERBOSE((CE_NOTE, "marked dip as undergoing retire process dip=%p", + (void *)dip)); + + if (constraint) + RIO_DEBUG((CE_NOTE, "marked dip as constrained, dip=%p", + (void *)dip)); + + if (MDI_PHCI(dip)) + mdi_phci_mark_retiring(dip, cons_array); + + return (DDI_WALK_CONTINUE); +} + +static void +free_array(char **cons_array) +{ + int i; + + if (cons_array == NULL) + return; + + for (i = 0; cons_array[i] != NULL; i++) { + kmem_free(cons_array[i], strlen(cons_array[i]) + 1); + } + kmem_free(cons_array, (i+1) * sizeof (char *)); +} + +/* + * Walk *every* node in subtree and check if it blocks, allows or has no + * comment on a proposed retire. + */ +int +e_ddi_retire_notify(dev_info_t *dip, void *arg) +{ + int *constraint = (int *)arg; + + RIO_DEBUG((CE_NOTE, "retire notify: dip = %p", (void *)dip)); + + (void) e_ddi_offline_notify(dip); + + mutex_enter(&(DEVI(dip)->devi_lock)); + if (!(DEVI(dip)->devi_flags & DEVI_RETIRING)) { + RIO_DEBUG((CE_WARN, "retire notify: dip in retire " + "subtree is not marked: dip = %p", (void *)dip)); + *constraint = 0; + } else if (DEVI(dip)->devi_flags & DEVI_R_BLOCKED) { + ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT)); + RIO_DEBUG((CE_NOTE, "retire notify: BLOCKED: dip = %p", + (void *)dip)); + *constraint = 0; + } else if (!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT)) { + RIO_DEBUG((CE_NOTE, "retire notify: NO CONSTRAINT: " + "dip = %p", (void *)dip)); + *constraint = 0; + } else { + RIO_DEBUG((CE_NOTE, "retire notify: CONSTRAINT set: " + "dip = %p", (void *)dip)); + } + mutex_exit(&DEVI(dip)->devi_lock); + + if (MDI_PHCI(dip)) + mdi_phci_retire_notify(dip, constraint); + + return (DDI_WALK_CONTINUE); +} + +int +e_ddi_retire_finalize(dev_info_t *dip, void *arg) +{ + int constraint = *(int *)arg; + int finalize; + int phci_only; + + ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(dip))); + + mutex_enter(&DEVI(dip)->devi_lock); + if (!(DEVI(dip)->devi_flags & DEVI_RETIRING)) { + RIO_DEBUG((CE_WARN, + "retire: unmarked dip(%p) in retire subtree", + (void *)dip)); + ASSERT(!(DEVI(dip)->devi_flags & DEVI_RETIRED)); + ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT)); + ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_BLOCKED)); + mutex_exit(&DEVI(dip)->devi_lock); + return (DDI_WALK_CONTINUE); + } + + /* + * retire the device if constraints have been applied + * or if the device is not in use + */ + finalize = 0; + if (constraint) { + ASSERT(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT); + ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_BLOCKED)); + DEVI(dip)->devi_flags &= ~DEVI_R_CONSTRAINT; + DEVI(dip)->devi_flags &= ~DEVI_RETIRING; + DEVI(dip)->devi_flags |= DEVI_RETIRED; + mutex_exit(&DEVI(dip)->devi_lock); + (void) spec_fence_snode(dip, NULL); + RIO_DEBUG((CE_NOTE, "Fenced off: dip = %p", (void *)dip)); + e_ddi_offline_finalize(dip, DDI_SUCCESS); + } else { + if (DEVI(dip)->devi_flags & DEVI_R_BLOCKED) { + 
ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT)); + DEVI(dip)->devi_flags &= ~DEVI_R_BLOCKED; + DEVI(dip)->devi_flags &= ~DEVI_RETIRING; + /* we have already finalized during notify */ + } else if (DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT) { + DEVI(dip)->devi_flags &= ~DEVI_R_CONSTRAINT; + DEVI(dip)->devi_flags &= ~DEVI_RETIRING; + finalize = 1; + } else { + DEVI(dip)->devi_flags &= ~DEVI_RETIRING; + /* + * even if no contracts, need to call finalize + * to clear the contract barrier on the dip + */ + finalize = 1; + } + mutex_exit(&DEVI(dip)->devi_lock); + RIO_DEBUG((CE_NOTE, "finalize: NOT retired: dip = %p", + (void *)dip)); + if (finalize) + e_ddi_offline_finalize(dip, DDI_FAILURE); + mutex_enter(&DEVI(dip)->devi_lock); + DEVI_SET_DEVICE_DEGRADED(dip); + mutex_exit(&DEVI(dip)->devi_lock); + } + + /* + * phci_only variable indicates no client checking, just + * offline the PHCI. We set that to 0 to enable client + * checking + */ + phci_only = 0; + if (MDI_PHCI(dip)) + mdi_phci_retire_finalize(dip, phci_only); + + return (DDI_WALK_CONTINUE); +} + +/* + * Returns + * DDI_SUCCESS if constraints allow retire + * DDI_FAILURE if constraints don't allow retire. + * cons_array is a NULL terminated array of node paths for + * which constraints have already been applied. + */ +int +e_ddi_retire_device(char *path, char **cons_array) +{ + dev_info_t *dip; + dev_info_t *pdip; + int circ; + int circ2; + int constraint; + char *devnm; + + /* + * First, lookup the device + */ + dip = e_ddi_hold_devi_by_path(path, 0); + if (dip == NULL) { + /* + * device does not exist. This device cannot be + * a critical device since it is not in use. Thus + * this device is always retireable. Return DDI_SUCCESS + * to indicate this. If this device is ever + * instantiated, I/O framework will consult the + * the persistent retire store, mark it as + * retired and fence it off. + */ + RIO_DEBUG((CE_NOTE, "Retire device: device doesn't exist." + " NOP. Just returning SUCCESS. path=%s", path)); + free_array(cons_array); + return (DDI_SUCCESS); + } + + RIO_DEBUG((CE_NOTE, "Retire device: found dip = %p.", (void *)dip)); + + pdip = ddi_get_parent(dip); + ndi_hold_devi(pdip); + + /* + * Run devfs_clean() in case dip has no constraints and is + * not in use, so is retireable but there are dv_nodes holding + * ref-count on the dip. Note that devfs_clean() always returns + * success. + */ + devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP); + (void) ddi_deviname(dip, devnm); + (void) devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE); + kmem_free(devnm, MAXNAMELEN + 1); + + ndi_devi_enter(pdip, &circ); + + /* release hold from e_ddi_hold_devi_by_path */ + ndi_rele_devi(dip); + + /* + * If it cannot make a determination, is_leaf_node() assumes + * dip is a nexus. 
+ */ + (void) e_ddi_mark_retiring(dip, cons_array); + if (!is_leaf_node(dip)) { + ndi_devi_enter(dip, &circ2); + ddi_walk_devs(ddi_get_child(dip), e_ddi_mark_retiring, + cons_array); + ndi_devi_exit(dip, circ2); + } + free_array(cons_array); + + /* + * apply constraints + */ + RIO_DEBUG((CE_NOTE, "retire: subtree retire notify: path = %s", path)); + + constraint = 1; /* assume constraints allow retire */ + (void) e_ddi_retire_notify(dip, &constraint); + if (!is_leaf_node(dip)) { + ndi_devi_enter(dip, &circ2); + ddi_walk_devs(ddi_get_child(dip), e_ddi_retire_notify, + &constraint); + ndi_devi_exit(dip, circ2); + } + + /* + * Now finalize the retire + */ + (void) e_ddi_retire_finalize(dip, &constraint); + if (!is_leaf_node(dip)) { + ndi_devi_enter(dip, &circ2); + ddi_walk_devs(ddi_get_child(dip), e_ddi_retire_finalize, + &constraint); + ndi_devi_exit(dip, circ2); + } + + if (!constraint) { + RIO_DEBUG((CE_WARN, "retire failed: path = %s", path)); + } else { + RIO_DEBUG((CE_NOTE, "retire succeeded: path = %s", path)); + } + + ndi_devi_exit(pdip, circ); + ndi_rele_devi(pdip); + return (constraint ? DDI_SUCCESS : DDI_FAILURE); +} + +static int +unmark_and_unfence(dev_info_t *dip, void *arg) +{ + char *path = (char *)arg; + + ASSERT(path); + + (void) ddi_pathname(dip, path); + + mutex_enter(&DEVI(dip)->devi_lock); + DEVI(dip)->devi_flags &= ~DEVI_RETIRED; + DEVI_SET_DEVICE_ONLINE(dip); + mutex_exit(&DEVI(dip)->devi_lock); + + RIO_VERBOSE((CE_NOTE, "Cleared RETIRED flag: dip=%p, path=%s", + (void *)dip, path)); + + (void) spec_unfence_snode(dip); + RIO_DEBUG((CE_NOTE, "Unfenced device: %s", path)); + + if (MDI_PHCI(dip)) + mdi_phci_unretire(dip); + + return (DDI_WALK_CONTINUE); +} + +struct find_dip { + char *fd_buf; + char *fd_path; + dev_info_t *fd_dip; +}; + +static int +find_dip_fcn(dev_info_t *dip, void *arg) +{ + struct find_dip *findp = (struct find_dip *)arg; + + (void) ddi_pathname(dip, findp->fd_buf); + + if (strcmp(findp->fd_path, findp->fd_buf) != 0) + return (DDI_WALK_CONTINUE); + + ndi_hold_devi(dip); + findp->fd_dip = dip; + + return (DDI_WALK_TERMINATE); +} + +int +e_ddi_unretire_device(char *path) +{ + int circ; + char *path2; + dev_info_t *pdip; + dev_info_t *dip; + struct find_dip find_dip; + + ASSERT(path); + ASSERT(*path == '/'); + + if (strcmp(path, "/") == 0) { + cmn_err(CE_WARN, "Root node cannot be retired. Skipping " + "device unretire: %s", path); + return (0); + } + + /* + * We can't lookup the dip (corresponding to path) via + * e_ddi_hold_devi_by_path() because the dip may be offline + * and may not attach. Use ddi_walk_devs() instead; + */ + find_dip.fd_buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + find_dip.fd_path = path; + find_dip.fd_dip = NULL; + + pdip = ddi_root_node(); + + ndi_devi_enter(pdip, &circ); + ddi_walk_devs(ddi_get_child(pdip), find_dip_fcn, &find_dip); + ndi_devi_exit(pdip, circ); + + kmem_free(find_dip.fd_buf, MAXPATHLEN); + + if (find_dip.fd_dip == NULL) { + cmn_err(CE_WARN, "Device not found in device tree. 
Skipping " + "device unretire: %s", path); + return (0); + } + + dip = find_dip.fd_dip; + + pdip = ddi_get_parent(dip); + + ndi_hold_devi(pdip); + + ndi_devi_enter(pdip, &circ); + + path2 = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + (void) unmark_and_unfence(dip, path2); + if (!is_leaf_node(dip)) { + ndi_devi_enter(dip, &circ); + ddi_walk_devs(ddi_get_child(dip), unmark_and_unfence, path2); + ndi_devi_exit(dip, circ); + } + + kmem_free(path2, MAXPATHLEN); + + /* release hold from find_dip_fcn() */ + ndi_rele_devi(dip); + + ndi_devi_exit(pdip, circ); + + ndi_rele_devi(pdip); + + return (0); +} + +/* + * Called before attach on a dip that has been retired. + */ +static int +mark_and_fence(dev_info_t *dip, void *arg) +{ + char *fencepath = (char *)arg; + + /* + * We have already decided to retire this device. The various + * constraint checking should not be set. + * NOTE that the retire flag may already be set due to + * fenced -> detach -> fenced transitions. + */ + mutex_enter(&DEVI(dip)->devi_lock); + ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT)); + ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_BLOCKED)); + ASSERT(!(DEVI(dip)->devi_flags & DEVI_RETIRING)); + DEVI(dip)->devi_flags |= DEVI_RETIRED; + mutex_exit(&DEVI(dip)->devi_lock); + RIO_VERBOSE((CE_NOTE, "marked as RETIRED dip=%p", (void *)dip)); + + if (fencepath) { + (void) spec_fence_snode(dip, NULL); + RIO_DEBUG((CE_NOTE, "Fenced: %s", + ddi_pathname(dip, fencepath))); + } + + return (DDI_WALK_CONTINUE); +} + +/* + * Checks the retire database and: + * + * - if device is present in the retire database, marks the device retired + * and fences it off. + * - if device is not in retire database, allows the device to attach normally + * + * To be called only by framework attach code on first attach attempt. + * + */ +static void +i_ddi_check_retire(dev_info_t *dip) +{ + char *path; + dev_info_t *pdip; + int circ; + int phci_only; + + pdip = ddi_get_parent(dip); + + /* + * Root dip is treated special and doesn't take this code path. + * Also root can never be retired. + */ + ASSERT(pdip); + ASSERT(DEVI_BUSY_OWNED(pdip)); + ASSERT(i_ddi_node_state(dip) < DS_ATTACHED); + + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + (void) ddi_pathname(dip, path); + + RIO_VERBOSE((CE_NOTE, "Checking if dip should attach: dip=%p, path=%s", + (void *)dip, path)); + + /* + * Check if this device is in the "retired" store i.e. should + * be retired. If not, we have nothing to do. + */ + if (e_ddi_device_retired(path) == 0) { + RIO_VERBOSE((CE_NOTE, "device is NOT retired: path=%s", path)); + kmem_free(path, MAXPATHLEN); + return; + } + + RIO_DEBUG((CE_NOTE, "attach: device is retired: path=%s", path)); + + /* + * Mark dips and fence off snodes (if any) + */ + RIO_DEBUG((CE_NOTE, "attach: Mark and fence subtree: path=%s", path)); + (void) mark_and_fence(dip, path); + if (!is_leaf_node(dip)) { + ndi_devi_enter(dip, &circ); + ddi_walk_devs(ddi_get_child(dip), mark_and_fence, path); + ndi_devi_exit(dip, circ); + } + + kmem_free(path, MAXPATHLEN); + + /* + * We don't want to check the client. 
We just want to + * offline the PHCI + */ + phci_only = 1; + if (MDI_PHCI(dip)) + mdi_phci_retire_finalize(dip, phci_only); +} diff --git a/usr/src/uts/common/os/driver_lyr.c b/usr/src/uts/common/os/driver_lyr.c index f2dea074c1..266e3cbb79 100644 --- a/usr/src/uts/common/os/driver_lyr.c +++ b/usr/src/uts/common/os/driver_lyr.c @@ -69,6 +69,11 @@ #include <sys/socketvar.h> #include <sys/kstr.h> +/* + * Device contract related + */ +#include <sys/contract_impl.h> +#include <sys/contract/device_impl.h> /* * Define macros to manipulate snode, vnode, and open device flags @@ -97,11 +102,23 @@ #define LH_CBDEV (0x2) /* handle to a char/block device */ /* - * Define marco for devid property lookups + * Define macro for devid property lookups */ #define DEVID_PROP_FLAGS (DDI_PROP_DONTPASS | \ DDI_PROP_TYPE_STRING|DDI_PROP_CANSLEEP) +/* + * Dummy string for NDI events + */ +#define NDI_EVENT_SERVICE "NDI_EVENT_SERVICE" + +static void ldi_ev_lock(void); +static void ldi_ev_unlock(void); + +#ifdef LDI_OBSOLETE_EVENT +int ldi_remove_event_handler(ldi_handle_t lh, ldi_callback_id_t id); +#endif + /* * globals @@ -113,6 +130,22 @@ static kmutex_t ldi_handle_hash_lock[LH_HASH_SZ]; static struct ldi_handle *ldi_handle_hash[LH_HASH_SZ]; static size_t ldi_handle_hash_count; +static struct ldi_ev_callback_list ldi_ev_callback_list; + +static uint32_t ldi_ev_id_pool = 0; + +struct ldi_ev_cookie { + char *ck_evname; + uint_t ck_sync; + uint_t ck_ctype; +}; + +static struct ldi_ev_cookie ldi_ev_cookies[] = { + { LDI_EV_OFFLINE, 1, CT_DEV_EV_OFFLINE}, + { LDI_EV_DEGRADE, 0, CT_DEV_EV_DEGRADED}, + { NULL} /* must terminate list */ +}; + void ldi_init(void) { @@ -127,6 +160,17 @@ ldi_init(void) mutex_init(&ldi_ident_hash_lock[i], NULL, MUTEX_DEFAULT, NULL); ldi_ident_hash[i] = NULL; } + + /* + * Initialize the LDI event subsystem + */ + mutex_init(&ldi_ev_callback_list.le_lock, NULL, MUTEX_DEFAULT, NULL); + cv_init(&ldi_ev_callback_list.le_cv, NULL, CV_DEFAULT, NULL); + ldi_ev_callback_list.le_busy = 0; + ldi_ev_callback_list.le_thread = NULL; + list_create(&ldi_ev_callback_list.le_head, + sizeof (ldi_ev_callback_impl_t), + offsetof(ldi_ev_callback_impl_t, lec_list)); } /* @@ -334,7 +378,9 @@ handle_alloc(vnode_t *vp, struct ldi_ident *ident) lhp->lh_ref = 1; lhp->lh_vp = vp; lhp->lh_ident = ident; +#ifdef LDI_OBSOLETE_EVENT mutex_init(lhp->lh_lock, NULL, MUTEX_DEFAULT, NULL); +#endif /* set the device type for this handle */ lhp->lh_type = 0; @@ -398,10 +444,13 @@ handle_release(struct ldi_handle *lhp) VN_RELE(lhp->lh_vp); ident_release(lhp->lh_ident); +#ifdef LDI_OBSOLETE_EVENT mutex_destroy(lhp->lh_lock); +#endif kmem_free(lhp, sizeof (struct ldi_handle)); } +#ifdef LDI_OBSOLETE_EVENT /* * LDI event manipulation functions */ @@ -457,6 +506,7 @@ i_ldi_callback(dev_info_t *dip, ddi_eventcookie_t event_cookie, lep->le_handler(lep->le_lhp, event_cookie, lep->le_arg, bus_impldata); } +#endif /* * LDI open helper functions @@ -1629,6 +1679,9 @@ ldi_close(ldi_handle_t lh, int flag, cred_t *cr) struct ldi_handle *handlep = (struct ldi_handle *)lh; struct ldi_event *lep; int err = 0; + int notify = 0; + list_t *listp; + ldi_ev_callback_impl_t *lecp; if (lh == NULL) return (EINVAL); @@ -1644,6 +1697,8 @@ ldi_close(ldi_handle_t lh, int flag, cred_t *cr) bflush(dev); } +#ifdef LDI_OBSOLETE_EVENT + /* * Any event handlers should have been unregistered by the * time ldi_close() is called. 
If they haven't then it's a @@ -1669,6 +1724,7 @@ ldi_close(ldi_handle_t lh, int flag, cred_t *cr) "failed to unregister layered event handlers before " "closing devices", lip->li_modname); } +#endif /* do a layered close on the device */ err = VOP_CLOSE(handlep->lh_vp, flag | FKLYR, 1, (offset_t)0, cr); @@ -1676,6 +1732,40 @@ ldi_close(ldi_handle_t lh, int flag, cred_t *cr) LDI_OPENCLOSE((CE_WARN, "%s: lh=0x%p", "ldi close", (void *)lh)); /* + * Search the event callback list for callbacks with this + * handle. There are 2 cases + * 1. Called in the context of a notify. The handle consumer + * is releasing its hold on the device to allow a reconfiguration + * of the device. Simply NULL out the handle and the notify callback. + * The finalize callback is still available so that the consumer + * knows of the final disposition of the device. + * 2. Not called in the context of notify. NULL out the handle as well + * as the notify and finalize callbacks. Since the consumer has + * closed the handle, we assume it is not interested in the + * notify and finalize callbacks. + */ + ldi_ev_lock(); + + if (handlep->lh_flags & LH_FLAGS_NOTIFY) + notify = 1; + listp = &ldi_ev_callback_list.le_head; + for (lecp = list_head(listp); lecp; lecp = list_next(listp, lecp)) { + if (lecp->lec_lhp != handlep) + continue; + lecp->lec_lhp = NULL; + lecp->lec_notify = NULL; + LDI_EVDBG((CE_NOTE, "ldi_close: NULLed lh and notify")); + if (!notify) { + LDI_EVDBG((CE_NOTE, "ldi_close: NULLed finalize")); + lecp->lec_finalize = NULL; + } + } + + if (notify) + handlep->lh_flags &= ~LH_FLAGS_NOTIFY; + ldi_ev_unlock(); + + /* * Free the handle even if the device close failed. why? * * If the device close failed we can't really make assumptions @@ -2678,6 +2768,8 @@ ldi_prop_exists(ldi_handle_t lh, uint_t flags, char *name) return (res); } +#ifdef LDI_OBSOLETE_EVENT + int ldi_get_eventcookie(ldi_handle_t lh, char *name, ddi_eventcookie_t *ecp) { @@ -2794,3 +2886,845 @@ ldi_remove_event_handler(ldi_handle_t lh, ldi_callback_id_t id) kmem_free(lep, sizeof (struct ldi_event)); return (res); } + +#endif + +/* + * Here are some definitions of terms used in the following LDI events + * code: + * + * "LDI events" AKA "native events": These are events defined by the + * "new" LDI event framework. These events are serviced by the LDI event + * framework itself and thus are native to it. + * + * "LDI contract events": These are contract events that correspond to the + * LDI events. This mapping of LDI events to contract events is defined by + * the ldi_ev_cookies[] array above. + * + * NDI events: These are events which are serviced by the NDI event subsystem. + * LDI subsystem just provides a thin wrapper around the NDI event interfaces + * These events are thereefore *not* native events. + */ + +static int +ldi_native_event(const char *evname) +{ + int i; + + LDI_EVTRC((CE_NOTE, "ldi_native_event: entered: ev=%s", evname)); + + for (i = 0; ldi_ev_cookies[i].ck_evname != NULL; i++) { + if (strcmp(ldi_ev_cookies[i].ck_evname, evname) == 0) + return (1); + } + + return (0); +} + +static uint_t +ldi_ev_sync_event(const char *evname) +{ + int i; + + ASSERT(ldi_native_event(evname)); + + LDI_EVTRC((CE_NOTE, "ldi_ev_sync_event: entered: %s", evname)); + + for (i = 0; ldi_ev_cookies[i].ck_evname != NULL; i++) { + if (strcmp(ldi_ev_cookies[i].ck_evname, evname) == 0) + return (ldi_ev_cookies[i].ck_sync); + } + + /* + * This should never happen until non-contract based + * LDI events are introduced. 
If that happens, we will + * use a "special" token to indicate that there are no + * contracts corresponding to this LDI event. + */ + cmn_err(CE_PANIC, "Unknown LDI event: %s", evname); + + return (0); +} + +static uint_t +ldi_contract_event(const char *evname) +{ + int i; + + ASSERT(ldi_native_event(evname)); + + LDI_EVTRC((CE_NOTE, "ldi_contract_event: entered: %s", evname)); + + for (i = 0; ldi_ev_cookies[i].ck_evname != NULL; i++) { + if (strcmp(ldi_ev_cookies[i].ck_evname, evname) == 0) + return (ldi_ev_cookies[i].ck_ctype); + } + + /* + * This should never happen until non-contract based + * LDI events are introduced. If that happens, we will + * use a "special" token to indicate that there are no + * contracts corresponding to this LDI event. + */ + cmn_err(CE_PANIC, "Unknown LDI event: %s", evname); + + return (0); +} + +char * +ldi_ev_get_type(ldi_ev_cookie_t cookie) +{ + int i; + struct ldi_ev_cookie *cookie_impl = (struct ldi_ev_cookie *)cookie; + + for (i = 0; ldi_ev_cookies[i].ck_evname != NULL; i++) { + if (&ldi_ev_cookies[i] == cookie_impl) { + LDI_EVTRC((CE_NOTE, "ldi_ev_get_type: LDI: %s", + ldi_ev_cookies[i].ck_evname)); + return (ldi_ev_cookies[i].ck_evname); + } + } + + /* + * Not an LDI native event. Must be NDI event service. + * Just return a generic string + */ + LDI_EVTRC((CE_NOTE, "ldi_ev_get_type: is NDI")); + return (NDI_EVENT_SERVICE); +} + +static int +ldi_native_cookie(ldi_ev_cookie_t cookie) +{ + int i; + struct ldi_ev_cookie *cookie_impl = (struct ldi_ev_cookie *)cookie; + + for (i = 0; ldi_ev_cookies[i].ck_evname != NULL; i++) { + if (&ldi_ev_cookies[i] == cookie_impl) { + LDI_EVTRC((CE_NOTE, "ldi_native_cookie: native LDI")); + return (1); + } + } + + LDI_EVTRC((CE_NOTE, "ldi_native_cookie: is NDI")); + return (0); +} + +static ldi_ev_cookie_t +ldi_get_native_cookie(const char *evname) +{ + int i; + + for (i = 0; ldi_ev_cookies[i].ck_evname != NULL; i++) { + if (strcmp(ldi_ev_cookies[i].ck_evname, evname) == 0) { + LDI_EVTRC((CE_NOTE, "ldi_get_native_cookie: found")); + return ((ldi_ev_cookie_t)&ldi_ev_cookies[i]); + } + } + + LDI_EVTRC((CE_NOTE, "ldi_get_native_cookie: NOT found")); + return (NULL); +} + +/* + * ldi_ev_lock() needs to be recursive, since layered drivers may call + * other LDI interfaces (such as ldi_close() from within the context of + * a notify callback. Since the notify callback is called with the + * ldi_ev_lock() held and ldi_close() also grabs ldi_ev_lock, the lock needs + * to be recursive. 
+ */ +static void +ldi_ev_lock(void) +{ + LDI_EVTRC((CE_NOTE, "ldi_ev_lock: entered")); + + mutex_enter(&ldi_ev_callback_list.le_lock); + if (ldi_ev_callback_list.le_thread == curthread) { + ASSERT(ldi_ev_callback_list.le_busy >= 1); + ldi_ev_callback_list.le_busy++; + } else { + while (ldi_ev_callback_list.le_busy) + cv_wait(&ldi_ev_callback_list.le_cv, + &ldi_ev_callback_list.le_lock); + ASSERT(ldi_ev_callback_list.le_thread == NULL); + ldi_ev_callback_list.le_busy = 1; + ldi_ev_callback_list.le_thread = curthread; + } + mutex_exit(&ldi_ev_callback_list.le_lock); + + LDI_EVTRC((CE_NOTE, "ldi_ev_lock: exit")); +} + +static void +ldi_ev_unlock(void) +{ + LDI_EVTRC((CE_NOTE, "ldi_ev_unlock: entered")); + mutex_enter(&ldi_ev_callback_list.le_lock); + ASSERT(ldi_ev_callback_list.le_thread == curthread); + ASSERT(ldi_ev_callback_list.le_busy >= 1); + + ldi_ev_callback_list.le_busy--; + if (ldi_ev_callback_list.le_busy == 0) { + ldi_ev_callback_list.le_thread = NULL; + cv_signal(&ldi_ev_callback_list.le_cv); + } + mutex_exit(&ldi_ev_callback_list.le_lock); + LDI_EVTRC((CE_NOTE, "ldi_ev_unlock: exit")); +} + +int +ldi_ev_get_cookie(ldi_handle_t lh, char *evname, ldi_ev_cookie_t *cookiep) +{ + struct ldi_handle *handlep = (struct ldi_handle *)lh; + dev_info_t *dip; + dev_t dev; + int res; + struct snode *csp; + ddi_eventcookie_t ddi_cookie; + ldi_ev_cookie_t tcookie; + + LDI_EVDBG((CE_NOTE, "ldi_ev_get_cookie: entered: evname=%s", + evname ? evname : "<NULL>")); + + if (lh == NULL || evname == NULL || + strlen(evname) == 0 || cookiep == NULL) { + LDI_EVDBG((CE_NOTE, "ldi_ev_get_cookie: invalid args")); + return (LDI_EV_FAILURE); + } + + *cookiep = NULL; + + /* + * First check if it is a LDI native event + */ + tcookie = ldi_get_native_cookie(evname); + if (tcookie) { + LDI_EVDBG((CE_NOTE, "ldi_ev_get_cookie: got native cookie")); + *cookiep = tcookie; + return (LDI_EV_SUCCESS); + } + + /* + * Not a LDI native event. 
Try NDI event services + */ + + dev = handlep->lh_vp->v_rdev; + + csp = VTOCS(handlep->lh_vp); + mutex_enter(&csp->s_lock); + if ((dip = csp->s_dip) != NULL) + e_ddi_hold_devi(dip); + mutex_exit(&csp->s_lock); + if (dip == NULL) + dip = e_ddi_hold_devi_by_dev(dev, 0); + + if (dip == NULL) { + cmn_err(CE_WARN, "ldi_ev_get_cookie: No devinfo node for LDI " + "handle: %p", (void *)handlep); + return (LDI_EV_FAILURE); + } + + LDI_EVDBG((CE_NOTE, "Calling ddi_get_eventcookie: dip=%p, ev=%s", + (void *)dip, evname)); + + res = ddi_get_eventcookie(dip, evname, &ddi_cookie); + + ddi_release_devi(dip); + + if (res == DDI_SUCCESS) { + LDI_EVDBG((CE_NOTE, "ldi_ev_get_cookie: NDI cookie found")); + *cookiep = (ldi_ev_cookie_t)ddi_cookie; + return (LDI_EV_SUCCESS); + } else { + LDI_EVDBG((CE_WARN, "ldi_ev_get_cookie: NDI cookie: failed")); + return (LDI_EV_FAILURE); + } +} + +/*ARGSUSED*/ +static void +i_ldi_ev_callback(dev_info_t *dip, ddi_eventcookie_t event_cookie, + void *arg, void *ev_data) +{ + ldi_ev_callback_impl_t *lecp = (ldi_ev_callback_impl_t *)arg; + + ASSERT(lecp != NULL); + ASSERT(!ldi_native_cookie(lecp->lec_cookie)); + ASSERT(lecp->lec_lhp); + ASSERT(lecp->lec_notify == NULL); + ASSERT(lecp->lec_finalize); + + LDI_EVDBG((CE_NOTE, "i_ldi_ev_callback: ldh=%p, cookie=%p, arg=%p, " + "ev_data=%p", (void *)lecp->lec_lhp, (void *)event_cookie, + (void *)lecp->lec_arg, (void *)ev_data)); + + lecp->lec_finalize(lecp->lec_lhp, (ldi_ev_cookie_t)event_cookie, + lecp->lec_arg, ev_data); +} + +int +ldi_ev_register_callbacks(ldi_handle_t lh, ldi_ev_cookie_t cookie, + ldi_ev_callback_t *callb, void *arg, ldi_callback_id_t *id) +{ + struct ldi_handle *lhp = (struct ldi_handle *)lh; + ldi_ev_callback_impl_t *lecp; + dev_t dev; + struct snode *csp; + dev_info_t *dip; + int ddi_event; + + ASSERT(!servicing_interrupt()); + + if (lh == NULL || cookie == NULL || callb == NULL || id == NULL) { + LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks: Invalid args")); + return (LDI_EV_FAILURE); + } + + if (callb->cb_vers != LDI_EV_CB_VERS) { + LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks: Invalid vers")); + return (LDI_EV_FAILURE); + } + + if (callb->cb_notify == NULL && callb->cb_finalize == NULL) { + LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks: NULL callb")); + return (LDI_EV_FAILURE); + } + + *id = 0; + + dev = lhp->lh_vp->v_rdev; + csp = VTOCS(lhp->lh_vp); + mutex_enter(&csp->s_lock); + if ((dip = csp->s_dip) != NULL) + e_ddi_hold_devi(dip); + mutex_exit(&csp->s_lock); + if (dip == NULL) + dip = e_ddi_hold_devi_by_dev(dev, 0); + + if (dip == NULL) { + cmn_err(CE_WARN, "ldi_ev_register: No devinfo node for " + "LDI handle: %p", (void *)lhp); + return (LDI_EV_FAILURE); + } + + lecp = kmem_zalloc(sizeof (ldi_ev_callback_impl_t), KM_SLEEP); + + ddi_event = 0; + if (!ldi_native_cookie(cookie)) { + if (callb->cb_notify || callb->cb_finalize == NULL) { + /* + * NDI event services only accept finalize + */ + cmn_err(CE_WARN, "%s: module: %s: NDI event cookie. 
" + "Only finalize" + " callback supported with this cookie", + "ldi_ev_register_callbacks", + lhp->lh_ident->li_modname); + kmem_free(lecp, sizeof (ldi_ev_callback_impl_t)); + ddi_release_devi(dip); + return (LDI_EV_FAILURE); + } + + if (ddi_add_event_handler(dip, (ddi_eventcookie_t)cookie, + i_ldi_ev_callback, (void *)lecp, + (ddi_callback_id_t *)&lecp->lec_id) + != DDI_SUCCESS) { + kmem_free(lecp, sizeof (ldi_ev_callback_impl_t)); + ddi_release_devi(dip); + LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks(): " + "ddi_add_event_handler failed")); + return (LDI_EV_FAILURE); + } + ddi_event = 1; + LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks(): " + "ddi_add_event_handler success")); + } + + + + ldi_ev_lock(); + + /* + * Add the notify/finalize callback to the LDI's list of callbacks. + */ + lecp->lec_lhp = lhp; + lecp->lec_dev = lhp->lh_vp->v_rdev; + lecp->lec_spec = (lhp->lh_vp->v_type == VCHR) ? + S_IFCHR : S_IFBLK; + lecp->lec_notify = callb->cb_notify; + lecp->lec_finalize = callb->cb_finalize; + lecp->lec_arg = arg; + lecp->lec_cookie = cookie; + if (!ddi_event) + lecp->lec_id = (void *)(uintptr_t)(++ldi_ev_id_pool); + else + ASSERT(lecp->lec_id); + lecp->lec_dip = dip; + list_insert_tail(&ldi_ev_callback_list.le_head, lecp); + + *id = (ldi_callback_id_t)lecp->lec_id; + + ldi_ev_unlock(); + + ddi_release_devi(dip); + + LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks: registered " + "notify/finalize")); + + return (LDI_EV_SUCCESS); +} + +static int +ldi_ev_device_match(ldi_ev_callback_impl_t *lecp, dev_info_t *dip, + dev_t dev, int spec_type) +{ + ASSERT(lecp); + ASSERT(dip); + ASSERT(dev != DDI_DEV_T_NONE); + ASSERT(dev != NODEV); + ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || + (spec_type == S_IFCHR || spec_type == S_IFBLK)); + ASSERT(lecp->lec_dip); + ASSERT(lecp->lec_spec == S_IFCHR || lecp->lec_spec == S_IFBLK); + ASSERT(lecp->lec_dev != DDI_DEV_T_ANY); + ASSERT(lecp->lec_dev != DDI_DEV_T_NONE); + ASSERT(lecp->lec_dev != NODEV); + + if (dip != lecp->lec_dip) + return (0); + + if (dev != DDI_DEV_T_ANY) { + if (dev != lecp->lec_dev || spec_type != lecp->lec_spec) + return (0); + } + + LDI_EVTRC((CE_NOTE, "ldi_ev_device_match: MATCH dip=%p", (void *)dip)); + + return (1); +} + +/* + * LDI framework function to post a "notify" event to all layered drivers + * that have registered for that event + * + * Returns: + * LDI_EV_SUCCESS - registered callbacks allow event + * LDI_EV_FAILURE - registered callbacks block event + * LDI_EV_NONE - No matching LDI callbacks + * + * This function is *not* to be called by layered drivers. It is for I/O + * framework code in Solaris, such as the I/O retire code and DR code + * to call while servicing a device event such as offline or degraded. 
+ */ +int +ldi_invoke_notify(dev_info_t *dip, dev_t dev, int spec_type, char *event, + void *ev_data) +{ + ldi_ev_callback_impl_t *lecp; + list_t *listp; + int ret; + char *lec_event; + + ASSERT(dip); + ASSERT(dev != DDI_DEV_T_NONE); + ASSERT(dev != NODEV); + ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || + (spec_type == S_IFCHR || spec_type == S_IFBLK)); + ASSERT(event); + ASSERT(ldi_native_event(event)); + ASSERT(ldi_ev_sync_event(event)); + + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): entered: dip=%p, ev=%s", + (void *)dip, event)); + + ret = LDI_EV_NONE; + ldi_ev_lock(); + listp = &ldi_ev_callback_list.le_head; + for (lecp = list_head(listp); lecp; lecp = list_next(listp, lecp)) { + + /* Check if matching device */ + if (!ldi_ev_device_match(lecp, dip, dev, spec_type)) + continue; + + if (lecp->lec_lhp == NULL) { + /* + * Consumer has unregistered the handle and so + * is no longer interested in notify events. + */ + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): No LDI " + "handle, skipping")); + continue; + } + + if (lecp->lec_notify == NULL) { + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): No notify " + "callback. skipping")); + continue; /* not interested in notify */ + } + + /* + * Check if matching event + */ + lec_event = ldi_ev_get_type(lecp->lec_cookie); + if (strcmp(event, lec_event) != 0) { + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): Not matching" + " event {%s,%s}. skipping", event, lec_event)); + continue; + } + + lecp->lec_lhp->lh_flags |= LH_FLAGS_NOTIFY; + if (lecp->lec_notify(lecp->lec_lhp, lecp->lec_cookie, + lecp->lec_arg, ev_data) != LDI_EV_SUCCESS) { + ret = LDI_EV_FAILURE; + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): notify" + " FAILURE")); + break; + } + + /* We have a matching callback that allows the event to occur */ + ret = LDI_EV_SUCCESS; + + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): 1 consumer success")); + } + + if (ret != LDI_EV_FAILURE) + goto out; + + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): undoing notify")); + + /* + * Undo notifies already sent + */ + lecp = list_prev(listp, lecp); + for (; lecp; lecp = list_prev(listp, lecp)) { + + /* + * Check if matching device + */ + if (!ldi_ev_device_match(lecp, dip, dev, spec_type)) + continue; + + + if (lecp->lec_finalize == NULL) { + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): no finalize, " + "skipping")); + continue; /* not interested in finalize */ + } + + /* + * it is possible that in response to a notify event a + * layered driver closed its LDI handle so it is ok + * to have a NULL LDI handle for finalize. The layered + * driver is expected to maintain state in its "arg" + * parameter to keep track of the closed device. 
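To complete the consumer-side picture, here is a finalize callback matching the comment above, i.e. one that copes with the handle having been closed during notify and relies on state carried in "arg". The xx_ names, including xx_teardown() and xx_reopen(), are hypothetical.

static void
xx_offline_finalize(ldi_handle_t lh, ldi_ev_cookie_t cookie, int result,
    void *arg, void *ev_data)
{
        xx_state_t *xsp = arg;

        /* lh may be NULL here if we did an ldi_close() from notify */

        if (result == LDI_EV_SUCCESS) {
                /* the device is gone for good; tear down our state */
                xx_teardown(xsp);
                return;
        }

        /* the offline was vetoed or failed; reacquire our hold if needed */
        if (xsp->xs_lh_closed)
                xx_reopen(xsp);
}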
+ */ + + /* Check if matching event */ + lec_event = ldi_ev_get_type(lecp->lec_cookie); + if (strcmp(event, lec_event) != 0) { + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): not matching " + "event: %s,%s, skipping", event, lec_event)); + continue; + } + + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): calling finalize")); + + lecp->lec_finalize(lecp->lec_lhp, lecp->lec_cookie, + LDI_EV_FAILURE, lecp->lec_arg, ev_data); + + /* + * If LDI native event and LDI handle closed in context + * of notify, NULL out the finalize callback as we have + * already called the 1 finalize above allowed in this situation + */ + if (lecp->lec_lhp == NULL && + ldi_native_cookie(lecp->lec_cookie)) { + LDI_EVDBG((CE_NOTE, + "ldi_invoke_notify(): NULL-ing finalize after " + "calling 1 finalize following ldi_close")); + lecp->lec_finalize = NULL; + } + } + +out: + ldi_ev_unlock(); + + if (ret == LDI_EV_NONE) { + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): no matching " + "LDI callbacks")); + } + + return (ret); +} + +/* + * Framework function to be called from a layered driver to propagate + * LDI "notify" events to exported minors. + * + * This function is a public interface exported by the LDI framework + * for use by layered drivers to propagate device events up the software + * stack. + */ +int +ldi_ev_notify(dev_info_t *dip, minor_t minor, int spec_type, + ldi_ev_cookie_t cookie, void *ev_data) +{ + char *evname = ldi_ev_get_type(cookie); + uint_t ct_evtype; + dev_t dev; + major_t major; + int retc; + int retl; + + ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); + ASSERT(dip); + ASSERT(ldi_native_cookie(cookie)); + + LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): entered: event=%s, dip=%p", + evname, (void *)dip)); + + if (!ldi_ev_sync_event(evname)) { + cmn_err(CE_PANIC, "ldi_ev_notify(): %s not a " + "negotiatable event", evname); + return (LDI_EV_SUCCESS); + } + + major = ddi_driver_major(dip); + if (major == (major_t)-1) { + char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, path); + cmn_err(CE_WARN, "ldi_ev_notify: cannot derive major number " + "for device %s", path); + kmem_free(path, MAXPATHLEN); + return (LDI_EV_FAILURE); + } + dev = makedevice(major, minor); + + /* + * Generate negotiation contract events on contracts (if any) associated + * with this minor. + */ + LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): calling contract nego.")); + ct_evtype = ldi_contract_event(evname); + retc = contract_device_negotiate(dip, dev, spec_type, ct_evtype); + if (retc == CT_NACK) { + LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): contract neg. NACK")); + return (LDI_EV_FAILURE); + } + + LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): LDI invoke notify")); + retl = ldi_invoke_notify(dip, dev, spec_type, evname, ev_data); + if (retl == LDI_EV_FAILURE) { + LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): ldi_invoke_notify " + "returned FAILURE. Calling contract negend")); + contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE); + return (LDI_EV_FAILURE); + } + + /* + * The very fact that we are here indicates that there is a + * LDI callback (and hence a constraint) for the retire of the + * HW device. So we just return success even if there are no + * contracts or LDI callbacks against the minors layered on top + * of the HW minors + */ + LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): returning SUCCESS")); + return (LDI_EV_SUCCESS); +} + +/* + * LDI framework function to invoke "finalize" callbacks for all layered + * drivers that have registered callbacks for that event. + * + * This function is *not* to be called by layered drivers. 
It is for I/O + * framework code in Solaris, such as the I/O retire code and DR code + * to call while servicing a device event such as offline or degraded. + */ +void +ldi_invoke_finalize(dev_info_t *dip, dev_t dev, int spec_type, char *event, + int ldi_result, void *ev_data) +{ + ldi_ev_callback_impl_t *lecp; + list_t *listp; + char *lec_event; + int found = 0; + + ASSERT(dip); + ASSERT(dev != DDI_DEV_T_NONE); + ASSERT(dev != NODEV); + ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || + (spec_type == S_IFCHR || spec_type == S_IFBLK)); + ASSERT(event); + ASSERT(ldi_native_event(event)); + ASSERT(ldi_result == LDI_EV_SUCCESS || ldi_result == LDI_EV_FAILURE); + + LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): entered: dip=%p, result=%d" + " event=%s", (void *)dip, ldi_result, event)); + + ldi_ev_lock(); + listp = &ldi_ev_callback_list.le_head; + for (lecp = list_head(listp); lecp; lecp = list_next(listp, lecp)) { + + if (lecp->lec_finalize == NULL) { + LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): No " + "finalize. Skipping")); + continue; /* Not interested in finalize */ + } + + /* + * Check if matching device + */ + if (!ldi_ev_device_match(lecp, dip, dev, spec_type)) + continue; + + /* + * It is valid for the LDI handle to be NULL during finalize. + * The layered driver may have done an LDI close in the notify + * callback. + */ + + /* + * Check if matching event + */ + lec_event = ldi_ev_get_type(lecp->lec_cookie); + if (strcmp(event, lec_event) != 0) { + LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): Not " + "matching event {%s,%s}. Skipping", + event, lec_event)); + continue; + } + + LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): calling finalize")); + + found = 1; + + lecp->lec_finalize(lecp->lec_lhp, lecp->lec_cookie, + ldi_result, lecp->lec_arg, ev_data); + + /* + * If LDI native event and LDI handle closed in context + * of notify, NULL out the finalize callback as we have + * already called the 1 finalize above allowed in this situation + */ + if (lecp->lec_lhp == NULL && + ldi_native_cookie(lecp->lec_cookie)) { + LDI_EVDBG((CE_NOTE, + "ldi_invoke_finalize(): NULLing finalize after " + "calling 1 finalize following ldi_close")); + lecp->lec_finalize = NULL; + } + } + ldi_ev_unlock(); + + if (found) + return; + + LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): no matching callbacks")); +} + +/* + * Framework function to be called from a layered driver to propagate + * LDI "finalize" events to exported minors. + * + * This function is a public interface exported by the LDI framework + * for use by layered drivers to propagate device events up the software + * stack. + */ +void +ldi_ev_finalize(dev_info_t *dip, minor_t minor, int spec_type, int ldi_result, + ldi_ev_cookie_t cookie, void *ev_data) +{ + dev_t dev; + major_t major; + char *evname; + int ct_result = (ldi_result == LDI_EV_SUCCESS) ? 
+ CT_EV_SUCCESS : CT_EV_FAILURE; + uint_t ct_evtype; + + ASSERT(dip); + ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); + ASSERT(ldi_result == LDI_EV_SUCCESS || ldi_result == LDI_EV_FAILURE); + ASSERT(ldi_native_cookie(cookie)); + + LDI_EVDBG((CE_NOTE, "ldi_ev_finalize: entered: dip=%p", (void *)dip)); + + major = ddi_driver_major(dip); + if (major == (major_t)-1) { + char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, path); + cmn_err(CE_WARN, "ldi_ev_finalize: cannot derive major number " + "for device %s", path); + kmem_free(path, MAXPATHLEN); + return; + } + dev = makedevice(major, minor); + + evname = ldi_ev_get_type(cookie); + + LDI_EVDBG((CE_NOTE, "ldi_ev_finalize: calling contracts")); + ct_evtype = ldi_contract_event(evname); + contract_device_finalize(dip, dev, spec_type, ct_evtype, ct_result); + + LDI_EVDBG((CE_NOTE, "ldi_ev_finalize: calling ldi_invoke_finalize")); + ldi_invoke_finalize(dip, dev, spec_type, evname, ldi_result, ev_data); +} + +int +ldi_ev_remove_callbacks(ldi_callback_id_t id) +{ + ldi_ev_callback_impl_t *lecp; + ldi_ev_callback_impl_t *next; + ldi_ev_callback_impl_t *found; + list_t *listp; + + ASSERT(!servicing_interrupt()); + + if (id == 0) { + cmn_err(CE_WARN, "ldi_ev_remove_callbacks: Invalid ID 0"); + return (LDI_EV_FAILURE); + } + + LDI_EVDBG((CE_NOTE, "ldi_ev_remove_callbacks: entered: id=%p", + (void *)id)); + + ldi_ev_lock(); + + listp = &ldi_ev_callback_list.le_head; + next = found = NULL; + for (lecp = list_head(listp); lecp; lecp = next) { + next = list_next(listp, lecp); + if (lecp->lec_id == id) { + ASSERT(found == NULL); + list_remove(listp, lecp); + found = lecp; + } + } + ldi_ev_unlock(); + + if (found == NULL) { + cmn_err(CE_WARN, "No LDI event handler for id (%p)", + (void *)id); + return (LDI_EV_SUCCESS); + } + + if (!ldi_native_cookie(found->lec_cookie)) { + ASSERT(found->lec_notify == NULL); + if (ddi_remove_event_handler((ddi_callback_id_t)id) + != DDI_SUCCESS) { + cmn_err(CE_WARN, "failed to remove NDI event handler " + "for id (%p)", (void *)id); + ldi_ev_lock(); + list_insert_tail(listp, found); + ldi_ev_unlock(); + return (LDI_EV_FAILURE); + } + LDI_EVDBG((CE_NOTE, "ldi_ev_remove_callbacks: NDI event " + "service removal succeeded")); + } else { + LDI_EVDBG((CE_NOTE, "ldi_ev_remove_callbacks: removed " + "LDI native callbacks")); + } + kmem_free(found, sizeof (ldi_ev_callback_impl_t)); + + return (LDI_EV_SUCCESS); +} diff --git a/usr/src/uts/common/os/modctl.c b/usr/src/uts/common/os/modctl.c index 31108c215b..1f821fef85 100644 --- a/usr/src/uts/common/os/modctl.c +++ b/usr/src/uts/common/os/modctl.c @@ -161,8 +161,6 @@ extern int make_mbind(char *, int, char *, struct bind **); static int minorperm_loaded = 0; - - void mod_setup(void) { @@ -798,6 +796,217 @@ modctl_getmaj(char *uname, uint_t ulen, int *umajorp) return (0); } +static char ** +convert_constraint_string(char *constraints, size_t len) +{ + int i; + int n; + char *p; + char **array; + + ASSERT(constraints != NULL); + ASSERT(len > 0); + + for (i = 0, p = constraints; strlen(p) > 0; i++, p += strlen(p) + 1); + + n = i; + + if (n == 0) { + kmem_free(constraints, len); + return (NULL); + } + + array = kmem_alloc((n + 1) * sizeof (char *), KM_SLEEP); + + for (i = 0, p = constraints; i < n; i++, p += strlen(p) + 1) { + array[i] = i_ddi_strdup(p, KM_SLEEP); + } + array[n] = NULL; + + kmem_free(constraints, len); + + return (array); +} +/*ARGSUSED*/ +static int +modctl_retire(char *path, char *uconstraints, size_t ulen) +{ + char *pathbuf; + char 
*devpath; + size_t pathsz; + int retval; + char *constraints; + char **cons_array; + + if (path == NULL) + return (EINVAL); + + if ((uconstraints == NULL) ^ (ulen == 0)) + return (EINVAL); + + pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + retval = copyinstr(path, pathbuf, MAXPATHLEN, &pathsz); + if (retval != 0) { + kmem_free(pathbuf, MAXPATHLEN); + return (retval); + } + devpath = i_ddi_strdup(pathbuf, KM_SLEEP); + kmem_free(pathbuf, MAXPATHLEN); + + /* + * First check if the device is already retired. + * If it is, this becomes a NOP + */ + if (e_ddi_device_retired(devpath)) { + cmn_err(CE_NOTE, "Device: already retired: %s", devpath); + kmem_free(devpath, strlen(devpath) + 1); + return (0); + } + + cons_array = NULL; + if (uconstraints) { + constraints = kmem_alloc(ulen, KM_SLEEP); + if (copyin(uconstraints, constraints, ulen)) { + kmem_free(constraints, ulen); + kmem_free(devpath, strlen(devpath) + 1); + return (EFAULT); + } + cons_array = convert_constraint_string(constraints, ulen); + } + + /* + * Try to retire the device first. The following + * routine will return an error only if the device + * is not retireable i.e. retire constraints forbid + * a retire. A return of success from this routine + * indicates that device is retireable. + */ + retval = e_ddi_retire_device(devpath, cons_array); + if (retval != DDI_SUCCESS) { + cmn_err(CE_WARN, "constraints forbid retire: %s", devpath); + kmem_free(devpath, strlen(devpath) + 1); + return (ENOTSUP); + } + + /* + * Ok, the retire succeeded. Persist the retire. + * If retiring a nexus, we need to only persist the + * nexus retire. Any children of a retired nexus + * are automatically covered by the retire store + * code. + */ + retval = e_ddi_retire_persist(devpath); + if (retval != 0) { + cmn_err(CE_WARN, "Failed to persist device retire: error %d: " + "%s", retval, devpath); + kmem_free(devpath, strlen(devpath) + 1); + return (retval); + } + if (moddebug & MODDEBUG_RETIRE) + cmn_err(CE_NOTE, "Persisted retire of device: %s", devpath); + + kmem_free(devpath, strlen(devpath) + 1); + return (0); +} + +static int +modctl_is_retired(char *path, int *statep) +{ + char *pathbuf; + char *devpath; + size_t pathsz; + int error; + int status; + + if (path == NULL || statep == NULL) + return (EINVAL); + + pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + error = copyinstr(path, pathbuf, MAXPATHLEN, &pathsz); + if (error != 0) { + kmem_free(pathbuf, MAXPATHLEN); + return (error); + } + devpath = i_ddi_strdup(pathbuf, KM_SLEEP); + kmem_free(pathbuf, MAXPATHLEN); + + if (e_ddi_device_retired(devpath)) + status = 1; + else + status = 0; + kmem_free(devpath, strlen(devpath) + 1); + + return (copyout(&status, statep, sizeof (status)) ? EFAULT : 0); +} + +static int +modctl_unretire(char *path) +{ + char *pathbuf; + char *devpath; + size_t pathsz; + int retired; + int retval; + + if (path == NULL) + return (EINVAL); + + pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + retval = copyinstr(path, pathbuf, MAXPATHLEN, &pathsz); + if (retval != 0) { + kmem_free(pathbuf, MAXPATHLEN); + return (retval); + } + devpath = i_ddi_strdup(pathbuf, KM_SLEEP); + kmem_free(pathbuf, MAXPATHLEN); + + /* + * We check if a device is retired (first) before + * unpersisting the retire, because we use the + * retire store to determine if a device is retired. + * If we unpersist first, the device will always appear + * to be unretired. For the rationale behind unpersisting + * a device that is not retired, see the next comment. 
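The constraint list copied in by modctl_retire() above is a packed sequence of NUL-terminated physical paths ending with an empty string, which is exactly what convert_constraint_string() walks. A userland sketch of building such a buffer; the helper name is illustrative only and is not taken from the retire agent.

#include <string.h>

static size_t
xx_pack_constraints(char *buf, size_t bufsz, const char **paths, int npaths)
{
        size_t off = 0;
        int i;

        for (i = 0; i < npaths; i++) {
                size_t len = strlen(paths[i]) + 1;      /* include NUL */

                if (off + len + 1 > bufsz)
                        return (0);                     /* won't fit */
                (void) memcpy(buf + off, paths[i], len);
                off += len;
        }
        buf[off++] = '\0';              /* empty string terminates the list */

        return (off);                   /* pass this as "ulen" */
}

The returned length is what would be handed to modctl_retire() as ulen alongside the buffer itself.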
+ */ + retired = e_ddi_device_retired(devpath); + + /* + * We call unpersist unconditionally because the lookup + * for retired devices (e_ddi_device_retired()), skips "bypassed" + * devices. We still want to be able remove "bypassed" entries + * from the persistent store, so we unpersist unconditionally + * i.e. whether or not the entry is found on a lookup. + * + * e_ddi_retire_unpersist() returns 1 if it found and cleared + * an entry from the retire store or 0 otherwise. + */ + if (e_ddi_retire_unpersist(devpath)) + if (moddebug & MODDEBUG_RETIRE) { + cmn_err(CE_NOTE, "Unpersisted retire of device: %s", + devpath); + } + + /* + * Check if the device is already unretired. If so, + * the unretire becomes a NOP + */ + if (!retired) { + cmn_err(CE_NOTE, "Not retired: %s", devpath); + kmem_free(devpath, strlen(devpath) + 1); + return (0); + } + + retval = e_ddi_unretire_device(devpath); + if (retval != 0) { + cmn_err(CE_WARN, "cannot unretire device: error %d, path %s\n", + retval, devpath); + } + + kmem_free(devpath, strlen(devpath) + 1); + + return (retval); +} + static int modctl_getname(char *uname, uint_t ulen, int *umajorp) { @@ -2069,6 +2278,18 @@ modctl(int cmd, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, error = modctl_moddevname((int)a1, a2, a3); break; + case MODRETIRE: /* retire device named by physpath a1 */ + error = modctl_retire((char *)a1, (char *)a2, (size_t)a3); + break; + + case MODISRETIRED: /* check if a device is retired. */ + error = modctl_is_retired((char *)a1, (int *)a2); + break; + + case MODUNRETIRE: /* unretire device named by physpath a1 */ + error = modctl_unretire((char *)a1); + break; + default: error = EINVAL; break; diff --git a/usr/src/uts/common/os/retire_store.c b/usr/src/uts/common/os/retire_store.c new file mode 100644 index 0000000000..f1c3db9445 --- /dev/null +++ b/usr/src/uts/common/os/retire_store.c @@ -0,0 +1,457 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
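The new MODRETIRE, MODISRETIRED and MODUNRETIRE commands dispatched in modctl() above are reached from userland through the private modctl(2) entry point; the retire agent delivered with this case (rio_main.c) is the authoritative consumer. Purely to illustrate the argument convention checked by modctl_retire() and modctl_is_retired(), a hypothetical sketch — the userland declaration of modctl() and its error convention are assumptions here:

#include <sys/modctl.h>

static int
xx_retire(char *physpath)
{
        /* no constraint list: NULL buffer and zero length */
        return (modctl(MODRETIRE, physpath, NULL, 0));
}

static int
xx_is_retired(char *physpath, int *statep)
{
        return (modctl(MODISRETIRED, physpath, statep));
}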
+ */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/ddi.h> +#include <sys/sunddi.h> +#include <sys/sunndi.h> +#include <sys/ddi_impldefs.h> +#include <sys/ddi_implfuncs.h> +#include <sys/list.h> +#include <sys/reboot.h> +#include <sys/sysmacros.h> +#include <sys/console.h> +#include <sys/devcache.h> + +/* + * The nvpair name in the I/O retire specific sub-nvlist + */ +#define RIO_STORE_VERSION_STR "rio-store-version" +#define RIO_STORE_MAGIC_STR "rio-store-magic" +#define RIO_STORE_FLAGS_STR "rio-store-flags" + +#define RIO_STORE_VERSION_1 1 +#define RIO_STORE_VERSION RIO_STORE_VERSION_1 + +/* + * decoded retire list element + */ + +typedef enum rio_store_flags { + RIO_STORE_F_INVAL = 0, + RIO_STORE_F_RETIRED = 1, + RIO_STORE_F_BYPASS = 2 +} rio_store_flags_t; + +typedef struct rio_store { + char *rst_devpath; + rio_store_flags_t rst_flags; + list_node_t rst_next; +} rio_store_t; + +#define RIO_STORE_MAGIC 0x601fcace /* retire */ + +static int rio_store_decode(nvf_handle_t nvfh, nvlist_t *line_nvl, char *name); +static int rio_store_encode(nvf_handle_t nvfh, nvlist_t **ret_nvl); +static void retire_list_free(nvf_handle_t nvfh); + + +/* + * Retire I/O persistent store registration info + */ +static nvf_ops_t rio_store_ops = { + "/etc/devices/retire_store", /* path to store */ + rio_store_decode, /* decode nvlist into retire_list */ + rio_store_encode, /* encode retire_list into nvlist */ + retire_list_free, /* free retire_list */ + NULL /* write complete callback */ +}; + +static nvf_handle_t rio_store_handle; +static char store_path[MAXPATHLEN]; +static int store_debug = 0; +static int bypass_msg = 0; +static int retire_msg = 0; + +#define STORE_DEBUG 0x0001 +#define STORE_TRACE 0x0002 + +#define STORE_DBG(args) if (store_debug & STORE_DEBUG) cmn_err args +#define STORE_TRC(args) if (store_debug & STORE_TRACE) cmn_err args + +/* + * We don't use the simple read disable offered by the + * caching framework (see devcache.c) as it will not + * have the desired effect of bypassing the persistent + * store. A simple read disable will + * + * 1. cause any additions to the cache to destroy the + * existing on-disk cache + * + * 2. prevent deletions from the existing on-disk + * cache which is needed for recovery from bad + * retire decisions. + * + * Use the following tunable instead + * + */ +int ddi_retire_store_bypass = 0; + + + +/* + * Initialize retire store data structures + */ +void +retire_store_init(void) +{ + if (boothowto & RB_ASKNAME) { + + printf("Retire store [%s] (/dev/null to bypass): ", + rio_store_ops.nvfr_cache_path); + console_gets(store_path, sizeof (store_path) - 1); + store_path[sizeof (store_path) - 1] = '\0'; + + if (strcmp(store_path, "/dev/null") == 0) { + ddi_retire_store_bypass = 1; + } else if (store_path[0] != '\0') { + if (store_path[0] != '/') { + printf("Invalid store path: %s. 
Using default" + "\n", store_path); + } else { + rio_store_ops.nvfr_cache_path = store_path; + } + } + } + + rio_store_handle = nvf_register_file(&rio_store_ops); + + list_create(nvf_list(rio_store_handle), sizeof (rio_store_t), + offsetof(rio_store_t, rst_next)); +} + +/* + * Read and populate the in-core retire store + */ +void +retire_store_read(void) +{ + rw_enter(nvf_lock(rio_store_handle), RW_WRITER); + ASSERT(list_head(nvf_list(rio_store_handle)) == NULL); + (void) nvf_read_file(rio_store_handle); + rw_exit(nvf_lock(rio_store_handle)); + STORE_DBG((CE_NOTE, "Read on-disk retire store")); +} + +static void +rio_store_free(rio_store_t *rsp) +{ + int flag_mask = RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS; + + ASSERT(rsp); + ASSERT(rsp->rst_devpath); + ASSERT(rsp->rst_flags & RIO_STORE_F_RETIRED); + ASSERT(!(rsp->rst_flags & ~flag_mask)); + + STORE_TRC((CE_NOTE, "store: freed path: %s", rsp->rst_devpath)); + + kmem_free(rsp->rst_devpath, strlen(rsp->rst_devpath) + 1); + kmem_free(rsp, sizeof (*rsp)); +} + +static void +retire_list_free(nvf_handle_t nvfh) +{ + list_t *listp; + rio_store_t *rsp; + + ASSERT(nvfh == rio_store_handle); + ASSERT(RW_WRITE_HELD(nvf_lock(nvfh))); + + listp = nvf_list(nvfh); + while (rsp = list_head(listp)) { + list_remove(listp, rsp); + rio_store_free(rsp); + } + + STORE_DBG((CE_NOTE, "store: freed retire list")); +} + +static int +rio_store_decode(nvf_handle_t nvfh, nvlist_t *line_nvl, char *name) +{ + rio_store_t *rsp; + int32_t version; + int32_t magic; + int32_t flags; + int rval; + + ASSERT(nvfh == rio_store_handle); + ASSERT(RW_WRITE_HELD(nvf_lock(nvfh))); + ASSERT(name); + + version = 0; + rval = nvlist_lookup_int32(line_nvl, RIO_STORE_VERSION_STR, &version); + if (rval != 0 || version != RIO_STORE_VERSION) { + return (EINVAL); + } + + magic = 0; + rval = nvlist_lookup_int32(line_nvl, RIO_STORE_MAGIC_STR, &magic); + if (rval != 0 || magic != RIO_STORE_MAGIC) { + return (EINVAL); + } + + flags = 0; + rval = nvlist_lookup_int32(line_nvl, RIO_STORE_FLAGS_STR, &flags); + if (rval != 0 || flags != RIO_STORE_F_RETIRED) { + return (EINVAL); + } + + if (ddi_retire_store_bypass) { + flags |= RIO_STORE_F_BYPASS; + if (!bypass_msg) { + bypass_msg = 1; + cmn_err(CE_WARN, + "Bypassing retire store /etc/devices/retire_store"); + } + } + + rsp = kmem_zalloc(sizeof (rio_store_t), KM_SLEEP); + rsp->rst_devpath = i_ddi_strdup(name, KM_SLEEP); + rsp->rst_flags = flags; + list_insert_tail(nvf_list(nvfh), rsp); + + STORE_TRC((CE_NOTE, "store: added to retire list: %s", name)); + if (!retire_msg) { + retire_msg = 1; + cmn_err(CE_NOTE, "One or more I/O devices have been retired"); + } + + return (0); +} + +static int +rio_store_encode(nvf_handle_t nvfh, nvlist_t **ret_nvl) +{ + nvlist_t *nvl; + nvlist_t *line_nvl; + list_t *listp; + rio_store_t *rsp; + int rval; + + ASSERT(nvfh == rio_store_handle); + ASSERT(RW_WRITE_HELD(nvf_lock(nvfh))); + + *ret_nvl = NULL; + + nvl = NULL; + rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP); + if (rval != 0) { + return (DDI_FAILURE); + } + + listp = nvf_list(nvfh); + for (rsp = list_head(listp); rsp; rsp = list_next(listp, rsp)) { + int flag_mask = RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS; + int flags; + ASSERT(rsp->rst_devpath); + ASSERT(!(rsp->rst_flags & ~flag_mask)); + + line_nvl = NULL; + rval = nvlist_alloc(&line_nvl, NV_UNIQUE_NAME, KM_SLEEP); + if (rval != 0) { + line_nvl = NULL; + goto error; + } + + rval = nvlist_add_int32(line_nvl, RIO_STORE_VERSION_STR, + RIO_STORE_VERSION); + if (rval != 0) { + goto error; + } + rval = 
nvlist_add_int32(line_nvl, RIO_STORE_MAGIC_STR, + RIO_STORE_MAGIC); + if (rval != 0) { + goto error; + } + + /* don't save the bypass flag */ + flags = RIO_STORE_F_RETIRED; + rval = nvlist_add_int32(line_nvl, RIO_STORE_FLAGS_STR, + flags); + if (rval != 0) { + goto error; + } + + rval = nvlist_add_nvlist(nvl, rsp->rst_devpath, line_nvl); + if (rval != 0) { + goto error; + } + nvlist_free(line_nvl); + line_nvl = NULL; + } + + *ret_nvl = nvl; + STORE_DBG((CE_NOTE, "packed retire list into nvlist")); + return (DDI_SUCCESS); + +error: + if (line_nvl) + nvlist_free(line_nvl); + ASSERT(nvl); + nvlist_free(nvl); + return (DDI_FAILURE); +} + +int +e_ddi_retire_persist(char *devpath) +{ + rio_store_t *rsp; + rio_store_t *new_rsp; + list_t *listp; + char *new_path; + + STORE_DBG((CE_NOTE, "e_ddi_retire_persist: entered: %s", devpath)); + + new_rsp = kmem_zalloc(sizeof (*new_rsp), KM_SLEEP); + new_rsp->rst_devpath = new_path = i_ddi_strdup(devpath, KM_SLEEP); + new_rsp->rst_flags = RIO_STORE_F_RETIRED; + + rw_enter(nvf_lock(rio_store_handle), RW_WRITER); + + listp = nvf_list(rio_store_handle); + for (rsp = list_head(listp); rsp; rsp = list_next(listp, rsp)) { + int flag_mask = RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS; + ASSERT(!(rsp->rst_flags & ~flag_mask)); + + /* already there */ + if (strcmp(devpath, rsp->rst_devpath) == 0) { + /* explicit retire, clear bypass flag (if any) */ + rsp->rst_flags &= ~RIO_STORE_F_BYPASS; + ASSERT(rsp->rst_flags == RIO_STORE_F_RETIRED); + rw_exit(nvf_lock(rio_store_handle)); + kmem_free(new_path, strlen(new_path) + 1); + kmem_free(new_rsp, sizeof (*new_rsp)); + STORE_DBG((CE_NOTE, "store: already in. Clear bypass " + ": %s", devpath)); + return (0); + } + + } + + ASSERT(rsp == NULL); + list_insert_tail(listp, new_rsp); + + nvf_mark_dirty(rio_store_handle); + + rw_exit(nvf_lock(rio_store_handle)); + + nvf_wake_daemon(); + + STORE_DBG((CE_NOTE, "store: New, added to list, dirty: %s", devpath)); + + return (0); +} + +int +e_ddi_retire_unpersist(char *devpath) +{ + rio_store_t *rsp; + rio_store_t *next; + list_t *listp; + int is_dirty = 0; + + STORE_DBG((CE_NOTE, "e_ddi_retire_unpersist: entered: %s", devpath)); + + rw_enter(nvf_lock(rio_store_handle), RW_WRITER); + + listp = nvf_list(rio_store_handle); + for (rsp = list_head(listp); rsp; rsp = next) { + next = list_next(listp, rsp); + if (strcmp(devpath, rsp->rst_devpath) != 0) + continue; + + list_remove(listp, rsp); + rio_store_free(rsp); + + STORE_DBG((CE_NOTE, "store: found in list. Freed: %s", + devpath)); + + nvf_mark_dirty(rio_store_handle); + is_dirty = 1; + } + + rw_exit(nvf_lock(rio_store_handle)); + + if (is_dirty) + nvf_wake_daemon(); + + return (is_dirty); +} + +int +e_ddi_device_retired(char *devpath) +{ + list_t *listp; + rio_store_t *rsp; + size_t len; + int retired; + + retired = 0; + + rw_enter(nvf_lock(rio_store_handle), RW_READER); + + listp = nvf_list(rio_store_handle); + for (rsp = list_head(listp); rsp; rsp = list_next(listp, rsp)) { + int flag_mask = RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS; + ASSERT(!(rsp->rst_flags & ~flag_mask)); + + /* + * If the "bypass" flag is set, then the device + * is *not* retired for the current boot of the + * system. It indicates that the retire store + * was read but the devices in the retire store + * were not retired i.e. effectively the store + * was bypassed. For why we bother to even read + * the store when we bypass it, see the comments + * for the tunable ddi_retire_store_bypass. 
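 *
 * As an illustration of the lookup below (hypothetical paths, not
 * taken from this change): with a store entry for /pci@1f,0/scsi@2,
 * a lookup of /pci@1f,0/scsi@2 itself or of a child such as
 * /pci@1f,0/scsi@2/sd@0,0 reports the device as retired, while
 * /pci@1f,0/scsi@20 does not match, because the prefix match must
 * end exactly at a path component boundary (NUL or '/').
 *
 * A sketch of how the retire and unretire paths are presumed to use
 * the store interfaces declared in ddi_implfuncs.h:
 *
 *	if (!e_ddi_device_retired(devpath))
 *		(void) e_ddi_retire_persist(devpath);	retire path
 *	...
 *	(void) e_ddi_retire_unpersist(devpath);		unretire path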
+ */ + if (rsp->rst_flags & RIO_STORE_F_BYPASS) { + STORE_TRC((CE_NOTE, "store: found & bypassed: %s", + rsp->rst_devpath)); + continue; + } + + /* + * device is retired, if it or a parent exists + * in the in-core list + */ + len = strlen(rsp->rst_devpath); + if (strncmp(devpath, rsp->rst_devpath, len) != 0) + continue; + if (devpath[len] == '\0' || devpath[len] == '/') { + /* exact match or a child */ + retired = 1; + STORE_TRC((CE_NOTE, "store: found & !bypassed: %s", + devpath)); + break; + } + } + rw_exit(nvf_lock(rio_store_handle)); + + return (retired); +} diff --git a/usr/src/uts/common/os/sunmdi.c b/usr/src/uts/common/os/sunmdi.c index 0c6b1e3055..cec7a252b6 100644 --- a/usr/src/uts/common/os/sunmdi.c +++ b/usr/src/uts/common/os/sunmdi.c @@ -4777,6 +4777,292 @@ i_mdi_phci_offline(dev_info_t *dip, uint_t flags) return (rv); } +void +mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array) +{ + mdi_phci_t *ph; + mdi_client_t *ct; + mdi_pathinfo_t *pip; + mdi_pathinfo_t *next; + dev_info_t *cdip; + + if (!MDI_PHCI(dip)) + return; + + ph = i_devi_get_phci(dip); + if (ph == NULL) { + return; + } + + MDI_PHCI_LOCK(ph); + + if (MDI_PHCI_IS_OFFLINE(ph)) { + /* has no last path */ + MDI_PHCI_UNLOCK(ph); + return; + } + + pip = ph->ph_path_head; + while (pip != NULL) { + MDI_PI_LOCK(pip); + next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; + + ct = MDI_PI(pip)->pi_client; + i_mdi_client_lock(ct, pip); + MDI_PI_UNLOCK(pip); + + cdip = ct->ct_dip; + if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && + (i_mdi_client_compute_state(ct, ph) == + MDI_CLIENT_STATE_FAILED)) { + /* Last path. Mark client dip as retiring */ + i_mdi_client_unlock(ct); + MDI_PHCI_UNLOCK(ph); + (void) e_ddi_mark_retiring(cdip, cons_array); + MDI_PHCI_LOCK(ph); + pip = next; + } else { + i_mdi_client_unlock(ct); + pip = next; + } + } + + MDI_PHCI_UNLOCK(ph); + + return; +} + +void +mdi_phci_retire_notify(dev_info_t *dip, int *constraint) +{ + mdi_phci_t *ph; + mdi_client_t *ct; + mdi_pathinfo_t *pip; + mdi_pathinfo_t *next; + dev_info_t *cdip; + + if (!MDI_PHCI(dip)) + return; + + ph = i_devi_get_phci(dip); + if (ph == NULL) + return; + + MDI_PHCI_LOCK(ph); + + if (MDI_PHCI_IS_OFFLINE(ph)) { + MDI_PHCI_UNLOCK(ph); + /* not last path */ + return; + } + + if (ph->ph_unstable) { + MDI_PHCI_UNLOCK(ph); + /* can't check for constraints */ + *constraint = 0; + return; + } + + pip = ph->ph_path_head; + while (pip != NULL) { + MDI_PI_LOCK(pip); + next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; + + /* + * The mdi_pathinfo state is OK. Check the client state. + * If failover in progress fail the pHCI from offlining + */ + ct = MDI_PI(pip)->pi_client; + i_mdi_client_lock(ct, pip); + if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || + (ct->ct_unstable)) { + /* + * Failover is in progress, can't check for constraints + */ + MDI_PI_UNLOCK(pip); + i_mdi_client_unlock(ct); + MDI_PHCI_UNLOCK(ph); + *constraint = 0; + return; + } + MDI_PI_UNLOCK(pip); + + /* + * Check to see of we are retiring the last path of this + * client device... + */ + cdip = ct->ct_dip; + if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && + (i_mdi_client_compute_state(ct, ph) == + MDI_CLIENT_STATE_FAILED)) { + i_mdi_client_unlock(ct); + MDI_PHCI_UNLOCK(ph); + (void) e_ddi_retire_notify(cdip, constraint); + MDI_PHCI_LOCK(ph); + pip = next; + } else { + i_mdi_client_unlock(ct); + pip = next; + } + } + + MDI_PHCI_UNLOCK(ph); + + return; +} + +/* + * offline the path(s) hanging off the PHCI. 
If the + * last path to any client, check that constraints + * have been applied. + */ +void +mdi_phci_retire_finalize(dev_info_t *dip, int phci_only) +{ + mdi_phci_t *ph; + mdi_client_t *ct; + mdi_pathinfo_t *pip; + mdi_pathinfo_t *next; + dev_info_t *cdip; + int unstable = 0; + int constraint; + + if (!MDI_PHCI(dip)) + return; + + ph = i_devi_get_phci(dip); + if (ph == NULL) { + /* no last path and no pips */ + return; + } + + MDI_PHCI_LOCK(ph); + + if (MDI_PHCI_IS_OFFLINE(ph)) { + MDI_PHCI_UNLOCK(ph); + /* no last path and no pips */ + return; + } + + /* + * Check to see if the pHCI can be offlined + */ + if (ph->ph_unstable) { + unstable = 1; + } + + pip = ph->ph_path_head; + while (pip != NULL) { + MDI_PI_LOCK(pip); + next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; + + /* + * if failover in progress fail the pHCI from offlining + */ + ct = MDI_PI(pip)->pi_client; + i_mdi_client_lock(ct, pip); + if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || + (ct->ct_unstable)) { + unstable = 1; + } + MDI_PI_UNLOCK(pip); + + /* + * Check to see of we are removing the last path of this + * client device... + */ + cdip = ct->ct_dip; + if (!phci_only && cdip && + (i_ddi_node_state(cdip) >= DS_INITIALIZED) && + (i_mdi_client_compute_state(ct, ph) == + MDI_CLIENT_STATE_FAILED)) { + i_mdi_client_unlock(ct); + MDI_PHCI_UNLOCK(ph); + /* + * We don't retire clients we just retire the + * path to a client. If it is the last path + * to a client, constraints are checked and + * if we pass the last path is offlined. MPXIO will + * then fail all I/Os to the client. Since we don't + * want to retire the client on a path error + * set constraint = 0 so that the client dip + * is not retired. + */ + constraint = 0; + (void) e_ddi_retire_finalize(cdip, &constraint); + MDI_PHCI_LOCK(ph); + pip = next; + } else { + i_mdi_client_unlock(ct); + pip = next; + } + } + + /* + * Cannot offline pip(s) + */ + if (unstable) { + cmn_err(CE_WARN, "PHCI in transient state, cannot " + "retire, dip = %p", (void *)dip); + MDI_PHCI_UNLOCK(ph); + return; + } + + /* + * Mark the pHCI as offline + */ + MDI_PHCI_SET_OFFLINE(ph); + + /* + * Mark the child mdi_pathinfo nodes as transient + */ + pip = ph->ph_path_head; + while (pip != NULL) { + MDI_PI_LOCK(pip); + next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; + MDI_PI_SET_OFFLINING(pip); + MDI_PI_UNLOCK(pip); + pip = next; + } + MDI_PHCI_UNLOCK(ph); + /* + * Give a chance for any pending commands to execute + */ + delay(1); + MDI_PHCI_LOCK(ph); + pip = ph->ph_path_head; + while (pip != NULL) { + next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; + (void) i_mdi_pi_offline(pip, 0); + MDI_PI_LOCK(pip); + ct = MDI_PI(pip)->pi_client; + if (!MDI_PI_IS_OFFLINE(pip)) { + cmn_err(CE_WARN, "PHCI busy, cannot offline path: " + "PHCI dip = %p", (void *)dip); + MDI_PI_UNLOCK(pip); + MDI_PHCI_SET_ONLINE(ph); + MDI_PHCI_UNLOCK(ph); + return; + } + MDI_PI_UNLOCK(pip); + pip = next; + } + MDI_PHCI_UNLOCK(ph); + + return; +} + +void +mdi_phci_unretire(dev_info_t *dip) +{ + ASSERT(MDI_PHCI(dip)); + + /* + * Online the phci + */ + i_mdi_phci_online(dip); +} + /*ARGSUSED*/ static int i_mdi_client_offline(dev_info_t *dip, uint_t flags) diff --git a/usr/src/uts/common/os/sunndi.c b/usr/src/uts/common/os/sunndi.c index 58d76dbd69..627f8fe6c6 100644 --- a/usr/src/uts/common/os/sunndi.c +++ b/usr/src/uts/common/os/sunndi.c @@ -68,6 +68,7 @@ #include <sys/nvpair.h> #include <sys/sunmdi.h> #include <sys/fs/dv_node.h> +#include <sys/sunldi_impl.h> #ifdef __sparc #include <sys/archsystm.h> /* getpil/setpil */ 
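The mdi_phci_mark_retiring(), mdi_phci_retire_notify() and mdi_phci_retire_finalize() routines added above split a pHCI retire into separate mark, constraint-check and finalize phases, so that any client device reached only through this pHCI can be evaluated before its last path is offlined. A minimal sketch of the calling sequence the retire framework is presumably expected to use (local names and the empty constraint list are illustrative; the framework itself is not part of this hunk):

	int constraint = 1;		/* cleared by the notify phase if blocked */
	char *cons_array[] = { NULL };	/* illustrative: no constraints supplied */

	mdi_phci_mark_retiring(dip, cons_array);	/* mark clients on their last path */
	mdi_phci_retire_notify(dip, &constraint);	/* apply contract/LDI constraints */
	if (constraint)
		mdi_phci_retire_finalize(dip, 1);	/* offline the paths (phci_only) */

A pHCI retired in error can later be recovered with mdi_phci_unretire(), which simply brings the pHCI back online.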
@@ -853,6 +854,20 @@ ndi_dc_devi_create(struct devctl_iocdata *dcp, dev_info_t *pdip, int flags, */ if (dcp->flags & DEVCTL_OFFLINE) { /* + * In the unlikely event that the dip was somehow attached by + * the userland process (and device contracts or LDI opens + * were registered against the dip) after it was created by + * a previous DEVCTL_CONSTRUCT call, we start notify + * proceedings on this dip. Note that we don't need to + * return the dip after a failure of the notify since + * for a contract or LDI handle to be created the dip was + * already available to the user. + */ + if (e_ddi_offline_notify(cdip) == DDI_FAILURE) { + return (EBUSY); + } + + /* * hand set the OFFLINE flag to prevent any asynchronous * autoconfiguration operations from attaching this node. */ @@ -860,6 +875,8 @@ ndi_dc_devi_create(struct devctl_iocdata *dcp, dev_info_t *pdip, int flags, DEVI_SET_DEVICE_OFFLINE(cdip); mutex_exit(&(DEVI(cdip)->devi_lock)); + e_ddi_offline_finalize(cdip, DDI_SUCCESS); + rv = ndi_devi_bind_driver(cdip, flags); if (rv != NDI_SUCCESS) { (void) ndi_devi_offline(cdip, NDI_DEVI_REMOVE); diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile index 2c4defc38d..b4591f05d9 100644 --- a/usr/src/uts/common/sys/Makefile +++ b/usr/src/uts/common/sys/Makefile @@ -845,7 +845,9 @@ SYSEVENTHDRS= \ CONTRACTHDRS= \ process.h \ - process_impl.h + process_impl.h \ + device.h \ + device_impl.h USBHDRS= \ usba.h \ diff --git a/usr/src/uts/common/sys/autoconf.h b/usr/src/uts/common/sys/autoconf.h index 3b10e97c89..e7fbd33267 100644 --- a/usr/src/uts/common/sys/autoconf.h +++ b/usr/src/uts/common/sys/autoconf.h @@ -104,6 +104,11 @@ struct devnames { #define DDI_INTR_API 0x0200 /* interrupt interface messages */ #define DDI_INTR_IMPL 0x0400 /* interrupt implementation msgs */ #define DDI_INTR_NEXUS 0x0800 /* interrupt messages from nexuses */ +#define DDI_DBG_RETIRE 0x1000 /* Retire related messages */ +#define DDI_DBG_RTR_VRBOSE 0x2000 /* Verbose Retire messages */ +#define DDI_DBG_RTR_TRACE 0x4000 /* Trace Retire messages */ +#define LDI_EV_DEBUG 0x8000 /* LDI events debug messages */ +#define LDI_EV_TRACE 0x10000 /* LDI events trace messages */ extern int ddidebug; @@ -118,6 +123,11 @@ extern int ddidebug; #define DDI_INTR_APIDBG(args) if (ddidebug & DDI_INTR_API) cmn_err args #define DDI_INTR_IMPLDBG(args) if (ddidebug & DDI_INTR_IMPL) cmn_err args #define DDI_INTR_NEXDBG(args) if (ddidebug & DDI_INTR_NEXUS) cmn_err args +#define RIO_DEBUG(args) if (ddidebug & DDI_DBG_RETIRE) cmn_err args +#define RIO_VERBOSE(args) if (ddidebug & DDI_DBG_RTR_VRBOSE) cmn_err args +#define RIO_TRACE(args) if (ddidebug & DDI_DBG_RTR_TRACE) cmn_err args +#define LDI_EVDBG(args) if (ddidebug & LDI_EV_DEBUG) cmn_err args +#define LDI_EVTRC(args) if (ddidebug & LDI_EV_TRACE) cmn_err args #else #define NDI_CONFIG_DEBUG(args) #define BMDPRINTF(args) @@ -129,6 +139,11 @@ extern int ddidebug; #define DDI_INTR_APIDBG(args) #define DDI_INTR_IMPLDBG(args) #define DDI_INTR_NEXDBG(args) +#define RIO_DEBUG(args) if (ddidebug & DDI_DBG_RETIRE) cmn_err args +#define RIO_VERBOSE(args) if (ddidebug & DDI_DBG_RTR_VRBOSE) cmn_err args +#define RIO_TRACE(args) if (ddidebug & DDI_DBG_RTR_TRACE) cmn_err args +#define LDI_EVDBG(args) if (ddidebug & LDI_EV_DEBUG) cmn_err args +#define LDI_EVTRC(args) if (ddidebug & LDI_EV_TRACE) cmn_err args #endif @@ -256,6 +271,15 @@ extern int i_ddi_reconfig(void); extern void i_ddi_set_sysavail(void); extern void i_ddi_set_reconfig(void); +/* I/O retire related */ +extern int 
e_ddi_retire_device(char *path, char **cons_array); +extern int e_ddi_unretire_device(char *path); +extern int e_ddi_mark_retiring(dev_info_t *dip, void *arg); +extern int e_ddi_retire_notify(dev_info_t *dip, void *arg); +extern int e_ddi_retire_finalize(dev_info_t *dip, void *arg); +extern void e_ddi_degrade_finalize(dev_info_t *dip); +extern void e_ddi_undegrade_finalize(dev_info_t *dip); + #endif /* _KERNEL */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/contract.h b/usr/src/uts/common/sys/contract.h index 163f90cbfa..0bef407b98 100644 --- a/usr/src/uts/common/sys/contract.h +++ b/usr/src/uts/common/sys/contract.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -72,6 +71,7 @@ typedef enum ctstate { typedef enum ct_typeid { CTT_PROCESS, /* process contract */ + CTT_DEVICE, /* device contract */ CTT_MAXTYPE } ct_typeid_t; diff --git a/usr/src/uts/common/sys/contract/device.h b/usr/src/uts/common/sys/contract/device.h new file mode 100644 index 0000000000..252cce3165 --- /dev/null +++ b/usr/src/uts/common/sys/contract/device.h @@ -0,0 +1,76 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef _SYS_CONTRACT_DEVICE_H +#define _SYS_CONTRACT_DEVICE_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/contract.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct ctmpl_device ctmpl_device_t; +typedef struct cont_device cont_device_t; + +/* + * ct_ev_* flags + */ +#define CT_DEV_EV_ONLINE 0x1 /* device is moving to online state */ +#define CT_DEV_EV_DEGRADED 0x2 /* device is moving to degraded state */ +#define CT_DEV_EV_OFFLINE 0x4 /* device is moving to offline state */ +#define CT_DEV_ALLEVENT 0x7 + +/* + * ctp_id values + */ +#define CTDP_ACCEPT 0x1 /* the acceptable set term */ +#define CTDP_NONEG 0x2 /* the non-negotiable term */ +#define CTDP_MINOR 0x4 /* the minor path term */ +#define CTDP_ALLPARAMS 0x7 + +#define CTDP_NONEG_CLEAR 0x0 /* clear the noneg flag */ +#define CTDP_NONEG_SET 0x1 /* set noneg */ + +/* + * Status fields + */ +#define CTDS_STATE "ctds_state" +#define CTDS_ASET "ctds_aset" +#define CTDS_NONEG "ctds_noneg" +#define CTDS_MINOR "ctds_minor" + +/* + * Max Time allowed for synchronous acknowledgement of a negotiation event + */ +#define CT_DEV_ACKTIME 60 /* 60 seconds */ + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_CONTRACT_DEVICE_H */ diff --git a/usr/src/uts/common/sys/contract/device_impl.h b/usr/src/uts/common/sys/contract/device_impl.h new file mode 100644 index 0000000000..1bc27c454d --- /dev/null +++ b/usr/src/uts/common/sys/contract/device_impl.h @@ -0,0 +1,93 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
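 *
 * A reading of the terms defined in <sys/contract/device.h> above,
 * offered as illustration rather than as normative documentation: the
 * acceptable set (CTDP_ACCEPT) lists the device states the contract
 * holder is prepared to tolerate. A device contract created with, say,
 *
 *	aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED;
 *
 * treats a transition to the offline state (CT_DEV_EV_OFFLINE) as a
 * breaking event; unless the non-negotiable term (CTDP_NONEG_SET) was
 * applied, that break is negotiated with the contract holder, who has
 * CT_DEV_ACKTIME (60) seconds to acknowledge it synchronously.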
+ */ + +#ifndef _SYS_CONTRACT_DEVICE_IMPL_H +#define _SYS_CONTRACT_DEVICE_IMPL_H + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/contract_impl.h> +#include <sys/dditypes.h> +#include <sys/contract/device.h> +#include <sys/fs/snode.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Result of negotiation end: event successfully occurred or not + */ +#define CT_EV_SUCCESS 150 +#define CT_EV_FAILURE 151 + +struct ctmpl_device { + ct_template_t ctd_ctmpl; + uint_t ctd_aset; + uint_t ctd_noneg; + char *ctd_minor; +}; + +struct cont_device { + contract_t cond_contract; /* common contract data */ + char *cond_minor; /* minor node resource in contract */ + dev_info_t *cond_dip; /* dip for minor node */ + dev_t cond_devt; /* dev_t of minor node */ + uint_t cond_spec; /* spec type of minor node */ + uint_t cond_aset; /* acceptable state set */ + uint_t cond_noneg; /* no negotiation if set */ + uint_t cond_state; /* current state of device */ + uint_t cond_neg; /* contract undergoing negotiation */ + uint64_t cond_currev_id; /* id of event being negotiated */ + uint_t cond_currev_type; /* type of event being negotiated */ + uint_t cond_currev_ack; /* ack/nack status of ev negotiation */ + list_node_t cond_next; /* linkage - devinfo's contracts */ +}; + +/* + * Kernel APIs + */ +extern ct_type_t *device_type; +/* + * struct proc; + */ +void contract_device_init(void); +ct_ack_t contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type); +void contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type); +void contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type); +int contract_device_open(dev_t dev, int spec_type, contract_t **ctpp); +void contract_device_remove_dip(dev_info_t *dip); +ct_ack_t contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type, + uint_t evtype); +void contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type, + uint_t evtype, int ct_result); +void contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, + int result); + +#ifdef __cplusplus +} +#endif + +#endif /* _SYS_CONTRACT_DEVICE_IMPL_H */ diff --git a/usr/src/uts/common/sys/contract_impl.h b/usr/src/uts/common/sys/contract_impl.h index c45cf06e60..7523de5bf0 100644 --- a/usr/src/uts/common/sys/contract_impl.h +++ b/usr/src/uts/common/sys/contract_impl.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
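 *
 * Assumed usage, based only on the declarations in device_impl.h above
 * and the ct_ack_t values defined later in this header: before a minor
 * node is offlined, the device framework asks the contract subsystem
 * for permission, roughly as
 *
 *	ct_ack_t ack = contract_device_offline(dip, dev, spec_type);
 *
 * where CT_ACK means every interested contract accepted the break,
 * CT_NACK means at least one contract disallowed it and the offline
 * should fail, and CT_NONE means no matching contracts exist. Once the
 * final outcome is known, contract_device_negend() closes out the
 * negotiation with a CT_EV_SUCCESS or CT_EV_FAILURE result.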
*/ @@ -50,6 +49,10 @@ extern "C" { #endif +extern int ct_debug; + +#define CT_DEBUG(args) if (ct_debug) cmn_err args + #ifdef _SYSCALL32 /* @@ -110,7 +113,7 @@ typedef struct ctmplops { int (*ctop_set)(struct ct_template *, ct_param_t *, const cred_t *); int (*ctop_get)(struct ct_template *, ct_param_t *); - int (*ctop_create)(struct ct_template *); + int (*ctop_create)(struct ct_template *, ctid_t *); uint_t allevents; } ctmplops_t; @@ -127,6 +130,7 @@ typedef struct ct_template { uint_t ctmpl_ev_info; /* term: informative events */ } ct_template_t; + typedef enum ct_listnum { CTEL_CONTRACT, /* ../contracts/type/<id>/events */ CTEL_BUNDLE, /* ../contracts/type/bundle */ @@ -139,6 +143,12 @@ typedef enum ctqflags { CTQ_REFFED = 2 /* queue is reference counted */ } ctqflags_t; +typedef enum ct_ack { + CT_ACK = 1, /* accept break */ + CT_NACK, /* disallow break */ + CT_NONE /* no matching contracts */ +} ct_ack_t; + /* * Contract event queue */ @@ -198,6 +208,12 @@ typedef struct contops { void (*contop_destroy)(struct contract *); void (*contop_status)(struct contract *, zone_t *, int, nvlist_t *, void *, model_t); + int (*contop_ack)(struct contract *, uint_t evtype, + uint64_t evid); + int (*contop_nack)(struct contract *, uint_t evtype, + uint64_t evid); + int (*contop_qack)(struct contract *, uint_t, uint64_t); + int (*contop_newct)(struct contract *); } contops_t; typedef ct_template_t *(ct_f_default_t)(void); @@ -221,6 +237,11 @@ typedef enum ctflags { CTF_INHERIT = 0x1 } ctflags_t; +typedef struct ct_time { + long ctm_total; /* Total time allowed for event */ + clock_t ctm_start; /* starting lbolt for event */ +} ct_time_t; + /* * Contract */ @@ -257,6 +278,8 @@ typedef struct contract { struct contract *ct_regent; /* [prospective] regent contract */ int ct_evcnt; /* number of critical events */ ct_kevent_t *ct_nevent; /* negotiation event */ + ct_time_t ct_ntime; /* negotiation time tracker */ + ct_time_t ct_qtime; /* quantum time tracker */ } contract_t; #define CTLF_COPYOUT 0x1 /* performing copyout */ @@ -284,7 +307,7 @@ int ctmpl_get(ct_template_t *, ct_param_t *); ct_template_t *ctmpl_dup(ct_template_t *); void ctmpl_activate(ct_template_t *); void ctmpl_clear(ct_template_t *); -int ctmpl_create(ct_template_t *); +int ctmpl_create(ct_template_t *, ctid_t *); /* * Contract functions @@ -294,12 +317,14 @@ int contract_abandon(contract_t *, struct proc *, int); int contract_adopt(contract_t *, struct proc *); void contract_destroy(contract_t *); void contract_exit(struct proc *); -int contract_ack(contract_t *, uint64_t); +int contract_ack(contract_t *ct, uint64_t evid, int cmd); +int contract_qack(contract_t *ct, uint64_t evid); +int contract_newct(contract_t *ct); /* * Event interfaces */ -void cte_publish_all(contract_t *, ct_kevent_t *, nvlist_t *, nvlist_t *); +uint64_t cte_publish_all(contract_t *, ct_kevent_t *, nvlist_t *, nvlist_t *); void cte_add_listener(ct_equeue_t *, ct_listener_t *); void cte_remove_listener(ct_listener_t *); void cte_reset_listener(ct_listener_t *); @@ -313,7 +338,7 @@ int cte_set_reliable(ct_listener_t *, const cred_t *); int contract_compar(const void *, const void *); void ctmpl_init(ct_template_t *, ctmplops_t *, ct_type_t *, void *); void ctmpl_copy(ct_template_t *, ct_template_t *); -int ctmpl_create_inval(ct_template_t *); +int ctmpl_create_inval(ct_template_t *, ctid_t *); int contract_ctor(contract_t *, ct_type_t *, ct_template_t *, void *, ctflags_t, struct proc *, int); void contract_hold(contract_t *); @@ -352,6 +377,13 @@ vnode_t 
*contract_vnode_get(contract_t *, vfs_t *); void contract_vnode_set(contract_t *, contract_vnode_t *, vnode_t *); int contract_vnode_clear(contract_t *, contract_vnode_t *); +/* + * Negotiation stubs + */ +int contract_ack_inval(contract_t *, uint_t, uint64_t); +int contract_qack_inval(contract_t *, uint_t, uint64_t); +int contract_qack_notsup(contract_t *, uint_t, uint64_t); + #ifdef __cplusplus } #endif diff --git a/usr/src/uts/common/sys/ctfs.h b/usr/src/uts/common/sys/ctfs.h index e6702044d1..b46a517f2c 100644 --- a/usr/src/uts/common/sys/ctfs.h +++ b/usr/src/uts/common/sys/ctfs.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -65,6 +64,7 @@ extern "C" { #define CT_CQREQ CTFS_CTL(2) /* Request an additional quantum */ #define CT_CADOPT CTFS_CTL(3) /* Adopt a contract */ #define CT_CNEWCT CTFS_CTL(4) /* Define new contract */ +#define CT_CNACK CTFS_CTL(5) /* nack a negotiation */ /* * Control codes for messages written to status files. diff --git a/usr/src/uts/common/sys/ddi_impldefs.h b/usr/src/uts/common/sys/ddi_impldefs.h index 3b99c60997..f5c227e5aa 100644 --- a/usr/src/uts/common/sys/ddi_impldefs.h +++ b/usr/src/uts/common/sys/ddi_impldefs.h @@ -194,6 +194,12 @@ struct dev_info { char *devi_addr_buf; /* buffer for devi_addr */ char *devi_rebinding_name; /* binding_name of rebind */ + /* For device contracts that have this dip's minor node as resource */ + kmutex_t devi_ct_lock; /* contract lock */ + kcondvar_t devi_ct_cv; /* contract cv */ + int devi_ct_count; /* # of outstanding responses */ + int devi_ct_neg; /* neg. 
occurred on dip */ + list_t devi_ct; }; #define DEVI(dev_info_type) ((struct dev_info *)(dev_info_type)) @@ -271,6 +277,11 @@ struct dev_info { #define DEVI_SET_DEVICE_ONLINE(dip) { \ ASSERT(mutex_owned(&DEVI(dip)->devi_lock)); \ + if (DEVI(dip)->devi_state & DEVI_DEVICE_DEGRADED) { \ + mutex_exit(&DEVI(dip)->devi_lock); \ + e_ddi_undegrade_finalize(dip); \ + mutex_enter(&DEVI(dip)->devi_lock); \ + } \ /* setting ONLINE clears DOWN, DEGRADED, OFFLINE */ \ DEVI(dip)->devi_state &= ~(DEVI_DEVICE_DOWN | \ DEVI_DEVICE_DEGRADED | DEVI_DEVICE_OFFLINE); \ @@ -297,12 +308,20 @@ struct dev_info { #define DEVI_SET_DEVICE_DEGRADED(dip) { \ ASSERT(mutex_owned(&DEVI(dip)->devi_lock)); \ ASSERT(!DEVI_IS_DEVICE_OFFLINE(dip)); \ + mutex_exit(&DEVI(dip)->devi_lock); \ + e_ddi_degrade_finalize(dip); \ + mutex_enter(&DEVI(dip)->devi_lock); \ DEVI(dip)->devi_state |= (DEVI_DEVICE_DEGRADED | DEVI_S_REPORT); \ } #define DEVI_SET_DEVICE_UP(dip) { \ ASSERT(mutex_owned(&DEVI(dip)->devi_lock)); \ ASSERT(!DEVI_IS_DEVICE_OFFLINE(dip)); \ + if (DEVI(dip)->devi_state & DEVI_DEVICE_DEGRADED) { \ + mutex_exit(&DEVI(dip)->devi_lock); \ + e_ddi_undegrade_finalize(dip); \ + mutex_enter(&DEVI(dip)->devi_lock); \ + } \ DEVI(dip)->devi_state &= ~(DEVI_DEVICE_DEGRADED | DEVI_DEVICE_DOWN); \ DEVI(dip)->devi_state |= DEVI_S_REPORT; \ } @@ -503,6 +522,11 @@ void i_devi_exit(dev_info_t *, uint_t c_mask, int has_lock); #define DEVI_REGISTERED_DEVID 0x00000020 /* device registered a devid */ #define DEVI_PHCI_SIGNALS_VHCI 0x00000040 /* pHCI ndi_devi_exit signals vHCI */ #define DEVI_REBIND 0x00000080 /* post initchild driver rebind */ +#define DEVI_RETIRED 0x00000100 /* device is retired */ +#define DEVI_RETIRING 0x00000200 /* being evaluated for retire */ +#define DEVI_R_CONSTRAINT 0x00000400 /* constraints have been applied */ +#define DEVI_R_BLOCKED 0x00000800 /* constraints block retire */ +#define DEVI_CT_NOP 0x00001000 /* NOP contract event occurred */ #define DEVI_BUSY_CHANGING(dip) (DEVI(dip)->devi_flags & DEVI_BUSY) #define DEVI_BUSY_OWNED(dip) (DEVI_BUSY_CHANGING(dip) && \ diff --git a/usr/src/uts/common/sys/ddi_implfuncs.h b/usr/src/uts/common/sys/ddi_implfuncs.h index 5105c4ce18..4aa213c1b2 100644 --- a/usr/src/uts/common/sys/ddi_implfuncs.h +++ b/usr/src/uts/common/sys/ddi_implfuncs.h @@ -20,7 +20,7 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -274,6 +274,15 @@ int e_devid_cache_to_devt_list(ddi_devid_t, char *, int *, dev_t **); void e_devid_cache_free_devt_list(int, dev_t *); /* + * I/O retire persistent store + */ +void retire_store_init(void); +void retire_store_read(void); +int e_ddi_retire_persist(char *devpath); +int e_ddi_retire_unpersist(char *devpath); +int e_ddi_device_retired(char *devpath); + +/* * Resource control functions to lock down device memory. */ extern int i_ddi_incr_locked_memory(proc_t *, rctl_qty_t); diff --git a/usr/src/uts/common/sys/ddi_obsolete.h b/usr/src/uts/common/sys/ddi_obsolete.h index c6a44c78de..84970dbb54 100644 --- a/usr/src/uts/common/sys/ddi_obsolete.h +++ b/usr/src/uts/common/sys/ddi_obsolete.h @@ -1,5 +1,5 @@ /* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -14,6 +14,7 @@ #include <sys/types.h> #include <sys/dditypes.h> +#include <sys/sunldi.h> #ifdef __cplusplus @@ -192,6 +193,15 @@ extern void repoutsw(int port, uint16_t *addr, int count); extern void repoutsd(int port, uint32_t *addr, int count); #endif +/* Obsolete LDI event interfaces */ +extern int ldi_get_eventcookie(ldi_handle_t, char *, + ddi_eventcookie_t *); +extern int ldi_add_event_handler(ldi_handle_t, ddi_eventcookie_t, + void (*handler)(ldi_handle_t, ddi_eventcookie_t, void *, void *), + void *, ldi_callback_id_t *); +extern int ldi_remove_event_handler(ldi_handle_t, ldi_callback_id_t); + + #endif /* not _DDI_STRICT */ #ifdef __cplusplus diff --git a/usr/src/uts/common/sys/fs/snode.h b/usr/src/uts/common/sys/fs/snode.h index ecef85390c..cd572d545c 100644 --- a/usr/src/uts/common/sys/fs/snode.h +++ b/usr/src/uts/common/sys/fs/snode.h @@ -119,6 +119,7 @@ struct snode { #define SSELFCLONE 0x2000 /* represents a self cloning device */ #define SNOFLUSH 0x4000 /* do not flush device on fsync */ #define SCLOSING 0x8000 /* in last close(9E) */ +#define SFENCED 0x10000 /* snode fenced off for I/O retire */ #ifdef _KERNEL /* @@ -128,6 +129,12 @@ struct snode { #define VTOCS(vp) (VTOS(VTOS(vp)->s_commonvp)) #define STOV(sp) ((sp)->s_vnode) +extern int spec_debug; + +#define SPEC_FENCE_DEBUG 0x0001 /* emit fence related debug messages */ + +#define FENDBG(args) if (spec_debug & SPEC_FENCE_DEBUG) cmn_err args + /* * Forward declarations @@ -167,6 +174,8 @@ void spec_snode_walk(int (*callback)(struct snode *, void *), void *); int spec_devi_open_count(struct snode *, dev_info_t **); int spec_is_clone(struct vnode *); int spec_is_selfclone(struct vnode *); +int spec_fence_snode(dev_info_t *dip, struct vnode *vp); +int spec_unfence_snode(dev_info_t *dip); /* diff --git a/usr/src/uts/common/sys/modctl.h b/usr/src/uts/common/sys/modctl.h index 255d02d7b2..9eab8025da 100644 --- a/usr/src/uts/common/sys/modctl.h +++ b/usr/src/uts/common/sys/modctl.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -258,6 +258,9 @@ struct modlinkage { #define MODDEVNAME 37 #define MODGETDEVFSPATH_MI_LEN 38 #define MODGETDEVFSPATH_MI 39 +#define MODRETIRE 40 +#define MODUNRETIRE 41 +#define MODISRETIRED 42 /* * sub cmds for MODEVENTS @@ -641,6 +644,7 @@ extern int modctl(int, ...); #define MODDEBUG_LOADMSG 0x80000000 /* print "[un]loading..." msg */ #define MODDEBUG_ERRMSG 0x40000000 /* print detailed error msgs */ #define MODDEBUG_LOADMSG2 0x20000000 /* print 2nd level msgs */ +#define MODDEBUG_RETIRE 0x10000000 /* print retire msgs */ #define MODDEBUG_FINI_EBUSY 0x00020000 /* pretend fini returns EBUSY */ #define MODDEBUG_NOAUL_IPP 0x00010000 /* no Autounloading ipp mods */ #define MODDEBUG_NOAUL_DACF 0x00008000 /* no Autounloading dacf mods */ diff --git a/usr/src/uts/common/sys/sunldi.h b/usr/src/uts/common/sys/sunldi.h index f80cc44f8f..71e9d9a7da 100644 --- a/usr/src/uts/common/sys/sunldi.h +++ b/usr/src/uts/common/sys/sunldi.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -60,6 +59,26 @@ typedef struct __ldi_handle *ldi_handle_t; typedef struct __ldi_callback_id *ldi_callback_id_t; +typedef struct __ldi_ev_cookie *ldi_ev_cookie_t; + +/* + * LDI event interface related + */ +#define LDI_EV_SUCCESS 0 +#define LDI_EV_FAILURE (-1) +#define LDI_EV_NONE (-2) /* no matching callbacks registered */ +#define LDI_EV_OFFLINE "LDI:EVENT:OFFLINE" +#define LDI_EV_DEGRADE "LDI:EVENT:DEGRADE" + +#define LDI_EV_CB_VERS_1 1 +#define LDI_EV_CB_VERS LDI_EV_CB_VERS_1 + +typedef struct ldi_ev_callback { + uint_t cb_vers; + int (*cb_notify)(ldi_handle_t, ldi_ev_cookie_t, void *, void *); + void (*cb_finalize)(ldi_handle_t, ldi_ev_cookie_t, int, void *, void *); +} ldi_ev_callback_t; + /* * LDI Ident manipulation functions */ @@ -93,13 +112,6 @@ extern int ldi_get_size(ldi_handle_t, uint64_t *); extern int ldi_prop_op(ldi_handle_t, ddi_prop_op_t, int, char *, caddr_t, int *); -extern int ldi_get_eventcookie(ldi_handle_t, char *, - ddi_eventcookie_t *); -extern int ldi_add_event_handler(ldi_handle_t, ddi_eventcookie_t, - void (*handler)(ldi_handle_t, ddi_eventcookie_t, void *, void *), - void *, ldi_callback_id_t *); -extern int ldi_remove_event_handler(ldi_handle_t, ldi_callback_id_t); - extern int ldi_strategy(ldi_handle_t, struct buf *); extern int ldi_dump(ldi_handle_t, caddr_t, daddr_t, int); extern int ldi_devmap(ldi_handle_t, devmap_cookie_t, offset_t, @@ -132,6 +144,20 @@ extern int ldi_get_otyp(ldi_handle_t, int *); extern int ldi_get_devid(ldi_handle_t, ddi_devid_t *); extern int ldi_get_minor_name(ldi_handle_t, char **); +/* + * LDI events related declarations + */ +extern int ldi_ev_get_cookie(ldi_handle_t lh, char *evname, + ldi_ev_cookie_t *cookiep); +extern char *ldi_ev_get_type(ldi_ev_cookie_t cookie); +extern int ldi_ev_register_callbacks(ldi_handle_t lh, + ldi_ev_cookie_t cookie, ldi_ev_callback_t *callb, + void *arg, ldi_callback_id_t *id); +extern int ldi_ev_notify(dev_info_t *dip, minor_t minor, int spec_type, + ldi_ev_cookie_t cookie, void *ev_data); +extern void ldi_ev_finalize(dev_info_t *dip, minor_t minor, int spec_type, + int ldi_result, ldi_ev_cookie_t cookie, void *ev_data); +extern int ldi_ev_remove_callbacks(ldi_callback_id_t id); #endif /* _KERNEL */ diff --git a/usr/src/uts/common/sys/sunldi_impl.h b/usr/src/uts/common/sys/sunldi_impl.h index 9cbffc3ed2..1156fe2c41 100644 --- a/usr/src/uts/common/sys/sunldi_impl.h +++ b/usr/src/uts/common/sys/sunldi_impl.h @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
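 *
 * Illustration of the new public event interfaces declared in
 * <sys/sunldi.h> above. The consumer, callback names (my_notify,
 * my_finalize) and state pointer (my_state) are hypothetical:
 *
 *	ldi_ev_cookie_t cookie;
 *	ldi_callback_id_t id;
 *	ldi_ev_callback_t cb;
 *
 *	cb.cb_vers = LDI_EV_CB_VERS;
 *	cb.cb_notify = my_notify;
 *	cb.cb_finalize = my_finalize;
 *
 *	if (ldi_ev_get_cookie(lh, LDI_EV_OFFLINE, &cookie) == LDI_EV_SUCCESS)
 *		(void) ldi_ev_register_callbacks(lh, cookie, &cb,
 *		    my_state, &id);
 *
 * A notify callback that returns LDI_EV_FAILURE plays the same role as
 * a contract CT_NACK: it vetoes the pending offline or degrade, and the
 * finalize callback later reports whether the event actually occurred.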
*/ @@ -53,6 +52,17 @@ extern "C" { #define LI_HASH_SZ 32 /* + * Obsolete LDI event interfaces are available for now but are deprecated and a + * warning will be issued to consumers. + */ +#define LDI_OBSOLETE_EVENT 1 + +/* + * Flag for LDI handle's lh_flags field + */ +#define LH_FLAGS_NOTIFY 0x0001 /* invoked in context of a notify */ + +/* * LDI initialization function */ void ldi_init(void); @@ -87,20 +97,24 @@ struct ldi_handle { /* protected by ldi_handle_hash_lock */ struct ldi_handle *lh_next; uint_t lh_ref; + uint_t lh_flags; /* unique/static fields in the handle */ uint_t lh_type; struct ldi_ident *lh_ident; vnode_t *lh_vp; +#ifdef LDI_OBSOLETE_EVENT /* fields protected by lh_lock */ kmutex_t lh_lock[1]; struct ldi_event *lh_events; +#endif }; /* * LDI event information */ +#ifdef LDI_OBSOLETE_EVENT typedef struct ldi_event { /* fields protected by le_lhp->lh_lock */ struct ldi_event *le_next; @@ -112,6 +126,36 @@ typedef struct ldi_event { void *le_arg; ddi_callback_id_t le_id; } ldi_event_t; +#endif + +typedef struct ldi_ev_callback_impl { + struct ldi_handle *lec_lhp; + dev_info_t *lec_dip; + dev_t lec_dev; + int lec_spec; + int (*lec_notify)(); + void (*lec_finalize)(); + void *lec_arg; + void *lec_cookie; + void *lec_id; + list_node_t lec_list; +} ldi_ev_callback_impl_t; + +struct ldi_ev_callback_list { + kmutex_t le_lock; + kcondvar_t le_cv; + int le_busy; + void *le_thread; + list_t le_head; +}; + +int ldi_invoke_notify(dev_info_t *dip, dev_t dev, int spec_type, char *event, + void *ev_data); +void ldi_invoke_finalize(dev_info_t *dip, dev_t dev, int spec_type, char *event, + int ldi_result, void *ev_data); +int e_ddi_offline_notify(dev_info_t *dip); +void e_ddi_offline_finalize(dev_info_t *dip, int result); + /* * LDI device usage interfaces diff --git a/usr/src/uts/common/sys/sunmdi.h b/usr/src/uts/common/sys/sunmdi.h index 75b4f83ef7..c4a42633be 100644 --- a/usr/src/uts/common/sys/sunmdi.h +++ b/usr/src/uts/common/sys/sunmdi.h @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -133,6 +133,14 @@ int mdi_devi_online(dev_info_t *, uint_t); int mdi_devi_offline(dev_info_t *, uint_t); /* + * MDI path retire interfaces + */ +void mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array); +void mdi_phci_retire_notify(dev_info_t *dip, int *constraint); +void mdi_phci_retire_finalize(dev_info_t *dip, int phci_only); +void mdi_phci_unretire(dev_info_t *dip); + +/* * MDI devinfo locking functions. 
*/ void mdi_devi_enter(dev_info_t *, int *); diff --git a/usr/src/uts/intel/ia32/ml/modstubs.s b/usr/src/uts/intel/ia32/ml/modstubs.s index 2d16849c3d..889a6e350a 100644 --- a/usr/src/uts/intel/ia32/ml/modstubs.s +++ b/usr/src/uts/intel/ia32/ml/modstubs.s @@ -457,6 +457,8 @@ fcnname/**/_info: \ NO_UNLOAD_STUB(specfs, spec_devi_open_count, nomod_minus_one); NO_UNLOAD_STUB(specfs, spec_is_clone, nomod_zero); NO_UNLOAD_STUB(specfs, spec_is_selfclone, nomod_zero); + NO_UNLOAD_STUB(specfs, spec_fence_snode, nomod_minus_one); + NO_UNLOAD_STUB(specfs, spec_unfence_snode, nomod_minus_one); END_MODULE(specfs); #endif diff --git a/usr/src/uts/sparc/ml/modstubs.s b/usr/src/uts/sparc/ml/modstubs.s index 5eade06f7b..a22853aba3 100644 --- a/usr/src/uts/sparc/ml/modstubs.s +++ b/usr/src/uts/sparc/ml/modstubs.s @@ -345,6 +345,8 @@ stubs_base: NO_UNLOAD_STUB(specfs, spec_devi_open_count, nomod_minus_one); NO_UNLOAD_STUB(specfs, spec_is_clone, nomod_zero); NO_UNLOAD_STUB(specfs, spec_is_selfclone, nomod_zero); + NO_UNLOAD_STUB(specfs, spec_fence_snode, nomod_minus_one); + NO_UNLOAD_STUB(specfs, spec_unfence_snode, nomod_minus_one); END_MODULE(specfs); #endif diff --git a/usr/src/uts/sun4/os/ddi_impl.c b/usr/src/uts/sun4/os/ddi_impl.c index 954ae51bf5..eb068ac4e9 100644 --- a/usr/src/uts/sun4/os/ddi_impl.c +++ b/usr/src/uts/sun4/os/ddi_impl.c @@ -54,6 +54,8 @@ #include <sys/fs/snode.h> #include <sys/ddi_isa.h> #include <sys/modhash.h> +#include <sys/modctl.h> +#include <sys/sunldi_impl.h> dev_info_t *get_intr_parent(dev_info_t *, dev_info_t *, ddi_intr_handle_impl_t *); @@ -2036,31 +2038,127 @@ visit_node(pnode_t nodeid, struct pta *ap) } } -/*ARGSUSED*/ +/* + * NOTE: The caller of this function must check for device contracts + * or LDI callbacks against this dip before setting the dip offline. + */ static int -set_dip_offline(dev_info_t *dip, void *arg) +set_infant_dip_offline(dev_info_t *dip, void *arg) { + char *path = (char *)arg; + ASSERT(dip); + ASSERT(arg); + + if (i_ddi_node_state(dip) >= DS_ATTACHED) { + (void) ddi_pathname(dip, path); + cmn_err(CE_WARN, "Attempt to set offline flag on attached " + "node: %s", path); + return (DDI_FAILURE); + } mutex_enter(&(DEVI(dip)->devi_lock)); if (!DEVI_IS_DEVICE_OFFLINE(dip)) DEVI_SET_DEVICE_OFFLINE(dip); mutex_exit(&(DEVI(dip)->devi_lock)); + return (DDI_SUCCESS); +} + +typedef struct result { + char *path; + int result; +} result_t; + +static int +dip_set_offline(dev_info_t *dip, void *arg) +{ + int end; + result_t *resp = (result_t *)arg; + + ASSERT(dip); + ASSERT(resp); + + /* + * We stop the walk if e_ddi_offline_notify() returns + * failure, because this implies that one or more consumers + * (either LDI or contract based) has blocked the offline. + * So there is no point in conitnuing the walk + */ + if (e_ddi_offline_notify(dip) == DDI_FAILURE) { + resp->result = DDI_FAILURE; + return (DDI_WALK_TERMINATE); + } + + /* + * If set_infant_dip_offline() returns failure, it implies + * that we failed to set a particular dip offline. This + * does not imply that the offline as a whole should fail. + * We want to do the best we can, so we continue the walk. + */ + if (set_infant_dip_offline(dip, resp->path) == DDI_SUCCESS) + end = DDI_SUCCESS; + else + end = DDI_FAILURE; + + e_ddi_offline_finalize(dip, end); + return (DDI_WALK_CONTINUE); } +/* + * The call to e_ddi_offline_notify() exists for the + * unlikely error case that a branch we are trying to + * create already exists and has device contracts or LDI + * event callbacks against it. 
+ * + * We allow create to succeed for such branches only if + * no constraints block the offline. + */ +static int +branch_set_offline(dev_info_t *dip, char *path) +{ + int circ; + int end; + result_t res; + + + if (e_ddi_offline_notify(dip) == DDI_FAILURE) { + return (DDI_FAILURE); + } + + if (set_infant_dip_offline(dip, path) == DDI_SUCCESS) + end = DDI_SUCCESS; + else + end = DDI_FAILURE; + + e_ddi_offline_finalize(dip, end); + + if (end == DDI_FAILURE) + return (DDI_FAILURE); + + res.result = DDI_SUCCESS; + res.path = path; + + ndi_devi_enter(dip, &circ); + ddi_walk_devs(ddi_get_child(dip), dip_set_offline, &res); + ndi_devi_exit(dip, circ); + + return (res.result); +} + /*ARGSUSED*/ static int create_prom_branch(void *arg, int has_changed) { - int circ, c; + int circ; int exists, rv; pnode_t nodeid; struct ptnode *tnp; dev_info_t *dip; struct pta *ap = arg; devi_branch_t *bp; + char *path; ASSERT(ap); ASSERT(ap->fdip == NULL); @@ -2086,6 +2184,7 @@ create_prom_branch(void *arg, int has_changed) if (ap->head == NULL) return (ENODEV); + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); rv = 0; while ((tnp = ap->head) != NULL) { ap->head = tnp->next; @@ -2112,32 +2211,28 @@ create_prom_branch(void *arg, int has_changed) kmem_free(tnp, sizeof (struct ptnode)); - if (dip == NULL) { - ndi_devi_exit(ap->pdip, circ); - rv = EIO; - continue; - } - - ASSERT(ddi_get_parent(dip) == ap->pdip); - /* * Hold the branch if it is not already held */ - if (!exists) + if (dip && !exists) { e_ddi_branch_hold(dip); + } - ASSERT(e_ddi_branch_held(dip)); + ASSERT(dip == NULL || e_ddi_branch_held(dip)); /* - * Set all dips in the branch offline so that + * Set all dips in the newly created branch offline so that * only a "configure" operation can attach * the branch */ - (void) set_dip_offline(dip, NULL); + if (dip == NULL || branch_set_offline(dip, path) + == DDI_FAILURE) { + ndi_devi_exit(ap->pdip, circ); + rv = EIO; + continue; + } - ndi_devi_enter(dip, &c); - ddi_walk_devs(ddi_get_child(dip), set_dip_offline, NULL); - ndi_devi_exit(dip, c); + ASSERT(ddi_get_parent(dip) == ap->pdip); ndi_devi_exit(ap->pdip, circ); @@ -2155,6 +2250,8 @@ create_prom_branch(void *arg, int has_changed) bp->devi_branch_callback(dip, bp->arg, 0); } + kmem_free(path, MAXPATHLEN); + return (rv); } @@ -2162,9 +2259,10 @@ static int sid_node_create(dev_info_t *pdip, devi_branch_t *bp, dev_info_t **rdipp) { int rv, circ, len; - int i, flags; + int i, flags, ret; dev_info_t *dip; char *nbuf; + char *path; static const char *noname = "<none>"; ASSERT(pdip); @@ -2258,9 +2356,23 @@ sid_node_create(dev_info_t *pdip, devi_branch_t *bp, dev_info_t **rdipp) *rdipp = dip; /* - * Set device offline - only the "configure" op should cause an attach + * Set device offline - only the "configure" op should cause an attach. + * Note that it is safe to set the dip offline without checking + * for either device contract or layered driver (LDI) based constraints + * since there cannot be any contracts or LDI opens of this device. + * This is because this node is a newly created dip with the parent busy + * held, so no other thread can come in and attach this dip. A dip that + * has never been attached cannot have contracts since by definition + * a device contract (an agreement between a process and a device minor + * node) can only be created against a device that has minor nodes + * i.e is attached. Similarly an LDI open will only succeed if the + * dip is attached. We assert below that the dip is not attached. 
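 *
 * For contrast, the general pattern used elsewhere in this change when
 * a dip may already have contract or LDI consumers is (sketch):
 *
 *	if (e_ddi_offline_notify(dip) == DDI_FAILURE)
 *		return (DDI_FAILURE);		a consumer blocked it
 *	...set the device offline...
 *	e_ddi_offline_finalize(dip, result);
 *
 * as done by branch_set_offline() and dip_set_offline() above; that
 * bracket is unnecessary here precisely because an infant dip can have
 * no such consumers.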
*/ - (void) set_dip_offline(dip, NULL); + ASSERT(i_ddi_node_state(dip) < DS_ATTACHED); + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + ret = set_infant_dip_offline(dip, path); + ASSERT(ret == DDI_SUCCESS); + kmem_free(path, MAXPATHLEN); return (rv); fail: diff --git a/usr/src/uts/sun4u/io/sbd.c b/usr/src/uts/sun4u/io/sbd.c index 8e41b8ea1f..ec1b7096ba 100644 --- a/usr/src/uts/sun4u/io/sbd.c +++ b/usr/src/uts/sun4u/io/sbd.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -5124,6 +5123,14 @@ sbd_get_comp_cond(dev_info_t *dip) return (SBD_COND_UNKNOWN); } + /* + * If retired, return FAILED + */ + if (DEVI(dip)->devi_flags & DEVI_RETIRED) { + PR_CPU("dip is retired\n"); + return (SBD_COND_FAILED); + } + if (ddi_getproplen(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, (char *)status, &len) != DDI_PROP_SUCCESS) { PR_CPU("status in sbd is ok\n"); diff --git a/usr/src/uts/sun4u/io/sbd_io.c b/usr/src/uts/sun4u/io/sbd_io.c index 4f07908874..c8a9ea27f7 100644 --- a/usr/src/uts/sun4u/io/sbd_io.c +++ b/usr/src/uts/sun4u/io/sbd_io.c @@ -2,9 +2,8 @@ * CDDL HEADER START * * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. @@ -20,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ @@ -253,6 +252,7 @@ sbd_io_status(sbd_handle_t *hp, sbd_devset_t devset, sbd_dev_stat_t *dsp) isp->is_ostate = ostate_cvt(dstate); isp->is_type = SBD_COMP_IO; ip = SBD_GET_BOARD_IOUNIT(sbp, unit); + ip->sbi_cm.sbdev_cond = sbd_get_comp_cond(dip); isp->is_cm.c_cond = ip->sbi_cm.sbdev_cond; isp->is_cm.c_busy = ip->sbi_cm.sbdev_busy; isp->is_cm.c_time = ip->sbi_cm.sbdev_time; diff --git a/usr/src/uts/sun4u/ngdr/io/dr_io.c b/usr/src/uts/sun4u/ngdr/io/dr_io.c index ac36e971d0..49a9866602 100644 --- a/usr/src/uts/sun4u/ngdr/io/dr_io.c +++ b/usr/src/uts/sun4u/ngdr/io/dr_io.c @@ -19,7 +19,7 @@ * CDDL HEADER END */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. 
*/ @@ -281,6 +281,26 @@ dr_post_detach_io(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) return (rv); } +static void +dr_get_comp_cond(dr_io_unit_t *ip, dev_info_t *dip) +{ + if (dip == NULL) { + ip->sbi_cm.sbdev_cond = SBD_COND_UNKNOWN; + return; + } + + if (DEVI(dip)->devi_flags & DEVI_RETIRED) { + ip->sbi_cm.sbdev_cond = SBD_COND_FAILED; + return; + } + + if (DR_DEV_IS_ATTACHED(&ip->sbi_cm)) { + ip->sbi_cm.sbdev_cond = SBD_COND_OK; + } else if (DR_DEV_IS_PRESENT(&ip->sbi_cm)) { + ip->sbi_cm.sbdev_cond = SBD_COND_OK; + } +} + int dr_io_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp) { @@ -322,6 +342,16 @@ dr_io_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp) return (-1); } + dip = NULL; + err = drmach_get_dip(id, &dip); + if (err) { + /* catch this in debug kernels */ + ASSERT(0); + + sbd_err_clear(&err); + continue; + } + isp = &dsp->d_io; bzero((caddr_t)isp, sizeof (*isp)); @@ -329,21 +359,15 @@ dr_io_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp) isp->is_cm.c_id.c_unit = ip->sbi_cm.sbdev_unum; strncpy(isp->is_cm.c_id.c_name, pstat.type, sizeof (isp->is_cm.c_id.c_name)); + + dr_get_comp_cond(ip, dip); isp->is_cm.c_cond = ip->sbi_cm.sbdev_cond; isp->is_cm.c_busy = ip->sbi_cm.sbdev_busy | pstat.busy; isp->is_cm.c_time = ip->sbi_cm.sbdev_time; isp->is_cm.c_ostate = ip->sbi_cm.sbdev_ostate; isp->is_cm.c_sflags = 0; - dip = NULL; - err = drmach_get_dip(id, &dip); - if (err) { - /* catch this in debug kernels */ - ASSERT(0); - - sbd_err_clear(&err); - continue; - } else if (dip == NULL) { + if (dip == NULL) { isp->is_pathname[0] = '\0'; isp->is_referenced = 0; isp->is_unsafe_count = 0; |