summaryrefslogtreecommitdiff
path: root/usr/src/uts/common
diff options
context:
space:
mode:
authorvikram <none@none>2007-08-09 21:43:47 -0700
committervikram <none@none>2007-08-09 21:43:47 -0700
commit25e8c5aa2b496d9026e958ac731a610167574f59 (patch)
tree48d445f55e23f769f3981231d5b06b0b35505b33 /usr/src/uts/common
parentffcd51f34e6cd303b9745909c4632da63426be17 (diff)
downloadillumos-joyent-25e8c5aa2b496d9026e958ac731a610167574f59.tar.gz
PSARC 2007/290 Retire Agent for I/O Devices
6464720 Deliver a FMA I/O retire agent --HG-- rename : usr/src/cmd/fm/modules/common/io-retire/ior_main.c => deleted_files/usr/src/cmd/fm/modules/common/io-retire/ior_main.c
Diffstat (limited to 'usr/src/uts/common')
-rw-r--r--usr/src/uts/common/Makefile.files2
-rw-r--r--usr/src/uts/common/contract/device.c2207
-rw-r--r--usr/src/uts/common/contract/process.c35
-rw-r--r--usr/src/uts/common/fs/ctfs/ctfs_ctl.c9
-rw-r--r--usr/src/uts/common/fs/ctfs/ctfs_tmpl.c7
-rw-r--r--usr/src/uts/common/fs/specfs/specsubr.c161
-rw-r--r--usr/src/uts/common/fs/specfs/specvnops.c104
-rw-r--r--usr/src/uts/common/os/contract.c166
-rw-r--r--usr/src/uts/common/os/devcache.c13
-rw-r--r--usr/src/uts/common/os/devcfg.c727
-rw-r--r--usr/src/uts/common/os/driver_lyr.c936
-rw-r--r--usr/src/uts/common/os/modctl.c225
-rw-r--r--usr/src/uts/common/os/retire_store.c457
-rw-r--r--usr/src/uts/common/os/sunmdi.c286
-rw-r--r--usr/src/uts/common/os/sunndi.c17
-rw-r--r--usr/src/uts/common/sys/Makefile4
-rw-r--r--usr/src/uts/common/sys/autoconf.h24
-rw-r--r--usr/src/uts/common/sys/contract.h8
-rw-r--r--usr/src/uts/common/sys/contract/device.h76
-rw-r--r--usr/src/uts/common/sys/contract/device_impl.h93
-rw-r--r--usr/src/uts/common/sys/contract_impl.h50
-rw-r--r--usr/src/uts/common/sys/ctfs.h8
-rw-r--r--usr/src/uts/common/sys/ddi_impldefs.h24
-rw-r--r--usr/src/uts/common/sys/ddi_implfuncs.h11
-rw-r--r--usr/src/uts/common/sys/ddi_obsolete.h12
-rw-r--r--usr/src/uts/common/sys/fs/snode.h9
-rw-r--r--usr/src/uts/common/sys/modctl.h6
-rw-r--r--usr/src/uts/common/sys/sunldi.h48
-rw-r--r--usr/src/uts/common/sys/sunldi_impl.h52
-rw-r--r--usr/src/uts/common/sys/sunmdi.h10
30 files changed, 5717 insertions, 70 deletions
diff --git a/usr/src/uts/common/Makefile.files b/usr/src/uts/common/Makefile.files
index e9de4ceac8..c439bc5d39 100644
--- a/usr/src/uts/common/Makefile.files
+++ b/usr/src/uts/common/Makefile.files
@@ -120,6 +120,7 @@ GENUNIX_OBJS += \
ddi_strtol.o \
devcfg.o \
devcache.o \
+ device.o \
devid.o \
devid_cache.o \
devid_scsi.o \
@@ -244,6 +245,7 @@ GENUNIX_OBJS += \
refstr.o \
rename.o \
resolvepath.o \
+ retire_store.o \
process.o \
rlimit.o \
rmap.o \
diff --git a/usr/src/uts/common/contract/device.c b/usr/src/uts/common/contract/device.c
new file mode 100644
index 0000000000..4632cdaa9d
--- /dev/null
+++ b/usr/src/uts/common/contract/device.c
@@ -0,0 +1,2207 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/mutex.h>
+#include <sys/debug.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/kmem.h>
+#include <sys/thread.h>
+#include <sys/id_space.h>
+#include <sys/avl.h>
+#include <sys/list.h>
+#include <sys/sysmacros.h>
+#include <sys/proc.h>
+#include <sys/contract.h>
+#include <sys/contract_impl.h>
+#include <sys/contract/device.h>
+#include <sys/contract/device_impl.h>
+#include <sys/cmn_err.h>
+#include <sys/nvpair.h>
+#include <sys/policy.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/ddi_implfuncs.h>
+#include <sys/systm.h>
+#include <sys/stat.h>
+#include <sys/sunddi.h>
+#include <sys/esunddi.h>
+#include <sys/ddi.h>
+#include <sys/fs/dv_node.h>
+#include <sys/sunndi.h>
+#undef ct_lock /* needed because clnt.h defines ct_lock as a macro */
+
+/*
+ * Device Contracts
+ * -----------------
+ * This file contains the core code for the device contracts framework.
+ * A device contract is an agreement or a contract between a process and
+ * the kernel regarding the state of the device. A device contract may be
+ * created when a relationship is formed between a device and a process
+ * i.e. at open(2) time, or it may be created at some point after the device
+ * has been opened. A device contract once formed may be broken by either party.
+ * A device contract can be broken by the process by an explicit abandon of the
+ * contract or by an implicit abandon when the process exits. A device contract
+ * can be broken by the kernel either asynchronously (without negotiation) or
+ * synchronously (with negotiation). Exactly which happens depends on the device
+ * state transition. The following state diagram shows the transitions between
+ * device states. Only device state transitions currently supported by device
+ * contracts are shown.
+ *
+ * <-- A -->
+ * /-----------------> DEGRADED
+ * | |
+ * | |
+ * | | S
+ * | | |
+ * | | v
+ * v S --> v
+ * ONLINE ------------> OFFLINE
+ *
+ *
+ * In the figure above, the arrows indicate the direction of transition. The
+ * letter S refers to transitions which are inherently synchronous i.e.
+ * require negotiation and the letter A indicates transitions which are
+ * asynchronous i.e. are done without contract negotiations. A good example
+ * of a synchronous transition is the ONLINE -> OFFLINE transition. This
+ * transition cannot happen as long as there are consumers which have the
+ * device open. Thus some form of negotiation needs to happen between the
+ * consumers and the kernel to ensure that consumers either close devices
+ * or disallow the move to OFFLINE. Certain other transitions such as
+ * ONLINE --> DEGRADED for example, are inherently asynchronous i.e.
+ * non-negotiable. A device that suffers a fault that degrades its
+ * capabilities will become degraded irrespective of what consumers it has,
+ * so a negotiation in this case is pointless.
+ *
+ * The following device states are currently defined for device contracts:
+ *
+ * CT_DEV_EV_ONLINE
+ * The device is online and functioning normally
+ * CT_DEV_EV_DEGRADED
+ * The device is online but is functioning in a degraded capacity
+ * CT_DEV_EV_OFFLINE
+ * The device is offline and is no longer configured
+ *
+ * A typical consumer of device contracts starts out with a contract
+ * template and adds terms to that template. These include the
+ * "acceptable set" (A-set) term, which is a bitset of device states which
+ * are guaranteed by the contract. If the device moves out of a state in
+ * the A-set, the contract is broken. The breaking of the contract can
+ * be asynchronous in which case a critical contract event is sent to the
+ * contract holder but no negotiations take place. If the breaking of the
+ * contract is synchronous, negotiations are opened between the affected
+ * consumer and the kernel. The kernel does this by sending a critical
+ * event to the consumer with the CTE_NEG flag set indicating that this
+ * is a negotiation event. The consumer can accept this change by sending
+ * a ACK message to the kernel. Alternatively, if it has the necessary
+ * privileges, it can send a NACK message to the kernel which will block
+ * the device state change. To NACK a negotiable event, a process must
+ * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
+ *
+ * Other terms include the "minor path" term, specified explicitly if the
+ * contract is not being created at open(2) time or specified implicitly
+ * if the contract is being created at open time via an activated template.
+ *
+ * A contract event is sent on any state change to which the contract
+ * owner has subscribed via the informative or critical event sets. Only
+ * critical events are guaranteed to be delivered. Since all device state
+ * changes are controlled by the kernel and cannot be arbitrarily generated
+ * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not
+ * need to be asserted in a process's effective set to designate an event as
+ * critical. To ensure privacy, a process must either have the same effective
+ * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege
+ * asserted in its effective set in order to observe device contract events
+ * off the device contract type specific endpoint.
+ *
+ * Yet another term available with device contracts is the "non-negotiable"
+ * term. This term is used to pre-specify a NACK to any contract negotiation.
+ * This term is ignored for asynchronous state changes. For example, a
+ * process may have the A-set {ONLINE|DEGRADED} and make the contract
+ * non-negotiable. In this case, the device contract framework assumes a
+ * NACK for any transition to OFFLINE and blocks the offline. If the A-set
+ * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE
+ * are NACKed but transitions to DEGRADED succeed.
+ *
+ * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract)
+ * happens just before the I/O framework attempts to offline a device
+ * (i.e. detach a device and set the offline flag so that it cannot be
+ * reattached). A device contract holder is expected to either NACK the offline
+ * (if privileged) or release the device and allow the offline to proceed.
+ *
+ * The DEGRADE contract event (if DEGRADE is not in the A-set for a contract)
+ * is generated just before the I/O framework transitions the device state
+ * to "degraded" (i.e. DEVI_DEVICE_DEGRADED in I/O framework terminology).
+ *
+ * The contract holder is expected to ACK or NACK a negotiation event
+ * within a certain period of time. If the ACK/NACK is not received
+ * within the timeout period, the device contract framework will behave
+ * as if the contract does not exist and will proceed with the event.
+ *
+ * Unlike a process contract a device contract does not need to exist
+ * once it is abandoned, since it does not define a fault boundary. It
+ * merely represents an agreement between a process and the kernel
+ * regarding the state of the device. Once the process has abandoned
+ * the contract (either implicitly via a process exit or explicitly)
+ * the kernel has no reason to retain the contract. As a result
+ * device contracts are neither inheritable nor need to exist in an
+ * orphan state.
+ *
+ * A device unlike a process may exist in multiple contracts and has
+ * a "life" outside a device contract. A device unlike a process
+ * may exist without an associated contract. Unlike a process contract
+ * a device contract may be formed after a binding relationship is
+ * formed between a process and a device.
+ *
+ * IMPLEMENTATION NOTES
+ * ====================
+ * DATA STRUCTURES
+ * ----------------
+ * The heart of the device contracts implementation is the device contract
+ * private cont_device_t (or ctd for short) data structure. It encapsulates
+ * the generic contract_t data structure and has a number of private
+ * fields.
+ * These include:
+ * cond_minor: The minor device that is the subject of the contract
+ * cond_aset: The bitset of states which are guaranteed by the
+ * contract
+ * cond_noneg: If set, indicates that the result of negotiation has
+ * been predefined to be a NACK
+ * In addition, there are other device identifiers such the devinfo node,
+ * dev_t and spec_type of the minor node. There are also a few fields that
+ * are used during negotiation to maintain state. See
+ * uts/common/sys/contract/device_impl.h
+ * for details.
+ * The ctd structure represents the device private part of a contract of
+ * type "device"
+ *
+ * Another data structure used by device contracts is ctmpl_device. It is
+ * the device contracts private part of the contract template structure. It
+ * encapsulates the generic template structure "ct_template_t" and includes
+ * the following device contract specific fields
+ * ctd_aset: The bitset of states that should be guaranteed by a
+ * contract
+ * ctd_noneg: If set, indicates that contract should NACK a
+ * negotiation
+ * ctd_minor: The devfs_path (without the /devices prefix) of the
+ * minor node that is the subject of the contract.
+ *
+ * ALGORITHMS
+ * ---------
+ * There are three sets of routines in this file
+ * Template related routines
+ * -------------------------
+ * These routines provide support for template related operations initiated
+ * via the generic template operations. These include routines that dup
+ * a template, free it, and set various terms in the template
+ * (such as the minor node path, the acceptable state set (or A-set)
+ * and the non-negotiable term) as well as a routine to query the
+ * device specific portion of the template for the abovementioned terms.
+ * There is also a routine to create (ctmpl_device_create) that is used to
+ * create a contract from a template. This routine calls (after initial
+ * setup) the common function used to create a device contract
+ * (contract_device_create).
+ *
+ * core device contract implementation
+ * ----------------------------------
+ * These routines support the generic contract framework to provide
+ * functionality that allows contracts to be created, managed and
+ * destroyed. The contract_device_create() routine is a routine used
+ * to create a contract from a template (either via an explicit create
+ * operation on a template or implicitly via an open with an
+ * activated template.). The contract_device_free() routine assists
+ * in freeing the device contract specific parts. There are routines
+ * used to abandon (contract_device_abandon) a device contract as well
+ * as a routine to destroy (which despite its name does not destroy,
+ * it only moves a contract to a dead state) a contract.
+ * There is also a routine to return status information about a
+ * contract - the level of detail depends on what is requested by the
+ * user. A value of CTD_FIXED only returns fixed length fields such
+ * as the A-set, state of device and value of the "noneg" term. If
+ * CTD_ALL is specified, the minor node path is returned as well.
+ *
+ * In addition there are interfaces (contract_device_ack/nack) which
+ * are used to support negotiation between userland processes and
+ * device contracts. These interfaces record the acknowledgement
+ * or lack thereof for negotiation events and help determine if the
+ * negotiated event should occur.
+ *
+ * "backend routines"
+ * -----------------
+ * The backend routines form the interface between the I/O framework
+ * and the device contract subsystem. These routines, allow the I/O
+ * framework to call into the device contract subsystem to notify it of
+ * impending changes to a device state as well as to inform of the
+ * final disposition of such attempted state changes. Routines in this
+ * class include contract_device_offline() that indicates an attempt to
+ * offline a device, contract_device_degrade() that indicates that
+ * a device is moving to the degraded state and contract_device_negend()
+ * that is used by the I/O framework to inform the contracts subsystem of
+ * the final disposition of an attempted operation.
+ *
+ * SUMMARY
+ * -------
+ * A contract starts its life as a template. A process allocates a device
+ * contract template and sets various terms:
+ * The A-set
+ * The device minor node
+ * Critical and informative events
+ * The noneg i.e. no negotiation term
+ * Setting of these terms in the template is done via the
+ * ctmpl_device_set() entry point in this file. A process can query a
+ * template to determine the terms already set in the template - this is
+ * facilitated by the ctmpl_device_get() routine.
+ *
+ * Once all the appropriate terms are set, the contract is instantiated via
+ * one of two methods
+ * - via an explicit create operation - this is facilitated by the
+ * ctmpl_device_create() entry point
+ * - synchronously with the open(2) system call - this is achieved via the
+ * contract_device_open() routine.
+ * The core work for both these above functions is done by
+ * contract_device_create()
+ *
+ * A contract once created can be queried for its status. Support for
+ * status info is provided by both the common contracts framework and by
+ * the "device" contract type. If the level of detail requested is
+ * CTD_COMMON, only the common contract framework data is used. Higher
+ * levels of detail result in calls to contract_device_status() to supply
+ * device contract type specific status information.
+ *
+ * A contract once created may be abandoned either explicitly or implicitly.
+ * In either case, the contract_device_abandon() function is invoked. This
+ * function merely calls contract_destroy() which moves the contract to
+ * the DEAD state. The device contract portion of destroy processing is
+ * provided by contract_device_destroy() which merely disassociates the
+ * contract from its device devinfo node. A contract in the DEAD state is
+ * not freed. It hangs around until all references to the contract are
+ * gone. When that happens, the contract is finally deallocated. The
+ * device contract specific portion of the free is done by
+ * contract_device_free() which finally frees the device contract specific
+ * data structure (cont_device_t).
+ *
+ * When a device undergoes a state change, the I/O framework calls the
+ * corresponding device contract entry point. For example, when a device
+ * is about to go OFFLINE, the routine contract_device_offline() is
+ * invoked. Similarly if a device moves to DEGRADED state, the routine
+ * contract_device_degrade() function is called. These functions call the
+ * core routine contract_device_publish(). This function determines via
+ * the function is_sync_neg() whether an event is a synchronous (i.e.
+ * negotiable) event or not. In the former case contract_device_publish()
+ * publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs
+ * and/or NACKs from contract holders. In the latter case, it simply
+ * publishes the event and does not wait. In the negotiation case, ACKs or
+ * NACKs from userland consumers results in contract_device_ack_nack()
+ * being called where the result of the negotiation is recorded in the
+ * contract data structure. Once all outstanding contract owners have
+ * responded, the device contract code in wait_for_acks() determines the
+ * final result of the negotiation. A single NACK overrides all other ACKs
+ * If there is no NACK, then a single ACK will result in an overall ACK
+ * result. If there are no ACKs or NACKs, then the result CT_NONE is
+ * returned back to the I/O framework. Once the event is permitted or
+ * blocked, the I/O framework proceeds or aborts the state change. The
+ * I/O framework then calls contract_device_negend() with a result code
+ * indicating final disposition of the event. This call releases the
+ * barrier and other state associated with the previous negotiation,
+ * which permits the next event (if any) to come into the device contract
+ * framework.
+ *
+ * Finally, a device that has outstanding contracts may be removed from
+ * the system which results in its devinfo node being freed. The devinfo
+ * free routine in the I/O framework, calls into the device contract
+ * function - contract_device_remove_dip(). This routine, disassociates
+ * the dip from all contracts associated with the contract being freed,
+ * allowing the devinfo node to be freed.
+ *
+ * LOCKING
+ * ---------
+ * There are four sets of data that need to be protected by locks
+ *
+ * i) device contract specific portion of the contract template - This data
+ * is protected by the template lock ctmpl_lock.
+ *
+ * ii) device contract specific portion of the contract - This data is
+ * protected by the contract lock ct_lock
+ *
+ * iii) The linked list of contracts hanging off a devinfo node - This
+ * list is protected by the per-devinfo node lock devi_ct_lock
+ *
+ * iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv
+ * and devi_ct_count that controls state changes to a dip
+ *
+ * The template lock is independent in that none of the other locks in this
+ * file may be taken while holding the template lock (and vice versa).
+ *
+ * The remaining three locks have the following lock order
+ *
+ * devi_ct_lock -> ct_count barrier -> ct_lock
+ *
+ */
+
+static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev,
+ int spec_type, proc_t *owner, int *errorp);
+
+/* barrier routines */
+static void ct_barrier_acquire(dev_info_t *dip);
+static void ct_barrier_release(dev_info_t *dip);
+static int ct_barrier_held(dev_info_t *dip);
+static int ct_barrier_empty(dev_info_t *dip);
+static void ct_barrier_wait_for_release(dev_info_t *dip);
+static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs);
+static void ct_barrier_decr(dev_info_t *dip);
+static void ct_barrier_incr(dev_info_t *dip);
+
+ct_type_t *device_type;
+
+/*
+ * Macro predicates for determining when events should be sent and how.
+ * EVSENDP: nonzero if the contract has subscribed to "flag" through
+ * either its informative or its critical event set.
+ * EVINFOP: true if "flag" is not in the critical set, i.e. the event
+ * is delivered (if at all) as an informative event only.
+ */
+#define EVSENDP(ctd, flag) \
+ ((ctd->cond_contract.ct_ev_info | ctd->cond_contract.ct_ev_crit) & flag)
+
+#define EVINFOP(ctd, flag) \
+ ((ctd->cond_contract.ct_ev_crit & flag) == 0)
+
+/*
+ * State transition table showing which transitions are synchronous and which
+ * are not.
+ */
+struct ct_dev_negtable {
+ uint_t st_old; /* current device state */
+ uint_t st_new; /* proposed new device state */
+ uint_t st_neg; /* 1 if this transition requires negotiation */
+} ct_dev_negtable[] = {
+ {CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE, 1},
+ {CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED, 0},
+ {CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE, 0},
+ {CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE, 1},
+ {0} /* table terminator */
+};
+
+/*
+ * Device contract template implementation
+ */
+
+/*
+ * ctmpl_device_dup
+ *
+ * The device contract template dup entry point.
+ * This simply copies all the fields (generic as well as device contract
+ * specific) fields of the original.
+ */
+static struct ct_template *
+ctmpl_device_dup(struct ct_template *template)
+{
+ ctmpl_device_t *new;
+ ctmpl_device_t *old = template->ctmpl_data;
+ char *buf;
+ char *minor;
+
+ /* preallocate before taking any locks; both are KM_SLEEP */
+ new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
+ buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ /*
+ * copy generic fields.
+ * ctmpl_copy returns with old template lock held
+ */
+ ctmpl_copy(&new->ctd_ctmpl, template);
+
+ new->ctd_ctmpl.ctmpl_data = new;
+ new->ctd_aset = old->ctd_aset;
+ new->ctd_minor = NULL;
+ new->ctd_noneg = old->ctd_noneg;
+
+ /*
+ * Stage the minor path into "buf" while the template lock is
+ * held; the allocating strdup happens after the lock is dropped.
+ */
+ if (old->ctd_minor) {
+ ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN);
+ bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1);
+ } else {
+ kmem_free(buf, MAXPATHLEN);
+ buf = NULL;
+ }
+
+ /* drop the lock across i_ddi_strdup(), which may sleep (KM_SLEEP) */
+ mutex_exit(&template->ctmpl_lock);
+ if (buf) {
+ minor = i_ddi_strdup(buf, KM_SLEEP);
+ kmem_free(buf, MAXPATHLEN);
+ buf = NULL;
+ } else {
+ minor = NULL;
+ }
+ mutex_enter(&template->ctmpl_lock);
+
+ if (minor) {
+ new->ctd_minor = minor;
+ }
+
+ ASSERT(buf == NULL);
+ return (&new->ctd_ctmpl);
+}
+
+/*
+ * ctmpl_device_free
+ *
+ * The device contract template free entry point. Just
+ * frees the template.
+ */
+static void
+ctmpl_device_free(struct ct_template *template)
+{
+ ctmpl_device_t *dtmpl = template->ctmpl_data;
+
+ /* free the minor path string, if a minor term was ever set */
+ if (dtmpl->ctd_minor)
+ kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1);
+
+ kmem_free(dtmpl, sizeof (ctmpl_device_t));
+}
+
+/*
+ * SAFE_EV is the set of events which a non-privileged process is
+ * allowed to make critical. An unprivileged device contract owner has
+ * no control over when a device changes state, so all device events
+ * can be in the critical set.
+ *
+ * EXCESS tells us if "value", a critical event set, requires
+ * additional privilege. For device contracts EXCESS currently
+ * evaluates to 0.
+ */
+#define SAFE_EV (CT_DEV_ALLEVENT)
+#define EXCESS(value) ((value) & ~SAFE_EV)
+
+
+/*
+ * ctmpl_device_set
+ *
+ * The device contract template set entry point. Sets various terms in the
+ * template. The non-negotiable term can only be set if the process has
+ * the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
+ */
+static int
+ctmpl_device_set(struct ct_template *tmpl, ct_param_t *param, const cred_t *cr)
+{
+ ctmpl_device_t *dtmpl = tmpl->ctmpl_data;
+ char *buf;
+ int error;
+ dev_info_t *dip;
+ int spec_type;
+
+ ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock));
+
+ switch (param->ctpm_id) {
+ case CTDP_ACCEPT:
+ /*
+ * The A-set must be a nonempty proper subset of the
+ * device event set: all-events or no-events is rejected.
+ */
+ if (param->ctpm_value & ~CT_DEV_ALLEVENT)
+ return (EINVAL);
+ if (param->ctpm_value == 0)
+ return (EINVAL);
+ if (param->ctpm_value == CT_DEV_ALLEVENT)
+ return (EINVAL);
+
+ dtmpl->ctd_aset = param->ctpm_value;
+ break;
+ case CTDP_NONEG:
+ if (param->ctpm_value != CTDP_NONEG_SET &&
+ param->ctpm_value != CTDP_NONEG_CLEAR)
+ return (EINVAL);
+
+ /*
+ * only privileged processes can designate a contract
+ * non-negotiable.
+ */
+ if (param->ctpm_value == CTDP_NONEG_SET &&
+ (error = secpolicy_sys_devices(cr)) != 0) {
+ return (error);
+ }
+
+ dtmpl->ctd_noneg = param->ctpm_value;
+ break;
+
+ case CTDP_MINOR:
+ if (param->ctpm_value == NULL)
+ return (EINVAL);
+
+ buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ /*
+ * Copyin the device path
+ */
+ error = copyinstr((char *)(uintptr_t)param->ctpm_value, buf,
+ MAXPATHLEN, NULL);
+ if (error != 0) {
+ kmem_free(buf, MAXPATHLEN);
+ return (error);
+ }
+ buf[MAXPATHLEN - 1] = '\0';
+
+ /*
+ * Require an absolute path that names a minor node
+ * (contains ':') and is not a /devices path.
+ */
+ if (*buf != '/' ||
+ strncmp(buf, "/devices/", strlen("/devices/")) == 0 ||
+ strstr(buf, "../devices/") || strchr(buf, ':') == NULL) {
+ kmem_free(buf, MAXPATHLEN);
+ return (EINVAL);
+ }
+
+ /* the path must resolve to a real devinfo node */
+ spec_type = 0;
+ dip = NULL;
+ if (resolve_pathname(buf, &dip, NULL, &spec_type) != 0) {
+ kmem_free(buf, MAXPATHLEN);
+ return (ERANGE);
+ }
+ ddi_release_devi(dip);
+
+ /* only block and character minor nodes are acceptable */
+ if (spec_type != S_IFCHR && spec_type != S_IFBLK) {
+ kmem_free(buf, MAXPATHLEN);
+ return (EINVAL);
+ }
+
+ /* replace any previously set minor path term */
+ if (dtmpl->ctd_minor != NULL) {
+ kmem_free(dtmpl->ctd_minor,
+ strlen(dtmpl->ctd_minor) + 1);
+ }
+ dtmpl->ctd_minor = i_ddi_strdup(buf, KM_SLEEP);
+ kmem_free(buf, MAXPATHLEN);
+ break;
+ case CTP_EV_CRITICAL:
+ /*
+ * Currently for device contracts, any event
+ * may be added to the critical set. We retain the
+ * following code however for future enhancements.
+ */
+ if (EXCESS(param->ctpm_value) &&
+ (error = secpolicy_contract_event(cr)) != 0)
+ return (error);
+ tmpl->ctmpl_ev_crit = param->ctpm_value;
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+/*
+ * ctmpl_device_get
+ *
+ * The device contract template get entry point. Simply fetches and
+ * returns the value of the requested term.
+ */
+static int
+ctmpl_device_get(struct ct_template *template, ct_param_t *param)
+{
+ ctmpl_device_t *dtmpl = template->ctmpl_data;
+ int error;
+
+ ASSERT(MUTEX_HELD(&template->ctmpl_lock));
+
+ switch (param->ctpm_id) {
+ case CTDP_ACCEPT:
+ param->ctpm_value = dtmpl->ctd_aset;
+ break;
+ case CTDP_NONEG:
+ param->ctpm_value = dtmpl->ctd_noneg;
+ break;
+ case CTDP_MINOR:
+ /* copy the minor path out to the user-supplied buffer */
+ if (dtmpl->ctd_minor) {
+ error = copyoutstr(dtmpl->ctd_minor,
+ (char *)(uintptr_t)param->ctpm_value,
+ MAXPATHLEN, NULL);
+ if (error != 0)
+ return (error);
+ } else {
+ /* no minor path term has been set on this template */
+ return (ENOENT);
+ }
+ break;
+ default:
+ return (EINVAL);
+ }
+
+ return (0);
+}
+
+/*
+ * Device contract type specific portion of creating a contract using
+ * a specified template
+ */
+/*ARGSUSED*/
+int
+ctmpl_device_create(ct_template_t *template, ctid_t *ctidp)
+{
+ ctmpl_device_t *dtmpl;
+ char *buf;
+ dev_t dev;
+ int spec_type;
+ int error;
+ cont_device_t *ctd;
+
+ if (ctidp == NULL)
+ return (EINVAL);
+
+ buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ dtmpl = template->ctmpl_data;
+
+ /* snapshot the minor path term under the template lock */
+ mutex_enter(&template->ctmpl_lock);
+ if (dtmpl->ctd_minor == NULL) {
+ /* incomplete template */
+ mutex_exit(&template->ctmpl_lock);
+ kmem_free(buf, MAXPATHLEN);
+ return (EINVAL);
+ } else {
+ ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
+ bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1);
+ }
+ mutex_exit(&template->ctmpl_lock);
+
+ /*
+ * Resolve the path to a valid block or character dev_t;
+ * any failure to do so is reported to the caller as ERANGE.
+ */
+ spec_type = 0;
+ dev = NODEV;
+ if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 ||
+ dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE ||
+ (spec_type != S_IFCHR && spec_type != S_IFBLK)) {
+ CT_DEBUG((CE_WARN,
+ "tmpl_create: failed to find device: %s", buf));
+ kmem_free(buf, MAXPATHLEN);
+ return (ERANGE);
+ }
+ kmem_free(buf, MAXPATHLEN);
+
+ /* common creation path, shared with contract_device_open() */
+ ctd = contract_device_create(template->ctmpl_data,
+ dev, spec_type, curproc, &error);
+
+ if (ctd == NULL) {
+ CT_DEBUG((CE_WARN, "Failed to create device contract for "
+ "process (%d) with device (devt = %lu, spec_type = %s)",
+ curproc->p_pid, dev,
+ spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK"));
+ return (error);
+ }
+
+ /* hand the new contract's id back to the caller */
+ mutex_enter(&ctd->cond_contract.ct_lock);
+ *ctidp = ctd->cond_contract.ct_id;
+ mutex_exit(&ctd->cond_contract.ct_lock);
+
+ return (0);
+}
+
+/*
+ * Device contract specific template entry points, vectored through
+ * the generic contract template framework.
+ */
+static ctmplops_t ctmpl_device_ops = {
+ ctmpl_device_dup, /* ctop_dup */
+ ctmpl_device_free, /* ctop_free */
+ ctmpl_device_set, /* ctop_set */
+ ctmpl_device_get, /* ctop_get */
+ ctmpl_device_create, /* ctop_create */
+ CT_DEV_ALLEVENT /* all device events bitmask */
+};
+
+
+/*
+ * Device contract implementation
+ */
+
+/*
+ * contract_device_default
+ *
+ * The device contract default template entry point. Creates a
+ * device contract template with a default A-set and no "noneg" ,
+ * with informative degrade events and critical offline events.
+ * There is no default minor path.
+ */
+static ct_template_t *
+contract_device_default(void)
+{
+ ctmpl_device_t *new;
+
+ new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
+ ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new);
+
+ /* default A-set: ONLINE or DEGRADED acceptable; OFFLINE breaks it */
+ new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED;
+ new->ctd_noneg = 0;
+ /* degrade events default to informative, offline to critical */
+ new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED;
+ new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE;
+
+ return (&new->ctd_ctmpl);
+}
+
+/*
+ * contract_device_free
+ *
+ * Destroys the device contract specific portion of a contract and
+ * frees the contract.
+ */
+static void
+contract_device_free(contract_t *ct)
+{
+ cont_device_t *ctd = ct->ct_data;
+
+ /* free the minor path string */
+ ASSERT(ctd->cond_minor);
+ ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);
+ kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1);
+
+ /* sanity-check remaining contract invariants before freeing */
+ ASSERT(ctd->cond_devt != DDI_DEV_T_ANY &&
+ ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV);
+
+ ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR);
+
+ ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT));
+ ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1);
+
+ ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT));
+ ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK)));
+
+ /* a current event id exists iff an event type is recorded */
+ ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0));
+ ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0));
+
+ /* must already be unlinked from any devinfo node's contract list */
+ ASSERT(!list_link_active(&ctd->cond_next));
+
+ kmem_free(ctd, sizeof (cont_device_t));
+}
+
+/*
+ * contract_device_abandon
+ *
+ * The device contract abandon entry point.
+ */
+static void
+contract_device_abandon(contract_t *ct)
+{
+ ASSERT(MUTEX_HELD(&ct->ct_lock));
+
+ /*
+ * device contracts cannot be inherited or orphaned.
+ * Move the contract to the DEAD_STATE. It will be freed
+ * once all references to it are gone.
+ * See contract_device_destroy() for the type-specific teardown.
+ */
+ contract_destroy(ct);
+}
+
+/*
+ * contract_device_destroy
+ *
+ * The device contract destroy entry point.
+ * Called from contract_destroy() to do any type specific destroy. Note
+ * that destroy is a misnomer - this does not free the contract, it only
+ * moves it to the dead state. A contract is actually freed via
+ * contract_rele() -> contract_dtor(), contop_free()
+ *
+ * Entered and exited with ct_lock held, though the lock is dropped
+ * internally to respect the devi_ct_lock -> ct_lock lock order.
+ */
+static void
+contract_device_destroy(contract_t *ct)
+{
+ cont_device_t *ctd = ct->ct_data;
+ dev_info_t *dip = ctd->cond_dip;
+
+ ASSERT(MUTEX_HELD(&ct->ct_lock));
+
+ if (dip == NULL) {
+ /*
+ * The dip has been removed, this is a dangling contract
+ * Check that dip linkages are NULL
+ */
+ ASSERT(!list_link_active(&ctd->cond_next));
+ CT_DEBUG((CE_NOTE, "contract_device_destroy: contract has no "
+ "devinfo node. contract ctid : %d", ct->ct_id));
+ return;
+ }
+
+ /*
+ * Need to have lock order: devi_ct_lock -> ct_count barrier -> ct_lock
+ */
+ mutex_exit(&ct->ct_lock);
+
+ /*
+ * Waiting for the barrier to be released is strictly speaking not
+ * necessary. But it simplifies the implementation of
+ * contract_device_publish() by establishing the invariant that
+ * device contracts cannot go away during negotiation.
+ */
+ mutex_enter(&(DEVI(dip)->devi_ct_lock));
+ ct_barrier_wait_for_release(dip);
+ mutex_enter(&ct->ct_lock);
+
+ /* unlink from the dip and drop the hold that linkage represented */
+ list_remove(&(DEVI(dip)->devi_ct), ctd);
+ ctd->cond_dip = NULL; /* no longer linked to dip */
+ contract_rele(ct); /* remove hold for dip linkage */
+
+ /*
+ * Drop both locks, then re-enter ct_lock so the caller
+ * (contract_destroy) sees it held on return, as it expects.
+ */
+ mutex_exit(&ct->ct_lock);
+ mutex_exit(&(DEVI(dip)->devi_ct_lock));
+ mutex_enter(&ct->ct_lock);
+}
+
+/*
+ * contract_device_status
+ *
+ * The device contract status entry point. Called when level of "detail"
+ * is either CTD_FIXED or CTD_ALL
+ *
+ * Fills in "nvl" with the contract's state, A-set and noneg term;
+ * the minor path is added only for CTD_ALL.
+ */
+static void
+contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl,
+ void *status, model_t model)
+{
+ cont_device_t *ctd = ct->ct_data;
+
+ ASSERT(detail == CTD_FIXED || detail == CTD_ALL);
+
+ mutex_enter(&ct->ct_lock);
+ contract_status_common(ct, zone, status, model);
+
+ /*
+ * There's no need to hold the contract lock while accessing static
+ * data like aset or noneg. But since we need the lock to access other
+ * data like state, we hold it anyway.
+ */
+ VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0);
+ VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0);
+ VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0);
+
+ if (detail == CTD_FIXED) {
+ mutex_exit(&ct->ct_lock);
+ return;
+ }
+
+ /* CTD_ALL: additionally report the contracted minor path */
+ ASSERT(ctd->cond_minor);
+ VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0);
+
+ mutex_exit(&ct->ct_lock);
+}
+
+/*
+ * Converts a result integer into the corresponding string. Used for printing
+ * messages. Unrecognized values map to "UNKNOWN".
+ */
+static char *
+result_str(uint_t result)
+{
+ switch (result) {
+ case CT_ACK:
+ return ("CT_ACK");
+ case CT_NACK:
+ return ("CT_NACK");
+ case CT_NONE:
+ return ("CT_NONE");
+ default:
+ return ("UNKNOWN");
+ }
+}
+
+/*
+ * Converts a device state integer constant into the corresponding string.
+ * Used to print messages. Unrecognized values map to "UNKNOWN".
+ */
+static char *
+state_str(uint_t state)
+{
+ switch (state) {
+ case CT_DEV_EV_ONLINE:
+ return ("ONLINE");
+ case CT_DEV_EV_DEGRADED:
+ return ("DEGRADED");
+ case CT_DEV_EV_OFFLINE:
+ return ("OFFLINE");
+ default:
+ return ("UNKNOWN");
+ }
+}
+
+/*
+ * Routine that determines if a particular CT_DEV_EV_? event corresponds to a
+ * synchronous state change or not.
+ *
+ * Returns the st_neg value from ct_dev_negtable for a supported
+ * old -> new transition, -2 if old and new are the same state, and
+ * -1 for a transition not present in the table.
+ */
+static int
+is_sync_neg(uint_t old, uint_t new)
+{
+ int i;
+
+ ASSERT(old & CT_DEV_ALLEVENT);
+ ASSERT(new & CT_DEV_ALLEVENT);
+
+ if (old == new) {
+ CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s",
+ state_str(new)));
+ return (-2);
+ }
+
+ /* table is terminated by an entry with st_new == 0 */
+ for (i = 0; ct_dev_negtable[i].st_new != 0; i++) {
+ if (old == ct_dev_negtable[i].st_old &&
+ new == ct_dev_negtable[i].st_new) {
+ return (ct_dev_negtable[i].st_neg);
+ }
+ }
+
+ CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: "
+ "old = %s -> new = %s", state_str(old), state_str(new)));
+
+ return (-1);
+}
+
+/*
+ * Used to cleanup cached dv_nodes so that when a device is released by
+ * a contract holder, its devinfo node can be successfully detached.
+ *
+ * Returns 0 on success, EDEADLOCK if the (parent) node is busy-owned,
+ * or the error from devfs_clean().
+ */
+static int
+contract_device_dvclean(dev_info_t *dip)
+{
+ char *devnm;
+ dev_info_t *pdip;
+ int error;
+
+ ASSERT(dip);
+
+ /* pdip can be NULL if we have contracts against the root dip */
+ pdip = ddi_get_parent(dip);
+
+ /*
+ * Calling devfs_clean() on a busy-owned node would deadlock.
+ * Note: this condition relies on && binding tighter than ||.
+ */
+ if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) {
+ char *path;
+
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ (void) ddi_pathname(dip, path);
+ CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, "
+ "device=%s", path));
+ kmem_free(path, MAXPATHLEN);
+ return (EDEADLOCK);
+ }
+
+ if (pdip) {
+ /* skip the leading '/' that ddi_deviname() produces */
+ devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
+ (void) ddi_deviname(dip, devnm);
+ error = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
+ kmem_free(devnm, MAXNAMELEN + 1);
+ } else {
+ error = devfs_clean(dip, NULL, DV_CLEAN_FORCE);
+ }
+
+ return (error);
+}
+
+/*
+ * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland.
+ * Results in the ACK or NACK being recorded on the dip for one particular
+ * contract. The device contracts framework evaluates the ACK/NACKs for all
+ * contracts against a device to determine if a particular device state change
+ * should be allowed.
+ *
+ * "cmd" is CT_ACK or CT_NACK; "evtype"/"evid" identify the negotiated
+ * event being responded to. Returns 0 on success, ESRCH if the contract
+ * is dead, or ETIMEDOUT when the response no longer matches the current
+ * event.
+ */
+static int
+contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid,
+ uint_t cmd)
+{
+ cont_device_t *ctd = ct->ct_data;
+ dev_info_t *dip;
+ ctid_t ctid;
+ int error;
+
+ ctid = ct->ct_id;
+
+ CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid));
+
+ mutex_enter(&ct->ct_lock);
+ CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid));
+
+ dip = ctd->cond_dip;
+
+ ASSERT(ctd->cond_minor);
+ ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);
+
+ /*
+ * Negotiation only if new state is not in A-set
+ */
+ ASSERT(!(ctd->cond_aset & evtype));
+
+ /*
+ * Negotiation only if transition is synchronous
+ */
+ ASSERT(is_sync_neg(ctd->cond_state, evtype));
+
+ /*
+ * We shouldn't be negotiating if the "noneg" flag is set
+ */
+ ASSERT(!ctd->cond_noneg);
+
+ /* hold the dip so it cannot go away while we are off ct_lock */
+ if (dip)
+ ndi_hold_devi(dip);
+
+ mutex_exit(&ct->ct_lock);
+
+ /*
+ * dv_clean only if !NACK and offline state change
+ */
+ if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) {
+ CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid));
+ error = contract_device_dvclean(dip);
+ if (error != 0) {
+ CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d",
+ ctid));
+ ddi_release_devi(dip);
+ }
+ }
+
+ mutex_enter(&ct->ct_lock);
+
+ /*
+ * NOTE(review): if contract_device_dvclean() failed above, dip was
+ * already released there, making this a second release of the same
+ * hold - verify whether the failure path should skip this.
+ */
+ if (dip)
+ ddi_release_devi(dip);
+
+ if (dip == NULL) {
+ /* device is gone; report best-effort result to the caller */
+ if (ctd->cond_currev_id != evid) {
+ CT_DEBUG((CE_WARN, "%sACK for non-current event "
+ "(type=%s, id=%llu) on removed device",
+ cmd == CT_NACK ? "N" : "",
+ state_str(evtype), (unsigned long long)evid));
+ CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d",
+ ctid));
+ } else {
+ ASSERT(ctd->cond_currev_type == evtype);
+ CT_DEBUG((CE_WARN, "contract_ack: no such device: "
+ "ctid: %d", ctid));
+ }
+ error = (ct->ct_state == CTS_DEAD) ? ESRCH :
+ ((cmd == CT_NACK) ? ETIMEDOUT : 0);
+ mutex_exit(&ct->ct_lock);
+ return (error);
+ }
+
+ /*
+ * Must follow lock order: devi_ct_lock -> ct_count barrier - >ct_lock
+ */
+ mutex_exit(&ct->ct_lock);
+
+ mutex_enter(&DEVI(dip)->devi_ct_lock);
+ mutex_enter(&ct->ct_lock);
+ if (ctd->cond_currev_id != evid) {
+ /* response arrived after the event it answers is over */
+ char *buf;
+ mutex_exit(&ct->ct_lock);
+ mutex_exit(&DEVI(dip)->devi_ct_lock);
+ ndi_hold_devi(dip);
+ buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ (void) ddi_pathname(dip, buf);
+ ddi_release_devi(dip);
+ CT_DEBUG((CE_WARN, "%sACK for non-current event"
+ "(type=%s, id=%llu) on device %s",
+ cmd == CT_NACK ? "N" : "",
+ state_str(evtype), (unsigned long long)evid, buf));
+ kmem_free(buf, MAXPATHLEN);
+ CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d",
+ cmd == CT_NACK ? ETIMEDOUT : 0, ctid));
+ return (cmd == CT_ACK ? 0 : ETIMEDOUT);
+ }
+
+ ASSERT(ctd->cond_currev_type == evtype);
+ ASSERT(cmd == CT_ACK || cmd == CT_NACK);
+
+ CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d",
+ cmd == CT_NACK ? "N" : "", ctid));
+
+ /* record the response, then count down the negotiation barrier */
+ ctd->cond_currev_ack = cmd;
+ mutex_exit(&ct->ct_lock);
+
+ ct_barrier_decr(dip);
+ mutex_exit(&DEVI(dip)->devi_ct_lock);
+
+ CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid));
+
+ return (0);
+}
+
+/*
+ * Invoked when a userland contract holder approves (i.e. ACKs) a state change
+ */
+static int
+contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid)
+{
+ return (contract_device_ack_nack(ct, evtype, evid, CT_ACK));
+}
+
+/*
+ * Invoked when a userland contract holder blocks (i.e. NACKs) a state change
+ */
+static int
+contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid)
+{
+ return (contract_device_ack_nack(ct, evtype, evid, CT_NACK));
+}
+
+/*
+ * Creates a new contract synchronously with the breaking of an existing
+ * contract. Currently not supported; always returns ENOTSUP.
+ */
+/*ARGSUSED*/
+static int
+contract_device_newct(contract_t *ct)
+{
+ return (ENOTSUP);
+}
+
+/*
+ * Core device contract implementation entry points.
+ * Slot order must match the contops_t definition.
+ */
+static contops_t contract_device_ops = {
+ contract_device_free, /* contop_free */
+ contract_device_abandon, /* contop_abandon */
+ contract_device_destroy, /* contop_destroy */
+ contract_device_status, /* contop_status */
+ contract_device_ack, /* contop_ack */
+ contract_device_nack, /* contop_nack */
+ contract_qack_notsup, /* contop_qack */
+ contract_device_newct /* contop_newct */
+};
+
+/*
+ * contract_device_init
+ *
+ * Initializes the device contract type: registers CTT_DEVICE with the
+ * contracts framework, supplying our ops vector and default template.
+ */
+void
+contract_device_init(void)
+{
+ device_type = contract_type_init(CTT_DEVICE, "device",
+ &contract_device_ops, contract_device_default);
+}
+
+/*
+ * contract_device_create
+ *
+ * create a device contract given template "tmpl" and the "owner" process.
+ * May fail and return NULL if project.max-contracts would have been exceeded.
+ *
+ * Common device contract creation routine called for both open-time and
+ * non-open time device contract creation. On failure, NULL is returned
+ * and *errorp is set (ERANGE: no devinfo node for the minor path;
+ * EAGAIN: contract constructor failed).
+ */
+static cont_device_t *
+contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type,
+ proc_t *owner, int *errorp)
+{
+ cont_device_t *ctd;
+ char *minor;
+ char *path;
+ dev_info_t *dip;
+
+ ASSERT(dtmpl != NULL);
+ ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE);
+ ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK);
+ ASSERT(errorp);
+
+ *errorp = 0;
+
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ /* snapshot the template's minor path under the template lock */
+ mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock);
+ ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
+ bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1);
+ mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
+
+ dip = e_ddi_hold_devi_by_path(path, 0);
+ if (dip == NULL) {
+ cmn_err(CE_WARN, "contract_create: Cannot find devinfo node "
+ "for device path (%s)", path);
+ kmem_free(path, MAXPATHLEN);
+ *errorp = ERANGE;
+ return (NULL);
+ }
+
+ /*
+ * Lock out any parallel contract negotiations
+ */
+ mutex_enter(&(DEVI(dip)->devi_ct_lock));
+ ct_barrier_acquire(dip);
+ mutex_exit(&(DEVI(dip)->devi_ct_lock));
+
+ minor = i_ddi_strdup(path, KM_SLEEP);
+ kmem_free(path, MAXPATHLEN);
+
+ (void) contract_type_pbundle(device_type, owner);
+
+ ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP);
+
+ /*
+ * Only we hold a reference to this contract. Safe to access
+ * the fields without a ct_lock
+ */
+ ctd->cond_minor = minor;
+ /*
+ * It is safe to set the dip pointer in the contract
+ * as the contract will always be destroyed before the dip
+ * is released
+ */
+ ctd->cond_dip = dip;
+ ctd->cond_devt = dev;
+ ctd->cond_spec = spec_type;
+
+ /*
+ * Since we are able to lookup the device, it is either
+ * online or degraded
+ */
+ ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ?
+ CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE;
+
+ mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock);
+ ctd->cond_aset = dtmpl->ctd_aset;
+ ctd->cond_noneg = dtmpl->ctd_noneg;
+
+ /*
+ * contract_ctor() initializes the common portion of a contract
+ * contract_dtor() destroys the common portion of a contract
+ */
+ if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl,
+ ctd, 0, owner, B_TRUE)) {
+ mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
+ /*
+ * contract_device_free() destroys the type specific
+ * portion of a contract and frees the contract.
+ * The "minor" path and "cred" is a part of the type specific
+ * portion of the contract and will be freed by
+ * contract_device_free()
+ */
+ contract_device_free(&ctd->cond_contract);
+
+ /* release barrier */
+ mutex_enter(&(DEVI(dip)->devi_ct_lock));
+ ct_barrier_release(dip);
+ mutex_exit(&(DEVI(dip)->devi_ct_lock));
+
+ ddi_release_devi(dip);
+ *errorp = EAGAIN;
+ return (NULL);
+ }
+ mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
+
+ /* negotiation timers start disarmed (ctm_start == -1) */
+ mutex_enter(&ctd->cond_contract.ct_lock);
+ ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME;
+ ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME;
+ ctd->cond_contract.ct_ntime.ctm_start = -1;
+ ctd->cond_contract.ct_qtime.ctm_start = -1;
+ mutex_exit(&ctd->cond_contract.ct_lock);
+
+ /*
+ * Insert device contract into list hanging off the dip
+ * Bump up the ref-count on the contract to reflect this
+ */
+ contract_hold(&ctd->cond_contract);
+ mutex_enter(&(DEVI(dip)->devi_ct_lock));
+ list_insert_tail(&(DEVI(dip)->devi_ct), ctd);
+
+ /* release barrier */
+ ct_barrier_release(dip);
+ mutex_exit(&(DEVI(dip)->devi_ct_lock));
+
+ ddi_release_devi(dip);
+
+ return (ctd);
+}
+
+/*
+ * Called when a device is successfully opened to create an open-time contract
+ * i.e. synchronously with a device open.
+ *
+ * Returns 0 on success or when no contract is required (no lwp context
+ * or no active device template); returns 1 on failure. On success, if
+ * "ctpp" is non-NULL, it is set to the newly created contract.
+ */
+int
+contract_device_open(dev_t dev, int spec_type, contract_t **ctpp)
+{
+ ctmpl_device_t *dtmpl;
+ ct_template_t *tmpl;
+ cont_device_t *ctd;
+ char *path;
+ klwp_t *lwp;
+ int error;
+
+ if (ctpp)
+ *ctpp = NULL;
+
+ /*
+ * Check if we are in user-context i.e. if we have an lwp
+ */
+ lwp = ttolwp(curthread);
+ if (lwp == NULL) {
+ CT_DEBUG((CE_NOTE, "contract_open: Not user-context"));
+ return (0);
+ }
+
+ /* work on a private copy of the process's active device template */
+ tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]);
+ if (tmpl == NULL) {
+ return (0);
+ }
+ dtmpl = tmpl->ctmpl_data;
+
+ /*
+ * If the user set a minor path in the template before an open,
+ * ignore it. We use the minor path of the actual minor opened.
+ */
+ mutex_enter(&tmpl->ctmpl_lock);
+ if (dtmpl->ctd_minor != NULL) {
+ CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: "
+ "ignoring device minor path in active template: %s",
+ curproc->p_pid, dtmpl->ctd_minor));
+ /*
+ * This is a copy of the actual activated template.
+ * Safe to make changes such as freeing the minor
+ * path in the template.
+ */
+ kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1);
+ dtmpl->ctd_minor = NULL;
+ }
+ mutex_exit(&tmpl->ctmpl_lock);
+
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) {
+ CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive "
+ "minor path from dev_t,spec {%lu, %d} for process (%d)",
+ dev, spec_type, curproc->p_pid));
+ ctmpl_free(tmpl);
+ kmem_free(path, MAXPATHLEN);
+ return (1);
+ }
+
+ /* point the template copy at the minor actually opened */
+ mutex_enter(&tmpl->ctmpl_lock);
+ ASSERT(dtmpl->ctd_minor == NULL);
+ dtmpl->ctd_minor = path;
+ mutex_exit(&tmpl->ctmpl_lock);
+
+ ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error);
+
+ /* detach "path" from the template before freeing both separately */
+ mutex_enter(&tmpl->ctmpl_lock);
+ ASSERT(dtmpl->ctd_minor);
+ dtmpl->ctd_minor = NULL;
+ mutex_exit(&tmpl->ctmpl_lock);
+ ctmpl_free(tmpl);
+ kmem_free(path, MAXPATHLEN);
+
+ if (ctd == NULL) {
+ cmn_err(CE_NOTE, "contract_device_open(): Failed to "
+ "create device contract for process (%d) holding "
+ "device (devt = %lu, spec_type = %d)",
+ curproc->p_pid, dev, spec_type);
+ return (1);
+ }
+
+ if (ctpp) {
+ mutex_enter(&ctd->cond_contract.ct_lock);
+ *ctpp = &ctd->cond_contract;
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ }
+ return (0);
+}
+
+/*
+ * Called during contract negotiation by the device contract framework to wait
+ * for ACKs or NACKs from contract holders. If all responses are not received
+ * before a specified timeout, this routine times out.
+ *
+ * Returns CT_NACK if any matching negotiable contract NACKed, CT_ACK if
+ * at least one ACKed (and none NACKed), and CT_NONE otherwise (timeout
+ * or no responses). Called with the dip's devi_ct_lock held.
+ */
+static uint_t
+wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype)
+{
+ cont_device_t *ctd;
+ int timed_out = 0;
+ int result = CT_NONE;
+ int ack;
+ char *f = "wait_for_acks";
+
+ ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+ ASSERT(dip);
+ ASSERT(evtype & CT_DEV_ALLEVENT);
+ ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
+ ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
+ (spec_type == S_IFBLK || spec_type == S_IFCHR));
+
+ CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip));
+
+ if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) {
+ /*
+ * some contract owner(s) didn't respond in time
+ */
+ CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip));
+ timed_out = 1;
+ }
+
+ /* tally the recorded responses across all matching contracts */
+ ack = 0;
+ for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
+ ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
+
+ mutex_enter(&ctd->cond_contract.ct_lock);
+
+ ASSERT(ctd->cond_dip == dip);
+
+ /* skip contracts against a different minor */
+ if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) {
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ continue;
+ }
+ if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) {
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ continue;
+ }
+
+ /* skip if non-negotiable contract */
+ if (ctd->cond_noneg) {
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ continue;
+ }
+
+ ASSERT(ctd->cond_currev_type == evtype);
+ /* a single NACK blocks the state change outright */
+ if (ctd->cond_currev_ack == CT_NACK) {
+ CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p",
+ f, (void *)dip));
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ return (CT_NACK);
+ } else if (ctd->cond_currev_ack == CT_ACK) {
+ ack = 1;
+ CT_DEBUG((CE_NOTE, "%s: found a ACK: %p",
+ f, (void *)dip));
+ }
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ }
+
+ if (ack) {
+ result = CT_ACK;
+ CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip));
+ } else if (timed_out) {
+ result = CT_NONE;
+ CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p",
+ f, (void *)dip));
+ } else {
+ CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p",
+ f, (void *)dip));
+ }
+
+
+ return (result);
+}
+
+/*
+ * Determines the current state (ONLINE, DEGRADED or OFFLINE) of a
+ * device (i.e. a devinfo node).
+ */
+static int
+get_state(dev_info_t *dip)
+{
+ if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip))
+ return (CT_DEV_EV_OFFLINE);
+ else if (DEVI_IS_DEVICE_DEGRADED(dip))
+ return (CT_DEV_EV_DEGRADED);
+ else
+ return (CT_DEV_EV_ONLINE);
+}
+
+/*
+ * Sets the current state of a device in a device contract.
+ * Applied to every contract on the dip's devi_ct list; the caller must
+ * hold the negotiation barrier.
+ */
+static void
+set_cond_state(dev_info_t *dip)
+{
+ uint_t state = get_state(dip);
+ cont_device_t *ctd;
+
+ /* verify that barrier is held */
+ ASSERT(ct_barrier_held(dip));
+
+ for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
+ ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
+ mutex_enter(&ctd->cond_contract.ct_lock);
+ ASSERT(ctd->cond_dip == dip);
+ ctd->cond_state = state;
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ }
+}
+
+/*
+ * Core routine called by event-specific routines when an event occurs.
+ * Determines if an event should be be published, and if it is to be
+ * published, whether a negotiation should take place. Also implements
+ * NEGEND events which publish the final disposition of an event after
+ * negotiations are complete.
+ *
+ * When an event occurs on a minor node, this routine walks the list of
+ * contracts hanging off a devinfo node and for each contract on the affected
+ * dip, evaluates the following cases
+ *
+ * a. an event that is synchronous, breaks the contract and NONEG not set
+ * - bumps up the outstanding negotiation counts on the dip
+ * - marks the dip as undergoing negotiation (devi_ct_neg)
+ * - event of type CTE_NEG is published
+ * b. an event that is synchronous, breaks the contract and NONEG is set
+ * - sets the final result to CT_NACK, event is blocked
+ * - does not publish an event
+ * c. event is asynchronous and breaks the contract
+ * - publishes a critical event irrespect of whether the NONEG
+ * flag is set, since the contract will be broken and contract
+ * owner needs to be informed.
+ * d. No contract breakage but the owner has subscribed to the event
+ * - publishes the event irrespective of the NONEG event as the
+ * owner has explicitly subscribed to the event.
+ * e. NEGEND event
+ * - publishes a critical event. Should only be doing this if
+ * if NONEG is not set.
+ * f. all other events
+ * - Since a contract is not broken and this event has not been
+ * subscribed to, this event does not need to be published for
+ * for this contract.
+ *
+ * Once an event is published, what happens next depends on the type of
+ * event:
+ *
+ * a. NEGEND event
+ * - cleanup all state associated with the preceding negotiation
+ * and return CT_ACK to the caller of contract_device_publish()
+ * b. NACKed event
+ * - One or more contracts had the NONEG term, so the event was
+ * blocked. Return CT_NACK to the caller.
+ * c. Negotiated event
+ * - Call wait_for_acks() to wait for responses from contract
+ * holders. The end result is either CT_ACK (event is permitted),
+ * CT_NACK (event is blocked) or CT_NONE (no contract owner)
+ * responded. This result is returned back to the caller.
+ * d. All other events
+ * - If the event was asynchronous (i.e. not negotiated) or
+ * a contract was not broken return CT_ACK to the caller.
+ *
+ * Note: this routine consumes "tnvl" - it is freed on all return paths.
+ */
+static uint_t
+contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type,
+ uint_t evtype, nvlist_t *tnvl)
+{
+ cont_device_t *ctd;
+ uint_t result = CT_NONE;
+ uint64_t evid = 0;
+ uint64_t nevid = 0;
+ char *path = NULL;
+ int negend;
+ int match;
+ int sync = 0;
+ contract_t *ct;
+ ct_kevent_t *event;
+ nvlist_t *nvl;
+ int broken = 0;
+
+ ASSERT(dip);
+ ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
+ ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
+ (spec_type == S_IFBLK || spec_type == S_IFCHR));
+ ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT));
+
+ /* Is this a synchronous state change ? */
+ if (evtype != CT_EV_NEGEND) {
+ sync = is_sync_neg(get_state(dip), evtype);
+ /* NOP if unsupported transition */
+ if (sync == -2 || sync == -1) {
+ /* remember to NOP the matching NEGEND too */
+ DEVI(dip)->devi_flags |= DEVI_CT_NOP;
+ result = (sync == -2) ? CT_ACK : CT_NONE;
+ goto out;
+ }
+ CT_DEBUG((CE_NOTE, "publish: is%s sync state change",
+ sync ? "" : " not"));
+ } else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) {
+ DEVI(dip)->devi_flags &= ~DEVI_CT_NOP;
+ result = CT_ACK;
+ goto out;
+ }
+
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ (void) ddi_pathname(dip, path);
+
+ mutex_enter(&(DEVI(dip)->devi_ct_lock));
+
+ /*
+ * Negotiation end - set the state of the device in the contract
+ */
+ if (evtype == CT_EV_NEGEND) {
+ CT_DEBUG((CE_NOTE, "publish: negend: setting cond state"));
+ set_cond_state(dip);
+ }
+
+ /*
+ * If this device didn't go through negotiation, don't publish
+ * a NEGEND event - simply release the barrier to allow other
+ * device events in.
+ */
+ negend = 0;
+ if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) {
+ CT_DEBUG((CE_NOTE, "publish: no negend reqd. release barrier"));
+ ct_barrier_release(dip);
+ mutex_exit(&(DEVI(dip)->devi_ct_lock));
+ result = CT_ACK;
+ goto out;
+ } else if (evtype == CT_EV_NEGEND) {
+ /*
+ * There are negotiated contract breakages that
+ * need a NEGEND event
+ */
+ ASSERT(ct_barrier_held(dip));
+ negend = 1;
+ CT_DEBUG((CE_NOTE, "publish: setting negend flag"));
+ } else {
+ /*
+ * This is a new event, not a NEGEND event. Wait for previous
+ * contract events to complete.
+ */
+ ct_barrier_acquire(dip);
+ }
+
+
+ match = 0;
+ for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
+ ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
+
+ ctid_t ctid;
+ size_t len = strlen(path);
+
+ mutex_enter(&ctd->cond_contract.ct_lock);
+
+ ASSERT(ctd->cond_dip == dip);
+ ASSERT(ctd->cond_minor);
+ /* cond_minor is the dip path plus a ":minor" suffix */
+ ASSERT(strncmp(ctd->cond_minor, path, len) == 0 &&
+ ctd->cond_minor[len] == ':');
+
+ if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) {
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ continue;
+ }
+ if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) {
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ continue;
+ }
+
+ /* We have a matching contract */
+ match = 1;
+ ctid = ctd->cond_contract.ct_id;
+ CT_DEBUG((CE_NOTE, "publish: found matching contract: %d",
+ ctid));
+
+ /*
+ * There are 4 possible cases
+ * 1. A contract is broken (dev not in acceptable state) and
+ * the state change is synchronous - start negotiation
+ * by sending a CTE_NEG critical event.
+ * 2. A contract is broken and the state change is
+ * asynchronous - just send a critical event and
+ * break the contract.
+ * 3. Contract is not broken, but consumer has subscribed
+ * to the event as a critical or informative event
+ * - just send the appropriate event
+ * 4. contract waiting for negend event - just send the critical
+ * NEGEND event.
+ */
+ broken = 0;
+ if (!negend && !(evtype & ctd->cond_aset)) {
+ broken = 1;
+ CT_DEBUG((CE_NOTE, "publish: Contract broken: %d",
+ ctid));
+ }
+
+ /*
+ * Don't send event if
+ * - contract is not broken AND
+ * - contract holder has not subscribed to this event AND
+ * - contract not waiting for a NEGEND event
+ */
+ if (!broken && !EVSENDP(ctd, evtype) &&
+ !ctd->cond_neg) {
+ CT_DEBUG((CE_NOTE, "contract_device_publish(): "
+ "contract (%d): no publish reqd: event %d",
+ ctd->cond_contract.ct_id, evtype));
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ continue;
+ }
+
+ /*
+ * Note: need to kmem_zalloc() the event so mutexes are
+ * initialized automatically
+ */
+ ct = &ctd->cond_contract;
+ event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
+ event->cte_type = evtype;
+
+ if (broken && sync) {
+ /* case 1: broken + synchronous -> negotiate */
+ CT_DEBUG((CE_NOTE, "publish: broken + sync: "
+ "ctid: %d", ctid));
+ ASSERT(!negend);
+ ASSERT(ctd->cond_currev_id == 0);
+ ASSERT(ctd->cond_currev_type == 0);
+ ASSERT(ctd->cond_currev_ack == 0);
+ ASSERT(ctd->cond_neg == 0);
+ if (ctd->cond_noneg) {
+ /* Nothing to publish. Event has been blocked */
+ CT_DEBUG((CE_NOTE, "publish: sync and noneg:"
+ "not publishing blocked ev: ctid: %d",
+ ctid));
+ result = CT_NACK;
+ kmem_free(event, sizeof (ct_kevent_t));
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ continue;
+ }
+ event->cte_flags = CTE_NEG; /* critical neg. event */
+ ctd->cond_currev_type = event->cte_type;
+ ct_barrier_incr(dip);
+ DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */
+ ctd->cond_neg = 1;
+ } else if (broken && !sync) {
+ /* case 2: broken + asynchronous -> critical event */
+ CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d",
+ ctid));
+ ASSERT(!negend);
+ ASSERT(ctd->cond_currev_id == 0);
+ ASSERT(ctd->cond_currev_type == 0);
+ ASSERT(ctd->cond_currev_ack == 0);
+ ASSERT(ctd->cond_neg == 0);
+ event->cte_flags = 0; /* critical event */
+ } else if (EVSENDP(ctd, event->cte_type)) {
+ /* case 3: not broken but holder subscribed */
+ CT_DEBUG((CE_NOTE, "publish: event suscrib: ctid: %d",
+ ctid));
+ ASSERT(!negend);
+ ASSERT(ctd->cond_currev_id == 0);
+ ASSERT(ctd->cond_currev_type == 0);
+ ASSERT(ctd->cond_currev_ack == 0);
+ ASSERT(ctd->cond_neg == 0);
+ event->cte_flags = EVINFOP(ctd, event->cte_type) ?
+ CTE_INFO : 0;
+ } else if (ctd->cond_neg) {
+ /* case 4: NEGEND for a negotiated contract */
+ CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid));
+ ASSERT(negend);
+ ASSERT(ctd->cond_noneg == 0);
+ nevid = ctd->cond_contract.ct_nevent ?
+ ctd->cond_contract.ct_nevent->cte_id : 0;
+ ASSERT(ctd->cond_currev_id == nevid);
+ event->cte_flags = 0; /* NEGEND is always critical */
+ ctd->cond_currev_id = 0;
+ ctd->cond_currev_type = 0;
+ ctd->cond_currev_ack = 0;
+ ctd->cond_neg = 0;
+ } else {
+ CT_DEBUG((CE_NOTE, "publish: not publishing event for "
+ "ctid: %d, evtype: %d",
+ ctd->cond_contract.ct_id, event->cte_type));
+ ASSERT(!negend);
+ ASSERT(ctd->cond_currev_id == 0);
+ ASSERT(ctd->cond_currev_type == 0);
+ ASSERT(ctd->cond_currev_ack == 0);
+ ASSERT(ctd->cond_neg == 0);
+ kmem_free(event, sizeof (ct_kevent_t));
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ continue;
+ }
+
+ /* per-contract copy of the caller's nvlist, if any */
+ nvl = NULL;
+ if (tnvl) {
+ VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0);
+ if (negend) {
+ int32_t newct = 0;
+ ASSERT(ctd->cond_noneg == 0);
+ VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid)
+ == 0);
+ VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT,
+ &newct) == 0);
+ VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
+ newct == 1 ? 0 :
+ ctd->cond_contract.ct_id) == 0);
+ CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d "
+ "CTS_NEVID: %llu, CTS_NEWCT: %s",
+ ctid, (unsigned long long)nevid,
+ newct ? "success" : "failure"));
+
+ }
+ }
+
+ /* arm the negotiation/quantum timers for negotiated events */
+ if (ctd->cond_neg) {
+ ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1);
+ ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1);
+ ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt();
+ ctd->cond_contract.ct_qtime.ctm_start =
+ ctd->cond_contract.ct_ntime.ctm_start;
+ }
+
+ /*
+ * by holding the dip's devi_ct_lock we ensure that
+ * all ACK/NACKs are held up until we have finished
+ * publishing to all contracts.
+ */
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ evid = cte_publish_all(ct, event, nvl, NULL);
+ mutex_enter(&ctd->cond_contract.ct_lock);
+
+ if (ctd->cond_neg) {
+ ASSERT(!negend);
+ ASSERT(broken);
+ ASSERT(sync);
+ ASSERT(!ctd->cond_noneg);
+ CT_DEBUG((CE_NOTE, "publish: sync break, setting evid"
+ ": %d", ctid));
+ ctd->cond_currev_id = evid;
+ } else if (negend) {
+ /* disarm the timers now that negotiation is over */
+ ctd->cond_contract.ct_ntime.ctm_start = -1;
+ ctd->cond_contract.ct_qtime.ctm_start = -1;
+ }
+ mutex_exit(&ctd->cond_contract.ct_lock);
+ }
+
+ /*
+ * If "negend" set counter back to initial state (-1) so that
+ * other events can be published. Also clear the negotiation flag
+ * on dip.
+ *
+ * 0 .. n are used for counting.
+ * -1 indicates counter is available for use.
+ */
+ if (negend) {
+ /*
+ * devi_ct_count not necessarily 0. We may have
+ * timed out in which case, count will be non-zero.
+ */
+ ct_barrier_release(dip);
+ DEVI(dip)->devi_ct_neg = 0;
+ CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p",
+ (void *)dip));
+ } else if (DEVI(dip)->devi_ct_neg) {
+ ASSERT(match);
+ ASSERT(!ct_barrier_empty(dip));
+ CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p",
+ DEVI(dip)->devi_ct_count, (void *)dip));
+ } else {
+ /*
+ * for non-negotiated events or subscribed events or no
+ * matching contracts
+ */
+ ASSERT(ct_barrier_empty(dip));
+ ASSERT(DEVI(dip)->devi_ct_neg == 0);
+ CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: "
+ "dip=%p", (void *)dip));
+
+ /*
+ * only this function when called from contract_device_negend()
+ * can reset the counter to READY state i.e. -1. This function
+ * is so called for every event whether a NEGEND event is needed
+ * or not, but the negend event is only published if the event
+ * whose end they signal is a negotiated event for the contract.
+ */
+ }
+
+ if (!match) {
+ /* No matching contracts */
+ CT_DEBUG((CE_NOTE, "publish: No matching contract"));
+ result = CT_NONE;
+ } else if (result == CT_NACK) {
+ /* a non-negotiable contract exists and this is a neg. event */
+ CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract"));
+ (void) wait_for_acks(dip, dev, spec_type, evtype);
+ } else if (DEVI(dip)->devi_ct_neg) {
+ /* one or more contracts going through negotations */
+ CT_DEBUG((CE_NOTE, "publish: sync contract: waiting"));
+ result = wait_for_acks(dip, dev, spec_type, evtype);
+ } else {
+ /* no negotiated contracts or no broken contracts or NEGEND */
+ CT_DEBUG((CE_NOTE, "publish: async/no-break/negend"));
+ result = CT_ACK;
+ }
+
+ /*
+ * Release the lock only now so that the only point where we
+ * drop the lock is in wait_for_acks(). This is so that we don't
+ * miss cv_signal/cv_broadcast from contract holders
+ */
+ CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock"));
+ mutex_exit(&(DEVI(dip)->devi_ct_lock));
+
+out:
+ if (tnvl)
+ nvlist_free(tnvl);
+ if (path)
+ kmem_free(path, MAXPATHLEN);
+
+
+ CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result)));
+ return (result);
+}
+
+
+/*
+ * contract_device_offline
+ *
+ * Event publishing routine called by I/O framework when a device is offlined.
+ */
+ct_ack_t
+contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type)
+{
+ nvlist_t *nvl;
+ uint_t result;
+ uint_t evtype;
+
+ VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
+ evtype = CT_DEV_EV_OFFLINE;
+ result = contract_device_publish(dip, dev, spec_type, evtype, nvl);
+
+ /*
+ * If a contract offline is NACKED, the framework expects us to call
+ * NEGEND ourselves, since we know the final result
+ */
+ if (result == CT_NACK) {
+ contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE);
+ }
+
+ return (result);
+}
+
+/*
+ * contract_device_degrade
+ *
+ * Event publishing routine called by I/O framework when a device
+ * moves to degrade state.
+ */
+/*ARGSUSED*/
+void
+contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type)
+{
+ nvlist_t *nvl;
+ uint_t evtype;
+
+ VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
+ evtype = CT_DEV_EV_DEGRADED;
+ (void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
+}
+
+/*
+ * contract_device_undegrade
+ *
+ * Event publishing routine called by I/O framework when a device
+ * moves from degraded state to online state.
+ */
+/*ARGSUSED*/
+void
+contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type)
+{
+ nvlist_t *nvl;
+ uint_t evtype;
+
+ VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
+ evtype = CT_DEV_EV_ONLINE;
+ (void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
+}
+
+/*
+ * For all contracts which have undergone a negotiation (because the device
+ * moved out of the acceptable state for that contract and the state
+ * change is synchronous i.e. requires negotiation) this routine publishes
+ * a CT_EV_NEGEND event with the final disposition of the event.
+ *
+ * This event is always a critical event.
+ */
+void
+contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result)
+{
+ nvlist_t *nvl;
+ uint_t evtype;
+
+ ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE);
+
+ CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, "
+ "dip: %p", result, (void *)dip));
+
+ VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
+ result == CT_EV_SUCCESS ? 1 : 0) == 0);
+
+ evtype = CT_EV_NEGEND;
+ (void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
+
+ CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p",
+ (void *)dip));
+}
+
+/*
+ * Wrapper routine called by other subsystems (such as LDI) to start
+ * negotiations when a synchronous device state change occurs.
+ * Returns CT_ACK or CT_NACK.
+ */
+ct_ack_t
+contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type,
+ uint_t evtype)
+{
+ int result;
+
+ ASSERT(dip);
+ ASSERT(dev != NODEV);
+ ASSERT(dev != DDI_DEV_T_ANY);
+ ASSERT(dev != DDI_DEV_T_NONE);
+ ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
+
+ switch (evtype) {
+ case CT_DEV_EV_OFFLINE:
+ result = contract_device_offline(dip, dev, spec_type);
+ break;
+ default:
+ cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation "
+ "not supported: event (%d) for dev_t (%lu) and spec (%d), "
+ "dip (%p)", evtype, dev, spec_type, (void *)dip);
+ result = CT_NACK;
+ break;
+ }
+
+ return (result);
+}
+
+/*
+ * A wrapper routine called by other subsystems (such as the LDI) to
+ * finalize event processing for a state change event. For synchronous
+ * state changes, this publishes NEGEND events. For asynchronous i.e.
+ * non-negotiable events this publishes the event.
+ */
+void
+contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type,
+ uint_t evtype, int ct_result)
+{
+ ASSERT(dip);
+ ASSERT(dev != NODEV);
+ ASSERT(dev != DDI_DEV_T_ANY);
+ ASSERT(dev != DDI_DEV_T_NONE);
+ ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
+
+ switch (evtype) {
+ case CT_DEV_EV_OFFLINE:
+ contract_device_negend(dip, dev, spec_type, ct_result);
+ break;
+ case CT_DEV_EV_DEGRADED:
+ contract_device_degrade(dip, dev, spec_type);
+ contract_device_negend(dip, dev, spec_type, ct_result);
+ break;
+ case CT_DEV_EV_ONLINE:
+ contract_device_undegrade(dip, dev, spec_type);
+ contract_device_negend(dip, dev, spec_type, ct_result);
+ break;
+ default:
+ cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported "
+ "event (%d) for dev_t (%lu) and spec (%d), dip (%p)",
+ evtype, dev, spec_type, (void *)dip);
+ break;
+ }
+}
+
+/*
+ * Called by I/O framework when a devinfo node is freed to remove the
+ * association between a devinfo node and its contracts.
+ */
+void
+contract_device_remove_dip(dev_info_t *dip)
+{
+ cont_device_t *ctd;
+ cont_device_t *next;
+ contract_t *ct;
+
+ mutex_enter(&(DEVI(dip)->devi_ct_lock));
+ ct_barrier_wait_for_release(dip);
+
+ for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; ctd = next) {
+ next = list_next(&(DEVI(dip)->devi_ct), ctd);
+ list_remove(&(DEVI(dip)->devi_ct), ctd);
+ ct = &ctd->cond_contract;
+ /*
+ * Unlink the dip associated with this contract
+ */
+ mutex_enter(&ct->ct_lock);
+ ASSERT(ctd->cond_dip == dip);
+ ctd->cond_dip = NULL; /* no longer linked to dip */
+ contract_rele(ct); /* remove hold for dip linkage */
+ CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: "
+ "ctid: %d", ct->ct_id));
+ mutex_exit(&ct->ct_lock);
+ }
+ ASSERT(list_is_empty(&(DEVI(dip)->devi_ct)));
+ mutex_exit(&(DEVI(dip)->devi_ct_lock));
+}
+
+/*
+ * Barrier related routines
+ */
+static void
+ct_barrier_acquire(dev_info_t *dip)
+{
+ ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+ CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier"));
+ while (DEVI(dip)->devi_ct_count != -1)
+ cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
+ DEVI(dip)->devi_ct_count = 0;
+ CT_DEBUG((CE_NOTE, "ct_barrier_acquire: thread owns barrier"));
+}
+
+static void
+ct_barrier_release(dev_info_t *dip)
+{
+ ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+ ASSERT(DEVI(dip)->devi_ct_count != -1);
+ DEVI(dip)->devi_ct_count = -1;
+ cv_broadcast(&(DEVI(dip)->devi_ct_cv));
+ CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier"));
+}
+
+static int
+ct_barrier_held(dev_info_t *dip)
+{
+ ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+ return (DEVI(dip)->devi_ct_count != -1);
+}
+
+static int
+ct_barrier_empty(dev_info_t *dip)
+{
+ ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+ ASSERT(DEVI(dip)->devi_ct_count != -1);
+ return (DEVI(dip)->devi_ct_count == 0);
+}
+
+static void
+ct_barrier_wait_for_release(dev_info_t *dip)
+{
+ ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+ while (DEVI(dip)->devi_ct_count != -1)
+ cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
+}
+
+static void
+ct_barrier_decr(dev_info_t *dip)
+{
+ CT_DEBUG((CE_NOTE, "barrier_decr: ct_count before decr: %d",
+ DEVI(dip)->devi_ct_count));
+
+ ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+ ASSERT(DEVI(dip)->devi_ct_count > 0);
+
+ DEVI(dip)->devi_ct_count--;
+ if (DEVI(dip)->devi_ct_count == 0) {
+ cv_broadcast(&DEVI(dip)->devi_ct_cv);
+ CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast"));
+ }
+}
+
+static void
+ct_barrier_incr(dev_info_t *dip)
+{
+ ASSERT(ct_barrier_held(dip));
+ DEVI(dip)->devi_ct_count++;
+}
+
+static int
+ct_barrier_wait_for_empty(dev_info_t *dip, int secs)
+{
+ clock_t abstime;
+
+ ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+
+ abstime = ddi_get_lbolt() + drv_usectohz(secs*1000000);
+ while (DEVI(dip)->devi_ct_count) {
+ if (cv_timedwait(&(DEVI(dip)->devi_ct_cv),
+ &(DEVI(dip)->devi_ct_lock), abstime) == -1) {
+ return (-1);
+ }
+ }
+ return (0);
+}
diff --git a/usr/src/uts/common/contract/process.c b/usr/src/uts/common/contract/process.c
index 8240051f00..c92ce34352 100644
--- a/usr/src/uts/common/contract/process.c
+++ b/usr/src/uts/common/contract/process.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -502,7 +501,7 @@ contract_process_adopt(contract_t *ct, proc_t *p)
}
/*
- * contract_process_status
+ * contract_process_abandon
*
* The process contract abandon entry point.
*/
@@ -632,11 +631,23 @@ contract_process_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl,
}
}
+/*ARGSUSED*/
+static int
+contract_process_newct(contract_t *ct)
+{
+ return (0);
+}
+
+/* process contracts don't negotiate */
static contops_t contract_process_ops = {
contract_process_free, /* contop_free */
contract_process_abandon, /* contop_abandon */
contract_process_destroy, /* contop_destroy */
- contract_process_status /* contop_status */
+ contract_process_status, /* contop_status */
+ contract_ack_inval, /* contop_ack */
+ contract_ack_inval, /* contop_nack */
+ contract_qack_inval, /* contop_qack */
+ contract_process_newct /* contop_newct */
};
/*
@@ -774,7 +785,7 @@ contract_process_exit(cont_process_t *ctp, proc_t *p, int exitstatus)
event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
event->cte_flags = EVINFOP(ctp, CT_PR_EV_EXIT) ? CTE_INFO : 0;
event->cte_type = CT_PR_EV_EXIT;
- cte_publish_all(ct, event, nvl, NULL);
+ (void) cte_publish_all(ct, event, nvl, NULL);
mutex_enter(&ct->ct_lock);
}
if (empty) {
@@ -793,7 +804,7 @@ contract_process_exit(cont_process_t *ctp, proc_t *p, int exitstatus)
event->cte_flags = EVINFOP(ctp, CT_PR_EV_EMPTY) ?
CTE_INFO : 0;
event->cte_type = CT_PR_EV_EMPTY;
- cte_publish_all(ct, event, nvl, NULL);
+ (void) cte_publish_all(ct, event, nvl, NULL);
mutex_enter(&ct->ct_lock);
}
@@ -877,7 +888,7 @@ contract_process_fork(ctmpl_process_t *rtmpl, proc_t *cp, proc_t *pp,
event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
event->cte_flags = EVINFOP(ctp, CT_PR_EV_FORK) ? CTE_INFO : 0;
event->cte_type = CT_PR_EV_FORK;
- cte_publish_all(ct, event, nvl, NULL);
+ (void) cte_publish_all(ct, event, nvl, NULL);
}
return (ctp);
}
@@ -924,7 +935,7 @@ contract_process_core(cont_process_t *ctp, proc_t *p, int sig,
event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
event->cte_flags = EVINFOP(ctp, CT_PR_EV_CORE) ? CTE_INFO : 0;
event->cte_type = CT_PR_EV_CORE;
- cte_publish_all(ct, event, nvl, gnvl);
+ (void) cte_publish_all(ct, event, nvl, gnvl);
}
if (EVFATALP(ctp, CT_PR_EV_CORE)) {
@@ -956,7 +967,7 @@ contract_process_hwerr(cont_process_t *ctp, proc_t *p)
event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
event->cte_flags = EVINFOP(ctp, CT_PR_EV_HWERR) ? CTE_INFO : 0;
event->cte_type = CT_PR_EV_HWERR;
- cte_publish_all(ct, event, nvl, NULL);
+ (void) cte_publish_all(ct, event, nvl, NULL);
}
if (EVFATALP(ctp, CT_PR_EV_HWERR)) {
@@ -1006,7 +1017,7 @@ contract_process_sig(cont_process_t *ctp, proc_t *p, int sig, pid_t pid,
event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
event->cte_flags = EVINFOP(ctp, CT_PR_EV_SIGNAL) ? CTE_INFO : 0;
event->cte_type = CT_PR_EV_SIGNAL;
- cte_publish_all(ct, event, nvl, gnvl);
+ (void) cte_publish_all(ct, event, nvl, gnvl);
}
if (EVFATALP(ctp, CT_PR_EV_SIGNAL)) {
diff --git a/usr/src/uts/common/fs/ctfs/ctfs_ctl.c b/usr/src/uts/common/fs/ctfs/ctfs_ctl.c
index f5a0514565..da293cbb21 100644
--- a/usr/src/uts/common/fs/ctfs/ctfs_ctl.c
+++ b/usr/src/uts/common/fs/ctfs/ctfs_ctl.c
@@ -177,6 +177,7 @@ ctfs_ctl_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, cred_t *cr,
contract_t *ct = ctlnode->ctfs_ctl_contract;
int error = 0;
uint64_t event;
+ int ack;
switch (cmd) {
case CT_CABANDON:
@@ -184,15 +185,21 @@ ctfs_ctl_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, cred_t *cr,
break;
case CT_CACK:
+ case CT_CNACK:
if (copyin((void *)arg, &event, sizeof (uint64_t)))
return (EFAULT);
- error = contract_ack(ct, event);
+ ack = (cmd == CT_CACK) ? CT_ACK : CT_NACK;
+ error = contract_ack(ct, event, ack);
break;
case CT_CNEWCT:
+ error = contract_newct(ct);
break;
case CT_CQREQ:
+ if (copyin((void *)arg, &event, sizeof (uint64_t)))
+ return (EFAULT);
+ error = contract_qack(ct, event);
break;
case CT_CADOPT:
diff --git a/usr/src/uts/common/fs/ctfs/ctfs_tmpl.c b/usr/src/uts/common/fs/ctfs/ctfs_tmpl.c
index 28d0c93662..d99b8f56e8 100644
--- a/usr/src/uts/common/fs/ctfs/ctfs_tmpl.c
+++ b/usr/src/uts/common/fs/ctfs/ctfs_tmpl.c
@@ -114,6 +114,7 @@ ctfs_tmpl_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, cred_t *cr,
{
ctfs_tmplnode_t *tmplnode = vp->v_data;
ct_param_t param;
+ ctid_t ctid;
int error;
switch (cmd) {
@@ -127,7 +128,11 @@ ctfs_tmpl_ioctl(vnode_t *vp, int cmd, intptr_t arg, int flag, cred_t *cr,
break;
case CT_TCREATE:
ASSERT(tmplnode->ctfs_tmn_tmpl != NULL);
- return (ctmpl_create(tmplnode->ctfs_tmn_tmpl));
+ error = ctmpl_create(tmplnode->ctfs_tmn_tmpl, &ctid);
+ if (error)
+ return (error);
+ *rvalp = ctid;
+ break;
case CT_TSET:
if (copyin((void *)arg, &param, sizeof (ct_param_t)))
return (EFAULT);
diff --git a/usr/src/uts/common/fs/specfs/specsubr.c b/usr/src/uts/common/fs/specfs/specsubr.c
index ea87c688d6..85d9089b82 100644
--- a/usr/src/uts/common/fs/specfs/specsubr.c
+++ b/usr/src/uts/common/fs/specfs/specsubr.c
@@ -70,6 +70,7 @@
struct vfs spec_vfs;
static dev_t specdev;
struct kmem_cache *snode_cache;
+int spec_debug = 0;
static struct snode *sfind(dev_t, vtype_t, struct vnode *);
static struct vnode *get_cvp(dev_t, vtype_t, struct snode *, int *);
@@ -259,6 +260,54 @@ makespecvp(dev_t dev, vtype_t type)
return (svp);
}
+
+/*
+ * This function is called from spec_assoc_vp_with_devi(). That function
+ * associates a "new" dip with a common snode, releasing (any) old dip
+ * in the process. This function (spec_assoc_fence()) looks at the "new dip"
+ * and determines whether the snode should be fenced of or not. As the table
+ * below indicates, the value of old-dip is a don't care for all cases.
+ *
+ * old-dip new-dip common-snode
+ * =========================================
+ * Don't care NULL unfence
+ * Don't care retired fence
+ * Don't care not-retired unfence
+ *
+ * Since old-dip value is a "don't care", it is not passed into this function.
+ */
+static void
+spec_assoc_fence(dev_info_t *ndip, vnode_t *vp)
+{
+ int fence;
+ struct snode *csp;
+
+ ASSERT(vp);
+ ASSERT(vn_matchops(vp, spec_getvnodeops()));
+
+ fence = 0;
+ if (ndip != NULL) {
+ mutex_enter(&DEVI(ndip)->devi_lock);
+ if (DEVI(ndip)->devi_flags & DEVI_RETIRED)
+ fence = 1;
+ mutex_exit(&DEVI(ndip)->devi_lock);
+ }
+
+ csp = VTOCS(vp);
+ ASSERT(csp);
+
+ /* SFENCED flag only set on common snode */
+ mutex_enter(&csp->s_lock);
+ if (fence)
+ csp->s_flag |= SFENCED;
+ else
+ csp->s_flag &= ~SFENCED;
+ mutex_exit(&csp->s_lock);
+
+ FENDBG((CE_NOTE, "%sfenced common snode (%p) for new dip=%p",
+ fence ? "" : "un", (void *)csp, (void *)ndip));
+}
+
/*
* Associate the common snode with a devinfo node. This is called from:
*
@@ -322,6 +371,8 @@ spec_assoc_vp_with_devi(struct vnode *vp, dev_info_t *dip)
csp->s_flag &= ~SSIZEVALID;
mutex_exit(&csp->s_lock);
+ spec_assoc_fence(dip, vp);
+
/* release the old */
if (olddip)
ddi_release_devi(olddip);
@@ -889,3 +940,113 @@ spec_is_selfclone(vnode_t *vp)
return (0);
}
+
+/*
+ * We may be invoked with a NULL vp in which case we fence off
+ * all snodes associated with dip
+ */
+int
+spec_fence_snode(dev_info_t *dip, struct vnode *vp)
+{
+ struct snode *sp;
+ struct snode *csp;
+ int retired;
+ int i;
+ char *path;
+ int emitted;
+
+ ASSERT(dip);
+
+ retired = 0;
+ mutex_enter(&DEVI(dip)->devi_lock);
+ if (DEVI(dip)->devi_flags & DEVI_RETIRED)
+ retired = 1;
+ mutex_exit(&DEVI(dip)->devi_lock);
+
+ if (!retired)
+ return (0);
+
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ (void) ddi_pathname(dip, path);
+
+
+ if (vp != NULL) {
+ ASSERT(vn_matchops(vp, spec_getvnodeops()));
+ csp = VTOCS(vp);
+ ASSERT(csp);
+ mutex_enter(&csp->s_lock);
+ csp->s_flag |= SFENCED;
+ mutex_exit(&csp->s_lock);
+ FENDBG((CE_NOTE, "fenced off snode(%p) for dip: %s",
+ (void *)csp, path));
+ kmem_free(path, MAXPATHLEN);
+ return (0);
+ }
+
+ emitted = 0;
+ mutex_enter(&stable_lock);
+ for (i = 0; i < STABLESIZE; i++) {
+ for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
+ ASSERT(sp->s_commonvp);
+ csp = VTOS(sp->s_commonvp);
+ if (csp->s_dip == dip) {
+ /* fence off the common snode */
+ mutex_enter(&csp->s_lock);
+ csp->s_flag |= SFENCED;
+ mutex_exit(&csp->s_lock);
+ if (!emitted) {
+ FENDBG((CE_NOTE, "fenced 1 of N"));
+ emitted++;
+ }
+ }
+ }
+ }
+ mutex_exit(&stable_lock);
+
+ FENDBG((CE_NOTE, "fenced off all snodes for dip: %s", path));
+ kmem_free(path, MAXPATHLEN);
+
+ return (0);
+}
+
+
+int
+spec_unfence_snode(dev_info_t *dip)
+{
+ struct snode *sp;
+ struct snode *csp;
+ int i;
+ char *path;
+ int emitted;
+
+ ASSERT(dip);
+
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ (void) ddi_pathname(dip, path);
+
+ emitted = 0;
+ mutex_enter(&stable_lock);
+ for (i = 0; i < STABLESIZE; i++) {
+ for (sp = stable[i]; sp != NULL; sp = sp->s_next) {
+ ASSERT(sp->s_commonvp);
+ csp = VTOS(sp->s_commonvp);
+ ASSERT(csp);
+ if (csp->s_dip == dip) {
+ /* unfence the common snode */
+ mutex_enter(&csp->s_lock);
+ csp->s_flag &= ~SFENCED;
+ mutex_exit(&csp->s_lock);
+ if (!emitted) {
+ FENDBG((CE_NOTE, "unfenced 1 of N"));
+ emitted++;
+ }
+ }
+ }
+ }
+ mutex_exit(&stable_lock);
+
+ FENDBG((CE_NOTE, "unfenced all snodes for dip: %s", path));
+ kmem_free(path, MAXPATHLEN);
+
+ return (0);
+}
diff --git a/usr/src/uts/common/fs/specfs/specvnops.c b/usr/src/uts/common/fs/specfs/specvnops.c
index 1841d107fb..ffaba36a21 100644
--- a/usr/src/uts/common/fs/specfs/specvnops.c
+++ b/usr/src/uts/common/fs/specfs/specvnops.c
@@ -93,6 +93,7 @@
#include <sys/esunddi.h>
#include <sys/autoconf.h>
#include <sys/sunndi.h>
+#include <sys/contract/device_impl.h>
static int spec_open(struct vnode **, int, struct cred *);
@@ -153,8 +154,23 @@ static int spec_pathconf(struct vnode *, int, ulong_t *, struct cred *);
mutex_exit(&csp->s_lock); \
}
+#define S_ISFENCED(sp) ((VTOS((sp)->s_commonvp))->s_flag & SFENCED)
+
struct vnodeops *spec_vnodeops;
+/*
+ * *PLEASE NOTE*: If you add new entry points to specfs, do
+ * not forget to add support for fencing. A fenced snode
+ * is indicated by the SFENCED flag in the common snode.
+ * If a snode is fenced, determine if your entry point is
+ * a configuration operation (Example: open), a detection
+ * operation (Example: getattr), an I/O operation (Example: ioctl())
+ * or an unconfiguration operation (Example: close). If it is
+ * a configuration or detection operation, fail the operation
+ * for a fenced snode with an ENXIO or EIO as appropriate. If
+ * it is any other operation, let it through.
+ */
+
const fs_operation_def_t spec_vnodeops_template[] = {
VOPNAME_OPEN, { .vop_open = spec_open },
VOPNAME_CLOSE, { .vop_close = spec_close },
@@ -530,6 +546,7 @@ spec_open(struct vnode **vpp, int flag, struct cred *cr)
struct stdata *stp;
dev_info_t *dip;
int error, type;
+ contract_t *ct = NULL;
int open_returns_eintr;
flag &= ~FCREAT; /* paranoia */
@@ -579,6 +596,10 @@ spec_open(struct vnode **vpp, int flag, struct cred *cr)
ddi_release_devi(dip); /* from e_ddi_hold_devi_by_dev */
}
+ /* check if device fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
+
#ifdef DEBUG
/* verify attach/open exclusion guarantee */
dip = csp->s_dip;
@@ -628,6 +649,18 @@ spec_open(struct vnode **vpp, int flag, struct cred *cr)
csp = VTOS(sp->s_commonvp);
}
+ /*
+ * create contracts only for userland opens
+ * Successful open and cloning is done at this point.
+ */
+ if (error == 0 && !(flag & FKLYR)) {
+ int spec_type;
+ spec_type = (STOV(csp)->v_type == VCHR) ? S_IFCHR : S_IFBLK;
+ if (contract_device_open(newdev, spec_type, NULL) != 0) {
+ error = EIO;
+ }
+ }
+
if (error == 0) {
sp->s_size = SPEC_SIZE(csp);
@@ -729,6 +762,19 @@ streams_open:
UNLOCK_CSP(csp);
}
+ /*
+ * create contracts only for userland opens
+ * Successful open and cloning is done at this point.
+ */
+ if (error == 0 && !(flag & FKLYR)) {
+ /* STREAM is of type S_IFCHR */
+ if (contract_device_open(newdev, S_IFCHR, &ct) != 0) {
+ UNLOCK_CSP(csp);
+ (void) spec_close(vp, flag, 1, 0, cr);
+ return (EIO);
+ }
+ }
+
if (error == 0) {
/* STREAMS devices don't have a size */
sp->s_size = csp->s_size = 0;
@@ -741,6 +787,11 @@ streams_open:
return (0);
/* strctty() was interrupted by a signal */
+ if (ct) {
+ /* we only create contracts for userland opens */
+ ASSERT(ttoproc(curthread));
+ (void) contract_abandon(ct, ttoproc(curthread), 0);
+ }
(void) spec_close(vp, flag, 1, 0, cr);
return (EINTR);
}
@@ -795,6 +846,7 @@ spec_close(
if (count > 1)
return (0);
+ /* we allow close to succeed even if device is fenced off */
sp = VTOS(vp);
cvp = sp->s_commonvp;
@@ -1157,6 +1209,13 @@ spec_ioctl(struct vnode *vp, int cmd, intptr_t arg, int mode, struct cred *cr,
if (vp->v_type != VCHR)
return (ENOTTY);
+
+ /*
+ * allow ioctls() to go through even for fenced snodes, as they
+ * may include unconfiguration operation - for example popping of
+ * streams modules.
+ */
+
sp = VTOS(vp);
dev = sp->s_dev;
if (STREAMSTAB(getmajor(dev))) {
@@ -1180,6 +1239,11 @@ spec_getattr(struct vnode *vp, struct vattr *vap, int flags, struct cred *cr)
vp = sp->s_commonvp;
}
sp = VTOS(vp);
+
+ /* we want stat() to fail with ENXIO if the device is fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
+
realvp = sp->s_realvp;
if (realvp == NULL) {
@@ -1258,6 +1322,10 @@ spec_setattr(
struct vnode *realvp;
int error;
+ /* fail with ENXIO if the device is fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
+
if (vp->v_type == VCHR && vp->v_stream && (vap->va_mask & AT_SIZE)) {
/*
* 1135080: O_TRUNC should have no effect on
@@ -1293,6 +1361,10 @@ spec_access(struct vnode *vp, int mode, int flags, struct cred *cr)
struct vnode *realvp;
struct snode *sp = VTOS(vp);
+ /* fail with ENXIO if the device is fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
+
if ((realvp = sp->s_realvp) != NULL)
return (VOP_ACCESS(realvp, mode, flags, cr));
else
@@ -1309,6 +1381,11 @@ spec_create(struct vnode *dvp, char *name, vattr_t *vap, enum vcexcl excl,
int mode, struct vnode **vpp, struct cred *cr, int flag)
{
int error;
+ struct snode *sp = VTOS(dvp);
+
+ /* fail with ENXIO if the device is fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
ASSERT(dvp && (dvp->v_flag & VROOT) && *name == '\0');
if (excl == NONEXCL) {
@@ -1333,6 +1410,8 @@ spec_fsync(struct vnode *vp, int syncflag, struct cred *cr)
struct vnode *cvp;
struct vattr va, vatmp;
+ /* allow syncing even if device is fenced off */
+
/* If times didn't change, don't flush anything. */
mutex_enter(&sp->s_lock);
if ((sp->s_flag & (SACC|SUPD|SCHG)) == 0 && vp->v_type != VBLK) {
@@ -2222,10 +2301,15 @@ spec_map(
struct cred *cred)
{
int error = 0;
+ struct snode *sp = VTOS(vp);
if (vp->v_flag & VNOMAP)
return (ENOSYS);
+ /* fail map with ENXIO if the device is fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
+
/*
* If file is locked, fail mapping attempt.
*/
@@ -2314,6 +2398,10 @@ spec_addmap(
if (vp->v_flag & VNOMAP)
return (ENOSYS);
+ /* fail with EIO if the device is fenced off */
+ if (S_ISFENCED(csp))
+ return (EIO);
+
npages = btopr(len);
LOCK_CSP(csp);
csp->s_mapcnt += npages;
@@ -2343,6 +2431,8 @@ spec_delmap(
ASSERT(vp != NULL && VTOS(vp)->s_commonvp == vp);
+ /* allow delmap to succeed even if device fenced off */
+
/*
* XXX Given the above assertion, this might not
* be a particularly sensible thing to test..
@@ -2389,6 +2479,8 @@ spec_delmap(
static int
spec_dump(struct vnode *vp, caddr_t addr, int bn, int count)
{
+ /* allow dump to succeed even if device fenced off */
+
ASSERT(vp->v_type == VBLK);
return (bdev_dump(vp->v_rdev, addr, bn, count));
}
@@ -2438,6 +2530,10 @@ spec_setsecattr(struct vnode *vp, vsecattr_t *vsap, int flag, struct cred *cr)
struct snode *sp = VTOS(vp);
int error;
+ /* fail with ENXIO if the device is fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
+
/*
* The acl(2) system calls VOP_RWLOCK on the file before setting an
* ACL, but since specfs does not serialize reads and writes, this
@@ -2464,6 +2560,10 @@ spec_getsecattr(struct vnode *vp, vsecattr_t *vsap, int flag, struct cred *cr)
struct vnode *realvp;
struct snode *sp = VTOS(vp);
+ /* fail with ENXIO if the device is fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
+
if ((realvp = sp->s_realvp) != NULL)
return (VOP_GETSECATTR(realvp, vsap, flag, cr));
else
@@ -2476,6 +2576,10 @@ spec_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr)
vnode_t *realvp;
struct snode *sp = VTOS(vp);
+ /* fail with ENXIO if the device is fenced off */
+ if (S_ISFENCED(sp))
+ return (ENXIO);
+
if ((realvp = sp->s_realvp) != NULL)
return (VOP_PATHCONF(realvp, cmd, valp, cr));
else
diff --git a/usr/src/uts/common/os/contract.c b/usr/src/uts/common/os/contract.c
index aadfb92e62..6fde3f5714 100644
--- a/usr/src/uts/common/os/contract.c
+++ b/usr/src/uts/common/os/contract.c
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -174,6 +173,8 @@
#include <sys/proc.h>
#include <sys/contract_impl.h>
#include <sys/contract/process_impl.h>
+#include <sys/dditypes.h>
+#include <sys/contract/device_impl.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/cmn_err.h>
@@ -181,6 +182,8 @@
#include <sys/policy.h>
#include <sys/zone.h>
#include <sys/task.h>
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
extern rctl_hndl_t rc_project_contract;
@@ -191,6 +194,7 @@ static kmutex_t contract_lock;
int ct_ntypes = CTT_MAXTYPE;
static ct_type_t *ct_types_static[CTT_MAXTYPE];
ct_type_t **ct_types = ct_types_static;
+int ct_debug;
static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int);
static void cte_queue_destroy(ct_equeue_t *);
@@ -237,6 +241,7 @@ contract_init(void)
* Initialize contract types.
*/
contract_process_init();
+ contract_device_init();
/*
* Initialize p0/lwp0 contract state.
@@ -310,6 +315,9 @@ contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data,
ct->ct_ev_crit = tmpl->ctmpl_ev_crit;
ct->ct_cookie = tmpl->ctmpl_cookie;
ct->ct_owner = author;
+ ct->ct_ntime.ctm_total = -1;
+ ct->ct_qtime.ctm_total = -1;
+ ct->ct_nevent = NULL;
/*
* Test project.max-contracts.
@@ -570,6 +578,12 @@ contract_abandon(contract_t *ct, proc_t *p, int explicit)
return (0);
}
+int
+contract_newct(contract_t *ct)
+{
+ return (ct->ct_type->ct_type_ops->contop_newct(ct));
+}
+
/*
* contract_adopt
*
@@ -647,11 +661,15 @@ contract_adopt(contract_t *ct, proc_t *p)
* Acknowledges receipt of a critical event.
*/
int
-contract_ack(contract_t *ct, uint64_t evid)
+contract_ack(contract_t *ct, uint64_t evid, int ack)
{
ct_kevent_t *ev;
list_t *queue = &ct->ct_events.ctq_events;
int error = ESRCH;
+ int nego = 0;
+ uint_t evtype;
+
+ ASSERT(ack == CT_ACK || ack == CT_NACK);
mutex_enter(&ct->ct_lock);
mutex_enter(&ct->ct_events.ctq_lock);
@@ -660,9 +678,14 @@ contract_ack(contract_t *ct, uint64_t evid)
*/
for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
if (ev->cte_id == evid) {
+ if (ev->cte_flags & CTE_NEG)
+ nego = 1;
+ else if (ack == CT_NACK)
+ break;
if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
ev->cte_flags |= CTE_ACK;
ct->ct_evcnt--;
+ evtype = ev->cte_type;
error = 0;
}
break;
@@ -671,9 +694,86 @@ contract_ack(contract_t *ct, uint64_t evid)
mutex_exit(&ct->ct_events.ctq_lock);
mutex_exit(&ct->ct_lock);
+ /*
+ * Not all critical events are negotiation events, however
+ * every negotiation event is a critical event. NEGEND events
+ * are critical events but are not negotiation events
+ */
+ if (error || !nego)
+ return (error);
+
+ if (ack == CT_ACK)
+ error = ct->ct_type->ct_type_ops->contop_ack(ct, evtype, evid);
+ else
+ error = ct->ct_type->ct_type_ops->contop_nack(ct, evtype, evid);
+
return (error);
}
+/*ARGSUSED*/
+int
+contract_ack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
+{
+ cmn_err(CE_PANIC, "contract_ack_inval: unsupported call: ctid: %u",
+ ct->ct_id);
+ return (ENOSYS);
+}
+
+/*ARGSUSED*/
+int
+contract_qack_inval(contract_t *ct, uint_t evtype, uint64_t evid)
+{
+ cmn_err(CE_PANIC, "contract_qack_inval: unsupported call: ctid: %u",
+ ct->ct_id);
+ return (ENOSYS);
+}
+
+/*ARGSUSED*/
+int
+contract_qack_notsup(contract_t *ct, uint_t evtype, uint64_t evid)
+{
+ return (ERANGE);
+}
+
+/*
+ * contract_qack
+ *
+ * Asks that negotiations be extended by another time quantum
+ */
+int
+contract_qack(contract_t *ct, uint64_t evid)
+{
+ ct_kevent_t *ev;
+ list_t *queue = &ct->ct_events.ctq_events;
+ int nego = 0;
+ uint_t evtype;
+
+ mutex_enter(&ct->ct_lock);
+ mutex_enter(&ct->ct_events.ctq_lock);
+
+ for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
+ if (ev->cte_id == evid) {
+ if ((ev->cte_flags & (CTE_NEG | CTE_ACK)) == CTE_NEG) {
+ evtype = ev->cte_type;
+ nego = 1;
+ }
+ break;
+ }
+ }
+ mutex_exit(&ct->ct_events.ctq_lock);
+ mutex_exit(&ct->ct_lock);
+
+ /*
+ * Only a negotiated event (which is by definition also a critical
+ * event) which has not yet been acknowledged can provide
+ * time quanta to a negotiating owner process.
+ */
+ if (!nego)
+ return (ESRCH);
+
+ return (ct->ct_type->ct_type_ops->contop_qack(ct, evtype, evid));
+}
+
/*
* contract_orphan
*
@@ -840,6 +940,20 @@ contract_exit(proc_t *p)
}
}
+static int
+get_time_left(struct ct_time *t)
+{
+ clock_t ticks_elapsed;
+ int secs_elapsed;
+
+ if (t->ctm_total == -1)
+ return (-1);
+
+ ticks_elapsed = ddi_get_lbolt() - t->ctm_start;
+ secs_elapsed = t->ctm_total - (drv_hztousec(ticks_elapsed)/MICROSEC);
+ return (secs_elapsed > 0 ? secs_elapsed : 0);
+}
+
/*
* contract_status_common
*
@@ -897,8 +1011,8 @@ contract_status_common(contract_t *ct, zone_t *zone, void *status,
CTS_OWNED : ct->ct_state);
}
STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt);
- STRUCT_FSET(lstatus, ctst_ntime, -1);
- STRUCT_FSET(lstatus, ctst_qtime, -1);
+ STRUCT_FSET(lstatus, ctst_ntime, get_time_left(&ct->ct_ntime));
+ STRUCT_FSET(lstatus, ctst_qtime, get_time_left(&ct->ct_qtime));
STRUCT_FSET(lstatus, ctst_nevid,
ct->ct_nevent ? ct->ct_nevent->cte_id : 0);
STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit);
@@ -1469,9 +1583,9 @@ ctmpl_clear(ct_template_t *template)
* Creates a new contract using the specified template.
*/
int
-ctmpl_create(ct_template_t *template)
+ctmpl_create(ct_template_t *template, ctid_t *ctidp)
{
- return (template->ctmpl_ops->ctop_create(template));
+ return (template->ctmpl_ops->ctop_create(template, ctidp));
}
/*
@@ -1520,7 +1634,7 @@ ctmpl_copy(ct_template_t *new, ct_template_t *old)
*/
/*ARGSUSED*/
int
-ctmpl_create_inval(ct_template_t *template)
+ctmpl_create_inval(ct_template_t *template, ctid_t *ctidp)
{
return (EINVAL);
}
@@ -2046,19 +2160,34 @@ cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp)
* be zallocated by the caller, and the event's flags and type must be
* set. The rest of the event's fields are initialized here.
*/
-void
+uint64_t
cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata)
{
ct_equeue_t *q;
timespec_t ts;
+ uint64_t evid;
+ ct_kevent_t *negev;
+ int negend;
e->cte_contract = ct;
e->cte_data = data;
e->cte_gdata = gdata;
e->cte_refs = 3;
- e->cte_id = atomic_add_64_nv(&ct->ct_type->ct_type_evid, 1);
+ evid = e->cte_id = atomic_add_64_nv(&ct->ct_type->ct_type_evid, 1);
contract_hold(ct);
+ /*
+ * For a negotiation event we set the ct->ct_nevent field of the
+ * contract for the duration of the negotiation
+ */
+ negend = 0;
+ if (e->cte_flags & CTE_NEG) {
+ cte_hold(e);
+ ct->ct_nevent = e;
+ } else if (e->cte_type == CT_EV_NEGEND) {
+ negend = 1;
+ }
+
gethrestime(&ts);
/*
@@ -2111,7 +2240,17 @@ cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata)
cte_rele(e);
}
+ if (negend) {
+ mutex_enter(&ct->ct_lock);
+ negev = ct->ct_nevent;
+ ct->ct_nevent = NULL;
+ cte_rele(negev);
+ mutex_exit(&ct->ct_lock);
+ }
+
mutex_exit(&ct->ct_evtlock);
+
+ return (evid);
}
/*
@@ -2347,7 +2486,8 @@ cte_get_event(ct_listener_t *l, int nonblock, void *uaddr, const cred_t *cr,
STRUCT_FSET(ev, ctev_evid, temp->cte_id);
STRUCT_FSET(ev, ctev_cttype,
temp->cte_contract->ct_type->ct_type_index);
- STRUCT_FSET(ev, ctev_flags, temp->cte_flags & (CTE_ACK|CTE_INFO));
+ STRUCT_FSET(ev, ctev_flags, temp->cte_flags &
+ (CTE_ACK|CTE_INFO|CTE_NEG));
STRUCT_FSET(ev, ctev_type, temp->cte_type);
STRUCT_FSET(ev, ctev_nbytes, len);
STRUCT_FSET(ev, ctev_goffset, size);
diff --git a/usr/src/uts/common/os/devcache.c b/usr/src/uts/common/os/devcache.c
index 14cde49faf..8e1313d487 100644
--- a/usr/src/uts/common/os/devcache.c
+++ b/usr/src/uts/common/os/devcache.c
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -224,6 +224,7 @@ i_ddi_devices_init(void)
list_create(&nvf_dirty_files, sizeof (nvfd_t),
offsetof(nvfd_t, nvf_link));
mutex_init(&nvf_cache_mutex, NULL, MUTEX_DEFAULT, NULL);
+ retire_store_init();
devid_cache_init();
}
@@ -235,6 +236,16 @@ i_ddi_devices_init(void)
void
i_ddi_read_devices_files(void)
{
+ /*
+ * The retire store should be the first file read as it
+ * may need to offline devices. kfio_disable_read is not
+ * used for retire. For the rationale see the tunable
+ * ddi_retire_store_bypass and comments in:
+ * uts/common/os/retire_store.c
+ */
+
+ retire_store_read();
+
if (!kfio_disable_read) {
mdi_read_devices_files();
devid_cache_read();
diff --git a/usr/src/uts/common/os/devcfg.c b/usr/src/uts/common/os/devcfg.c
index 29150c5d8c..03f7ec89a2 100644
--- a/usr/src/uts/common/os/devcfg.c
+++ b/usr/src/uts/common/os/devcfg.c
@@ -38,6 +38,7 @@
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/modctl.h>
+#include <sys/contract/device_impl.h>
#include <sys/dacf.h>
#include <sys/promif.h>
#include <sys/cpuvar.h>
@@ -50,6 +51,9 @@
#include <sys/fs/snode.h>
#include <sys/fs/dv_node.h>
#include <sys/reboot.h>
+#include <sys/sysmacros.h>
+#include <sys/sunldi.h>
+#include <sys/sunldi_impl.h>
#ifdef DEBUG
int ddidebug = DDI_AUDIT;
@@ -192,6 +196,10 @@ static void ndi_devi_exit_and_wait(dev_info_t *dip,
int circular, clock_t end_time);
static int ndi_devi_unbind_driver(dev_info_t *dip);
+static void i_ddi_check_retire(dev_info_t *dip);
+
+
+
/*
* dev_info cache and node management
*/
@@ -324,6 +332,15 @@ i_ddi_alloc_node(dev_info_t *pdip, char *node_name, pnode_t nodeid,
mutex_init(&(devi->devi_pm_lock), NULL, MUTEX_DEFAULT, NULL);
mutex_init(&(devi->devi_pm_busy_lock), NULL, MUTEX_DEFAULT, NULL);
+ RIO_TRACE((CE_NOTE, "i_ddi_alloc_node: Initing contract fields: "
+ "dip=%p, name=%s", (void *)devi, node_name));
+
+ mutex_init(&(devi->devi_ct_lock), NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&(devi->devi_ct_cv), NULL, CV_DEFAULT, NULL);
+ devi->devi_ct_count = -1; /* counter not in use if -1 */
+ list_create(&(devi->devi_ct), sizeof (cont_device_t),
+ offsetof(cont_device_t, cond_next));
+
i_ddi_set_node_state((dev_info_t *)devi, DS_PROTO);
da_log_enter((dev_info_t *)devi);
return ((dev_info_t *)devi);
@@ -389,7 +406,6 @@ i_ddi_free_node(dev_info_t *dip)
if (devi->devi_audit) {
kmem_free(devi->devi_audit, sizeof (devinfo_audit_t));
}
- kmem_free(devi->devi_node_name, strlen(devi->devi_node_name) + 1);
if (devi->devi_device_class)
kmem_free(devi->devi_device_class,
strlen(devi->devi_device_class) + 1);
@@ -398,6 +414,20 @@ i_ddi_free_node(dev_info_t *dip)
mutex_destroy(&(devi->devi_pm_lock));
mutex_destroy(&(devi->devi_pm_busy_lock));
+ RIO_TRACE((CE_NOTE, "i_ddi_free_node: destroying contract fields: "
+ "dip=%p", (void *)dip));
+ contract_device_remove_dip(dip);
+ ASSERT(devi->devi_ct_count == -1);
+ ASSERT(list_is_empty(&(devi->devi_ct)));
+ cv_destroy(&(devi->devi_ct_cv));
+ list_destroy(&(devi->devi_ct));
+ /* free this last since contract_device_remove_dip() uses it */
+ mutex_destroy(&(devi->devi_ct_lock));
+ RIO_TRACE((CE_NOTE, "i_ddi_free_node: destroyed all contract fields: "
+ "dip=%p, name=%s", (void *)dip, devi->devi_node_name));
+
+ kmem_free(devi->devi_node_name, strlen(devi->devi_node_name) + 1);
+
kmem_cache_free(ddi_node_cache, devi);
}
@@ -1441,6 +1471,7 @@ i_ndi_config_node(dev_info_t *dip, ddi_node_state_t state, uint_t flag)
i_ddi_set_node_state(dip, DS_PROBED);
break;
case DS_PROBED:
+ i_ddi_check_retire(dip);
atomic_add_long(&devinfo_attach_detach, 1);
if ((rv = attach_node(dip)) == DDI_SUCCESS)
i_ddi_set_node_state(dip, DS_ATTACHED);
@@ -5110,6 +5141,172 @@ ndi_devi_config_obp_args(dev_info_t *parent, char *devnm,
return (error);
}
+/*
+ * Pay attention, the following is a bit tricky:
+ * There are three possible cases when constraints are applied
+ *
+ * - A constraint is applied and the offline is disallowed.
+ * Simply return failure and block the offline
+ *
+ * - A constraint is applied and the offline is allowed.
+ * Mark the dip as having passed the constraint and allow
+ * offline to proceed.
+ *
+ * - A constraint is not applied. Allow the offline to proceed for now.
+ *
+ * In the latter two cases we allow the offline to proceed. If the
+ * offline succeeds (no users) everything is fine. It is ok for an unused
+ * device to be offlined even if no constraints were imposed on the offline.
+ * If the offline fails because there are users, we look at the constraint
+ * flag on the dip. If the constraint flag is set (implying that it passed
+ * a constraint) we allow the dip to be retired. If not, we don't allow
+ * the retire. This ensures that we don't allow unconstrained retire.
+ */
+int
+e_ddi_offline_notify(dev_info_t *dip)
+{
+ int retval;
+ int constraint;
+ int failure;
+
+ RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): entered: dip=%p",
+ (void *) dip));
+
+ constraint = 0;
+ failure = 0;
+
+ /*
+ * Start with userland constraints first - applied via device contracts
+ */
+ retval = contract_device_offline(dip, DDI_DEV_T_ANY, 0);
+ switch (retval) {
+ case CT_NACK:
+ RIO_DEBUG((CE_NOTE, "Received NACK for dip=%p", (void *)dip));
+ failure = 1;
+ goto out;
+ case CT_ACK:
+ constraint = 1;
+ RIO_DEBUG((CE_NOTE, "Received ACK for dip=%p", (void *)dip));
+ break;
+ case CT_NONE:
+ /* no contracts */
+ RIO_DEBUG((CE_NOTE, "No contracts on dip=%p", (void *)dip));
+ break;
+ default:
+ ASSERT(retval == CT_NONE);
+ }
+
+ /*
+ * Next, use LDI to impose kernel constraints
+ */
+ retval = ldi_invoke_notify(dip, DDI_DEV_T_ANY, 0, LDI_EV_OFFLINE, NULL);
+ switch (retval) {
+ case LDI_EV_FAILURE:
+ contract_device_negend(dip, DDI_DEV_T_ANY, 0, CT_EV_FAILURE);
+ RIO_DEBUG((CE_NOTE, "LDI callback failed on dip=%p",
+ (void *)dip));
+ failure = 1;
+ goto out;
+ case LDI_EV_SUCCESS:
+ constraint = 1;
+ RIO_DEBUG((CE_NOTE, "LDI callback success on dip=%p",
+ (void *)dip));
+ break;
+ case LDI_EV_NONE:
+ /* no matching LDI callbacks */
+ RIO_DEBUG((CE_NOTE, "No LDI callbacks for dip=%p",
+ (void *)dip));
+ break;
+ default:
+ ASSERT(retval == LDI_EV_NONE);
+ }
+
+out:
+ mutex_enter(&(DEVI(dip)->devi_lock));
+ if ((DEVI(dip)->devi_flags & DEVI_RETIRING) && failure) {
+ RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): setting "
+ "BLOCKED flag. dip=%p", (void *)dip));
+ DEVI(dip)->devi_flags |= DEVI_R_BLOCKED;
+ if (DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT) {
+ RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): "
+ "blocked. clearing RCM CONSTRAINT flag. dip=%p",
+ (void *)dip));
+ DEVI(dip)->devi_flags &= ~DEVI_R_CONSTRAINT;
+ }
+ } else if ((DEVI(dip)->devi_flags & DEVI_RETIRING) && constraint) {
+ RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): setting "
+ "CONSTRAINT flag. dip=%p", (void *)dip));
+ DEVI(dip)->devi_flags |= DEVI_R_CONSTRAINT;
+ } else if ((DEVI(dip)->devi_flags & DEVI_RETIRING) &&
+ DEVI(dip)->devi_ref == 0) {
+ /* also allow retire if device is not in use */
+ RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): device not in "
+ "use. Setting CONSTRAINT flag. dip=%p", (void *)dip));
+ DEVI(dip)->devi_flags |= DEVI_R_CONSTRAINT;
+ } else {
+ /*
+ * Note: We cannot ASSERT here that DEVI_R_CONSTRAINT is
+ * not set, since other sources (such as RCM) may have
+ * set the flag.
+ */
+ RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): not setting "
+ "constraint flag. dip=%p", (void *)dip));
+ }
+ mutex_exit(&(DEVI(dip)->devi_lock));
+
+
+ RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): exit: dip=%p",
+ (void *) dip));
+
+ return (failure ? DDI_FAILURE : DDI_SUCCESS);
+}
+
+void
+e_ddi_offline_finalize(dev_info_t *dip, int result)
+{
+ RIO_DEBUG((CE_NOTE, "e_ddi_offline_finalize(): entry: result=%s, "
+ "dip=%p", result == DDI_SUCCESS ? "SUCCESS" : "FAILURE",
+ (void *)dip));
+
+ contract_device_negend(dip, DDI_DEV_T_ANY, 0, result == DDI_SUCCESS ?
+ CT_EV_SUCCESS : CT_EV_FAILURE);
+
+ ldi_invoke_finalize(dip, DDI_DEV_T_ANY, 0,
+ LDI_EV_OFFLINE, result == DDI_SUCCESS ?
+ LDI_EV_SUCCESS : LDI_EV_FAILURE, NULL);
+
+ RIO_VERBOSE((CE_NOTE, "e_ddi_offline_finalize(): exit: dip=%p",
+ (void *)dip));
+}
+
+void
+e_ddi_degrade_finalize(dev_info_t *dip)
+{
+ RIO_DEBUG((CE_NOTE, "e_ddi_degrade_finalize(): entry: "
+ "result always = DDI_SUCCESS, dip=%p", (void *)dip));
+
+ contract_device_degrade(dip, DDI_DEV_T_ANY, 0);
+ contract_device_negend(dip, DDI_DEV_T_ANY, 0, CT_EV_SUCCESS);
+
+ ldi_invoke_finalize(dip, DDI_DEV_T_ANY, 0, LDI_EV_DEGRADE,
+ LDI_EV_SUCCESS, NULL);
+
+ RIO_VERBOSE((CE_NOTE, "e_ddi_degrade_finalize(): exit: dip=%p",
+ (void *)dip));
+}
+
+void
+e_ddi_undegrade_finalize(dev_info_t *dip)
+{
+ RIO_DEBUG((CE_NOTE, "e_ddi_undegrade_finalize(): entry: "
+ "result always = DDI_SUCCESS, dip=%p", (void *)dip));
+
+ contract_device_undegrade(dip, DDI_DEV_T_ANY, 0);
+ contract_device_negend(dip, DDI_DEV_T_ANY, 0, CT_EV_SUCCESS);
+
+ RIO_VERBOSE((CE_NOTE, "e_ddi_undegrade_finalize(): exit: dip=%p",
+ (void *)dip));
+}
/*
* detach a node with parent already held busy
@@ -5123,6 +5320,19 @@ devi_detach_node(dev_info_t *dip, uint_t flags)
ASSERT(pdip && DEVI_BUSY_OWNED(pdip));
+ /*
+ * Invoke notify if offlining
+ */
+ if (flags & NDI_DEVI_OFFLINE) {
+ RIO_DEBUG((CE_NOTE, "devi_detach_node: offlining dip=%p",
+ (void *)dip));
+ if (e_ddi_offline_notify(dip) != DDI_SUCCESS) {
+ RIO_DEBUG((CE_NOTE, "devi_detach_node: offline NACKed"
+ "dip=%p", (void *)dip));
+ return (NDI_FAILURE);
+ }
+ }
+
if (flags & NDI_POST_EVENT) {
if (i_ddi_devi_attached(pdip)) {
if (ddi_get_eventcookie(dip, DDI_DEVI_REMOVE_EVENT,
@@ -5131,8 +5341,22 @@ devi_detach_node(dev_info_t *dip, uint_t flags)
}
}
- if (i_ddi_detachchild(dip, flags) != DDI_SUCCESS)
+ if (i_ddi_detachchild(dip, flags) != DDI_SUCCESS) {
+ if (flags & NDI_DEVI_OFFLINE) {
+ RIO_DEBUG((CE_NOTE, "devi_detach_node: offline failed."
+ " Calling e_ddi_offline_finalize with result=%d. "
+ "dip=%p", DDI_FAILURE, (void *)dip));
+ e_ddi_offline_finalize(dip, DDI_FAILURE);
+ }
return (NDI_FAILURE);
+ }
+
+ if (flags & NDI_DEVI_OFFLINE) {
+ RIO_DEBUG((CE_NOTE, "devi_detach_node: offline succeeded."
+ " Calling e_ddi_offline_finalize with result=%d, "
+ "dip=%p", DDI_SUCCESS, (void *)dip));
+ e_ddi_offline_finalize(dip, DDI_SUCCESS);
+ }
if (flags & NDI_AUTODETACH)
return (NDI_SUCCESS);
@@ -7220,3 +7444,502 @@ ibt_hw_is_present()
{
return (ib_hw_status);
}
+
+/*
+ * ASSERT that constraint flag is not set and then set the "retire attempt"
+ * flag.
+ */
+int
+e_ddi_mark_retiring(dev_info_t *dip, void *arg)
+{
+ char **cons_array = (char **)arg;
+ char *path;
+ int constraint;
+ int i;
+
+ constraint = 0;
+ if (cons_array) {
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ (void) ddi_pathname(dip, path);
+ for (i = 0; cons_array[i] != NULL; i++) {
+ if (strcmp(path, cons_array[i]) == 0) {
+ constraint = 1;
+ break;
+ }
+ }
+ kmem_free(path, MAXPATHLEN);
+ }
+
+ mutex_enter(&DEVI(dip)->devi_lock);
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT));
+ DEVI(dip)->devi_flags |= DEVI_RETIRING;
+ if (constraint)
+ DEVI(dip)->devi_flags |= DEVI_R_CONSTRAINT;
+ mutex_exit(&DEVI(dip)->devi_lock);
+
+ RIO_VERBOSE((CE_NOTE, "marked dip as undergoing retire process dip=%p",
+ (void *)dip));
+
+ if (constraint)
+ RIO_DEBUG((CE_NOTE, "marked dip as constrained, dip=%p",
+ (void *)dip));
+
+ if (MDI_PHCI(dip))
+ mdi_phci_mark_retiring(dip, cons_array);
+
+ return (DDI_WALK_CONTINUE);
+}
+
+static void
+free_array(char **cons_array)
+{
+ int i;
+
+ if (cons_array == NULL)
+ return;
+
+ for (i = 0; cons_array[i] != NULL; i++) {
+ kmem_free(cons_array[i], strlen(cons_array[i]) + 1);
+ }
+ kmem_free(cons_array, (i+1) * sizeof (char *));
+}
+
+/*
+ * Walk *every* node in subtree and check if it blocks, allows or has no
+ * comment on a proposed retire.
+ */
+int
+e_ddi_retire_notify(dev_info_t *dip, void *arg)
+{
+ int *constraint = (int *)arg;
+
+ RIO_DEBUG((CE_NOTE, "retire notify: dip = %p", (void *)dip));
+
+ (void) e_ddi_offline_notify(dip);
+
+ mutex_enter(&(DEVI(dip)->devi_lock));
+ if (!(DEVI(dip)->devi_flags & DEVI_RETIRING)) {
+ RIO_DEBUG((CE_WARN, "retire notify: dip in retire "
+ "subtree is not marked: dip = %p", (void *)dip));
+ *constraint = 0;
+ } else if (DEVI(dip)->devi_flags & DEVI_R_BLOCKED) {
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT));
+ RIO_DEBUG((CE_NOTE, "retire notify: BLOCKED: dip = %p",
+ (void *)dip));
+ *constraint = 0;
+ } else if (!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT)) {
+ RIO_DEBUG((CE_NOTE, "retire notify: NO CONSTRAINT: "
+ "dip = %p", (void *)dip));
+ *constraint = 0;
+ } else {
+ RIO_DEBUG((CE_NOTE, "retire notify: CONSTRAINT set: "
+ "dip = %p", (void *)dip));
+ }
+ mutex_exit(&DEVI(dip)->devi_lock);
+
+ if (MDI_PHCI(dip))
+ mdi_phci_retire_notify(dip, constraint);
+
+ return (DDI_WALK_CONTINUE);
+}
+
+int
+e_ddi_retire_finalize(dev_info_t *dip, void *arg)
+{
+ int constraint = *(int *)arg;
+ int finalize;
+ int phci_only;
+
+ ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(dip)));
+
+ mutex_enter(&DEVI(dip)->devi_lock);
+ if (!(DEVI(dip)->devi_flags & DEVI_RETIRING)) {
+ RIO_DEBUG((CE_WARN,
+ "retire: unmarked dip(%p) in retire subtree",
+ (void *)dip));
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_RETIRED));
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT));
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_BLOCKED));
+ mutex_exit(&DEVI(dip)->devi_lock);
+ return (DDI_WALK_CONTINUE);
+ }
+
+ /*
+ * retire the device if constraints have been applied
+ * or if the device is not in use
+ */
+ finalize = 0;
+ if (constraint) {
+ ASSERT(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT);
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_BLOCKED));
+ DEVI(dip)->devi_flags &= ~DEVI_R_CONSTRAINT;
+ DEVI(dip)->devi_flags &= ~DEVI_RETIRING;
+ DEVI(dip)->devi_flags |= DEVI_RETIRED;
+ mutex_exit(&DEVI(dip)->devi_lock);
+ (void) spec_fence_snode(dip, NULL);
+ RIO_DEBUG((CE_NOTE, "Fenced off: dip = %p", (void *)dip));
+ e_ddi_offline_finalize(dip, DDI_SUCCESS);
+ } else {
+ if (DEVI(dip)->devi_flags & DEVI_R_BLOCKED) {
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT));
+ DEVI(dip)->devi_flags &= ~DEVI_R_BLOCKED;
+ DEVI(dip)->devi_flags &= ~DEVI_RETIRING;
+ /* we have already finalized during notify */
+ } else if (DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT) {
+ DEVI(dip)->devi_flags &= ~DEVI_R_CONSTRAINT;
+ DEVI(dip)->devi_flags &= ~DEVI_RETIRING;
+ finalize = 1;
+ } else {
+ DEVI(dip)->devi_flags &= ~DEVI_RETIRING;
+ /*
+ * even if no contracts, need to call finalize
+ * to clear the contract barrier on the dip
+ */
+ finalize = 1;
+ }
+ mutex_exit(&DEVI(dip)->devi_lock);
+ RIO_DEBUG((CE_NOTE, "finalize: NOT retired: dip = %p",
+ (void *)dip));
+ if (finalize)
+ e_ddi_offline_finalize(dip, DDI_FAILURE);
+ mutex_enter(&DEVI(dip)->devi_lock);
+ DEVI_SET_DEVICE_DEGRADED(dip);
+ mutex_exit(&DEVI(dip)->devi_lock);
+ }
+
+ /*
+ * phci_only variable indicates no client checking, just
+ * offline the PHCI. We set that to 0 to enable client
+ * checking
+ */
+ phci_only = 0;
+ if (MDI_PHCI(dip))
+ mdi_phci_retire_finalize(dip, phci_only);
+
+ return (DDI_WALK_CONTINUE);
+}
+
+/*
+ * Returns
+ * DDI_SUCCESS if constraints allow retire
+ * DDI_FAILURE if constraints don't allow retire.
+ * cons_array is a NULL terminated array of node paths for
+ * which constraints have already been applied.
+ */
+int
+e_ddi_retire_device(char *path, char **cons_array)
+{
+ dev_info_t *dip;
+ dev_info_t *pdip;
+ int circ;
+ int circ2;
+ int constraint;
+ char *devnm;
+
+ /*
+ * First, lookup the device
+ */
+ dip = e_ddi_hold_devi_by_path(path, 0);
+ if (dip == NULL) {
+ /*
+ * device does not exist. This device cannot be
+ * a critical device since it is not in use. Thus
+ * this device is always retireable. Return DDI_SUCCESS
+ * to indicate this. If this device is ever
+ * instantiated, I/O framework will consult the
+ * the persistent retire store, mark it as
+ * retired and fence it off.
+ */
+ RIO_DEBUG((CE_NOTE, "Retire device: device doesn't exist."
+ " NOP. Just returning SUCCESS. path=%s", path));
+ free_array(cons_array);
+ return (DDI_SUCCESS);
+ }
+
+ RIO_DEBUG((CE_NOTE, "Retire device: found dip = %p.", (void *)dip));
+
+ pdip = ddi_get_parent(dip);
+ ndi_hold_devi(pdip);
+
+ /*
+ * Run devfs_clean() in case dip has no constraints and is
+ * not in use, so is retireable but there are dv_nodes holding
+ * ref-count on the dip. Note that devfs_clean() always returns
+ * success.
+ */
+ devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
+ (void) ddi_deviname(dip, devnm);
+ (void) devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
+ kmem_free(devnm, MAXNAMELEN + 1);
+
+ ndi_devi_enter(pdip, &circ);
+
+ /* release hold from e_ddi_hold_devi_by_path */
+ ndi_rele_devi(dip);
+
+ /*
+ * If it cannot make a determination, is_leaf_node() assumes
+ * dip is a nexus.
+ */
+ (void) e_ddi_mark_retiring(dip, cons_array);
+ if (!is_leaf_node(dip)) {
+ ndi_devi_enter(dip, &circ2);
+ ddi_walk_devs(ddi_get_child(dip), e_ddi_mark_retiring,
+ cons_array);
+ ndi_devi_exit(dip, circ2);
+ }
+ free_array(cons_array);
+
+ /*
+ * apply constraints
+ */
+ RIO_DEBUG((CE_NOTE, "retire: subtree retire notify: path = %s", path));
+
+ constraint = 1; /* assume constraints allow retire */
+ (void) e_ddi_retire_notify(dip, &constraint);
+ if (!is_leaf_node(dip)) {
+ ndi_devi_enter(dip, &circ2);
+ ddi_walk_devs(ddi_get_child(dip), e_ddi_retire_notify,
+ &constraint);
+ ndi_devi_exit(dip, circ2);
+ }
+
+ /*
+ * Now finalize the retire
+ */
+ (void) e_ddi_retire_finalize(dip, &constraint);
+ if (!is_leaf_node(dip)) {
+ ndi_devi_enter(dip, &circ2);
+ ddi_walk_devs(ddi_get_child(dip), e_ddi_retire_finalize,
+ &constraint);
+ ndi_devi_exit(dip, circ2);
+ }
+
+ if (!constraint) {
+ RIO_DEBUG((CE_WARN, "retire failed: path = %s", path));
+ } else {
+ RIO_DEBUG((CE_NOTE, "retire succeeded: path = %s", path));
+ }
+
+ ndi_devi_exit(pdip, circ);
+ ndi_rele_devi(pdip);
+ return (constraint ? DDI_SUCCESS : DDI_FAILURE);
+}
+
+static int
+unmark_and_unfence(dev_info_t *dip, void *arg)
+{
+ char *path = (char *)arg;
+
+ ASSERT(path);
+
+ (void) ddi_pathname(dip, path);
+
+ mutex_enter(&DEVI(dip)->devi_lock);
+ DEVI(dip)->devi_flags &= ~DEVI_RETIRED;
+ DEVI_SET_DEVICE_ONLINE(dip);
+ mutex_exit(&DEVI(dip)->devi_lock);
+
+ RIO_VERBOSE((CE_NOTE, "Cleared RETIRED flag: dip=%p, path=%s",
+ (void *)dip, path));
+
+ (void) spec_unfence_snode(dip);
+ RIO_DEBUG((CE_NOTE, "Unfenced device: %s", path));
+
+ if (MDI_PHCI(dip))
+ mdi_phci_unretire(dip);
+
+ return (DDI_WALK_CONTINUE);
+}
+
+struct find_dip {
+ char *fd_buf;
+ char *fd_path;
+ dev_info_t *fd_dip;
+};
+
+static int
+find_dip_fcn(dev_info_t *dip, void *arg)
+{
+ struct find_dip *findp = (struct find_dip *)arg;
+
+ (void) ddi_pathname(dip, findp->fd_buf);
+
+ if (strcmp(findp->fd_path, findp->fd_buf) != 0)
+ return (DDI_WALK_CONTINUE);
+
+ ndi_hold_devi(dip);
+ findp->fd_dip = dip;
+
+ return (DDI_WALK_TERMINATE);
+}
+
+int
+e_ddi_unretire_device(char *path)
+{
+ int circ;
+ char *path2;
+ dev_info_t *pdip;
+ dev_info_t *dip;
+ struct find_dip find_dip;
+
+ ASSERT(path);
+ ASSERT(*path == '/');
+
+ if (strcmp(path, "/") == 0) {
+ cmn_err(CE_WARN, "Root node cannot be retired. Skipping "
+ "device unretire: %s", path);
+ return (0);
+ }
+
+ /*
+ * We can't lookup the dip (corresponding to path) via
+ * e_ddi_hold_devi_by_path() because the dip may be offline
+ * and may not attach. Use ddi_walk_devs() instead;
+ */
+ find_dip.fd_buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ find_dip.fd_path = path;
+ find_dip.fd_dip = NULL;
+
+ pdip = ddi_root_node();
+
+ ndi_devi_enter(pdip, &circ);
+ ddi_walk_devs(ddi_get_child(pdip), find_dip_fcn, &find_dip);
+ ndi_devi_exit(pdip, circ);
+
+ kmem_free(find_dip.fd_buf, MAXPATHLEN);
+
+ if (find_dip.fd_dip == NULL) {
+ cmn_err(CE_WARN, "Device not found in device tree. Skipping "
+ "device unretire: %s", path);
+ return (0);
+ }
+
+ dip = find_dip.fd_dip;
+
+ pdip = ddi_get_parent(dip);
+
+ ndi_hold_devi(pdip);
+
+ ndi_devi_enter(pdip, &circ);
+
+ path2 = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ (void) unmark_and_unfence(dip, path2);
+ if (!is_leaf_node(dip)) {
+ ndi_devi_enter(dip, &circ);
+ ddi_walk_devs(ddi_get_child(dip), unmark_and_unfence, path2);
+ ndi_devi_exit(dip, circ);
+ }
+
+ kmem_free(path2, MAXPATHLEN);
+
+ /* release hold from find_dip_fcn() */
+ ndi_rele_devi(dip);
+
+ ndi_devi_exit(pdip, circ);
+
+ ndi_rele_devi(pdip);
+
+ return (0);
+}
+
+/*
+ * Called before attach on a dip that has been retired.
+ */
+static int
+mark_and_fence(dev_info_t *dip, void *arg)
+{
+ char *fencepath = (char *)arg;
+
+ /*
+ * We have already decided to retire this device. The various
+ * constraint checking should not be set.
+ * NOTE that the retire flag may already be set due to
+ * fenced -> detach -> fenced transitions.
+ */
+ mutex_enter(&DEVI(dip)->devi_lock);
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT));
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_BLOCKED));
+ ASSERT(!(DEVI(dip)->devi_flags & DEVI_RETIRING));
+ DEVI(dip)->devi_flags |= DEVI_RETIRED;
+ mutex_exit(&DEVI(dip)->devi_lock);
+ RIO_VERBOSE((CE_NOTE, "marked as RETIRED dip=%p", (void *)dip));
+
+ if (fencepath) {
+ (void) spec_fence_snode(dip, NULL);
+ RIO_DEBUG((CE_NOTE, "Fenced: %s",
+ ddi_pathname(dip, fencepath)));
+ }
+
+ return (DDI_WALK_CONTINUE);
+}
+
+/*
+ * Checks the retire database and:
+ *
+ * - if device is present in the retire database, marks the device retired
+ * and fences it off.
+ * - if device is not in retire database, allows the device to attach normally
+ *
+ * To be called only by framework attach code on first attach attempt.
+ *
+ */
+static void
+i_ddi_check_retire(dev_info_t *dip)
+{
+ char *path;
+ dev_info_t *pdip;
+ int circ;
+ int phci_only;
+
+ pdip = ddi_get_parent(dip);
+
+ /*
+ * Root dip is treated special and doesn't take this code path.
+ * Also root can never be retired.
+ */
+ ASSERT(pdip);
+ ASSERT(DEVI_BUSY_OWNED(pdip));
+ ASSERT(i_ddi_node_state(dip) < DS_ATTACHED);
+
+ path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+
+ (void) ddi_pathname(dip, path);
+
+ RIO_VERBOSE((CE_NOTE, "Checking if dip should attach: dip=%p, path=%s",
+ (void *)dip, path));
+
+ /*
+ * Check if this device is in the "retired" store i.e. should
+ * be retired. If not, we have nothing to do.
+ */
+ if (e_ddi_device_retired(path) == 0) {
+ RIO_VERBOSE((CE_NOTE, "device is NOT retired: path=%s", path));
+ kmem_free(path, MAXPATHLEN);
+ return;
+ }
+
+ RIO_DEBUG((CE_NOTE, "attach: device is retired: path=%s", path));
+
+ /*
+ * Mark dips and fence off snodes (if any)
+ */
+ RIO_DEBUG((CE_NOTE, "attach: Mark and fence subtree: path=%s", path));
+ (void) mark_and_fence(dip, path);
+ if (!is_leaf_node(dip)) {
+ ndi_devi_enter(dip, &circ);
+ ddi_walk_devs(ddi_get_child(dip), mark_and_fence, path);
+ ndi_devi_exit(dip, circ);
+ }
+
+ kmem_free(path, MAXPATHLEN);
+
+ /*
+ * We don't want to check the client. We just want to
+ * offline the PHCI
+ */
+ phci_only = 1;
+ if (MDI_PHCI(dip))
+ mdi_phci_retire_finalize(dip, phci_only);
+}
diff --git a/usr/src/uts/common/os/driver_lyr.c b/usr/src/uts/common/os/driver_lyr.c
index f2dea074c1..266e3cbb79 100644
--- a/usr/src/uts/common/os/driver_lyr.c
+++ b/usr/src/uts/common/os/driver_lyr.c
@@ -69,6 +69,11 @@
#include <sys/socketvar.h>
#include <sys/kstr.h>
+/*
+ * Device contract related
+ */
+#include <sys/contract_impl.h>
+#include <sys/contract/device_impl.h>
/*
* Define macros to manipulate snode, vnode, and open device flags
@@ -97,11 +102,23 @@
#define LH_CBDEV (0x2) /* handle to a char/block device */
/*
- * Define marco for devid property lookups
+ * Define macro for devid property lookups
*/
#define DEVID_PROP_FLAGS (DDI_PROP_DONTPASS | \
DDI_PROP_TYPE_STRING|DDI_PROP_CANSLEEP)
+/*
+ * Dummy string for NDI events
+ */
+#define NDI_EVENT_SERVICE "NDI_EVENT_SERVICE"
+
+static void ldi_ev_lock(void);
+static void ldi_ev_unlock(void);
+
+#ifdef LDI_OBSOLETE_EVENT
+int ldi_remove_event_handler(ldi_handle_t lh, ldi_callback_id_t id);
+#endif
+
/*
* globals
@@ -113,6 +130,22 @@ static kmutex_t ldi_handle_hash_lock[LH_HASH_SZ];
static struct ldi_handle *ldi_handle_hash[LH_HASH_SZ];
static size_t ldi_handle_hash_count;
+static struct ldi_ev_callback_list ldi_ev_callback_list;
+
+static uint32_t ldi_ev_id_pool = 0;
+
+struct ldi_ev_cookie {
+ char *ck_evname;
+ uint_t ck_sync;
+ uint_t ck_ctype;
+};
+
+static struct ldi_ev_cookie ldi_ev_cookies[] = {
+ { LDI_EV_OFFLINE, 1, CT_DEV_EV_OFFLINE},
+ { LDI_EV_DEGRADE, 0, CT_DEV_EV_DEGRADED},
+ { NULL} /* must terminate list */
+};
+
void
ldi_init(void)
{
@@ -127,6 +160,17 @@ ldi_init(void)
mutex_init(&ldi_ident_hash_lock[i], NULL, MUTEX_DEFAULT, NULL);
ldi_ident_hash[i] = NULL;
}
+
+ /*
+ * Initialize the LDI event subsystem
+ */
+ mutex_init(&ldi_ev_callback_list.le_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&ldi_ev_callback_list.le_cv, NULL, CV_DEFAULT, NULL);
+ ldi_ev_callback_list.le_busy = 0;
+ ldi_ev_callback_list.le_thread = NULL;
+ list_create(&ldi_ev_callback_list.le_head,
+ sizeof (ldi_ev_callback_impl_t),
+ offsetof(ldi_ev_callback_impl_t, lec_list));
}
/*
@@ -334,7 +378,9 @@ handle_alloc(vnode_t *vp, struct ldi_ident *ident)
lhp->lh_ref = 1;
lhp->lh_vp = vp;
lhp->lh_ident = ident;
+#ifdef LDI_OBSOLETE_EVENT
mutex_init(lhp->lh_lock, NULL, MUTEX_DEFAULT, NULL);
+#endif
/* set the device type for this handle */
lhp->lh_type = 0;
@@ -398,10 +444,13 @@ handle_release(struct ldi_handle *lhp)
VN_RELE(lhp->lh_vp);
ident_release(lhp->lh_ident);
+#ifdef LDI_OBSOLETE_EVENT
mutex_destroy(lhp->lh_lock);
+#endif
kmem_free(lhp, sizeof (struct ldi_handle));
}
+#ifdef LDI_OBSOLETE_EVENT
/*
* LDI event manipulation functions
*/
@@ -457,6 +506,7 @@ i_ldi_callback(dev_info_t *dip, ddi_eventcookie_t event_cookie,
lep->le_handler(lep->le_lhp, event_cookie, lep->le_arg, bus_impldata);
}
+#endif
/*
* LDI open helper functions
@@ -1629,6 +1679,9 @@ ldi_close(ldi_handle_t lh, int flag, cred_t *cr)
struct ldi_handle *handlep = (struct ldi_handle *)lh;
struct ldi_event *lep;
int err = 0;
+ int notify = 0;
+ list_t *listp;
+ ldi_ev_callback_impl_t *lecp;
if (lh == NULL)
return (EINVAL);
@@ -1644,6 +1697,8 @@ ldi_close(ldi_handle_t lh, int flag, cred_t *cr)
bflush(dev);
}
+#ifdef LDI_OBSOLETE_EVENT
+
/*
* Any event handlers should have been unregistered by the
* time ldi_close() is called. If they haven't then it's a
@@ -1669,6 +1724,7 @@ ldi_close(ldi_handle_t lh, int flag, cred_t *cr)
"failed to unregister layered event handlers before "
"closing devices", lip->li_modname);
}
+#endif
/* do a layered close on the device */
err = VOP_CLOSE(handlep->lh_vp, flag | FKLYR, 1, (offset_t)0, cr);
@@ -1676,6 +1732,40 @@ ldi_close(ldi_handle_t lh, int flag, cred_t *cr)
LDI_OPENCLOSE((CE_WARN, "%s: lh=0x%p", "ldi close", (void *)lh));
/*
+ * Search the event callback list for callbacks with this
+ * handle. There are 2 cases
+ * 1. Called in the context of a notify. The handle consumer
+ * is releasing its hold on the device to allow a reconfiguration
+ * of the device. Simply NULL out the handle and the notify callback.
+ * The finalize callback is still available so that the consumer
+ * knows of the final disposition of the device.
+ * 2. Not called in the context of notify. NULL out the handle as well
+ * as the notify and finalize callbacks. Since the consumer has
+ * closed the handle, we assume it is not interested in the
+ * notify and finalize callbacks.
+ */
+ ldi_ev_lock();
+
+ if (handlep->lh_flags & LH_FLAGS_NOTIFY)
+ notify = 1;
+ listp = &ldi_ev_callback_list.le_head;
+ for (lecp = list_head(listp); lecp; lecp = list_next(listp, lecp)) {
+ if (lecp->lec_lhp != handlep)
+ continue;
+ lecp->lec_lhp = NULL;
+ lecp->lec_notify = NULL;
+ LDI_EVDBG((CE_NOTE, "ldi_close: NULLed lh and notify"));
+ if (!notify) {
+ LDI_EVDBG((CE_NOTE, "ldi_close: NULLed finalize"));
+ lecp->lec_finalize = NULL;
+ }
+ }
+
+ if (notify)
+ handlep->lh_flags &= ~LH_FLAGS_NOTIFY;
+ ldi_ev_unlock();
+
+ /*
* Free the handle even if the device close failed. why?
*
* If the device close failed we can't really make assumptions
@@ -2678,6 +2768,8 @@ ldi_prop_exists(ldi_handle_t lh, uint_t flags, char *name)
return (res);
}
+#ifdef LDI_OBSOLETE_EVENT
+
int
ldi_get_eventcookie(ldi_handle_t lh, char *name, ddi_eventcookie_t *ecp)
{
@@ -2794,3 +2886,845 @@ ldi_remove_event_handler(ldi_handle_t lh, ldi_callback_id_t id)
kmem_free(lep, sizeof (struct ldi_event));
return (res);
}
+
+#endif
+
+/*
+ * Here are some definitions of terms used in the following LDI events
+ * code:
+ *
+ * "LDI events" AKA "native events": These are events defined by the
+ * "new" LDI event framework. These events are serviced by the LDI event
+ * framework itself and thus are native to it.
+ *
+ * "LDI contract events": These are contract events that correspond to the
+ * LDI events. This mapping of LDI events to contract events is defined by
+ * the ldi_ev_cookies[] array above.
+ *
+ * NDI events: These are events which are serviced by the NDI event subsystem.
+ * LDI subsystem just provides a thin wrapper around the NDI event interfaces
+ * These events are thereefore *not* native events.
+ */
+
+static int
+ldi_native_event(const char *evname)
+{
+ int i;
+
+ LDI_EVTRC((CE_NOTE, "ldi_native_event: entered: ev=%s", evname));
+
+ for (i = 0; ldi_ev_cookies[i].ck_evname != NULL; i++) {
+ if (strcmp(ldi_ev_cookies[i].ck_evname, evname) == 0)
+ return (1);
+ }
+
+ return (0);
+}
+
+static uint_t
+ldi_ev_sync_event(const char *evname)
+{
+ int i;
+
+ ASSERT(ldi_native_event(evname));
+
+ LDI_EVTRC((CE_NOTE, "ldi_ev_sync_event: entered: %s", evname));
+
+ for (i = 0; ldi_ev_cookies[i].ck_evname != NULL; i++) {
+ if (strcmp(ldi_ev_cookies[i].ck_evname, evname) == 0)
+ return (ldi_ev_cookies[i].ck_sync);
+ }
+
+ /*
+ * This should never happen until non-contract based
+ * LDI events are introduced. If that happens, we will
+ * use a "special" token to indicate that there are no
+ * contracts corresponding to this LDI event.
+ */
+ cmn_err(CE_PANIC, "Unknown LDI event: %s", evname);
+
+ return (0);
+}
+
+static uint_t
+ldi_contract_event(const char *evname)
+{
+ int i;
+
+ ASSERT(ldi_native_event(evname));
+
+ LDI_EVTRC((CE_NOTE, "ldi_contract_event: entered: %s", evname));
+
+ for (i = 0; ldi_ev_cookies[i].ck_evname != NULL; i++) {
+ if (strcmp(ldi_ev_cookies[i].ck_evname, evname) == 0)
+ return (ldi_ev_cookies[i].ck_ctype);
+ }
+
+ /*
+ * This should never happen until non-contract based
+ * LDI events are introduced. If that happens, we will
+ * use a "special" token to indicate that there are no
+ * contracts corresponding to this LDI event.
+ */
+ cmn_err(CE_PANIC, "Unknown LDI event: %s", evname);
+
+ return (0);
+}
+
+char *
+ldi_ev_get_type(ldi_ev_cookie_t cookie)
+{
+ int i;
+ struct ldi_ev_cookie *cookie_impl = (struct ldi_ev_cookie *)cookie;
+
+ for (i = 0; ldi_ev_cookies[i].ck_evname != NULL; i++) {
+ if (&ldi_ev_cookies[i] == cookie_impl) {
+ LDI_EVTRC((CE_NOTE, "ldi_ev_get_type: LDI: %s",
+ ldi_ev_cookies[i].ck_evname));
+ return (ldi_ev_cookies[i].ck_evname);
+ }
+ }
+
+ /*
+ * Not an LDI native event. Must be NDI event service.
+ * Just return a generic string
+ */
+ LDI_EVTRC((CE_NOTE, "ldi_ev_get_type: is NDI"));
+ return (NDI_EVENT_SERVICE);
+}
+
+static int
+ldi_native_cookie(ldi_ev_cookie_t cookie)
+{
+ int i;
+ struct ldi_ev_cookie *cookie_impl = (struct ldi_ev_cookie *)cookie;
+
+ for (i = 0; ldi_ev_cookies[i].ck_evname != NULL; i++) {
+ if (&ldi_ev_cookies[i] == cookie_impl) {
+ LDI_EVTRC((CE_NOTE, "ldi_native_cookie: native LDI"));
+ return (1);
+ }
+ }
+
+ LDI_EVTRC((CE_NOTE, "ldi_native_cookie: is NDI"));
+ return (0);
+}
+
+static ldi_ev_cookie_t
+ldi_get_native_cookie(const char *evname)
+{
+ int i;
+
+ for (i = 0; ldi_ev_cookies[i].ck_evname != NULL; i++) {
+ if (strcmp(ldi_ev_cookies[i].ck_evname, evname) == 0) {
+ LDI_EVTRC((CE_NOTE, "ldi_get_native_cookie: found"));
+ return ((ldi_ev_cookie_t)&ldi_ev_cookies[i]);
+ }
+ }
+
+ LDI_EVTRC((CE_NOTE, "ldi_get_native_cookie: NOT found"));
+ return (NULL);
+}
+
+/*
+ * ldi_ev_lock() needs to be recursive, since layered drivers may call
+ * other LDI interfaces (such as ldi_close() from within the context of
+ * a notify callback. Since the notify callback is called with the
+ * ldi_ev_lock() held and ldi_close() also grabs ldi_ev_lock, the lock needs
+ * to be recursive.
+ */
+static void
+ldi_ev_lock(void)
+{
+ LDI_EVTRC((CE_NOTE, "ldi_ev_lock: entered"));
+
+ mutex_enter(&ldi_ev_callback_list.le_lock);
+ if (ldi_ev_callback_list.le_thread == curthread) {
+ ASSERT(ldi_ev_callback_list.le_busy >= 1);
+ ldi_ev_callback_list.le_busy++;
+ } else {
+ while (ldi_ev_callback_list.le_busy)
+ cv_wait(&ldi_ev_callback_list.le_cv,
+ &ldi_ev_callback_list.le_lock);
+ ASSERT(ldi_ev_callback_list.le_thread == NULL);
+ ldi_ev_callback_list.le_busy = 1;
+ ldi_ev_callback_list.le_thread = curthread;
+ }
+ mutex_exit(&ldi_ev_callback_list.le_lock);
+
+ LDI_EVTRC((CE_NOTE, "ldi_ev_lock: exit"));
+}
+
+static void
+ldi_ev_unlock(void)
+{
+ LDI_EVTRC((CE_NOTE, "ldi_ev_unlock: entered"));
+ mutex_enter(&ldi_ev_callback_list.le_lock);
+ ASSERT(ldi_ev_callback_list.le_thread == curthread);
+ ASSERT(ldi_ev_callback_list.le_busy >= 1);
+
+ ldi_ev_callback_list.le_busy--;
+ if (ldi_ev_callback_list.le_busy == 0) {
+ ldi_ev_callback_list.le_thread = NULL;
+ cv_signal(&ldi_ev_callback_list.le_cv);
+ }
+ mutex_exit(&ldi_ev_callback_list.le_lock);
+ LDI_EVTRC((CE_NOTE, "ldi_ev_unlock: exit"));
+}
+
+int
+ldi_ev_get_cookie(ldi_handle_t lh, char *evname, ldi_ev_cookie_t *cookiep)
+{
+ struct ldi_handle *handlep = (struct ldi_handle *)lh;
+ dev_info_t *dip;
+ dev_t dev;
+ int res;
+ struct snode *csp;
+ ddi_eventcookie_t ddi_cookie;
+ ldi_ev_cookie_t tcookie;
+
+ LDI_EVDBG((CE_NOTE, "ldi_ev_get_cookie: entered: evname=%s",
+ evname ? evname : "<NULL>"));
+
+ if (lh == NULL || evname == NULL ||
+ strlen(evname) == 0 || cookiep == NULL) {
+ LDI_EVDBG((CE_NOTE, "ldi_ev_get_cookie: invalid args"));
+ return (LDI_EV_FAILURE);
+ }
+
+ *cookiep = NULL;
+
+ /*
+ * First check if it is a LDI native event
+ */
+ tcookie = ldi_get_native_cookie(evname);
+ if (tcookie) {
+ LDI_EVDBG((CE_NOTE, "ldi_ev_get_cookie: got native cookie"));
+ *cookiep = tcookie;
+ return (LDI_EV_SUCCESS);
+ }
+
+ /*
+ * Not a LDI native event. Try NDI event services
+ */
+
+ dev = handlep->lh_vp->v_rdev;
+
+ csp = VTOCS(handlep->lh_vp);
+ mutex_enter(&csp->s_lock);
+ if ((dip = csp->s_dip) != NULL)
+ e_ddi_hold_devi(dip);
+ mutex_exit(&csp->s_lock);
+ if (dip == NULL)
+ dip = e_ddi_hold_devi_by_dev(dev, 0);
+
+ if (dip == NULL) {
+ cmn_err(CE_WARN, "ldi_ev_get_cookie: No devinfo node for LDI "
+ "handle: %p", (void *)handlep);
+ return (LDI_EV_FAILURE);
+ }
+
+ LDI_EVDBG((CE_NOTE, "Calling ddi_get_eventcookie: dip=%p, ev=%s",
+ (void *)dip, evname));
+
+ res = ddi_get_eventcookie(dip, evname, &ddi_cookie);
+
+ ddi_release_devi(dip);
+
+ if (res == DDI_SUCCESS) {
+ LDI_EVDBG((CE_NOTE, "ldi_ev_get_cookie: NDI cookie found"));
+ *cookiep = (ldi_ev_cookie_t)ddi_cookie;
+ return (LDI_EV_SUCCESS);
+ } else {
+ LDI_EVDBG((CE_WARN, "ldi_ev_get_cookie: NDI cookie: failed"));
+ return (LDI_EV_FAILURE);
+ }
+}
+
+/*ARGSUSED*/
+static void
+i_ldi_ev_callback(dev_info_t *dip, ddi_eventcookie_t event_cookie,
+ void *arg, void *ev_data)
+{
+ ldi_ev_callback_impl_t *lecp = (ldi_ev_callback_impl_t *)arg;
+
+ ASSERT(lecp != NULL);
+ ASSERT(!ldi_native_cookie(lecp->lec_cookie));
+ ASSERT(lecp->lec_lhp);
+ ASSERT(lecp->lec_notify == NULL);
+ ASSERT(lecp->lec_finalize);
+
+ LDI_EVDBG((CE_NOTE, "i_ldi_ev_callback: ldh=%p, cookie=%p, arg=%p, "
+ "ev_data=%p", (void *)lecp->lec_lhp, (void *)event_cookie,
+ (void *)lecp->lec_arg, (void *)ev_data));
+
+ lecp->lec_finalize(lecp->lec_lhp, (ldi_ev_cookie_t)event_cookie,
+ lecp->lec_arg, ev_data);
+}
+
+int
+ldi_ev_register_callbacks(ldi_handle_t lh, ldi_ev_cookie_t cookie,
+ ldi_ev_callback_t *callb, void *arg, ldi_callback_id_t *id)
+{
+ struct ldi_handle *lhp = (struct ldi_handle *)lh;
+ ldi_ev_callback_impl_t *lecp;
+ dev_t dev;
+ struct snode *csp;
+ dev_info_t *dip;
+ int ddi_event;
+
+ ASSERT(!servicing_interrupt());
+
+ if (lh == NULL || cookie == NULL || callb == NULL || id == NULL) {
+ LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks: Invalid args"));
+ return (LDI_EV_FAILURE);
+ }
+
+ if (callb->cb_vers != LDI_EV_CB_VERS) {
+ LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks: Invalid vers"));
+ return (LDI_EV_FAILURE);
+ }
+
+ if (callb->cb_notify == NULL && callb->cb_finalize == NULL) {
+ LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks: NULL callb"));
+ return (LDI_EV_FAILURE);
+ }
+
+ *id = 0;
+
+ dev = lhp->lh_vp->v_rdev;
+ csp = VTOCS(lhp->lh_vp);
+ mutex_enter(&csp->s_lock);
+ if ((dip = csp->s_dip) != NULL)
+ e_ddi_hold_devi(dip);
+ mutex_exit(&csp->s_lock);
+ if (dip == NULL)
+ dip = e_ddi_hold_devi_by_dev(dev, 0);
+
+ if (dip == NULL) {
+ cmn_err(CE_WARN, "ldi_ev_register: No devinfo node for "
+ "LDI handle: %p", (void *)lhp);
+ return (LDI_EV_FAILURE);
+ }
+
+ lecp = kmem_zalloc(sizeof (ldi_ev_callback_impl_t), KM_SLEEP);
+
+ ddi_event = 0;
+ if (!ldi_native_cookie(cookie)) {
+ if (callb->cb_notify || callb->cb_finalize == NULL) {
+ /*
+ * NDI event services only accept finalize
+ */
+ cmn_err(CE_WARN, "%s: module: %s: NDI event cookie. "
+ "Only finalize"
+ " callback supported with this cookie",
+ "ldi_ev_register_callbacks",
+ lhp->lh_ident->li_modname);
+ kmem_free(lecp, sizeof (ldi_ev_callback_impl_t));
+ ddi_release_devi(dip);
+ return (LDI_EV_FAILURE);
+ }
+
+ if (ddi_add_event_handler(dip, (ddi_eventcookie_t)cookie,
+ i_ldi_ev_callback, (void *)lecp,
+ (ddi_callback_id_t *)&lecp->lec_id)
+ != DDI_SUCCESS) {
+ kmem_free(lecp, sizeof (ldi_ev_callback_impl_t));
+ ddi_release_devi(dip);
+ LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks(): "
+ "ddi_add_event_handler failed"));
+ return (LDI_EV_FAILURE);
+ }
+ ddi_event = 1;
+ LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks(): "
+ "ddi_add_event_handler success"));
+ }
+
+
+
+ ldi_ev_lock();
+
+ /*
+ * Add the notify/finalize callback to the LDI's list of callbacks.
+ */
+ lecp->lec_lhp = lhp;
+ lecp->lec_dev = lhp->lh_vp->v_rdev;
+ lecp->lec_spec = (lhp->lh_vp->v_type == VCHR) ?
+ S_IFCHR : S_IFBLK;
+ lecp->lec_notify = callb->cb_notify;
+ lecp->lec_finalize = callb->cb_finalize;
+ lecp->lec_arg = arg;
+ lecp->lec_cookie = cookie;
+ if (!ddi_event)
+ lecp->lec_id = (void *)(uintptr_t)(++ldi_ev_id_pool);
+ else
+ ASSERT(lecp->lec_id);
+ lecp->lec_dip = dip;
+ list_insert_tail(&ldi_ev_callback_list.le_head, lecp);
+
+ *id = (ldi_callback_id_t)lecp->lec_id;
+
+ ldi_ev_unlock();
+
+ ddi_release_devi(dip);
+
+ LDI_EVDBG((CE_NOTE, "ldi_ev_register_callbacks: registered "
+ "notify/finalize"));
+
+ return (LDI_EV_SUCCESS);
+}
+
+static int
+ldi_ev_device_match(ldi_ev_callback_impl_t *lecp, dev_info_t *dip,
+ dev_t dev, int spec_type)
+{
+ ASSERT(lecp);
+ ASSERT(dip);
+ ASSERT(dev != DDI_DEV_T_NONE);
+ ASSERT(dev != NODEV);
+ ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
+ (spec_type == S_IFCHR || spec_type == S_IFBLK));
+ ASSERT(lecp->lec_dip);
+ ASSERT(lecp->lec_spec == S_IFCHR || lecp->lec_spec == S_IFBLK);
+ ASSERT(lecp->lec_dev != DDI_DEV_T_ANY);
+ ASSERT(lecp->lec_dev != DDI_DEV_T_NONE);
+ ASSERT(lecp->lec_dev != NODEV);
+
+ if (dip != lecp->lec_dip)
+ return (0);
+
+ if (dev != DDI_DEV_T_ANY) {
+ if (dev != lecp->lec_dev || spec_type != lecp->lec_spec)
+ return (0);
+ }
+
+ LDI_EVTRC((CE_NOTE, "ldi_ev_device_match: MATCH dip=%p", (void *)dip));
+
+ return (1);
+}
+
+/*
+ * LDI framework function to post a "notify" event to all layered drivers
+ * that have registered for that event
+ *
+ * Returns:
+ * LDI_EV_SUCCESS - registered callbacks allow event
+ * LDI_EV_FAILURE - registered callbacks block event
+ * LDI_EV_NONE - No matching LDI callbacks
+ *
+ * This function is *not* to be called by layered drivers. It is for I/O
+ * framework code in Solaris, such as the I/O retire code and DR code
+ * to call while servicing a device event such as offline or degraded.
+ */
+int
+ldi_invoke_notify(dev_info_t *dip, dev_t dev, int spec_type, char *event,
+ void *ev_data)
+{
+ ldi_ev_callback_impl_t *lecp;
+ list_t *listp;
+ int ret;
+ char *lec_event;
+
+ ASSERT(dip);
+ ASSERT(dev != DDI_DEV_T_NONE);
+ ASSERT(dev != NODEV);
+ ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
+ (spec_type == S_IFCHR || spec_type == S_IFBLK));
+ ASSERT(event);
+ ASSERT(ldi_native_event(event));
+ ASSERT(ldi_ev_sync_event(event));
+
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): entered: dip=%p, ev=%s",
+ (void *)dip, event));
+
+ ret = LDI_EV_NONE;
+ ldi_ev_lock();
+ listp = &ldi_ev_callback_list.le_head;
+ for (lecp = list_head(listp); lecp; lecp = list_next(listp, lecp)) {
+
+ /* Check if matching device */
+ if (!ldi_ev_device_match(lecp, dip, dev, spec_type))
+ continue;
+
+ if (lecp->lec_lhp == NULL) {
+ /*
+ * Consumer has unregistered the handle and so
+ * is no longer interested in notify events.
+ */
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): No LDI "
+ "handle, skipping"));
+ continue;
+ }
+
+ if (lecp->lec_notify == NULL) {
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): No notify "
+ "callback. skipping"));
+ continue; /* not interested in notify */
+ }
+
+ /*
+ * Check if matching event
+ */
+ lec_event = ldi_ev_get_type(lecp->lec_cookie);
+ if (strcmp(event, lec_event) != 0) {
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): Not matching"
+ " event {%s,%s}. skipping", event, lec_event));
+ continue;
+ }
+
+ lecp->lec_lhp->lh_flags |= LH_FLAGS_NOTIFY;
+ if (lecp->lec_notify(lecp->lec_lhp, lecp->lec_cookie,
+ lecp->lec_arg, ev_data) != LDI_EV_SUCCESS) {
+ ret = LDI_EV_FAILURE;
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): notify"
+ " FAILURE"));
+ break;
+ }
+
+ /* We have a matching callback that allows the event to occur */
+ ret = LDI_EV_SUCCESS;
+
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): 1 consumer success"));
+ }
+
+ if (ret != LDI_EV_FAILURE)
+ goto out;
+
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): undoing notify"));
+
+ /*
+ * Undo notifies already sent
+ */
+ lecp = list_prev(listp, lecp);
+ for (; lecp; lecp = list_prev(listp, lecp)) {
+
+ /*
+ * Check if matching device
+ */
+ if (!ldi_ev_device_match(lecp, dip, dev, spec_type))
+ continue;
+
+
+ if (lecp->lec_finalize == NULL) {
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): no finalize, "
+ "skipping"));
+ continue; /* not interested in finalize */
+ }
+
+ /*
+ * it is possible that in response to a notify event a
+ * layered driver closed its LDI handle so it is ok
+ * to have a NULL LDI handle for finalize. The layered
+ * driver is expected to maintain state in its "arg"
+ * parameter to keep track of the closed device.
+ */
+
+ /* Check if matching event */
+ lec_event = ldi_ev_get_type(lecp->lec_cookie);
+ if (strcmp(event, lec_event) != 0) {
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): not matching "
+ "event: %s,%s, skipping", event, lec_event));
+ continue;
+ }
+
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): calling finalize"));
+
+ lecp->lec_finalize(lecp->lec_lhp, lecp->lec_cookie,
+ LDI_EV_FAILURE, lecp->lec_arg, ev_data);
+
+ /*
+ * If LDI native event and LDI handle closed in context
+ * of notify, NULL out the finalize callback as we have
+ * already called the 1 finalize above allowed in this situation
+ */
+ if (lecp->lec_lhp == NULL &&
+ ldi_native_cookie(lecp->lec_cookie)) {
+ LDI_EVDBG((CE_NOTE,
+ "ldi_invoke_notify(): NULL-ing finalize after "
+ "calling 1 finalize following ldi_close"));
+ lecp->lec_finalize = NULL;
+ }
+ }
+
+out:
+ ldi_ev_unlock();
+
+ if (ret == LDI_EV_NONE) {
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): no matching "
+ "LDI callbacks"));
+ }
+
+ return (ret);
+}
+
+/*
+ * Framework function to be called from a layered driver to propagate
+ * LDI "notify" events to exported minors.
+ *
+ * This function is a public interface exported by the LDI framework
+ * for use by layered drivers to propagate device events up the software
+ * stack.
+ */
+int
+ldi_ev_notify(dev_info_t *dip, minor_t minor, int spec_type,
+ ldi_ev_cookie_t cookie, void *ev_data)
+{
+ char *evname = ldi_ev_get_type(cookie);
+ uint_t ct_evtype;
+ dev_t dev;
+ major_t major;
+ int retc;
+ int retl;
+
+ ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
+ ASSERT(dip);
+ ASSERT(ldi_native_cookie(cookie));
+
+ LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): entered: event=%s, dip=%p",
+ evname, (void *)dip));
+
+ if (!ldi_ev_sync_event(evname)) {
+ cmn_err(CE_PANIC, "ldi_ev_notify(): %s not a "
+ "negotiatable event", evname);
+ return (LDI_EV_SUCCESS);
+ }
+
+ major = ddi_driver_major(dip);
+ if (major == (major_t)-1) {
+ char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ (void) ddi_pathname(dip, path);
+ cmn_err(CE_WARN, "ldi_ev_notify: cannot derive major number "
+ "for device %s", path);
+ kmem_free(path, MAXPATHLEN);
+ return (LDI_EV_FAILURE);
+ }
+ dev = makedevice(major, minor);
+
+ /*
+ * Generate negotiation contract events on contracts (if any) associated
+ * with this minor.
+ */
+ LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): calling contract nego."));
+ ct_evtype = ldi_contract_event(evname);
+ retc = contract_device_negotiate(dip, dev, spec_type, ct_evtype);
+ if (retc == CT_NACK) {
+ LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): contract neg. NACK"));
+ return (LDI_EV_FAILURE);
+ }
+
+ LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): LDI invoke notify"));
+ retl = ldi_invoke_notify(dip, dev, spec_type, evname, ev_data);
+ if (retl == LDI_EV_FAILURE) {
+ LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): ldi_invoke_notify "
+ "returned FAILURE. Calling contract negend"));
+ contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE);
+ return (LDI_EV_FAILURE);
+ }
+
+ /*
+ * The very fact that we are here indicates that there is a
+ * LDI callback (and hence a constraint) for the retire of the
+ * HW device. So we just return success even if there are no
+ * contracts or LDI callbacks against the minors layered on top
+ * of the HW minors
+ */
+ LDI_EVDBG((CE_NOTE, "ldi_ev_notify(): returning SUCCESS"));
+ return (LDI_EV_SUCCESS);
+}
+
+/*
+ * LDI framework function to invoke "finalize" callbacks for all layered
+ * drivers that have registered callbacks for that event.
+ *
+ * This function is *not* to be called by layered drivers. It is for I/O
+ * framework code in Solaris, such as the I/O retire code and DR code
+ * to call while servicing a device event such as offline or degraded.
+ */
+void
+ldi_invoke_finalize(dev_info_t *dip, dev_t dev, int spec_type, char *event,
+ int ldi_result, void *ev_data)
+{
+ ldi_ev_callback_impl_t *lecp;
+ list_t *listp;
+ char *lec_event;
+ int found = 0;
+
+ ASSERT(dip);
+ ASSERT(dev != DDI_DEV_T_NONE);
+ ASSERT(dev != NODEV);
+ ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
+ (spec_type == S_IFCHR || spec_type == S_IFBLK));
+ ASSERT(event);
+ ASSERT(ldi_native_event(event));
+ ASSERT(ldi_result == LDI_EV_SUCCESS || ldi_result == LDI_EV_FAILURE);
+
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): entered: dip=%p, result=%d"
+ " event=%s", (void *)dip, ldi_result, event));
+
+ ldi_ev_lock();
+ listp = &ldi_ev_callback_list.le_head;
+ for (lecp = list_head(listp); lecp; lecp = list_next(listp, lecp)) {
+
+ if (lecp->lec_finalize == NULL) {
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): No "
+ "finalize. Skipping"));
+ continue; /* Not interested in finalize */
+ }
+
+ /*
+ * Check if matching device
+ */
+ if (!ldi_ev_device_match(lecp, dip, dev, spec_type))
+ continue;
+
+ /*
+ * It is valid for the LDI handle to be NULL during finalize.
+ * The layered driver may have done an LDI close in the notify
+ * callback.
+ */
+
+ /*
+ * Check if matching event
+ */
+ lec_event = ldi_ev_get_type(lecp->lec_cookie);
+ if (strcmp(event, lec_event) != 0) {
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): Not "
+ "matching event {%s,%s}. Skipping",
+ event, lec_event));
+ continue;
+ }
+
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): calling finalize"));
+
+ found = 1;
+
+ lecp->lec_finalize(lecp->lec_lhp, lecp->lec_cookie,
+ ldi_result, lecp->lec_arg, ev_data);
+
+ /*
+ * If LDI native event and LDI handle closed in context
+ * of notify, NULL out the finalize callback as we have
+ * already called the 1 finalize above allowed in this situation
+ */
+ if (lecp->lec_lhp == NULL &&
+ ldi_native_cookie(lecp->lec_cookie)) {
+ LDI_EVDBG((CE_NOTE,
+ "ldi_invoke_finalize(): NULLing finalize after "
+ "calling 1 finalize following ldi_close"));
+ lecp->lec_finalize = NULL;
+ }
+ }
+ ldi_ev_unlock();
+
+ if (found)
+ return;
+
+ LDI_EVDBG((CE_NOTE, "ldi_invoke_finalize(): no matching callbacks"));
+}
+
+/*
+ * Framework function to be called from a layered driver to propagate
+ * LDI "finalize" events to exported minors.
+ *
+ * This function is a public interface exported by the LDI framework
+ * for use by layered drivers to propagate device events up the software
+ * stack.
+ */
+void
+ldi_ev_finalize(dev_info_t *dip, minor_t minor, int spec_type, int ldi_result,
+ ldi_ev_cookie_t cookie, void *ev_data)
+{
+ dev_t dev;
+ major_t major;
+ char *evname;
+ int ct_result = (ldi_result == LDI_EV_SUCCESS) ?
+ CT_EV_SUCCESS : CT_EV_FAILURE;
+ uint_t ct_evtype;
+
+ ASSERT(dip);
+ ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
+ ASSERT(ldi_result == LDI_EV_SUCCESS || ldi_result == LDI_EV_FAILURE);
+ ASSERT(ldi_native_cookie(cookie));
+
+ LDI_EVDBG((CE_NOTE, "ldi_ev_finalize: entered: dip=%p", (void *)dip));
+
+ major = ddi_driver_major(dip);
+ if (major == (major_t)-1) {
+ char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ (void) ddi_pathname(dip, path);
+ cmn_err(CE_WARN, "ldi_ev_finalize: cannot derive major number "
+ "for device %s", path);
+ kmem_free(path, MAXPATHLEN);
+ return;
+ }
+ dev = makedevice(major, minor);
+
+ evname = ldi_ev_get_type(cookie);
+
+ LDI_EVDBG((CE_NOTE, "ldi_ev_finalize: calling contracts"));
+ ct_evtype = ldi_contract_event(evname);
+ contract_device_finalize(dip, dev, spec_type, ct_evtype, ct_result);
+
+ LDI_EVDBG((CE_NOTE, "ldi_ev_finalize: calling ldi_invoke_finalize"));
+ ldi_invoke_finalize(dip, dev, spec_type, evname, ldi_result, ev_data);
+}
+
+int
+ldi_ev_remove_callbacks(ldi_callback_id_t id)
+{
+ ldi_ev_callback_impl_t *lecp;
+ ldi_ev_callback_impl_t *next;
+ ldi_ev_callback_impl_t *found;
+ list_t *listp;
+
+ ASSERT(!servicing_interrupt());
+
+ if (id == 0) {
+ cmn_err(CE_WARN, "ldi_ev_remove_callbacks: Invalid ID 0");
+ return (LDI_EV_FAILURE);
+ }
+
+ LDI_EVDBG((CE_NOTE, "ldi_ev_remove_callbacks: entered: id=%p",
+ (void *)id));
+
+ ldi_ev_lock();
+
+ listp = &ldi_ev_callback_list.le_head;
+ next = found = NULL;
+ for (lecp = list_head(listp); lecp; lecp = next) {
+ next = list_next(listp, lecp);
+ if (lecp->lec_id == id) {
+ ASSERT(found == NULL);
+ list_remove(listp, lecp);
+ found = lecp;
+ }
+ }
+ ldi_ev_unlock();
+
+ if (found == NULL) {
+ cmn_err(CE_WARN, "No LDI event handler for id (%p)",
+ (void *)id);
+ return (LDI_EV_SUCCESS);
+ }
+
+ if (!ldi_native_cookie(found->lec_cookie)) {
+ ASSERT(found->lec_notify == NULL);
+ if (ddi_remove_event_handler((ddi_callback_id_t)id)
+ != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "failed to remove NDI event handler "
+ "for id (%p)", (void *)id);
+ ldi_ev_lock();
+ list_insert_tail(listp, found);
+ ldi_ev_unlock();
+ return (LDI_EV_FAILURE);
+ }
+ LDI_EVDBG((CE_NOTE, "ldi_ev_remove_callbacks: NDI event "
+ "service removal succeeded"));
+ } else {
+ LDI_EVDBG((CE_NOTE, "ldi_ev_remove_callbacks: removed "
+ "LDI native callbacks"));
+ }
+ kmem_free(found, sizeof (ldi_ev_callback_impl_t));
+
+ return (LDI_EV_SUCCESS);
+}
diff --git a/usr/src/uts/common/os/modctl.c b/usr/src/uts/common/os/modctl.c
index 31108c215b..1f821fef85 100644
--- a/usr/src/uts/common/os/modctl.c
+++ b/usr/src/uts/common/os/modctl.c
@@ -161,8 +161,6 @@ extern int make_mbind(char *, int, char *, struct bind **);
static int minorperm_loaded = 0;
-
-
void
mod_setup(void)
{
@@ -798,6 +796,217 @@ modctl_getmaj(char *uname, uint_t ulen, int *umajorp)
return (0);
}
+static char **
+convert_constraint_string(char *constraints, size_t len)
+{
+ int i;
+ int n;
+ char *p;
+ char **array;
+
+ ASSERT(constraints != NULL);
+ ASSERT(len > 0);
+
+ for (i = 0, p = constraints; strlen(p) > 0; i++, p += strlen(p) + 1);
+
+ n = i;
+
+ if (n == 0) {
+ kmem_free(constraints, len);
+ return (NULL);
+ }
+
+ array = kmem_alloc((n + 1) * sizeof (char *), KM_SLEEP);
+
+ for (i = 0, p = constraints; i < n; i++, p += strlen(p) + 1) {
+ array[i] = i_ddi_strdup(p, KM_SLEEP);
+ }
+ array[n] = NULL;
+
+ kmem_free(constraints, len);
+
+ return (array);
+}
+/*ARGSUSED*/
+static int
+modctl_retire(char *path, char *uconstraints, size_t ulen)
+{
+ char *pathbuf;
+ char *devpath;
+ size_t pathsz;
+ int retval;
+ char *constraints;
+ char **cons_array;
+
+ if (path == NULL)
+ return (EINVAL);
+
+ if ((uconstraints == NULL) ^ (ulen == 0))
+ return (EINVAL);
+
+ pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ retval = copyinstr(path, pathbuf, MAXPATHLEN, &pathsz);
+ if (retval != 0) {
+ kmem_free(pathbuf, MAXPATHLEN);
+ return (retval);
+ }
+ devpath = i_ddi_strdup(pathbuf, KM_SLEEP);
+ kmem_free(pathbuf, MAXPATHLEN);
+
+ /*
+ * First check if the device is already retired.
+ * If it is, this becomes a NOP
+ */
+ if (e_ddi_device_retired(devpath)) {
+ cmn_err(CE_NOTE, "Device: already retired: %s", devpath);
+ kmem_free(devpath, strlen(devpath) + 1);
+ return (0);
+ }
+
+ cons_array = NULL;
+ if (uconstraints) {
+ constraints = kmem_alloc(ulen, KM_SLEEP);
+ if (copyin(uconstraints, constraints, ulen)) {
+ kmem_free(constraints, ulen);
+ kmem_free(devpath, strlen(devpath) + 1);
+ return (EFAULT);
+ }
+ cons_array = convert_constraint_string(constraints, ulen);
+ }
+
+ /*
+ * Try to retire the device first. The following
+ * routine will return an error only if the device
+ * is not retireable i.e. retire constraints forbid
+ * a retire. A return of success from this routine
+ * indicates that device is retireable.
+ */
+ retval = e_ddi_retire_device(devpath, cons_array);
+ if (retval != DDI_SUCCESS) {
+ cmn_err(CE_WARN, "constraints forbid retire: %s", devpath);
+ kmem_free(devpath, strlen(devpath) + 1);
+ return (ENOTSUP);
+ }
+
+ /*
+ * Ok, the retire succeeded. Persist the retire.
+ * If retiring a nexus, we need to only persist the
+ * nexus retire. Any children of a retired nexus
+ * are automatically covered by the retire store
+ * code.
+ */
+ retval = e_ddi_retire_persist(devpath);
+ if (retval != 0) {
+ cmn_err(CE_WARN, "Failed to persist device retire: error %d: "
+ "%s", retval, devpath);
+ kmem_free(devpath, strlen(devpath) + 1);
+ return (retval);
+ }
+ if (moddebug & MODDEBUG_RETIRE)
+ cmn_err(CE_NOTE, "Persisted retire of device: %s", devpath);
+
+ kmem_free(devpath, strlen(devpath) + 1);
+ return (0);
+}
+
+static int
+modctl_is_retired(char *path, int *statep)
+{
+ char *pathbuf;
+ char *devpath;
+ size_t pathsz;
+ int error;
+ int status;
+
+ if (path == NULL || statep == NULL)
+ return (EINVAL);
+
+ pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ error = copyinstr(path, pathbuf, MAXPATHLEN, &pathsz);
+ if (error != 0) {
+ kmem_free(pathbuf, MAXPATHLEN);
+ return (error);
+ }
+ devpath = i_ddi_strdup(pathbuf, KM_SLEEP);
+ kmem_free(pathbuf, MAXPATHLEN);
+
+ if (e_ddi_device_retired(devpath))
+ status = 1;
+ else
+ status = 0;
+ kmem_free(devpath, strlen(devpath) + 1);
+
+ return (copyout(&status, statep, sizeof (status)) ? EFAULT : 0);
+}
+
+static int
+modctl_unretire(char *path)
+{
+ char *pathbuf;
+ char *devpath;
+ size_t pathsz;
+ int retired;
+ int retval;
+
+ if (path == NULL)
+ return (EINVAL);
+
+ pathbuf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
+ retval = copyinstr(path, pathbuf, MAXPATHLEN, &pathsz);
+ if (retval != 0) {
+ kmem_free(pathbuf, MAXPATHLEN);
+ return (retval);
+ }
+ devpath = i_ddi_strdup(pathbuf, KM_SLEEP);
+ kmem_free(pathbuf, MAXPATHLEN);
+
+ /*
+ * We check if a device is retired (first) before
+ * unpersisting the retire, because we use the
+ * retire store to determine if a device is retired.
+ * If we unpersist first, the device will always appear
+ * to be unretired. For the rationale behind unpersisting
+ * a device that is not retired, see the next comment.
+ */
+ retired = e_ddi_device_retired(devpath);
+
+ /*
+ * We call unpersist unconditionally because the lookup
+ * for retired devices (e_ddi_device_retired()), skips "bypassed"
+ * devices. We still want to be able remove "bypassed" entries
+ * from the persistent store, so we unpersist unconditionally
+ * i.e. whether or not the entry is found on a lookup.
+ *
+ * e_ddi_retire_unpersist() returns 1 if it found and cleared
+ * an entry from the retire store or 0 otherwise.
+ */
+ if (e_ddi_retire_unpersist(devpath))
+ if (moddebug & MODDEBUG_RETIRE) {
+ cmn_err(CE_NOTE, "Unpersisted retire of device: %s",
+ devpath);
+ }
+
+ /*
+ * Check if the device is already unretired. If so,
+ * the unretire becomes a NOP
+ */
+ if (!retired) {
+ cmn_err(CE_NOTE, "Not retired: %s", devpath);
+ kmem_free(devpath, strlen(devpath) + 1);
+ return (0);
+ }
+
+ retval = e_ddi_unretire_device(devpath);
+ if (retval != 0) {
+ cmn_err(CE_WARN, "cannot unretire device: error %d, path %s\n",
+ retval, devpath);
+ }
+
+ kmem_free(devpath, strlen(devpath) + 1);
+
+ return (retval);
+}
+
static int
modctl_getname(char *uname, uint_t ulen, int *umajorp)
{
@@ -2069,6 +2278,18 @@ modctl(int cmd, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4,
error = modctl_moddevname((int)a1, a2, a3);
break;
+ case MODRETIRE: /* retire device named by physpath a1 */
+ error = modctl_retire((char *)a1, (char *)a2, (size_t)a3);
+ break;
+
+ case MODISRETIRED: /* check if a device is retired. */
+ error = modctl_is_retired((char *)a1, (int *)a2);
+ break;
+
+ case MODUNRETIRE: /* unretire device named by physpath a1 */
+ error = modctl_unretire((char *)a1);
+ break;
+
default:
error = EINVAL;
break;
diff --git a/usr/src/uts/common/os/retire_store.c b/usr/src/uts/common/os/retire_store.c
new file mode 100644
index 0000000000..f1c3db9445
--- /dev/null
+++ b/usr/src/uts/common/os/retire_store.c
@@ -0,0 +1,457 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/ddi.h>
+#include <sys/sunddi.h>
+#include <sys/sunndi.h>
+#include <sys/ddi_impldefs.h>
+#include <sys/ddi_implfuncs.h>
+#include <sys/list.h>
+#include <sys/reboot.h>
+#include <sys/sysmacros.h>
+#include <sys/console.h>
+#include <sys/devcache.h>
+
+/*
+ * The nvpair name in the I/O retire specific sub-nvlist
+ */
+#define RIO_STORE_VERSION_STR "rio-store-version"
+#define RIO_STORE_MAGIC_STR "rio-store-magic"
+#define RIO_STORE_FLAGS_STR "rio-store-flags"
+
+#define RIO_STORE_VERSION_1 1
+#define RIO_STORE_VERSION RIO_STORE_VERSION_1
+
+/*
+ * decoded retire list element
+ */
+
+typedef enum rio_store_flags {
+ RIO_STORE_F_INVAL = 0,
+ RIO_STORE_F_RETIRED = 1,
+ RIO_STORE_F_BYPASS = 2
+} rio_store_flags_t;
+
+typedef struct rio_store {
+ char *rst_devpath;
+ rio_store_flags_t rst_flags;
+ list_node_t rst_next;
+} rio_store_t;
+
+#define RIO_STORE_MAGIC 0x601fcace /* retire */
+
+static int rio_store_decode(nvf_handle_t nvfh, nvlist_t *line_nvl, char *name);
+static int rio_store_encode(nvf_handle_t nvfh, nvlist_t **ret_nvl);
+static void retire_list_free(nvf_handle_t nvfh);
+
+
+/*
+ * Retire I/O persistent store registration info
+ */
+static nvf_ops_t rio_store_ops = {
+ "/etc/devices/retire_store", /* path to store */
+ rio_store_decode, /* decode nvlist into retire_list */
+ rio_store_encode, /* encode retire_list into nvlist */
+ retire_list_free, /* free retire_list */
+ NULL /* write complete callback */
+};
+
+static nvf_handle_t rio_store_handle;
+static char store_path[MAXPATHLEN];
+static int store_debug = 0;
+static int bypass_msg = 0;
+static int retire_msg = 0;
+
+#define STORE_DEBUG 0x0001
+#define STORE_TRACE 0x0002
+
+#define STORE_DBG(args) if (store_debug & STORE_DEBUG) cmn_err args
+#define STORE_TRC(args) if (store_debug & STORE_TRACE) cmn_err args
+
+/*
+ * We don't use the simple read disable offered by the
+ * caching framework (see devcache.c) as it will not
+ * have the desired effect of bypassing the persistent
+ * store. A simple read disable will
+ *
+ * 1. cause any additions to the cache to destroy the
+ * existing on-disk cache
+ *
+ * 2. prevent deletions from the existing on-disk
+ * cache which is needed for recovery from bad
+ * retire decisions.
+ *
+ * Use the following tunable instead
+ *
+ */
+int ddi_retire_store_bypass = 0;
+
+
+
+/*
+ * Initialize retire store data structures
+ */
+void
+retire_store_init(void)
+{
+ if (boothowto & RB_ASKNAME) {
+
+ printf("Retire store [%s] (/dev/null to bypass): ",
+ rio_store_ops.nvfr_cache_path);
+ console_gets(store_path, sizeof (store_path) - 1);
+ store_path[sizeof (store_path) - 1] = '\0';
+
+ if (strcmp(store_path, "/dev/null") == 0) {
+ ddi_retire_store_bypass = 1;
+ } else if (store_path[0] != '\0') {
+ if (store_path[0] != '/') {
+ printf("Invalid store path: %s. Using default"
+ "\n", store_path);
+ } else {
+ rio_store_ops.nvfr_cache_path = store_path;
+ }
+ }
+ }
+
+ rio_store_handle = nvf_register_file(&rio_store_ops);
+
+ list_create(nvf_list(rio_store_handle), sizeof (rio_store_t),
+ offsetof(rio_store_t, rst_next));
+}
+
+/*
+ * Read and populate the in-core retire store
+ */
+void
+retire_store_read(void)
+{
+ rw_enter(nvf_lock(rio_store_handle), RW_WRITER);
+ ASSERT(list_head(nvf_list(rio_store_handle)) == NULL);
+ (void) nvf_read_file(rio_store_handle);
+ rw_exit(nvf_lock(rio_store_handle));
+ STORE_DBG((CE_NOTE, "Read on-disk retire store"));
+}
+
+static void
+rio_store_free(rio_store_t *rsp)
+{
+ int flag_mask = RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS;
+
+ ASSERT(rsp);
+ ASSERT(rsp->rst_devpath);
+ ASSERT(rsp->rst_flags & RIO_STORE_F_RETIRED);
+ ASSERT(!(rsp->rst_flags & ~flag_mask));
+
+ STORE_TRC((CE_NOTE, "store: freed path: %s", rsp->rst_devpath));
+
+ kmem_free(rsp->rst_devpath, strlen(rsp->rst_devpath) + 1);
+ kmem_free(rsp, sizeof (*rsp));
+}
+
+static void
+retire_list_free(nvf_handle_t nvfh)
+{
+ list_t *listp;
+ rio_store_t *rsp;
+
+ ASSERT(nvfh == rio_store_handle);
+ ASSERT(RW_WRITE_HELD(nvf_lock(nvfh)));
+
+ listp = nvf_list(nvfh);
+ while (rsp = list_head(listp)) {
+ list_remove(listp, rsp);
+ rio_store_free(rsp);
+ }
+
+ STORE_DBG((CE_NOTE, "store: freed retire list"));
+}
+
+static int
+rio_store_decode(nvf_handle_t nvfh, nvlist_t *line_nvl, char *name)
+{
+ rio_store_t *rsp;
+ int32_t version;
+ int32_t magic;
+ int32_t flags;
+ int rval;
+
+ ASSERT(nvfh == rio_store_handle);
+ ASSERT(RW_WRITE_HELD(nvf_lock(nvfh)));
+ ASSERT(name);
+
+ version = 0;
+ rval = nvlist_lookup_int32(line_nvl, RIO_STORE_VERSION_STR, &version);
+ if (rval != 0 || version != RIO_STORE_VERSION) {
+ return (EINVAL);
+ }
+
+ magic = 0;
+ rval = nvlist_lookup_int32(line_nvl, RIO_STORE_MAGIC_STR, &magic);
+ if (rval != 0 || magic != RIO_STORE_MAGIC) {
+ return (EINVAL);
+ }
+
+ flags = 0;
+ rval = nvlist_lookup_int32(line_nvl, RIO_STORE_FLAGS_STR, &flags);
+ if (rval != 0 || flags != RIO_STORE_F_RETIRED) {
+ return (EINVAL);
+ }
+
+ if (ddi_retire_store_bypass) {
+ flags |= RIO_STORE_F_BYPASS;
+ if (!bypass_msg) {
+ bypass_msg = 1;
+ cmn_err(CE_WARN,
+ "Bypassing retire store /etc/devices/retire_store");
+ }
+ }
+
+ rsp = kmem_zalloc(sizeof (rio_store_t), KM_SLEEP);
+ rsp->rst_devpath = i_ddi_strdup(name, KM_SLEEP);
+ rsp->rst_flags = flags;
+ list_insert_tail(nvf_list(nvfh), rsp);
+
+ STORE_TRC((CE_NOTE, "store: added to retire list: %s", name));
+ if (!retire_msg) {
+ retire_msg = 1;
+ cmn_err(CE_NOTE, "One or more I/O devices have been retired");
+ }
+
+ return (0);
+}
+
+static int
+rio_store_encode(nvf_handle_t nvfh, nvlist_t **ret_nvl)
+{
+ nvlist_t *nvl;
+ nvlist_t *line_nvl;
+ list_t *listp;
+ rio_store_t *rsp;
+ int rval;
+
+ ASSERT(nvfh == rio_store_handle);
+ ASSERT(RW_WRITE_HELD(nvf_lock(nvfh)));
+
+ *ret_nvl = NULL;
+
+ nvl = NULL;
+ rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
+ if (rval != 0) {
+ return (DDI_FAILURE);
+ }
+
+ listp = nvf_list(nvfh);
+ for (rsp = list_head(listp); rsp; rsp = list_next(listp, rsp)) {
+ int flag_mask = RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS;
+ int flags;
+ ASSERT(rsp->rst_devpath);
+ ASSERT(!(rsp->rst_flags & ~flag_mask));
+
+ line_nvl = NULL;
+ rval = nvlist_alloc(&line_nvl, NV_UNIQUE_NAME, KM_SLEEP);
+ if (rval != 0) {
+ line_nvl = NULL;
+ goto error;
+ }
+
+ rval = nvlist_add_int32(line_nvl, RIO_STORE_VERSION_STR,
+ RIO_STORE_VERSION);
+ if (rval != 0) {
+ goto error;
+ }
+ rval = nvlist_add_int32(line_nvl, RIO_STORE_MAGIC_STR,
+ RIO_STORE_MAGIC);
+ if (rval != 0) {
+ goto error;
+ }
+
+ /* don't save the bypass flag */
+ flags = RIO_STORE_F_RETIRED;
+ rval = nvlist_add_int32(line_nvl, RIO_STORE_FLAGS_STR,
+ flags);
+ if (rval != 0) {
+ goto error;
+ }
+
+ rval = nvlist_add_nvlist(nvl, rsp->rst_devpath, line_nvl);
+ if (rval != 0) {
+ goto error;
+ }
+ nvlist_free(line_nvl);
+ line_nvl = NULL;
+ }
+
+ *ret_nvl = nvl;
+ STORE_DBG((CE_NOTE, "packed retire list into nvlist"));
+ return (DDI_SUCCESS);
+
+error:
+ if (line_nvl)
+ nvlist_free(line_nvl);
+ ASSERT(nvl);
+ nvlist_free(nvl);
+ return (DDI_FAILURE);
+}
+
+int
+e_ddi_retire_persist(char *devpath)
+{
+ rio_store_t *rsp;
+ rio_store_t *new_rsp;
+ list_t *listp;
+ char *new_path;
+
+ STORE_DBG((CE_NOTE, "e_ddi_retire_persist: entered: %s", devpath));
+
+ new_rsp = kmem_zalloc(sizeof (*new_rsp), KM_SLEEP);
+ new_rsp->rst_devpath = new_path = i_ddi_strdup(devpath, KM_SLEEP);
+ new_rsp->rst_flags = RIO_STORE_F_RETIRED;
+
+ rw_enter(nvf_lock(rio_store_handle), RW_WRITER);
+
+ listp = nvf_list(rio_store_handle);
+ for (rsp = list_head(listp); rsp; rsp = list_next(listp, rsp)) {
+ int flag_mask = RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS;
+ ASSERT(!(rsp->rst_flags & ~flag_mask));
+
+ /* already there */
+ if (strcmp(devpath, rsp->rst_devpath) == 0) {
+ /* explicit retire, clear bypass flag (if any) */
+ rsp->rst_flags &= ~RIO_STORE_F_BYPASS;
+ ASSERT(rsp->rst_flags == RIO_STORE_F_RETIRED);
+ rw_exit(nvf_lock(rio_store_handle));
+ kmem_free(new_path, strlen(new_path) + 1);
+ kmem_free(new_rsp, sizeof (*new_rsp));
+ STORE_DBG((CE_NOTE, "store: already in. Clear bypass "
+ ": %s", devpath));
+ return (0);
+ }
+
+ }
+
+ ASSERT(rsp == NULL);
+ list_insert_tail(listp, new_rsp);
+
+ nvf_mark_dirty(rio_store_handle);
+
+ rw_exit(nvf_lock(rio_store_handle));
+
+ nvf_wake_daemon();
+
+ STORE_DBG((CE_NOTE, "store: New, added to list, dirty: %s", devpath));
+
+ return (0);
+}
+
+int
+e_ddi_retire_unpersist(char *devpath)
+{
+ rio_store_t *rsp;
+ rio_store_t *next;
+ list_t *listp;
+ int is_dirty = 0;
+
+ STORE_DBG((CE_NOTE, "e_ddi_retire_unpersist: entered: %s", devpath));
+
+ rw_enter(nvf_lock(rio_store_handle), RW_WRITER);
+
+ listp = nvf_list(rio_store_handle);
+ for (rsp = list_head(listp); rsp; rsp = next) {
+ next = list_next(listp, rsp);
+ if (strcmp(devpath, rsp->rst_devpath) != 0)
+ continue;
+
+ list_remove(listp, rsp);
+ rio_store_free(rsp);
+
+ STORE_DBG((CE_NOTE, "store: found in list. Freed: %s",
+ devpath));
+
+ nvf_mark_dirty(rio_store_handle);
+ is_dirty = 1;
+ }
+
+ rw_exit(nvf_lock(rio_store_handle));
+
+ if (is_dirty)
+ nvf_wake_daemon();
+
+ return (is_dirty);
+}
+
+int
+e_ddi_device_retired(char *devpath)
+{
+ list_t *listp;
+ rio_store_t *rsp;
+ size_t len;
+ int retired;
+
+ retired = 0;
+
+ rw_enter(nvf_lock(rio_store_handle), RW_READER);
+
+ listp = nvf_list(rio_store_handle);
+ for (rsp = list_head(listp); rsp; rsp = list_next(listp, rsp)) {
+ int flag_mask = RIO_STORE_F_RETIRED|RIO_STORE_F_BYPASS;
+ ASSERT(!(rsp->rst_flags & ~flag_mask));
+
+ /*
+ * If the "bypass" flag is set, then the device
+ * is *not* retired for the current boot of the
+ * system. It indicates that the retire store
+ * was read but the devices in the retire store
+ * were not retired i.e. effectively the store
+ * was bypassed. For why we bother to even read
+ * the store when we bypass it, see the comments
+ * for the tunable ddi_retire_store_bypass.
+ */
+ if (rsp->rst_flags & RIO_STORE_F_BYPASS) {
+ STORE_TRC((CE_NOTE, "store: found & bypassed: %s",
+ rsp->rst_devpath));
+ continue;
+ }
+
+ /*
+	 * a device is retired if it, or one of its ancestors,
+	 * exists in the in-core list
+ */
+ len = strlen(rsp->rst_devpath);
+ if (strncmp(devpath, rsp->rst_devpath, len) != 0)
+ continue;
+ if (devpath[len] == '\0' || devpath[len] == '/') {
+ /* exact match or a child */
+ retired = 1;
+ STORE_TRC((CE_NOTE, "store: found & !bypassed: %s",
+ devpath));
+ break;
+ }
+ }
+ rw_exit(nvf_lock(rio_store_handle));
+
+ return (retired);
+}
diff --git a/usr/src/uts/common/os/sunmdi.c b/usr/src/uts/common/os/sunmdi.c
index 0c6b1e3055..cec7a252b6 100644
--- a/usr/src/uts/common/os/sunmdi.c
+++ b/usr/src/uts/common/os/sunmdi.c
@@ -4777,6 +4777,292 @@ i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
return (rv);
}
+void
+mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
+{
+ mdi_phci_t *ph;
+ mdi_client_t *ct;
+ mdi_pathinfo_t *pip;
+ mdi_pathinfo_t *next;
+ dev_info_t *cdip;
+
+ if (!MDI_PHCI(dip))
+ return;
+
+ ph = i_devi_get_phci(dip);
+ if (ph == NULL) {
+ return;
+ }
+
+ MDI_PHCI_LOCK(ph);
+
+ if (MDI_PHCI_IS_OFFLINE(ph)) {
+ /* has no last path */
+ MDI_PHCI_UNLOCK(ph);
+ return;
+ }
+
+ pip = ph->ph_path_head;
+ while (pip != NULL) {
+ MDI_PI_LOCK(pip);
+ next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
+
+ ct = MDI_PI(pip)->pi_client;
+ i_mdi_client_lock(ct, pip);
+ MDI_PI_UNLOCK(pip);
+
+ cdip = ct->ct_dip;
+ if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
+ (i_mdi_client_compute_state(ct, ph) ==
+ MDI_CLIENT_STATE_FAILED)) {
+ /* Last path. Mark client dip as retiring */
+ i_mdi_client_unlock(ct);
+ MDI_PHCI_UNLOCK(ph);
+ (void) e_ddi_mark_retiring(cdip, cons_array);
+ MDI_PHCI_LOCK(ph);
+ pip = next;
+ } else {
+ i_mdi_client_unlock(ct);
+ pip = next;
+ }
+ }
+
+ MDI_PHCI_UNLOCK(ph);
+
+ return;
+}
+
+void
+mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
+{
+ mdi_phci_t *ph;
+ mdi_client_t *ct;
+ mdi_pathinfo_t *pip;
+ mdi_pathinfo_t *next;
+ dev_info_t *cdip;
+
+ if (!MDI_PHCI(dip))
+ return;
+
+ ph = i_devi_get_phci(dip);
+ if (ph == NULL)
+ return;
+
+ MDI_PHCI_LOCK(ph);
+
+ if (MDI_PHCI_IS_OFFLINE(ph)) {
+ MDI_PHCI_UNLOCK(ph);
+ /* not last path */
+ return;
+ }
+
+ if (ph->ph_unstable) {
+ MDI_PHCI_UNLOCK(ph);
+ /* can't check for constraints */
+ *constraint = 0;
+ return;
+ }
+
+ pip = ph->ph_path_head;
+ while (pip != NULL) {
+ MDI_PI_LOCK(pip);
+ next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
+
+ /*
+ * The mdi_pathinfo state is OK. Check the client state.
+	 * If a failover is in progress, fail the pHCI offline
+ */
+ ct = MDI_PI(pip)->pi_client;
+ i_mdi_client_lock(ct, pip);
+ if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
+ (ct->ct_unstable)) {
+ /*
+ * Failover is in progress, can't check for constraints
+ */
+ MDI_PI_UNLOCK(pip);
+ i_mdi_client_unlock(ct);
+ MDI_PHCI_UNLOCK(ph);
+ *constraint = 0;
+ return;
+ }
+ MDI_PI_UNLOCK(pip);
+
+ /*
+	 * Check to see if we are retiring the last path of this
+ * client device...
+ */
+ cdip = ct->ct_dip;
+ if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
+ (i_mdi_client_compute_state(ct, ph) ==
+ MDI_CLIENT_STATE_FAILED)) {
+ i_mdi_client_unlock(ct);
+ MDI_PHCI_UNLOCK(ph);
+ (void) e_ddi_retire_notify(cdip, constraint);
+ MDI_PHCI_LOCK(ph);
+ pip = next;
+ } else {
+ i_mdi_client_unlock(ct);
+ pip = next;
+ }
+ }
+
+ MDI_PHCI_UNLOCK(ph);
+
+ return;
+}
+
+/*
+ * offline the path(s) hanging off the PHCI. If the
+ * last path to any client, check that constraints
+ * have been applied.
+ */
+void
+mdi_phci_retire_finalize(dev_info_t *dip, int phci_only)
+{
+ mdi_phci_t *ph;
+ mdi_client_t *ct;
+ mdi_pathinfo_t *pip;
+ mdi_pathinfo_t *next;
+ dev_info_t *cdip;
+ int unstable = 0;
+ int constraint;
+
+ if (!MDI_PHCI(dip))
+ return;
+
+ ph = i_devi_get_phci(dip);
+ if (ph == NULL) {
+ /* no last path and no pips */
+ return;
+ }
+
+ MDI_PHCI_LOCK(ph);
+
+ if (MDI_PHCI_IS_OFFLINE(ph)) {
+ MDI_PHCI_UNLOCK(ph);
+ /* no last path and no pips */
+ return;
+ }
+
+ /*
+ * Check to see if the pHCI can be offlined
+ */
+ if (ph->ph_unstable) {
+ unstable = 1;
+ }
+
+ pip = ph->ph_path_head;
+ while (pip != NULL) {
+ MDI_PI_LOCK(pip);
+ next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
+
+ /*
+	 * If a failover is in progress, fail the pHCI offline
+ */
+ ct = MDI_PI(pip)->pi_client;
+ i_mdi_client_lock(ct, pip);
+ if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
+ (ct->ct_unstable)) {
+ unstable = 1;
+ }
+ MDI_PI_UNLOCK(pip);
+
+ /*
+	 * Check to see if we are removing the last path of this
+ * client device...
+ */
+ cdip = ct->ct_dip;
+ if (!phci_only && cdip &&
+ (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
+ (i_mdi_client_compute_state(ct, ph) ==
+ MDI_CLIENT_STATE_FAILED)) {
+ i_mdi_client_unlock(ct);
+ MDI_PHCI_UNLOCK(ph);
+ /*
+ * We don't retire clients we just retire the
+ * path to a client. If it is the last path
+	 * to a client, constraints are checked and,
+	 * if they pass, the last path is offlined. MPXIO will
+ * then fail all I/Os to the client. Since we don't
+ * want to retire the client on a path error
+ * set constraint = 0 so that the client dip
+ * is not retired.
+ */
+ constraint = 0;
+ (void) e_ddi_retire_finalize(cdip, &constraint);
+ MDI_PHCI_LOCK(ph);
+ pip = next;
+ } else {
+ i_mdi_client_unlock(ct);
+ pip = next;
+ }
+ }
+
+ /*
+ * Cannot offline pip(s)
+ */
+ if (unstable) {
+ cmn_err(CE_WARN, "PHCI in transient state, cannot "
+ "retire, dip = %p", (void *)dip);
+ MDI_PHCI_UNLOCK(ph);
+ return;
+ }
+
+ /*
+ * Mark the pHCI as offline
+ */
+ MDI_PHCI_SET_OFFLINE(ph);
+
+ /*
+ * Mark the child mdi_pathinfo nodes as transient
+ */
+ pip = ph->ph_path_head;
+ while (pip != NULL) {
+ MDI_PI_LOCK(pip);
+ next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
+ MDI_PI_SET_OFFLINING(pip);
+ MDI_PI_UNLOCK(pip);
+ pip = next;
+ }
+ MDI_PHCI_UNLOCK(ph);
+ /*
+ * Give a chance for any pending commands to execute
+ */
+ delay(1);
+ MDI_PHCI_LOCK(ph);
+ pip = ph->ph_path_head;
+ while (pip != NULL) {
+ next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
+ (void) i_mdi_pi_offline(pip, 0);
+ MDI_PI_LOCK(pip);
+ ct = MDI_PI(pip)->pi_client;
+ if (!MDI_PI_IS_OFFLINE(pip)) {
+ cmn_err(CE_WARN, "PHCI busy, cannot offline path: "
+ "PHCI dip = %p", (void *)dip);
+ MDI_PI_UNLOCK(pip);
+ MDI_PHCI_SET_ONLINE(ph);
+ MDI_PHCI_UNLOCK(ph);
+ return;
+ }
+ MDI_PI_UNLOCK(pip);
+ pip = next;
+ }
+ MDI_PHCI_UNLOCK(ph);
+
+ return;
+}
+
+void
+mdi_phci_unretire(dev_info_t *dip)
+{
+ ASSERT(MDI_PHCI(dip));
+
+ /*
+ * Online the phci
+ */
+ i_mdi_phci_online(dip);
+}
+
/*ARGSUSED*/
static int
i_mdi_client_offline(dev_info_t *dip, uint_t flags)
diff --git a/usr/src/uts/common/os/sunndi.c b/usr/src/uts/common/os/sunndi.c
index 58d76dbd69..627f8fe6c6 100644
--- a/usr/src/uts/common/os/sunndi.c
+++ b/usr/src/uts/common/os/sunndi.c
@@ -68,6 +68,7 @@
#include <sys/nvpair.h>
#include <sys/sunmdi.h>
#include <sys/fs/dv_node.h>
+#include <sys/sunldi_impl.h>
#ifdef __sparc
#include <sys/archsystm.h> /* getpil/setpil */
@@ -853,6 +854,20 @@ ndi_dc_devi_create(struct devctl_iocdata *dcp, dev_info_t *pdip, int flags,
*/
if (dcp->flags & DEVCTL_OFFLINE) {
/*
+ * In the unlikely event that the dip was somehow attached by
+ * the userland process (and device contracts or LDI opens
+ * were registered against the dip) after it was created by
+ * a previous DEVCTL_CONSTRUCT call, we start notify
+ * proceedings on this dip. Note that we don't need to
+ * return the dip after a failure of the notify since
+ * for a contract or LDI handle to be created the dip was
+ * already available to the user.
+ */
+ if (e_ddi_offline_notify(cdip) == DDI_FAILURE) {
+ return (EBUSY);
+ }
+
+ /*
* hand set the OFFLINE flag to prevent any asynchronous
* autoconfiguration operations from attaching this node.
*/
@@ -860,6 +875,8 @@ ndi_dc_devi_create(struct devctl_iocdata *dcp, dev_info_t *pdip, int flags,
DEVI_SET_DEVICE_OFFLINE(cdip);
mutex_exit(&(DEVI(cdip)->devi_lock));
+ e_ddi_offline_finalize(cdip, DDI_SUCCESS);
+
rv = ndi_devi_bind_driver(cdip, flags);
if (rv != NDI_SUCCESS) {
(void) ndi_devi_offline(cdip, NDI_DEVI_REMOVE);
diff --git a/usr/src/uts/common/sys/Makefile b/usr/src/uts/common/sys/Makefile
index 2c4defc38d..b4591f05d9 100644
--- a/usr/src/uts/common/sys/Makefile
+++ b/usr/src/uts/common/sys/Makefile
@@ -845,7 +845,9 @@ SYSEVENTHDRS= \
CONTRACTHDRS= \
process.h \
- process_impl.h
+ process_impl.h \
+ device.h \
+ device_impl.h
USBHDRS= \
usba.h \
diff --git a/usr/src/uts/common/sys/autoconf.h b/usr/src/uts/common/sys/autoconf.h
index 3b10e97c89..e7fbd33267 100644
--- a/usr/src/uts/common/sys/autoconf.h
+++ b/usr/src/uts/common/sys/autoconf.h
@@ -104,6 +104,11 @@ struct devnames {
#define DDI_INTR_API 0x0200 /* interrupt interface messages */
#define DDI_INTR_IMPL 0x0400 /* interrupt implementation msgs */
#define DDI_INTR_NEXUS 0x0800 /* interrupt messages from nexuses */
+#define DDI_DBG_RETIRE 0x1000 /* Retire related messages */
+#define DDI_DBG_RTR_VRBOSE 0x2000 /* Verbose Retire messages */
+#define DDI_DBG_RTR_TRACE 0x4000 /* Trace Retire messages */
+#define LDI_EV_DEBUG 0x8000 /* LDI events debug messages */
+#define LDI_EV_TRACE 0x10000 /* LDI events trace messages */
extern int ddidebug;
@@ -118,6 +123,11 @@ extern int ddidebug;
#define DDI_INTR_APIDBG(args) if (ddidebug & DDI_INTR_API) cmn_err args
#define DDI_INTR_IMPLDBG(args) if (ddidebug & DDI_INTR_IMPL) cmn_err args
#define DDI_INTR_NEXDBG(args) if (ddidebug & DDI_INTR_NEXUS) cmn_err args
+#define RIO_DEBUG(args) if (ddidebug & DDI_DBG_RETIRE) cmn_err args
+#define RIO_VERBOSE(args) if (ddidebug & DDI_DBG_RTR_VRBOSE) cmn_err args
+#define RIO_TRACE(args) if (ddidebug & DDI_DBG_RTR_TRACE) cmn_err args
+#define LDI_EVDBG(args) if (ddidebug & LDI_EV_DEBUG) cmn_err args
+#define LDI_EVTRC(args) if (ddidebug & LDI_EV_TRACE) cmn_err args
#else
#define NDI_CONFIG_DEBUG(args)
#define BMDPRINTF(args)
@@ -129,6 +139,11 @@ extern int ddidebug;
#define DDI_INTR_APIDBG(args)
#define DDI_INTR_IMPLDBG(args)
#define DDI_INTR_NEXDBG(args)
+#define RIO_DEBUG(args) if (ddidebug & DDI_DBG_RETIRE) cmn_err args
+#define RIO_VERBOSE(args) if (ddidebug & DDI_DBG_RTR_VRBOSE) cmn_err args
+#define RIO_TRACE(args) if (ddidebug & DDI_DBG_RTR_TRACE) cmn_err args
+#define LDI_EVDBG(args) if (ddidebug & LDI_EV_DEBUG) cmn_err args
+#define LDI_EVTRC(args) if (ddidebug & LDI_EV_TRACE) cmn_err args
#endif
@@ -256,6 +271,15 @@ extern int i_ddi_reconfig(void);
extern void i_ddi_set_sysavail(void);
extern void i_ddi_set_reconfig(void);
+/* I/O retire related */
+extern int e_ddi_retire_device(char *path, char **cons_array);
+extern int e_ddi_unretire_device(char *path);
+extern int e_ddi_mark_retiring(dev_info_t *dip, void *arg);
+extern int e_ddi_retire_notify(dev_info_t *dip, void *arg);
+extern int e_ddi_retire_finalize(dev_info_t *dip, void *arg);
+extern void e_ddi_degrade_finalize(dev_info_t *dip);
+extern void e_ddi_undegrade_finalize(dev_info_t *dip);
+
#endif /* _KERNEL */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/sys/contract.h b/usr/src/uts/common/sys/contract.h
index 163f90cbfa..0bef407b98 100644
--- a/usr/src/uts/common/sys/contract.h
+++ b/usr/src/uts/common/sys/contract.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -72,6 +71,7 @@ typedef enum ctstate {
typedef enum ct_typeid {
CTT_PROCESS, /* process contract */
+ CTT_DEVICE, /* device contract */
CTT_MAXTYPE
} ct_typeid_t;
diff --git a/usr/src/uts/common/sys/contract/device.h b/usr/src/uts/common/sys/contract/device.h
new file mode 100644
index 0000000000..252cce3165
--- /dev/null
+++ b/usr/src/uts/common/sys/contract/device.h
@@ -0,0 +1,76 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_CONTRACT_DEVICE_H
+#define _SYS_CONTRACT_DEVICE_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/contract.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct ctmpl_device ctmpl_device_t;
+typedef struct cont_device cont_device_t;
+
+/*
+ * ct_ev_* flags
+ */
+#define CT_DEV_EV_ONLINE 0x1 /* device is moving to online state */
+#define CT_DEV_EV_DEGRADED 0x2 /* device is moving to degraded state */
+#define CT_DEV_EV_OFFLINE 0x4 /* device is moving to offline state */
+#define CT_DEV_ALLEVENT 0x7
+
+/*
+ * ctp_id values
+ */
+#define CTDP_ACCEPT 0x1 /* the acceptable set term */
+#define CTDP_NONEG 0x2 /* the non-negotiable term */
+#define CTDP_MINOR 0x4 /* the minor path term */
+#define CTDP_ALLPARAMS 0x7
+
+#define CTDP_NONEG_CLEAR 0x0 /* clear the noneg flag */
+#define CTDP_NONEG_SET 0x1 /* set noneg */
+
+/*
+ * Status fields
+ */
+#define CTDS_STATE "ctds_state"
+#define CTDS_ASET "ctds_aset"
+#define CTDS_NONEG "ctds_noneg"
+#define CTDS_MINOR "ctds_minor"
+
+/*
+ * Max Time allowed for synchronous acknowledgement of a negotiation event
+ */
+#define CT_DEV_ACKTIME 60 /* 60 seconds */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CONTRACT_DEVICE_H */
diff --git a/usr/src/uts/common/sys/contract/device_impl.h b/usr/src/uts/common/sys/contract/device_impl.h
new file mode 100644
index 0000000000..1bc27c454d
--- /dev/null
+++ b/usr/src/uts/common/sys/contract/device_impl.h
@@ -0,0 +1,93 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_CONTRACT_DEVICE_IMPL_H
+#define _SYS_CONTRACT_DEVICE_IMPL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+#include <sys/contract_impl.h>
+#include <sys/dditypes.h>
+#include <sys/contract/device.h>
+#include <sys/fs/snode.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Result of negotiation end: event successfully occurred or not
+ */
+#define CT_EV_SUCCESS 150
+#define CT_EV_FAILURE 151
+
+struct ctmpl_device {
+ ct_template_t ctd_ctmpl;
+ uint_t ctd_aset;
+ uint_t ctd_noneg;
+ char *ctd_minor;
+};
+
+struct cont_device {
+ contract_t cond_contract; /* common contract data */
+ char *cond_minor; /* minor node resource in contract */
+ dev_info_t *cond_dip; /* dip for minor node */
+ dev_t cond_devt; /* dev_t of minor node */
+ uint_t cond_spec; /* spec type of minor node */
+ uint_t cond_aset; /* acceptable state set */
+ uint_t cond_noneg; /* no negotiation if set */
+ uint_t cond_state; /* current state of device */
+ uint_t cond_neg; /* contract undergoing negotiation */
+ uint64_t cond_currev_id; /* id of event being negotiated */
+ uint_t cond_currev_type; /* type of event being negotiated */
+ uint_t cond_currev_ack; /* ack/nack status of ev negotiation */
+ list_node_t cond_next; /* linkage - devinfo's contracts */
+};
+
+/*
+ * Kernel APIs
+ */
+extern ct_type_t *device_type;
+/*
+ * struct proc;
+ */
+void contract_device_init(void);
+ct_ack_t contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type);
+void contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type);
+void contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type);
+int contract_device_open(dev_t dev, int spec_type, contract_t **ctpp);
+void contract_device_remove_dip(dev_info_t *dip);
+ct_ack_t contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type,
+ uint_t evtype);
+void contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type,
+ uint_t evtype, int ct_result);
+void contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type,
+ int result);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CONTRACT_DEVICE_IMPL_H */
diff --git a/usr/src/uts/common/sys/contract_impl.h b/usr/src/uts/common/sys/contract_impl.h
index c45cf06e60..7523de5bf0 100644
--- a/usr/src/uts/common/sys/contract_impl.h
+++ b/usr/src/uts/common/sys/contract_impl.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -50,6 +49,10 @@
extern "C" {
#endif
+extern int ct_debug;
+
+#define CT_DEBUG(args) if (ct_debug) cmn_err args
+
#ifdef _SYSCALL32
/*
@@ -110,7 +113,7 @@ typedef struct ctmplops {
int (*ctop_set)(struct ct_template *, ct_param_t *,
const cred_t *);
int (*ctop_get)(struct ct_template *, ct_param_t *);
- int (*ctop_create)(struct ct_template *);
+ int (*ctop_create)(struct ct_template *, ctid_t *);
uint_t allevents;
} ctmplops_t;
@@ -127,6 +130,7 @@ typedef struct ct_template {
uint_t ctmpl_ev_info; /* term: informative events */
} ct_template_t;
+
typedef enum ct_listnum {
CTEL_CONTRACT, /* ../contracts/type/<id>/events */
CTEL_BUNDLE, /* ../contracts/type/bundle */
@@ -139,6 +143,12 @@ typedef enum ctqflags {
CTQ_REFFED = 2 /* queue is reference counted */
} ctqflags_t;
+typedef enum ct_ack {
+ CT_ACK = 1, /* accept break */
+ CT_NACK, /* disallow break */
+ CT_NONE /* no matching contracts */
+} ct_ack_t;
+
/*
* Contract event queue
*/
@@ -198,6 +208,12 @@ typedef struct contops {
void (*contop_destroy)(struct contract *);
void (*contop_status)(struct contract *, zone_t *, int, nvlist_t *,
void *, model_t);
+ int (*contop_ack)(struct contract *, uint_t evtype,
+ uint64_t evid);
+ int (*contop_nack)(struct contract *, uint_t evtype,
+ uint64_t evid);
+ int (*contop_qack)(struct contract *, uint_t, uint64_t);
+ int (*contop_newct)(struct contract *);
} contops_t;
typedef ct_template_t *(ct_f_default_t)(void);
@@ -221,6 +237,11 @@ typedef enum ctflags {
CTF_INHERIT = 0x1
} ctflags_t;
+typedef struct ct_time {
+ long ctm_total; /* Total time allowed for event */
+ clock_t ctm_start; /* starting lbolt for event */
+} ct_time_t;
+
/*
* Contract
*/
@@ -257,6 +278,8 @@ typedef struct contract {
struct contract *ct_regent; /* [prospective] regent contract */
int ct_evcnt; /* number of critical events */
ct_kevent_t *ct_nevent; /* negotiation event */
+ ct_time_t ct_ntime; /* negotiation time tracker */
+ ct_time_t ct_qtime; /* quantum time tracker */
} contract_t;
#define CTLF_COPYOUT 0x1 /* performing copyout */
@@ -284,7 +307,7 @@ int ctmpl_get(ct_template_t *, ct_param_t *);
ct_template_t *ctmpl_dup(ct_template_t *);
void ctmpl_activate(ct_template_t *);
void ctmpl_clear(ct_template_t *);
-int ctmpl_create(ct_template_t *);
+int ctmpl_create(ct_template_t *, ctid_t *);
/*
* Contract functions
@@ -294,12 +317,14 @@ int contract_abandon(contract_t *, struct proc *, int);
int contract_adopt(contract_t *, struct proc *);
void contract_destroy(contract_t *);
void contract_exit(struct proc *);
-int contract_ack(contract_t *, uint64_t);
+int contract_ack(contract_t *ct, uint64_t evid, int cmd);
+int contract_qack(contract_t *ct, uint64_t evid);
+int contract_newct(contract_t *ct);
/*
* Event interfaces
*/
-void cte_publish_all(contract_t *, ct_kevent_t *, nvlist_t *, nvlist_t *);
+uint64_t cte_publish_all(contract_t *, ct_kevent_t *, nvlist_t *, nvlist_t *);
void cte_add_listener(ct_equeue_t *, ct_listener_t *);
void cte_remove_listener(ct_listener_t *);
void cte_reset_listener(ct_listener_t *);
@@ -313,7 +338,7 @@ int cte_set_reliable(ct_listener_t *, const cred_t *);
int contract_compar(const void *, const void *);
void ctmpl_init(ct_template_t *, ctmplops_t *, ct_type_t *, void *);
void ctmpl_copy(ct_template_t *, ct_template_t *);
-int ctmpl_create_inval(ct_template_t *);
+int ctmpl_create_inval(ct_template_t *, ctid_t *);
int contract_ctor(contract_t *, ct_type_t *, ct_template_t *, void *, ctflags_t,
struct proc *, int);
void contract_hold(contract_t *);
@@ -352,6 +377,13 @@ vnode_t *contract_vnode_get(contract_t *, vfs_t *);
void contract_vnode_set(contract_t *, contract_vnode_t *, vnode_t *);
int contract_vnode_clear(contract_t *, contract_vnode_t *);
+/*
+ * Negotiation stubs
+ */
+int contract_ack_inval(contract_t *, uint_t, uint64_t);
+int contract_qack_inval(contract_t *, uint_t, uint64_t);
+int contract_qack_notsup(contract_t *, uint_t, uint64_t);
+
#ifdef __cplusplus
}
#endif
diff --git a/usr/src/uts/common/sys/ctfs.h b/usr/src/uts/common/sys/ctfs.h
index e6702044d1..b46a517f2c 100644
--- a/usr/src/uts/common/sys/ctfs.h
+++ b/usr/src/uts/common/sys/ctfs.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -65,6 +64,7 @@ extern "C" {
#define CT_CQREQ CTFS_CTL(2) /* Request an additional quantum */
#define CT_CADOPT CTFS_CTL(3) /* Adopt a contract */
#define CT_CNEWCT CTFS_CTL(4) /* Define new contract */
+#define CT_CNACK CTFS_CTL(5) /* nack a negotiation */
/*
* Control codes for messages written to status files.
diff --git a/usr/src/uts/common/sys/ddi_impldefs.h b/usr/src/uts/common/sys/ddi_impldefs.h
index 3b99c60997..f5c227e5aa 100644
--- a/usr/src/uts/common/sys/ddi_impldefs.h
+++ b/usr/src/uts/common/sys/ddi_impldefs.h
@@ -194,6 +194,12 @@ struct dev_info {
char *devi_addr_buf; /* buffer for devi_addr */
char *devi_rebinding_name; /* binding_name of rebind */
+ /* For device contracts that have this dip's minor node as resource */
+ kmutex_t devi_ct_lock; /* contract lock */
+ kcondvar_t devi_ct_cv; /* contract cv */
+ int devi_ct_count; /* # of outstanding responses */
+ int devi_ct_neg; /* neg. occurred on dip */
+ list_t devi_ct;
};
#define DEVI(dev_info_type) ((struct dev_info *)(dev_info_type))
@@ -271,6 +277,11 @@ struct dev_info {
#define DEVI_SET_DEVICE_ONLINE(dip) { \
ASSERT(mutex_owned(&DEVI(dip)->devi_lock)); \
+ if (DEVI(dip)->devi_state & DEVI_DEVICE_DEGRADED) { \
+ mutex_exit(&DEVI(dip)->devi_lock); \
+ e_ddi_undegrade_finalize(dip); \
+ mutex_enter(&DEVI(dip)->devi_lock); \
+ } \
/* setting ONLINE clears DOWN, DEGRADED, OFFLINE */ \
DEVI(dip)->devi_state &= ~(DEVI_DEVICE_DOWN | \
DEVI_DEVICE_DEGRADED | DEVI_DEVICE_OFFLINE); \
@@ -297,12 +308,20 @@ struct dev_info {
#define DEVI_SET_DEVICE_DEGRADED(dip) { \
ASSERT(mutex_owned(&DEVI(dip)->devi_lock)); \
ASSERT(!DEVI_IS_DEVICE_OFFLINE(dip)); \
+ mutex_exit(&DEVI(dip)->devi_lock); \
+ e_ddi_degrade_finalize(dip); \
+ mutex_enter(&DEVI(dip)->devi_lock); \
DEVI(dip)->devi_state |= (DEVI_DEVICE_DEGRADED | DEVI_S_REPORT); \
}
#define DEVI_SET_DEVICE_UP(dip) { \
ASSERT(mutex_owned(&DEVI(dip)->devi_lock)); \
ASSERT(!DEVI_IS_DEVICE_OFFLINE(dip)); \
+ if (DEVI(dip)->devi_state & DEVI_DEVICE_DEGRADED) { \
+ mutex_exit(&DEVI(dip)->devi_lock); \
+ e_ddi_undegrade_finalize(dip); \
+ mutex_enter(&DEVI(dip)->devi_lock); \
+ } \
DEVI(dip)->devi_state &= ~(DEVI_DEVICE_DEGRADED | DEVI_DEVICE_DOWN); \
DEVI(dip)->devi_state |= DEVI_S_REPORT; \
}
@@ -503,6 +522,11 @@ void i_devi_exit(dev_info_t *, uint_t c_mask, int has_lock);
#define DEVI_REGISTERED_DEVID 0x00000020 /* device registered a devid */
#define DEVI_PHCI_SIGNALS_VHCI 0x00000040 /* pHCI ndi_devi_exit signals vHCI */
#define DEVI_REBIND 0x00000080 /* post initchild driver rebind */
+#define DEVI_RETIRED 0x00000100 /* device is retired */
+#define DEVI_RETIRING 0x00000200 /* being evaluated for retire */
+#define DEVI_R_CONSTRAINT 0x00000400 /* constraints have been applied */
+#define DEVI_R_BLOCKED 0x00000800 /* constraints block retire */
+#define DEVI_CT_NOP 0x00001000 /* NOP contract event occurred */
#define DEVI_BUSY_CHANGING(dip) (DEVI(dip)->devi_flags & DEVI_BUSY)
#define DEVI_BUSY_OWNED(dip) (DEVI_BUSY_CHANGING(dip) && \
diff --git a/usr/src/uts/common/sys/ddi_implfuncs.h b/usr/src/uts/common/sys/ddi_implfuncs.h
index 5105c4ce18..4aa213c1b2 100644
--- a/usr/src/uts/common/sys/ddi_implfuncs.h
+++ b/usr/src/uts/common/sys/ddi_implfuncs.h
@@ -20,7 +20,7 @@
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -274,6 +274,15 @@ int e_devid_cache_to_devt_list(ddi_devid_t, char *, int *, dev_t **);
void e_devid_cache_free_devt_list(int, dev_t *);
/*
+ * I/O retire persistent store
+ */
+void retire_store_init(void);
+void retire_store_read(void);
+int e_ddi_retire_persist(char *devpath);
+int e_ddi_retire_unpersist(char *devpath);
+int e_ddi_device_retired(char *devpath);
+
+/*
* Resource control functions to lock down device memory.
*/
extern int i_ddi_incr_locked_memory(proc_t *, rctl_qty_t);
diff --git a/usr/src/uts/common/sys/ddi_obsolete.h b/usr/src/uts/common/sys/ddi_obsolete.h
index c6a44c78de..84970dbb54 100644
--- a/usr/src/uts/common/sys/ddi_obsolete.h
+++ b/usr/src/uts/common/sys/ddi_obsolete.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -14,6 +14,7 @@
#include <sys/types.h>
#include <sys/dditypes.h>
+#include <sys/sunldi.h>
#ifdef __cplusplus
@@ -192,6 +193,15 @@ extern void repoutsw(int port, uint16_t *addr, int count);
extern void repoutsd(int port, uint32_t *addr, int count);
#endif
+/* Obsolete LDI event interfaces */
+extern int ldi_get_eventcookie(ldi_handle_t, char *,
+ ddi_eventcookie_t *);
+extern int ldi_add_event_handler(ldi_handle_t, ddi_eventcookie_t,
+ void (*handler)(ldi_handle_t, ddi_eventcookie_t, void *, void *),
+ void *, ldi_callback_id_t *);
+extern int ldi_remove_event_handler(ldi_handle_t, ldi_callback_id_t);
+
+
#endif /* not _DDI_STRICT */
#ifdef __cplusplus
diff --git a/usr/src/uts/common/sys/fs/snode.h b/usr/src/uts/common/sys/fs/snode.h
index ecef85390c..cd572d545c 100644
--- a/usr/src/uts/common/sys/fs/snode.h
+++ b/usr/src/uts/common/sys/fs/snode.h
@@ -119,6 +119,7 @@ struct snode {
#define SSELFCLONE 0x2000 /* represents a self cloning device */
#define SNOFLUSH 0x4000 /* do not flush device on fsync */
#define SCLOSING 0x8000 /* in last close(9E) */
+#define SFENCED 0x10000 /* snode fenced off for I/O retire */
#ifdef _KERNEL
/*
@@ -128,6 +129,12 @@ struct snode {
#define VTOCS(vp) (VTOS(VTOS(vp)->s_commonvp))
#define STOV(sp) ((sp)->s_vnode)
+extern int spec_debug;
+
+#define SPEC_FENCE_DEBUG 0x0001 /* emit fence related debug messages */
+
+#define FENDBG(args) if (spec_debug & SPEC_FENCE_DEBUG) cmn_err args
+
/*
* Forward declarations
@@ -167,6 +174,8 @@ void spec_snode_walk(int (*callback)(struct snode *, void *), void *);
int spec_devi_open_count(struct snode *, dev_info_t **);
int spec_is_clone(struct vnode *);
int spec_is_selfclone(struct vnode *);
+int spec_fence_snode(dev_info_t *dip, struct vnode *vp);
+int spec_unfence_snode(dev_info_t *dip);
/*
diff --git a/usr/src/uts/common/sys/modctl.h b/usr/src/uts/common/sys/modctl.h
index 255d02d7b2..9eab8025da 100644
--- a/usr/src/uts/common/sys/modctl.h
+++ b/usr/src/uts/common/sys/modctl.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -258,6 +258,9 @@ struct modlinkage {
#define MODDEVNAME 37
#define MODGETDEVFSPATH_MI_LEN 38
#define MODGETDEVFSPATH_MI 39
+#define MODRETIRE 40
+#define MODUNRETIRE 41
+#define MODISRETIRED 42
/*
* sub cmds for MODEVENTS
@@ -641,6 +644,7 @@ extern int modctl(int, ...);
#define MODDEBUG_LOADMSG 0x80000000 /* print "[un]loading..." msg */
#define MODDEBUG_ERRMSG 0x40000000 /* print detailed error msgs */
#define MODDEBUG_LOADMSG2 0x20000000 /* print 2nd level msgs */
+#define MODDEBUG_RETIRE 0x10000000 /* print retire msgs */
#define MODDEBUG_FINI_EBUSY 0x00020000 /* pretend fini returns EBUSY */
#define MODDEBUG_NOAUL_IPP 0x00010000 /* no Autounloading ipp mods */
#define MODDEBUG_NOAUL_DACF 0x00008000 /* no Autounloading dacf mods */
diff --git a/usr/src/uts/common/sys/sunldi.h b/usr/src/uts/common/sys/sunldi.h
index f80cc44f8f..71e9d9a7da 100644
--- a/usr/src/uts/common/sys/sunldi.h
+++ b/usr/src/uts/common/sys/sunldi.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -60,6 +59,26 @@ typedef struct __ldi_handle *ldi_handle_t;
typedef struct __ldi_callback_id *ldi_callback_id_t;
+typedef struct __ldi_ev_cookie *ldi_ev_cookie_t;
+
+/*
+ * LDI event interface related
+ */
+#define LDI_EV_SUCCESS 0
+#define LDI_EV_FAILURE (-1)
+#define LDI_EV_NONE (-2) /* no matching callbacks registered */
+#define LDI_EV_OFFLINE "LDI:EVENT:OFFLINE"
+#define LDI_EV_DEGRADE "LDI:EVENT:DEGRADE"
+
+#define LDI_EV_CB_VERS_1 1
+#define LDI_EV_CB_VERS LDI_EV_CB_VERS_1
+
+typedef struct ldi_ev_callback {
+ uint_t cb_vers;
+ int (*cb_notify)(ldi_handle_t, ldi_ev_cookie_t, void *, void *);
+ void (*cb_finalize)(ldi_handle_t, ldi_ev_cookie_t, int, void *, void *);
+} ldi_ev_callback_t;
+
/*
* LDI Ident manipulation functions
*/
@@ -93,13 +112,6 @@ extern int ldi_get_size(ldi_handle_t, uint64_t *);
extern int ldi_prop_op(ldi_handle_t, ddi_prop_op_t, int,
char *, caddr_t, int *);
-extern int ldi_get_eventcookie(ldi_handle_t, char *,
- ddi_eventcookie_t *);
-extern int ldi_add_event_handler(ldi_handle_t, ddi_eventcookie_t,
- void (*handler)(ldi_handle_t, ddi_eventcookie_t, void *, void *),
- void *, ldi_callback_id_t *);
-extern int ldi_remove_event_handler(ldi_handle_t, ldi_callback_id_t);
-
extern int ldi_strategy(ldi_handle_t, struct buf *);
extern int ldi_dump(ldi_handle_t, caddr_t, daddr_t, int);
extern int ldi_devmap(ldi_handle_t, devmap_cookie_t, offset_t,
@@ -132,6 +144,20 @@ extern int ldi_get_otyp(ldi_handle_t, int *);
extern int ldi_get_devid(ldi_handle_t, ddi_devid_t *);
extern int ldi_get_minor_name(ldi_handle_t, char **);
+/*
+ * LDI events related declarations
+ */
+extern int ldi_ev_get_cookie(ldi_handle_t lh, char *evname,
+ ldi_ev_cookie_t *cookiep);
+extern char *ldi_ev_get_type(ldi_ev_cookie_t cookie);
+extern int ldi_ev_register_callbacks(ldi_handle_t lh,
+ ldi_ev_cookie_t cookie, ldi_ev_callback_t *callb,
+ void *arg, ldi_callback_id_t *id);
+extern int ldi_ev_notify(dev_info_t *dip, minor_t minor, int spec_type,
+ ldi_ev_cookie_t cookie, void *ev_data);
+extern void ldi_ev_finalize(dev_info_t *dip, minor_t minor, int spec_type,
+ int ldi_result, ldi_ev_cookie_t cookie, void *ev_data);
+extern int ldi_ev_remove_callbacks(ldi_callback_id_t id);
#endif /* _KERNEL */
diff --git a/usr/src/uts/common/sys/sunldi_impl.h b/usr/src/uts/common/sys/sunldi_impl.h
index 9cbffc3ed2..1156fe2c41 100644
--- a/usr/src/uts/common/sys/sunldi_impl.h
+++ b/usr/src/uts/common/sys/sunldi_impl.h
@@ -2,9 +2,8 @@
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
@@ -20,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -53,6 +52,17 @@ extern "C" {
#define LI_HASH_SZ 32
/*
+ * Obsolete LDI event interfaces are available for now but are deprecated and a
+ * warning will be issued to consumers.
+ */
+#define LDI_OBSOLETE_EVENT 1
+
+/*
+ * Flag for LDI handle's lh_flags field
+ */
+#define LH_FLAGS_NOTIFY 0x0001 /* invoked in context of a notify */
+
+/*
* LDI initialization function
*/
void ldi_init(void);
@@ -87,20 +97,24 @@ struct ldi_handle {
/* protected by ldi_handle_hash_lock */
struct ldi_handle *lh_next;
uint_t lh_ref;
+ uint_t lh_flags;
/* unique/static fields in the handle */
uint_t lh_type;
struct ldi_ident *lh_ident;
vnode_t *lh_vp;
+#ifdef LDI_OBSOLETE_EVENT
/* fields protected by lh_lock */
kmutex_t lh_lock[1];
struct ldi_event *lh_events;
+#endif
};
/*
* LDI event information
*/
+#ifdef LDI_OBSOLETE_EVENT
typedef struct ldi_event {
/* fields protected by le_lhp->lh_lock */
struct ldi_event *le_next;
@@ -112,6 +126,36 @@ typedef struct ldi_event {
void *le_arg;
ddi_callback_id_t le_id;
} ldi_event_t;
+#endif
+
+typedef struct ldi_ev_callback_impl {
+ struct ldi_handle *lec_lhp;
+ dev_info_t *lec_dip;
+ dev_t lec_dev;
+ int lec_spec;
+ int (*lec_notify)();
+ void (*lec_finalize)();
+ void *lec_arg;
+ void *lec_cookie;
+ void *lec_id;
+ list_node_t lec_list;
+} ldi_ev_callback_impl_t;
+
+struct ldi_ev_callback_list {
+ kmutex_t le_lock;
+ kcondvar_t le_cv;
+ int le_busy;
+ void *le_thread;
+ list_t le_head;
+};
+
+int ldi_invoke_notify(dev_info_t *dip, dev_t dev, int spec_type, char *event,
+ void *ev_data);
+void ldi_invoke_finalize(dev_info_t *dip, dev_t dev, int spec_type, char *event,
+ int ldi_result, void *ev_data);
+int e_ddi_offline_notify(dev_info_t *dip);
+void e_ddi_offline_finalize(dev_info_t *dip, int result);
+
/*
* LDI device usage interfaces
diff --git a/usr/src/uts/common/sys/sunmdi.h b/usr/src/uts/common/sys/sunmdi.h
index 75b4f83ef7..c4a42633be 100644
--- a/usr/src/uts/common/sys/sunmdi.h
+++ b/usr/src/uts/common/sys/sunmdi.h
@@ -19,7 +19,7 @@
* CDDL HEADER END
*/
/*
- * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
@@ -133,6 +133,14 @@ int mdi_devi_online(dev_info_t *, uint_t);
int mdi_devi_offline(dev_info_t *, uint_t);
/*
+ * MDI path retire interfaces
+ */
+void mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array);
+void mdi_phci_retire_notify(dev_info_t *dip, int *constraint);
+void mdi_phci_retire_finalize(dev_info_t *dip, int phci_only);
+void mdi_phci_unretire(dev_info_t *dip);
+
+/*
* MDI devinfo locking functions.
*/
void mdi_devi_enter(dev_info_t *, int *);