Diffstat (limited to 'usr/src/uts/common/contract/device.c')
-rw-r--r-- | usr/src/uts/common/contract/device.c | 2207 |
1 files changed, 2207 insertions, 0 deletions
diff --git a/usr/src/uts/common/contract/device.c b/usr/src/uts/common/contract/device.c new file mode 100644 index 0000000000..4632cdaa9d --- /dev/null +++ b/usr/src/uts/common/contract/device.c @@ -0,0 +1,2207 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2007 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/mutex.h> +#include <sys/debug.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/kmem.h> +#include <sys/thread.h> +#include <sys/id_space.h> +#include <sys/avl.h> +#include <sys/list.h> +#include <sys/sysmacros.h> +#include <sys/proc.h> +#include <sys/contract.h> +#include <sys/contract_impl.h> +#include <sys/contract/device.h> +#include <sys/contract/device_impl.h> +#include <sys/cmn_err.h> +#include <sys/nvpair.h> +#include <sys/policy.h> +#include <sys/ddi_impldefs.h> +#include <sys/ddi_implfuncs.h> +#include <sys/systm.h> +#include <sys/stat.h> +#include <sys/sunddi.h> +#include <sys/esunddi.h> +#include <sys/ddi.h> +#include <sys/fs/dv_node.h> +#include <sys/sunndi.h> +#undef ct_lock /* needed because clnt.h defines ct_lock as a macro */ + +/* + * Device Contracts + * ----------------- + * This file contains the core code for the device contracts framework. + * A device contract is an agreement or a contract between a process and + * the kernel regarding the state of the device. A device contract may be + * created when a relationship is formed between a device and a process + * i.e. at open(2) time, or it may be created at some point after the device + * has been opened. A device contract once formed may be broken by either party. + * A device contract can be broken by the process by an explicit abandon of the + * contract or by an implicit abandon when the process exits. A device contract + * can be broken by the kernel either asynchronously (without negotiation) or + * synchronously (with negotiation). Exactly which happens depends on the device + * state transition. The following state diagram shows the transitions between + * device states. Only device state transitions currently supported by device + * contracts is shown. + * + * <-- A --> + * /-----------------> DEGRADED + * | | + * | | + * | | S + * | | | + * | | v + * v S --> v + * ONLINE ------------> OFFLINE + * + * + * In the figure above, the arrows indicate the direction of transition. The + * letter S refers to transitions which are inherently synchronous i.e. + * require negotiation and the letter A indicates transitions which are + * asynchronous i.e. are done without contract negotiations. A good example + * of a synchronous transition is the ONLINE -> OFFLINE transition. 
This
+ * transition cannot happen as long as there are consumers which have the
+ * device open. Thus some form of negotiation needs to happen between the
+ * consumers and the kernel to ensure that consumers either close devices
+ * or disallow the move to OFFLINE. Certain other transitions such as
+ * ONLINE --> DEGRADED for example, are inherently asynchronous i.e.
+ * non-negotiable. A device that suffers a fault that degrades its
+ * capabilities will become degraded irrespective of what consumers it has,
+ * so a negotiation in this case is pointless.
+ *
+ * The following device states are currently defined for device contracts:
+ *
+ * CT_DEV_EV_ONLINE
+ * The device is online and functioning normally
+ * CT_DEV_EV_DEGRADED
+ * The device is online but is functioning in a degraded capacity
+ * CT_DEV_EV_OFFLINE
+ * The device is offline and is no longer configured
+ *
+ * A typical consumer of device contracts starts out with a contract
+ * template and adds terms to that template. These include the
+ * "acceptable set" (A-set) term, which is a bitset of device states which
+ * are guaranteed by the contract. If the device moves out of a state in
+ * the A-set, the contract is broken. The breaking of the contract can
+ * be asynchronous in which case a critical contract event is sent to the
+ * contract holder but no negotiations take place. If the breaking of the
+ * contract is synchronous, negotiations are opened between the affected
+ * consumer and the kernel. The kernel does this by sending a critical
+ * event to the consumer with the CTE_NEG flag set indicating that this
+ * is a negotiation event. The consumer can accept this change by sending
+ * an ACK message to the kernel. Alternatively, if it has the necessary
+ * privileges, it can send a NACK message to the kernel which will block
+ * the device state change. To NACK a negotiable event, a process must
+ * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
+ *
+ * Other terms include the "minor path" term, specified explicitly if the
+ * contract is not being created at open(2) time or specified implicitly
+ * if the contract is being created at open time via an activated template.
+ *
+ * A contract event is sent on any state change to which the contract
+ * owner has subscribed via the informative or critical event sets. Only
+ * critical events are guaranteed to be delivered. Since all device state
+ * changes are controlled by the kernel and cannot be arbitrarily generated
+ * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not
+ * need to be asserted in a process's effective set to designate an event as
+ * critical. To ensure privacy, a process must either have the same effective
+ * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege
+ * asserted in its effective set in order to observe device contract events
+ * off the device contract type specific endpoint.
+ *
+ * Yet another term available with device contracts is the "non-negotiable"
+ * term. This term is used to pre-specify a NACK to any contract negotiation.
+ * This term is ignored for asynchronous state changes. For example, a
+ * process may have the A-set {ONLINE|DEGRADED} and make the contract
+ * non-negotiable. In this case, the device contract framework assumes a
+ * NACK for any transition to OFFLINE and blocks the offline. If the A-set
+ * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE
+ * are NACKed but transitions to DEGRADED succeed.
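+ *
+ * As a purely illustrative sketch, a consumer could construct such a
+ * template from userland roughly as follows. The calls shown are the
+ * usual libcontract(3LIB) template routines and the minor path is a
+ * made-up example; neither is defined in this file:
+ *
+ *	ctid_t ctid;
+ *	int fd = open("/system/contract/device/template", O_RDWR);
+ *	(void) ct_dev_tmpl_set_minor(fd, "/pseudo/foo@0:bar");
+ *	(void) ct_dev_tmpl_set_aset(fd, CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED);
+ *	(void) ct_tmpl_set_critical(fd, CT_DEV_EV_OFFLINE);
+ *	(void) ct_dev_tmpl_set_noneg(fd);	/* needs PRIV_SYS_DEVICES */
+ *	(void) ct_tmpl_create(fd, &ctid);	/* or ct_tmpl_activate(fd) */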
+ * + * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract) + * happens just before the I/O framework attempts to offline a device + * (i.e. detach a device and set the offline flag so that it cannot be + * reattached). A device contract holder is expected to either NACK the offline + * (if privileged) or release the device and allow the offline to proceed. + * + * The DEGRADE contract event (if DEGRADE is not in the A-set for a contract) + * is generated just before the I/O framework transitions the device state + * to "degraded" (i.e. DEVI_DEVICE_DEGRADED in I/O framework terminology). + * + * The contract holder is expected to ACK or NACK a negotiation event + * within a certain period of time. If the ACK/NACK is not received + * within the timeout period, the device contract framework will behave + * as if the contract does not exist and will proceed with the event. + * + * Unlike a process contract a device contract does not need to exist + * once it is abandoned, since it does not define a fault boundary. It + * merely represents an agreement between a process and the kernel + * regarding the state of the device. Once the process has abandoned + * the contract (either implicitly via a process exit or explicitly) + * the kernel has no reason to retain the contract. As a result + * device contracts are neither inheritable nor need to exist in an + * orphan state. + * + * A device unlike a process may exist in multiple contracts and has + * a "life" outside a device contract. A device unlike a process + * may exist without an associated contract. Unlike a process contract + * a device contract may be formed after a binding relationship is + * formed between a process and a device. + * + * IMPLEMENTATION NOTES + * ==================== + * DATA STRUCTURES + * ---------------- + * The heart of the device contracts implementation is the device contract + * private cont_device_t (or ctd for short) data structure. It encapsulates + * the generic contract_t data structure and has a number of private + * fields. + * These include: + * cond_minor: The minor device that is the subject of the contract + * cond_aset: The bitset of states which are guaranteed by the + * contract + * cond_noneg: If set, indicates that the result of negotiation has + * been predefined to be a NACK + * In addition, there are other device identifiers such the devinfo node, + * dev_t and spec_type of the minor node. There are also a few fields that + * are used during negotiation to maintain state. See + * uts/common/sys/contract/device_impl.h + * for details. + * The ctd structure represents the device private part of a contract of + * type "device" + * + * Another data structure used by device contracts is ctmpl_device. It is + * the device contracts private part of the contract template structure. It + * encapsulates the generic template structure "ct_template_t" and includes + * the following device contract specific fields + * ctd_aset: The bitset of states that should be guaranteed by a + * contract + * ctd_noneg: If set, indicates that contract should NACK a + * negotiation + * ctd_minor: The devfs_path (without the /devices prefix) of the + * minor node that is the subject of the contract. + * + * ALGORITHMS + * --------- + * There are three sets of routines in this file + * Template related routines + * ------------------------- + * These routines provide support for template related operations initated + * via the generic template operations. 
These include routines that dup + * a template, free it, and set various terms in the template + * (such as the minor node path, the acceptable state set (or A-set) + * and the non-negotiable term) as well as a routine to query the + * device specific portion of the template for the abovementioned terms. + * There is also a routine to create (ctmpl_device_create) that is used to + * create a contract from a template. This routine calls (after initial + * setup) the common function used to create a device contract + * (contract_device_create). + * + * core device contract implementation + * ---------------------------------- + * These routines support the generic contract framework to provide + * functionality that allows contracts to be created, managed and + * destroyed. The contract_device_create() routine is a routine used + * to create a contract from a template (either via an explicit create + * operation on a template or implicitly via an open with an + * activated template.). The contract_device_free() routine assists + * in freeing the device contract specific parts. There are routines + * used to abandon (contract_device_abandon) a device contract as well + * as a routine to destroy (which despite its name does not destroy, + * it only moves a contract to a dead state) a contract. + * There is also a routine to return status information about a + * contract - the level of detail depends on what is requested by the + * user. A value of CTD_FIXED only returns fixed length fields such + * as the A-set, state of device and value of the "noneg" term. If + * CTD_ALL is specified, the minor node path is returned as well. + * + * In addition there are interfaces (contract_device_ack/nack) which + * are used to support negotiation between userland processes and + * device contracts. These interfaces record the acknowledgement + * or lack thereof for negotiation events and help determine if the + * negotiated event should occur. + * + * "backend routines" + * ----------------- + * The backend routines form the interface between the I/O framework + * and the device contract subsystem. These routines, allow the I/O + * framework to call into the device contract subsystem to notify it of + * impending changes to a device state as well as to inform of the + * final disposition of such attempted state changes. Routines in this + * class include contract_device_offline() that indicates an attempt to + * offline a device, contract_device_degrade() that indicates that + * a device is moving to the degraded state and contract_device_negend() + * that is used by the I/O framework to inform the contracts subsystem of + * the final disposition of an attempted operation. + * + * SUMMARY + * ------- + * A contract starts its life as a template. A process allocates a device + * contract template and sets various terms: + * The A-set + * The device minor node + * Critical and informative events + * The noneg i.e. no negotition term + * Setting of these terms in the template is done via the + * ctmpl_device_set() entry point in this file. A process can query a + * template to determine the terms already set in the template - this is + * facilitated by the ctmpl_device_get() routine. + * + * Once all the appropriate terms are set, the contract is instantiated via + * one of two methods + * - via an explicit create operation - this is facilitated by the + * ctmpl_device_create() entry point + * - synchronously with the open(2) system call - this is achieved via the + * contract_device_open() routine. 
+ * The core work for both of the above functions is done by
+ * contract_device_create()
+ *
+ * A contract once created can be queried for its status. Support for
+ * status info is provided by both the common contracts framework and by
+ * the "device" contract type. If the level of detail requested is
+ * CTD_COMMON, only the common contract framework data is used. Higher
+ * levels of detail result in calls to contract_device_status() to supply
+ * device contract type specific status information.
+ *
+ * A contract once created may be abandoned either explicitly or implicitly.
+ * In either case, the contract_device_abandon() function is invoked. This
+ * function merely calls contract_destroy() which moves the contract to
+ * the DEAD state. The device contract portion of destroy processing is
+ * provided by contract_device_destroy() which merely disassociates the
+ * contract from its device devinfo node. A contract in the DEAD state is
+ * not freed. It hangs around until all references to the contract are
+ * gone. When that happens, the contract is finally deallocated. The
+ * device contract specific portion of the free is done by
+ * contract_device_free() which finally frees the device contract specific
+ * data structure (cont_device_t).
+ *
+ * When a device undergoes a state change, the I/O framework calls the
+ * corresponding device contract entry point. For example, when a device
+ * is about to go OFFLINE, the routine contract_device_offline() is
+ * invoked. Similarly, if a device moves to the DEGRADED state, the
+ * contract_device_degrade() function is called. These functions call the
+ * core routine contract_device_publish(). This function determines via
+ * the function is_sync_neg() whether an event is a synchronous (i.e.
+ * negotiable) event or not. In the former case contract_device_publish()
+ * publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs
+ * and/or NACKs from contract holders. In the latter case, it simply
+ * publishes the event and does not wait. In the negotiation case, ACKs or
+ * NACKs from userland consumers result in contract_device_ack_nack()
+ * being called where the result of the negotiation is recorded in the
+ * contract data structure. Once all outstanding contract owners have
+ * responded, the device contract code in wait_for_acks() determines the
+ * final result of the negotiation. A single NACK overrides all other ACKs.
+ * If there is no NACK, then a single ACK will result in an overall ACK
+ * result. If there are no ACKs or NACKs, then the result CT_NONE is
+ * returned back to the I/O framework. Once the event is permitted or
+ * blocked, the I/O framework proceeds or aborts the state change. The
+ * I/O framework then calls contract_device_negend() with a result code
+ * indicating final disposition of the event. This call releases the
+ * barrier and other state associated with the previous negotiation,
+ * which permits the next event (if any) to come into the device contract
+ * framework.
+ *
+ * Finally, a device that has outstanding contracts may be removed from
+ * the system which results in its devinfo node being freed. The devinfo
+ * free routine in the I/O framework calls into the device contract
+ * function - contract_device_remove_dip(). This routine disassociates
+ * the dip from all contracts associated with the devinfo node being
+ * freed, allowing the devinfo node to be freed.
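+ *
+ * Putting the above together, a negotiated offline attempt flows roughly
+ * as follows (a sketch of the sequence described above, not literal I/O
+ * framework code):
+ *
+ *	result = contract_device_offline(dip, dev, spec_type);
+ *	    -> contract_device_publish(dip, dev, spec_type,
+ *	           CT_DEV_EV_OFFLINE, nvl)
+ *	        -> cte_publish_all()	(critical CTE_NEG event)
+ *	        -> wait_for_acks()	(CT_ACK, CT_NACK or CT_NONE)
+ *	    -> on CT_NACK, contract_device_offline() itself issues
+ *	       contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE)
+ *	Otherwise the I/O framework attempts the offline and then calls
+ *	contract_device_negend() with the final disposition (CT_EV_SUCCESS
+ *	or CT_EV_FAILURE), which releases the barrier for the next event.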
+ * + * LOCKING + * --------- + * There are four sets of data that need to be protected by locks + * + * i) device contract specific portion of the contract template - This data + * is protected by the template lock ctmpl_lock. + * + * ii) device contract specific portion of the contract - This data is + * protected by the contract lock ct_lock + * + * iii) The linked list of contracts hanging off a devinfo node - This + * list is protected by the per-devinfo node lock devi_ct_lock + * + * iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv + * and devi_ct_count that controls state changes to a dip + * + * The template lock is independent in that none of the other locks in this + * file may be taken while holding the template lock (and vice versa). + * + * The remaining three locks have the following lock order + * + * devi_ct_lock -> ct_count barrier -> ct_lock + * + */ + +static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, + int spec_type, proc_t *owner, int *errorp); + +/* barrier routines */ +static void ct_barrier_acquire(dev_info_t *dip); +static void ct_barrier_release(dev_info_t *dip); +static int ct_barrier_held(dev_info_t *dip); +static int ct_barrier_empty(dev_info_t *dip); +static void ct_barrier_wait_for_release(dev_info_t *dip); +static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs); +static void ct_barrier_decr(dev_info_t *dip); +static void ct_barrier_incr(dev_info_t *dip); + +ct_type_t *device_type; + +/* + * Macro predicates for determining when events should be sent and how. + */ +#define EVSENDP(ctd, flag) \ + ((ctd->cond_contract.ct_ev_info | ctd->cond_contract.ct_ev_crit) & flag) + +#define EVINFOP(ctd, flag) \ + ((ctd->cond_contract.ct_ev_crit & flag) == 0) + +/* + * State transition table showing which transitions are synchronous and which + * are not. + */ +struct ct_dev_negtable { + uint_t st_old; + uint_t st_new; + uint_t st_neg; +} ct_dev_negtable[] = { + {CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE, 1}, + {CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED, 0}, + {CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE, 0}, + {CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE, 1}, + {0} +}; + +/* + * Device contract template implementation + */ + +/* + * ctmpl_device_dup + * + * The device contract template dup entry point. + * This simply copies all the fields (generic as well as device contract + * specific) fields of the original. + */ +static struct ct_template * +ctmpl_device_dup(struct ct_template *template) +{ + ctmpl_device_t *new; + ctmpl_device_t *old = template->ctmpl_data; + char *buf; + char *minor; + + new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); + buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + /* + * copy generic fields. + * ctmpl_copy returns with old template lock held + */ + ctmpl_copy(&new->ctd_ctmpl, template); + + new->ctd_ctmpl.ctmpl_data = new; + new->ctd_aset = old->ctd_aset; + new->ctd_minor = NULL; + new->ctd_noneg = old->ctd_noneg; + + if (old->ctd_minor) { + ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN); + bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1); + } else { + kmem_free(buf, MAXPATHLEN); + buf = NULL; + } + + mutex_exit(&template->ctmpl_lock); + if (buf) { + minor = i_ddi_strdup(buf, KM_SLEEP); + kmem_free(buf, MAXPATHLEN); + buf = NULL; + } else { + minor = NULL; + } + mutex_enter(&template->ctmpl_lock); + + if (minor) { + new->ctd_minor = minor; + } + + ASSERT(buf == NULL); + return (&new->ctd_ctmpl); +} + +/* + * ctmpl_device_free + * + * The device contract template free entry point. 
Just + * frees the template. + */ +static void +ctmpl_device_free(struct ct_template *template) +{ + ctmpl_device_t *dtmpl = template->ctmpl_data; + + if (dtmpl->ctd_minor) + kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); + + kmem_free(dtmpl, sizeof (ctmpl_device_t)); +} + +/* + * SAFE_EV is the set of events which a non-privileged process is + * allowed to make critical. An unprivileged device contract owner has + * no control over when a device changes state, so all device events + * can be in the critical set. + * + * EXCESS tells us if "value", a critical event set, requires + * additional privilege. For device contracts EXCESS currently + * evaluates to 0. + */ +#define SAFE_EV (CT_DEV_ALLEVENT) +#define EXCESS(value) ((value) & ~SAFE_EV) + + +/* + * ctmpl_device_set + * + * The device contract template set entry point. Sets various terms in the + * template. The non-negotiable term can only be set if the process has + * the {PRIV_SYS_DEVICES} privilege asserted in its effective set. + */ +static int +ctmpl_device_set(struct ct_template *tmpl, ct_param_t *param, const cred_t *cr) +{ + ctmpl_device_t *dtmpl = tmpl->ctmpl_data; + char *buf; + int error; + dev_info_t *dip; + int spec_type; + + ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock)); + + switch (param->ctpm_id) { + case CTDP_ACCEPT: + if (param->ctpm_value & ~CT_DEV_ALLEVENT) + return (EINVAL); + if (param->ctpm_value == 0) + return (EINVAL); + if (param->ctpm_value == CT_DEV_ALLEVENT) + return (EINVAL); + + dtmpl->ctd_aset = param->ctpm_value; + break; + case CTDP_NONEG: + if (param->ctpm_value != CTDP_NONEG_SET && + param->ctpm_value != CTDP_NONEG_CLEAR) + return (EINVAL); + + /* + * only privileged processes can designate a contract + * non-negotiatble. + */ + if (param->ctpm_value == CTDP_NONEG_SET && + (error = secpolicy_sys_devices(cr)) != 0) { + return (error); + } + + dtmpl->ctd_noneg = param->ctpm_value; + break; + + case CTDP_MINOR: + if (param->ctpm_value == NULL) + return (EINVAL); + + buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + /* + * Copyin the device path + */ + error = copyinstr((char *)(uintptr_t)param->ctpm_value, buf, + MAXPATHLEN, NULL); + if (error != 0) { + kmem_free(buf, MAXPATHLEN); + return (error); + } + buf[MAXPATHLEN - 1] = '\0'; + + if (*buf != '/' || + strncmp(buf, "/devices/", strlen("/devices/")) == 0 || + strstr(buf, "../devices/") || strchr(buf, ':') == NULL) { + kmem_free(buf, MAXPATHLEN); + return (EINVAL); + } + + spec_type = 0; + dip = NULL; + if (resolve_pathname(buf, &dip, NULL, &spec_type) != 0) { + kmem_free(buf, MAXPATHLEN); + return (ERANGE); + } + ddi_release_devi(dip); + + if (spec_type != S_IFCHR && spec_type != S_IFBLK) { + kmem_free(buf, MAXPATHLEN); + return (EINVAL); + } + + if (dtmpl->ctd_minor != NULL) { + kmem_free(dtmpl->ctd_minor, + strlen(dtmpl->ctd_minor) + 1); + } + dtmpl->ctd_minor = i_ddi_strdup(buf, KM_SLEEP); + kmem_free(buf, MAXPATHLEN); + break; + case CTP_EV_CRITICAL: + /* + * Currently for device contracts, any event + * may be added to the critical set. We retain the + * following code however for future enhancements. + */ + if (EXCESS(param->ctpm_value) && + (error = secpolicy_contract_event(cr)) != 0) + return (error); + tmpl->ctmpl_ev_crit = param->ctpm_value; + break; + default: + return (EINVAL); + } + + return (0); +} + +/* + * ctmpl_device_get + * + * The device contract template get entry point. Simply fetches and + * returns the value of the requested term. 
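+ *
+ * As an illustration only (the accessors named below are the
+ * libcontract(3LIB) routines assumed to sit on top of this entry point;
+ * they are not defined in this file), a process would read the terms
+ * back roughly as:
+ *
+ *	uint_t aset, noneg;
+ *	char buf[MAXPATHLEN];
+ *	size_t len = sizeof (buf);
+ *	(void) ct_dev_tmpl_get_aset(fd, &aset);
+ *	(void) ct_dev_tmpl_get_noneg(fd, &noneg);
+ *	(void) ct_dev_tmpl_get_minor(fd, buf, &len);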
+ */ +static int +ctmpl_device_get(struct ct_template *template, ct_param_t *param) +{ + ctmpl_device_t *dtmpl = template->ctmpl_data; + int error; + + ASSERT(MUTEX_HELD(&template->ctmpl_lock)); + + switch (param->ctpm_id) { + case CTDP_ACCEPT: + param->ctpm_value = dtmpl->ctd_aset; + break; + case CTDP_NONEG: + param->ctpm_value = dtmpl->ctd_noneg; + break; + case CTDP_MINOR: + if (dtmpl->ctd_minor) { + error = copyoutstr(dtmpl->ctd_minor, + (char *)(uintptr_t)param->ctpm_value, + MAXPATHLEN, NULL); + if (error != 0) + return (error); + } else { + return (ENOENT); + } + break; + default: + return (EINVAL); + } + + return (0); +} + +/* + * Device contract type specific portion of creating a contract using + * a specified template + */ +/*ARGSUSED*/ +int +ctmpl_device_create(ct_template_t *template, ctid_t *ctidp) +{ + ctmpl_device_t *dtmpl; + char *buf; + dev_t dev; + int spec_type; + int error; + cont_device_t *ctd; + + if (ctidp == NULL) + return (EINVAL); + + buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + dtmpl = template->ctmpl_data; + + mutex_enter(&template->ctmpl_lock); + if (dtmpl->ctd_minor == NULL) { + /* incomplete template */ + mutex_exit(&template->ctmpl_lock); + kmem_free(buf, MAXPATHLEN); + return (EINVAL); + } else { + ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); + bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1); + } + mutex_exit(&template->ctmpl_lock); + + spec_type = 0; + dev = NODEV; + if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 || + dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE || + (spec_type != S_IFCHR && spec_type != S_IFBLK)) { + CT_DEBUG((CE_WARN, + "tmpl_create: failed to find device: %s", buf)); + kmem_free(buf, MAXPATHLEN); + return (ERANGE); + } + kmem_free(buf, MAXPATHLEN); + + ctd = contract_device_create(template->ctmpl_data, + dev, spec_type, curproc, &error); + + if (ctd == NULL) { + CT_DEBUG((CE_WARN, "Failed to create device contract for " + "process (%d) with device (devt = %lu, spec_type = %s)", + curproc->p_pid, dev, + spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK")); + return (error); + } + + mutex_enter(&ctd->cond_contract.ct_lock); + *ctidp = ctd->cond_contract.ct_id; + mutex_exit(&ctd->cond_contract.ct_lock); + + return (0); +} + +/* + * Device contract specific template entry points + */ +static ctmplops_t ctmpl_device_ops = { + ctmpl_device_dup, /* ctop_dup */ + ctmpl_device_free, /* ctop_free */ + ctmpl_device_set, /* ctop_set */ + ctmpl_device_get, /* ctop_get */ + ctmpl_device_create, /* ctop_create */ + CT_DEV_ALLEVENT /* all device events bitmask */ +}; + + +/* + * Device contract implementation + */ + +/* + * contract_device_default + * + * The device contract default template entry point. Creates a + * device contract template with a default A-set and no "noneg" , + * with informative degrade events and critical offline events. + * There is no default minor path. + */ +static ct_template_t * +contract_device_default(void) +{ + ctmpl_device_t *new; + + new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); + ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new); + + new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED; + new->ctd_noneg = 0; + new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED; + new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE; + + return (&new->ctd_ctmpl); +} + +/* + * contract_device_free + * + * Destroys the device contract specific portion of a contract and + * frees the contract. 
+ */ +static void +contract_device_free(contract_t *ct) +{ + cont_device_t *ctd = ct->ct_data; + + ASSERT(ctd->cond_minor); + ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); + kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1); + + ASSERT(ctd->cond_devt != DDI_DEV_T_ANY && + ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV); + + ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR); + + ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT)); + ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1); + + ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT)); + ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK))); + + ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0)); + ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0)); + + ASSERT(!list_link_active(&ctd->cond_next)); + + kmem_free(ctd, sizeof (cont_device_t)); +} + +/* + * contract_device_abandon + * + * The device contract abandon entry point. + */ +static void +contract_device_abandon(contract_t *ct) +{ + ASSERT(MUTEX_HELD(&ct->ct_lock)); + + /* + * device contracts cannot be inherited or orphaned. + * Move the contract to the DEAD_STATE. It will be freed + * once all references to it are gone. + */ + contract_destroy(ct); +} + +/* + * contract_device_destroy + * + * The device contract destroy entry point. + * Called from contract_destroy() to do any type specific destroy. Note + * that destroy is a misnomer - this does not free the contract, it only + * moves it to the dead state. A contract is actually freed via + * contract_rele() -> contract_dtor(), contop_free() + */ +static void +contract_device_destroy(contract_t *ct) +{ + cont_device_t *ctd = ct->ct_data; + dev_info_t *dip = ctd->cond_dip; + + ASSERT(MUTEX_HELD(&ct->ct_lock)); + + if (dip == NULL) { + /* + * The dip has been removed, this is a dangling contract + * Check that dip linkages are NULL + */ + ASSERT(!list_link_active(&ctd->cond_next)); + CT_DEBUG((CE_NOTE, "contract_device_destroy: contract has no " + "devinfo node. contract ctid : %d", ct->ct_id)); + return; + } + + /* + * Need to have lock order: devi_ct_lock -> ct_count barrier -> ct_lock + */ + mutex_exit(&ct->ct_lock); + + /* + * Waiting for the barrier to be released is strictly speaking not + * necessary. But it simplifies the implementation of + * contract_device_publish() by establishing the invariant that + * device contracts cannot go away during negotiation. + */ + mutex_enter(&(DEVI(dip)->devi_ct_lock)); + ct_barrier_wait_for_release(dip); + mutex_enter(&ct->ct_lock); + + list_remove(&(DEVI(dip)->devi_ct), ctd); + ctd->cond_dip = NULL; /* no longer linked to dip */ + contract_rele(ct); /* remove hold for dip linkage */ + + mutex_exit(&ct->ct_lock); + mutex_exit(&(DEVI(dip)->devi_ct_lock)); + mutex_enter(&ct->ct_lock); +} + +/* + * contract_device_status + * + * The device contract status entry point. Called when level of "detail" + * is either CTD_FIXED or CTD_ALL + * + */ +static void +contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl, + void *status, model_t model) +{ + cont_device_t *ctd = ct->ct_data; + + ASSERT(detail == CTD_FIXED || detail == CTD_ALL); + + mutex_enter(&ct->ct_lock); + contract_status_common(ct, zone, status, model); + + /* + * There's no need to hold the contract lock while accessing static + * data like aset or noneg. But since we need the lock to access other + * data like state, we hold it anyway. 
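+ *
+ * (Illustrative note: userland normally consumes these nvlist entries
+ * through the libcontract(3LIB) accessors, e.g. ct_status_read(fd,
+ * CTD_ALL, &sthdl) followed by ct_dev_status_get_aset(),
+ * ct_dev_status_get_noneg() and ct_dev_status_get_minor(); those names
+ * are assumed here and are not part of this file.)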
+ */ + VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0); + VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0); + VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0); + + if (detail == CTD_FIXED) { + mutex_exit(&ct->ct_lock); + return; + } + + ASSERT(ctd->cond_minor); + VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0); + + mutex_exit(&ct->ct_lock); +} + +/* + * Converts a result integer into the corresponding string. Used for printing + * messages + */ +static char * +result_str(uint_t result) +{ + switch (result) { + case CT_ACK: + return ("CT_ACK"); + case CT_NACK: + return ("CT_NACK"); + case CT_NONE: + return ("CT_NONE"); + default: + return ("UNKNOWN"); + } +} + +/* + * Converts a device state integer constant into the corresponding string. + * Used to print messages. + */ +static char * +state_str(uint_t state) +{ + switch (state) { + case CT_DEV_EV_ONLINE: + return ("ONLINE"); + case CT_DEV_EV_DEGRADED: + return ("DEGRADED"); + case CT_DEV_EV_OFFLINE: + return ("OFFLINE"); + default: + return ("UNKNOWN"); + } +} + +/* + * Routine that determines if a particular CT_DEV_EV_? event corresponds to a + * synchronous state change or not. + */ +static int +is_sync_neg(uint_t old, uint_t new) +{ + int i; + + ASSERT(old & CT_DEV_ALLEVENT); + ASSERT(new & CT_DEV_ALLEVENT); + + if (old == new) { + CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s", + state_str(new))); + return (-2); + } + + for (i = 0; ct_dev_negtable[i].st_new != 0; i++) { + if (old == ct_dev_negtable[i].st_old && + new == ct_dev_negtable[i].st_new) { + return (ct_dev_negtable[i].st_neg); + } + } + + CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: " + "old = %s -> new = %s", state_str(old), state_str(new))); + + return (-1); +} + +/* + * Used to cleanup cached dv_nodes so that when a device is released by + * a contract holder, its devinfo node can be successfully detached. + */ +static int +contract_device_dvclean(dev_info_t *dip) +{ + char *devnm; + dev_info_t *pdip; + int error; + + ASSERT(dip); + + /* pdip can be NULL if we have contracts against the root dip */ + pdip = ddi_get_parent(dip); + + if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) { + char *path; + + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, path); + CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, " + "device=%s", path)); + kmem_free(path, MAXPATHLEN); + return (EDEADLOCK); + } + + if (pdip) { + devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP); + (void) ddi_deviname(dip, devnm); + error = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE); + kmem_free(devnm, MAXNAMELEN + 1); + } else { + error = devfs_clean(dip, NULL, DV_CLEAN_FORCE); + } + + return (error); +} + +/* + * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland. + * Results in the ACK or NACK being recorded on the dip for one particular + * contract. The device contracts framework evaluates the ACK/NACKs for all + * contracts against a device to determine if a particular device state change + * should be allowed. 
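+ *
+ * From the holder's side this looks roughly like the sketch below, with
+ * efd and cfd being the contract's ctfs "events" and "ctl" file
+ * descriptors. The event routines are the usual libcontract(3LIB) calls
+ * and can_release_device() is a hypothetical application policy check;
+ * none of this is defined in this file:
+ *
+ *	ct_evthdl_t ev;
+ *	(void) ct_event_read_critical(efd, &ev);
+ *	if (ct_event_get_flags(ev) & CTE_NEG) {
+ *		ctevid_t evid = ct_event_get_evid(ev);
+ *		if (can_release_device())	/* hypothetical policy */
+ *			(void) ct_ctl_ack(cfd, evid);	/* allow the change */
+ *		else
+ *			(void) ct_ctl_nack(cfd, evid);	/* block the change */
+ *	}
+ *	ct_event_free(ev);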
+ */ +static int +contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid, + uint_t cmd) +{ + cont_device_t *ctd = ct->ct_data; + dev_info_t *dip; + ctid_t ctid; + int error; + + ctid = ct->ct_id; + + CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid)); + + mutex_enter(&ct->ct_lock); + CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid)); + + dip = ctd->cond_dip; + + ASSERT(ctd->cond_minor); + ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); + + /* + * Negotiation only if new state is not in A-set + */ + ASSERT(!(ctd->cond_aset & evtype)); + + /* + * Negotiation only if transition is synchronous + */ + ASSERT(is_sync_neg(ctd->cond_state, evtype)); + + /* + * We shouldn't be negotiating if the "noneg" flag is set + */ + ASSERT(!ctd->cond_noneg); + + if (dip) + ndi_hold_devi(dip); + + mutex_exit(&ct->ct_lock); + + /* + * dv_clean only if !NACK and offline state change + */ + if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) { + CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid)); + error = contract_device_dvclean(dip); + if (error != 0) { + CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d", + ctid)); + ddi_release_devi(dip); + } + } + + mutex_enter(&ct->ct_lock); + + if (dip) + ddi_release_devi(dip); + + if (dip == NULL) { + if (ctd->cond_currev_id != evid) { + CT_DEBUG((CE_WARN, "%sACK for non-current event " + "(type=%s, id=%llu) on removed device", + cmd == CT_NACK ? "N" : "", + state_str(evtype), (unsigned long long)evid)); + CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d", + ctid)); + } else { + ASSERT(ctd->cond_currev_type == evtype); + CT_DEBUG((CE_WARN, "contract_ack: no such device: " + "ctid: %d", ctid)); + } + error = (ct->ct_state == CTS_DEAD) ? ESRCH : + ((cmd == CT_NACK) ? ETIMEDOUT : 0); + mutex_exit(&ct->ct_lock); + return (error); + } + + /* + * Must follow lock order: devi_ct_lock -> ct_count barrier - >ct_lock + */ + mutex_exit(&ct->ct_lock); + + mutex_enter(&DEVI(dip)->devi_ct_lock); + mutex_enter(&ct->ct_lock); + if (ctd->cond_currev_id != evid) { + char *buf; + mutex_exit(&ct->ct_lock); + mutex_exit(&DEVI(dip)->devi_ct_lock); + ndi_hold_devi(dip); + buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, buf); + ddi_release_devi(dip); + CT_DEBUG((CE_WARN, "%sACK for non-current event" + "(type=%s, id=%llu) on device %s", + cmd == CT_NACK ? "N" : "", + state_str(evtype), (unsigned long long)evid, buf)); + kmem_free(buf, MAXPATHLEN); + CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d", + cmd == CT_NACK ? ETIMEDOUT : 0, ctid)); + return (cmd == CT_ACK ? 0 : ETIMEDOUT); + } + + ASSERT(ctd->cond_currev_type == evtype); + ASSERT(cmd == CT_ACK || cmd == CT_NACK); + + CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d", + cmd == CT_NACK ? "N" : "", ctid)); + + ctd->cond_currev_ack = cmd; + mutex_exit(&ct->ct_lock); + + ct_barrier_decr(dip); + mutex_exit(&DEVI(dip)->devi_ct_lock); + + CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid)); + + return (0); +} + +/* + * Invoked when a userland contract holder approves (i.e. ACKs) a state change + */ +static int +contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid) +{ + return (contract_device_ack_nack(ct, evtype, evid, CT_ACK)); +} + +/* + * Invoked when a userland contract holder blocks (i.e. 
NACKs) a state change + */ +static int +contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid) +{ + return (contract_device_ack_nack(ct, evtype, evid, CT_NACK)); +} + +/* + * Creates a new contract synchronously with the breaking of an existing + * contract. Currently not supported. + */ +/*ARGSUSED*/ +static int +contract_device_newct(contract_t *ct) +{ + return (ENOTSUP); +} + +/* + * Core device contract implementation entry points + */ +static contops_t contract_device_ops = { + contract_device_free, /* contop_free */ + contract_device_abandon, /* contop_abandon */ + contract_device_destroy, /* contop_destroy */ + contract_device_status, /* contop_status */ + contract_device_ack, /* contop_ack */ + contract_device_nack, /* contop_nack */ + contract_qack_notsup, /* contop_qack */ + contract_device_newct /* contop_newct */ +}; + +/* + * contract_device_init + * + * Initializes the device contract type. + */ +void +contract_device_init(void) +{ + device_type = contract_type_init(CTT_DEVICE, "device", + &contract_device_ops, contract_device_default); +} + +/* + * contract_device_create + * + * create a device contract given template "tmpl" and the "owner" process. + * May fail and return NULL if project.max-contracts would have been exceeded. + * + * Common device contract creation routine called for both open-time and + * non-open time device contract creation + */ +static cont_device_t * +contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type, + proc_t *owner, int *errorp) +{ + cont_device_t *ctd; + char *minor; + char *path; + dev_info_t *dip; + + ASSERT(dtmpl != NULL); + ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE); + ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK); + ASSERT(errorp); + + *errorp = 0; + + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); + ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); + bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1); + mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); + + dip = e_ddi_hold_devi_by_path(path, 0); + if (dip == NULL) { + cmn_err(CE_WARN, "contract_create: Cannot find devinfo node " + "for device path (%s)", path); + kmem_free(path, MAXPATHLEN); + *errorp = ERANGE; + return (NULL); + } + + /* + * Lock out any parallel contract negotiations + */ + mutex_enter(&(DEVI(dip)->devi_ct_lock)); + ct_barrier_acquire(dip); + mutex_exit(&(DEVI(dip)->devi_ct_lock)); + + minor = i_ddi_strdup(path, KM_SLEEP); + kmem_free(path, MAXPATHLEN); + + (void) contract_type_pbundle(device_type, owner); + + ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP); + + /* + * Only we hold a refernce to this contract. Safe to access + * the fields without a ct_lock + */ + ctd->cond_minor = minor; + /* + * It is safe to set the dip pointer in the contract + * as the contract will always be destroyed before the dip + * is released + */ + ctd->cond_dip = dip; + ctd->cond_devt = dev; + ctd->cond_spec = spec_type; + + /* + * Since we are able to lookup the device, it is either + * online or degraded + */ + ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ? 
+ CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE; + + mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); + ctd->cond_aset = dtmpl->ctd_aset; + ctd->cond_noneg = dtmpl->ctd_noneg; + + /* + * contract_ctor() initailizes the common portion of a contract + * contract_dtor() destroys the common portion of a contract + */ + if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl, + ctd, 0, owner, B_TRUE)) { + mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); + /* + * contract_device_free() destroys the type specific + * portion of a contract and frees the contract. + * The "minor" path and "cred" is a part of the type specific + * portion of the contract and will be freed by + * contract_device_free() + */ + contract_device_free(&ctd->cond_contract); + + /* release barrier */ + mutex_enter(&(DEVI(dip)->devi_ct_lock)); + ct_barrier_release(dip); + mutex_exit(&(DEVI(dip)->devi_ct_lock)); + + ddi_release_devi(dip); + *errorp = EAGAIN; + return (NULL); + } + mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); + + mutex_enter(&ctd->cond_contract.ct_lock); + ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME; + ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME; + ctd->cond_contract.ct_ntime.ctm_start = -1; + ctd->cond_contract.ct_qtime.ctm_start = -1; + mutex_exit(&ctd->cond_contract.ct_lock); + + /* + * Insert device contract into list hanging off the dip + * Bump up the ref-count on the contract to reflect this + */ + contract_hold(&ctd->cond_contract); + mutex_enter(&(DEVI(dip)->devi_ct_lock)); + list_insert_tail(&(DEVI(dip)->devi_ct), ctd); + + /* release barrier */ + ct_barrier_release(dip); + mutex_exit(&(DEVI(dip)->devi_ct_lock)); + + ddi_release_devi(dip); + + return (ctd); +} + +/* + * Called when a device is successfully opened to create an open-time contract + * i.e. synchronously with a device open. + */ +int +contract_device_open(dev_t dev, int spec_type, contract_t **ctpp) +{ + ctmpl_device_t *dtmpl; + ct_template_t *tmpl; + cont_device_t *ctd; + char *path; + klwp_t *lwp; + int error; + + if (ctpp) + *ctpp = NULL; + + /* + * Check if we are in user-context i.e. if we have an lwp + */ + lwp = ttolwp(curthread); + if (lwp == NULL) { + CT_DEBUG((CE_NOTE, "contract_open: Not user-context")); + return (0); + } + + tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]); + if (tmpl == NULL) { + return (0); + } + dtmpl = tmpl->ctmpl_data; + + /* + * If the user set a minor path in the template before an open, + * ignore it. We use the minor path of the actual minor opened. + */ + mutex_enter(&tmpl->ctmpl_lock); + if (dtmpl->ctd_minor != NULL) { + CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: " + "ignoring device minor path in active template: %s", + curproc->p_pid, dtmpl->ctd_minor)); + /* + * This is a copy of the actual activated template. + * Safe to make changes such as freeing the minor + * path in the template. 
+ */ + kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); + dtmpl->ctd_minor = NULL; + } + mutex_exit(&tmpl->ctmpl_lock); + + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) { + CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive " + "minor path from dev_t,spec {%lu, %d} for process (%d)", + dev, spec_type, curproc->p_pid)); + ctmpl_free(tmpl); + kmem_free(path, MAXPATHLEN); + return (1); + } + + mutex_enter(&tmpl->ctmpl_lock); + ASSERT(dtmpl->ctd_minor == NULL); + dtmpl->ctd_minor = path; + mutex_exit(&tmpl->ctmpl_lock); + + ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error); + + mutex_enter(&tmpl->ctmpl_lock); + ASSERT(dtmpl->ctd_minor); + dtmpl->ctd_minor = NULL; + mutex_exit(&tmpl->ctmpl_lock); + ctmpl_free(tmpl); + kmem_free(path, MAXPATHLEN); + + if (ctd == NULL) { + cmn_err(CE_NOTE, "contract_device_open(): Failed to " + "create device contract for process (%d) holding " + "device (devt = %lu, spec_type = %d)", + curproc->p_pid, dev, spec_type); + return (1); + } + + if (ctpp) { + mutex_enter(&ctd->cond_contract.ct_lock); + *ctpp = &ctd->cond_contract; + mutex_exit(&ctd->cond_contract.ct_lock); + } + return (0); +} + +/* + * Called during contract negotiation by the device contract framework to wait + * for ACKs or NACKs from contract holders. If all responses are not received + * before a specified timeout, this routine times out. + */ +static uint_t +wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype) +{ + cont_device_t *ctd; + int timed_out = 0; + int result = CT_NONE; + int ack; + char *f = "wait_for_acks"; + + ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); + ASSERT(dip); + ASSERT(evtype & CT_DEV_ALLEVENT); + ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); + ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || + (spec_type == S_IFBLK || spec_type == S_IFCHR)); + + CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip)); + + if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) { + /* + * some contract owner(s) didn't respond in time + */ + CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip)); + timed_out = 1; + } + + ack = 0; + for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; + ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { + + mutex_enter(&ctd->cond_contract.ct_lock); + + ASSERT(ctd->cond_dip == dip); + + if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) { + mutex_exit(&ctd->cond_contract.ct_lock); + continue; + } + if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { + mutex_exit(&ctd->cond_contract.ct_lock); + continue; + } + + /* skip if non-negotiable contract */ + if (ctd->cond_noneg) { + mutex_exit(&ctd->cond_contract.ct_lock); + continue; + } + + ASSERT(ctd->cond_currev_type == evtype); + if (ctd->cond_currev_ack == CT_NACK) { + CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p", + f, (void *)dip)); + mutex_exit(&ctd->cond_contract.ct_lock); + return (CT_NACK); + } else if (ctd->cond_currev_ack == CT_ACK) { + ack = 1; + CT_DEBUG((CE_NOTE, "%s: found a ACK: %p", + f, (void *)dip)); + } + mutex_exit(&ctd->cond_contract.ct_lock); + } + + if (ack) { + result = CT_ACK; + CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip)); + } else if (timed_out) { + result = CT_NONE; + CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p", + f, (void *)dip)); + } else { + CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p", + f, (void *)dip)); + } + + + return (result); +} + +/* + * Determines the current state of a device (i.e a devinfo 
node + */ +static int +get_state(dev_info_t *dip) +{ + if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip)) + return (CT_DEV_EV_OFFLINE); + else if (DEVI_IS_DEVICE_DEGRADED(dip)) + return (CT_DEV_EV_DEGRADED); + else + return (CT_DEV_EV_ONLINE); +} + +/* + * Sets the current state of a device in a device contract + */ +static void +set_cond_state(dev_info_t *dip) +{ + uint_t state = get_state(dip); + cont_device_t *ctd; + + /* verify that barrier is held */ + ASSERT(ct_barrier_held(dip)); + + for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; + ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { + mutex_enter(&ctd->cond_contract.ct_lock); + ASSERT(ctd->cond_dip == dip); + ctd->cond_state = state; + mutex_exit(&ctd->cond_contract.ct_lock); + } +} + +/* + * Core routine called by event-specific routines when an event occurs. + * Determines if an event should be be published, and if it is to be + * published, whether a negotiation should take place. Also implements + * NEGEND events which publish the final disposition of an event after + * negotiations are complete. + * + * When an event occurs on a minor node, this routine walks the list of + * contracts hanging off a devinfo node and for each contract on the affected + * dip, evaluates the following cases + * + * a. an event that is synchronous, breaks the contract and NONEG not set + * - bumps up the outstanding negotiation counts on the dip + * - marks the dip as undergoing negotiation (devi_ct_neg) + * - event of type CTE_NEG is published + * b. an event that is synchronous, breaks the contract and NONEG is set + * - sets the final result to CT_NACK, event is blocked + * - does not publish an event + * c. event is asynchronous and breaks the contract + * - publishes a critical event irrespect of whether the NONEG + * flag is set, since the contract will be broken and contract + * owner needs to be informed. + * d. No contract breakage but the owner has subscribed to the event + * - publishes the event irrespective of the NONEG event as the + * owner has explicitly subscribed to the event. + * e. NEGEND event + * - publishes a critical event. Should only be doing this if + * if NONEG is not set. + * f. all other events + * - Since a contract is not broken and this event has not been + * subscribed to, this event does not need to be published for + * for this contract. + * + * Once an event is published, what happens next depends on the type of + * event: + * + * a. NEGEND event + * - cleanup all state associated with the preceding negotiation + * and return CT_ACK to the caller of contract_device_publish() + * b. NACKed event + * - One or more contracts had the NONEG term, so the event was + * blocked. Return CT_NACK to the caller. + * c. Negotiated event + * - Call wait_for_acks() to wait for responses from contract + * holders. The end result is either CT_ACK (event is permitted), + * CT_NACK (event is blocked) or CT_NONE (no contract owner) + * responded. This result is returned back to the caller. + * d. All other events + * - If the event was asynchronous (i.e. not negotiated) or + * a contract was not broken return CT_ACK to the caller. 
+ */ +static uint_t +contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type, + uint_t evtype, nvlist_t *tnvl) +{ + cont_device_t *ctd; + uint_t result = CT_NONE; + uint64_t evid = 0; + uint64_t nevid = 0; + char *path = NULL; + int negend; + int match; + int sync = 0; + contract_t *ct; + ct_kevent_t *event; + nvlist_t *nvl; + int broken = 0; + + ASSERT(dip); + ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); + ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || + (spec_type == S_IFBLK || spec_type == S_IFCHR)); + ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT)); + + /* Is this a synchronous state change ? */ + if (evtype != CT_EV_NEGEND) { + sync = is_sync_neg(get_state(dip), evtype); + /* NOP if unsupported transition */ + if (sync == -2 || sync == -1) { + DEVI(dip)->devi_flags |= DEVI_CT_NOP; + result = (sync == -2) ? CT_ACK : CT_NONE; + goto out; + } + CT_DEBUG((CE_NOTE, "publish: is%s sync state change", + sync ? "" : " not")); + } else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) { + DEVI(dip)->devi_flags &= ~DEVI_CT_NOP; + result = CT_ACK; + goto out; + } + + path = kmem_alloc(MAXPATHLEN, KM_SLEEP); + (void) ddi_pathname(dip, path); + + mutex_enter(&(DEVI(dip)->devi_ct_lock)); + + /* + * Negotiation end - set the state of the device in the contract + */ + if (evtype == CT_EV_NEGEND) { + CT_DEBUG((CE_NOTE, "publish: negend: setting cond state")); + set_cond_state(dip); + } + + /* + * If this device didn't go through negotiation, don't publish + * a NEGEND event - simply release the barrier to allow other + * device events in. + */ + negend = 0; + if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) { + CT_DEBUG((CE_NOTE, "publish: no negend reqd. release barrier")); + ct_barrier_release(dip); + mutex_exit(&(DEVI(dip)->devi_ct_lock)); + result = CT_ACK; + goto out; + } else if (evtype == CT_EV_NEGEND) { + /* + * There are negotiated contract breakages that + * need a NEGEND event + */ + ASSERT(ct_barrier_held(dip)); + negend = 1; + CT_DEBUG((CE_NOTE, "publish: setting negend flag")); + } else { + /* + * This is a new event, not a NEGEND event. Wait for previous + * contract events to complete. + */ + ct_barrier_acquire(dip); + } + + + match = 0; + for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; + ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { + + ctid_t ctid; + size_t len = strlen(path); + + mutex_enter(&ctd->cond_contract.ct_lock); + + ASSERT(ctd->cond_dip == dip); + ASSERT(ctd->cond_minor); + ASSERT(strncmp(ctd->cond_minor, path, len) == 0 && + ctd->cond_minor[len] == ':'); + + if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) { + mutex_exit(&ctd->cond_contract.ct_lock); + continue; + } + if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { + mutex_exit(&ctd->cond_contract.ct_lock); + continue; + } + + /* We have a matching contract */ + match = 1; + ctid = ctd->cond_contract.ct_id; + CT_DEBUG((CE_NOTE, "publish: found matching contract: %d", + ctid)); + + /* + * There are 4 possible cases + * 1. A contract is broken (dev not in acceptable state) and + * the state change is synchronous - start negotiation + * by sending a CTE_NEG critical event. + * 2. A contract is broken and the state change is + * asynchronous - just send a critical event and + * break the contract. + * 3. Contract is not broken, but consumer has subscribed + * to the event as a critical or informative event + * - just send the appropriate event + * 4. contract waiting for negend event - just send the critical + * NEGEND event. 
+		 */
+		broken = 0;
+		if (!negend && !(evtype & ctd->cond_aset)) {
+			broken = 1;
+			CT_DEBUG((CE_NOTE, "publish: Contract broken: %d",
+			    ctid));
+		}
+
+		/*
+		 * Don't send event if
+		 *	- contract is not broken AND
+		 *	- contract holder has not subscribed to this event AND
+		 *	- contract not waiting for a NEGEND event
+		 */
+		if (!broken && !EVSENDP(ctd, evtype) &&
+		    !ctd->cond_neg) {
+			CT_DEBUG((CE_NOTE, "contract_device_publish(): "
+			    "contract (%d): no publish reqd: event %d",
+			    ctd->cond_contract.ct_id, evtype));
+			mutex_exit(&ctd->cond_contract.ct_lock);
+			continue;
+		}
+
+		/*
+		 * Note: need to kmem_zalloc() the event so mutexes are
+		 * initialized automatically
+		 */
+		ct = &ctd->cond_contract;
+		event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
+		event->cte_type = evtype;
+
+		if (broken && sync) {
+			CT_DEBUG((CE_NOTE, "publish: broken + sync: "
+			    "ctid: %d", ctid));
+			ASSERT(!negend);
+			ASSERT(ctd->cond_currev_id == 0);
+			ASSERT(ctd->cond_currev_type == 0);
+			ASSERT(ctd->cond_currev_ack == 0);
+			ASSERT(ctd->cond_neg == 0);
+			if (ctd->cond_noneg) {
+				/* Nothing to publish. Event has been blocked */
+				CT_DEBUG((CE_NOTE, "publish: sync and noneg: "
+				    "not publishing blocked ev: ctid: %d",
+				    ctid));
+				result = CT_NACK;
+				kmem_free(event, sizeof (ct_kevent_t));
+				mutex_exit(&ctd->cond_contract.ct_lock);
+				continue;
+			}
+			event->cte_flags = CTE_NEG; /* critical neg. event */
+			ctd->cond_currev_type = event->cte_type;
+			ct_barrier_incr(dip);
+			DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */
+			ctd->cond_neg = 1;
+		} else if (broken && !sync) {
+			CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d",
+			    ctid));
+			ASSERT(!negend);
+			ASSERT(ctd->cond_currev_id == 0);
+			ASSERT(ctd->cond_currev_type == 0);
+			ASSERT(ctd->cond_currev_ack == 0);
+			ASSERT(ctd->cond_neg == 0);
+			event->cte_flags = 0;	/* critical event */
+		} else if (EVSENDP(ctd, event->cte_type)) {
+			CT_DEBUG((CE_NOTE, "publish: event subscribed: "
+			    "ctid: %d", ctid));
+			ASSERT(!negend);
+			ASSERT(ctd->cond_currev_id == 0);
+			ASSERT(ctd->cond_currev_type == 0);
+			ASSERT(ctd->cond_currev_ack == 0);
+			ASSERT(ctd->cond_neg == 0);
+			event->cte_flags = EVINFOP(ctd, event->cte_type) ?
+			    CTE_INFO : 0;
+		} else if (ctd->cond_neg) {
+			CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid));
+			ASSERT(negend);
+			ASSERT(ctd->cond_noneg == 0);
+			nevid = ctd->cond_contract.ct_nevent ?
+			    ctd->cond_contract.ct_nevent->cte_id : 0;
+			ASSERT(ctd->cond_currev_id == nevid);
+			event->cte_flags = 0;	/* NEGEND is always critical */
+			ctd->cond_currev_id = 0;
+			ctd->cond_currev_type = 0;
+			ctd->cond_currev_ack = 0;
+			ctd->cond_neg = 0;
+		} else {
+			CT_DEBUG((CE_NOTE, "publish: not publishing event for "
+			    "ctid: %d, evtype: %d",
+			    ctd->cond_contract.ct_id, event->cte_type));
+			ASSERT(!negend);
+			ASSERT(ctd->cond_currev_id == 0);
+			ASSERT(ctd->cond_currev_type == 0);
+			ASSERT(ctd->cond_currev_ack == 0);
+			ASSERT(ctd->cond_neg == 0);
+			kmem_free(event, sizeof (ct_kevent_t));
+			mutex_exit(&ctd->cond_contract.ct_lock);
+			continue;
+		}
+
+		nvl = NULL;
+		if (tnvl) {
+			VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0);
+			if (negend) {
+				int32_t newct = 0;
+				ASSERT(ctd->cond_noneg == 0);
+				VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid)
+				    == 0);
+				VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT,
+				    &newct) == 0);
+				VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
+				    newct == 1 ? 0 :
+				    ctd->cond_contract.ct_id) == 0);
+				CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d "
+				    "CTS_NEVID: %llu, CTS_NEWCT: %s",
+				    ctid, (unsigned long long)nevid,
+				    newct ? "success" : "failure"));
+
+			}
+		}
+
+		if (ctd->cond_neg) {
+			ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1);
+			ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1);
+			ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt();
+			ctd->cond_contract.ct_qtime.ctm_start =
+			    ctd->cond_contract.ct_ntime.ctm_start;
+		}
+
+		/*
+		 * by holding the dip's devi_ct_lock we ensure that
+		 * all ACK/NACKs are held up until we have finished
+		 * publishing to all contracts.
+		 */
+		mutex_exit(&ctd->cond_contract.ct_lock);
+		evid = cte_publish_all(ct, event, nvl, NULL);
+		mutex_enter(&ctd->cond_contract.ct_lock);
+
+		if (ctd->cond_neg) {
+			ASSERT(!negend);
+			ASSERT(broken);
+			ASSERT(sync);
+			ASSERT(!ctd->cond_noneg);
+			CT_DEBUG((CE_NOTE, "publish: sync break, setting evid"
+			    ": %d", ctid));
+			ctd->cond_currev_id = evid;
+		} else if (negend) {
+			ctd->cond_contract.ct_ntime.ctm_start = -1;
+			ctd->cond_contract.ct_qtime.ctm_start = -1;
+		}
+		mutex_exit(&ctd->cond_contract.ct_lock);
+	}
+
+	/*
+	 * If "negend", set the counter back to its initial state (-1) so that
+	 * other events can be published. Also clear the negotiation flag
+	 * on the dip.
+	 *
+	 * 0 .. n are used for counting.
+	 * -1 indicates the counter is available for use.
+	 */
+	if (negend) {
+		/*
+		 * devi_ct_count is not necessarily 0. We may have
+		 * timed out, in which case the count will be non-zero.
+		 */
+		ct_barrier_release(dip);
+		DEVI(dip)->devi_ct_neg = 0;
+		CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p",
+		    (void *)dip));
+	} else if (DEVI(dip)->devi_ct_neg) {
+		ASSERT(match);
+		ASSERT(!ct_barrier_empty(dip));
+		CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p",
+		    DEVI(dip)->devi_ct_count, (void *)dip));
+	} else {
+		/*
+		 * for non-negotiated events or subscribed events or no
+		 * matching contracts
+		 */
+		ASSERT(ct_barrier_empty(dip));
+		ASSERT(DEVI(dip)->devi_ct_neg == 0);
+		CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: "
+		    "dip=%p", (void *)dip));
+
+		/*
+		 * Only this function, when called with a CT_EV_NEGEND event
+		 * (i.e. from contract_device_negend()), resets the counter to
+		 * its READY state (-1). contract_device_negend() is invoked
+		 * for every state change, whether or not a NEGEND event is
+		 * needed, but the NEGEND event itself is only published to
+		 * contracts whose preceding event was actually negotiated.
+		 */
+	}
+
+	if (!match) {
+		/* No matching contracts */
+		CT_DEBUG((CE_NOTE, "publish: No matching contract"));
+		result = CT_NONE;
+	} else if (result == CT_NACK) {
+		/* a non-negotiable contract exists and this is a neg. event */
+		CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract"));
+		(void) wait_for_acks(dip, dev, spec_type, evtype);
+	} else if (DEVI(dip)->devi_ct_neg) {
+		/* one or more contracts going through negotiations */
+		CT_DEBUG((CE_NOTE, "publish: sync contract: waiting"));
+		result = wait_for_acks(dip, dev, spec_type, evtype);
+	} else {
+		/* no negotiated contracts or no broken contracts or NEGEND */
+		CT_DEBUG((CE_NOTE, "publish: async/no-break/negend"));
+		result = CT_ACK;
+	}
+
+	/*
+	 * Release the lock only now so that the only point where we
+	 * drop the lock is in wait_for_acks().
This is so that we don't + * miss cv_signal/cv_broadcast from contract holders + */ + CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock")); + mutex_exit(&(DEVI(dip)->devi_ct_lock)); + +out: + if (tnvl) + nvlist_free(tnvl); + if (path) + kmem_free(path, MAXPATHLEN); + + + CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result))); + return (result); +} + + +/* + * contract_device_offline + * + * Event publishing routine called by I/O framework when a device is offlined. + */ +ct_ack_t +contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type) +{ + nvlist_t *nvl; + uint_t result; + uint_t evtype; + + VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + evtype = CT_DEV_EV_OFFLINE; + result = contract_device_publish(dip, dev, spec_type, evtype, nvl); + + /* + * If a contract offline is NACKED, the framework expects us to call + * NEGEND ourselves, since we know the final result + */ + if (result == CT_NACK) { + contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE); + } + + return (result); +} + +/* + * contract_device_degrade + * + * Event publishing routine called by I/O framework when a device + * moves to degrade state. + */ +/*ARGSUSED*/ +void +contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type) +{ + nvlist_t *nvl; + uint_t evtype; + + VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + evtype = CT_DEV_EV_DEGRADED; + (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); +} + +/* + * contract_device_undegrade + * + * Event publishing routine called by I/O framework when a device + * moves from degraded state to online state. + */ +/*ARGSUSED*/ +void +contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type) +{ + nvlist_t *nvl; + uint_t evtype; + + VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + + evtype = CT_DEV_EV_ONLINE; + (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); +} + +/* + * For all contracts which have undergone a negotiation (because the device + * moved out of the acceptable state for that contract and the state + * change is synchronous i.e. requires negotiation) this routine publishes + * a CT_EV_NEGEND event with the final disposition of the event. + * + * This event is always a critical event. + */ +void +contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result) +{ + nvlist_t *nvl; + uint_t evtype; + + ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE); + + CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, " + "dip: %p", result, (void *)dip)); + + VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); + VERIFY(nvlist_add_int32(nvl, CTS_NEWCT, + result == CT_EV_SUCCESS ? 1 : 0) == 0); + + evtype = CT_EV_NEGEND; + (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); + + CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p", + (void *)dip)); +} + +/* + * Wrapper routine called by other subsystems (such as LDI) to start + * negotiations when a synchronous device state change occurs. + * Returns CT_ACK or CT_NACK. 
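+ *
+ * A minimal sketch of a hypothetical caller, for illustration only
+ * (do_offline_work() is a placeholder, not a function in this file):
+ *
+ *	if (contract_device_negotiate(dip, dev, S_IFCHR,
+ *	    CT_DEV_EV_OFFLINE) == CT_ACK) {
+ *		error = do_offline_work(dip);
+ *		contract_device_finalize(dip, dev, S_IFCHR,
+ *		    CT_DEV_EV_OFFLINE,
+ *		    error == 0 ? CT_EV_SUCCESS : CT_EV_FAILURE);
+ *	}
+ *
+ * On CT_NACK, contract_device_offline() has already published the failing
+ * NEGEND on our behalf (see above), so the sketch only finalizes the
+ * acknowledged case.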
+ */ +ct_ack_t +contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type, + uint_t evtype) +{ + int result; + + ASSERT(dip); + ASSERT(dev != NODEV); + ASSERT(dev != DDI_DEV_T_ANY); + ASSERT(dev != DDI_DEV_T_NONE); + ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); + + switch (evtype) { + case CT_DEV_EV_OFFLINE: + result = contract_device_offline(dip, dev, spec_type); + break; + default: + cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation " + "not supported: event (%d) for dev_t (%lu) and spec (%d), " + "dip (%p)", evtype, dev, spec_type, (void *)dip); + result = CT_NACK; + break; + } + + return (result); +} + +/* + * A wrapper routine called by other subsystems (such as the LDI) to + * finalize event processing for a state change event. For synchronous + * state changes, this publishes NEGEND events. For asynchronous i.e. + * non-negotiable events this publishes the event. + */ +void +contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type, + uint_t evtype, int ct_result) +{ + ASSERT(dip); + ASSERT(dev != NODEV); + ASSERT(dev != DDI_DEV_T_ANY); + ASSERT(dev != DDI_DEV_T_NONE); + ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); + + switch (evtype) { + case CT_DEV_EV_OFFLINE: + contract_device_negend(dip, dev, spec_type, ct_result); + break; + case CT_DEV_EV_DEGRADED: + contract_device_degrade(dip, dev, spec_type); + contract_device_negend(dip, dev, spec_type, ct_result); + break; + case CT_DEV_EV_ONLINE: + contract_device_undegrade(dip, dev, spec_type); + contract_device_negend(dip, dev, spec_type, ct_result); + break; + default: + cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported " + "event (%d) for dev_t (%lu) and spec (%d), dip (%p)", + evtype, dev, spec_type, (void *)dip); + break; + } +} + +/* + * Called by I/O framework when a devinfo node is freed to remove the + * association between a devinfo node and its contracts. 
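+ *
+ * Illustrative scenario (an example, not an additional requirement): if a
+ * CT_DEV_EV_OFFLINE negotiation is still in flight for this node, the
+ * ct_barrier_wait_for_release() call below blocks the teardown until the
+ * corresponding NEGEND releases the barrier, so no contract is unlinked
+ * while an event published against it is still outstanding.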
+ */
+void
+contract_device_remove_dip(dev_info_t *dip)
+{
+	cont_device_t *ctd;
+	cont_device_t *next;
+	contract_t *ct;
+
+	mutex_enter(&(DEVI(dip)->devi_ct_lock));
+	ct_barrier_wait_for_release(dip);
+
+	for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; ctd = next) {
+		next = list_next(&(DEVI(dip)->devi_ct), ctd);
+		list_remove(&(DEVI(dip)->devi_ct), ctd);
+		ct = &ctd->cond_contract;
+		/*
+		 * Unlink the dip associated with this contract
+		 */
+		mutex_enter(&ct->ct_lock);
+		ASSERT(ctd->cond_dip == dip);
+		ctd->cond_dip = NULL; /* no longer linked to dip */
+		contract_rele(ct);	/* remove hold for dip linkage */
+		CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: "
+		    "ctid: %d", ct->ct_id));
+		mutex_exit(&ct->ct_lock);
+	}
+	ASSERT(list_is_empty(&(DEVI(dip)->devi_ct)));
+	mutex_exit(&(DEVI(dip)->devi_ct_lock));
+}
+
+/*
+ * Barrier related routines
+ *
+ * The barrier is the per-devinfo counter devi_ct_count: -1 means the
+ * barrier is free; a thread that acquires it sets the count to 0 and
+ * then counts outstanding negotiated events (see ct_barrier_incr() and
+ * ct_barrier_decr()).
+ */
+static void
+ct_barrier_acquire(dev_info_t *dip)
+{
+	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+	CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier"));
+	while (DEVI(dip)->devi_ct_count != -1)
+		cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
+	DEVI(dip)->devi_ct_count = 0;
+	CT_DEBUG((CE_NOTE, "ct_barrier_acquire: thread owns barrier"));
+}
+
+static void
+ct_barrier_release(dev_info_t *dip)
+{
+	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+	ASSERT(DEVI(dip)->devi_ct_count != -1);
+	DEVI(dip)->devi_ct_count = -1;
+	cv_broadcast(&(DEVI(dip)->devi_ct_cv));
+	CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier"));
+}
+
+static int
+ct_barrier_held(dev_info_t *dip)
+{
+	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+	return (DEVI(dip)->devi_ct_count != -1);
+}
+
+static int
+ct_barrier_empty(dev_info_t *dip)
+{
+	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+	ASSERT(DEVI(dip)->devi_ct_count != -1);
+	return (DEVI(dip)->devi_ct_count == 0);
+}
+
+static void
+ct_barrier_wait_for_release(dev_info_t *dip)
+{
+	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+	while (DEVI(dip)->devi_ct_count != -1)
+		cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
+}
+
+static void
+ct_barrier_decr(dev_info_t *dip)
+{
+	CT_DEBUG((CE_NOTE, "barrier_decr: ct_count before decr: %d",
+	    DEVI(dip)->devi_ct_count));
+
+	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+	ASSERT(DEVI(dip)->devi_ct_count > 0);
+
+	DEVI(dip)->devi_ct_count--;
+	if (DEVI(dip)->devi_ct_count == 0) {
+		cv_broadcast(&DEVI(dip)->devi_ct_cv);
+		CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast"));
+	}
+}
+
+static void
+ct_barrier_incr(dev_info_t *dip)
+{
+	ASSERT(ct_barrier_held(dip));
+	DEVI(dip)->devi_ct_count++;
+}
+
+static int
+ct_barrier_wait_for_empty(dev_info_t *dip, int secs)
+{
+	clock_t abstime;
+
+	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
+
+	abstime = ddi_get_lbolt() + drv_usectohz(secs*1000000);
+	while (DEVI(dip)->devi_ct_count) {
+		if (cv_timedwait(&(DEVI(dip)->devi_ct_cv),
+		    &(DEVI(dip)->devi_ct_lock), abstime) == -1) {
+			return (-1);
+		}
+	}
+	return (0);
+}