Diffstat (limited to 'usr/src/uts/common/os/contract.c')
-rw-r--r--  usr/src/uts/common/os/contract.c  2411
1 file changed, 2411 insertions, 0 deletions
diff --git a/usr/src/uts/common/os/contract.c b/usr/src/uts/common/os/contract.c
new file mode 100644
index 0000000000..aadfb92e62
--- /dev/null
+++ b/usr/src/uts/common/os/contract.c
@@ -0,0 +1,2411 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * Contracts
+ * ---------
+ *
+ * Contracts are a primitive which enrich the relationships between
+ * processes and system resources. The primary purpose of contracts is
+ * to provide a means for the system to negotiate the departure from a
+ * binding relationship (e.g. pages locked in memory or a thread bound
+ * to a processor), but they can also be used as a purely asynchronous
+ * error reporting mechanism as they are with process contracts.
+ *
+ * More information on how one interfaces with contracts and what
+ * contracts can do for you can be found in:
+ * PSARC 2003/193 Solaris Contracts
+ * PSARC 2004/460 Contracts addendum
+ *
+ * This file contains the core contracts framework. By itself it is
+ * useless: it depends on the contracts filesystem (ctfs) to provide an
+ * interface to user processes and individual contract types to
+ * implement the process/resource relationships.
+ *
+ * Data structure overview
+ * -----------------------
+ *
+ * A contract is represented by a contract_t, which itself points to an
+ * encapsulating contract-type specific contract object. A contract_t
+ * contains the contract's static identity (including its terms), its
+ * linkage to various bookkeeping structures, the contract-specific
+ * event queue, and a reference count.
+ *
+ * A contract template is represented by a ct_template_t, which, like a
+ * contract, points to an encapsulating contract-type specific template
+ * object. A ct_template_t contains the template's terms.
+ *
+ * An event queue is represented by a ct_equeue_t, and consists of a
+ * list of events, a list of listeners, and a list of listeners who are
+ * waiting for new events (affectionately referred to as "tail
+ * listeners"). There are three queue types, defined by ct_listnum_t
+ * (an enum). An event may be on one of each type of queue
+ * simultaneously; the list linkage used by a queue is determined by
+ * its type.
+ *
+ * An event is represented by a ct_kevent_t, which contains mostly
+ * static event data (e.g. id, payload). It also has an array of
+ * ct_member_t structures, each of which contains a list_node_t and
+ * represents the event's linkage in a specific event queue.
+ *
+ * Each open of an event endpoint results in the creation of a new
+ * listener, represented by a ct_listener_t. In addition to linkage
+ * into the aforementioned lists in the event_queue, a ct_listener_t
+ * contains a pointer to the ct_kevent_t it is currently positioned at
+ * as well as a set of status flags and other administrative data.
+ *
+ * Each process has a list of contracts it owns, p_ct_held; a pointer
+ * to the process contract it is a member of, p_ct_process; the linkage
+ * for that membership, p_ct_member; and an array of event queue
+ * structures representing the process bundle queues.
+ *
+ * Each LWP has an array of its active templates, lwp_ct_active, and
+ * an array of the most recently created contracts, lwp_ct_latest.
+ *
+ * A process contract has a list of member processes and a list of
+ * inherited contracts.
+ *
+ * There is a system-wide list of all contracts, as well as per-type
+ * lists of contracts.
+ *
+ * Lock ordering overview
+ * ----------------------
+ *
+ * Locks at the top are taken first:
+ *
+ *                       ct_evtlock
+ *                       regent ct_lock
+ *                       member ct_lock
+ *                       pidlock
+ *                       p_lock
+ *    contract ctq_lock  contract_lock
+ *    pbundle ctq_lock
+ *    cte_lock
+ *                       ct_reflock
+ *
+ * contract_lock and ctq_lock/cte_lock are not currently taken at the
+ * same time.
+ *
+ * Reference counting and locking
+ * ------------------------------
+ *
+ * A contract has a reference count, protected by ct_reflock.
+ * (ct_reflock is also used in a couple other places where atomic
+ * access to a variable is needed in an innermost context). A process
+ * maintains a hold on each contract it owns. A process contract has a
+ * hold on each contract it has inherited. Each event has a hold on
+ * the contract which generated it. Process contract templates have
+ * holds on the contracts referred to by their transfer terms. CTFS
+ * contract directory nodes have holds on contracts. Lastly, various
+ * code paths may temporarily take holds on contracts to prevent them
+ * from disappearing while other processing is going on. It is
+ * important to note that the global contract lists do not hold
+ * references on contracts; a contract is removed from these structures
+ * atomically with the release of its last reference.
+ *
+ * At a given point in time, a contract can either be owned by a
+ * process, inherited by a regent process contract, or orphaned. A
+ * contract_t's owner and regent pointers, ct_owner and ct_regent, are
+ * protected by its ct_lock. The linkage in the holder's (holder =
+ * owner or regent) list of contracts, ct_ctlist, is protected by
+ * whatever lock protects the holder's data structure. In order for
+ * these two directions to remain consistent, changing the holder of a
+ * contract requires that both locks be held.
+ *
+ * Events also have reference counts. There is one hold on an event
+ * per queue it is present on, in addition to those needed for the
+ * usual sundry reasons. Individual listeners are associated with
+ * specific queues, and increase a queue-specific reference count
+ * stored in the ct_member_t structure.
+ *
+ * The dynamic contents of an event (reference count and flags) are
+ * protected by its cte_lock, while the contents of the embedded
+ * ct_member_t structures are protected by the locks of the queues they
+ * are linked into. A ct_listener_t's contents are also protected by
+ * its event queue's ctq_lock.
+ *
+ * Resource controls
+ * -----------------
+ *
+ * Control: project.max-contracts (rc_project_contract)
+ * Description: Maximum number of contracts allowed to a project.
+ *
+ * When a contract is created, the project's allocation is tested and
+ * (assuming success) increased. When the last reference to a
+ * contract is released, the creating project's allocation is
+ * decreased.
+ */
+
+#include <sys/mutex.h>
+#include <sys/debug.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/kmem.h>
+#include <sys/thread.h>
+#include <sys/id_space.h>
+#include <sys/avl.h>
+#include <sys/list.h>
+#include <sys/sysmacros.h>
+#include <sys/proc.h>
+#include <sys/contract_impl.h>
+#include <sys/contract/process_impl.h>
+#include <sys/systm.h>
+#include <sys/atomic.h>
+#include <sys/cmn_err.h>
+#include <sys/model.h>
+#include <sys/policy.h>
+#include <sys/zone.h>
+#include <sys/task.h>
+
+extern rctl_hndl_t rc_project_contract;
+
+static id_space_t *contract_ids;
+static avl_tree_t contract_avl;
+static kmutex_t contract_lock;
+
+int ct_ntypes = CTT_MAXTYPE;
+static ct_type_t *ct_types_static[CTT_MAXTYPE];
+ct_type_t **ct_types = ct_types_static;
+
+static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int);
+static void cte_queue_destroy(ct_equeue_t *);
+static void cte_queue_drain(ct_equeue_t *, int);
+static void cte_trim(ct_equeue_t *, contract_t *);
+static void cte_copy(ct_equeue_t *, ct_equeue_t *);
+
+/*
+ * contract_compar
+ *
+ * A contract comparator which sorts on contract ID.
+ */
+int
+contract_compar(const void *x, const void *y)
+{
+ const contract_t *ct1 = x;
+ const contract_t *ct2 = y;
+
+ if (ct1->ct_id < ct2->ct_id)
+ return (-1);
+ if (ct1->ct_id > ct2->ct_id)
+ return (1);
+ return (0);
+}
+
+/*
+ * contract_init
+ *
+ * Initializes the contract subsystem, the specific contract types, and
+ * process 0.
+ */
+void
+contract_init(void)
+{
+ /*
+ * Initialize contract subsystem.
+ */
+ contract_ids = id_space_create("contracts", 1, INT_MAX);
+ avl_create(&contract_avl, contract_compar, sizeof (contract_t),
+ offsetof(contract_t, ct_ctavl));
+ mutex_init(&contract_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ /*
+ * Initialize contract types.
+ */
+ contract_process_init();
+
+ /*
+ * Initialize p0/lwp0 contract state.
+ */
+ avl_create(&p0.p_ct_held, contract_compar, sizeof (contract_t),
+ offsetof(contract_t, ct_ctlist));
+}
+
+/*
+ * contract_dtor
+ *
+ * Performs basic destruction of the common portions of a contract.
+ * Called from the failure path of contract_ctor and from
+ * contract_rele.
+ */
+static void
+contract_dtor(contract_t *ct)
+{
+ cte_queue_destroy(&ct->ct_events);
+ list_destroy(&ct->ct_vnodes);
+ mutex_destroy(&ct->ct_reflock);
+ mutex_destroy(&ct->ct_lock);
+ mutex_destroy(&ct->ct_evtlock);
+}
+
+/*
+ * contract_ctor
+ *
+ * Called by a contract type to initialize a contract. Fails if the
+ * max-contract resource control would have been exceeded. After a
+ * successful call to contract_ctor, the contract is unlocked and
+ * visible in all namespaces; any type-specific initialization should
+ * be completed before calling contract_ctor. Returns 0 on success.
+ *
+ * Because not all callers can tolerate failure, a 0 value for canfail
+ * instructs contract_ctor to ignore the project.max-contracts resource
+ * control. Obviously, this "out" should only be employed by callers
+ * who are sufficiently constrained in other ways (e.g. newproc).
+ */
+int
+contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data,
+ ctflags_t flags, proc_t *author, int canfail)
+{
+ avl_index_t where;
+ klwp_t *curlwp = ttolwp(curthread);
+
+ ASSERT(author == curproc);
+
+ mutex_init(&ct->ct_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&ct->ct_reflock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&ct->ct_evtlock, NULL, MUTEX_DEFAULT, NULL);
+ ct->ct_id = id_alloc(contract_ids);
+
+ cte_queue_create(&ct->ct_events, CTEL_CONTRACT, 20, 0);
+ list_create(&ct->ct_vnodes, sizeof (contract_vnode_t),
+ offsetof(contract_vnode_t, ctv_node));
+
+ /*
+ * Instance data
+ */
+ ct->ct_ref = 2; /* one for the holder, one for "latest" */
+ ct->ct_cuid = crgetuid(CRED());
+ ct->ct_type = type;
+ ct->ct_data = data;
+ gethrestime(&ct->ct_ctime);
+ ct->ct_state = CTS_OWNED;
+ ct->ct_flags = flags;
+ ct->ct_regent = author->p_ct_process ?
+ &author->p_ct_process->conp_contract : NULL;
+ ct->ct_ev_info = tmpl->ctmpl_ev_info;
+ ct->ct_ev_crit = tmpl->ctmpl_ev_crit;
+ ct->ct_cookie = tmpl->ctmpl_cookie;
+ ct->ct_owner = author;
+
+ /*
+ * Test project.max-contracts.
+ */
+ mutex_enter(&author->p_lock);
+ mutex_enter(&contract_lock);
+ if (canfail && rctl_test(rc_project_contract,
+ author->p_task->tk_proj->kpj_rctls, author, 1,
+ RCA_SAFE) & RCT_DENY) {
+ id_free(contract_ids, ct->ct_id);
+ mutex_exit(&contract_lock);
+ mutex_exit(&author->p_lock);
+ ct->ct_events.ctq_flags |= CTQ_DEAD;
+ contract_dtor(ct);
+ return (1);
+ }
+ ct->ct_proj = author->p_task->tk_proj;
+ ct->ct_proj->kpj_data.kpd_contract++;
+ (void) project_hold(ct->ct_proj);
+ mutex_exit(&contract_lock);
+
+ /*
+ * Insert into holder's avl of contracts.
+ * We use an avl not because order is important, but because
+ * readdir of /proc/contracts requires we be able to use a
+ * scalar as an index into the process's list of contracts
+ */
+ ct->ct_zoneid = author->p_zone->zone_id;
+ ct->ct_czuniqid = ct->ct_mzuniqid = author->p_zone->zone_uniqid;
+ VERIFY(avl_find(&author->p_ct_held, ct, &where) == NULL);
+ avl_insert(&author->p_ct_held, ct, where);
+ mutex_exit(&author->p_lock);
+
+ /*
+ * Insert into global contract AVL
+ */
+ mutex_enter(&contract_lock);
+ VERIFY(avl_find(&contract_avl, ct, &where) == NULL);
+ avl_insert(&contract_avl, ct, where);
+ mutex_exit(&contract_lock);
+
+ /*
+ * Insert into type AVL
+ */
+ mutex_enter(&type->ct_type_lock);
+ VERIFY(avl_find(&type->ct_type_avl, ct, &where) == NULL);
+ avl_insert(&type->ct_type_avl, ct, where);
+ type->ct_type_timestruc = ct->ct_ctime;
+ mutex_exit(&type->ct_type_lock);
+
+ if (curlwp->lwp_ct_latest[type->ct_type_index])
+ contract_rele(curlwp->lwp_ct_latest[type->ct_type_index]);
+ curlwp->lwp_ct_latest[type->ct_type_index] = ct;
+
+ return (0);
+}
+
+/*
+ * contract_rele
+ *
+ * Releases a reference to a contract. If the caller had the last
+ * reference, the contract is removed from all namespaces, its
+ * allocation against the max-contracts resource control is released,
+ * and the contract type's free entry point is invoked for any
+ * type-specific deconstruction and to (presumably) free the object.
+ */
+void
+contract_rele(contract_t *ct)
+{
+ uint64_t nref;
+
+ mutex_enter(&ct->ct_reflock);
+ ASSERT(ct->ct_ref > 0);
+ nref = --ct->ct_ref;
+ mutex_exit(&ct->ct_reflock);
+ if (nref == 0) {
+ /*
+ * ct_owner is cleared when it drops its reference.
+ */
+ ASSERT(ct->ct_owner == NULL);
+ ASSERT(ct->ct_evcnt == 0);
+
+ /*
+ * Remove from global contract AVL
+ */
+ mutex_enter(&contract_lock);
+ avl_remove(&contract_avl, ct);
+ mutex_exit(&contract_lock);
+
+ /*
+ * Remove from type AVL
+ */
+ mutex_enter(&ct->ct_type->ct_type_lock);
+ avl_remove(&ct->ct_type->ct_type_avl, ct);
+ mutex_exit(&ct->ct_type->ct_type_lock);
+
+ /*
+ * Release the contract's ID
+ */
+ id_free(contract_ids, ct->ct_id);
+
+ /*
+ * Release project hold
+ */
+ mutex_enter(&contract_lock);
+ ct->ct_proj->kpj_data.kpd_contract--;
+ project_rele(ct->ct_proj);
+ mutex_exit(&contract_lock);
+
+ /*
+ * Free the contract
+ */
+ contract_dtor(ct);
+ ct->ct_type->ct_type_ops->contop_free(ct);
+ }
+}
+
+/*
+ * contract_hold
+ *
+ * Adds a reference to a contract
+ */
+void
+contract_hold(contract_t *ct)
+{
+ mutex_enter(&ct->ct_reflock);
+ ASSERT(ct->ct_ref < UINT64_MAX);
+ ct->ct_ref++;
+ mutex_exit(&ct->ct_reflock);
+}
+
+/*
+ * contract_getzuniqid
+ *
+ * Get a contract's zone unique ID. Needed because 64-bit reads and
+ * writes aren't atomic on x86. Since there are contexts where we are
+ * unable to take ct_lock, we instead use ct_reflock; in actuality any
+ * lock would do.
+ */
+uint64_t
+contract_getzuniqid(contract_t *ct)
+{
+ uint64_t zuniqid;
+
+ mutex_enter(&ct->ct_reflock);
+ zuniqid = ct->ct_mzuniqid;
+ mutex_exit(&ct->ct_reflock);
+
+ return (zuniqid);
+}
+
+/*
+ * contract_setzuniqid
+ *
+ * Sets a contract's zone unique ID. See contract_getzuniqid.
+ */
+void
+contract_setzuniqid(contract_t *ct, uint64_t zuniqid)
+{
+ mutex_enter(&ct->ct_reflock);
+ ct->ct_mzuniqid = zuniqid;
+ mutex_exit(&ct->ct_reflock);
+}
+
+/*
+ * contract_abandon
+ *
+ * Abandons the specified contract. If "explicit" is clear, the
+ * contract was implicitly abandoned (by process exit) and should be
+ * inherited if its terms allow it and its owner was a member of a
+ * regent contract. Otherwise, the contract type's abandon entry point
+ * is invoked to either destroy or orphan the contract.
+ */
+int
+contract_abandon(contract_t *ct, proc_t *p, int explicit)
+{
+ ct_equeue_t *q = NULL;
+ contract_t *parent = &p->p_ct_process->conp_contract;
+ int inherit = 0;
+
+ ASSERT(p == curproc);
+
+ mutex_enter(&ct->ct_lock);
+
+ /*
+ * Multiple contract locks are taken contract -> subcontract.
+ * Check if the contract will be inherited so we can acquire
+ * all the necessary locks before making sensitive changes.
+ */
+ if (!explicit && (ct->ct_flags & CTF_INHERIT) &&
+ contract_process_accept(parent)) {
+ mutex_exit(&ct->ct_lock);
+ mutex_enter(&parent->ct_lock);
+ mutex_enter(&ct->ct_lock);
+ inherit = 1;
+ }
+
+ if (ct->ct_owner != p) {
+ mutex_exit(&ct->ct_lock);
+ if (inherit)
+ mutex_exit(&parent->ct_lock);
+ return (EINVAL);
+ }
+
+ mutex_enter(&p->p_lock);
+ if (explicit)
+ avl_remove(&p->p_ct_held, ct);
+ ct->ct_owner = NULL;
+ mutex_exit(&p->p_lock);
+
+ /*
+ * Since we can't call cte_trim with the contract lock held,
+ * we grab the queue pointer here.
+ */
+ if (p->p_ct_equeue)
+ q = p->p_ct_equeue[ct->ct_type->ct_type_index];
+
+ /*
+ * contop_abandon may destroy the contract so we rely on it to
+ * drop ct_lock. We retain a reference on the contract so that
+ * the cte_trim which follows functions properly. Even though
+ * cte_trim doesn't dereference the contract pointer, it is
+ * still necessary to retain a reference to the contract so
+ * that we don't trim events which are sent by a subsequently
+ * allocated contract infortuitously located at the same address.
+ */
+ contract_hold(ct);
+
+ if (inherit) {
+ ct->ct_state = CTS_INHERITED;
+ ASSERT(ct->ct_regent == parent);
+ contract_process_take(parent, ct);
+
+ /*
+ * We are handing off the process's reference to the
+ * parent contract. For this reason, the order in
+ * which we drop the contract locks is also important.
+ */
+ mutex_exit(&ct->ct_lock);
+ mutex_exit(&parent->ct_lock);
+ } else {
+ ct->ct_regent = NULL;
+ ct->ct_type->ct_type_ops->contop_abandon(ct);
+ }
+
+ /*
+ * ct_lock has been dropped; we can safely trim the event
+ * queue now.
+ */
+ if (q) {
+ mutex_enter(&q->ctq_lock);
+ cte_trim(q, ct);
+ mutex_exit(&q->ctq_lock);
+ }
+
+ contract_rele(ct);
+
+ return (0);
+}
+
+/*
+ * contract_adopt
+ *
+ * Adopts a contract. After a successful call to this routine, the
+ * previously inherited contract will belong to the calling process,
+ * and its events will have been appended to its new owner's process
+ * bundle queue.
+ */
+int
+contract_adopt(contract_t *ct, proc_t *p)
+{
+ avl_index_t where;
+ ct_equeue_t *q;
+ contract_t *parent;
+
+ ASSERT(p == curproc);
+
+ /*
+ * Ensure the process has an event queue. Checked by ASSERTs
+ * below.
+ */
+ (void) contract_type_pbundle(ct->ct_type, p);
+
+ mutex_enter(&ct->ct_lock);
+ parent = ct->ct_regent;
+ if (ct->ct_state != CTS_INHERITED ||
+ &p->p_ct_process->conp_contract != parent ||
+ p->p_zone->zone_uniqid != ct->ct_czuniqid) {
+ mutex_exit(&ct->ct_lock);
+ return (EINVAL);
+ }
+
+ /*
+ * Multiple contract locks are taken contract -> subcontract.
+ */
+ mutex_exit(&ct->ct_lock);
+ mutex_enter(&parent->ct_lock);
+ mutex_enter(&ct->ct_lock);
+
+ /*
+ * It is possible that the contract was adopted by someone else
+ * while its lock was dropped. It isn't possible for the
+ * contract to have been inherited by a different regent
+ * contract.
+ */
+ if (ct->ct_state != CTS_INHERITED) {
+ mutex_exit(&parent->ct_lock);
+ mutex_exit(&ct->ct_lock);
+ return (EBUSY);
+ }
+ ASSERT(ct->ct_regent == parent);
+
+ ct->ct_state = CTS_OWNED;
+
+ contract_process_adopt(ct, p);
+
+ mutex_enter(&p->p_lock);
+ ct->ct_owner = p;
+ VERIFY(avl_find(&p->p_ct_held, ct, &where) == NULL);
+ avl_insert(&p->p_ct_held, ct, where);
+ mutex_exit(&p->p_lock);
+
+ ASSERT(ct->ct_owner->p_ct_equeue);
+ ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
+ q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
+ cte_copy(&ct->ct_events, q);
+ mutex_exit(&ct->ct_lock);
+
+ return (0);
+}
+
+/*
+ * contract_ack
+ *
+ * Acknowledges receipt of a critical event.
+ */
+int
+contract_ack(contract_t *ct, uint64_t evid)
+{
+ ct_kevent_t *ev;
+ list_t *queue = &ct->ct_events.ctq_events;
+ int error = ESRCH;
+
+ mutex_enter(&ct->ct_lock);
+ mutex_enter(&ct->ct_events.ctq_lock);
+ /*
+ * We are probably ACKing something near the head of the queue.
+ */
+ for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
+ if (ev->cte_id == evid) {
+ if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
+ ev->cte_flags |= CTE_ACK;
+ ct->ct_evcnt--;
+ error = 0;
+ }
+ break;
+ }
+ }
+ mutex_exit(&ct->ct_events.ctq_lock);
+ mutex_exit(&ct->ct_lock);
+
+ return (error);
+}
+
+/*
+ * contract_orphan
+ *
+ * Icky-poo. This is a process-contract special, used to ACK all
+ * critical messages when a contract is orphaned.
+ */
+void
+contract_orphan(contract_t *ct)
+{
+ ct_kevent_t *ev;
+ list_t *queue = &ct->ct_events.ctq_events;
+
+ ASSERT(MUTEX_HELD(&ct->ct_lock));
+ ASSERT(ct->ct_state != CTS_ORPHAN);
+
+ mutex_enter(&ct->ct_events.ctq_lock);
+ ct->ct_state = CTS_ORPHAN;
+ for (ev = list_head(queue); ev; ev = list_next(queue, ev)) {
+ if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
+ ev->cte_flags |= CTE_ACK;
+ ct->ct_evcnt--;
+ }
+ }
+ mutex_exit(&ct->ct_events.ctq_lock);
+
+ ASSERT(ct->ct_evcnt == 0);
+}
+
+/*
+ * contract_destroy
+ *
+ * Explicit contract destruction. Called when contract is empty.
+ * The contract will actually stick around until all of its events are
+ * removed from the bundle and process bundle queues, and all fds
+ * which refer to it are closed. See contract_dtor if you are looking
+ * for what destroys the contract structure.
+ */
+void
+contract_destroy(contract_t *ct)
+{
+ ASSERT(MUTEX_HELD(&ct->ct_lock));
+ ASSERT(ct->ct_state != CTS_DEAD);
+ ASSERT(ct->ct_owner == NULL);
+
+ ct->ct_state = CTS_DEAD;
+ cte_queue_drain(&ct->ct_events, 1);
+ mutex_exit(&ct->ct_lock);
+ mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
+ cte_trim(&ct->ct_type->ct_type_events, ct);
+ mutex_exit(&ct->ct_type->ct_type_events.ctq_lock);
+ mutex_enter(&ct->ct_lock);
+ ct->ct_type->ct_type_ops->contop_destroy(ct);
+ mutex_exit(&ct->ct_lock);
+ contract_rele(ct);
+}
+
+/*
+ * contract_vnode_get
+ *
+ * Obtains the contract directory vnode for this contract, if there is
+ * one. The caller must VN_RELE the vnode when they are through using
+ * it.
+ */
+vnode_t *
+contract_vnode_get(contract_t *ct, vfs_t *vfsp)
+{
+ contract_vnode_t *ctv;
+ vnode_t *vp = NULL;
+
+ mutex_enter(&ct->ct_lock);
+ for (ctv = list_head(&ct->ct_vnodes); ctv != NULL;
+ ctv = list_next(&ct->ct_vnodes, ctv))
+ if (ctv->ctv_vnode->v_vfsp == vfsp) {
+ vp = ctv->ctv_vnode;
+ VN_HOLD(vp);
+ break;
+ }
+ mutex_exit(&ct->ct_lock);
+ return (vp);
+}
+
+/*
+ * contract_vnode_set
+ *
+ * Sets the contract directory vnode for this contract. We don't hold
+ * a reference on the vnode because we don't want to prevent it from
+ * being freed. The vnode's inactive entry point will take care of
+ * notifying us when it should be removed.
+ */
+void
+contract_vnode_set(contract_t *ct, contract_vnode_t *ctv, vnode_t *vnode)
+{
+ mutex_enter(&ct->ct_lock);
+ ctv->ctv_vnode = vnode;
+ list_insert_head(&ct->ct_vnodes, ctv);
+ mutex_exit(&ct->ct_lock);
+}
+
+/*
+ * contract_vnode_clear
+ *
+ * Removes this vnode as the contract directory vnode for this
+ * contract. Called from a contract directory's inactive entry point,
+ * this may return 0 indicating that the vnode gained another reference
+ * because of a simultaneous call to contract_vnode_get.
+ */
+int
+contract_vnode_clear(contract_t *ct, contract_vnode_t *ctv)
+{
+ vnode_t *vp = ctv->ctv_vnode;
+ int result;
+
+ mutex_enter(&ct->ct_lock);
+ mutex_enter(&vp->v_lock);
+ if (vp->v_count == 1) {
+ list_remove(&ct->ct_vnodes, ctv);
+ result = 1;
+ } else {
+ vp->v_count--;
+ result = 0;
+ }
+ mutex_exit(&vp->v_lock);
+ mutex_exit(&ct->ct_lock);
+
+ return (result);
+}
+
+/*
+ * contract_exit
+ *
+ * Abandons all contracts held by process p, and drains process p's
+ * bundle queues. Called on process exit.
+ */
+void
+contract_exit(proc_t *p)
+{
+ contract_t *ct;
+ void *cookie = NULL;
+ int i;
+
+ ASSERT(p == curproc);
+
+ /*
+ * Abandon held contracts. contract_abandon knows enough not
+ * to remove the contract from the list a second time. We are
+ * exiting, so no locks are needed here. But because
+ * contract_abandon will take p_lock, we need to make sure we
+ * aren't holding it.
+ */
+ ASSERT(MUTEX_NOT_HELD(&p->p_lock));
+ while ((ct = avl_destroy_nodes(&p->p_ct_held, &cookie)) != NULL)
+ VERIFY(contract_abandon(ct, p, 0) == 0);
+
+ /*
+ * Drain pbundles. Because a process bundle queue could have
+ * been passed to another process, they may not be freed right
+ * away.
+ */
+ if (p->p_ct_equeue) {
+ for (i = 0; i < CTT_MAXTYPE; i++)
+ if (p->p_ct_equeue[i])
+ cte_queue_drain(p->p_ct_equeue[i], 0);
+ kmem_free(p->p_ct_equeue, CTT_MAXTYPE * sizeof (ct_equeue_t *));
+ }
+}
+
+/*
+ * contract_status_common
+ *
+ * Populates a ct_status structure. Used by contract types in their
+ * status entry points and ctfs when only common information is
+ * requested.
+ */
+void
+contract_status_common(contract_t *ct, zone_t *zone, void *status,
+ model_t model)
+{
+ STRUCT_HANDLE(ct_status, lstatus);
+
+ STRUCT_SET_HANDLE(lstatus, model, status);
+ ASSERT(MUTEX_HELD(&ct->ct_lock));
+ if (zone->zone_uniqid == GLOBAL_ZONEUNIQID ||
+ zone->zone_uniqid == ct->ct_czuniqid) {
+ zone_t *czone;
+ zoneid_t zoneid = -1;
+
+ /*
+ * Contracts don't have holds on the zones they were
+ * created by. If the contract's zone no longer
+ * exists, we say its zoneid is -1.
+ */
+ if (zone->zone_uniqid == ct->ct_czuniqid ||
+ ct->ct_czuniqid == GLOBAL_ZONEUNIQID) {
+ zoneid = ct->ct_zoneid;
+ } else if ((czone = zone_find_by_id(ct->ct_zoneid)) != NULL) {
+ if (czone->zone_uniqid == ct->ct_mzuniqid)
+ zoneid = ct->ct_zoneid;
+ zone_rele(czone);
+ }
+
+ STRUCT_FSET(lstatus, ctst_zoneid, zoneid);
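+ /*
+ * The holder is the owning process's pid, the regent
+ * contract's id if the contract has been inherited, or 0
+ * otherwise.
+ */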
+ STRUCT_FSET(lstatus, ctst_holder,
+ (ct->ct_state == CTS_OWNED) ? ct->ct_owner->p_pid :
+ (ct->ct_state == CTS_INHERITED) ? ct->ct_regent->ct_id : 0);
+ STRUCT_FSET(lstatus, ctst_state, ct->ct_state);
+ } else {
+ /*
+ * We are looking at a contract which was created by a
+ * process outside of our zone. We provide fake zone,
+ * holder, and state information.
+ */
+
+ STRUCT_FSET(lstatus, ctst_zoneid, zone->zone_id);
+ /*
+ * Since "zone" can't disappear until the calling ctfs
+ * is unmounted, zone_zsched must be valid.
+ */
+ STRUCT_FSET(lstatus, ctst_holder, (ct->ct_state < CTS_ORPHAN) ?
+ zone->zone_zsched->p_pid : 0);
+ STRUCT_FSET(lstatus, ctst_state, (ct->ct_state < CTS_ORPHAN) ?
+ CTS_OWNED : ct->ct_state);
+ }
+ STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt);
+ STRUCT_FSET(lstatus, ctst_ntime, -1);
+ STRUCT_FSET(lstatus, ctst_qtime, -1);
+ STRUCT_FSET(lstatus, ctst_nevid,
+ ct->ct_nevent ? ct->ct_nevent->cte_id : 0);
+ STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit);
+ STRUCT_FSET(lstatus, ctst_informative, ct->ct_ev_info);
+ STRUCT_FSET(lstatus, ctst_cookie, ct->ct_cookie);
+ STRUCT_FSET(lstatus, ctst_type, ct->ct_type->ct_type_index);
+ STRUCT_FSET(lstatus, ctst_id, ct->ct_id);
+}
+
+/*
+ * contract_checkcred
+ *
+ * Determines if the specified contract is owned by a process with the
+ * same effective uid as the specified credential. The caller must
+ * ensure that the uid spaces are the same. Returns 1 on success.
+ */
+static int
+contract_checkcred(contract_t *ct, const cred_t *cr)
+{
+ proc_t *p;
+ int fail = 1;
+
+ mutex_enter(&ct->ct_lock);
+ if ((p = ct->ct_owner) != NULL) {
+ mutex_enter(&p->p_crlock);
+ fail = crgetuid(cr) != crgetuid(p->p_cred);
+ mutex_exit(&p->p_crlock);
+ }
+ mutex_exit(&ct->ct_lock);
+
+ return (!fail);
+}
+
+/*
+ * contract_owned
+ *
+ * Determines if the specified credential can view an event generated
+ * by the specified contract. If locked is set, the contract's ct_lock
+ * is held and the caller will need to do additional work to determine
+ * if they truly can see the event. Returns 1 on success.
+ */
+int
+contract_owned(contract_t *ct, const cred_t *cr, int locked)
+{
+ int owner, cmatch, zmatch;
+ uint64_t zuniqid, mzuniqid;
+ uid_t euid;
+
+ ASSERT(locked || MUTEX_NOT_HELD(&ct->ct_lock));
+
+ zuniqid = curproc->p_zone->zone_uniqid;
+ mzuniqid = contract_getzuniqid(ct);
+ euid = crgetuid(cr);
+
+ /*
+ * owner: we own the contract
+ * cmatch: we are in the creator's (and holder's) zone and our
+ * uid matches the creator's or holder's
+ * zmatch: we are in the effective zone of a contract created
+ * in the global zone, and our uid matches that of the
+ * virtualized holder's (zsched/kcred)
+ */
+ owner = (ct->ct_owner == curproc);
+ cmatch = (zuniqid == ct->ct_czuniqid) &&
+ ((ct->ct_cuid == euid) || (!locked && contract_checkcred(ct, cr)));
+ zmatch = (ct->ct_czuniqid != mzuniqid) && (zuniqid == mzuniqid) &&
+ (crgetuid(kcred) == euid);
+
+ return (owner || cmatch || zmatch);
+}
+
+
+/*
+ * contract_type_init
+ *
+ * Called by contract types to register themselves with the contracts
+ * framework.
+ */
+ct_type_t *
+contract_type_init(ct_typeid_t type, const char *name, contops_t *ops,
+ ct_f_default_t *dfault)
+{
+ ct_type_t *result;
+
+ ASSERT(type < CTT_MAXTYPE);
+
+ result = kmem_alloc(sizeof (ct_type_t), KM_SLEEP);
+
+ mutex_init(&result->ct_type_lock, NULL, MUTEX_DEFAULT, NULL);
+ avl_create(&result->ct_type_avl, contract_compar, sizeof (contract_t),
+ offsetof(contract_t, ct_cttavl));
+ cte_queue_create(&result->ct_type_events, CTEL_BUNDLE, 20, 0);
+ result->ct_type_name = name;
+ result->ct_type_ops = ops;
+ result->ct_type_default = dfault;
+ result->ct_type_evid = 0;
+ gethrestime(&result->ct_type_timestruc);
+ result->ct_type_index = type;
+
+ ct_types[type] = result;
+
+ return (result);
+}
+
+/*
+ * contract_type_count
+ *
+ * Obtains the number of contracts of a particular type.
+ */
+int
+contract_type_count(ct_type_t *type)
+{
+ ulong_t count;
+
+ mutex_enter(&type->ct_type_lock);
+ count = avl_numnodes(&type->ct_type_avl);
+ mutex_exit(&type->ct_type_lock);
+
+ return (count);
+}
+
+/*
+ * contract_type_max
+ *
+ * Obtains the maximum contract id of a particular type.
+ */
+ctid_t
+contract_type_max(ct_type_t *type)
+{
+ contract_t *ct;
+ ctid_t res;
+
+ mutex_enter(&type->ct_type_lock);
+ ct = avl_last(&type->ct_type_avl);
+ res = ct ? ct->ct_id : -1;
+ mutex_exit(&type->ct_type_lock);
+
+ return (res);
+}
+
+/*
+ * contract_max
+ *
+ * Obtains the maximum contract id.
+ */
+ctid_t
+contract_max(void)
+{
+ contract_t *ct;
+ ctid_t res;
+
+ mutex_enter(&contract_lock);
+ ct = avl_last(&contract_avl);
+ res = ct ? ct->ct_id : -1;
+ mutex_exit(&contract_lock);
+
+ return (res);
+}
+
+/*
+ * contract_lookup_common
+ *
+ * Common code for contract_lookup and contract_type_lookup. Takes a
+ * pointer to an AVL tree to search in. Should be called with the
+ * appropriate tree-protecting lock held (unfortunately unassertable).
+ */
+static ctid_t
+contract_lookup_common(avl_tree_t *tree, uint64_t zuniqid, ctid_t current)
+{
+ contract_t template, *ct;
+ avl_index_t where;
+ ctid_t res;
+
+ template.ct_id = current;
+ ct = avl_find(tree, &template, &where);
+ if (ct == NULL)
+ ct = avl_nearest(tree, where, AVL_AFTER);
+ if (zuniqid != GLOBAL_ZONEUNIQID)
+ while (ct && (contract_getzuniqid(ct) != zuniqid))
+ ct = AVL_NEXT(tree, ct);
+ res = ct ? ct->ct_id : -1;
+
+ return (res);
+}
+
+/*
+ * contract_type_lookup
+ *
+ * Returns the next contract of the given type after the specified id,
+ * visible from the specified zone.
+ */
+ctid_t
+contract_type_lookup(ct_type_t *type, uint64_t zuniqid, ctid_t current)
+{
+ ctid_t res;
+
+ mutex_enter(&type->ct_type_lock);
+ res = contract_lookup_common(&type->ct_type_avl, zuniqid, current);
+ mutex_exit(&type->ct_type_lock);
+
+ return (res);
+}
+
+/*
+ * contract_lookup
+ *
+ * Returns the next contract after the specified id, visible from the
+ * specified zone.
+ */
+ctid_t
+contract_lookup(uint64_t zuniqid, ctid_t current)
+{
+ ctid_t res;
+
+ mutex_enter(&contract_lock);
+ res = contract_lookup_common(&contract_avl, zuniqid, current);
+ mutex_exit(&contract_lock);
+
+ return (res);
+}
+
+/*
+ * contract_plookup
+ *
+ * Returns the next contract held by process p after the specified id,
+ * visible from the specified zone. Made complicated by the fact that
+ * contracts visible in a zone but held by processes outside of the
+ * zone need to appear as being held by zsched to zone members.
+ */
+ctid_t
+contract_plookup(proc_t *p, ctid_t current, uint64_t zuniqid)
+{
+ contract_t template, *ct;
+ avl_index_t where;
+ ctid_t res;
+
+ template.ct_id = current;
+ if (zuniqid != GLOBAL_ZONEUNIQID &&
+ (p->p_flag & (SSYS|SZONETOP)) == (SSYS|SZONETOP)) {
+ /* This is inelegant. */
+ mutex_enter(&contract_lock);
+ ct = avl_find(&contract_avl, &template, &where);
+ if (ct == NULL)
+ ct = avl_nearest(&contract_avl, where, AVL_AFTER);
+ while (ct && !(ct->ct_state < CTS_ORPHAN &&
+ contract_getzuniqid(ct) == zuniqid &&
+ ct->ct_czuniqid == GLOBAL_ZONEUNIQID))
+ ct = AVL_NEXT(&contract_avl, ct);
+ res = ct ? ct->ct_id : -1;
+ mutex_exit(&contract_lock);
+ } else {
+ mutex_enter(&p->p_lock);
+ ct = avl_find(&p->p_ct_held, &template, &where);
+ if (ct == NULL)
+ ct = avl_nearest(&p->p_ct_held, where, AVL_AFTER);
+ res = ct ? ct->ct_id : -1;
+ mutex_exit(&p->p_lock);
+ }
+
+ return (res);
+}
+
+/*
+ * contract_ptr_common
+ *
+ * Common code for contract_ptr and contract_type_ptr. Takes a pointer
+ * to an AVL tree to search in. Should be called with the appropriate
+ * tree-protecting lock held (unfortunately unassertable).
+ */
+static contract_t *
+contract_ptr_common(avl_tree_t *tree, ctid_t id, uint64_t zuniqid)
+{
+ contract_t template, *ct;
+
+ template.ct_id = id;
+ ct = avl_find(tree, &template, NULL);
+ if (ct == NULL || (zuniqid != GLOBAL_ZONEUNIQID &&
+ contract_getzuniqid(ct) != zuniqid)) {
+ return (NULL);
+ }
+
+ /*
+ * Check to see if a thread is in the window in contract_rele
+ * between dropping the reference count and removing the
+ * contract from the type AVL.
+ */
+ mutex_enter(&ct->ct_reflock);
+ if (ct->ct_ref) {
+ ct->ct_ref++;
+ mutex_exit(&ct->ct_reflock);
+ } else {
+ mutex_exit(&ct->ct_reflock);
+ ct = NULL;
+ }
+
+ return (ct);
+}
+
+/*
+ * contract_type_ptr
+ *
+ * Returns a pointer to the contract with the specified id. The
+ * contract is held, so the caller needs to release the reference when
+ * it is through with the contract.
+ */
+contract_t *
+contract_type_ptr(ct_type_t *type, ctid_t id, uint64_t zuniqid)
+{
+ contract_t *ct;
+
+ mutex_enter(&type->ct_type_lock);
+ ct = contract_ptr_common(&type->ct_type_avl, id, zuniqid);
+ mutex_exit(&type->ct_type_lock);
+
+ return (ct);
+}
+
+/*
+ * contract_ptr
+ *
+ * Returns a pointer to the contract with the specified id. The
+ * contract is held, so the caller needs to release the reference when
+ * it is through with the contract.
+ */
+contract_t *
+contract_ptr(ctid_t id, uint64_t zuniqid)
+{
+ contract_t *ct;
+
+ mutex_enter(&contract_lock);
+ ct = contract_ptr_common(&contract_avl, id, zuniqid);
+ mutex_exit(&contract_lock);
+
+ return (ct);
+}
+
+/*
+ * contract_type_time
+ *
+ * Obtains the last time a contract of a particular type was created.
+ */
+void
+contract_type_time(ct_type_t *type, timestruc_t *time)
+{
+ mutex_enter(&type->ct_type_lock);
+ *time = type->ct_type_timestruc;
+ mutex_exit(&type->ct_type_lock);
+}
+
+/*
+ * contract_type_bundle
+ *
+ * Obtains a type's bundle queue.
+ */
+ct_equeue_t *
+contract_type_bundle(ct_type_t *type)
+{
+ return (&type->ct_type_events);
+}
+
+/*
+ * contract_type_pbundle
+ *
+ * Obtains a process's bundle queue. If one doesn't exist, one is
+ * created. Often used simply to ensure that a bundle queue is
+ * allocated.
+ */
+ct_equeue_t *
+contract_type_pbundle(ct_type_t *type, proc_t *pp)
+{
+ /*
+ * If there isn't an array of bundle queues, allocate one.
+ */
+ if (pp->p_ct_equeue == NULL) {
+ size_t size = CTT_MAXTYPE * sizeof (ct_equeue_t *);
+ ct_equeue_t **qa = kmem_zalloc(size, KM_SLEEP);
+
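+ /*
+ * Install the array under p_lock; if another thread
+ * installed one while our back was turned, free ours.
+ */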
+ mutex_enter(&pp->p_lock);
+ if (pp->p_ct_equeue)
+ kmem_free(qa, size);
+ else
+ pp->p_ct_equeue = qa;
+ mutex_exit(&pp->p_lock);
+ }
+
+ /*
+ * If there isn't a bundle queue of the required type, allocate
+ * one.
+ */
+ if (pp->p_ct_equeue[type->ct_type_index] == NULL) {
+ ct_equeue_t *q = kmem_zalloc(sizeof (ct_equeue_t), KM_SLEEP);
+ cte_queue_create(q, CTEL_PBUNDLE, 20, 1);
+
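+ /*
+ * Same pattern as above: install the queue under p_lock,
+ * and drain (and thus free) ours if we lost the race.
+ */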
+ mutex_enter(&pp->p_lock);
+ if (pp->p_ct_equeue[type->ct_type_index])
+ cte_queue_drain(q, 0);
+ else
+ pp->p_ct_equeue[type->ct_type_index] = q;
+ mutex_exit(&pp->p_lock);
+ }
+
+ return (pp->p_ct_equeue[type->ct_type_index]);
+}
+
+/*
+ * ctmpl_free
+ *
+ * Frees a template.
+ */
+void
+ctmpl_free(ct_template_t *template)
+{
+ mutex_destroy(&template->ctmpl_lock);
+ template->ctmpl_ops->ctop_free(template);
+}
+
+/*
+ * ctmpl_dup
+ *
+ * Creates a copy of a template.
+ */
+ct_template_t *
+ctmpl_dup(ct_template_t *template)
+{
+ ct_template_t *new;
+
+ if (template == NULL)
+ return (NULL);
+
+ new = template->ctmpl_ops->ctop_dup(template);
+ /*
+ * ctmpl_lock was taken by ctop_dup's call to ctmpl_copy and
+ * should have remained held until now.
+ */
+ mutex_exit(&template->ctmpl_lock);
+
+ return (new);
+}
+
+/*
+ * ctmpl_set
+ *
+ * Sets the requested terms of a template.
+ */
+int
+ctmpl_set(ct_template_t *template, ct_param_t *param, const cred_t *cr)
+{
+ int result = 0;
+
+ mutex_enter(&template->ctmpl_lock);
+ switch (param->ctpm_id) {
+ case CTP_COOKIE:
+ template->ctmpl_cookie = param->ctpm_value;
+ break;
+ case CTP_EV_INFO:
+ if (param->ctpm_value &
+ ~(uint64_t)template->ctmpl_ops->allevents)
+ result = EINVAL;
+ else
+ template->ctmpl_ev_info = param->ctpm_value;
+ break;
+ case CTP_EV_CRITICAL:
+ if (param->ctpm_value &
+ ~(uint64_t)template->ctmpl_ops->allevents) {
+ result = EINVAL;
+ break;
+ } else if ((~template->ctmpl_ev_crit &
+ param->ctpm_value) == 0) {
+ /*
+ * Assume that a pure reduction of the critical
+ * set is allowed by the contract type.
+ */
+ template->ctmpl_ev_crit = param->ctpm_value;
+ break;
+ }
+ /*
+ * There may be restrictions on what we can make
+ * critical, so we defer to the judgement of the
+ * contract type.
+ */
+ /* FALLTHROUGH */
+ default:
+ result = template->ctmpl_ops->ctop_set(template, param, cr);
+ }
+ mutex_exit(&template->ctmpl_lock);
+
+ return (result);
+}
+
+/*
+ * ctmpl_get
+ *
+ * Obtains the requested terms from a template.
+ */
+int
+ctmpl_get(ct_template_t *template, ct_param_t *param)
+{
+ int result = 0;
+
+ mutex_enter(&template->ctmpl_lock);
+ switch (param->ctpm_id) {
+ case CTP_COOKIE:
+ param->ctpm_value = template->ctmpl_cookie;
+ break;
+ case CTP_EV_INFO:
+ param->ctpm_value = template->ctmpl_ev_info;
+ break;
+ case CTP_EV_CRITICAL:
+ param->ctpm_value = template->ctmpl_ev_crit;
+ break;
+ default:
+ result = template->ctmpl_ops->ctop_get(template, param);
+ }
+ mutex_exit(&template->ctmpl_lock);
+
+ return (result);
+}
+
+/*
+ * ctmpl_makecurrent
+ *
+ * Used by ctmpl_activate and ctmpl_clear to set the current thread's
+ * active template. Frees the old active template, if there was one.
+ */
+static void
+ctmpl_makecurrent(ct_template_t *template, ct_template_t *new)
+{
+ klwp_t *curlwp = ttolwp(curthread);
+ proc_t *p = curproc;
+ ct_template_t *old;
+
+ mutex_enter(&p->p_lock);
+ old = curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index];
+ curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index] = new;
+ mutex_exit(&p->p_lock);
+
+ if (old)
+ ctmpl_free(old);
+}
+
+/*
+ * ctmpl_activate
+ *
+ * Copies the specified template as the current thread's active
+ * template of that type.
+ */
+void
+ctmpl_activate(ct_template_t *template)
+{
+ ctmpl_makecurrent(template, ctmpl_dup(template));
+}
+
+/*
+ * ctmpl_clear
+ *
+ * Clears the current thread's active template of the same type as
+ * the specified template.
+ */
+void
+ctmpl_clear(ct_template_t *template)
+{
+ ctmpl_makecurrent(template, NULL);
+}
+
+/*
+ * ctmpl_create
+ *
+ * Creates a new contract using the specified template.
+ */
+int
+ctmpl_create(ct_template_t *template)
+{
+ return (template->ctmpl_ops->ctop_create(template));
+}
+
+/*
+ * ctmpl_init
+ *
+ * Initializes the common portion of a new contract template.
+ */
+void
+ctmpl_init(ct_template_t *new, ctmplops_t *ops, ct_type_t *type, void *data)
+{
+ mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
+ new->ctmpl_ops = ops;
+ new->ctmpl_type = type;
+ new->ctmpl_data = data;
+ new->ctmpl_ev_info = new->ctmpl_ev_crit = 0;
+ new->ctmpl_cookie = 0;
+}
+
+/*
+ * ctmpl_copy
+ *
+ * Copies the common portions of a contract template. Intended for use
+ * by a contract type's ctop_dup template op. Returns with the old
+ * template's lock held, which should remain held until the
+ * template op returns (it is dropped by ctmpl_dup).
+ */
+void
+ctmpl_copy(ct_template_t *new, ct_template_t *old)
+{
+ mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_enter(&old->ctmpl_lock);
+ new->ctmpl_ops = old->ctmpl_ops;
+ new->ctmpl_type = old->ctmpl_type;
+ new->ctmpl_ev_crit = old->ctmpl_ev_crit;
+ new->ctmpl_ev_info = old->ctmpl_ev_info;
+ new->ctmpl_cookie = old->ctmpl_cookie;
+}
+
+/*
+ * ctmpl_create_inval
+ *
+ * Returns EINVAL. Provided for the convenience of those contract
+ * types which don't support ct_tmpl_create(3contract) and would
+ * otherwise need to create their own stub for the ctop_create template
+ * op.
+ */
+/*ARGSUSED*/
+int
+ctmpl_create_inval(ct_template_t *template)
+{
+ return (EINVAL);
+}
+
+
+/*
+ * cte_queue_create
+ *
+ * Initializes a queue of a particular type. If dynamic is set, the
+ * queue is to be freed when its last listener is removed after being
+ * drained.
+ */
+static void
+cte_queue_create(ct_equeue_t *q, ct_listnum_t list, int maxinf, int dynamic)
+{
+ mutex_init(&q->ctq_lock, NULL, MUTEX_DEFAULT, NULL);
+ q->ctq_listno = list;
+ list_create(&q->ctq_events, sizeof (ct_kevent_t),
+ offsetof(ct_kevent_t, cte_nodes[list].ctm_node));
+ list_create(&q->ctq_listeners, sizeof (ct_listener_t),
+ offsetof(ct_listener_t, ctl_allnode));
+ list_create(&q->ctq_tail, sizeof (ct_listener_t),
+ offsetof(ct_listener_t, ctl_tailnode));
+ gethrestime(&q->ctq_atime);
+ q->ctq_nlisteners = 0;
+ q->ctq_nreliable = 0;
+ q->ctq_ninf = 0;
+ q->ctq_max = maxinf;
+
+ /*
+ * Bundle queues and contract queues are embedded in other
+ * structures and are implicitly reference counted by virtue
+ * of their vnodes' indirect hold on their contracts. Process
+ * bundle queues are dynamically allocated and may persist
+ * after the death of the process, so they must be explicitly
+ * reference counted.
+ */
+ q->ctq_flags = dynamic ? CTQ_REFFED : 0;
+}
+
+/*
+ * cte_queue_destroy
+ *
+ * Destroys the specified queue. The queue is freed if it is
+ * reference counted.
+ */
+static void
+cte_queue_destroy(ct_equeue_t *q)
+{
+ ASSERT(q->ctq_flags & CTQ_DEAD);
+ ASSERT(q->ctq_nlisteners == 0);
+ ASSERT(q->ctq_nreliable == 0);
+ list_destroy(&q->ctq_events);
+ list_destroy(&q->ctq_listeners);
+ list_destroy(&q->ctq_tail);
+ mutex_destroy(&q->ctq_lock);
+ if (q->ctq_flags & CTQ_REFFED)
+ kmem_free(q, sizeof (ct_equeue_t));
+}
+
+/*
+ * cte_hold
+ *
+ * Takes a hold on the specified event.
+ */
+static void
+cte_hold(ct_kevent_t *e)
+{
+ mutex_enter(&e->cte_lock);
+ ASSERT(e->cte_refs > 0);
+ e->cte_refs++;
+ mutex_exit(&e->cte_lock);
+}
+
+/*
+ * cte_rele
+ *
+ * Releases a hold on the specified event. If the caller had the last
+ * reference, frees the event and releases its hold on the contract
+ * that generated it.
+ */
+static void
+cte_rele(ct_kevent_t *e)
+{
+ mutex_enter(&e->cte_lock);
+ ASSERT(e->cte_refs > 0);
+ if (--e->cte_refs) {
+ mutex_exit(&e->cte_lock);
+ return;
+ }
+
+ contract_rele(e->cte_contract);
+
+ mutex_destroy(&e->cte_lock);
+ if (e->cte_data)
+ nvlist_free(e->cte_data);
+ if (e->cte_gdata)
+ nvlist_free(e->cte_gdata);
+ kmem_free(e, sizeof (ct_kevent_t));
+}
+
+/*
+ * cte_qrele
+ *
+ * Remove this listener's hold on the specified event, removing and
+ * releasing the queue's hold on the event if appropriate.
+ */
+static void
+cte_qrele(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
+{
+ ct_member_t *member = &e->cte_nodes[q->ctq_listno];
+
+ ASSERT(MUTEX_HELD(&q->ctq_lock));
+
+ if (l->ctl_flags & CTLF_RELIABLE)
+ member->ctm_nreliable--;
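+ /*
+ * If this was the last reference to an event which has
+ * already been trimmed, it can finally be removed from the
+ * queue and released.
+ */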
+ if ((--member->ctm_refs == 0) && member->ctm_trimmed) {
+ member->ctm_trimmed = 0;
+ list_remove(&q->ctq_events, e);
+ cte_rele(e);
+ }
+}
+
+/*
+ * cte_qmove
+ *
+ * Move this listener to the specified event in the queue.
+ */
+static ct_kevent_t *
+cte_qmove(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e)
+{
+ ct_kevent_t *olde;
+
+ ASSERT(MUTEX_HELD(&q->ctq_lock));
+ ASSERT(l->ctl_equeue == q);
+
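+ /*
+ * A listener with no current position is sitting on the
+ * tail list waiting for new events; take it off before
+ * repositioning it.
+ */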
+ if ((olde = l->ctl_position) == NULL)
+ list_remove(&q->ctq_tail, l);
+
+ while (e != NULL && e->cte_nodes[q->ctq_listno].ctm_trimmed)
+ e = list_next(&q->ctq_events, e);
+
+ if (e != NULL) {
+ e->cte_nodes[q->ctq_listno].ctm_refs++;
+ if (l->ctl_flags & CTLF_RELIABLE)
+ e->cte_nodes[q->ctq_listno].ctm_nreliable++;
+ } else {
+ list_insert_tail(&q->ctq_tail, l);
+ }
+
+ l->ctl_position = e;
+ if (olde)
+ cte_qrele(q, l, olde);
+
+ return (e);
+}
+
+/*
+ * cte_checkcred
+ *
+ * Determines if the specified event's contract is owned by a process
+ * with the same effective uid as the specified credential. Called
+ * after a failed call to contract_owned with locked set. Because it
+ * drops the queue lock, its caller (cte_qreadable) needs to make sure
+ * we're still in the same place after we return. Returns 1 on
+ * success.
+ */
+static int
+cte_checkcred(ct_equeue_t *q, ct_kevent_t *e, const cred_t *cr)
+{
+ int result;
+ contract_t *ct = e->cte_contract;
+
+ cte_hold(e);
+ mutex_exit(&q->ctq_lock);
+ result = curproc->p_zone->zone_uniqid == ct->ct_czuniqid &&
+ contract_checkcred(ct, cr);
+ mutex_enter(&q->ctq_lock);
+ cte_rele(e);
+
+ return (result);
+}
+
+/*
+ * cte_qreadable
+ *
+ * Ensures that the listener is pointing to a valid event that the
+ * caller has the credentials to read. Returns 0 if we can read the
+ * event we're pointing to.
+ */
+static int
+cte_qreadable(ct_equeue_t *q, ct_listener_t *l, const cred_t *cr,
+ uint64_t zuniqid, int crit)
+{
+ ct_kevent_t *e, *next;
+ contract_t *ct;
+
+ ASSERT(MUTEX_HELD(&q->ctq_lock));
+ ASSERT(l->ctl_equeue == q);
+
+ if (l->ctl_flags & CTLF_COPYOUT)
+ return (1);
+
+ next = l->ctl_position;
+ while (e = cte_qmove(q, l, next)) {
+ ct = e->cte_contract;
+ /*
+ * Check obvious things first. If we are looking for a
+ * critical message, is this one? If we aren't in the
+ * global zone, is this message meant for us?
+ */
+ if ((crit && (e->cte_flags & (CTE_INFO | CTE_ACK))) ||
+ (cr != NULL && zuniqid != GLOBAL_ZONEUNIQID &&
+ zuniqid != contract_getzuniqid(ct))) {
+
+ next = list_next(&q->ctq_events, e);
+
+ /*
+ * Next, see if our effective uid equals that of owner
+ * or author of the contract. Since we are holding the
+ * queue lock, contract_owned can't always check if we
+ * have the same effective uid as the contract's
+ * owner. If it comes to that, it fails and we take
+ * the slow(er) path.
+ */
+ } else if (cr != NULL && !contract_owned(ct, cr, B_TRUE)) {
+
+ /*
+ * At this point we either don't have any claim
+ * to this contract or we match the effective
+ * uid of the owner but couldn't tell. We
+ * first test for a NULL holder so that events
+ * from orphans and inherited contracts avoid
+ * the penalty phase.
+ */
+ if (e->cte_contract->ct_owner == NULL &&
+ !secpolicy_contract_observer_choice(cr))
+ next = list_next(&q->ctq_events, e);
+
+ /*
+ * cte_checkcred will juggle locks to see if we
+ * have the same uid as the event's contract's
+ * current owner. If it succeeds, we have to
+ * make sure we are in the same point in the
+ * queue.
+ */
+ else if (cte_checkcred(q, e, cr) &&
+ l->ctl_position == e)
+ break;
+
+ /*
+ * cte_checkcred failed; see if we're in the
+ * same place.
+ */
+ else if (l->ctl_position == e)
+ if (secpolicy_contract_observer_choice(cr))
+ break;
+ else
+ next = list_next(&q->ctq_events, e);
+
+ /*
+ * cte_checkcred failed, and our position was
+ * changed. Start from there.
+ */
+ else
+ next = l->ctl_position;
+ } else {
+ break;
+ }
+ }
+
+ /*
+ * We check for CTLF_COPYOUT again in case we dropped the queue
+ * lock in cte_checkcred.
+ */
+ return ((l->ctl_flags & CTLF_COPYOUT) || (l->ctl_position == NULL));
+}
+
+/*
+ * cte_qwakeup
+ *
+ * Wakes up any waiting listeners and points them at the specified event.
+ */
+static void
+cte_qwakeup(ct_equeue_t *q, ct_kevent_t *e)
+{
+ ct_listener_t *l;
+
+ ASSERT(MUTEX_HELD(&q->ctq_lock));
+
+ while (l = list_head(&q->ctq_tail)) {
+ list_remove(&q->ctq_tail, l);
+ e->cte_nodes[q->ctq_listno].ctm_refs++;
+ if (l->ctl_flags & CTLF_RELIABLE)
+ e->cte_nodes[q->ctq_listno].ctm_nreliable++;
+ l->ctl_position = e;
+ cv_signal(&l->ctl_cv);
+ pollwakeup(&l->ctl_pollhead, POLLIN);
+ }
+}
+
+/*
+ * cte_copy
+ *
+ * Copies events from the specified contract event queue to the
+ * end of the specified process bundle queue. Only called from
+ * contract_adopt.
+ *
+ * We copy to the end of the target queue instead of mixing the events
+ * in their proper order because otherwise the act of adopting a
+ * contract would require a process to reset all process bundle
+ * listeners it needed to see the new events. This would, in turn,
+ * require the process to keep track of which preexisting events had
+ * already been processed.
+ */
+static void
+cte_copy(ct_equeue_t *q, ct_equeue_t *newq)
+{
+ ct_kevent_t *e, *first = NULL;
+
+ ASSERT(q->ctq_listno == CTEL_CONTRACT);
+ ASSERT(newq->ctq_listno == CTEL_PBUNDLE);
+
+ mutex_enter(&q->ctq_lock);
+ mutex_enter(&newq->ctq_lock);
+
+ /*
+ * For now, only copy critical events.
+ */
+ for (e = list_head(&q->ctq_events); e != NULL;
+ e = list_next(&q->ctq_events, e)) {
+ if ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0) {
+ if (first == NULL)
+ first = e;
+ list_insert_tail(&newq->ctq_events, e);
+ cte_hold(e);
+ }
+ }
+
+ mutex_exit(&q->ctq_lock);
+
+ if (first)
+ cte_qwakeup(newq, first);
+
+ mutex_exit(&newq->ctq_lock);
+}
+
+/*
+ * cte_trim
+ *
+ * Trims unneeded events from an event queue. Algorithm works as
+ * follows:
+ *
+ * Removes all informative and acknowledged critical events until the
+ * first referenced event is found.
+ *
+ * If a contract is specified, removes all events (regardless of
+ * acknowledgement) generated by that contract until the first event
+ * referenced by a reliable listener is found. Referenced events are
+ * removed by marking them "trimmed". Such events will be removed
+ * when the last reference is dropped and will be skipped by future
+ * listeners.
+ *
+ * This is pretty basic. Ideally this should remove from the middle of
+ * the list (i.e. beyond the first referenced event), and even
+ * referenced events.
+ */
+static void
+cte_trim(ct_equeue_t *q, contract_t *ct)
+{
+ ct_kevent_t *e, *next;
+ int flags, stopper;
+ int start = 1;
+
+ ASSERT(MUTEX_HELD(&q->ctq_lock));
+
+ for (e = list_head(&q->ctq_events); e != NULL; e = next) {
+ next = list_next(&q->ctq_events, e);
+ flags = e->cte_flags;
+ stopper = (q->ctq_listno != CTEL_PBUNDLE) &&
+ (e->cte_nodes[q->ctq_listno].ctm_nreliable > 0);
+ if (e->cte_nodes[q->ctq_listno].ctm_refs == 0) {
+ if ((start && (flags & (CTE_INFO | CTE_ACK))) ||
+ (e->cte_contract == ct)) {
+ /*
+ * Toss informative and ACKed critical messages.
+ */
+ list_remove(&q->ctq_events, e);
+ cte_rele(e);
+ }
+ } else if ((e->cte_contract == ct) && !stopper) {
+ ASSERT(q->ctq_nlisteners != 0);
+ e->cte_nodes[q->ctq_listno].ctm_trimmed = 1;
+ } else if (ct && !stopper) {
+ start = 0;
+ } else {
+ /*
+ * Don't free messages past the first reader.
+ */
+ break;
+ }
+ }
+}
+
+/*
+ * cte_queue_drain
+ *
+ * Drain all events from the specified queue, and mark it dead. If
+ * "ack" is set, acknowledge any critical events we find along the
+ * way.
+ */
+static void
+cte_queue_drain(ct_equeue_t *q, int ack)
+{
+ ct_kevent_t *e, *next;
+ ct_listener_t *l;
+
+ mutex_enter(&q->ctq_lock);
+
+ for (e = list_head(&q->ctq_events); e != NULL; e = next) {
+ next = list_next(&q->ctq_events, e);
+ if (ack && ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0)) {
+ /*
+ * Make sure critical messages are eventually
+ * removed from the bundle queues.
+ */
+ mutex_enter(&e->cte_lock);
+ e->cte_flags |= CTE_ACK;
+ mutex_exit(&e->cte_lock);
+ ASSERT(MUTEX_HELD(&e->cte_contract->ct_lock));
+ e->cte_contract->ct_evcnt--;
+ }
+ list_remove(&q->ctq_events, e);
+ e->cte_nodes[q->ctq_listno].ctm_refs = 0;
+ e->cte_nodes[q->ctq_listno].ctm_nreliable = 0;
+ e->cte_nodes[q->ctq_listno].ctm_trimmed = 0;
+ cte_rele(e);
+ }
+
+ /*
+ * This is necessary only because of CTEL_PBUNDLE listeners;
+ * the events they point to can move from one pbundle to
+ * another. Fortunately, this only happens if the contract is
+ * inherited, which (in turn) only happens if the process
+ * exits, which means it's an all-or-nothing deal. If this
+ * wasn't the case, we would instead need to keep track of
+ * listeners on a per-event basis, not just a per-queue basis.
+ * This would have the side benefit of letting us clean up
+ * trimmed events sooner (i.e. immediately), but would
+ * unfortunately make events even bigger than they already
+ * are.
+ */
+ for (l = list_head(&q->ctq_listeners); l;
+ l = list_next(&q->ctq_listeners, l)) {
+ l->ctl_flags |= CTLF_DEAD;
+ if (l->ctl_position) {
+ l->ctl_position = NULL;
+ list_insert_tail(&q->ctq_tail, l);
+ }
+ cv_broadcast(&l->ctl_cv);
+ }
+
+ /*
+ * Disallow events.
+ */
+ q->ctq_flags |= CTQ_DEAD;
+
+ /*
+ * If we represent the last reference to a reference counted
+ * process bundle queue, free it.
+ */
+ if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_nlisteners == 0))
+ cte_queue_destroy(q);
+ else
+ mutex_exit(&q->ctq_lock);
+}
+
+/*
+ * cte_publish
+ *
+ * Publishes an event to a specific queue. Only called by
+ * cte_publish_all.
+ */
+static void
+cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp)
+{
+ ASSERT(MUTEX_HELD(&q->ctq_lock));
+
+ q->ctq_atime = *tsp;
+
+ /*
+ * Don't publish if the event is informative and there aren't
+ * any listeners, or if the queue has been shut down.
+ */
+ if (((q->ctq_nlisteners == 0) && (e->cte_flags & (CTE_INFO|CTE_ACK))) ||
+ (q->ctq_flags & CTQ_DEAD)) {
+ mutex_exit(&q->ctq_lock);
+ cte_rele(e);
+ return;
+ }
+
+ /*
+ * Enqueue event
+ */
+ list_insert_tail(&q->ctq_events, e);
+
+ /*
+ * Check for waiting listeners
+ */
+ cte_qwakeup(q, e);
+
+ /*
+ * Trim unnecessary events from the queue.
+ */
+ cte_trim(q, NULL);
+ mutex_exit(&q->ctq_lock);
+}
+
+/*
+ * cte_publish_all
+ *
+ * Publish an event to all necessary event queues. The event, e, must
+ * be zallocated by the caller, and the event's flags and type must be
+ * set. The rest of the event's fields are initialized here.
+ */
+void
+cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata)
+{
+ ct_equeue_t *q;
+ timespec_t ts;
+
+ e->cte_contract = ct;
+ e->cte_data = data;
+ e->cte_gdata = gdata;
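+ /*
+ * Take three holds up front, one for each queue (contract,
+ * type bundle, and process bundle) this event may be placed
+ * on; holds for queues the event doesn't reach are dropped
+ * below.
+ */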
+ e->cte_refs = 3;
+ e->cte_id = atomic_add_64_nv(&ct->ct_type->ct_type_evid, 1);
+ contract_hold(ct);
+
+ gethrestime(&ts);
+
+ /*
+ * ct_evtlock simply (and only) ensures that two events sent
+ * from the same contract are delivered to all queues in the
+ * same order.
+ */
+ mutex_enter(&ct->ct_evtlock);
+
+ /*
+ * CTEL_CONTRACT - First deliver to the contract queue, acking
+ * the event if the contract has been orphaned.
+ */
+ mutex_enter(&ct->ct_lock);
+ mutex_enter(&ct->ct_events.ctq_lock);
+ if ((e->cte_flags & CTE_INFO) == 0) {
+ if (ct->ct_state >= CTS_ORPHAN)
+ e->cte_flags |= CTE_ACK;
+ else
+ ct->ct_evcnt++;
+ }
+ mutex_exit(&ct->ct_lock);
+ cte_publish(&ct->ct_events, e, &ts);
+
+ /*
+ * CTEL_BUNDLE - Next deliver to the contract type's bundle
+ * queue.
+ */
+ mutex_enter(&ct->ct_type->ct_type_events.ctq_lock);
+ cte_publish(&ct->ct_type->ct_type_events, e, &ts);
+
+ /*
+ * CTEL_PBUNDLE - Finally, if the contract has an owner,
+ * deliver to the owner's process bundle queue.
+ */
+ mutex_enter(&ct->ct_lock);
+ if (ct->ct_owner) {
+ /*
+ * proc_exit doesn't free event queues until it has
+ * abandoned all contracts.
+ */
+ ASSERT(ct->ct_owner->p_ct_equeue);
+ ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]);
+ q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index];
+ mutex_enter(&q->ctq_lock);
+ mutex_exit(&ct->ct_lock);
+ cte_publish(q, e, &ts);
+ } else {
+ mutex_exit(&ct->ct_lock);
+ cte_rele(e);
+ }
+
+ mutex_exit(&ct->ct_evtlock);
+}
+
+/*
+ * cte_add_listener
+ *
+ * Add a new listener to an event queue.
+ */
+void
+cte_add_listener(ct_equeue_t *q, ct_listener_t *l)
+{
+ cv_init(&l->ctl_cv, NULL, CV_DEFAULT, NULL);
+ l->ctl_equeue = q;
+ l->ctl_position = NULL;
+ l->ctl_flags = 0;
+
+ mutex_enter(&q->ctq_lock);
+ list_insert_head(&q->ctq_tail, l);
+ list_insert_head(&q->ctq_listeners, l);
+ q->ctq_nlisteners++;
+ mutex_exit(&q->ctq_lock);
+}
+
+/*
+ * cte_remove_listener
+ *
+ * Remove a listener from an event queue. No other queue activities
+ * (e.g. cte_get_event) may be in progress at this endpoint when this
+ * is called.
+ */
+void
+cte_remove_listener(ct_listener_t *l)
+{
+ ct_equeue_t *q = l->ctl_equeue;
+ ct_kevent_t *e;
+
+ mutex_enter(&q->ctq_lock);
+
+ ASSERT((l->ctl_flags & (CTLF_COPYOUT|CTLF_RESET)) == 0);
+
+ if ((e = l->ctl_position) != NULL)
+ cte_qrele(q, l, e);
+ else
+ list_remove(&q->ctq_tail, l);
+ l->ctl_position = NULL;
+
+ q->ctq_nlisteners--;
+ list_remove(&q->ctq_listeners, l);
+
+ if (l->ctl_flags & CTLF_RELIABLE)
+ q->ctq_nreliable--;
+
+ /*
+	 * If we are the last listener of a dead reference-counted
+	 * queue (i.e. a process bundle), we free it. Otherwise we just
+ * trim any events which may have been kept around for our
+ * benefit.
+ */
+ if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_flags & CTQ_DEAD) &&
+ (q->ctq_nlisteners == 0)) {
+ cte_queue_destroy(q);
+ } else {
+ cte_trim(q, NULL);
+ mutex_exit(&q->ctq_lock);
+ }
+}
+
+/*
+ * cte_reset_listener
+ *
+ * Moves a listener's queue pointer to the beginning of the queue.
+ */
+void
+cte_reset_listener(ct_listener_t *l)
+{
+ ct_equeue_t *q = l->ctl_equeue;
+
+ mutex_enter(&q->ctq_lock);
+
+ /*
+ * We allow an asynchronous reset because it doesn't make a
+ * whole lot of sense to make reset block or fail. We already
+ * have most of the mechanism needed thanks to queue trimming,
+ * so implementing it isn't a big deal.
+ */
+ if (l->ctl_flags & CTLF_COPYOUT)
+ l->ctl_flags |= CTLF_RESET;
+
+ (void) cte_qmove(q, l, list_head(&q->ctq_events));
+
+ /*
+ * Inform blocked readers.
+ */
+ cv_broadcast(&l->ctl_cv);
+ pollwakeup(&l->ctl_pollhead, POLLIN);
+ mutex_exit(&q->ctq_lock);
+}
+
+/*
+ * cte_next_event
+ *
+ * Moves the event pointer for the specified listener to the next event
+ * on the queue. To avoid races, this movement only occurs if the
+ * specified event id matches that of the current event. This is used
+ * primarily to skip events that have been read but whose extended data
+ * haven't been copied out.
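+ *
+ * A caller (sketch) passes back the event id it obtained from an
+ * earlier read, so the skip is a no-op if another thread has already
+ * moved or reset the listener:
+ *
+ *	(void) cte_next_event(l, evid);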
+ */
+int
+cte_next_event(ct_listener_t *l, uint64_t id)
+{
+ ct_equeue_t *q = l->ctl_equeue;
+ ct_kevent_t *old;
+
+ mutex_enter(&q->ctq_lock);
+
+ if (l->ctl_flags & CTLF_COPYOUT)
+ l->ctl_flags |= CTLF_RESET;
+
+ if (((old = l->ctl_position) != NULL) && (old->cte_id == id))
+ (void) cte_qmove(q, l, list_next(&q->ctq_events, old));
+
+ mutex_exit(&q->ctq_lock);
+
+ return (0);
+}
+
+/*
+ * cte_get_event
+ *
+ * Reads an event from an event endpoint. If "nonblock" is clear, we
+ * block until a suitable event is ready. If "crit" is set, we only
+ * read critical events. Note that while "cr" is the caller's cred,
+ * "zuniqid" is the unique id of the zone the calling contract
+ * filesystem was mounted in.
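+ *
+ * An endpoint read path (sketch; how the caller derives its
+ * arguments is outside the scope of this file) looks roughly like:
+ *
+ *	error = cte_get_event(l, nonblock, uaddr, cr, zuniqid, crit);
+ *
+ * where EAGAIN indicates nonblock was set and no event was ready,
+ * EIDRM indicates the queue is dead, EINTR indicates an interrupted
+ * wait, and EFAULT indicates a failed copyin or copyout.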
+ */
+int
+cte_get_event(ct_listener_t *l, int nonblock, void *uaddr, const cred_t *cr,
+ uint64_t zuniqid, int crit)
+{
+ ct_equeue_t *q = l->ctl_equeue;
+ ct_kevent_t *temp;
+ int result = 0;
+ int partial = 0;
+ size_t size, gsize, len;
+ model_t mdl = get_udatamodel();
+ STRUCT_DECL(ct_event, ev);
+ STRUCT_INIT(ev, mdl);
+
+ /*
+	 * cte_qreadable checks for CTLF_COPYOUT and ensures that there
+	 * exists, and that we are pointing to, an appropriate
+ * event. It may temporarily drop ctq_lock, but that doesn't
+ * really matter to us.
+ */
+ mutex_enter(&q->ctq_lock);
+ while (cte_qreadable(q, l, cr, zuniqid, crit)) {
+ if (nonblock) {
+ result = EAGAIN;
+ goto error;
+ }
+ if (q->ctq_flags & CTQ_DEAD) {
+ result = EIDRM;
+ goto error;
+ }
+ result = cv_wait_sig(&l->ctl_cv, &q->ctq_lock);
+ if (result == 0) {
+ result = EINTR;
+ goto error;
+ }
+ }
+ temp = l->ctl_position;
+ cte_hold(temp);
+ l->ctl_flags |= CTLF_COPYOUT;
+ mutex_exit(&q->ctq_lock);
+
+ /*
+ * We now have an event. Copy in the user event structure to
+ * see how much space we have to work with.
+ */
+ result = copyin(uaddr, STRUCT_BUF(ev), STRUCT_SIZE(ev));
+ if (result)
+ goto copyerr;
+
+ /*
+ * Determine what data we have and what the user should be
+ * allowed to see.
+ */
+ size = gsize = 0;
+ if (temp->cte_data) {
+ VERIFY(nvlist_size(temp->cte_data, &size,
+ NV_ENCODE_NATIVE) == 0);
+ ASSERT(size != 0);
+ }
+ if (zuniqid == GLOBAL_ZONEUNIQID && temp->cte_gdata) {
+ VERIFY(nvlist_size(temp->cte_gdata, &gsize,
+ NV_ENCODE_NATIVE) == 0);
+ ASSERT(gsize != 0);
+ }
+
+ /*
+ * If we have enough space, copy out the extended event data.
+ */
+ len = size + gsize;
+ if (len) {
+ if (STRUCT_FGET(ev, ctev_nbytes) >= len) {
+ char *buf = kmem_alloc(len, KM_SLEEP);
+
+ if (size)
+ VERIFY(nvlist_pack(temp->cte_data, &buf, &size,
+ NV_ENCODE_NATIVE, KM_SLEEP) == 0);
+ if (gsize) {
+ char *tmp = buf + size;
+
+ VERIFY(nvlist_pack(temp->cte_gdata, &tmp,
+ &gsize, NV_ENCODE_NATIVE, KM_SLEEP) == 0);
+ }
+
+ /* This shouldn't have changed */
+ ASSERT(size + gsize == len);
+ result = copyout(buf, STRUCT_FGETP(ev, ctev_buffer),
+ len);
+ kmem_free(buf, len);
+ if (result)
+ goto copyerr;
+ } else {
+ partial = 1;
+ }
+ }
+
+ /*
+ * Copy out the common event data.
+ */
+ STRUCT_FSET(ev, ctev_id, temp->cte_contract->ct_id);
+ STRUCT_FSET(ev, ctev_evid, temp->cte_id);
+ STRUCT_FSET(ev, ctev_cttype,
+ temp->cte_contract->ct_type->ct_type_index);
+ STRUCT_FSET(ev, ctev_flags, temp->cte_flags & (CTE_ACK|CTE_INFO));
+ STRUCT_FSET(ev, ctev_type, temp->cte_type);
+ STRUCT_FSET(ev, ctev_nbytes, len);
+ STRUCT_FSET(ev, ctev_goffset, size);
+ result = copyout(STRUCT_BUF(ev), uaddr, STRUCT_SIZE(ev));
+
+copyerr:
+ /*
+ * Only move our location in the queue if all copyouts were
+ * successful, the caller provided enough space for the entire
+ * event, and our endpoint wasn't reset or otherwise moved by
+ * another thread.
+ */
+ mutex_enter(&q->ctq_lock);
+ if (result)
+ result = EFAULT;
+ else if (!partial && ((l->ctl_flags & CTLF_RESET) == 0) &&
+ (l->ctl_position == temp))
+ (void) cte_qmove(q, l, list_next(&q->ctq_events, temp));
+ l->ctl_flags &= ~(CTLF_COPYOUT|CTLF_RESET);
+ /*
+ * Signal any readers blocked on our CTLF_COPYOUT.
+ */
+ cv_signal(&l->ctl_cv);
+ cte_rele(temp);
+
+error:
+ mutex_exit(&q->ctq_lock);
+ return (result);
+}
+
+/*
+ * cte_set_reliable
+ *
+ * Requests that events be reliably delivered to an event endpoint.
+ * Unread informative and acknowledged critical events will not be
+ * removed from the queue until this listener reads or skips them.
+ * Because a listener could maliciously request reliable delivery and
+ * then do nothing, this requires that PRIV_CONTRACT_EVENT be in the
+ * caller's effective set.
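+ *
+ * A sketch of the expected call from an endpoint ioctl handler
+ * (hypothetical caller):
+ *
+ *	if ((error = cte_set_reliable(l, cr)) != 0)
+ *		return (error);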
+ */
+int
+cte_set_reliable(ct_listener_t *l, const cred_t *cr)
+{
+ ct_equeue_t *q = l->ctl_equeue;
+ int error;
+
+ if ((error = secpolicy_contract_event(cr)) != 0)
+ return (error);
+
+ mutex_enter(&q->ctq_lock);
+ if ((l->ctl_flags & CTLF_RELIABLE) == 0) {
+ l->ctl_flags |= CTLF_RELIABLE;
+ q->ctq_nreliable++;
+ if (l->ctl_position != NULL)
+ l->ctl_position->cte_nodes[q->ctq_listno].
+ ctm_nreliable++;
+ }
+ mutex_exit(&q->ctq_lock);
+
+ return (0);
+}