Diffstat (limited to 'usr/src/uts/common/os/contract.c')
-rw-r--r-- usr/src/uts/common/os/contract.c | 2411
1 file changed, 2411 insertions(+), 0 deletions(-)
diff --git a/usr/src/uts/common/os/contract.c b/usr/src/uts/common/os/contract.c new file mode 100644 index 0000000000..aadfb92e62 --- /dev/null +++ b/usr/src/uts/common/os/contract.c @@ -0,0 +1,2411 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +/* + * Contracts + * --------- + * + * Contracts are a primitive which enriches the relationships between + * processes and system resources. The primary purpose of contracts is + * to provide a means for the system to negotiate the departure from a + * binding relationship (e.g. pages locked in memory or a thread bound + * to a processor), but they can also be used as a purely asynchronous + * error reporting mechanism as they are with process contracts. + * + * More information on how one interfaces with contracts and what + * contracts can do for you can be found in: + * PSARC 2003/193 Solaris Contracts + * PSARC 2004/460 Contracts addendum + * + * This file contains the core contracts framework. By itself it is + * useless: it depends on the contracts filesystem (ctfs) to provide an + * interface to user processes and individual contract types to + * implement the process/resource relationships. + * + * Data structure overview + * ----------------------- + * + * A contract is represented by a contract_t, which itself points to an + * encapsulating contract-type specific contract object. A contract_t + * contains the contract's static identity (including its terms), its + * linkage to various bookkeeping structures, the contract-specific + * event queue, and a reference count. + * + * A contract template is represented by a ct_template_t, which, like a + * contract, points to an encapsulating contract-type specific template + * object. A ct_template_t contains the template's terms. + * + * An event queue is represented by a ct_equeue_t, and consists of a + * list of events, a list of listeners, and a list of listeners who are + * waiting for new events (affectionately referred to as "tail + * listeners"). There are three queue types, defined by ct_listnum_t + * (an enum). An event may be on one of each type of queue + * simultaneously; the list linkage used by a queue is determined by + * its type. + * + * An event is represented by a ct_kevent_t, which contains mostly + * static event data (e.g. id, payload). It also has an array of + * ct_member_t structures, each of which contains a list_node_t and + * represents the event's linkage in a specific event queue. + * + * Each open of an event endpoint results in the creation of a new + * listener, represented by a ct_listener_t.
In addition to linkage + * into the aforementioned lists in the event_queue, a ct_listener_t + * contains a pointer to the ct_kevent_t it is currently positioned at + * as well as a set of status flags and other administrative data. + * + * Each process has a list of contracts it owns, p_ct_held; a pointer + * to the process contract it is a member of, p_ct_process; the linkage + * for that membership, p_ct_member; and an array of event queue + * structures representing the process bundle queues. + * + * Each LWP has an array of its active templates, lwp_ct_active; and + * the most recently created contracts, lwp_ct_latest. + * + * A process contract has a list of member processes and a list of + * inherited contracts. + * + * There is a system-wide list of all contracts, as well as per-type + * lists of contracts. + * + * Lock ordering overview + * ---------------------- + * + * Locks at the top are taken first: + * + * ct_evtlock + * regent ct_lock + * member ct_lock + * pidlock + * p_lock + * contract ctq_lock contract_lock + * pbundle ctq_lock + * cte_lock + * ct_reflock + * + * contract_lock and ctq_lock/cte_lock are not currently taken at the + * same time. + * + * Reference counting and locking + * ------------------------------ + * + * A contract has a reference count, protected by ct_reflock. + * (ct_reflock is also used in a couple other places where atomic + * access to a variable is needed in an innermost context). A process + * maintains a hold on each contract it owns. A process contract has a + * hold on each contract it has inherited. Each event has a hold on + * the contract which generated it. Process contract templates have + * holds on the contracts referred to by their transfer terms. CTFS + * contract directory nodes have holds on contracts. Lastly, various + * code paths may temporarily take holds on contracts to prevent them + * from disappearing while other processing is going on. It is + * important to note that the global contract lists do not hold + * references on contracts; a contract is removed from these structures + * atomically with the release of its last reference. + * + * At a given point in time, a contract can either be owned by a + * process, inherited by a regent process contract, or orphaned. A + * contract_t's owner and regent pointers, ct_owner and ct_regent, are + * protected by its ct_lock. The linkage in the holder's (holder = + * owner or regent) list of contracts, ct_ctlist, is protected by + * whatever lock protects the holder's data structure. In order for + * these two directions to remain consistent, changing the holder of a + * contract requires that both locks be held. + * + * Events also have reference counts. There is one hold on an event + * per queue it is present on, in addition to those needed for the + * usual sundry reasons. Individual listeners are associated with + * specific queues, and increase a queue-specific reference count + * stored in the ct_member_t structure. + * + * The dynamic contents of an event (reference count and flags) are + * protected by its cte_lock, while the contents of the embedded + * ct_member_t structures are protected by the locks of the queues they + * are linked into. A ct_listener_t's contents are also protected by + * its event queue's ctq_lock. + * + * Resource controls + * ----------------- + * + * Control: project.max-contracts (rc_project_contract) + * Description: Maximum number of contracts allowed a project.
+ * + * When a contract is created, the project's allocation is tested and + * (assuming success) increased. When the last reference to a + * contract is released, the creating project's allocation is + * decreased. + */ + +#include <sys/mutex.h> +#include <sys/debug.h> +#include <sys/types.h> +#include <sys/param.h> +#include <sys/kmem.h> +#include <sys/thread.h> +#include <sys/id_space.h> +#include <sys/avl.h> +#include <sys/list.h> +#include <sys/sysmacros.h> +#include <sys/proc.h> +#include <sys/contract_impl.h> +#include <sys/contract/process_impl.h> +#include <sys/systm.h> +#include <sys/atomic.h> +#include <sys/cmn_err.h> +#include <sys/model.h> +#include <sys/policy.h> +#include <sys/zone.h> +#include <sys/task.h> + +extern rctl_hndl_t rc_project_contract; + +static id_space_t *contract_ids; +static avl_tree_t contract_avl; +static kmutex_t contract_lock; + +int ct_ntypes = CTT_MAXTYPE; +static ct_type_t *ct_types_static[CTT_MAXTYPE]; +ct_type_t **ct_types = ct_types_static; + +static void cte_queue_create(ct_equeue_t *, ct_listnum_t, int, int); +static void cte_queue_destroy(ct_equeue_t *); +static void cte_queue_drain(ct_equeue_t *, int); +static void cte_trim(ct_equeue_t *, contract_t *); +static void cte_copy(ct_equeue_t *, ct_equeue_t *); + +/* + * contract_compar + * + * A contract comparator which sorts on contract ID. + */ +int +contract_compar(const void *x, const void *y) +{ + const contract_t *ct1 = x; + const contract_t *ct2 = y; + + if (ct1->ct_id < ct2->ct_id) + return (-1); + if (ct1->ct_id > ct2->ct_id) + return (1); + return (0); +} + +/* + * contract_init + * + * Initializes the contract subsystem, the specific contract types, and + * process 0. + */ +void +contract_init(void) +{ + /* + * Initialize contract subsystem. + */ + contract_ids = id_space_create("contracts", 1, INT_MAX); + avl_create(&contract_avl, contract_compar, sizeof (contract_t), + offsetof(contract_t, ct_ctavl)); + mutex_init(&contract_lock, NULL, MUTEX_DEFAULT, NULL); + + /* + * Initialize contract types. + */ + contract_process_init(); + + /* + * Initialize p0/lwp0 contract state. + */ + avl_create(&p0.p_ct_held, contract_compar, sizeof (contract_t), + offsetof(contract_t, ct_ctlist)); +} + +/* + * contract_dtor + * + * Performs basic destruction of the common portions of a contract. + * Called from the failure path of contract_ctor and from + * contract_rele. + */ +static void +contract_dtor(contract_t *ct) +{ + cte_queue_destroy(&ct->ct_events); + list_destroy(&ct->ct_vnodes); + mutex_destroy(&ct->ct_reflock); + mutex_destroy(&ct->ct_lock); + mutex_destroy(&ct->ct_evtlock); +} + +/* + * contract_ctor + * + * Called by a contract type to initialize a contract. Fails if the + * max-contract resource control would have been exceeded. After a + * successful call to contract_ctor, the contract is unlocked and + * visible in all namespaces; any type-specific initialization should + * be completed before calling contract_ctor. Returns 0 on success. + * + * Because not all callers can tolerate failure, a 0 value for canfail + * instructs contract_ctor to ignore the project.max-contracts resource + * control. Obviously, this "out" should only be employed by callers + * who are sufficiently constrained in other ways (e.g. newproc). 
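+ *
+ * A minimal sketch of a type-specific create path built on this
+ * routine (illustrative only; foo_ct_t, foo_type, and the error
+ * choice are hypothetical):
+ *
+ *	foo_ct_t *foo = kmem_zalloc(sizeof (foo_ct_t), KM_SLEEP);
+ *
+ *	(type-specific initialization of *foo)
+ *	if (contract_ctor(&foo->foo_contract, foo_type, tmpl, foo, 0,
+ *	    curproc, 1) != 0) {
+ *		kmem_free(foo, sizeof (foo_ct_t));
+ *		return (EAGAIN);
+ *	}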
+ */ +int +contract_ctor(contract_t *ct, ct_type_t *type, ct_template_t *tmpl, void *data, + ctflags_t flags, proc_t *author, int canfail) +{ + avl_index_t where; + klwp_t *curlwp = ttolwp(curthread); + + ASSERT(author == curproc); + + mutex_init(&ct->ct_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ct->ct_reflock, NULL, MUTEX_DEFAULT, NULL); + mutex_init(&ct->ct_evtlock, NULL, MUTEX_DEFAULT, NULL); + ct->ct_id = id_alloc(contract_ids); + + cte_queue_create(&ct->ct_events, CTEL_CONTRACT, 20, 0); + list_create(&ct->ct_vnodes, sizeof (contract_vnode_t), + offsetof(contract_vnode_t, ctv_node)); + + /* + * Instance data + */ + ct->ct_ref = 2; /* one for the holder, one for "latest" */ + ct->ct_cuid = crgetuid(CRED()); + ct->ct_type = type; + ct->ct_data = data; + gethrestime(&ct->ct_ctime); + ct->ct_state = CTS_OWNED; + ct->ct_flags = flags; + ct->ct_regent = author->p_ct_process ? + &author->p_ct_process->conp_contract : NULL; + ct->ct_ev_info = tmpl->ctmpl_ev_info; + ct->ct_ev_crit = tmpl->ctmpl_ev_crit; + ct->ct_cookie = tmpl->ctmpl_cookie; + ct->ct_owner = author; + + /* + * Test project.max-contracts. + */ + mutex_enter(&author->p_lock); + mutex_enter(&contract_lock); + if (canfail && rctl_test(rc_project_contract, + author->p_task->tk_proj->kpj_rctls, author, 1, + RCA_SAFE) & RCT_DENY) { + id_free(contract_ids, ct->ct_id); + mutex_exit(&contract_lock); + mutex_exit(&author->p_lock); + ct->ct_events.ctq_flags |= CTQ_DEAD; + contract_dtor(ct); + return (1); + } + ct->ct_proj = author->p_task->tk_proj; + ct->ct_proj->kpj_data.kpd_contract++; + (void) project_hold(ct->ct_proj); + mutex_exit(&contract_lock); + + /* + * Insert into holder's avl of contracts. + * We use an avl not because order is important, but because + * readdir of /proc/contracts requires we be able to use a + * scalar as an index into the process's list of contracts + */ + ct->ct_zoneid = author->p_zone->zone_id; + ct->ct_czuniqid = ct->ct_mzuniqid = author->p_zone->zone_uniqid; + VERIFY(avl_find(&author->p_ct_held, ct, &where) == NULL); + avl_insert(&author->p_ct_held, ct, where); + mutex_exit(&author->p_lock); + + /* + * Insert into global contract AVL + */ + mutex_enter(&contract_lock); + VERIFY(avl_find(&contract_avl, ct, &where) == NULL); + avl_insert(&contract_avl, ct, where); + mutex_exit(&contract_lock); + + /* + * Insert into type AVL + */ + mutex_enter(&type->ct_type_lock); + VERIFY(avl_find(&type->ct_type_avl, ct, &where) == NULL); + avl_insert(&type->ct_type_avl, ct, where); + type->ct_type_timestruc = ct->ct_ctime; + mutex_exit(&type->ct_type_lock); + + if (curlwp->lwp_ct_latest[type->ct_type_index]) + contract_rele(curlwp->lwp_ct_latest[type->ct_type_index]); + curlwp->lwp_ct_latest[type->ct_type_index] = ct; + + return (0); +} + +/* + * contract_rele + * + * Releases a reference to a contract. If the caller had the last + * reference, the contract is removed from all namespaces, its + * allocation against the max-contracts resource control is released, + * and the contract type's free entry point is invoked for any + * type-specific deconstruction and to (presumably) free the object. + */ +void +contract_rele(contract_t *ct) +{ + uint64_t nref; + + mutex_enter(&ct->ct_reflock); + ASSERT(ct->ct_ref > 0); + nref = --ct->ct_ref; + mutex_exit(&ct->ct_reflock); + if (nref == 0) { + /* + * ct_owner is cleared when it drops its reference. 
+ */ + ASSERT(ct->ct_owner == NULL); + ASSERT(ct->ct_evcnt == 0); + + /* + * Remove from global contract AVL + */ + mutex_enter(&contract_lock); + avl_remove(&contract_avl, ct); + mutex_exit(&contract_lock); + + /* + * Remove from type AVL + */ + mutex_enter(&ct->ct_type->ct_type_lock); + avl_remove(&ct->ct_type->ct_type_avl, ct); + mutex_exit(&ct->ct_type->ct_type_lock); + + /* + * Release the contract's ID + */ + id_free(contract_ids, ct->ct_id); + + /* + * Release project hold + */ + mutex_enter(&contract_lock); + ct->ct_proj->kpj_data.kpd_contract--; + project_rele(ct->ct_proj); + mutex_exit(&contract_lock); + + /* + * Free the contract + */ + contract_dtor(ct); + ct->ct_type->ct_type_ops->contop_free(ct); + } +} + +/* + * contract_hold + * + * Adds a reference to a contract + */ +void +contract_hold(contract_t *ct) +{ + mutex_enter(&ct->ct_reflock); + ASSERT(ct->ct_ref < UINT64_MAX); + ct->ct_ref++; + mutex_exit(&ct->ct_reflock); +} + +/* + * contract_getzuniqid + * + * Get a contract's zone unique ID. Needed because 64-bit reads and + * writes aren't atomic on x86. Since there are contexts where we are + * unable to take ct_lock, we instead use ct_reflock; in actuality any + * lock would do. + */ +uint64_t +contract_getzuniqid(contract_t *ct) +{ + uint64_t zuniqid; + + mutex_enter(&ct->ct_reflock); + zuniqid = ct->ct_mzuniqid; + mutex_exit(&ct->ct_reflock); + + return (zuniqid); +} + +/* + * contract_setzuniqid + * + * Sets a contract's zone unique ID. See contract_getzuniqid. + */ +void +contract_setzuniqid(contract_t *ct, uint64_t zuniqid) +{ + mutex_enter(&ct->ct_reflock); + ct->ct_mzuniqid = zuniqid; + mutex_exit(&ct->ct_reflock); +} + +/* + * contract_abandon + * + * Abandons the specified contract. If "explicit" is clear, the + * contract was implicitly abandoned (by process exit) and should be + * inherited if its terms allow it and its owner was a member of a + * regent contract. Otherwise, the contract type's abandon entry point + * is invoked to either destroy or orphan the contract. + */ +int +contract_abandon(contract_t *ct, proc_t *p, int explicit) +{ + ct_equeue_t *q = NULL; + contract_t *parent = &p->p_ct_process->conp_contract; + int inherit = 0; + + ASSERT(p == curproc); + + mutex_enter(&ct->ct_lock); + + /* + * Multiple contract locks are taken contract -> subcontract. + * Check if the contract will be inherited so we can acquire + * all the necessary locks before making sensitive changes. + */ + if (!explicit && (ct->ct_flags & CTF_INHERIT) && + contract_process_accept(parent)) { + mutex_exit(&ct->ct_lock); + mutex_enter(&parent->ct_lock); + mutex_enter(&ct->ct_lock); + inherit = 1; + } + + if (ct->ct_owner != p) { + mutex_exit(&ct->ct_lock); + if (inherit) + mutex_exit(&parent->ct_lock); + return (EINVAL); + } + + mutex_enter(&p->p_lock); + if (explicit) + avl_remove(&p->p_ct_held, ct); + ct->ct_owner = NULL; + mutex_exit(&p->p_lock); + + /* + * Since we can't call cte_trim with the contract lock held, + * we grab the queue pointer here. + */ + if (p->p_ct_equeue) + q = p->p_ct_equeue[ct->ct_type->ct_type_index]; + + /* + * contop_abandon may destroy the contract so we rely on it to + * drop ct_lock. We retain a reference on the contract so that + * the cte_trim which follows functions properly. Even though + * cte_trim doesn't dereference the contract pointer, it is + * still necessary to retain a reference to the contract so + * that we don't trim events which are sent by a subsequently + * allocated contract infortuitously located at the same address. 
+ */ + contract_hold(ct); + + if (inherit) { + ct->ct_state = CTS_INHERITED; + ASSERT(ct->ct_regent == parent); + contract_process_take(parent, ct); + + /* + * We are handing off the process's reference to the + * parent contract. For this reason, the order in + * which we drop the contract locks is also important. + */ + mutex_exit(&ct->ct_lock); + mutex_exit(&parent->ct_lock); + } else { + ct->ct_regent = NULL; + ct->ct_type->ct_type_ops->contop_abandon(ct); + } + + /* + * ct_lock has been dropped; we can safely trim the event + * queue now. + */ + if (q) { + mutex_enter(&q->ctq_lock); + cte_trim(q, ct); + mutex_exit(&q->ctq_lock); + } + + contract_rele(ct); + + return (0); +} + +/* + * contract_adopt + * + * Adopts a contract. After a successful call to this routine, the + * previously inherited contract will belong to the calling process, + * and its events will have been appended to its new owner's process + * bundle queue. + */ +int +contract_adopt(contract_t *ct, proc_t *p) +{ + avl_index_t where; + ct_equeue_t *q; + contract_t *parent; + + ASSERT(p == curproc); + + /* + * Ensure the process has an event queue. Checked by ASSERTs + * below. + */ + (void) contract_type_pbundle(ct->ct_type, p); + + mutex_enter(&ct->ct_lock); + parent = ct->ct_regent; + if (ct->ct_state != CTS_INHERITED || + &p->p_ct_process->conp_contract != parent || + p->p_zone->zone_uniqid != ct->ct_czuniqid) { + mutex_exit(&ct->ct_lock); + return (EINVAL); + } + + /* + * Multiple contract locks are taken contract -> subcontract. + */ + mutex_exit(&ct->ct_lock); + mutex_enter(&parent->ct_lock); + mutex_enter(&ct->ct_lock); + + /* + * It is possible that the contract was adopted by someone else + * while its lock was dropped. It isn't possible for the + * contract to have been inherited by a different regent + * contract. + */ + if (ct->ct_state != CTS_INHERITED) { + mutex_exit(&parent->ct_lock); + mutex_exit(&ct->ct_lock); + return (EBUSY); + } + ASSERT(ct->ct_regent == parent); + + ct->ct_state = CTS_OWNED; + + contract_process_adopt(ct, p); + + mutex_enter(&p->p_lock); + ct->ct_owner = p; + VERIFY(avl_find(&p->p_ct_held, ct, &where) == NULL); + avl_insert(&p->p_ct_held, ct, where); + mutex_exit(&p->p_lock); + + ASSERT(ct->ct_owner->p_ct_equeue); + ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]); + q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]; + cte_copy(&ct->ct_events, q); + mutex_exit(&ct->ct_lock); + + return (0); +} + +/* + * contract_ack + * + * Acknowledges receipt of a critical event. + */ +int +contract_ack(contract_t *ct, uint64_t evid) +{ + ct_kevent_t *ev; + list_t *queue = &ct->ct_events.ctq_events; + int error = ESRCH; + + mutex_enter(&ct->ct_lock); + mutex_enter(&ct->ct_events.ctq_lock); + /* + * We are probably ACKing something near the head of the queue. + */ + for (ev = list_head(queue); ev; ev = list_next(queue, ev)) { + if (ev->cte_id == evid) { + if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) { + ev->cte_flags |= CTE_ACK; + ct->ct_evcnt--; + error = 0; + } + break; + } + } + mutex_exit(&ct->ct_events.ctq_lock); + mutex_exit(&ct->ct_lock); + + return (error); +} + +/* + * contract_orphan + * + * Icky-poo. This is a process-contract special, used to ACK all + * critical messages when a contract is orphaned. 
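+ *
+ * Note that this routine and contract_ack recognize an unacknowledged
+ * critical event with the same flag test; as a sketch, written as a
+ * hypothetical helper:
+ *
+ *	static int
+ *	cte_crit_unacked(ct_kevent_t *ev)
+ *	{
+ *		return ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0);
+ *	}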
+ */ +void +contract_orphan(contract_t *ct) +{ + ct_kevent_t *ev; + list_t *queue = &ct->ct_events.ctq_events; + + ASSERT(MUTEX_HELD(&ct->ct_lock)); + ASSERT(ct->ct_state != CTS_ORPHAN); + + mutex_enter(&ct->ct_events.ctq_lock); + ct->ct_state = CTS_ORPHAN; + for (ev = list_head(queue); ev; ev = list_next(queue, ev)) { + if ((ev->cte_flags & (CTE_INFO | CTE_ACK)) == 0) { + ev->cte_flags |= CTE_ACK; + ct->ct_evcnt--; + } + } + mutex_exit(&ct->ct_events.ctq_lock); + + ASSERT(ct->ct_evcnt == 0); +} + +/* + * contract_destroy + * + * Explicit contract destruction. Called when the contract is empty. + * The contract will actually stick around until all of its events are + * removed from the bundle and process bundle queues, and all fds + * which refer to it are closed. See contract_dtor if you are looking + * for what destroys the contract structure. + */ +void +contract_destroy(contract_t *ct) +{ + ASSERT(MUTEX_HELD(&ct->ct_lock)); + ASSERT(ct->ct_state != CTS_DEAD); + ASSERT(ct->ct_owner == NULL); + + ct->ct_state = CTS_DEAD; + cte_queue_drain(&ct->ct_events, 1); + mutex_exit(&ct->ct_lock); + mutex_enter(&ct->ct_type->ct_type_events.ctq_lock); + cte_trim(&ct->ct_type->ct_type_events, ct); + mutex_exit(&ct->ct_type->ct_type_events.ctq_lock); + mutex_enter(&ct->ct_lock); + ct->ct_type->ct_type_ops->contop_destroy(ct); + mutex_exit(&ct->ct_lock); + contract_rele(ct); +} + +/* + * contract_vnode_get + * + * Obtains the contract directory vnode for this contract, if there is + * one. The caller must VN_RELE the vnode when they are through using + * it. + */ +vnode_t * +contract_vnode_get(contract_t *ct, vfs_t *vfsp) +{ + contract_vnode_t *ctv; + vnode_t *vp = NULL; + + mutex_enter(&ct->ct_lock); + for (ctv = list_head(&ct->ct_vnodes); ctv != NULL; + ctv = list_next(&ct->ct_vnodes, ctv)) + if (ctv->ctv_vnode->v_vfsp == vfsp) { + vp = ctv->ctv_vnode; + VN_HOLD(vp); + break; + } + mutex_exit(&ct->ct_lock); + return (vp); +} + +/* + * contract_vnode_set + * + * Sets the contract directory vnode for this contract. We don't hold + * a reference on the vnode because we don't want to prevent it from + * being freed. The vnode's inactive entry point will take care of + * notifying us when it should be removed. + */ +void +contract_vnode_set(contract_t *ct, contract_vnode_t *ctv, vnode_t *vnode) +{ + mutex_enter(&ct->ct_lock); + ctv->ctv_vnode = vnode; + list_insert_head(&ct->ct_vnodes, ctv); + mutex_exit(&ct->ct_lock); +} + +/* + * contract_vnode_clear + * + * Removes this vnode as the contract directory vnode for this + * contract. Called from a contract directory's inactive entry point, + * this may return 0 indicating that the vnode gained another reference + * because of a simultaneous call to contract_vnode_get. + */ +int +contract_vnode_clear(contract_t *ct, contract_vnode_t *ctv) +{ + vnode_t *vp = ctv->ctv_vnode; + int result; + + mutex_enter(&ct->ct_lock); + mutex_enter(&vp->v_lock); + if (vp->v_count == 1) { + list_remove(&ct->ct_vnodes, ctv); + result = 1; + } else { + vp->v_count--; + result = 0; + } + mutex_exit(&vp->v_lock); + mutex_exit(&ct->ct_lock); + + return (result); +} + +/* + * contract_exit + * + * Abandons all contracts held by process p, and drains process p's + * bundle queues. Called on process exit. + */ +void +contract_exit(proc_t *p) +{ + contract_t *ct; + void *cookie = NULL; + int i; + + ASSERT(p == curproc); + + /* + * Abandon held contracts. contract_abandon knows enough not + * to remove the contract from the list a second time.
We are + * exiting, so no locks are needed here. But because + * contract_abandon will take p_lock, we need to make sure we + * aren't holding it. + */ + ASSERT(MUTEX_NOT_HELD(&p->p_lock)); + while ((ct = avl_destroy_nodes(&p->p_ct_held, &cookie)) != NULL) + VERIFY(contract_abandon(ct, p, 0) == 0); + + /* + * Drain pbundles. Because a process bundle queue could have + * been passed to another process, they may not be freed right + * away. + */ + if (p->p_ct_equeue) { + for (i = 0; i < CTT_MAXTYPE; i++) + if (p->p_ct_equeue[i]) + cte_queue_drain(p->p_ct_equeue[i], 0); + kmem_free(p->p_ct_equeue, CTT_MAXTYPE * sizeof (ct_equeue_t *)); + } +} + +/* + * contract_status_common + * + * Populates a ct_status structure. Used by contract types in their + * status entry points and ctfs when only common information is + * requested. + */ +void +contract_status_common(contract_t *ct, zone_t *zone, void *status, + model_t model) +{ + STRUCT_HANDLE(ct_status, lstatus); + + STRUCT_SET_HANDLE(lstatus, model, status); + ASSERT(MUTEX_HELD(&ct->ct_lock)); + if (zone->zone_uniqid == GLOBAL_ZONEUNIQID || + zone->zone_uniqid == ct->ct_czuniqid) { + zone_t *czone; + zoneid_t zoneid = -1; + + /* + * Contracts don't have holds on the zones they were + * created by. If the contract's zone no longer + * exists, we say its zoneid is -1. + */ + if (zone->zone_uniqid == ct->ct_czuniqid || + ct->ct_czuniqid == GLOBAL_ZONEUNIQID) { + zoneid = ct->ct_zoneid; + } else if ((czone = zone_find_by_id(ct->ct_zoneid)) != NULL) { + if (czone->zone_uniqid == ct->ct_mzuniqid) + zoneid = ct->ct_zoneid; + zone_rele(czone); + } + + STRUCT_FSET(lstatus, ctst_zoneid, zoneid); + STRUCT_FSET(lstatus, ctst_holder, + (ct->ct_state == CTS_OWNED) ? ct->ct_owner->p_pid : + (ct->ct_state == CTS_INHERITED) ? ct->ct_regent->ct_id : 0); + STRUCT_FSET(lstatus, ctst_state, ct->ct_state); + } else { + /* + * We are looking at a contract which was created by a + * process outside of our zone. We provide fake zone, + * holder, and state information. + */ + + STRUCT_FSET(lstatus, ctst_zoneid, zone->zone_id); + /* + * Since "zone" can't disappear until the calling ctfs + * is unmounted, zone_zsched must be valid. + */ + STRUCT_FSET(lstatus, ctst_holder, (ct->ct_state < CTS_ORPHAN) ? + zone->zone_zsched->p_pid : 0); + STRUCT_FSET(lstatus, ctst_state, (ct->ct_state < CTS_ORPHAN) ? + CTS_OWNED : ct->ct_state); + } + STRUCT_FSET(lstatus, ctst_nevents, ct->ct_evcnt); + STRUCT_FSET(lstatus, ctst_ntime, -1); + STRUCT_FSET(lstatus, ctst_qtime, -1); + STRUCT_FSET(lstatus, ctst_nevid, + ct->ct_nevent ? ct->ct_nevent->cte_id : 0); + STRUCT_FSET(lstatus, ctst_critical, ct->ct_ev_crit); + STRUCT_FSET(lstatus, ctst_informative, ct->ct_ev_info); + STRUCT_FSET(lstatus, ctst_cookie, ct->ct_cookie); + STRUCT_FSET(lstatus, ctst_type, ct->ct_type->ct_type_index); + STRUCT_FSET(lstatus, ctst_id, ct->ct_id); +} + +/* + * contract_checkcred + * + * Determines if the specified contract is owned by a process with the + * same effective uid as the specified credential. The caller must + * ensure that the uid spaces are the same. Returns 1 on success. 
+ */ +static int +contract_checkcred(contract_t *ct, const cred_t *cr) +{ + proc_t *p; + int fail = 1; + + mutex_enter(&ct->ct_lock); + if ((p = ct->ct_owner) != NULL) { + mutex_enter(&p->p_crlock); + fail = crgetuid(cr) != crgetuid(p->p_cred); + mutex_exit(&p->p_crlock); + } + mutex_exit(&ct->ct_lock); + + return (!fail); +} + +/* + * contract_owned + * + * Determines if the specified credential can view an event generated + * by the specified contract. If locked is set, the contract's ct_lock + * is held and the caller will need to do additional work to determine + * if they truly can see the event. Returns 1 on success. + */ +int +contract_owned(contract_t *ct, const cred_t *cr, int locked) +{ + int owner, cmatch, zmatch; + uint64_t zuniqid, mzuniqid; + uid_t euid; + + ASSERT(locked || MUTEX_NOT_HELD(&ct->ct_lock)); + + zuniqid = curproc->p_zone->zone_uniqid; + mzuniqid = contract_getzuniqid(ct); + euid = crgetuid(cr); + + /* + * owner: we own the contract + * cmatch: we are in the creator's (and holder's) zone and our + * uid matches the creator's or holder's + * zmatch: we are in the effective zone of a contract created + * in the global zone, and our uid matches that of the + * virtualized holder's (zsched/kcred) + */ + owner = (ct->ct_owner == curproc); + cmatch = (zuniqid == ct->ct_czuniqid) && + ((ct->ct_cuid == euid) || (!locked && contract_checkcred(ct, cr))); + zmatch = (ct->ct_czuniqid != mzuniqid) && (zuniqid == mzuniqid) && + (crgetuid(kcred) == euid); + + return (owner || cmatch || zmatch); +} + + +/* + * contract_type_init + * + * Called by contract types to register themselves with the contracts + * framework. + */ +ct_type_t * +contract_type_init(ct_typeid_t type, const char *name, contops_t *ops, + ct_f_default_t *dfault) +{ + ct_type_t *result; + + ASSERT(type < CTT_MAXTYPE); + + result = kmem_alloc(sizeof (ct_type_t), KM_SLEEP); + + mutex_init(&result->ct_type_lock, NULL, MUTEX_DEFAULT, NULL); + avl_create(&result->ct_type_avl, contract_compar, sizeof (contract_t), + offsetof(contract_t, ct_cttavl)); + cte_queue_create(&result->ct_type_events, CTEL_BUNDLE, 20, 0); + result->ct_type_name = name; + result->ct_type_ops = ops; + result->ct_type_default = dfault; + result->ct_type_evid = 0; + gethrestime(&result->ct_type_timestruc); + result->ct_type_index = type; + + ct_types[type] = result; + + return (result); +} + +/* + * contract_type_count + * + * Obtains the number of contracts of a particular type. + */ +int +contract_type_count(ct_type_t *type) +{ + ulong_t count; + + mutex_enter(&type->ct_type_lock); + count = avl_numnodes(&type->ct_type_avl); + mutex_exit(&type->ct_type_lock); + + return (count); +} + +/* + * contract_type_max + * + * Obtains the maximum contract id of a particular type. + */ +ctid_t +contract_type_max(ct_type_t *type) +{ + contract_t *ct; + ctid_t res; + + mutex_enter(&type->ct_type_lock); + ct = avl_last(&type->ct_type_avl); + res = ct ? ct->ct_id : -1; + mutex_exit(&type->ct_type_lock); + + return (res); +} + +/* + * contract_max + * + * Obtains the maximum contract id. + */ +ctid_t +contract_max(void) +{ + contract_t *ct; + ctid_t res; + + mutex_enter(&contract_lock); + ct = avl_last(&contract_avl); + res = ct ? ct->ct_id : -1; + mutex_exit(&contract_lock); + + return (res); +} + +/* + * contract_lookup_common + * + * Common code for contract_lookup and contract_type_lookup. Takes a + * pointer to an AVL tree to search in. Should be called with the + * appropriate tree-protecting lock held (unfortunately unassertable).
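+ *
+ * The intended iteration pattern over the lookup routines is, as a
+ * sketch ("visit" is hypothetical):
+ *
+ *	ctid_t id = 0;
+ *
+ *	while ((id = contract_lookup(zuniqid, id)) != -1) {
+ *		visit(id);
+ *		id++;
+ *	}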
+ */ +static ctid_t +contract_lookup_common(avl_tree_t *tree, uint64_t zuniqid, ctid_t current) +{ + contract_t template, *ct; + avl_index_t where; + ctid_t res; + + template.ct_id = current; + ct = avl_find(tree, &template, &where); + if (ct == NULL) + ct = avl_nearest(tree, where, AVL_AFTER); + if (zuniqid != GLOBAL_ZONEUNIQID) + while (ct && (contract_getzuniqid(ct) != zuniqid)) + ct = AVL_NEXT(tree, ct); + res = ct ? ct->ct_id : -1; + + return (res); +} + +/* + * contract_type_lookup + * + * Returns the next type contract after the specified id, visible from + * the specified zone. + */ +ctid_t +contract_type_lookup(ct_type_t *type, uint64_t zuniqid, ctid_t current) +{ + ctid_t res; + + mutex_enter(&type->ct_type_lock); + res = contract_lookup_common(&type->ct_type_avl, zuniqid, current); + mutex_exit(&type->ct_type_lock); + + return (res); +} + +/* + * contract_lookup + * + * Returns the next contract after the specified id, visible from the + * specified zone. + */ +ctid_t +contract_lookup(uint64_t zuniqid, ctid_t current) +{ + ctid_t res; + + mutex_enter(&contract_lock); + res = contract_lookup_common(&contract_avl, zuniqid, current); + mutex_exit(&contract_lock); + + return (res); +} + +/* + * contract_plookup + * + * Returns the next contract held by process p after the specified id, + * visible from the specified zone. Made complicated by the fact that + * contracts visible in a zone but held by processes outside of the + * zone need to appear as being held by zsched to zone members. + */ +ctid_t +contract_plookup(proc_t *p, ctid_t current, uint64_t zuniqid) +{ + contract_t template, *ct; + avl_index_t where; + ctid_t res; + + template.ct_id = current; + if (zuniqid != GLOBAL_ZONEUNIQID && + (p->p_flag & (SSYS|SZONETOP)) == (SSYS|SZONETOP)) { + /* This is inelegant. */ + mutex_enter(&contract_lock); + ct = avl_find(&contract_avl, &template, &where); + if (ct == NULL) + ct = avl_nearest(&contract_avl, where, AVL_AFTER); + while (ct && !(ct->ct_state < CTS_ORPHAN && + contract_getzuniqid(ct) == zuniqid && + ct->ct_czuniqid == GLOBAL_ZONEUNIQID)) + ct = AVL_NEXT(&contract_avl, ct); + res = ct ? ct->ct_id : -1; + mutex_exit(&contract_lock); + } else { + mutex_enter(&p->p_lock); + ct = avl_find(&p->p_ct_held, &template, &where); + if (ct == NULL) + ct = avl_nearest(&p->p_ct_held, where, AVL_AFTER); + res = ct ? ct->ct_id : -1; + mutex_exit(&p->p_lock); + } + + return (res); +} + +/* + * contract_ptr_common + * + * Common code for contract_ptr and contract_type_ptr. Takes a pointer + * to an AVL tree to search in. Should be called with the appropriate + * tree-protecting lock held (unfortunately unassertable). + */ +static contract_t * +contract_ptr_common(avl_tree_t *tree, ctid_t id, uint64_t zuniqid) +{ + contract_t template, *ct; + + template.ct_id = id; + ct = avl_find(tree, &template, NULL); + if (ct == NULL || (zuniqid != GLOBAL_ZONEUNIQID && + contract_getzuniqid(ct) != zuniqid)) { + return (NULL); + } + + /* + * Check to see if a thread is in the window in contract_rele + * between dropping the reference count and removing the + * contract from the type AVL. + */ + mutex_enter(&ct->ct_reflock); + if (ct->ct_ref) { + ct->ct_ref++; + mutex_exit(&ct->ct_reflock); + } else { + mutex_exit(&ct->ct_reflock); + ct = NULL; + } + + return (ct); +} + +/* + * contract_type_ptr + * + * Returns a pointer to the contract with the specified id. The + * contract is held, so the caller needs to release the reference when + * it is through with the contract. 
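+ *
+ * A minimal caller sketch:
+ *
+ *	contract_t *ct = contract_type_ptr(type, id, zuniqid);
+ *
+ *	if (ct != NULL) {
+ *		(use the contract)
+ *		contract_rele(ct);
+ *	}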
+ */ +contract_t * +contract_type_ptr(ct_type_t *type, ctid_t id, uint64_t zuniqid) +{ + contract_t *ct; + + mutex_enter(&type->ct_type_lock); + ct = contract_ptr_common(&type->ct_type_avl, id, zuniqid); + mutex_exit(&type->ct_type_lock); + + return (ct); +} + +/* + * contract_ptr + * + * Returns a pointer to the contract with the specified id. The + * contract is held, so the caller needs to release the reference when + * it is through with the contract. + */ +contract_t * +contract_ptr(ctid_t id, uint64_t zuniqid) +{ + contract_t *ct; + + mutex_enter(&contract_lock); + ct = contract_ptr_common(&contract_avl, id, zuniqid); + mutex_exit(&contract_lock); + + return (ct); +} + +/* + * contract_type_time + * + * Obtains the last time a contract of a particular type was created. + */ +void +contract_type_time(ct_type_t *type, timestruc_t *time) +{ + mutex_enter(&type->ct_type_lock); + *time = type->ct_type_timestruc; + mutex_exit(&type->ct_type_lock); +} + +/* + * contract_type_bundle + * + * Obtains a type's bundle queue. + */ +ct_equeue_t * +contract_type_bundle(ct_type_t *type) +{ + return (&type->ct_type_events); +} + +/* + * contract_type_pbundle + * + * Obtains a process's bundle queue. If one doesn't exist, one is + * created. Often used simply to ensure that a bundle queue is + * allocated. + */ +ct_equeue_t * +contract_type_pbundle(ct_type_t *type, proc_t *pp) +{ + /* + * If there isn't an array of bundle queues, allocate one. + */ + if (pp->p_ct_equeue == NULL) { + size_t size = CTT_MAXTYPE * sizeof (ct_equeue_t *); + ct_equeue_t **qa = kmem_zalloc(size, KM_SLEEP); + + mutex_enter(&pp->p_lock); + if (pp->p_ct_equeue) + kmem_free(qa, size); + else + pp->p_ct_equeue = qa; + mutex_exit(&pp->p_lock); + } + + /* + * If there isn't a bundle queue of the required type, allocate + * one. + */ + if (pp->p_ct_equeue[type->ct_type_index] == NULL) { + ct_equeue_t *q = kmem_zalloc(sizeof (ct_equeue_t), KM_SLEEP); + cte_queue_create(q, CTEL_PBUNDLE, 20, 1); + + mutex_enter(&pp->p_lock); + if (pp->p_ct_equeue[type->ct_type_index]) + cte_queue_drain(q, 0); + else + pp->p_ct_equeue[type->ct_type_index] = q; + mutex_exit(&pp->p_lock); + } + + return (pp->p_ct_equeue[type->ct_type_index]); +} + +/* + * ctmpl_free + * + * Frees a template. + */ +void +ctmpl_free(ct_template_t *template) +{ + mutex_destroy(&template->ctmpl_lock); + template->ctmpl_ops->ctop_free(template); +} + +/* + * ctmpl_dup + * + * Creates a copy of a template. + */ +ct_template_t * +ctmpl_dup(ct_template_t *template) +{ + ct_template_t *new; + + if (template == NULL) + return (NULL); + + new = template->ctmpl_ops->ctop_dup(template); + /* + * ctmpl_lock was taken by ctop_dup's call to ctmpl_copy and + * should have remained held until now. + */ + mutex_exit(&template->ctmpl_lock); + + return (new); +} + +/* + * ctmpl_set + * + * Sets the requested terms of a template.
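+ *
+ * A sketch of a caller setting the cookie term (the values and "tmpl"
+ * are hypothetical):
+ *
+ *	ct_param_t param;
+ *	int error;
+ *
+ *	param.ctpm_id = CTP_COOKIE;
+ *	param.ctpm_value = 0xca11ab1e;
+ *	error = ctmpl_set(tmpl, &param, CRED());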
+ */ +int +ctmpl_set(ct_template_t *template, ct_param_t *param, const cred_t *cr) +{ + int result = 0; + + mutex_enter(&template->ctmpl_lock); + switch (param->ctpm_id) { + case CTP_COOKIE: + template->ctmpl_cookie = param->ctpm_value; + break; + case CTP_EV_INFO: + if (param->ctpm_value & + ~(uint64_t)template->ctmpl_ops->allevents) + result = EINVAL; + else + template->ctmpl_ev_info = param->ctpm_value; + break; + case CTP_EV_CRITICAL: + if (param->ctpm_value & + ~(uint64_t)template->ctmpl_ops->allevents) { + result = EINVAL; + break; + } else if ((~template->ctmpl_ev_crit & + param->ctpm_value) == 0) { + /* + * Assume that a pure reduction of the critical + * set is allowed by the contract type. + */ + template->ctmpl_ev_crit = param->ctpm_value; + break; + } + /* + * There may be restrictions on what we can make + * critical, so we defer to the judgement of the + * contract type. + */ + /* FALLTHROUGH */ + default: + result = template->ctmpl_ops->ctop_set(template, param, cr); + } + mutex_exit(&template->ctmpl_lock); + + return (result); +} + +/* + * ctmpl_get + * + * Obtains the requested terms from a template. + */ +int +ctmpl_get(ct_template_t *template, ct_param_t *param) +{ + int result = 0; + + mutex_enter(&template->ctmpl_lock); + switch (param->ctpm_id) { + case CTP_COOKIE: + param->ctpm_value = template->ctmpl_cookie; + break; + case CTP_EV_INFO: + param->ctpm_value = template->ctmpl_ev_info; + break; + case CTP_EV_CRITICAL: + param->ctpm_value = template->ctmpl_ev_crit; + break; + default: + result = template->ctmpl_ops->ctop_get(template, param); + } + mutex_exit(&template->ctmpl_lock); + + return (result); +} + +/* + * ctmpl_makecurrent + * + * Used by ctmpl_activate and ctmpl_clear to set the current thread's + * active template. Frees the old active template, if there was one. + */ +static void +ctmpl_makecurrent(ct_template_t *template, ct_template_t *new) +{ + klwp_t *curlwp = ttolwp(curthread); + proc_t *p = curproc; + ct_template_t *old; + + mutex_enter(&p->p_lock); + old = curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index]; + curlwp->lwp_ct_active[template->ctmpl_type->ct_type_index] = new; + mutex_exit(&p->p_lock); + + if (old) + ctmpl_free(old); +} + +/* + * ctmpl_activate + * + * Copies the specified template as the current thread's active + * template of that type. + */ +void +ctmpl_activate(ct_template_t *template) +{ + ctmpl_makecurrent(template, ctmpl_dup(template)); +} + +/* + * ctmpl_clear + * + * Clears the current thread's active template of the same type as + * the specified template. + */ +void +ctmpl_clear(ct_template_t *template) +{ + ctmpl_makecurrent(template, NULL); +} + +/* + * ctmpl_create + * + * Creates a new contract using the specified template. + */ +int +ctmpl_create(ct_template_t *template) +{ + return (template->ctmpl_ops->ctop_create(template)); +} + +/* + * ctmpl_init + * + * Initializes the common portion of a new contract template. + */ +void +ctmpl_init(ct_template_t *new, ctmplops_t *ops, ct_type_t *type, void *data) +{ + mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL); + new->ctmpl_ops = ops; + new->ctmpl_type = type; + new->ctmpl_data = data; + new->ctmpl_ev_info = new->ctmpl_ev_crit = 0; + new->ctmpl_cookie = 0; +} + +/* + * ctmpl_copy + * + * Copies the common portions of a contract template. Intended for use + * by a contract type's ctop_dup template op. Returns with the old + * template's lock held, which should remain held until the + * template op returns (it is dropped by ctmpl_dup).
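+ *
+ * An illustrative shape for a type's ctop_dup op built on this
+ * routine (foo_tmpl_t and its members are hypothetical):
+ *
+ *	static ct_template_t *
+ *	foo_tmpl_dup(ct_template_t *old)
+ *	{
+ *		foo_tmpl_t *new = kmem_alloc(sizeof (foo_tmpl_t), KM_SLEEP);
+ *
+ *		ctmpl_copy(&new->foo_ctmpl, old);
+ *		new->foo_ctmpl.ctmpl_data = new;
+ *		(copy type-specific terms here; old's lock is still held)
+ *		return (&new->foo_ctmpl);
+ *	}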
+ */ +void +ctmpl_copy(ct_template_t *new, ct_template_t *old) +{ + mutex_init(&new->ctmpl_lock, NULL, MUTEX_DEFAULT, NULL); + mutex_enter(&old->ctmpl_lock); + new->ctmpl_ops = old->ctmpl_ops; + new->ctmpl_type = old->ctmpl_type; + new->ctmpl_ev_crit = old->ctmpl_ev_crit; + new->ctmpl_ev_info = old->ctmpl_ev_info; + new->ctmpl_cookie = old->ctmpl_cookie; +} + +/* + * ctmpl_create_inval + * + * Returns EINVAL. Provided for the convenience of those contract + * types which don't support ct_tmpl_create(3contract) and would + * otherwise need to create their own stub for the ctop_create template + * op. + */ +/*ARGSUSED*/ +int +ctmpl_create_inval(ct_template_t *template) +{ + return (EINVAL); +} + + +/* + * cte_queue_create + * + * Initializes a queue of a particular type. If dynamic is set, the + * queue is to be freed when its last listener is removed after being + * drained. + */ +static void +cte_queue_create(ct_equeue_t *q, ct_listnum_t list, int maxinf, int dynamic) +{ + mutex_init(&q->ctq_lock, NULL, MUTEX_DEFAULT, NULL); + q->ctq_listno = list; + list_create(&q->ctq_events, sizeof (ct_kevent_t), + offsetof(ct_kevent_t, cte_nodes[list].ctm_node)); + list_create(&q->ctq_listeners, sizeof (ct_listener_t), + offsetof(ct_listener_t, ctl_allnode)); + list_create(&q->ctq_tail, sizeof (ct_listener_t), + offsetof(ct_listener_t, ctl_tailnode)); + gethrestime(&q->ctq_atime); + q->ctq_nlisteners = 0; + q->ctq_nreliable = 0; + q->ctq_ninf = 0; + q->ctq_max = maxinf; + + /* + * Bundle queues and contract queues are embedded in other + * structures and are implicitly reference counted by virtue + * of their vnodes' indirect hold on their contracts. Process + * bundle queues are dynamically allocated and may persist + * after the death of the process, so they must be explicitly + * reference counted. + */ + q->ctq_flags = dynamic ? CTQ_REFFED : 0; +} + +/* + * cte_queue_destroy + * + * Destroys the specified queue. The queue is freed if reference + * counted. + */ +static void +cte_queue_destroy(ct_equeue_t *q) +{ + ASSERT(q->ctq_flags & CTQ_DEAD); + ASSERT(q->ctq_nlisteners == 0); + ASSERT(q->ctq_nreliable == 0); + list_destroy(&q->ctq_events); + list_destroy(&q->ctq_listeners); + list_destroy(&q->ctq_tail); + mutex_destroy(&q->ctq_lock); + if (q->ctq_flags & CTQ_REFFED) + kmem_free(q, sizeof (ct_equeue_t)); +} + +/* + * cte_hold + * + * Takes a hold on the specified event. + */ +static void +cte_hold(ct_kevent_t *e) +{ + mutex_enter(&e->cte_lock); + ASSERT(e->cte_refs > 0); + e->cte_refs++; + mutex_exit(&e->cte_lock); +} + +/* + * cte_rele + * + * Releases a hold on the specified event. If the caller had the last + * reference, frees the event and releases its hold on the contract + * that generated it. + */ +static void +cte_rele(ct_kevent_t *e) +{ + mutex_enter(&e->cte_lock); + ASSERT(e->cte_refs > 0); + if (--e->cte_refs) { + mutex_exit(&e->cte_lock); + return; + } + + contract_rele(e->cte_contract); + + mutex_destroy(&e->cte_lock); + if (e->cte_data) + nvlist_free(e->cte_data); + if (e->cte_gdata) + nvlist_free(e->cte_gdata); + kmem_free(e, sizeof (ct_kevent_t)); +} + +/* + * cte_qrele + * + * Remove this listener's hold on the specified event, removing and + * releasing the queue's hold on the event if appropriate.
+ */ +static void +cte_qrele(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e) +{ + ct_member_t *member = &e->cte_nodes[q->ctq_listno]; + + ASSERT(MUTEX_HELD(&q->ctq_lock)); + + if (l->ctl_flags & CTLF_RELIABLE) + member->ctm_nreliable--; + if ((--member->ctm_refs == 0) && member->ctm_trimmed) { + member->ctm_trimmed = 0; + list_remove(&q->ctq_events, e); + cte_rele(e); + } +} + +/* + * cte_qmove + * + * Move this listener to the specified event in the queue. + */ +static ct_kevent_t * +cte_qmove(ct_equeue_t *q, ct_listener_t *l, ct_kevent_t *e) +{ + ct_kevent_t *olde; + + ASSERT(MUTEX_HELD(&q->ctq_lock)); + ASSERT(l->ctl_equeue == q); + + if ((olde = l->ctl_position) == NULL) + list_remove(&q->ctq_tail, l); + + while (e != NULL && e->cte_nodes[q->ctq_listno].ctm_trimmed) + e = list_next(&q->ctq_events, e); + + if (e != NULL) { + e->cte_nodes[q->ctq_listno].ctm_refs++; + if (l->ctl_flags & CTLF_RELIABLE) + e->cte_nodes[q->ctq_listno].ctm_nreliable++; + } else { + list_insert_tail(&q->ctq_tail, l); + } + + l->ctl_position = e; + if (olde) + cte_qrele(q, l, olde); + + return (e); +} + +/* + * cte_checkcred + * + * Determines if the specified event's contract is owned by a process + * with the same effective uid as the specified credential. Called + * after a failed call to contract_owned with locked set. Because it + * drops the queue lock, its caller (cte_qreadable) needs to make sure + * we're still in the same place after we return. Returns 1 on + * success. + */ +static int +cte_checkcred(ct_equeue_t *q, ct_kevent_t *e, const cred_t *cr) +{ + int result; + contract_t *ct = e->cte_contract; + + cte_hold(e); + mutex_exit(&q->ctq_lock); + result = curproc->p_zone->zone_uniqid == ct->ct_czuniqid && + contract_checkcred(ct, cr); + mutex_enter(&q->ctq_lock); + cte_rele(e); + + return (result); +} + +/* + * cte_qreadable + * + * Ensures that the listener is pointing to a valid event that the + * caller has the credentials to read. Returns 0 if we can read the + * event we're pointing to. + */ +static int +cte_qreadable(ct_equeue_t *q, ct_listener_t *l, const cred_t *cr, + uint64_t zuniqid, int crit) +{ + ct_kevent_t *e, *next; + contract_t *ct; + + ASSERT(MUTEX_HELD(&q->ctq_lock)); + ASSERT(l->ctl_equeue == q); + + if (l->ctl_flags & CTLF_COPYOUT) + return (1); + + next = l->ctl_position; + while (e = cte_qmove(q, l, next)) { + ct = e->cte_contract; + /* + * Check obvious things first. If we are looking for a + * critical message, is this one? If we aren't in the + * global zone, is this message meant for us? + */ + if ((crit && (e->cte_flags & (CTE_INFO | CTE_ACK))) || + (cr != NULL && zuniqid != GLOBAL_ZONEUNIQID && + zuniqid != contract_getzuniqid(ct))) { + + next = list_next(&q->ctq_events, e); + + /* + * Next, see if our effective uid equals that of owner + * or author of the contract. Since we are holding the + * queue lock, contract_owned can't always check if we + * have the same effective uid as the contract's + * owner. If it comes to that, it fails and we take + * the slow(er) path. + */ + } else if (cr != NULL && !contract_owned(ct, cr, B_TRUE)) { + + /* + * At this point we either don't have any claim + * to this contract or we match the effective + * uid of the owner but couldn't tell. We + * first test for a NULL holder so that events + * from orphans and inherited contracts avoid + * the penalty phase. 
+ */ + if (e->cte_contract->ct_owner == NULL && + !secpolicy_contract_observer_choice(cr)) + next = list_next(&q->ctq_events, e); + + /* + * cte_checkcred will juggle locks to see if we + * have the same uid as the event's contract's + * current owner. If it succeeds, we have to + * make sure we are in the same point in the + * queue. + */ + else if (cte_checkcred(q, e, cr) && + l->ctl_position == e) + break; + + /* + * cte_checkcred failed; see if we're in the + * same place. + */ + else if (l->ctl_position == e) + if (secpolicy_contract_observer_choice(cr)) + break; + else + next = list_next(&q->ctq_events, e); + + /* + * cte_checkcred failed, and our position was + * changed. Start from there. + */ + else + next = l->ctl_position; + } else { + break; + } + } + + /* + * We check for CTLF_COPYOUT again in case we dropped the queue + * lock in cte_checkcred. + */ + return ((l->ctl_flags & CTLF_COPYOUT) || (l->ctl_position == NULL)); +} + +/* + * cte_qwakeup + * + * Wakes up any waiting listeners and points them at the specified event. + */ +static void +cte_qwakeup(ct_equeue_t *q, ct_kevent_t *e) +{ + ct_listener_t *l; + + ASSERT(MUTEX_HELD(&q->ctq_lock)); + + while (l = list_head(&q->ctq_tail)) { + list_remove(&q->ctq_tail, l); + e->cte_nodes[q->ctq_listno].ctm_refs++; + if (l->ctl_flags & CTLF_RELIABLE) + e->cte_nodes[q->ctq_listno].ctm_nreliable++; + l->ctl_position = e; + cv_signal(&l->ctl_cv); + pollwakeup(&l->ctl_pollhead, POLLIN); + } +} + +/* + * cte_copy + * + * Copies events from the specified contract event queue to the + * end of the specified process bundle queue. Only called from + * contract_adopt. + * + * We copy to the end of the target queue instead of mixing the events + * in their proper order because otherwise the act of adopting a + * contract would require a process to reset all process bundle + * listeners it needed to see the new events. This would, in turn, + * require the process to keep track of which preexisting events had + * already been processed. + */ +static void +cte_copy(ct_equeue_t *q, ct_equeue_t *newq) +{ + ct_kevent_t *e, *first = NULL; + + ASSERT(q->ctq_listno == CTEL_CONTRACT); + ASSERT(newq->ctq_listno == CTEL_PBUNDLE); + + mutex_enter(&q->ctq_lock); + mutex_enter(&newq->ctq_lock); + + /* + * For now, only copy critical events. + */ + for (e = list_head(&q->ctq_events); e != NULL; + e = list_next(&q->ctq_events, e)) { + if ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0) { + if (first == NULL) + first = e; + list_insert_tail(&newq->ctq_events, e); + cte_hold(e); + } + } + + mutex_exit(&q->ctq_lock); + + if (first) + cte_qwakeup(newq, first); + + mutex_exit(&newq->ctq_lock); +} + +/* + * cte_trim + * + * Trims unneeded events from an event queue. The algorithm works as + * follows: + * + * Removes all informative and acknowledged critical events until the + * first referenced event is found. + * + * If a contract is specified, removes all events (regardless of + * acknowledgement) generated by that contract until the first event + * referenced by a reliable listener is found. Referenced events are + * removed by marking them "trimmed". Such events will be removed + * when the last reference is dropped and will be skipped by future + * listeners. + * + * This is pretty basic. Ideally this should remove from the middle of + * the list (i.e. beyond the first referenced event), and even + * referenced events.
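+ *
+ * A worked example: given a queue containing, in order,
+ *
+ *	E1 (informative, unreferenced)
+ *	E2 (critical, acknowledged, unreferenced)
+ *	E3 (critical, unacknowledged, referenced)
+ *	E4 (informative, unreferenced)
+ *
+ * cte_trim(q, NULL) removes E1 and E2 from the queue, stops at the
+ * referenced event E3, and leaves E4 in place.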
+ */ +static void +cte_trim(ct_equeue_t *q, contract_t *ct) +{ + ct_kevent_t *e, *next; + int flags, stopper; + int start = 1; + + ASSERT(MUTEX_HELD(&q->ctq_lock)); + + for (e = list_head(&q->ctq_events); e != NULL; e = next) { + next = list_next(&q->ctq_events, e); + flags = e->cte_flags; + stopper = (q->ctq_listno != CTEL_PBUNDLE) && + (e->cte_nodes[q->ctq_listno].ctm_nreliable > 0); + if (e->cte_nodes[q->ctq_listno].ctm_refs == 0) { + if ((start && (flags & (CTE_INFO | CTE_ACK))) || + (e->cte_contract == ct)) { + /* + * Toss informative and ACKed critical messages. + */ + list_remove(&q->ctq_events, e); + cte_rele(e); + } + } else if ((e->cte_contract == ct) && !stopper) { + ASSERT(q->ctq_nlisteners != 0); + e->cte_nodes[q->ctq_listno].ctm_trimmed = 1; + } else if (ct && !stopper) { + start = 0; + } else { + /* + * Don't free messages past the first reader. + */ + break; + } + } +} + +/* + * cte_queue_drain + * + * Drain all events from the specified queue, and mark it dead. If + * "ack" is set, acknowledge any critical events we find along the + * way. + */ +static void +cte_queue_drain(ct_equeue_t *q, int ack) +{ + ct_kevent_t *e, *next; + ct_listener_t *l; + + mutex_enter(&q->ctq_lock); + + for (e = list_head(&q->ctq_events); e != NULL; e = next) { + next = list_next(&q->ctq_events, e); + if (ack && ((e->cte_flags & (CTE_INFO | CTE_ACK)) == 0)) { + /* + * Make sure critical messages are eventually + * removed from the bundle queues. + */ + mutex_enter(&e->cte_lock); + e->cte_flags |= CTE_ACK; + mutex_exit(&e->cte_lock); + ASSERT(MUTEX_HELD(&e->cte_contract->ct_lock)); + e->cte_contract->ct_evcnt--; + } + list_remove(&q->ctq_events, e); + e->cte_nodes[q->ctq_listno].ctm_refs = 0; + e->cte_nodes[q->ctq_listno].ctm_nreliable = 0; + e->cte_nodes[q->ctq_listno].ctm_trimmed = 0; + cte_rele(e); + } + + /* + * This is necessary only because of CTEL_PBUNDLE listeners; + * the events they point to can move from one pbundle to + * another. Fortunately, this only happens if the contract is + * inherited, which (in turn) only happens if the process + * exits, which means it's an all-or-nothing deal. If this + * wasn't the case, we would instead need to keep track of + * listeners on a per-event basis, not just a per-queue basis. + * This would have the side benefit of letting us clean up + * trimmed events sooner (i.e. immediately), but would + * unfortunately make events even bigger than they already + * are. + */ + for (l = list_head(&q->ctq_listeners); l; + l = list_next(&q->ctq_listeners, l)) { + l->ctl_flags |= CTLF_DEAD; + if (l->ctl_position) { + l->ctl_position = NULL; + list_insert_tail(&q->ctq_tail, l); + } + cv_broadcast(&l->ctl_cv); + } + + /* + * Disallow events. + */ + q->ctq_flags |= CTQ_DEAD; + + /* + * If we represent the last reference to a reference counted + * process bundle queue, free it. + */ + if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_nlisteners == 0)) + cte_queue_destroy(q); + else + mutex_exit(&q->ctq_lock); +} + +/* + * cte_publish + * + * Publishes an event to a specific queue. Only called by + * cte_publish_all. + */ +static void +cte_publish(ct_equeue_t *q, ct_kevent_t *e, timespec_t *tsp) +{ + ASSERT(MUTEX_HELD(&q->ctq_lock)); + + q->ctq_atime = *tsp; + + /* + * Don't publish if the event is informative and there aren't + * any listeners, or if the queue has been shut down. 
+ */ + if (((q->ctq_nlisteners == 0) && (e->cte_flags & (CTE_INFO|CTE_ACK))) || + (q->ctq_flags & CTQ_DEAD)) { + mutex_exit(&q->ctq_lock); + cte_rele(e); + return; + } + + /* + * Enqueue event + */ + list_insert_tail(&q->ctq_events, e); + + /* + * Check for waiting listeners + */ + cte_qwakeup(q, e); + + /* + * Trim unnecessary events from the queue. + */ + cte_trim(q, NULL); + mutex_exit(&q->ctq_lock); +} + +/* + * cte_publish_all + * + * Publish an event to all necessary event queues. The event, e, must + * be zallocated by the caller, and the event's flags and type must be + * set. The rest of the event's fields are initialized here. + */ +void +cte_publish_all(contract_t *ct, ct_kevent_t *e, nvlist_t *data, nvlist_t *gdata) +{ + ct_equeue_t *q; + timespec_t ts; + + e->cte_contract = ct; + e->cte_data = data; + e->cte_gdata = gdata; + e->cte_refs = 3; + e->cte_id = atomic_add_64_nv(&ct->ct_type->ct_type_evid, 1); + contract_hold(ct); + + gethrestime(&ts); + + /* + * ct_evtlock simply (and only) ensures that two events sent + * from the same contract are delivered to all queues in the + * same order. + */ + mutex_enter(&ct->ct_evtlock); + + /* + * CTEL_CONTRACT - First deliver to the contract queue, acking + * the event if the contract has been orphaned. + */ + mutex_enter(&ct->ct_lock); + mutex_enter(&ct->ct_events.ctq_lock); + if ((e->cte_flags & CTE_INFO) == 0) { + if (ct->ct_state >= CTS_ORPHAN) + e->cte_flags |= CTE_ACK; + else + ct->ct_evcnt++; + } + mutex_exit(&ct->ct_lock); + cte_publish(&ct->ct_events, e, &ts); + + /* + * CTEL_BUNDLE - Next deliver to the contract type's bundle + * queue. + */ + mutex_enter(&ct->ct_type->ct_type_events.ctq_lock); + cte_publish(&ct->ct_type->ct_type_events, e, &ts); + + /* + * CTEL_PBUNDLE - Finally, if the contract has an owner, + * deliver to the owner's process bundle queue. + */ + mutex_enter(&ct->ct_lock); + if (ct->ct_owner) { + /* + * proc_exit doesn't free event queues until it has + * abandoned all contracts. + */ + ASSERT(ct->ct_owner->p_ct_equeue); + ASSERT(ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]); + q = ct->ct_owner->p_ct_equeue[ct->ct_type->ct_type_index]; + mutex_enter(&q->ctq_lock); + mutex_exit(&ct->ct_lock); + cte_publish(q, e, &ts); + } else { + mutex_exit(&ct->ct_lock); + cte_rele(e); + } + + mutex_exit(&ct->ct_evtlock); +} + +/* + * cte_add_listener + * + * Add a new listener to an event queue. + */ +void +cte_add_listener(ct_equeue_t *q, ct_listener_t *l) +{ + cv_init(&l->ctl_cv, NULL, CV_DEFAULT, NULL); + l->ctl_equeue = q; + l->ctl_position = NULL; + l->ctl_flags = 0; + + mutex_enter(&q->ctq_lock); + list_insert_head(&q->ctq_tail, l); + list_insert_head(&q->ctq_listeners, l); + q->ctq_nlisteners++; + mutex_exit(&q->ctq_lock); +} + +/* + * cte_remove_listener + * + * Remove a listener from an event queue. No other queue activities + * (e.g. cte_get_event) may be in progress at this endpoint when this + * is called. + */ +void +cte_remove_listener(ct_listener_t *l) +{ + ct_equeue_t *q = l->ctl_equeue; + ct_kevent_t *e; + + mutex_enter(&q->ctq_lock); + + ASSERT((l->ctl_flags & (CTLF_COPYOUT|CTLF_RESET)) == 0); + + if ((e = l->ctl_position) != NULL) + cte_qrele(q, l, e); + else + list_remove(&q->ctq_tail, l); + l->ctl_position = NULL; + + q->ctq_nlisteners--; + list_remove(&q->ctq_listeners, l); + + if (l->ctl_flags & CTLF_RELIABLE) + q->ctq_nreliable--; + + /* + * If we are the last listener of a dead reference counted + * queue (i.e. a process bundle) we free it.
Otherwise we just + * trim any events which may have been kept around for our + * benefit. + */ + if ((q->ctq_flags & CTQ_REFFED) && (q->ctq_flags & CTQ_DEAD) && + (q->ctq_nlisteners == 0)) { + cte_queue_destroy(q); + } else { + cte_trim(q, NULL); + mutex_exit(&q->ctq_lock); + } +} + +/* + * cte_reset_listener + * + * Moves a listener's queue pointer to the beginning of the queue. + */ +void +cte_reset_listener(ct_listener_t *l) +{ + ct_equeue_t *q = l->ctl_equeue; + + mutex_enter(&q->ctq_lock); + + /* + * We allow an asynchronous reset because it doesn't make a + * whole lot of sense to make reset block or fail. We already + * have most of the mechanism needed thanks to queue trimming, + * so implementing it isn't a big deal. + */ + if (l->ctl_flags & CTLF_COPYOUT) + l->ctl_flags |= CTLF_RESET; + + (void) cte_qmove(q, l, list_head(&q->ctq_events)); + + /* + * Inform blocked readers. + */ + cv_broadcast(&l->ctl_cv); + pollwakeup(&l->ctl_pollhead, POLLIN); + mutex_exit(&q->ctq_lock); +} + +/* + * cte_next_event + * + * Moves the event pointer for the specified listener to the next event + * on the queue. To avoid races, this movement only occurs if the + * specified event id matches that of the current event. This is used + * primarily to skip events that have been read but whose extended data + * haven't been copied out. + */ +int +cte_next_event(ct_listener_t *l, uint64_t id) +{ + ct_equeue_t *q = l->ctl_equeue; + ct_kevent_t *old; + + mutex_enter(&q->ctq_lock); + + if (l->ctl_flags & CTLF_COPYOUT) + l->ctl_flags |= CTLF_RESET; + + if (((old = l->ctl_position) != NULL) && (old->cte_id == id)) + (void) cte_qmove(q, l, list_next(&q->ctq_events, old)); + + mutex_exit(&q->ctq_lock); + + return (0); +} + +/* + * cte_get_event + * + * Reads an event from an event endpoint. If "nonblock" is clear, we + * block until a suitable event is ready. If "crit" is set, we only + * read critical events. Note that while "cr" is the caller's cred, + * "zuniqid" is the unique id of the zone the calling contract + * filesystem was mounted in. + */ +int +cte_get_event(ct_listener_t *l, int nonblock, void *uaddr, const cred_t *cr, + uint64_t zuniqid, int crit) +{ + ct_equeue_t *q = l->ctl_equeue; + ct_kevent_t *temp; + int result = 0; + int partial = 0; + size_t size, gsize, len; + model_t mdl = get_udatamodel(); + STRUCT_DECL(ct_event, ev); + STRUCT_INIT(ev, mdl); + + /* + * cte_qreadable checks for CTLF_COPYOUT as well as ensures + * that there exists, and we are pointing to, an appropriate + * event. It may temporarily drop ctq_lock, but that doesn't + * really matter to us. + */ + mutex_enter(&q->ctq_lock); + while (cte_qreadable(q, l, cr, zuniqid, crit)) { + if (nonblock) { + result = EAGAIN; + goto error; + } + if (q->ctq_flags & CTQ_DEAD) { + result = EIDRM; + goto error; + } + result = cv_wait_sig(&l->ctl_cv, &q->ctq_lock); + if (result == 0) { + result = EINTR; + goto error; + } + } + temp = l->ctl_position; + cte_hold(temp); + l->ctl_flags |= CTLF_COPYOUT; + mutex_exit(&q->ctq_lock); + + /* + * We now have an event. Copy in the user event structure to + * see how much space we have to work with. + */ + result = copyin(uaddr, STRUCT_BUF(ev), STRUCT_SIZE(ev)); + if (result) + goto copyerr; + + /* + * Determine what data we have and what the user should be + * allowed to see. 
+ */ + size = gsize = 0; + if (temp->cte_data) { + VERIFY(nvlist_size(temp->cte_data, &size, + NV_ENCODE_NATIVE) == 0); + ASSERT(size != 0); + } + if (zuniqid == GLOBAL_ZONEUNIQID && temp->cte_gdata) { + VERIFY(nvlist_size(temp->cte_gdata, &gsize, + NV_ENCODE_NATIVE) == 0); + ASSERT(gsize != 0); + } + + /* + * If we have enough space, copy out the extended event data. + */ + len = size + gsize; + if (len) { + if (STRUCT_FGET(ev, ctev_nbytes) >= len) { + char *buf = kmem_alloc(len, KM_SLEEP); + + if (size) + VERIFY(nvlist_pack(temp->cte_data, &buf, &size, + NV_ENCODE_NATIVE, KM_SLEEP) == 0); + if (gsize) { + char *tmp = buf + size; + + VERIFY(nvlist_pack(temp->cte_gdata, &tmp, + &gsize, NV_ENCODE_NATIVE, KM_SLEEP) == 0); + } + + /* This shouldn't have changed */ + ASSERT(size + gsize == len); + result = copyout(buf, STRUCT_FGETP(ev, ctev_buffer), + len); + kmem_free(buf, len); + if (result) + goto copyerr; + } else { + partial = 1; + } + } + + /* + * Copy out the common event data. + */ + STRUCT_FSET(ev, ctev_id, temp->cte_contract->ct_id); + STRUCT_FSET(ev, ctev_evid, temp->cte_id); + STRUCT_FSET(ev, ctev_cttype, + temp->cte_contract->ct_type->ct_type_index); + STRUCT_FSET(ev, ctev_flags, temp->cte_flags & (CTE_ACK|CTE_INFO)); + STRUCT_FSET(ev, ctev_type, temp->cte_type); + STRUCT_FSET(ev, ctev_nbytes, len); + STRUCT_FSET(ev, ctev_goffset, size); + result = copyout(STRUCT_BUF(ev), uaddr, STRUCT_SIZE(ev)); + +copyerr: + /* + * Only move our location in the queue if all copyouts were + * successful, the caller provided enough space for the entire + * event, and our endpoint wasn't reset or otherwise moved by + * another thread. + */ + mutex_enter(&q->ctq_lock); + if (result) + result = EFAULT; + else if (!partial && ((l->ctl_flags & CTLF_RESET) == 0) && + (l->ctl_position == temp)) + (void) cte_qmove(q, l, list_next(&q->ctq_events, temp)); + l->ctl_flags &= ~(CTLF_COPYOUT|CTLF_RESET); + /* + * Signal any readers blocked on our CTLF_COPYOUT. + */ + cv_signal(&l->ctl_cv); + cte_rele(temp); + +error: + mutex_exit(&q->ctq_lock); + return (result); +} + +/* + * cte_set_reliable + * + * Requests that events be reliably delivered to an event endpoint. + * Unread informative and acknowledged critical events will not be + * removed from the queue until this listener reads or skips them. + * Because a listener could maliciously request reliable delivery and + * then do nothing, this requires that PRIV_CONTRACT_EVENT be in the + * caller's effective set. + */ +int +cte_set_reliable(ct_listener_t *l, const cred_t *cr) +{ + ct_equeue_t *q = l->ctl_equeue; + int error; + + if ((error = secpolicy_contract_event(cr)) != 0) + return (error); + + mutex_enter(&q->ctq_lock); + if ((l->ctl_flags & CTLF_RELIABLE) == 0) { + l->ctl_flags |= CTLF_RELIABLE; + q->ctq_nreliable++; + if (l->ctl_position != NULL) + l->ctl_position->cte_nodes[q->ctq_listno]. + ctm_nreliable++; + } + mutex_exit(&q->ctq_lock); + + return (0); +}
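+
+/*
+ * For reference, the expected endpoint life cycle as seen from a
+ * consumer such as ctfs, as a sketch (error handling elided; the
+ * listener is assumed embedded in a hypothetical endpoint structure):
+ *
+ *	cte_add_listener(q, &endpt->endpt_listener);
+ *	...
+ *	error = cte_get_event(&endpt->endpt_listener, nonblock, uaddr,
+ *	    cr, zuniqid, crit);
+ *	...
+ *	cte_remove_listener(&endpt->endpt_listener);
+ */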