Diffstat (limited to 'usr/src/uts/common/os/flock.c')
-rw-r--r--  usr/src/uts/common/os/flock.c  4220
1 file changed, 4220 insertions, 0 deletions
diff --git a/usr/src/uts/common/os/flock.c b/usr/src/uts/common/os/flock.c new file mode 100644 index 0000000000..a3028b75dc --- /dev/null +++ b/usr/src/uts/common/os/flock.c @@ -0,0 +1,4220 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* ONC_PLUS EXTRACT START */ + +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ +/* All Rights Reserved */ + +#pragma ident "%Z%%M% %I% %E% SMI" + +#include <sys/flock_impl.h> +#include <sys/vfs.h> +#include <sys/t_lock.h> /* for <sys/callb.h> */ +#include <sys/callb.h> +#include <sys/clconf.h> +#include <sys/cladm.h> +#include <sys/nbmlock.h> +#include <sys/cred.h> +#include <sys/policy.h> + +/* + * The following four variables are for statistics purposes and they are + * not protected by locks. They may not be accurate but will at least be + * close to the actual value. + */ + +int flk_lock_allocs; +int flk_lock_frees; +int edge_allocs; +int edge_frees; +int flk_proc_vertex_allocs; +int flk_proc_edge_allocs; +int flk_proc_vertex_frees; +int flk_proc_edge_frees; + +static kmutex_t flock_lock; + +#ifdef DEBUG +int check_debug = 0; +#define CHECK_ACTIVE_LOCKS(gp) if (check_debug) \ + check_active_locks(gp); +#define CHECK_SLEEPING_LOCKS(gp) if (check_debug) \ + check_sleeping_locks(gp); +#define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) \ + if (check_debug) \ + check_owner_locks(gp, pid, sysid, vp); +#define CHECK_LOCK_TRANSITION(old_state, new_state) \ + { \ + if (check_lock_transition(old_state, new_state)) { \ + cmn_err(CE_PANIC, "Illegal lock transition \ + from %d to %d", old_state, new_state); \ + } \ + } +#else + +#define CHECK_ACTIVE_LOCKS(gp) +#define CHECK_SLEEPING_LOCKS(gp) +#define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) +#define CHECK_LOCK_TRANSITION(old_state, new_state) + +#endif /* DEBUG */ + +struct kmem_cache *flk_edge_cache; + +graph_t *lock_graph[HASH_SIZE]; +proc_graph_t pgraph; + +/* + * Clustering. + * + * NLM REGISTRY TYPE IMPLEMENTATION + * + * Assumptions: + * 1. Nodes in a cluster are numbered starting at 1; always non-negative + * integers; maximum node id is returned by clconf_maximum_nodeid(). + * 2. We use this node id to identify the node an NLM server runs on. + */ + +/* + * NLM registry object keeps track of NLM servers via their + * nlmids (which are the node ids of the node in the cluster they run on) + * that have requested locks at this LLM with which this registry is + * associated. + * + * Representation of abstraction: + * rep = record[ states: array[nlm_state], + * lock: mutex] + * + * Representation invariants: + * 1. 
index i of rep.states is between 0 and n - 1 where n is number + * of elements in the array, which happen to be the maximum number + * of nodes in the cluster configuration + 1. + * 2. map nlmid to index i of rep.states + * 0 -> 0 + * 1 -> 1 + * 2 -> 2 + * n-1 -> clconf_maximum_nodeid()+1 + * 3. This 1-1 mapping is quite convenient and it avoids errors resulting + * from forgetting to subtract 1 from the index. + * 4. The reason we keep the 0th index is the following. A legitimate + * cluster configuration includes making a UFS file system NFS + * exportable. The code is structured so that if you're in a cluster + * you do one thing; otherwise, you do something else. The problem + * is what to do if you think you're in a cluster with PXFS loaded, + * but you're using UFS not PXFS? The upper two bytes of the sysid + * encode the node id of the node where NLM server runs; these bytes + * are zero for UFS. Since the nodeid is used to index into the + * registry, we can record the NLM server state information at index + * 0 using the same mechanism used for PXFS file locks! + */ +static flk_nlm_status_t *nlm_reg_status = NULL; /* state array 0..N-1 */ +static kmutex_t nlm_reg_lock; /* lock to protect arrary */ +static uint_t nlm_status_size; /* size of state array */ + +/* + * Although we need a global lock dependency graph (and associated data + * structures), we also need a per-zone notion of whether the lock manager is + * running, and so whether to allow lock manager requests or not. + * + * Thus, on a per-zone basis we maintain a ``global'' variable + * (flk_lockmgr_status), protected by flock_lock, and set when the lock + * manager is determined to be changing state (starting or stopping). + * + * Each graph/zone pair also has a copy of this variable, which is protected by + * the graph's mutex. + * + * The per-graph copies are used to synchronize lock requests with shutdown + * requests. The global copy is used to initialize the per-graph field when a + * new graph is created. 
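[Editor's note: as the registry comment above explains, a lock's l_sysid packs the cluster node id (nlmid) into its upper two bytes, with nlmid 0 reserved for non-PXFS (e.g. local UFS) requests. A minimal user-space sketch of that packing, assuming the 16/16-bit split described above; the kernel's real macros (GETNLMID, GETPXFSID) live in the flock headers.]

    #include <stdio.h>
    #include <stdint.h>

    /* Model of the [nlmid, sysid] pair: the upper 16 bits name the
     * cluster node running the NLM server, the lower 16 bits are the
     * real sysid; nlmid 0 indexes the registry slot reserved for
     * non-cluster (UFS) users. */
    static uint32_t pack_sysid(uint16_t nlmid, uint16_t sysid) {
        return ((uint32_t)nlmid << 16) | sysid;
    }
    static uint16_t get_nlmid(uint32_t id) { return (uint16_t)(id >> 16); }
    static uint16_t get_real_sysid(uint32_t id) { return (uint16_t)(id & 0xffff); }

    int main(void) {
        uint32_t id = pack_sysid(3, 42);  /* NLM server on cluster node 3 */
        printf("nlmid=%u sysid=%u\n", get_nlmid(id), get_real_sysid(id));
        return 0;                         /* prints nlmid=3 sysid=42 */
    }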
+ */ +struct flock_globals { + flk_lockmgr_status_t flk_lockmgr_status; + flk_lockmgr_status_t lockmgr_status[HASH_SIZE]; +}; + +zone_key_t flock_zone_key; + +static void create_flock(lock_descriptor_t *, flock64_t *); +static lock_descriptor_t *flk_get_lock(void); +static void flk_free_lock(lock_descriptor_t *lock); +static void flk_get_first_blocking_lock(lock_descriptor_t *request); +static int flk_process_request(lock_descriptor_t *); +static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int); +static edge_t *flk_get_edge(void); +static int flk_wait_execute_request(lock_descriptor_t *); +static int flk_relation(lock_descriptor_t *, lock_descriptor_t *); +static void flk_insert_active_lock(lock_descriptor_t *); +static void flk_delete_active_lock(lock_descriptor_t *, int); +static void flk_insert_sleeping_lock(lock_descriptor_t *); +static void flk_graph_uncolor(graph_t *); +static void flk_wakeup(lock_descriptor_t *, int); +static void flk_free_edge(edge_t *); +static void flk_recompute_dependencies(lock_descriptor_t *, + lock_descriptor_t **, int, int); +static int flk_find_barriers(lock_descriptor_t *); +static void flk_update_barriers(lock_descriptor_t *); +static int flk_color_reachables(lock_descriptor_t *); +static int flk_canceled(lock_descriptor_t *); +static void flk_delete_locks_by_sysid(lock_descriptor_t *); +static void report_blocker(lock_descriptor_t *, lock_descriptor_t *); +static void wait_for_lock(lock_descriptor_t *); +static void unlock_lockmgr_granted(struct flock_globals *); +static void wakeup_sleeping_lockmgr_locks(struct flock_globals *); + +/* Clustering hooks */ +static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t); +static void cl_flk_wakeup_sleeping_nlm_locks(int); +static void cl_flk_unlock_nlm_granted(int); + +#ifdef DEBUG +static int check_lock_transition(int, int); +static void check_sleeping_locks(graph_t *); +static void check_active_locks(graph_t *); +static int no_path(lock_descriptor_t *, lock_descriptor_t *); +static void path(lock_descriptor_t *, lock_descriptor_t *); +static void check_owner_locks(graph_t *, pid_t, int, vnode_t *); +static int level_one_path(lock_descriptor_t *, lock_descriptor_t *); +static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int); +#endif + +/* proc_graph function definitons */ +static int flk_check_deadlock(lock_descriptor_t *); +static void flk_proc_graph_uncolor(void); +static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *); +static proc_edge_t *flk_get_proc_edge(void); +static void flk_proc_release(proc_vertex_t *); +static void flk_free_proc_edge(proc_edge_t *); +static void flk_update_proc_graph(edge_t *, int); + +/* Non-blocking mandatory locking */ +static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t, + u_offset_t); + +static struct flock_globals * +flk_get_globals(void) +{ + /* + * The KLM module had better be loaded if we're attempting to handle + * lockmgr requests. + */ + ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED); + return (zone_getspecific(flock_zone_key, curproc->p_zone)); +} + +static flk_lockmgr_status_t +flk_get_lockmgr_status(void) +{ + struct flock_globals *fg; + + ASSERT(MUTEX_HELD(&flock_lock)); + + if (flock_zone_key == ZONE_KEY_UNINITIALIZED) { + /* + * KLM module not loaded; lock manager definitely not running. 
+ */ + return (FLK_LOCKMGR_DOWN); + } + fg = flk_get_globals(); + return (fg->flk_lockmgr_status); +} + +/* + * Routine called from fs_frlock in fs/fs_subr.c + */ + +int +reclock(vnode_t *vp, + flock64_t *lckdat, + int cmd, + int flag, + u_offset_t offset, + flk_callback_t *flk_cbp) +{ + lock_descriptor_t stack_lock_request; + lock_descriptor_t *lock_request; + int error = 0; + graph_t *gp; + int nlmid; + + /* + * Check access permissions + */ + if ((cmd & SETFLCK) && + ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) || + (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0))) + return (EBADF); + + /* + * for query and unlock we use the stack_lock_request + */ + + if ((lckdat->l_type == F_UNLCK) || + !((cmd & INOFLCK) || (cmd & SETFLCK))) { + lock_request = &stack_lock_request; + (void) bzero((caddr_t)lock_request, + sizeof (lock_descriptor_t)); + + /* + * following is added to make the assertions in + * flk_execute_request() to pass through + */ + + lock_request->l_edge.edge_in_next = &lock_request->l_edge; + lock_request->l_edge.edge_in_prev = &lock_request->l_edge; + lock_request->l_edge.edge_adj_next = &lock_request->l_edge; + lock_request->l_edge.edge_adj_prev = &lock_request->l_edge; + lock_request->l_status = FLK_INITIAL_STATE; + } else { + lock_request = flk_get_lock(); + } + lock_request->l_state = 0; + lock_request->l_vnode = vp; + lock_request->l_zoneid = getzoneid(); + + /* + * Convert the request range into the canonical start and end + * values. The NLM protocol supports locking over the entire + * 32-bit range, so there's no range checking for remote requests, + * but we still need to verify that local requests obey the rules. + */ + /* Clustering */ + if ((cmd & (RCMDLCK | PCMDLCK)) != 0) { + ASSERT(lckdat->l_whence == 0); + lock_request->l_start = lckdat->l_start; + lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T : + lckdat->l_start + (lckdat->l_len - 1); + } else { + /* check the validity of the lock range */ + error = flk_convert_lock_data(vp, lckdat, + &lock_request->l_start, &lock_request->l_end, + offset); + if (error) { + goto done; + } + error = flk_check_lock_data(lock_request->l_start, + lock_request->l_end, MAXEND); + if (error) { + goto done; + } + } + + ASSERT(lock_request->l_end >= lock_request->l_start); + + lock_request->l_type = lckdat->l_type; + if (cmd & INOFLCK) + lock_request->l_state |= IO_LOCK; + if (cmd & SLPFLCK) + lock_request->l_state |= WILLING_TO_SLEEP_LOCK; + if (cmd & RCMDLCK) + lock_request->l_state |= LOCKMGR_LOCK; + if (cmd & NBMLCK) + lock_request->l_state |= NBMAND_LOCK; + /* + * Clustering: set flag for PXFS locks + * We do not _only_ check for the PCMDLCK flag because PXFS locks could + * also be of type 'RCMDLCK'. + * We do not _only_ check the GETPXFSID() macro because local PXFS + * clients use a pxfsid of zero to permit deadlock detection in the LLM. + */ + + if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) { + lock_request->l_state |= PXFS_LOCK; + } + if (!((cmd & SETFLCK) || (cmd & INOFLCK))) { + if (lock_request->l_type == F_RDLCK || + lock_request->l_type == F_WRLCK) + lock_request->l_state |= QUERY_LOCK; + } + lock_request->l_flock = (*lckdat); + lock_request->l_callbacks = flk_cbp; + + /* + * We are ready for processing the request + */ + if (IS_LOCKMGR(lock_request)) { + /* + * If the lock request is an NLM server request .... 
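[Editor's note: backing up to the range canonicalization above, reclock() turns (l_start, l_len) into an inclusive [l_start, l_end] pair, with l_len == 0 meaning "through the maximum offset". A hedged sketch of just that arithmetic; the kernel's flk_convert_lock_data()/flk_check_lock_data(), defined elsewhere, additionally apply the l_whence base and the MAXEND bound.]

    #include <stdint.h>
    #include <errno.h>

    #define MAX_OFF UINT64_MAX            /* stand-in for MAX_U_OFFSET_T */

    /* Model of the canonicalization: l_len == 0 locks through the
     * maximum offset; otherwise the lock covers the inclusive interval
     * [start, start + len - 1]. Fails with EINVAL if the range wraps. */
    static int canon_range(uint64_t start, uint64_t len,
        uint64_t *s, uint64_t *e)
    {
        *s = start;
        if (len == 0) {
            *e = MAX_OFF;
            return (0);
        }
        if (len - 1 > MAX_OFF - start)
            return (EINVAL);              /* start + len - 1 overflows */
        *e = start + (len - 1);
        return (0);
    }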
+ */ + if (nlm_status_size == 0) { /* not booted as cluster */ + mutex_enter(&flock_lock); + /* + * Bail out if this is a lock manager request and the + * lock manager is not supposed to be running. + */ + if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) { + mutex_exit(&flock_lock); + error = ENOLCK; + goto done; + } + mutex_exit(&flock_lock); + } else { /* booted as a cluster */ + nlmid = GETNLMID(lock_request->l_flock.l_sysid); + ASSERT(nlmid <= nlm_status_size && nlmid >= 0); + + mutex_enter(&nlm_reg_lock); + /* + * If the NLM registry does not know about this + * NLM server making the request, add its nlmid + * to the registry. + */ + if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, + nlmid)) { + FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid); + } else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status, + nlmid)) { + /* + * If the NLM server is already known (has made + * previous lock requests) and its state is + * not NLM_UP (means that NLM server is + * shutting down), then bail out with an + * error to deny the lock request. + */ + mutex_exit(&nlm_reg_lock); + error = ENOLCK; + goto done; + } + mutex_exit(&nlm_reg_lock); + } + } + + /* Now get the lock graph for a particular vnode */ + gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH); + + /* + * We drop rwlock here otherwise this might end up causing a + * deadlock if this IOLOCK sleeps. (bugid # 1183392). + */ + + if (IS_IO_LOCK(lock_request)) { + VOP_RWUNLOCK(vp, + (lock_request->l_type == F_RDLCK) ? + V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL); + } + mutex_enter(&gp->gp_mutex); + + lock_request->l_state |= REFERENCED_LOCK; + lock_request->l_graph = gp; + + switch (lock_request->l_type) { + case F_RDLCK: + case F_WRLCK: + if (IS_QUERY_LOCK(lock_request)) { + flk_get_first_blocking_lock(lock_request); + (*lckdat) = lock_request->l_flock; + break; + } + + /* process the request now */ + + error = flk_process_request(lock_request); + break; + + case F_UNLCK: + /* unlock request will not block so execute it immediately */ + + if (IS_LOCKMGR(lock_request) && + flk_canceled(lock_request)) { + error = 0; + } else { + error = flk_execute_request(lock_request); + } + break; + + case F_UNLKSYS: + /* + * Recovery mechanism to release lock manager locks when + * NFS client crashes and restart. NFS server will clear + * old locks and grant new locks. + */ + + if (lock_request->l_flock.l_sysid == 0) { + mutex_exit(&gp->gp_mutex); + return (EINVAL); + } + if (secpolicy_nfs(CRED()) != 0) { + mutex_exit(&gp->gp_mutex); + return (EPERM); + } + flk_delete_locks_by_sysid(lock_request); + lock_request->l_state &= ~REFERENCED_LOCK; + flk_set_state(lock_request, FLK_DEAD_STATE); + flk_free_lock(lock_request); + mutex_exit(&gp->gp_mutex); + return (0); + + default: + error = EINVAL; + break; + } + + /* Clustering: For blocked PXFS locks, return */ + if (error == PXFS_LOCK_BLOCKED) { + lock_request->l_state &= ~REFERENCED_LOCK; + mutex_exit(&gp->gp_mutex); + return (error); + } + + /* + * Now that we have seen the status of locks in the system for + * this vnode we acquire the rwlock if it is an IO_LOCK. + */ + + if (IS_IO_LOCK(lock_request)) { + (void) VOP_RWLOCK(vp, + (lock_request->l_type == F_RDLCK) ? + V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL); + if (!error) { + lckdat->l_type = F_UNLCK; + + /* + * This wake up is needed otherwise + * if IO_LOCK has slept the dependents on this + * will not be woken up at all. (bugid # 1185482). 
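[Editor's note: for orientation, the switch above is what a user-level fcntl() reaches via fs_frlock(): F_SETLK arrives without SLPFLCK and fails fast with EAGAIN, while F_SETLKW sets WILLING_TO_SLEEP_LOCK and may sleep in flk_wait_execute_request(). A small user-space demonstration (the path name is illustrative only):]

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void) {
        int fd = open("/tmp/flock-demo", O_RDWR | O_CREAT, 0644);
        struct flock fl = { 0 };

        fl.l_type = F_WRLCK;        /* becomes an F_WRLCK lock_descriptor */
        fl.l_whence = SEEK_SET;
        fl.l_start = 0;
        fl.l_len = 0;               /* 0 == lock through end of file */

        if (fcntl(fd, F_SETLK, &fl) == -1)  /* non-blocking attempt */
            perror("F_SETLK");      /* EAGAIN if another owner blocks us */

        fl.l_type = F_UNLCK;        /* unlock executes immediately */
        (void) fcntl(fd, F_SETLK, &fl);
        (void) close(fd);
        return (0);
    }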
+ */ + + flk_wakeup(lock_request, 1); + flk_set_state(lock_request, FLK_DEAD_STATE); + flk_free_lock(lock_request); + } + /* + * else if error had occurred either flk_process_request() + * has returned EDEADLK in which case there will be no + * dependents for this lock or EINTR from flk_wait_execute_ + * request() in which case flk_cancel_sleeping_lock() + * would have been done. same is true with EBADF. + */ + } + + if (lock_request == &stack_lock_request) { + flk_set_state(lock_request, FLK_DEAD_STATE); + } else { + lock_request->l_state &= ~REFERENCED_LOCK; + if ((error != 0) || IS_DELETED(lock_request)) { + flk_set_state(lock_request, FLK_DEAD_STATE); + flk_free_lock(lock_request); + } + } + + mutex_exit(&gp->gp_mutex); + return (error); + +done: + flk_set_state(lock_request, FLK_DEAD_STATE); + if (lock_request != &stack_lock_request) + flk_free_lock(lock_request); + return (error); +} + +/* + * Invoke the callbacks in the given list. If before sleeping, invoke in + * list order. If after sleeping, invoke in reverse order. + * + * CPR (suspend/resume) support: if one of the callbacks returns a + * callb_cpr_t, return it. This will be used to make the thread CPR-safe + * while it is sleeping. There should be at most one callb_cpr_t for the + * thread. + * XXX This is unnecessarily complicated. The CPR information should just + * get passed in directly through VOP_FRLOCK and reclock, rather than + * sneaking it in via a callback. + */ + +callb_cpr_t * +flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when) +{ + callb_cpr_t *cpr_callbackp = NULL; + callb_cpr_t *one_result; + flk_callback_t *cb; + + if (cblist == NULL) + return (NULL); + + if (when == FLK_BEFORE_SLEEP) { + cb = cblist; + do { + one_result = (*cb->cb_callback)(when, cb->cb_data); + if (one_result != NULL) { + ASSERT(cpr_callbackp == NULL); + cpr_callbackp = one_result; + } + cb = cb->cb_next; + } while (cb != cblist); + } else { + cb = cblist->cb_prev; + do { + one_result = (*cb->cb_callback)(when, cb->cb_data); + if (one_result != NULL) { + cpr_callbackp = one_result; + } + cb = cb->cb_prev; + } while (cb != cblist->cb_prev); + } + + return (cpr_callbackp); +} + +/* + * Initialize a flk_callback_t to hold the given callback. + */ + +void +flk_init_callback(flk_callback_t *flk_cb, + callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata) +{ + flk_cb->cb_next = flk_cb; + flk_cb->cb_prev = flk_cb; + flk_cb->cb_callback = cb_fcn; + flk_cb->cb_data = cbdata; +} + +/* + * Initialize an flk_callback_t and then link it into the head of an + * existing list (which may be NULL). + */ + +void +flk_add_callback(flk_callback_t *newcb, + callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), + void *cbdata, flk_callback_t *cblist) +{ + flk_init_callback(newcb, cb_fcn, cbdata); + + if (cblist == NULL) + return; + + newcb->cb_prev = cblist->cb_prev; + newcb->cb_next = cblist; + cblist->cb_prev->cb_next = newcb; + cblist->cb_prev = newcb; +} +/* ONC_PLUS EXTRACT END */ + +/* + * Initialize the flk_edge_cache data structure and create the + * nlm_reg_status array. + */ + +void +flk_init(void) +{ + uint_t i; + + flk_edge_cache = kmem_cache_create("flk_edges", + sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0); + if (flk_edge_cache == NULL) { + cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n"); + } + /* + * Create the NLM registry object. + */ + + if (cluster_bootflags & CLUSTER_BOOTED) { + /* + * This routine tells you the maximum node id that will be used + * in the cluster. 
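[Editor's note: stepping back to flk_invoke_callbacks() and flk_add_callback() above: callbacks sit on a circular doubly-linked list that is walked forward before sleeping and in reverse afterward, giving the callbacks nesting semantics. A self-contained model of that insertion and traversal order:]

    #include <stdio.h>

    struct cb {
        struct cb *next, *prev;
        const char *name;
    };

    static void cb_init(struct cb *c, const char *n) {
        c->next = c->prev = c;     /* single-element circular list */
        c->name = n;
    }
    /* link c at the tail, i.e. just before the list head */
    static void cb_add(struct cb *c, struct cb *head) {
        c->prev = head->prev;
        c->next = head;
        head->prev->next = c;
        head->prev = c;
    }
    /* forward before sleep, reverse after, as flk_invoke_callbacks does */
    static void invoke(struct cb *list, int before_sleep) {
        struct cb *start = before_sleep ? list : list->prev;
        struct cb *c = start;
        do {
            printf("%s callback: %s\n",
                before_sleep ? "before" : "after", c->name);
            c = before_sleep ? c->next : c->prev;
        } while (c != start);
    }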
This number will be the size of the nlm + * registry status array. We add 1 because we will be using + * all entries indexed from 0 to maxnodeid; e.g., from 0 + * to 64, for a total of 65 entries. + */ + nlm_status_size = clconf_maximum_nodeid() + 1; + } else { + nlm_status_size = 0; + } + + if (nlm_status_size != 0) { /* booted as a cluster */ + nlm_reg_status = (flk_nlm_status_t *) + kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size, + KM_SLEEP); + + /* initialize all NLM states in array to NLM_UNKNOWN */ + for (i = 0; i < nlm_status_size; i++) { + nlm_reg_status[i] = FLK_NLM_UNKNOWN; + } + } +} + +/* + * Zone constructor/destructor callbacks to be executed when a zone is + * created/destroyed. + */ +/* ARGSUSED */ +void * +flk_zone_init(zoneid_t zoneid) +{ + struct flock_globals *fg; + uint_t i; + + fg = kmem_alloc(sizeof (*fg), KM_SLEEP); + fg->flk_lockmgr_status = FLK_LOCKMGR_UP; + for (i = 0; i < HASH_SIZE; i++) + fg->lockmgr_status[i] = FLK_LOCKMGR_UP; + return (fg); +} + +/* ARGSUSED */ +void +flk_zone_fini(zoneid_t zoneid, void *data) +{ + struct flock_globals *fg = data; + + kmem_free(fg, sizeof (*fg)); +} + +/* + * Get a lock_descriptor structure with initialisation of edge lists. + */ + +static lock_descriptor_t * +flk_get_lock(void) +{ + lock_descriptor_t *l; + + l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP); + + cv_init(&l->l_cv, NULL, CV_DRIVER, NULL); + l->l_edge.edge_in_next = &l->l_edge; + l->l_edge.edge_in_prev = &l->l_edge; + l->l_edge.edge_adj_next = &l->l_edge; + l->l_edge.edge_adj_prev = &l->l_edge; + l->pvertex = -1; + l->l_status = FLK_INITIAL_STATE; + flk_lock_allocs++; + return (l); +} + +/* + * Free a lock_descriptor structure. Just sets the DELETED_LOCK flag + * when some thread has a reference to it as in reclock(). + */ + +void +flk_free_lock(lock_descriptor_t *lock) +{ + ASSERT(IS_DEAD(lock)); + if (IS_REFERENCED(lock)) { + lock->l_state |= DELETED_LOCK; + return; + } + flk_lock_frees++; + kmem_free((void *)lock, sizeof (lock_descriptor_t)); +} + +void +flk_set_state(lock_descriptor_t *lock, int new_state) +{ + /* + * Locks in the sleeping list may be woken up in a number of ways, + * and more than once. If a sleeping lock is signalled awake more + * than once, then it may or may not change state depending on its + * current state. + * Also note that NLM locks that are sleeping could be moved to an + * interrupted state more than once if the unlock request is + * retransmitted by the NLM client - the second time around, this is + * just a nop. + * The ordering of being signalled awake is: + * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE. + * The checks below implement this ordering. + */ + if (IS_INTERRUPTED(lock)) { + if ((new_state == FLK_CANCELLED_STATE) || + (new_state == FLK_GRANTED_STATE) || + (new_state == FLK_INTERRUPTED_STATE)) { + return; + } + } + if (IS_CANCELLED(lock)) { + if ((new_state == FLK_GRANTED_STATE) || + (new_state == FLK_CANCELLED_STATE)) { + return; + } + } + CHECK_LOCK_TRANSITION(lock->l_status, new_state); + if (IS_PXFS(lock)) { + cl_flk_state_transition_notify(lock, lock->l_status, new_state); + } + lock->l_status = new_state; +} + +/* + * Routine that checks whether there are any blocking locks in the system. + * + * The policy followed is if a write lock is sleeping we don't allow read + * locks before this write lock even though there may not be any active + * locks corresponding to the read locks' region. 
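[Editor's note: the fairness policy just described can be modeled in a few lines: a new read request queues behind any overlapping sleeping writer even when no active lock conflicts, so a steady stream of readers cannot starve a writer. A sketch:]

    /* Model of the fairness rule: a read request must wait behind any
     * sleeping writer whose range overlaps it, even if the requested
     * region is currently free of active locks. */
    enum ltype { RD, WR };
    struct lk { enum ltype type; unsigned long start, end; };

    static int overlaps(const struct lk *a, const struct lk *b) {
        return a->start <= b->end && b->start <= a->end;
    }
    /* returns nonzero if the read request must sleep */
    static int read_must_wait(const struct lk *req,
        const struct lk *sleepq, int n)
    {
        for (int i = 0; i < n; i++)
            if (sleepq[i].type == WR && overlaps(req, &sleepq[i]))
                return 1;
        return 0;
    }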
+ * + * flk_add_edge() function adds an edge between l1 and l2 iff there + * is no path between l1 and l2. This is done to have a "minimum + * storage representation" of the dependency graph. + * + * Another property of the graph is since only the new request throws + * edges to the existing locks in the graph, the graph is always topologically + * ordered. + */ + +static int +flk_process_request(lock_descriptor_t *request) +{ + graph_t *gp = request->l_graph; + lock_descriptor_t *lock; + int request_blocked_by_active = 0; + int request_blocked_by_granted = 0; + int request_blocked_by_sleeping = 0; + vnode_t *vp = request->l_vnode; + int error = 0; + int request_will_wait = 0; + int found_covering_lock = 0; + lock_descriptor_t *covered_by = NULL; + + ASSERT(MUTEX_HELD(&gp->gp_mutex)); + request_will_wait = IS_WILLING_TO_SLEEP(request); + + /* + * check active locks + */ + + SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); + + + if (lock) { + do { + if (BLOCKS(lock, request)) { + if (!request_will_wait) + return (EAGAIN); + request_blocked_by_active = 1; + break; + } + /* + * Grant lock if it is for the same owner holding active + * lock that covers the request. + */ + + if (SAME_OWNER(lock, request) && + COVERS(lock, request) && + (request->l_type == F_RDLCK)) + return (flk_execute_request(request)); + lock = lock->l_next; + } while (lock->l_vnode == vp); + } + + if (!request_blocked_by_active) { + lock_descriptor_t *lk[1]; + lock_descriptor_t *first_glock = NULL; + /* + * Shall we grant this?! NO!! + * What about those locks that were just granted and still + * in sleep queue. Those threads are woken up and so locks + * are almost active. + */ + SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); + if (lock) { + do { + if (BLOCKS(lock, request)) { + if (IS_GRANTED(lock)) { + request_blocked_by_granted = 1; + } else { + request_blocked_by_sleeping = 1; + } + } + + lock = lock->l_next; + } while ((lock->l_vnode == vp)); + first_glock = lock->l_prev; + ASSERT(first_glock->l_vnode == vp); + } + + if (request_blocked_by_granted) + goto block; + + if (!request_blocked_by_sleeping) { + /* + * If the request isn't going to be blocked by a + * sleeping request, we know that it isn't going to + * be blocked; we can just execute the request -- + * without performing costly deadlock detection. + */ + ASSERT(!request_blocked_by_active); + return (flk_execute_request(request)); + } else if (request->l_type == F_RDLCK) { + /* + * If we have a sleeping writer in the requested + * lock's range, block. 
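[Editor's note: one detail worth pulling out of the active-lock scan earlier in this function is the fast-path grant: a read request whose owner already holds an active lock covering the requested range is granted without any graph work. A model of that test, with owner identity reduced to (pid, sysid) as SAME_OWNER effectively compares:]

    /* Model of the fast-path grant: same owner, covering range, and a
     * read request together mean immediate execution. */
    struct own { long pid; int sysid; };
    struct mlk { struct own o; int write; unsigned long start, end; };

    static int same_owner(const struct mlk *a, const struct mlk *b) {
        return a->o.pid == b->o.pid && a->o.sysid == b->o.sysid;
    }
    static int covers(const struct mlk *l, const struct mlk *r) {
        return l->start <= r->start && l->end >= r->end;
    }
    static int fast_grant(const struct mlk *held, const struct mlk *req) {
        return !req->write && same_owner(held, req) && covers(held, req);
    }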
+ */ + goto block; + } + + lk[0] = request; + request->l_state |= RECOMPUTE_LOCK; + SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); + if (lock) { + do { + flk_recompute_dependencies(lock, lk, 1, 0); + lock = lock->l_next; + } while (lock->l_vnode == vp); + } + lock = first_glock; + if (lock) { + do { + if (IS_GRANTED(lock)) { + flk_recompute_dependencies(lock, lk, 1, 0); + } + lock = lock->l_prev; + } while ((lock->l_vnode == vp)); + } + request->l_state &= ~RECOMPUTE_LOCK; + if (!NO_DEPENDENTS(request) && flk_check_deadlock(request)) + return (EDEADLK); + return (flk_execute_request(request)); + } + +block: + if (request_will_wait) + flk_graph_uncolor(gp); + + /* check sleeping locks */ + + SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); + + /* + * If we find a sleeping write lock that is a superset of the + * region wanted by request we can be assured that by adding an + * edge to this write lock we have paths to all locks in the + * graph that blocks the request except in one case and that is why + * another check for SAME_OWNER in the loop below. The exception + * case is when this process that owns the sleeping write lock 'l1' + * has other locks l2, l3, l4 that are in the system and arrived + * before l1. l1 does not have path to these locks as they are from + * same process. We break when we find a second covering sleeping + * lock l5 owned by a process different from that owning l1, because + * there cannot be any of l2, l3, l4, etc., arrived before l5, and if + * it has l1 would have produced a deadlock already. + */ + + if (lock) { + do { + if (BLOCKS(lock, request)) { + if (!request_will_wait) + return (EAGAIN); + if (COVERS(lock, request) && + lock->l_type == F_WRLCK) { + if (found_covering_lock && + !SAME_OWNER(lock, covered_by)) { + found_covering_lock++; + break; + } + found_covering_lock = 1; + covered_by = lock; + } + if (found_covering_lock && + !SAME_OWNER(lock, covered_by)) { + lock = lock->l_next; + continue; + } + if ((error = flk_add_edge(request, lock, + !found_covering_lock, 0))) + return (error); + } + lock = lock->l_next; + } while (lock->l_vnode == vp); + } + +/* + * found_covering_lock == 2 iff at this point 'request' has paths + * to all locks that blocks 'request'. found_covering_lock == 1 iff at this + * point 'request' has paths to all locks that blocks 'request' whose owners + * are not same as the one that covers 'request' (covered_by above) and + * we can have locks whose owner is same as covered_by in the active list. + */ + + if (request_blocked_by_active && found_covering_lock != 2) { + SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); + ASSERT(lock != NULL); + do { + if (BLOCKS(lock, request)) { + if (found_covering_lock && + !SAME_OWNER(lock, covered_by)) { + lock = lock->l_next; + continue; + } + if ((error = flk_add_edge(request, lock, + CHECK_CYCLE, 0))) + return (error); + } + lock = lock->l_next; + } while (lock->l_vnode == vp); + } + + if (NOT_BLOCKED(request)) { + /* + * request not dependent on any other locks + * so execute this request + */ + return (flk_execute_request(request)); + } else { + /* + * check for deadlock + */ + if (flk_check_deadlock(request)) + return (EDEADLK); + /* + * this thread has to sleep + */ + return (flk_wait_execute_request(request)); + } +} + +/* ONC_PLUS EXTRACT START */ +/* + * The actual execution of the request in the simple case is only to + * insert the 'request' in the list of active locks if it is not an + * UNLOCK. 
+ * We have to consider the existing active locks' relation to + * this 'request' if they are owned by same process. flk_relation() does + * this job and sees to that the dependency graph information is maintained + * properly. + */ + +int +flk_execute_request(lock_descriptor_t *request) +{ + graph_t *gp = request->l_graph; + vnode_t *vp = request->l_vnode; + lock_descriptor_t *lock, *lock1; + int done_searching = 0; + + CHECK_SLEEPING_LOCKS(gp); + CHECK_ACTIVE_LOCKS(gp); + + ASSERT(MUTEX_HELD(&gp->gp_mutex)); + + flk_set_state(request, FLK_START_STATE); + + ASSERT(NOT_BLOCKED(request)); + + /* IO_LOCK requests are only to check status */ + + if (IS_IO_LOCK(request)) + return (0); + + SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); + + if (lock == NULL && request->l_type == F_UNLCK) + return (0); + if (lock == NULL) { + flk_insert_active_lock(request); + return (0); + } + + do { + lock1 = lock->l_next; + if (SAME_OWNER(request, lock)) { + done_searching = flk_relation(lock, request); + } + lock = lock1; + } while (lock->l_vnode == vp && !done_searching); + + /* + * insert in active queue + */ + + if (request->l_type != F_UNLCK) + flk_insert_active_lock(request); + + return (0); +} +/* ONC_PLUS EXTRACT END */ + +/* + * 'request' is blocked by some one therefore we put it into sleep queue. + */ +static int +flk_wait_execute_request(lock_descriptor_t *request) +{ + graph_t *gp = request->l_graph; + callb_cpr_t *cprp; /* CPR info from callback */ + struct flock_globals *fg; + int index; + + ASSERT(MUTEX_HELD(&gp->gp_mutex)); + ASSERT(IS_WILLING_TO_SLEEP(request)); + + flk_insert_sleeping_lock(request); + + if (IS_LOCKMGR(request)) { + index = HASH_INDEX(request->l_vnode); + fg = flk_get_globals(); + + if (nlm_status_size == 0) { /* not booted as a cluster */ + if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) { + flk_cancel_sleeping_lock(request, 1); + return (ENOLCK); + } + } else { /* booted as a cluster */ + /* + * If the request is an NLM server lock request, + * and the NLM state of the lock request is not + * NLM_UP (because the NLM server is shutting + * down), then cancel the sleeping lock and + * return error ENOLCK that will encourage the + * client to retransmit. + */ + if (!IS_NLM_UP(request)) { + flk_cancel_sleeping_lock(request, 1); + return (ENOLCK); + } + } + } + + /* Clustering: For blocking PXFS locks, return */ + if (IS_PXFS(request)) { + /* + * PXFS locks sleep on the client side. + * The callback argument is used to wake up the sleeper + * when the lock is granted. + * We return -1 (rather than an errno value) to indicate + * the client side should sleep + */ + return (PXFS_LOCK_BLOCKED); + } + + if (request->l_callbacks != NULL) { + /* + * To make sure the shutdown code works correctly, either + * the callback must happen after putting the lock on the + * sleep list, or we must check the shutdown status after + * returning from the callback (and before sleeping). At + * least for now, we'll use the first option. If a + * shutdown or signal or whatever happened while the graph + * mutex was dropped, that will be detected by + * wait_for_lock(). 
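[Editor's note: wait_for_lock() itself is defined later in the file; the shape of the sleep is the classic condition-variable protocol under the graph mutex. A pthread model, under the assumption (not shown in this excerpt) that flk_wakeup() and signal delivery set one of the granted/interrupted/cancelled flags that the code above tests after waking:]

    #include <pthread.h>

    /* Pthread model of the sleep: the waiter holds the graph mutex and
     * blocks on the lock's condition variable, rechecking its predicate
     * on every wakeup, exactly the cv_wait idiom. */
    struct wlk {
        pthread_mutex_t *gp_mutex;      /* graph mutex, held on entry */
        pthread_cond_t cv;
        int granted, interrupted, cancelled;
    };

    static void wait_for_lock_model(struct wlk *l) {
        while (!l->granted && !l->interrupted && !l->cancelled)
            pthread_cond_wait(&l->cv, l->gp_mutex);
    }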
+ */ + mutex_exit(&gp->gp_mutex); + + cprp = flk_invoke_callbacks(request->l_callbacks, + FLK_BEFORE_SLEEP); + + mutex_enter(&gp->gp_mutex); + + if (cprp == NULL) { + wait_for_lock(request); + } else { + mutex_enter(cprp->cc_lockp); + CALLB_CPR_SAFE_BEGIN(cprp); + mutex_exit(cprp->cc_lockp); + wait_for_lock(request); + mutex_enter(cprp->cc_lockp); + CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp); + mutex_exit(cprp->cc_lockp); + } + + mutex_exit(&gp->gp_mutex); + (void) flk_invoke_callbacks(request->l_callbacks, + FLK_AFTER_SLEEP); + mutex_enter(&gp->gp_mutex); + } else { + wait_for_lock(request); + } + + if (IS_LOCKMGR(request)) { + /* + * If the lock manager is shutting down, return an + * error that will encourage the client to retransmit. + */ + if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP && + !IS_GRANTED(request)) { + flk_cancel_sleeping_lock(request, 1); + return (ENOLCK); + } + } + + if (IS_INTERRUPTED(request)) { + /* we got a signal, or act like we did */ + flk_cancel_sleeping_lock(request, 1); + return (EINTR); + } + + /* Cancelled if some other thread has closed the file */ + + if (IS_CANCELLED(request)) { + flk_cancel_sleeping_lock(request, 1); + return (EBADF); + } + + request->l_state &= ~GRANTED_LOCK; + REMOVE_SLEEP_QUEUE(request); + return (flk_execute_request(request)); +} + +/* + * This routine adds an edge between from and to because from depends + * to. If asked to check for deadlock it checks whether there are any + * reachable locks from "from_lock" that is owned by the same process + * as "from_lock". + * NOTE: It is the caller's responsibility to make sure that the color + * of the graph is consistent between the calls to flk_add_edge as done + * in flk_process_request. This routine does not color and check for + * deadlock explicitly. + */ + +static int +flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock, + int check_cycle, int update_graph) +{ + edge_t *edge; + edge_t *ep; + lock_descriptor_t *vertex; + lock_descriptor_t *vertex_stack; + + STACK_INIT(vertex_stack); + + /* + * if to vertex already has mark_color just return + * don't add an edge as it is reachable from from vertex + * before itself. 
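[Editor's note: the cycle check flk_add_edge() runs below is an explicit-stack depth-first search: each newly reached vertex is colored exactly once, and reaching any vertex with the requester's own owner means the new edge would close a cycle. A compact model, assuming all color fields start at zero (the kernel's mark-based uncoloring is modeled later in this file):]

    #define MAXV 16
    struct v { int owner, color, nadj; struct v *adj[MAXV]; };

    /* returns nonzero if adding an edge from 'from' would deadlock */
    static int would_deadlock(struct v *from) {
        struct v *stack[MAXV];          /* each vertex pushed at most once */
        int top = 0;
        stack[top++] = from;
        while (top > 0) {
            struct v *x = stack[--top];
            for (int i = 0; i < x->nadj; i++) {
                struct v *t = x->adj[i];
                if (t->color)
                    continue;           /* already visited */
                t->color = 1;
                if (t->owner == from->owner)
                    return 1;           /* cycle back to same owner */
                stack[top++] = t;
            }
        }
        return 0;
    }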
+ */ + + if (COLORED(to_lock)) + return (0); + + edge = flk_get_edge(); + + /* + * set the from and to vertex + */ + + edge->from_vertex = from_lock; + edge->to_vertex = to_lock; + + /* + * put in adjacency list of from vertex + */ + + from_lock->l_edge.edge_adj_next->edge_adj_prev = edge; + edge->edge_adj_next = from_lock->l_edge.edge_adj_next; + edge->edge_adj_prev = &from_lock->l_edge; + from_lock->l_edge.edge_adj_next = edge; + + /* + * put in in list of to vertex + */ + + to_lock->l_edge.edge_in_next->edge_in_prev = edge; + edge->edge_in_next = to_lock->l_edge.edge_in_next; + to_lock->l_edge.edge_in_next = edge; + edge->edge_in_prev = &to_lock->l_edge; + + + if (update_graph) { + flk_update_proc_graph(edge, 0); + return (0); + } + if (!check_cycle) { + return (0); + } + + STACK_PUSH(vertex_stack, from_lock, l_stack); + + while ((vertex = STACK_TOP(vertex_stack)) != NULL) { + + STACK_POP(vertex_stack, l_stack); + + for (ep = FIRST_ADJ(vertex); + ep != HEAD(vertex); + ep = NEXT_ADJ(ep)) { + if (COLORED(ep->to_vertex)) + continue; + COLOR(ep->to_vertex); + if (SAME_OWNER(ep->to_vertex, from_lock)) + goto dead_lock; + STACK_PUSH(vertex_stack, ep->to_vertex, l_stack); + } + } + return (0); + +dead_lock: + + /* + * remove all edges + */ + + ep = FIRST_ADJ(from_lock); + + while (ep != HEAD(from_lock)) { + IN_LIST_REMOVE(ep); + from_lock->l_sedge = NEXT_ADJ(ep); + ADJ_LIST_REMOVE(ep); + flk_free_edge(ep); + ep = from_lock->l_sedge; + } + return (EDEADLK); +} + +/* + * Get an edge structure for representing the dependency between two locks. + */ + +static edge_t * +flk_get_edge() +{ + edge_t *ep; + + ASSERT(flk_edge_cache != NULL); + + ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP); + edge_allocs++; + return (ep); +} + +/* + * Free the edge structure. + */ + +static void +flk_free_edge(edge_t *ep) +{ + edge_frees++; + kmem_cache_free(flk_edge_cache, (void *)ep); +} + +/* + * Check the relationship of request with lock and perform the + * recomputation of dependencies, break lock if required, and return + * 1 if request cannot have any more relationship with the next + * active locks. + * The 'lock' and 'request' are compared and in case of overlap we + * delete the 'lock' and form new locks to represent the non-overlapped + * portion of original 'lock'. This function has side effects such as + * 'lock' will be freed, new locks will be added to the active list. 
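[Editor's note: before the body of flk_relation(), here is a model of the carving it performs: when a same-owner request overlaps an existing active lock, the old lock survives as at most two remainder pieces, one on each side of the request's range (this sketch assumes the ranges do overlap; the adjacent-range merge cases are handled separately above):]

    /* Carve an existing lock around an overlapping request; writes the
     * surviving remainder pieces to out[] and returns their count. */
    struct rng { unsigned long start, end; };

    static int carve(struct rng lock, struct rng req, struct rng out[2]) {
        int n = 0;
        if (lock.start < req.start)     /* piece left of the request */
            out[n++] = (struct rng){ lock.start, req.start - 1 };
        if (lock.end > req.end)         /* piece right of the request */
            out[n++] = (struct rng){ req.end + 1, lock.end };
        return n;                       /* 0, 1, or 2 pieces survive */
    }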
+ */ + +static int +flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request) +{ + int lock_effect; + lock_descriptor_t *lock1, *lock2; + lock_descriptor_t *topology[3]; + int nvertex = 0; + int i; + edge_t *ep; + graph_t *gp = (lock->l_graph); + + + CHECK_SLEEPING_LOCKS(gp); + CHECK_ACTIVE_LOCKS(gp); + + ASSERT(MUTEX_HELD(&gp->gp_mutex)); + + topology[0] = topology[1] = topology[2] = NULL; + + if (request->l_type == F_UNLCK) + lock_effect = FLK_UNLOCK; + else if (request->l_type == F_RDLCK && + lock->l_type == F_WRLCK) + lock_effect = FLK_DOWNGRADE; + else if (request->l_type == F_WRLCK && + lock->l_type == F_RDLCK) + lock_effect = FLK_UPGRADE; + else + lock_effect = FLK_STAY_SAME; + + if (lock->l_end < request->l_start) { + if (lock->l_end == request->l_start - 1 && + lock_effect == FLK_STAY_SAME) { + topology[0] = request; + request->l_start = lock->l_start; + nvertex = 1; + goto recompute; + } else { + return (0); + } + } + + if (lock->l_start > request->l_end) { + if (request->l_end == lock->l_start - 1 && + lock_effect == FLK_STAY_SAME) { + topology[0] = request; + request->l_end = lock->l_end; + nvertex = 1; + goto recompute; + } else { + return (1); + } + } + + if (request->l_end < lock->l_end) { + if (request->l_start > lock->l_start) { + if (lock_effect == FLK_STAY_SAME) { + request->l_start = lock->l_start; + request->l_end = lock->l_end; + topology[0] = request; + nvertex = 1; + } else { + lock1 = flk_get_lock(); + lock2 = flk_get_lock(); + COPY(lock1, lock); + COPY(lock2, lock); + lock1->l_start = lock->l_start; + lock1->l_end = request->l_start - 1; + lock2->l_start = request->l_end + 1; + lock2->l_end = lock->l_end; + topology[0] = lock1; + topology[1] = lock2; + topology[2] = request; + nvertex = 3; + } + } else if (request->l_start < lock->l_start) { + if (lock_effect == FLK_STAY_SAME) { + request->l_end = lock->l_end; + topology[0] = request; + nvertex = 1; + } else { + lock1 = flk_get_lock(); + COPY(lock1, lock); + lock1->l_start = request->l_end + 1; + topology[0] = lock1; + topology[1] = request; + nvertex = 2; + } + } else { + if (lock_effect == FLK_STAY_SAME) { + request->l_start = lock->l_start; + request->l_end = lock->l_end; + topology[0] = request; + nvertex = 1; + } else { + lock1 = flk_get_lock(); + COPY(lock1, lock); + lock1->l_start = request->l_end + 1; + topology[0] = lock1; + topology[1] = request; + nvertex = 2; + } + } + } else if (request->l_end > lock->l_end) { + if (request->l_start > lock->l_start) { + if (lock_effect == FLK_STAY_SAME) { + request->l_start = lock->l_start; + topology[0] = request; + nvertex = 1; + } else { + lock1 = flk_get_lock(); + COPY(lock1, lock); + lock1->l_end = request->l_start - 1; + topology[0] = lock1; + topology[1] = request; + nvertex = 2; + } + } else if (request->l_start < lock->l_start) { + topology[0] = request; + nvertex = 1; + } else { + topology[0] = request; + nvertex = 1; + } + } else { + if (request->l_start > lock->l_start) { + if (lock_effect == FLK_STAY_SAME) { + request->l_start = lock->l_start; + topology[0] = request; + nvertex = 1; + } else { + lock1 = flk_get_lock(); + COPY(lock1, lock); + lock1->l_end = request->l_start - 1; + topology[0] = lock1; + topology[1] = request; + nvertex = 2; + } + } else if (request->l_start < lock->l_start) { + topology[0] = request; + nvertex = 1; + } else { + if (lock_effect != FLK_UNLOCK) { + topology[0] = request; + nvertex = 1; + } else { + flk_delete_active_lock(lock, 0); + flk_wakeup(lock, 1); + flk_free_lock(lock); + CHECK_SLEEPING_LOCKS(gp); + 
CHECK_ACTIVE_LOCKS(gp); + return (1); + } + } + } + +recompute: + + /* + * For unlock we don't send the 'request' to for recomputing + * dependencies because no lock will add an edge to this. + */ + + if (lock_effect == FLK_UNLOCK) { + topology[nvertex-1] = NULL; + nvertex--; + } + for (i = 0; i < nvertex; i++) { + topology[i]->l_state |= RECOMPUTE_LOCK; + topology[i]->l_color = NO_COLOR; + } + + ASSERT(FIRST_ADJ(lock) == HEAD(lock)); + + /* + * we remove the adjacent edges for all vertices' to this vertex + * 'lock'. + */ + + ep = FIRST_IN(lock); + while (ep != HEAD(lock)) { + ADJ_LIST_REMOVE(ep); + ep = NEXT_IN(ep); + } + + flk_delete_active_lock(lock, 0); + + /* We are ready for recomputing the dependencies now */ + + flk_recompute_dependencies(lock, topology, nvertex, 1); + + for (i = 0; i < nvertex; i++) { + topology[i]->l_state &= ~RECOMPUTE_LOCK; + topology[i]->l_color = NO_COLOR; + } + + + if (lock_effect == FLK_UNLOCK) { + nvertex++; + } + for (i = 0; i < nvertex - 1; i++) { + flk_insert_active_lock(topology[i]); + } + + + if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) { + flk_wakeup(lock, 0); + } else { + ep = FIRST_IN(lock); + while (ep != HEAD(lock)) { + lock->l_sedge = NEXT_IN(ep); + IN_LIST_REMOVE(ep); + flk_update_proc_graph(ep, 1); + flk_free_edge(ep); + ep = lock->l_sedge; + } + } + flk_free_lock(lock); + + CHECK_SLEEPING_LOCKS(gp); + CHECK_ACTIVE_LOCKS(gp); + return (0); +} + +/* + * Insert a lock into the active queue. + */ + +static void +flk_insert_active_lock(lock_descriptor_t *new_lock) +{ + graph_t *gp = new_lock->l_graph; + vnode_t *vp = new_lock->l_vnode; + lock_descriptor_t *first_lock, *lock; + + ASSERT(MUTEX_HELD(&gp->gp_mutex)); + + SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); + first_lock = lock; + + if (first_lock != NULL) { + for (; (lock->l_vnode == vp && + lock->l_start < new_lock->l_start); lock = lock->l_next) + ; + } else { + lock = ACTIVE_HEAD(gp); + } + + lock->l_prev->l_next = new_lock; + new_lock->l_next = lock; + new_lock->l_prev = lock->l_prev; + lock->l_prev = new_lock; + + if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) { + vp->v_filocks = (struct filock *)new_lock; + } + flk_set_state(new_lock, FLK_ACTIVE_STATE); + new_lock->l_state |= ACTIVE_LOCK; + + CHECK_ACTIVE_LOCKS(gp); + CHECK_SLEEPING_LOCKS(gp); +} + +/* + * Delete the active lock : Performs two functions depending on the + * value of second parameter. One is to remove from the active lists + * only and other is to both remove and free the lock. + */ + +static void +flk_delete_active_lock(lock_descriptor_t *lock, int free_lock) +{ + vnode_t *vp = lock->l_vnode; + graph_t *gp = lock->l_graph; + + ASSERT(MUTEX_HELD(&gp->gp_mutex)); + if (free_lock) + ASSERT(NO_DEPENDENTS(lock)); + ASSERT(NOT_BLOCKED(lock)); + ASSERT(IS_ACTIVE(lock)); + + ASSERT((vp->v_filocks != NULL)); + + if (vp->v_filocks == (struct filock *)lock) { + vp->v_filocks = (struct filock *) + ((lock->l_next->l_vnode == vp) ? lock->l_next : + NULL); + } + lock->l_next->l_prev = lock->l_prev; + lock->l_prev->l_next = lock->l_next; + lock->l_next = lock->l_prev = NULL; + flk_set_state(lock, FLK_DEAD_STATE); + lock->l_state &= ~ACTIVE_LOCK; + + if (free_lock) + flk_free_lock(lock); + CHECK_ACTIVE_LOCKS(gp); + CHECK_SLEEPING_LOCKS(gp); +} + +/* + * Insert into the sleep queue. 
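[Editor's note: both queues use the same structure, a circular doubly-linked list with a sentinel head. flk_insert_active_lock() above keeps entries ordered by l_start (the sleeping list below orders by vnode), so per-vnode scans can stop at the first non-matching entry. The insertion idiom, modeled:]

    /* Sorted insert into a circular doubly-linked list with sentinel. */
    struct node { struct node *next, *prev; unsigned long start; };

    static void sorted_insert(struct node *head, struct node *n) {
        struct node *p = head->next;
        while (p != head && p->start < n->start)
            p = p->next;
        /* link n just before p */
        n->next = p;
        n->prev = p->prev;
        p->prev->next = n;
        p->prev = n;
    }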
+ */ + +static void +flk_insert_sleeping_lock(lock_descriptor_t *request) +{ + graph_t *gp = request->l_graph; + vnode_t *vp = request->l_vnode; + lock_descriptor_t *lock; + + ASSERT(MUTEX_HELD(&gp->gp_mutex)); + ASSERT(IS_INITIAL(request)); + + for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks && + lock->l_vnode < vp); lock = lock->l_next) + ; + + lock->l_prev->l_next = request; + request->l_prev = lock->l_prev; + lock->l_prev = request; + request->l_next = lock; + flk_set_state(request, FLK_SLEEPING_STATE); + request->l_state |= SLEEPING_LOCK; +} + +/* + * Cancelling a sleeping lock implies removing a vertex from the + * dependency graph and therefore we should recompute the dependencies + * of all vertices that have a path to this vertex, w.r.t. all + * vertices reachable from this vertex. + */ + +void +flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue) +{ + graph_t *gp = request->l_graph; + vnode_t *vp = request->l_vnode; + lock_descriptor_t **topology = NULL; + edge_t *ep; + lock_descriptor_t *vertex, *lock; + int nvertex = 0; + int i; + lock_descriptor_t *vertex_stack; + + STACK_INIT(vertex_stack); + + ASSERT(MUTEX_HELD(&gp->gp_mutex)); + /* + * count number of vertex pointers that has to be allocated + * All vertices that are reachable from request. + */ + + STACK_PUSH(vertex_stack, request, l_stack); + + while ((vertex = STACK_TOP(vertex_stack)) != NULL) { + STACK_POP(vertex_stack, l_stack); + for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex); + ep = NEXT_ADJ(ep)) { + if (IS_RECOMPUTE(ep->to_vertex)) + continue; + ep->to_vertex->l_state |= RECOMPUTE_LOCK; + STACK_PUSH(vertex_stack, ep->to_vertex, l_stack); + nvertex++; + } + } + + /* + * allocate memory for holding the vertex pointers + */ + + if (nvertex) { + topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *), + KM_SLEEP); + } + + /* + * one more pass to actually store the vertices in the + * allocated array. + * We first check sleeping locks and then active locks + * so that topology array will be in a topological + * order. + */ + + nvertex = 0; + SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); + + if (lock) { + do { + if (IS_RECOMPUTE(lock)) { + lock->l_index = nvertex; + topology[nvertex++] = lock; + } + lock->l_color = NO_COLOR; + lock = lock->l_next; + } while (lock->l_vnode == vp); + } + + SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); + + if (lock) { + do { + if (IS_RECOMPUTE(lock)) { + lock->l_index = nvertex; + topology[nvertex++] = lock; + } + lock->l_color = NO_COLOR; + lock = lock->l_next; + } while (lock->l_vnode == vp); + } + + /* + * remove in and out edges of request + * They are freed after updating proc_graph below. 
+ */ + + for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) { + ADJ_LIST_REMOVE(ep); + } + + + if (remove_from_queue) + REMOVE_SLEEP_QUEUE(request); + + /* we are ready to recompute */ + + flk_recompute_dependencies(request, topology, nvertex, 1); + + ep = FIRST_ADJ(request); + while (ep != HEAD(request)) { + IN_LIST_REMOVE(ep); + request->l_sedge = NEXT_ADJ(ep); + ADJ_LIST_REMOVE(ep); + flk_update_proc_graph(ep, 1); + flk_free_edge(ep); + ep = request->l_sedge; + } + + + /* + * unset the RECOMPUTE flag in those vertices + */ + + for (i = 0; i < nvertex; i++) { + topology[i]->l_state &= ~RECOMPUTE_LOCK; + } + + /* + * free the topology + */ + if (nvertex) + kmem_free((void *)topology, + (nvertex * sizeof (lock_descriptor_t *))); + /* + * Possibility of some locks unblocked now + */ + + flk_wakeup(request, 0); + + /* + * we expect to have a correctly recomputed graph now. + */ + flk_set_state(request, FLK_DEAD_STATE); + flk_free_lock(request); + CHECK_SLEEPING_LOCKS(gp); + CHECK_ACTIVE_LOCKS(gp); + +} + +/* + * Uncoloring the graph is simply to increment the mark value of the graph + * And only when wrap round takes place will we color all vertices in + * the graph explicitly. + */ + +static void +flk_graph_uncolor(graph_t *gp) +{ + lock_descriptor_t *lock; + + if (gp->mark == UINT_MAX) { + gp->mark = 1; + for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp); + lock = lock->l_next) + lock->l_color = 0; + + for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp); + lock = lock->l_next) + lock->l_color = 0; + } else { + gp->mark++; + } +} + +/* + * Wake up locks that are blocked on the given lock. + */ + +static void +flk_wakeup(lock_descriptor_t *lock, int adj_list_remove) +{ + edge_t *ep; + graph_t *gp = lock->l_graph; + lock_descriptor_t *lck; + + ASSERT(MUTEX_HELD(&gp->gp_mutex)); + if (NO_DEPENDENTS(lock)) + return; + ep = FIRST_IN(lock); + do { + /* + * delete the edge from the adjacency list + * of from vertex. if no more adjacent edges + * for this vertex wake this process. + */ + lck = ep->from_vertex; + if (adj_list_remove) + ADJ_LIST_REMOVE(ep); + flk_update_proc_graph(ep, 1); + if (NOT_BLOCKED(lck)) { + GRANT_WAKEUP(lck); + } + lock->l_sedge = NEXT_IN(ep); + IN_LIST_REMOVE(ep); + flk_free_edge(ep); + ep = lock->l_sedge; + } while (ep != HEAD(lock)); + ASSERT(NO_DEPENDENTS(lock)); +} + +/* + * The dependents of request, is checked for its dependency against the + * locks in topology (called topology because the array is and should be in + * topological order for this algorithm, if not in topological order the + * inner loop below might add more edges than necessary. Topological ordering + * of vertices satisfies the property that all edges will be from left to + * right i.e., topology[i] can have an edge to topology[j], iff i<j) + * If lock l1 in the dependent set of request is dependent (blocked by) + * on lock l2 in topology but does not have a path to it, we add an edge + * in the inner loop below. + * + * We don't want to add an edge between l1 and l2 if there exists + * already a path from l1 to l2, so care has to be taken for those vertices + * that have two paths to 'request'. These vertices are referred to here + * as barrier locks. + * + * The barriers has to be found (those vertex that originally had two paths + * to request) because otherwise we may end up adding edges unnecessarily + * to vertices in topology, and thus barrier vertices can have an edge + * to a vertex in topology as well a path to it. 
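[Editor's note: flk_graph_uncolor() above is worth a note: "uncoloring" every vertex is O(1) because colors are generation stamps. A vertex counts as colored only if its stamp equals the graph's current mark, and only a mark wraparound forces an explicit reset of every vertex. A model (call uncolor() once before first use so mark is nonzero):]

    #include <limits.h>

    struct g { unsigned mark; unsigned color[64]; int nv; };

    static void uncolor(struct g *g) {
        if (g->mark == UINT_MAX) {      /* rare: wraparound, reset all */
            g->mark = 1;
            for (int i = 0; i < g->nv; i++)
                g->color[i] = 0;
        } else {
            g->mark++;                  /* common case: O(1) uncolor */
        }
    }
    static void color(struct g *g, int v)   { g->color[v] = g->mark; }
    static int  colored(struct g *g, int v) { return g->color[v] == g->mark; }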
+ */ + +static void +flk_recompute_dependencies(lock_descriptor_t *request, + lock_descriptor_t **topology, + int nvertex, int update_graph) +{ + lock_descriptor_t *vertex, *lock; + graph_t *gp = request->l_graph; + int i, count; + int barrier_found = 0; + edge_t *ep; + lock_descriptor_t *vertex_stack; + + STACK_INIT(vertex_stack); + + ASSERT(MUTEX_HELD(&gp->gp_mutex)); + if (nvertex == 0) + return; + flk_graph_uncolor(request->l_graph); + barrier_found = flk_find_barriers(request); + request->l_state |= RECOMPUTE_DONE; + + STACK_PUSH(vertex_stack, request, l_stack); + request->l_sedge = FIRST_IN(request); + + + while ((vertex = STACK_TOP(vertex_stack)) != NULL) { + if (vertex->l_state & RECOMPUTE_DONE) { + count = 0; + goto next_in_edge; + } + if (IS_BARRIER(vertex)) { + /* decrement the barrier count */ + if (vertex->l_index) { + vertex->l_index--; + /* this guy will be pushed again anyway ? */ + STACK_POP(vertex_stack, l_stack); + if (vertex->l_index == 0) { + /* + * barrier is over we can recompute + * dependencies for this lock in the + * next stack pop + */ + vertex->l_state &= ~BARRIER_LOCK; + } + continue; + } + } + vertex->l_state |= RECOMPUTE_DONE; + flk_graph_uncolor(gp); + count = flk_color_reachables(vertex); + for (i = 0; i < nvertex; i++) { + lock = topology[i]; + if (COLORED(lock)) + continue; + if (BLOCKS(lock, vertex)) { + (void) flk_add_edge(vertex, lock, + NO_CHECK_CYCLE, update_graph); + COLOR(lock); + count++; + count += flk_color_reachables(lock); + } + + } + +next_in_edge: + if (count == nvertex || + vertex->l_sedge == HEAD(vertex)) { + /* prune the tree below this */ + STACK_POP(vertex_stack, l_stack); + vertex->l_state &= ~RECOMPUTE_DONE; + /* update the barrier locks below this! */ + if (vertex->l_sedge != HEAD(vertex) && barrier_found) { + flk_graph_uncolor(gp); + flk_update_barriers(vertex); + } + continue; + } + + ep = vertex->l_sedge; + lock = ep->from_vertex; + STACK_PUSH(vertex_stack, lock, l_stack); + lock->l_sedge = FIRST_IN(lock); + vertex->l_sedge = NEXT_IN(ep); + } + +} + +/* + * Color all reachable vertices from vertex that belongs to topology (here + * those that have RECOMPUTE_LOCK set in their state) and yet uncolored. + * + * Note: we need to use a different stack_link l_stack1 because this is + * called from flk_recompute_dependencies() that already uses a stack with + * l_stack as stack_link. + */ + +static int +flk_color_reachables(lock_descriptor_t *vertex) +{ + lock_descriptor_t *ver, *lock; + int count; + edge_t *ep; + lock_descriptor_t *vertex_stack; + + STACK_INIT(vertex_stack); + + STACK_PUSH(vertex_stack, vertex, l_stack1); + count = 0; + while ((ver = STACK_TOP(vertex_stack)) != NULL) { + + STACK_POP(vertex_stack, l_stack1); + for (ep = FIRST_ADJ(ver); ep != HEAD(ver); + ep = NEXT_ADJ(ep)) { + lock = ep->to_vertex; + if (COLORED(lock)) + continue; + COLOR(lock); + if (IS_RECOMPUTE(lock)) + count++; + STACK_PUSH(vertex_stack, lock, l_stack1); + } + + } + return (count); +} + +/* + * Called from flk_recompute_dependencies() this routine decrements + * the barrier count of barrier vertices that are reachable from lock. 
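[Editor's note: a model of the barrier bookkeeping these routines share (flk_find_barriers() below establishes the counts): walking in-edges from the cancelled vertex, any vertex reached more than once is a barrier, and its count ends up as the number of paths to it minus one. The sketch uses plain colored flags, assumed zero on entry, where the kernel uses graph marks, and assumes the graph is acyclic as the lock graph is:]

    #define NV 16
    struct bv { int colored, barrier, count, nin; struct bv *in[NV]; };

    static void find_barriers(struct bv *start) {
        struct bv *stack[NV];
        int top = 0;
        start->colored = 1;
        stack[top++] = start;
        while (top > 0) {
            struct bv *x = stack[--top];
            for (int i = 0; i < x->nin; i++) {
                struct bv *p = x->in[i];
                if (p->colored) {
                    p->barrier = 1;     /* reached a second time */
                    p->count++;         /* one more extra path */
                    continue;
                }
                p->colored = 1;
                p->count = 0;
                stack[top++] = p;
            }
        }
    }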
+ */ + +static void +flk_update_barriers(lock_descriptor_t *lock) +{ + lock_descriptor_t *vertex, *lck; + edge_t *ep; + lock_descriptor_t *vertex_stack; + + STACK_INIT(vertex_stack); + + STACK_PUSH(vertex_stack, lock, l_stack1); + + while ((vertex = STACK_TOP(vertex_stack)) != NULL) { + STACK_POP(vertex_stack, l_stack1); + for (ep = FIRST_IN(vertex); ep != HEAD(vertex); + ep = NEXT_IN(ep)) { + lck = ep->from_vertex; + if (COLORED(lck)) { + if (IS_BARRIER(lck)) { + ASSERT(lck->l_index > 0); + lck->l_index--; + if (lck->l_index == 0) + lck->l_state &= ~BARRIER_LOCK; + } + continue; + } + COLOR(lck); + if (IS_BARRIER(lck)) { + ASSERT(lck->l_index > 0); + lck->l_index--; + if (lck->l_index == 0) + lck->l_state &= ~BARRIER_LOCK; + } + STACK_PUSH(vertex_stack, lck, l_stack1); + } + } +} + +/* + * Finds all vertices that are reachable from 'lock' more than once and + * mark them as barrier vertices and increment their barrier count. + * The barrier count is one minus the total number of paths from lock + * to that vertex. + */ + +static int +flk_find_barriers(lock_descriptor_t *lock) +{ + lock_descriptor_t *vertex, *lck; + int found = 0; + edge_t *ep; + lock_descriptor_t *vertex_stack; + + STACK_INIT(vertex_stack); + + STACK_PUSH(vertex_stack, lock, l_stack1); + + while ((vertex = STACK_TOP(vertex_stack)) != NULL) { + STACK_POP(vertex_stack, l_stack1); + for (ep = FIRST_IN(vertex); ep != HEAD(vertex); + ep = NEXT_IN(ep)) { + lck = ep->from_vertex; + if (COLORED(lck)) { + /* this is a barrier */ + lck->l_state |= BARRIER_LOCK; + /* index will have barrier count */ + lck->l_index++; + if (!found) + found = 1; + continue; + } + COLOR(lck); + lck->l_index = 0; + STACK_PUSH(vertex_stack, lck, l_stack1); + } + } + return (found); +} + +/* + * Finds the first lock that is mainly responsible for blocking this + * request. If there is no such lock, request->l_flock.l_type is set to + * F_UNLCK. Otherwise, request->l_flock is filled in with the particulars + * of the blocking lock. + * + * Note: It is possible a request is blocked by a sleeping lock because + * of the fairness policy used in flk_process_request() to construct the + * dependencies. (see comments before flk_process_request()). + */ + +static void +flk_get_first_blocking_lock(lock_descriptor_t *request) +{ + graph_t *gp = request->l_graph; + vnode_t *vp = request->l_vnode; + lock_descriptor_t *lock, *blocker; + + ASSERT(MUTEX_HELD(&gp->gp_mutex)); + blocker = NULL; + SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); + + if (lock) { + do { + if (BLOCKS(lock, request)) { + blocker = lock; + break; + } + lock = lock->l_next; + } while (lock->l_vnode == vp); + } + + if (blocker) { + report_blocker(blocker, request); + } else + request->l_flock.l_type = F_UNLCK; +} + +/* + * Get the graph_t structure associated with a vnode. + * If 'initialize' is non-zero, and the graph_t structure for this vnode has + * not yet been initialized, then a new element is allocated and returned. 
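[Editor's note: the body below resolves an allocation race with the classic allocate-outside, recheck-inside pattern: build the graph before taking flock_lock, install it only if the slot is still empty, and free the losing copy. A pthread model, assuming, as in the kernel, that an installed entry is never torn down:]

    #include <pthread.h>
    #include <stdlib.h>

    static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
    static void *table[64];             /* model of lock_graph[HASH_SIZE] */

    static void *get_or_create(int index) {
        void *mine = NULL, *cur;

        if (table[index] == NULL)       /* unlocked peek, like the kernel */
            mine = calloc(1, 128);      /* tentative allocation */

        pthread_mutex_lock(&table_lock);
        cur = table[index];             /* recheck under the lock */
        if (cur == NULL) {
            table[index] = cur = mine;  /* we won the race: install ours */
            mine = NULL;
        }
        pthread_mutex_unlock(&table_lock);

        free(mine);                     /* non-NULL only if we lost */
        return (cur);
    }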
+ */ +graph_t * +flk_get_lock_graph(vnode_t *vp, int initialize) +{ + graph_t *gp; + graph_t *gp_alloc = NULL; + int index = HASH_INDEX(vp); + + if (initialize == FLK_USE_GRAPH) { + mutex_enter(&flock_lock); + gp = lock_graph[index]; + mutex_exit(&flock_lock); + return (gp); + } + + ASSERT(initialize == FLK_INIT_GRAPH); + + if (lock_graph[index] == NULL) { + + gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP); + + /* Initialize the graph */ + + gp_alloc->active_locks.l_next = + gp_alloc->active_locks.l_prev = + (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc); + gp_alloc->sleeping_locks.l_next = + gp_alloc->sleeping_locks.l_prev = + (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc); + gp_alloc->index = index; + mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL); + } + + mutex_enter(&flock_lock); + + gp = lock_graph[index]; + + /* Recheck the value within flock_lock */ + if (gp == NULL) { + struct flock_globals *fg; + + /* We must have previously allocated the graph_t structure */ + ASSERT(gp_alloc != NULL); + lock_graph[index] = gp = gp_alloc; + /* + * The lockmgr status is only needed if KLM is loaded. + */ + if (flock_zone_key != ZONE_KEY_UNINITIALIZED) { + fg = flk_get_globals(); + fg->lockmgr_status[index] = fg->flk_lockmgr_status; + } + } + + mutex_exit(&flock_lock); + + if ((gp_alloc != NULL) && (gp != gp_alloc)) { + /* There was a race to allocate the graph_t and we lost */ + mutex_destroy(&gp_alloc->gp_mutex); + kmem_free(gp_alloc, sizeof (graph_t)); + } + + return (gp); +} + +/* + * PSARC case 1997/292 + */ +int +cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid) +{ + lock_descriptor_t *lock; + int result = 0; + graph_t *gp; + int lock_nlmid; + + /* + * Check to see if node is booted as a cluster. If not, return. + */ + if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { + return (0); + } + + gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); + if (gp == NULL) { + return (0); + } + + mutex_enter(&gp->gp_mutex); + + SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); + + if (lock) { + while (lock->l_vnode == vp) { + /* get NLM id from sysid */ + lock_nlmid = GETNLMID(lock->l_flock.l_sysid); + + /* + * If NLM server request _and_ nlmid of lock matches + * nlmid of argument, then we've found a remote lock. + */ + if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { + result = 1; + goto done; + } + lock = lock->l_next; + } + } + + SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); + + if (lock) { + while (lock->l_vnode == vp) { + /* get NLM id from sysid */ + lock_nlmid = GETNLMID(lock->l_flock.l_sysid); + + /* + * If NLM server request _and_ nlmid of lock matches + * nlmid of argument, then we've found a remote lock. + */ + if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { + result = 1; + goto done; + } + lock = lock->l_next; + } + } + +done: + mutex_exit(&gp->gp_mutex); + return (result); +} + +/* ONC_PLUS EXTRACT START */ +/* + * Determine whether there are any locks for the given vnode with a remote + * sysid. Returns zero if not, non-zero if there are. + * + * Note that the return value from this function is potentially invalid + * once it has been returned. The caller is responsible for providing its + * own synchronization mechanism to ensure that the return value is useful + * (e.g., see nfs_lockcompletion()). 
+ */ +int +flk_has_remote_locks(vnode_t *vp) +{ + lock_descriptor_t *lock; + int result = 0; + graph_t *gp; + + gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); + if (gp == NULL) { + return (0); + } + + mutex_enter(&gp->gp_mutex); + + SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); + + if (lock) { + while (lock->l_vnode == vp) { + if (IS_REMOTE(lock)) { + result = 1; + goto done; + } + lock = lock->l_next; + } + } + + SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); + + if (lock) { + while (lock->l_vnode == vp) { + if (IS_REMOTE(lock)) { + result = 1; + goto done; + } + lock = lock->l_next; + } + } + +done: + mutex_exit(&gp->gp_mutex); + return (result); +} + +/* + * Determine if there are any locks owned by the given sysid. + * Returns zero if not, non-zero if there are. Note that this return code + * could be derived from flk_get_{sleeping,active}_locks, but this routine + * avoids all the memory allocations of those routines. + * + * This routine has the same synchronization issues as + * flk_has_remote_locks. + */ + +int +flk_sysid_has_locks(int sysid, int lck_type) +{ + int has_locks = 0; + lock_descriptor_t *lock; + graph_t *gp; + int i; + + for (i = 0; i < HASH_SIZE && !has_locks; i++) { + mutex_enter(&flock_lock); + gp = lock_graph[i]; + mutex_exit(&flock_lock); + if (gp == NULL) { + continue; + } + + mutex_enter(&gp->gp_mutex); + + if (lck_type & FLK_QUERY_ACTIVE) { + for (lock = ACTIVE_HEAD(gp)->l_next; + lock != ACTIVE_HEAD(gp) && !has_locks; + lock = lock->l_next) { + if (lock->l_flock.l_sysid == sysid) + has_locks = 1; + } + } + + if (lck_type & FLK_QUERY_SLEEPING) { + for (lock = SLEEPING_HEAD(gp)->l_next; + lock != SLEEPING_HEAD(gp) && !has_locks; + lock = lock->l_next) { + if (lock->l_flock.l_sysid == sysid) + has_locks = 1; + } + } + mutex_exit(&gp->gp_mutex); + } + + return (has_locks); +} + + +/* + * PSARC case 1997/292 + * + * Requires: "sysid" is a pair [nlmid, sysid]. The lower half is 16-bit + * quantity, the real sysid generated by the NLM server; the upper half + * identifies the node of the cluster where the NLM server ran. + * This routine is only called by an NLM server running in a cluster. + * Effects: Remove all locks held on behalf of the client identified + * by "sysid." + */ +void +cl_flk_remove_locks_by_sysid(int sysid) +{ + graph_t *gp; + int i; + lock_descriptor_t *lock, *nlock; + + /* + * Check to see if node is booted as a cluster. If not, return. + */ + if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { + return; + } + + ASSERT(sysid != 0); + for (i = 0; i < HASH_SIZE; i++) { + mutex_enter(&flock_lock); + gp = lock_graph[i]; + mutex_exit(&flock_lock); + + if (gp == NULL) + continue; + + mutex_enter(&gp->gp_mutex); /* get mutex on lock graph */ + + /* signal sleeping requests so that they bail out */ + lock = SLEEPING_HEAD(gp)->l_next; + while (lock != SLEEPING_HEAD(gp)) { + nlock = lock->l_next; + if (lock->l_flock.l_sysid == sysid) { + INTERRUPT_WAKEUP(lock); + } + lock = nlock; + } + + /* delete active locks */ + lock = ACTIVE_HEAD(gp)->l_next; + while (lock != ACTIVE_HEAD(gp)) { + nlock = lock->l_next; + if (lock->l_flock.l_sysid == sysid) { + flk_delete_active_lock(lock, 0); + flk_wakeup(lock, 1); + flk_free_lock(lock); + } + lock = nlock; + } + mutex_exit(&gp->gp_mutex); /* release mutex on lock graph */ + } +} + +/* + * Delete all locks in the system that belongs to the sysid of the request. 
+ */ + +static void +flk_delete_locks_by_sysid(lock_descriptor_t *request) +{ + int sysid = request->l_flock.l_sysid; + lock_descriptor_t *lock, *nlock; + graph_t *gp; + int i; + + ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex)); + ASSERT(sysid != 0); + + mutex_exit(&request->l_graph->gp_mutex); + + for (i = 0; i < HASH_SIZE; i++) { + mutex_enter(&flock_lock); + gp = lock_graph[i]; + mutex_exit(&flock_lock); + + if (gp == NULL) + continue; + + mutex_enter(&gp->gp_mutex); + + /* signal sleeping requests so that they bail out */ + lock = SLEEPING_HEAD(gp)->l_next; + while (lock != SLEEPING_HEAD(gp)) { + nlock = lock->l_next; + if (lock->l_flock.l_sysid == sysid) { + INTERRUPT_WAKEUP(lock); + } + lock = nlock; + } + + /* delete active locks */ + lock = ACTIVE_HEAD(gp)->l_next; + while (lock != ACTIVE_HEAD(gp)) { + nlock = lock->l_next; + if (lock->l_flock.l_sysid == sysid) { + flk_delete_active_lock(lock, 0); + flk_wakeup(lock, 1); + flk_free_lock(lock); + } + lock = nlock; + } + mutex_exit(&gp->gp_mutex); + } + + mutex_enter(&request->l_graph->gp_mutex); +} + +/* + * Clustering: Deletes PXFS locks + * Effects: Delete all locks on files in the given file system and with the + * given PXFS id. + */ +void +cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid) +{ + lock_descriptor_t *lock, *nlock; + graph_t *gp; + int i; + + for (i = 0; i < HASH_SIZE; i++) { + mutex_enter(&flock_lock); + gp = lock_graph[i]; + mutex_exit(&flock_lock); + + if (gp == NULL) + continue; + + mutex_enter(&gp->gp_mutex); + + /* signal sleeping requests so that they bail out */ + lock = SLEEPING_HEAD(gp)->l_next; + while (lock != SLEEPING_HEAD(gp)) { + nlock = lock->l_next; + if (lock->l_vnode->v_vfsp == vfsp) { + ASSERT(IS_PXFS(lock)); + if (GETPXFSID(lock->l_flock.l_sysid) == + pxfsid) { + flk_set_state(lock, + FLK_CANCELLED_STATE); + flk_cancel_sleeping_lock(lock, 1); + } + } + lock = nlock; + } + + /* delete active locks */ + lock = ACTIVE_HEAD(gp)->l_next; + while (lock != ACTIVE_HEAD(gp)) { + nlock = lock->l_next; + if (lock->l_vnode->v_vfsp == vfsp) { + ASSERT(IS_PXFS(lock)); + if (GETPXFSID(lock->l_flock.l_sysid) == + pxfsid) { + flk_delete_active_lock(lock, 0); + flk_wakeup(lock, 1); + flk_free_lock(lock); + } + } + lock = nlock; + } + mutex_exit(&gp->gp_mutex); + } +} + +/* + * Search for a sleeping lock manager lock which matches exactly this lock + * request; if one is found, fake a signal to cancel it. + * + * Return 1 if a matching lock was found, 0 otherwise. + */ + +static int +flk_canceled(lock_descriptor_t *request) +{ + lock_descriptor_t *lock, *nlock; + graph_t *gp = request->l_graph; + vnode_t *vp = request->l_vnode; + + ASSERT(MUTEX_HELD(&gp->gp_mutex)); + ASSERT(IS_LOCKMGR(request)); + SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); + + if (lock) { + while (lock->l_vnode == vp) { + nlock = lock->l_next; + if (SAME_OWNER(lock, request) && + lock->l_start == request->l_start && + lock->l_end == request->l_end) { + INTERRUPT_WAKEUP(lock); + return (1); + } + lock = nlock; + } + } + return (0); +} + +/* + * Remove all the locks for the vnode belonging to the given pid and sysid. 
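+ *
+ * Invocation sketch (the IGN_PID behavior is implemented below; the
+ * actual call sites live outside this file):
+ *
+ *	cleanlocks(vp, curproc->p_pid, 0);	(one local process' locks)
+ *	cleanlocks(vp, IGN_PID, sysid);		(every lock from 'sysid')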
+ */
+
+void
+cleanlocks(vnode_t *vp, pid_t pid, int sysid)
+{
+	graph_t	*gp;
+	lock_descriptor_t *lock, *nlock;
+	lock_descriptor_t *link_stack;
+
+	STACK_INIT(link_stack);
+
+	gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
+
+	if (gp == NULL)
+		return;
+	mutex_enter(&gp->gp_mutex);
+
+	CHECK_SLEEPING_LOCKS(gp);
+	CHECK_ACTIVE_LOCKS(gp);
+
+	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
+
+	if (lock) {
+		do {
+			nlock = lock->l_next;
+			if ((lock->l_flock.l_pid == pid ||
+			    pid == IGN_PID) &&
+			    lock->l_flock.l_sysid == sysid) {
+				CANCEL_WAKEUP(lock);
+			}
+			lock = nlock;
+		} while (lock->l_vnode == vp);
+	}
+
+	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
+
+	if (lock) {
+		do {
+			nlock = lock->l_next;
+			if ((lock->l_flock.l_pid == pid ||
+			    pid == IGN_PID) &&
+			    lock->l_flock.l_sysid == sysid) {
+				flk_delete_active_lock(lock, 0);
+				STACK_PUSH(link_stack, lock, l_stack);
+			}
+			lock = nlock;
+		} while (lock->l_vnode == vp);
+	}
+
+	while ((lock = STACK_TOP(link_stack)) != NULL) {
+		STACK_POP(link_stack, l_stack);
+		flk_wakeup(lock, 1);
+		flk_free_lock(lock);
+	}
+
+	CHECK_SLEEPING_LOCKS(gp);
+	CHECK_ACTIVE_LOCKS(gp);
+	CHECK_OWNER_LOCKS(gp, pid, sysid, vp);
+	mutex_exit(&gp->gp_mutex);
+}
+/* ONC_PLUS EXTRACT END */
+
+
+/*
+ * Called from 'fs' read and write routines for files that have mandatory
+ * locking enabled.
+ */
+
+int
+chklock(
+	struct vnode	*vp,
+	int		iomode,
+	u_offset_t	offset,
+	ssize_t		len,
+	int		fmode,
+	caller_context_t *ct)
+{
+	register int	i;
+	struct flock64	bf;
+	int		error = 0;
+
+	bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK;
+	bf.l_whence = 0;
+	bf.l_start = offset;
+	bf.l_len = len;
+	if (ct == NULL) {
+		bf.l_pid = curproc->p_pid;
+		bf.l_sysid = 0;
+	} else {
+		bf.l_pid = ct->cc_pid;
+		bf.l_sysid = ct->cc_sysid;
+	}
+	i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK;
+	if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 ||
+	    bf.l_type != F_UNLCK)
+		error = i ? i : EAGAIN;
+	return (error);
+}
+
+/* ONC_PLUS EXTRACT START */
+/*
+ * convoff - converts the given data (start, whence) to the
+ * given whence.
+ */
+int
+convoff(vp, lckdat, whence, offset)
+	struct vnode	*vp;
+	struct flock64	*lckdat;
+	int		whence;
+	offset_t	offset;
+{
+	int		error;
+	struct vattr	vattr;
+
+	if ((lckdat->l_whence == 2) || (whence == 2)) {
+		vattr.va_mask = AT_SIZE;
+		if (error = VOP_GETATTR(vp, &vattr, 0, CRED()))
+			return (error);
+	}
+
+	switch (lckdat->l_whence) {
+	case 1:
+		lckdat->l_start += offset;
+		break;
+	case 2:
+		lckdat->l_start += vattr.va_size;
+		/* FALLTHRU */
+	case 0:
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	if (lckdat->l_start < 0)
+		return (EINVAL);
+
+	switch (whence) {
+	case 1:
+		lckdat->l_start -= offset;
+		break;
+	case 2:
+		lckdat->l_start -= vattr.va_size;
+		/* FALLTHRU */
+	case 0:
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	lckdat->l_whence = (short)whence;
+	return (0);
+}
+/* ONC_PLUS EXTRACT END */
+
+
+/* proc_graph function definitions */
+
+/*
+ * This function checks for a deadlock that would be caused by the new
+ * 'lock'.  If a deadlock is found, the edges of this lock are freed and
+ * 1 is returned.
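+ *
+ * A worked two-process example of the cycle this routine detects (the
+ * file descriptors and ordering are illustrative only):
+ *
+ *	P1: fcntl(fdA, F_SETLKW, ...)	granted; P1 holds A
+ *	P2: fcntl(fdB, F_SETLKW, ...)	granted; P2 holds B
+ *	P1: fcntl(fdB, F_SETLKW, ...)	sleeps; proc edge P1 -> P2
+ *	P2: fcntl(fdA, F_SETLKW, ...)	would add P2 -> P1, closing the
+ *					cycle, so it fails with EDEADLK
+ *					instead of sleeping.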
+ */ + +static int +flk_check_deadlock(lock_descriptor_t *lock) +{ + proc_vertex_t *start_vertex, *pvertex; + proc_vertex_t *dvertex; + proc_edge_t *pep, *ppep; + edge_t *ep, *nep; + proc_vertex_t *process_stack; + + STACK_INIT(process_stack); + + mutex_enter(&flock_lock); + start_vertex = flk_get_proc_vertex(lock); + ASSERT(start_vertex != NULL); + + /* construct the edges from this process to other processes */ + + ep = FIRST_ADJ(lock); + while (ep != HEAD(lock)) { + proc_vertex_t *adj_proc; + + adj_proc = flk_get_proc_vertex(ep->to_vertex); + for (pep = start_vertex->edge; pep != NULL; pep = pep->next) { + if (pep->to_proc == adj_proc) { + ASSERT(pep->refcount); + pep->refcount++; + break; + } + } + if (pep == NULL) { + pep = flk_get_proc_edge(); + pep->to_proc = adj_proc; + pep->refcount = 1; + adj_proc->incount++; + pep->next = start_vertex->edge; + start_vertex->edge = pep; + } + ep = NEXT_ADJ(ep); + } + + ep = FIRST_IN(lock); + + while (ep != HEAD(lock)) { + proc_vertex_t *in_proc; + + in_proc = flk_get_proc_vertex(ep->from_vertex); + + for (pep = in_proc->edge; pep != NULL; pep = pep->next) { + if (pep->to_proc == start_vertex) { + ASSERT(pep->refcount); + pep->refcount++; + break; + } + } + if (pep == NULL) { + pep = flk_get_proc_edge(); + pep->to_proc = start_vertex; + pep->refcount = 1; + start_vertex->incount++; + pep->next = in_proc->edge; + in_proc->edge = pep; + } + ep = NEXT_IN(ep); + } + + if (start_vertex->incount == 0) { + mutex_exit(&flock_lock); + return (0); + } + + flk_proc_graph_uncolor(); + + start_vertex->p_sedge = start_vertex->edge; + + STACK_PUSH(process_stack, start_vertex, p_stack); + + while ((pvertex = STACK_TOP(process_stack)) != NULL) { + for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) { + dvertex = pep->to_proc; + if (!PROC_ARRIVED(dvertex)) { + STACK_PUSH(process_stack, dvertex, p_stack); + dvertex->p_sedge = dvertex->edge; + PROC_ARRIVE(pvertex); + pvertex->p_sedge = pep->next; + break; + } + if (!PROC_DEPARTED(dvertex)) + goto deadlock; + } + if (pep == NULL) { + PROC_DEPART(pvertex); + STACK_POP(process_stack, p_stack); + } + } + mutex_exit(&flock_lock); + return (0); + +deadlock: + + /* we remove all lock edges and proc edges */ + + ep = FIRST_ADJ(lock); + while (ep != HEAD(lock)) { + proc_vertex_t *adj_proc; + adj_proc = flk_get_proc_vertex(ep->to_vertex); + nep = NEXT_ADJ(ep); + IN_LIST_REMOVE(ep); + ADJ_LIST_REMOVE(ep); + flk_free_edge(ep); + ppep = start_vertex->edge; + for (pep = start_vertex->edge; pep != NULL; ppep = pep, + pep = ppep->next) { + if (pep->to_proc == adj_proc) { + pep->refcount--; + if (pep->refcount == 0) { + if (pep == ppep) { + start_vertex->edge = pep->next; + } else { + ppep->next = pep->next; + } + adj_proc->incount--; + flk_proc_release(adj_proc); + flk_free_proc_edge(pep); + } + break; + } + } + ep = nep; + } + ep = FIRST_IN(lock); + while (ep != HEAD(lock)) { + proc_vertex_t *in_proc; + in_proc = flk_get_proc_vertex(ep->from_vertex); + nep = NEXT_IN(ep); + IN_LIST_REMOVE(ep); + ADJ_LIST_REMOVE(ep); + flk_free_edge(ep); + ppep = in_proc->edge; + for (pep = in_proc->edge; pep != NULL; ppep = pep, + pep = ppep->next) { + if (pep->to_proc == start_vertex) { + pep->refcount--; + if (pep->refcount == 0) { + if (pep == ppep) { + in_proc->edge = pep->next; + } else { + ppep->next = pep->next; + } + start_vertex->incount--; + flk_proc_release(in_proc); + flk_free_proc_edge(pep); + } + break; + } + } + ep = nep; + } + flk_proc_release(start_vertex); + mutex_exit(&flock_lock); + return (1); +} + +/* + * Get a proc 
vertex.  If the lock's pvertex value points at a proc vertex with the
+ * same owner, we return that vertex; otherwise we allocate one.  If
+ * necessary, we also grow the list of vertices.
+ */
+
+static proc_vertex_t *
+flk_get_proc_vertex(lock_descriptor_t *lock)
+{
+	int i;
+	proc_vertex_t	*pv;
+	proc_vertex_t **palloc;
+
+	ASSERT(MUTEX_HELD(&flock_lock));
+	if (lock->pvertex != -1) {
+		ASSERT(lock->pvertex >= 0);
+		pv = pgraph.proc[lock->pvertex];
+		if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
+			return (pv);
+		}
+	}
+	for (i = 0; i < pgraph.gcount; i++) {
+		pv = pgraph.proc[i];
+		if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
+			lock->pvertex = pv->index = i;
+			return (pv);
+		}
+	}
+	pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP);
+	pv->pid = lock->l_flock.l_pid;
+	pv->sysid = lock->l_flock.l_sysid;
+	flk_proc_vertex_allocs++;
+	if (pgraph.free != 0) {
+		for (i = 0; i < pgraph.gcount; i++) {
+			if (pgraph.proc[i] == NULL) {
+				pgraph.proc[i] = pv;
+				lock->pvertex = pv->index = i;
+				pgraph.free--;
+				return (pv);
+			}
+		}
+	}
+	palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) *
+	    sizeof (proc_vertex_t *), KM_SLEEP);
+
+	if (pgraph.proc) {
+		bcopy(pgraph.proc, palloc,
+		    pgraph.gcount * sizeof (proc_vertex_t *));
+
+		kmem_free(pgraph.proc,
+		    pgraph.gcount * sizeof (proc_vertex_t *));
+	}
+	pgraph.proc = palloc;
+	pgraph.free += (PROC_CHUNK - 1);
+	pv->index = lock->pvertex = pgraph.gcount;
+	pgraph.gcount += PROC_CHUNK;
+	pgraph.proc[pv->index] = pv;
+	return (pv);
+}
+
+/*
+ * Allocate a proc edge.
+ */
+
+static proc_edge_t *
+flk_get_proc_edge()
+{
+	proc_edge_t *pep;
+
+	pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP);
+	flk_proc_edge_allocs++;
+	return (pep);
+}
+
+/*
+ * Free the proc edge.  Called whenever its reference count goes to zero.
+ */
+
+static void
+flk_free_proc_edge(proc_edge_t *pep)
+{
+	ASSERT(pep->refcount == 0);
+	kmem_free((void *)pep, sizeof (proc_edge_t));
+	flk_proc_edge_frees++;
+}
+
+/*
+ * Uncolor the process graph.  Explicitly clearing every vertex is done
+ * only when the mark value hits its maximum.
+ */
+
+static void
+flk_proc_graph_uncolor()
+{
+	int i;
+
+	if (pgraph.mark == UINT_MAX) {
+		for (i = 0; i < pgraph.gcount; i++)
+			if (pgraph.proc[i] != NULL) {
+				pgraph.proc[i]->atime = 0;
+				pgraph.proc[i]->dtime = 0;
+			}
+		pgraph.mark = 1;
+	} else {
+		pgraph.mark++;
+	}
+}
+
+/*
+ * Release the proc vertex iff it has neither in-edges nor out-edges.
+ */
+
+static void
+flk_proc_release(proc_vertex_t *proc)
+{
+	ASSERT(MUTEX_HELD(&flock_lock));
+	if (proc->edge == NULL && proc->incount == 0) {
+		pgraph.proc[proc->index] = NULL;
+		pgraph.free++;
+		kmem_free(proc, sizeof (proc_vertex_t));
+		flk_proc_vertex_frees++;
+	}
+}
+
+/*
+ * Updates the process graph to reflect a change in a lock graph.
+ * Note: we should call this function only after we have a correctly
+ * recomputed lock graph.  Otherwise we might miss a deadlock detection.
+ * E.g., in flk_relation() we call this function after
+ * flk_recompute_dependencies(); otherwise, if a process tries to lock a
+ * vnode hashed into another graph, it might sleep forever.
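+ *
+ * Call sketch, grounded in the add/delete branches below ('ep' is an
+ * edge already linked into the lock graph):
+ *
+ *	flk_update_proc_graph(ep, 0);	(a lock edge was added)
+ *	flk_update_proc_graph(ep, 1);	(a lock edge is being removed)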
+ */ + +static void +flk_update_proc_graph(edge_t *ep, int delete) +{ + proc_vertex_t *toproc, *fromproc; + proc_edge_t *pep, *prevpep; + + mutex_enter(&flock_lock); + toproc = flk_get_proc_vertex(ep->to_vertex); + fromproc = flk_get_proc_vertex(ep->from_vertex); + + if (!delete) + goto add; + pep = prevpep = fromproc->edge; + + ASSERT(pep != NULL); + while (pep != NULL) { + if (pep->to_proc == toproc) { + ASSERT(pep->refcount > 0); + pep->refcount--; + if (pep->refcount == 0) { + if (pep == prevpep) { + fromproc->edge = pep->next; + } else { + prevpep->next = pep->next; + } + toproc->incount--; + flk_proc_release(toproc); + flk_free_proc_edge(pep); + } + break; + } + prevpep = pep; + pep = pep->next; + } + flk_proc_release(fromproc); + mutex_exit(&flock_lock); + return; +add: + + pep = fromproc->edge; + + while (pep != NULL) { + if (pep->to_proc == toproc) { + ASSERT(pep->refcount > 0); + pep->refcount++; + break; + } + pep = pep->next; + } + if (pep == NULL) { + pep = flk_get_proc_edge(); + pep->to_proc = toproc; + pep->refcount = 1; + toproc->incount++; + pep->next = fromproc->edge; + fromproc->edge = pep; + } + mutex_exit(&flock_lock); +} + +/* ONC_PLUS EXTRACT START */ +/* + * Set the control status for lock manager requests. + * + */ + +/* + * PSARC case 1997/292 + * + * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid(). + * Effects: Set the state of the NLM server identified by "nlmid" + * in the NLM registry to state "nlm_state." + * Raises exception no_such_nlm if "nlmid" doesn't identify a known + * NLM server to this LLM. + * Note that when this routine is called with NLM_SHUTTING_DOWN there + * may be locks requests that have gotten started but not finished. In + * particular, there may be blocking requests that are in the callback code + * before sleeping (so they're not holding the lock for the graph). If + * such a thread reacquires the graph's lock (to go to sleep) after + * NLM state in the NLM registry is set to a non-up value, + * it will notice the status and bail out. If the request gets + * granted before the thread can check the NLM registry, let it + * continue normally. It will get flushed when we are called with NLM_DOWN. + * + * Modifies: nlm_reg_obj (global) + * Arguments: + * nlmid (IN): id uniquely identifying an NLM server + * nlm_state (IN): NLM server state to change "nlmid" to + */ +void +cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state) +{ + /* + * Check to see if node is booted as a cluster. If not, return. + */ + if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { + return; + } + + /* + * Check for development/debugging. It is possible to boot a node + * in non-cluster mode, and then run a special script, currently + * available only to developers, to bring up the node as part of a + * cluster. The problem is that running such a script does not + * result in the routine flk_init() being called and hence global array + * nlm_reg_status is NULL. The NLM thinks it's in cluster mode, + * but the LLM needs to do an additional check to see if the global + * array has been created or not. If nlm_reg_status is NULL, then + * return, else continue. + */ + if (nlm_reg_status == NULL) { + return; + } + + ASSERT(nlmid <= nlm_status_size && nlmid >= 0); + mutex_enter(&nlm_reg_lock); + + if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) { + /* + * If the NLM server "nlmid" is unknown in the NLM registry, + * add it to the registry in the nlm shutting down state. 
+ */ + FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, + FLK_NLM_SHUTTING_DOWN); + } else { + /* + * Change the state of the NLM server identified by "nlmid" + * in the NLM registry to the argument "nlm_state." + */ + FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, + nlm_state); + } + + /* + * The reason we must register the NLM server that is shutting down + * with an LLM that doesn't already know about it (never sent a lock + * request) is to handle correctly a race between shutdown and a new + * lock request. Suppose that a shutdown request from the NLM server + * invokes this routine at the LLM, and a thread is spawned to + * service the request. Now suppose a new lock request is in + * progress and has already passed the first line of defense in + * reclock(), which denies new locks requests from NLM servers + * that are not in the NLM_UP state. After the current routine + * is invoked for both phases of shutdown, the routine will return, + * having done nothing, and the lock request will proceed and + * probably be granted. The problem is that the shutdown was ignored + * by the lock request because there was no record of that NLM server + * shutting down. We will be in the peculiar position of thinking + * that we've shutdown the NLM server and all locks at all LLMs have + * been discarded, but in fact there's still one lock held. + * The solution is to record the existence of NLM server and change + * its state immediately to NLM_SHUTTING_DOWN. The lock request in + * progress may proceed because the next phase NLM_DOWN will catch + * this lock and discard it. + */ + mutex_exit(&nlm_reg_lock); + + switch (nlm_state) { + case FLK_NLM_UP: + /* + * Change the NLM state of all locks still held on behalf of + * the NLM server identified by "nlmid" to NLM_UP. + */ + cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP); + break; + + case FLK_NLM_SHUTTING_DOWN: + /* + * Wake up all sleeping locks for the NLM server identified + * by "nlmid." Note that eventually all woken threads will + * have their lock requests cancelled and descriptors + * removed from the sleeping lock list. Note that the NLM + * server state associated with each lock descriptor is + * changed to FLK_NLM_SHUTTING_DOWN. + */ + cl_flk_wakeup_sleeping_nlm_locks(nlmid); + break; + + case FLK_NLM_DOWN: + /* + * Discard all active, granted locks for this NLM server + * identified by "nlmid." + */ + cl_flk_unlock_nlm_granted(nlmid); + break; + + default: + panic("cl_set_nlm_status: bad status (%d)", nlm_state); + } +} + +/* + * Set the control status for lock manager requests. + * + * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there + * may be locks requests that have gotten started but not finished. In + * particular, there may be blocking requests that are in the callback code + * before sleeping (so they're not holding the lock for the graph). If + * such a thread reacquires the graph's lock (to go to sleep) after + * flk_lockmgr_status is set to a non-up value, it will notice the status + * and bail out. If the request gets granted before the thread can check + * flk_lockmgr_status, let it continue normally. It will get flushed when + * we are called with FLK_LOCKMGR_DOWN. 
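+ *
+ * The two-phase sequencing described above, as a sketch (the callers
+ * are the lock manager control paths, outside this file):
+ *
+ *	flk_set_lockmgr_status(FLK_WAKEUP_SLEEPERS);	(poke sleepers)
+ *	flk_set_lockmgr_status(FLK_LOCKMGR_DOWN);	(flush granted locks)
+ *	...
+ *	flk_set_lockmgr_status(FLK_LOCKMGR_UP);		(back in service)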
+ */
+
+void
+flk_set_lockmgr_status(flk_lockmgr_status_t status)
+{
+	int i;
+	graph_t *gp;
+	struct flock_globals *fg;
+
+	fg = flk_get_globals();
+	ASSERT(fg != NULL);
+
+	mutex_enter(&flock_lock);
+	fg->flk_lockmgr_status = status;
+	mutex_exit(&flock_lock);
+
+	/*
+	 * If the lock manager is coming back up, all that's needed is to
+	 * propagate this information to the graphs.  If the lock manager
+	 * is going down, additional action is required, and each graph's
+	 * copy of the state is updated atomically with this other action.
+	 */
+	switch (status) {
+	case FLK_LOCKMGR_UP:
+		for (i = 0; i < HASH_SIZE; i++) {
+			mutex_enter(&flock_lock);
+			gp = lock_graph[i];
+			mutex_exit(&flock_lock);
+			if (gp == NULL)
+				continue;
+			mutex_enter(&gp->gp_mutex);
+			fg->lockmgr_status[i] = status;
+			mutex_exit(&gp->gp_mutex);
+		}
+		break;
+	case FLK_WAKEUP_SLEEPERS:
+		wakeup_sleeping_lockmgr_locks(fg);
+		break;
+	case FLK_LOCKMGR_DOWN:
+		unlock_lockmgr_granted(fg);
+		break;
+	default:
+		panic("flk_set_lockmgr_status: bad status (%d)", status);
+		break;
+	}
+}
+
+/*
+ * This routine returns all the locks that are active or sleeping and are
+ * associated with a particular set of identifiers.  If lock_state != 0, then
+ * only locks that match the lock_state are returned. If lock_state == 0, then
+ * all locks are returned. If pid == NOPID, the pid is ignored. If
+ * use_sysid is FALSE, then the sysid is ignored. If vp is NULL, then the
+ * vnode pointer is ignored.
+ *
+ * A list containing the vnode pointer and an flock structure
+ * describing the lock is returned.  Each element in the list is
+ * dynamically allocated and must be freed by the caller.  The
+ * last item in the list is denoted by a NULL value in the ll_next
+ * field.
+ *
+ * The vnode pointers returned are held.  The caller is responsible
+ * for releasing these.  Note that the returned list is only a snapshot of
+ * the current lock information, and that it is a snapshot of a moving
+ * target (only one graph is locked at a time).
+ */
+
+locklist_t *
+get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid,
+    pid_t pid, const vnode_t *vp, zoneid_t zoneid)
+{
+	lock_descriptor_t	*lock;
+	lock_descriptor_t	*graph_head;
+	locklist_t		listhead;
+	locklist_t		*llheadp;
+	locklist_t		*llp;
+	locklist_t		*lltp;
+	graph_t			*gp;
+	int			i;
+	int			first_index; /* graph index */
+	int			num_indexes; /* graph index */
+
+	ASSERT((list_type == FLK_ACTIVE_STATE) ||
+	    (list_type == FLK_SLEEPING_STATE));
+
+	/*
+	 * Get a pointer to something to use as a list head while building
+	 * the rest of the list.
+	 */
+	llheadp = &listhead;
+	lltp = llheadp;
+	llheadp->ll_next = (locklist_t *)NULL;
+
+	/* Figure out which graphs we want to look at. */
+	if (vp == NULL) {
+		first_index = 0;
+		num_indexes = HASH_SIZE;
+	} else {
+		first_index = HASH_INDEX(vp);
+		num_indexes = 1;
+	}
+
+	for (i = first_index; i < first_index + num_indexes; i++) {
+		mutex_enter(&flock_lock);
+		gp = lock_graph[i];
+		mutex_exit(&flock_lock);
+		if (gp == NULL) {
+			continue;
+		}
+
+		mutex_enter(&gp->gp_mutex);
+		graph_head = (list_type == FLK_ACTIVE_STATE) ?
+ ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp); + for (lock = graph_head->l_next; + lock != graph_head; + lock = lock->l_next) { + if (use_sysid && lock->l_flock.l_sysid != sysid) + continue; + if (pid != NOPID && lock->l_flock.l_pid != pid) + continue; + if (vp != NULL && lock->l_vnode != vp) + continue; + if (lock_state && !(lock_state & lock->l_state)) + continue; + if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES) + continue; + /* + * A matching lock was found. Allocate + * space for a new locklist entry and fill + * it in. + */ + llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP); + lltp->ll_next = llp; + VN_HOLD(lock->l_vnode); + llp->ll_vp = lock->l_vnode; + create_flock(lock, &(llp->ll_flock)); + llp->ll_next = (locklist_t *)NULL; + lltp = llp; + } + mutex_exit(&gp->gp_mutex); + } + + llp = llheadp->ll_next; + return (llp); +} + +/* + * These two functions are simply interfaces to get_lock_list. They return + * a list of sleeping or active locks for the given sysid and pid. See + * get_lock_list for details. + * + * In either case we don't particularly care to specify the zone of interest; + * the sysid-space is global across zones, so the sysid will map to exactly one + * zone, and we'll return information for that zone. + */ + +locklist_t * +flk_get_sleeping_locks(int sysid, pid_t pid) +{ + return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL, + ALL_ZONES)); +} + +locklist_t * +flk_get_active_locks(int sysid, pid_t pid) +{ + return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL, + ALL_ZONES)); +} + +/* + * Another interface to get_lock_list. This one returns all the active + * locks for a given vnode. Again, see get_lock_list for details. + * + * We don't need to specify which zone's locks we're interested in. The matter + * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't + * be used by multiple zones, so the list of locks will all be from the right + * zone. + */ + +locklist_t * +flk_active_locks_for_vp(const vnode_t *vp) +{ + return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp, + ALL_ZONES)); +} + +/* + * Another interface to get_lock_list. This one returns all the active + * nbmand locks for a given vnode. Again, see get_lock_list for details. + * + * See the comment for flk_active_locks_for_vp() for why we don't care to + * specify the particular zone of interest. + */ +locklist_t * +flk_active_nbmand_locks_for_vp(const vnode_t *vp) +{ + return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE, + NOPID, vp, ALL_ZONES)); +} + +/* + * Another interface to get_lock_list. This one returns all the active + * nbmand locks for a given pid. Again, see get_lock_list for details. + * + * The zone doesn't need to be specified here; the locks held by a + * particular process will either be local (ie, non-NFS) or from the zone + * the process is executing in. This is because other parts of the system + * ensure that an NFS vnode can't be used in a zone other than that in + * which it was opened. + */ +locklist_t * +flk_active_nbmand_locks(pid_t pid) +{ + return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE, + pid, NULL, ALL_ZONES)); +} + +/* + * Free up all entries in the locklist. 
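+ *
+ * Sketch of the produce/walk/free cycle expected of callers, using the
+ * locklist_t fields documented at get_lock_list() above:
+ *
+ *	locklist_t *llp, *llhead = flk_active_locks_for_vp(vp);
+ *	for (llp = llhead; llp != NULL; llp = llp->ll_next)
+ *		... inspect llp->ll_vp and llp->ll_flock ...
+ *	flk_free_locklist(llhead);	(also releases the vnode holds)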
+ */ +void +flk_free_locklist(locklist_t *llp) +{ + locklist_t *next_llp; + + while (llp) { + next_llp = llp->ll_next; + VN_RELE(llp->ll_vp); + kmem_free(llp, sizeof (*llp)); + llp = next_llp; + } +} + +static void +cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state) +{ + /* + * For each graph "lg" in the hash table lock_graph do + * a. Get the list of sleeping locks + * b. For each lock descriptor in the list do + * i. If the requested lock is an NLM server request AND + * the nlmid is the same as the routine argument then + * change the lock descriptor's state field to + * "nlm_state." + * c. Get the list of active locks + * d. For each lock descriptor in the list do + * i. If the requested lock is an NLM server request AND + * the nlmid is the same as the routine argument then + * change the lock descriptor's state field to + * "nlm_state." + */ + + int i; + graph_t *gp; /* lock graph */ + lock_descriptor_t *lock; /* lock */ + lock_descriptor_t *nlock = NULL; /* next lock */ + int lock_nlmid; + + for (i = 0; i < HASH_SIZE; i++) { + mutex_enter(&flock_lock); + gp = lock_graph[i]; + mutex_exit(&flock_lock); + if (gp == NULL) { + continue; + } + + /* Get list of sleeping locks in current lock graph. */ + mutex_enter(&gp->gp_mutex); + for (lock = SLEEPING_HEAD(gp)->l_next; + lock != SLEEPING_HEAD(gp); + lock = nlock) { + nlock = lock->l_next; + /* get NLM id */ + lock_nlmid = GETNLMID(lock->l_flock.l_sysid); + + /* + * If NLM server request AND nlmid of lock matches + * nlmid of argument, then set the NLM state of the + * lock to "nlm_state." + */ + if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { + SET_NLM_STATE(lock, nlm_state); + } + } + + /* Get list of active locks in current lock graph. */ + for (lock = ACTIVE_HEAD(gp)->l_next; + lock != ACTIVE_HEAD(gp); + lock = nlock) { + nlock = lock->l_next; + /* get NLM id */ + lock_nlmid = GETNLMID(lock->l_flock.l_sysid); + + /* + * If NLM server request AND nlmid of lock matches + * nlmid of argument, then set the NLM state of the + * lock to "nlm_state." + */ + if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { + ASSERT(IS_ACTIVE(lock)); + SET_NLM_STATE(lock, nlm_state); + } + } + mutex_exit(&gp->gp_mutex); + } +} + +/* + * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid(). + * Effects: Find all sleeping lock manager requests _only_ for the NLM server + * identified by "nlmid." Poke those lock requests. + */ +static void +cl_flk_wakeup_sleeping_nlm_locks(int nlmid) +{ + lock_descriptor_t *lock; + lock_descriptor_t *nlock = NULL; /* next lock */ + int i; + graph_t *gp; + int lock_nlmid; + + for (i = 0; i < HASH_SIZE; i++) { + mutex_enter(&flock_lock); + gp = lock_graph[i]; + mutex_exit(&flock_lock); + if (gp == NULL) { + continue; + } + + mutex_enter(&gp->gp_mutex); + for (lock = SLEEPING_HEAD(gp)->l_next; + lock != SLEEPING_HEAD(gp); + lock = nlock) { + nlock = lock->l_next; + /* + * If NLM server request _and_ nlmid of lock matches + * nlmid of argument, then set the NLM state of the + * lock to NLM_SHUTTING_DOWN, and wake up sleeping + * request. + */ + if (IS_LOCKMGR(lock)) { + /* get NLM id */ + lock_nlmid = + GETNLMID(lock->l_flock.l_sysid); + if (nlmid == lock_nlmid) { + SET_NLM_STATE(lock, + FLK_NLM_SHUTTING_DOWN); + INTERRUPT_WAKEUP(lock); + } + } + } + mutex_exit(&gp->gp_mutex); + } +} + +/* + * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid() + * Effects: Find all active (granted) lock manager locks _only_ for the + * NLM server identified by "nlmid" and release them. 
+ */
+static void
+cl_flk_unlock_nlm_granted(int nlmid)
+{
+	lock_descriptor_t *lock;
+	lock_descriptor_t *nlock = NULL; /* next lock */
+	int i;
+	graph_t *gp;
+	int	lock_nlmid;
+
+	for (i = 0; i < HASH_SIZE; i++) {
+		mutex_enter(&flock_lock);
+		gp = lock_graph[i];
+		mutex_exit(&flock_lock);
+		if (gp == NULL) {
+			continue;
+		}
+
+		mutex_enter(&gp->gp_mutex);
+		for (lock = ACTIVE_HEAD(gp)->l_next;
+		    lock != ACTIVE_HEAD(gp);
+		    lock = nlock) {
+			nlock = lock->l_next;
+			ASSERT(IS_ACTIVE(lock));
+
+			/*
+			 * If it's an NLM server request _and_ the nlmid of
+			 * the lock matches the nlmid of the argument, then
+			 * remove the active lock from the list, wake up
+			 * blocked threads, and free the storage for the lock.
+			 * Note that there's no need to mark the NLM state
+			 * of this lock to NLM_DOWN because the lock will
+			 * be deleted anyway and its storage freed.
+			 */
+			if (IS_LOCKMGR(lock)) {
+				/* get NLM id */
+				lock_nlmid = GETNLMID(lock->l_flock.l_sysid);
+				if (nlmid == lock_nlmid) {
+					flk_delete_active_lock(lock, 0);
+					flk_wakeup(lock, 1);
+					flk_free_lock(lock);
+				}
+			}
+		}
+		mutex_exit(&gp->gp_mutex);
+	}
+}
+
+/*
+ * Find all sleeping lock manager requests and poke them.
+ */
+static void
+wakeup_sleeping_lockmgr_locks(struct flock_globals *fg)
+{
+	lock_descriptor_t *lock;
+	lock_descriptor_t *nlock = NULL; /* next lock */
+	int i;
+	graph_t *gp;
+	zoneid_t zoneid = getzoneid();
+
+	for (i = 0; i < HASH_SIZE; i++) {
+		mutex_enter(&flock_lock);
+		gp = lock_graph[i];
+		mutex_exit(&flock_lock);
+		if (gp == NULL) {
+			continue;
+		}
+
+		mutex_enter(&gp->gp_mutex);
+		fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS;
+		for (lock = SLEEPING_HEAD(gp)->l_next;
+		    lock != SLEEPING_HEAD(gp);
+		    lock = nlock) {
+			nlock = lock->l_next;
+			if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
+				INTERRUPT_WAKEUP(lock);
+			}
+		}
+		mutex_exit(&gp->gp_mutex);
+	}
+}
+
+
+/*
+ * Find all active (granted) lock manager locks and release them.
+ */
+static void
+unlock_lockmgr_granted(struct flock_globals *fg)
+{
+	lock_descriptor_t *lock;
+	lock_descriptor_t *nlock = NULL; /* next lock */
+	int i;
+	graph_t *gp;
+	zoneid_t zoneid = getzoneid();
+
+	for (i = 0; i < HASH_SIZE; i++) {
+		mutex_enter(&flock_lock);
+		gp = lock_graph[i];
+		mutex_exit(&flock_lock);
+		if (gp == NULL) {
+			continue;
+		}
+
+		mutex_enter(&gp->gp_mutex);
+		fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN;
+		for (lock = ACTIVE_HEAD(gp)->l_next;
+		    lock != ACTIVE_HEAD(gp);
+		    lock = nlock) {
+			nlock = lock->l_next;
+			if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
+				ASSERT(IS_ACTIVE(lock));
+				flk_delete_active_lock(lock, 0);
+				flk_wakeup(lock, 1);
+				flk_free_lock(lock);
+			}
+		}
+		mutex_exit(&gp->gp_mutex);
+	}
+}
+/* ONC_PLUS EXTRACT END */
+
+
+/*
+ * Wait until a lock is granted, cancelled, or interrupted.
+ */
+
+static void
+wait_for_lock(lock_descriptor_t *request)
+{
+	graph_t *gp = request->l_graph;
+
+	ASSERT(MUTEX_HELD(&gp->gp_mutex));
+
+	while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) &&
+	    !(IS_INTERRUPTED(request))) {
+		if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) {
+			flk_set_state(request, FLK_INTERRUPTED_STATE);
+			request->l_state |= INTERRUPTED_LOCK;
+		}
+	}
+}
+
+/* ONC_PLUS EXTRACT START */
+/*
+ * Create an flock structure from the existing lock information
+ *
+ * This routine is used to create flock structures for the lock manager
+ * to use in a reclaim request.  Since the lock originated on this
+ * host, it must conform to UNIX semantics, so no checking is
+ * done to make sure it falls within the lower half of the 32-bit range.
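+ *
+ * Examples of the range encoding performed below: an internal lock with
+ * l_start == 100 and l_end == 199 becomes l_start = 100, l_len = 100;
+ * a lock held to end of file (l_end == MAX_U_OFFSET_T) becomes
+ * l_len = 0, the flock convention for "to EOF."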
+ */
+
+static void
+create_flock(lock_descriptor_t *lp, flock64_t *flp)
+{
+	ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND);
+	ASSERT(lp->l_end >= lp->l_start);
+
+	flp->l_type = lp->l_type;
+	flp->l_whence = 0;
+	flp->l_start = lp->l_start;
+	flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 :
+	    (lp->l_end - lp->l_start + 1);
+	flp->l_sysid = lp->l_flock.l_sysid;
+	flp->l_pid = lp->l_flock.l_pid;
+}
+
+/*
+ * Convert flock_t data describing a lock range into unsigned long starting
+ * and ending points, which are put into lock_request.  Returns 0 or an
+ * errno value.
+ * Large Files: max is passed by the caller and we return EOVERFLOW
+ * as defined by the LFS API.
+ */
+
+int
+flk_convert_lock_data(vnode_t *vp, flock64_t *flp,
+    u_offset_t *start, u_offset_t *end, offset_t offset)
+{
+	struct vattr	vattr;
+	int	error;
+
+	/*
+	 * Determine the starting point of the request
+	 */
+	switch (flp->l_whence) {
+	case 0:		/* SEEK_SET */
+		*start = (u_offset_t)flp->l_start;
+		break;
+	case 1:		/* SEEK_CUR */
+		*start = (u_offset_t)(flp->l_start + offset);
+		break;
+	case 2:		/* SEEK_END */
+		vattr.va_mask = AT_SIZE;
+		if (error = VOP_GETATTR(vp, &vattr, 0, CRED()))
+			return (error);
+		*start = (u_offset_t)(flp->l_start + vattr.va_size);
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	/*
+	 * Determine the range covered by the request.
+	 */
+	if (flp->l_len == 0)
+		*end = MAX_U_OFFSET_T;
+	else if ((offset_t)flp->l_len > 0) {
+		*end = (u_offset_t)(*start + (flp->l_len - 1));
+	} else {
+		/*
+		 * Negative length; why do we even allow this?
+		 * Because this allows easy specification of
+		 * the last n bytes of the file.
+		 */
+		*end = *start;
+		*start += (u_offset_t)flp->l_len;
+		(*start)++;
+	}
+	return (0);
+}
+
+/*
+ * Check the validity of lock data.  This can be used by the NFS
+ * frlock routines to check data before contacting the server.  The
+ * server must support semantics that aren't as restrictive as
+ * the UNIX API, so the NFS client is required to check.
+ * The maximum is now passed in by the caller.
+ */
+
+int
+flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max)
+{
+	/*
+	 * The end (length) for local locking should never be greater
+	 * than MAXEND. However, the representation for
+	 * the entire file is MAX_U_OFFSET_T.
+	 */
+	if ((start > max) ||
+	    ((end > max) && (end != MAX_U_OFFSET_T))) {
+		return (EINVAL);
+	}
+	if (start > end) {
+		return (EINVAL);
+	}
+	return (0);
+}
+
+/*
+ * Fill in request->l_flock with information about the lock blocking the
+ * request.  The complexity here is that lock manager requests are allowed
+ * to see into the upper part of the 32-bit address range, whereas local
+ * requests are only allowed to see signed values.
+ *
+ * What should be done when "blocker" is a lock manager lock that uses the
+ * upper portion of the 32-bit range, but "request" is local?  Since the
+ * request has already been determined to have been blocked by the blocker,
+ * at least some portion of "blocker" must be in the range of the request,
+ * or the request extends to the end of file.  For the first case, the
+ * portion in the lower range is returned with the indication that it goes
+ * "to EOF."  For the second case, the last byte of the lower range is
+ * returned with the indication that it goes "to EOF."
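+ *
+ * A concrete illustration of the clamping described above, for a local
+ * request blocked by a lock manager lock lying entirely above MAXEND:
+ *
+ *	blocker:	l_start = MAXEND + 10, l_end = MAX_U_OFFSET_T
+ *	reported:	l_start = MAXEND, l_len = 0	("to EOF")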
+ */ + +static void +report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request) +{ + flock64_t *flrp; /* l_flock portion of request */ + + ASSERT(blocker != NULL); + + flrp = &request->l_flock; + flrp->l_whence = 0; + flrp->l_type = blocker->l_type; + flrp->l_pid = blocker->l_flock.l_pid; + flrp->l_sysid = blocker->l_flock.l_sysid; + + if (IS_LOCKMGR(request)) { + flrp->l_start = blocker->l_start; + if (blocker->l_end == MAX_U_OFFSET_T) + flrp->l_len = 0; + else + flrp->l_len = blocker->l_end - blocker->l_start + 1; + } else { + if (blocker->l_start > MAXEND) { + flrp->l_start = MAXEND; + flrp->l_len = 0; + } else { + flrp->l_start = blocker->l_start; + if (blocker->l_end == MAX_U_OFFSET_T) + flrp->l_len = 0; + else + flrp->l_len = blocker->l_end - + blocker->l_start + 1; + } + } +} +/* ONC_PLUS EXTRACT END */ + +/* + * PSARC case 1997/292 + */ +/* + * This is the public routine exported by flock.h. + */ +void +cl_flk_change_nlm_state_to_unknown(int nlmid) +{ + /* + * Check to see if node is booted as a cluster. If not, return. + */ + if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { + return; + } + + /* + * See comment in cl_flk_set_nlm_status(). + */ + if (nlm_reg_status == NULL) { + return; + } + + /* + * protect NLM registry state with a mutex. + */ + ASSERT(nlmid <= nlm_status_size && nlmid >= 0); + mutex_enter(&nlm_reg_lock); + FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN); + mutex_exit(&nlm_reg_lock); +} + +/* + * Return non-zero if the given I/O request conflicts with an active NBMAND + * lock. + * If svmand is non-zero, it means look at all active locks, not just NBMAND + * locks. + */ + +int +nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset, + ssize_t length, int svmand) +{ + int conflict = 0; + graph_t *gp; + lock_descriptor_t *lock; + + mutex_enter(&flock_lock); + gp = lock_graph[HASH_INDEX(vp)]; + mutex_exit(&flock_lock); + if (gp == NULL) + return (0); + + mutex_enter(&gp->gp_mutex); + SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); + + for (; lock && lock->l_vnode == vp; lock = lock->l_next) { + if ((svmand || (lock->l_state & NBMAND_LOCK)) && + lock->l_flock.l_sysid == 0 && + lock->l_flock.l_pid != curproc->p_pid && + lock_blocks_io(op, offset, length, + lock->l_type, lock->l_start, lock->l_end)) { + conflict = 1; + break; + } + } + mutex_exit(&gp->gp_mutex); + + return (conflict); +} + +/* + * Return non-zero if the given I/O request conflicts with the given lock. 
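+ *
+ * Examples of the checks below (ranges are [offset, offset+length-1]
+ * for the I/O and [lock_start, lock_end] for the lock; illustrative):
+ *
+ *	op = NBL_READ,  lock = F_RDLCK, any ranges	 -> 0 (never blocks)
+ *	op = NBL_WRITE, lock = F_RDLCK, [0,9] vs [5,20]	 -> 1 (overlap)
+ *	op = NBL_WRITE, lock = F_WRLCK, [0,9] vs [20,30] -> 0 (disjoint)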
+ */
+
+static int
+lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length,
+    int lock_type, u_offset_t lock_start, u_offset_t lock_end)
+{
+	ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE);
+	ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK);
+
+	if (op == NBL_READ && lock_type == F_RDLCK)
+		return (0);
+
+	if (offset <= lock_start && lock_start < offset + length)
+		return (1);
+	if (lock_start <= offset && offset <= lock_end)
+		return (1);
+
+	return (0);
+}
+
+#ifdef DEBUG
+static void
+check_active_locks(graph_t *gp)
+{
+	lock_descriptor_t *lock, *lock1;
+	edge_t	*ep;
+
+	for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
+	    lock = lock->l_next) {
+		ASSERT(IS_ACTIVE(lock));
+		ASSERT(NOT_BLOCKED(lock));
+		ASSERT(!IS_BARRIER(lock));
+
+		ep = FIRST_IN(lock);
+
+		while (ep != HEAD(lock)) {
+			ASSERT(IS_SLEEPING(ep->from_vertex));
+			ASSERT(!NOT_BLOCKED(ep->from_vertex));
+			ep = NEXT_IN(ep);
+		}
+
+		for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp);
+		    lock1 = lock1->l_next) {
+			if (lock1->l_vnode == lock->l_vnode) {
+				if (BLOCKS(lock1, lock)) {
+					cmn_err(CE_PANIC,
+					    "active lock %p blocks %p",
+					    (void *)lock1, (void *)lock);
+				} else if (BLOCKS(lock, lock1)) {
+					cmn_err(CE_PANIC,
+					    "active lock %p blocks %p",
+					    (void *)lock, (void *)lock1);
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Effect: This function checks whether the transition from 'old_state' to
+ *	'new_state' is a valid one.  It returns 0 if the transition is valid
+ *	and 1 if it is not.
+ *	For a map of valid transitions, see sys/flock_impl.h
+ */
+static int
+check_lock_transition(int old_state, int new_state)
+{
+	switch (old_state) {
+	case FLK_INITIAL_STATE:
+		if ((new_state == FLK_START_STATE) ||
+		    (new_state == FLK_SLEEPING_STATE) ||
+		    (new_state == FLK_ACTIVE_STATE) ||
+		    (new_state == FLK_DEAD_STATE)) {
+			return (0);
+		} else {
+			return (1);
+		}
+	case FLK_START_STATE:
+		if ((new_state == FLK_ACTIVE_STATE) ||
+		    (new_state == FLK_DEAD_STATE)) {
+			return (0);
+		} else {
+			return (1);
+		}
+	case FLK_ACTIVE_STATE:
+		if (new_state == FLK_DEAD_STATE) {
+			return (0);
+		} else {
+			return (1);
+		}
+	case FLK_SLEEPING_STATE:
+		if ((new_state == FLK_GRANTED_STATE) ||
+		    (new_state == FLK_INTERRUPTED_STATE) ||
+		    (new_state == FLK_CANCELLED_STATE)) {
+			return (0);
+		} else {
+			return (1);
+		}
+	case FLK_GRANTED_STATE:
+		if ((new_state == FLK_START_STATE) ||
+		    (new_state == FLK_INTERRUPTED_STATE) ||
+		    (new_state == FLK_CANCELLED_STATE)) {
+			return (0);
+		} else {
+			return (1);
+		}
+	case FLK_CANCELLED_STATE:
+		if ((new_state == FLK_INTERRUPTED_STATE) ||
+		    (new_state == FLK_DEAD_STATE)) {
+			return (0);
+		} else {
+			return (1);
+		}
+	case FLK_INTERRUPTED_STATE:
+		if (new_state == FLK_DEAD_STATE) {
+			return (0);
+		} else {
+			return (1);
+		}
+	case FLK_DEAD_STATE:
+		/* May be set more than once */
+		if (new_state == FLK_DEAD_STATE) {
+			return (0);
+		} else {
+			return (1);
+		}
+	default:
+		return (1);
+	}
+}
+
+static void
+check_sleeping_locks(graph_t *gp)
+{
+	lock_descriptor_t *lock1, *lock2;
+	edge_t *ep;
+	for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp);
+	    lock1 = lock1->l_next) {
+		ASSERT(!IS_BARRIER(lock1));
+		for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp);
+		    lock2 = lock2->l_next) {
+			if (lock1->l_vnode == lock2->l_vnode) {
+				if (BLOCKS(lock2, lock1)) {
+					ASSERT(!IS_GRANTED(lock1));
+					ASSERT(!NOT_BLOCKED(lock1));
+					path(lock1, lock2);
+				}
+			}
+		}
+
+		for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp);
+		    lock2 = lock2->l_next) {
+			ASSERT(!IS_BARRIER(lock1));
+			if (lock1->l_vnode == lock2->l_vnode) {
+				if (BLOCKS(lock2, lock1)) {
+					ASSERT(!IS_GRANTED(lock1));
+					ASSERT(!NOT_BLOCKED(lock1));
+					path(lock1, lock2);
+				}
+			}
+		}
+		ep = FIRST_ADJ(lock1);
+		while (ep != HEAD(lock1)) {
+			ASSERT(BLOCKS(ep->to_vertex, lock1));
+			ep = NEXT_ADJ(ep);
+		}
+	}
+}
+
+static int
+level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path)
+{
+	edge_t	*ep;
+	lock_descriptor_t	*vertex;
+	lock_descriptor_t *vertex_stack;
+
+	STACK_INIT(vertex_stack);
+
+	flk_graph_uncolor(lock1->l_graph);
+	ep = FIRST_ADJ(lock1);
+	ASSERT(ep != HEAD(lock1));
+	while (ep != HEAD(lock1)) {
+		if (no_path)
+			ASSERT(ep->to_vertex != lock2);
+		STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
+		COLOR(ep->to_vertex);
+		ep = NEXT_ADJ(ep);
+	}
+
+	while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
+		STACK_POP(vertex_stack, l_dstack);
+		for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
+		    ep = NEXT_ADJ(ep)) {
+			if (COLORED(ep->to_vertex))
+				continue;
+			COLOR(ep->to_vertex);
+			if (ep->to_vertex == lock2)
+				return (1);
+
+			STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
+		}
+	}
+	return (0);
+}
+
+static void
+check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp)
+{
+	lock_descriptor_t *lock;
+
+	SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
+
+	if (lock) {
+		while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) {
+			if (lock->l_flock.l_pid == pid &&
+			    lock->l_flock.l_sysid == sysid)
+				cmn_err(CE_PANIC,
+				    "owner pid %d's lock %p in active queue",
+				    pid, (void *)lock);
+			lock = lock->l_next;
+		}
+	}
+	SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
+
+	if (lock) {
+		while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) {
+			if (lock->l_flock.l_pid == pid &&
+			    lock->l_flock.l_sysid == sysid)
+				cmn_err(CE_PANIC,
+				    "owner pid %d's lock %p in sleep queue",
+				    pid, (void *)lock);
+			lock = lock->l_next;
+		}
+	}
+}
+
+static int
+level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
+{
+	edge_t	*ep = FIRST_ADJ(lock1);
+
+	while (ep != HEAD(lock1)) {
+		if (ep->to_vertex == lock2)
+			return (1);
+		else
+			ep = NEXT_ADJ(ep);
+	}
+	return (0);
+}
+
+static int
+no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
+{
+	return (!level_two_path(lock1, lock2, 1));
+}
+
+static void
+path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
+{
+	if (level_one_path(lock1, lock2)) {
+		if (level_two_path(lock1, lock2, 0) != 0) {
+			cmn_err(CE_WARN,
+			    "one edge one path from lock1 %p lock2 %p",
+			    (void *)lock1, (void *)lock2);
+		}
+	} else if (no_path(lock1, lock2)) {
+		cmn_err(CE_PANIC,
+		    "No path from lock1 %p to lock2 %p",
+		    (void *)lock1, (void *)lock2);
+	}
+}
+#endif /* DEBUG */