summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--usr/src/common/avl/avl.c1030
-rw-r--r--usr/src/uts/common/sys/avl.h309
-rw-r--r--usr/src/uts/common/sys/avl_impl.h164
3 files changed, 1503 insertions, 0 deletions
diff --git a/usr/src/common/avl/avl.c b/usr/src/common/avl/avl.c
new file mode 100644
index 0000000..dd39c12
--- /dev/null
+++ b/usr/src/common/avl/avl.c
@@ -0,0 +1,1030 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * AVL - generic AVL tree implementation for kernel use
+ *
+ * A complete description of AVL trees can be found in many CS textbooks.
+ *
+ * Here is a very brief overview. An AVL tree is a binary search tree that is
+ * almost perfectly balanced. By "almost" perfectly balanced, we mean that at
+ * any given node, the left and right subtrees are allowed to differ in height
+ * by at most 1 level.
+ *
+ * This relaxation from a perfectly balanced binary tree allows doing
+ * insertion and deletion relatively efficiently. Searching the tree is
+ * still a fast operation, roughly O(log(N)).
+ *
+ * The key to insertion and deletion is a set of tree maniuplations called
+ * rotations, which bring unbalanced subtrees back into the semi-balanced state.
+ *
+ * This implementation of AVL trees has the following peculiarities:
+ *
+ * - The AVL specific data structures are physically embedded as fields
+ * in the "using" data structures. To maintain generality the code
+ * must constantly translate between "avl_node_t *" and containing
+ * data structure "void *"s by adding/subracting the avl_offset.
+ *
+ * - Since the AVL data is always embedded in other structures, there is
+ * no locking or memory allocation in the AVL routines. This must be
+ * provided for by the enclosing data structure's semantics. Typically,
+ * avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
+ * exclusive write lock. Other operations require a read lock.
+ *
+ * - The implementation uses iteration instead of explicit recursion,
+ * since it is intended to run on limited size kernel stacks. Since
+ * there is no recursion stack present to move "up" in the tree,
+ * there is an explicit "parent" link in the avl_node_t.
+ *
+ * - The left/right children pointers of a node are in an array.
+ * In the code, variables (instead of constants) are used to represent
+ * left and right indices. The implementation is written as if it only
+ * dealt with left handed manipulations. By changing the value assigned
+ * to "left", the code also works for right handed trees. The
+ * following variables/terms are frequently used:
+ *
+ * int left; // 0 when dealing with left children,
+ * // 1 for dealing with right children
+ *
+ * int left_heavy; // -1 when left subtree is taller at some node,
+ * // +1 when right subtree is taller
+ *
+ * int right; // will be the opposite of left (0 or 1)
+ * int right_heavy;// will be the opposite of left_heavy (-1 or 1)
+ *
+ * int direction; // 0 for "<" (ie. left child); 1 for ">" (right)
+ *
+ * Though it is a little more confusing to read the code, the approach
+ * allows using half as much code (and hence cache footprint) for tree
+ * manipulations and eliminates many conditional branches.
+ *
+ * - The avl_index_t is an opaque "cookie" used to find nodes at or
+ * adjacent to where a new value would be inserted in the tree. The value
+ * is a modified "avl_node_t *". The bottom bit (normally 0 for a
+ * pointer) is set to indicate if that the new node has a value greater
+ * than the value of the indicated "avl_node_t *".
+ */
+
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/debug.h>
+#include <sys/avl.h>
+#include <sys/cmn_err.h>
+
+/*
+ * Small arrays to translate between balance (or diff) values and child indeces.
+ *
+ * Code that deals with binary tree data structures will randomly use
+ * left and right children when examining a tree. C "if()" statements
+ * which evaluate randomly suffer from very poor hardware branch prediction.
+ * In this code we avoid some of the branch mispredictions by using the
+ * following translation arrays. They replace random branches with an
+ * additional memory reference. Since the translation arrays are both very
+ * small the data should remain efficiently in cache.
+ */
+static const int avl_child2balance[2] = {-1, 1};
+static const int avl_balance2child[] = {0, 0, 1};
+
+
+/*
+ * Walk from one node to the previous valued node (ie. an infix walk
+ * towards the left). At any given node we do one of 2 things:
+ *
+ * - If there is a left child, go to it, then to it's rightmost descendant.
+ *
+ * - otherwise we return thru parent nodes until we've come from a right child.
+ *
+ * Return Value:
+ * NULL - if at the end of the nodes
+ * otherwise next node
+ */
+void *
+avl_walk(avl_tree_t *tree, void *oldnode, int left)
+{
+ size_t off = tree->avl_offset;
+ avl_node_t *node = AVL_DATA2NODE(oldnode, off);
+ int right = 1 - left;
+ int was_child;
+
+
+ /*
+ * nowhere to walk to if tree is empty
+ */
+ if (node == NULL)
+ return (NULL);
+
+ /*
+ * Visit the previous valued node. There are two possibilities:
+ *
+ * If this node has a left child, go down one left, then all
+ * the way right.
+ */
+ if (node->avl_child[left] != NULL) {
+ for (node = node->avl_child[left];
+ node->avl_child[right] != NULL;
+ node = node->avl_child[right])
+ ;
+ /*
+ * Otherwise, return thru left children as far as we can.
+ */
+ } else {
+ for (;;) {
+ was_child = AVL_XCHILD(node);
+ node = AVL_XPARENT(node);
+ if (node == NULL)
+ return (NULL);
+ if (was_child == right)
+ break;
+ }
+ }
+
+ return (AVL_NODE2DATA(node, off));
+}
+
+/*
+ * Return the lowest valued node in a tree or NULL.
+ * (leftmost child from root of tree)
+ */
+void *
+avl_first(avl_tree_t *tree)
+{
+ avl_node_t *node;
+ avl_node_t *prev = NULL;
+ size_t off = tree->avl_offset;
+
+ for (node = tree->avl_root; node != NULL; node = node->avl_child[0])
+ prev = node;
+
+ if (prev != NULL)
+ return (AVL_NODE2DATA(prev, off));
+ return (NULL);
+}
+
+/*
+ * Return the highest valued node in a tree or NULL.
+ * (rightmost child from root of tree)
+ */
+void *
+avl_last(avl_tree_t *tree)
+{
+ avl_node_t *node;
+ avl_node_t *prev = NULL;
+ size_t off = tree->avl_offset;
+
+ for (node = tree->avl_root; node != NULL; node = node->avl_child[1])
+ prev = node;
+
+ if (prev != NULL)
+ return (AVL_NODE2DATA(prev, off));
+ return (NULL);
+}
+
+/*
+ * Access the node immediately before or after an insertion point.
+ *
+ * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child
+ *
+ * Return value:
+ * NULL: no node in the given direction
+ * "void *" of the found tree node
+ */
+void *
+avl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
+{
+ int child = AVL_INDEX2CHILD(where);
+ avl_node_t *node = AVL_INDEX2NODE(where);
+ void *data;
+ size_t off = tree->avl_offset;
+
+ if (node == NULL) {
+ ASSERT(tree->avl_root == NULL);
+ return (NULL);
+ }
+ data = AVL_NODE2DATA(node, off);
+ if (child != direction)
+ return (data);
+
+ return (avl_walk(tree, data, direction));
+}
+
+
+/*
+ * Search for the node which contains "value". The algorithm is a
+ * simple binary tree search.
+ *
+ * return value:
+ * NULL: the value is not in the AVL tree
+ * *where (if not NULL) is set to indicate the insertion point
+ * "void *" of the found tree node
+ */
+void *
+avl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
+{
+ avl_node_t *node;
+ avl_node_t *prev = NULL;
+ int child = 0;
+ int diff;
+ size_t off = tree->avl_offset;
+
+ for (node = tree->avl_root; node != NULL;
+ node = node->avl_child[child]) {
+
+ prev = node;
+
+ diff = tree->avl_compar(value, AVL_NODE2DATA(node, off));
+ ASSERT(-1 <= diff && diff <= 1);
+ if (diff == 0) {
+#ifdef DEBUG
+ if (where != NULL)
+ *where = 0;
+#endif
+ return (AVL_NODE2DATA(node, off));
+ }
+ child = avl_balance2child[1 + diff];
+
+ }
+
+ if (where != NULL)
+ *where = AVL_MKINDEX(prev, child);
+
+ return (NULL);
+}
+
+
+/*
+ * Perform a rotation to restore balance at the subtree given by depth.
+ *
+ * This routine is used by both insertion and deletion. The return value
+ * indicates:
+ * 0 : subtree did not change height
+ * !0 : subtree was reduced in height
+ *
+ * The code is written as if handling left rotations, right rotations are
+ * symmetric and handled by swapping values of variables right/left[_heavy]
+ *
+ * On input balance is the "new" balance at "node". This value is either
+ * -2 or +2.
+ */
+static int
+avl_rotation(avl_tree_t *tree, avl_node_t *node, int balance)
+{
+ int left = !(balance < 0); /* when balance = -2, left will be 0 */
+ int right = 1 - left;
+ int left_heavy = balance >> 1;
+ int right_heavy = -left_heavy;
+ avl_node_t *parent = AVL_XPARENT(node);
+ avl_node_t *child = node->avl_child[left];
+ avl_node_t *cright;
+ avl_node_t *gchild;
+ avl_node_t *gright;
+ avl_node_t *gleft;
+ int which_child = AVL_XCHILD(node);
+ int child_bal = AVL_XBALANCE(child);
+
+ /* BEGIN CSTYLED */
+ /*
+ * case 1 : node is overly left heavy, the left child is balanced or
+ * also left heavy. This requires the following rotation.
+ *
+ * (node bal:-2)
+ * / \
+ * / \
+ * (child bal:0 or -1)
+ * / \
+ * / \
+ * cright
+ *
+ * becomes:
+ *
+ * (child bal:1 or 0)
+ * / \
+ * / \
+ * (node bal:-1 or 0)
+ * / \
+ * / \
+ * cright
+ *
+ * we detect this situation by noting that child's balance is not
+ * right_heavy.
+ */
+ /* END CSTYLED */
+ if (child_bal != right_heavy) {
+
+ /*
+ * compute new balance of nodes
+ *
+ * If child used to be left heavy (now balanced) we reduced
+ * the height of this sub-tree -- used in "return...;" below
+ */
+ child_bal += right_heavy; /* adjust towards right */
+
+ /*
+ * move "cright" to be node's left child
+ */
+ cright = child->avl_child[right];
+ node->avl_child[left] = cright;
+ if (cright != NULL) {
+ AVL_SETPARENT(cright, node);
+ AVL_SETCHILD(cright, left);
+ }
+
+ /*
+ * move node to be child's right child
+ */
+ child->avl_child[right] = node;
+ AVL_SETBALANCE(node, -child_bal);
+ AVL_SETCHILD(node, right);
+ AVL_SETPARENT(node, child);
+
+ /*
+ * update the pointer into this subtree
+ */
+ AVL_SETBALANCE(child, child_bal);
+ AVL_SETCHILD(child, which_child);
+ AVL_SETPARENT(child, parent);
+ if (parent != NULL)
+ parent->avl_child[which_child] = child;
+ else
+ tree->avl_root = child;
+
+ return (child_bal == 0);
+ }
+
+ /* BEGIN CSTYLED */
+ /*
+ * case 2 : When node is left heavy, but child is right heavy we use
+ * a different rotation.
+ *
+ * (node b:-2)
+ * / \
+ * / \
+ * / \
+ * (child b:+1)
+ * / \
+ * / \
+ * (gchild b: != 0)
+ * / \
+ * / \
+ * gleft gright
+ *
+ * becomes:
+ *
+ * (gchild b:0)
+ * / \
+ * / \
+ * / \
+ * (child b:?) (node b:?)
+ * / \ / \
+ * / \ / \
+ * gleft gright
+ *
+ * computing the new balances is more complicated. As an example:
+ * if gchild was right_heavy, then child is now left heavy
+ * else it is balanced
+ */
+ /* END CSTYLED */
+ gchild = child->avl_child[right];
+ gleft = gchild->avl_child[left];
+ gright = gchild->avl_child[right];
+
+ /*
+ * move gright to left child of node and
+ *
+ * move gleft to right child of node
+ */
+ node->avl_child[left] = gright;
+ if (gright != NULL) {
+ AVL_SETPARENT(gright, node);
+ AVL_SETCHILD(gright, left);
+ }
+
+ child->avl_child[right] = gleft;
+ if (gleft != NULL) {
+ AVL_SETPARENT(gleft, child);
+ AVL_SETCHILD(gleft, right);
+ }
+
+ /*
+ * move child to left child of gchild and
+ *
+ * move node to right child of gchild and
+ *
+ * fixup parent of all this to point to gchild
+ */
+ balance = AVL_XBALANCE(gchild);
+ gchild->avl_child[left] = child;
+ AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0));
+ AVL_SETPARENT(child, gchild);
+ AVL_SETCHILD(child, left);
+
+ gchild->avl_child[right] = node;
+ AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0));
+ AVL_SETPARENT(node, gchild);
+ AVL_SETCHILD(node, right);
+
+ AVL_SETBALANCE(gchild, 0);
+ AVL_SETPARENT(gchild, parent);
+ AVL_SETCHILD(gchild, which_child);
+ if (parent != NULL)
+ parent->avl_child[which_child] = gchild;
+ else
+ tree->avl_root = gchild;
+
+ return (1); /* the new tree is always shorter */
+}
+
+
+/*
+ * Insert a new node into an AVL tree at the specified (from avl_find()) place.
+ *
+ * Newly inserted nodes are always leaf nodes in the tree, since avl_find()
+ * searches out to the leaf positions. The avl_index_t indicates the node
+ * which will be the parent of the new node.
+ *
+ * After the node is inserted, a single rotation further up the tree may
+ * be necessary to maintain an acceptable AVL balance.
+ */
+void
+avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
+{
+ avl_node_t *node;
+ avl_node_t *parent = AVL_INDEX2NODE(where);
+ int old_balance;
+ int new_balance;
+ int which_child = AVL_INDEX2CHILD(where);
+ size_t off = tree->avl_offset;
+
+ ASSERT(tree);
+#ifdef _LP64
+ ASSERT(((uintptr_t)new_data & 0x7) == 0);
+#endif
+
+ node = AVL_DATA2NODE(new_data, off);
+
+ /*
+ * First, add the node to the tree at the indicated position.
+ */
+ ++tree->avl_numnodes;
+
+ node->avl_child[0] = NULL;
+ node->avl_child[1] = NULL;
+
+ AVL_SETCHILD(node, which_child);
+ AVL_SETBALANCE(node, 0);
+ AVL_SETPARENT(node, parent);
+ if (parent != NULL) {
+ ASSERT(parent->avl_child[which_child] == NULL);
+ parent->avl_child[which_child] = node;
+ } else {
+ ASSERT(tree->avl_root == NULL);
+ tree->avl_root = node;
+ }
+ /*
+ * Now, back up the tree modifying the balance of all nodes above the
+ * insertion point. If we get to a highly unbalanced ancestor, we
+ * need to do a rotation. If we back out of the tree we are done.
+ * If we brought any subtree into perfect balance (0), we are also done.
+ */
+ for (;;) {
+ node = parent;
+ if (node == NULL)
+ return;
+
+ /*
+ * Compute the new balance
+ */
+ old_balance = AVL_XBALANCE(node);
+ new_balance = old_balance + avl_child2balance[which_child];
+
+ /*
+ * If we introduced equal balance, then we are done immediately
+ */
+ if (new_balance == 0) {
+ AVL_SETBALANCE(node, 0);
+ return;
+ }
+
+ /*
+ * If both old and new are not zero we went
+ * from -1 to -2 balance, do a rotation.
+ */
+ if (old_balance != 0)
+ break;
+
+ AVL_SETBALANCE(node, new_balance);
+ parent = AVL_XPARENT(node);
+ which_child = AVL_XCHILD(node);
+ }
+
+ /*
+ * perform a rotation to fix the tree and return
+ */
+ (void) avl_rotation(tree, node, new_balance);
+}
+
+/*
+ * Insert "new_data" in "tree" in the given "direction" either after or
+ * before (AVL_AFTER, AVL_BEFORE) the data "here".
+ *
+ * Insertions can only be done at empty leaf points in the tree, therefore
+ * if the given child of the node is already present we move to either
+ * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
+ * every other node in the tree is a leaf, this always works.
+ *
+ * To help developers using this interface, we assert that the new node
+ * is correctly ordered at every step of the way in DEBUG kernels.
+ */
+void
+avl_insert_here(
+ avl_tree_t *tree,
+ void *new_data,
+ void *here,
+ int direction)
+{
+ avl_node_t *node;
+ int child = direction; /* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
+#ifdef DEBUG
+ int diff;
+#endif
+
+ ASSERT(tree != NULL);
+ ASSERT(new_data != NULL);
+ ASSERT(here != NULL);
+ ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER);
+
+ /*
+ * If corresponding child of node is not NULL, go to the neighboring
+ * node and reverse the insertion direction.
+ */
+ node = AVL_DATA2NODE(here, tree->avl_offset);
+
+#ifdef DEBUG
+ diff = tree->avl_compar(new_data, here);
+ ASSERT(-1 <= diff && diff <= 1);
+ ASSERT(diff != 0);
+ ASSERT(diff > 0 ? child == 1 : child == 0);
+#endif
+
+ if (node->avl_child[child] != NULL) {
+ node = node->avl_child[child];
+ child = 1 - child;
+ while (node->avl_child[child] != NULL) {
+#ifdef DEBUG
+ diff = tree->avl_compar(new_data,
+ AVL_NODE2DATA(node, tree->avl_offset));
+ ASSERT(-1 <= diff && diff <= 1);
+ ASSERT(diff != 0);
+ ASSERT(diff > 0 ? child == 1 : child == 0);
+#endif
+ node = node->avl_child[child];
+ }
+#ifdef DEBUG
+ diff = tree->avl_compar(new_data,
+ AVL_NODE2DATA(node, tree->avl_offset));
+ ASSERT(-1 <= diff && diff <= 1);
+ ASSERT(diff != 0);
+ ASSERT(diff > 0 ? child == 1 : child == 0);
+#endif
+ }
+ ASSERT(node->avl_child[child] == NULL);
+
+ avl_insert(tree, new_data, AVL_MKINDEX(node, child));
+}
+
+/*
+ * Add a new node to an AVL tree.
+ */
+void
+avl_add(avl_tree_t *tree, void *new_node)
+{
+ avl_index_t where;
+
+ /*
+ * This is unfortunate. We want to call panic() here, even for
+ * non-DEBUG kernels. In userland, however, we can't depend on anything
+ * in libc or else the rtld build process gets confused. So, all we can
+ * do in userland is resort to a normal ASSERT().
+ */
+ if (avl_find(tree, new_node, &where) != NULL)
+#ifdef _KERNEL
+ panic("avl_find() succeeded inside avl_add()");
+#else
+ ASSERT(0);
+#endif
+ avl_insert(tree, new_node, where);
+}
+
+/*
+ * Delete a node from the AVL tree. Deletion is similar to insertion, but
+ * with 2 complications.
+ *
+ * First, we may be deleting an interior node. Consider the following subtree:
+ *
+ * d c c
+ * / \ / \ / \
+ * b e b e b e
+ * / \ / \ /
+ * a c a a
+ *
+ * When we are deleting node (d), we find and bring up an adjacent valued leaf
+ * node, say (c), to take the interior node's place. In the code this is
+ * handled by temporarily swapping (d) and (c) in the tree and then using
+ * common code to delete (d) from the leaf position.
+ *
+ * Secondly, an interior deletion from a deep tree may require more than one
+ * rotation to fix the balance. This is handled by moving up the tree through
+ * parents and applying rotations as needed. The return value from
+ * avl_rotation() is used to detect when a subtree did not change overall
+ * height due to a rotation.
+ */
+void
+avl_remove(avl_tree_t *tree, void *data)
+{
+ avl_node_t *delete;
+ avl_node_t *parent;
+ avl_node_t *node;
+ avl_node_t tmp;
+ int old_balance;
+ int new_balance;
+ int left;
+ int right;
+ int which_child;
+ size_t off = tree->avl_offset;
+
+ ASSERT(tree);
+
+ delete = AVL_DATA2NODE(data, off);
+
+ /*
+ * Deletion is easiest with a node that has at most 1 child.
+ * We swap a node with 2 children with a sequentially valued
+ * neighbor node. That node will have at most 1 child. Note this
+ * has no effect on the ordering of the remaining nodes.
+ *
+ * As an optimization, we choose the greater neighbor if the tree
+ * is right heavy, otherwise the left neighbor. This reduces the
+ * number of rotations needed.
+ */
+ if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {
+
+ /*
+ * choose node to swap from whichever side is taller
+ */
+ old_balance = AVL_XBALANCE(delete);
+ left = avl_balance2child[old_balance + 1];
+ right = 1 - left;
+
+ /*
+ * get to the previous value'd node
+ * (down 1 left, as far as possible right)
+ */
+ for (node = delete->avl_child[left];
+ node->avl_child[right] != NULL;
+ node = node->avl_child[right])
+ ;
+
+ /*
+ * create a temp placeholder for 'node'
+ * move 'node' to delete's spot in the tree
+ */
+ tmp = *node;
+
+ *node = *delete;
+ if (node->avl_child[left] == node)
+ node->avl_child[left] = &tmp;
+
+ parent = AVL_XPARENT(node);
+ if (parent != NULL)
+ parent->avl_child[AVL_XCHILD(node)] = node;
+ else
+ tree->avl_root = node;
+ AVL_SETPARENT(node->avl_child[left], node);
+ AVL_SETPARENT(node->avl_child[right], node);
+
+ /*
+ * Put tmp where node used to be (just temporary).
+ * It always has a parent and at most 1 child.
+ */
+ delete = &tmp;
+ parent = AVL_XPARENT(delete);
+ parent->avl_child[AVL_XCHILD(delete)] = delete;
+ which_child = (delete->avl_child[1] != 0);
+ if (delete->avl_child[which_child] != NULL)
+ AVL_SETPARENT(delete->avl_child[which_child], delete);
+ }
+
+
+ /*
+ * Here we know "delete" is at least partially a leaf node. It can
+ * be easily removed from the tree.
+ */
+ ASSERT(tree->avl_numnodes > 0);
+ --tree->avl_numnodes;
+ parent = AVL_XPARENT(delete);
+ which_child = AVL_XCHILD(delete);
+ if (delete->avl_child[0] != NULL)
+ node = delete->avl_child[0];
+ else
+ node = delete->avl_child[1];
+
+ /*
+ * Connect parent directly to node (leaving out delete).
+ */
+ if (node != NULL) {
+ AVL_SETPARENT(node, parent);
+ AVL_SETCHILD(node, which_child);
+ }
+ if (parent == NULL) {
+ tree->avl_root = node;
+ return;
+ }
+ parent->avl_child[which_child] = node;
+
+
+ /*
+ * Since the subtree is now shorter, begin adjusting parent balances
+ * and performing any needed rotations.
+ */
+ do {
+
+ /*
+ * Move up the tree and adjust the balance
+ *
+ * Capture the parent and which_child values for the next
+ * iteration before any rotations occur.
+ */
+ node = parent;
+ old_balance = AVL_XBALANCE(node);
+ new_balance = old_balance - avl_child2balance[which_child];
+ parent = AVL_XPARENT(node);
+ which_child = AVL_XCHILD(node);
+
+ /*
+ * If a node was in perfect balance but isn't anymore then
+ * we can stop, since the height didn't change above this point
+ * due to a deletion.
+ */
+ if (old_balance == 0) {
+ AVL_SETBALANCE(node, new_balance);
+ break;
+ }
+
+ /*
+ * If the new balance is zero, we don't need to rotate
+ * else
+ * need a rotation to fix the balance.
+ * If the rotation doesn't change the height
+ * of the sub-tree we have finished adjusting.
+ */
+ if (new_balance == 0)
+ AVL_SETBALANCE(node, new_balance);
+ else if (!avl_rotation(tree, node, new_balance))
+ break;
+ } while (parent != NULL);
+}
+
+#define AVL_REINSERT(tree, obj) \
+ avl_remove((tree), (obj)); \
+ avl_add((tree), (obj))
+
+boolean_t
+avl_update_lt(avl_tree_t *t, void *obj)
+{
+ void *neighbor;
+
+ ASSERT(((neighbor = AVL_NEXT(t, obj)) == NULL) ||
+ (t->avl_compar(obj, neighbor) <= 0));
+
+ neighbor = AVL_PREV(t, obj);
+ if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
+ AVL_REINSERT(t, obj);
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+boolean_t
+avl_update_gt(avl_tree_t *t, void *obj)
+{
+ void *neighbor;
+
+ ASSERT(((neighbor = AVL_PREV(t, obj)) == NULL) ||
+ (t->avl_compar(obj, neighbor) >= 0));
+
+ neighbor = AVL_NEXT(t, obj);
+ if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
+ AVL_REINSERT(t, obj);
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+boolean_t
+avl_update(avl_tree_t *t, void *obj)
+{
+ void *neighbor;
+
+ neighbor = AVL_PREV(t, obj);
+ if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
+ AVL_REINSERT(t, obj);
+ return (B_TRUE);
+ }
+
+ neighbor = AVL_NEXT(t, obj);
+ if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
+ AVL_REINSERT(t, obj);
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
+
+/*
+ * initialize a new AVL tree
+ */
+void
+avl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
+ size_t size, size_t offset)
+{
+ ASSERT(tree);
+ ASSERT(compar);
+ ASSERT(size > 0);
+ ASSERT(size >= offset + sizeof (avl_node_t));
+#ifdef _LP64
+ ASSERT((offset & 0x7) == 0);
+#endif
+
+ tree->avl_compar = compar;
+ tree->avl_root = NULL;
+ tree->avl_numnodes = 0;
+ tree->avl_size = size;
+ tree->avl_offset = offset;
+}
+
+/*
+ * Delete a tree.
+ */
+/* ARGSUSED */
+void
+avl_destroy(avl_tree_t *tree)
+{
+ ASSERT(tree);
+ ASSERT(tree->avl_numnodes == 0);
+ ASSERT(tree->avl_root == NULL);
+}
+
+
+/*
+ * Return the number of nodes in an AVL tree.
+ */
+ulong_t
+avl_numnodes(avl_tree_t *tree)
+{
+ ASSERT(tree);
+ return (tree->avl_numnodes);
+}
+
+boolean_t
+avl_is_empty(avl_tree_t *tree)
+{
+ ASSERT(tree);
+ return (tree->avl_numnodes == 0);
+}
+
+#define CHILDBIT (1L)
+
+/*
+ * Post-order tree walk used to visit all tree nodes and destroy the tree
+ * in post order. This is used for destroying a tree w/o paying any cost
+ * for rebalancing it.
+ *
+ * example:
+ *
+ * void *cookie = NULL;
+ * my_data_t *node;
+ *
+ * while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
+ * free(node);
+ * avl_destroy(tree);
+ *
+ * The cookie is really an avl_node_t to the current node's parent and
+ * an indication of which child you looked at last.
+ *
+ * On input, a cookie value of CHILDBIT indicates the tree is done.
+ */
+void *
+avl_destroy_nodes(avl_tree_t *tree, void **cookie)
+{
+ avl_node_t *node;
+ avl_node_t *parent;
+ int child;
+ void *first;
+ size_t off = tree->avl_offset;
+
+ /*
+ * Initial calls go to the first node or it's right descendant.
+ */
+ if (*cookie == NULL) {
+ first = avl_first(tree);
+
+ /*
+ * deal with an empty tree
+ */
+ if (first == NULL) {
+ *cookie = (void *)CHILDBIT;
+ return (NULL);
+ }
+
+ node = AVL_DATA2NODE(first, off);
+ parent = AVL_XPARENT(node);
+ goto check_right_side;
+ }
+
+ /*
+ * If there is no parent to return to we are done.
+ */
+ parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT);
+ if (parent == NULL) {
+ if (tree->avl_root != NULL) {
+ ASSERT(tree->avl_numnodes == 1);
+ tree->avl_root = NULL;
+ tree->avl_numnodes = 0;
+ }
+ return (NULL);
+ }
+
+ /*
+ * Remove the child pointer we just visited from the parent and tree.
+ */
+ child = (uintptr_t)(*cookie) & CHILDBIT;
+ parent->avl_child[child] = NULL;
+ ASSERT(tree->avl_numnodes > 1);
+ --tree->avl_numnodes;
+
+ /*
+ * If we just did a right child or there isn't one, go up to parent.
+ */
+ if (child == 1 || parent->avl_child[1] == NULL) {
+ node = parent;
+ parent = AVL_XPARENT(parent);
+ goto done;
+ }
+
+ /*
+ * Do parent's right child, then leftmost descendent.
+ */
+ node = parent->avl_child[1];
+ while (node->avl_child[0] != NULL) {
+ parent = node;
+ node = node->avl_child[0];
+ }
+
+ /*
+ * If here, we moved to a left child. It may have one
+ * child on the right (when balance == +1).
+ */
+check_right_side:
+ if (node->avl_child[1] != NULL) {
+ ASSERT(AVL_XBALANCE(node) == 1);
+ parent = node;
+ node = node->avl_child[1];
+ ASSERT(node->avl_child[0] == NULL &&
+ node->avl_child[1] == NULL);
+ } else {
+ ASSERT(AVL_XBALANCE(node) <= 0);
+ }
+
+done:
+ if (parent == NULL) {
+ *cookie = (void *)CHILDBIT;
+ ASSERT(node == tree->avl_root);
+ } else {
+ *cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node));
+ }
+
+ return (AVL_NODE2DATA(node, off));
+}
diff --git a/usr/src/uts/common/sys/avl.h b/usr/src/uts/common/sys/avl.h
new file mode 100644
index 0000000..ba305c9
--- /dev/null
+++ b/usr/src/uts/common/sys/avl.h
@@ -0,0 +1,309 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _AVL_H
+#define _AVL_H
+
+/*
+ * This is a private header file. Applications should not directly include
+ * this file.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+#include <sys/avl_impl.h>
+
+/*
+ * This is a generic implemenatation of AVL trees for use in the Solaris kernel.
+ * The interfaces provide an efficient way of implementing an ordered set of
+ * data structures.
+ *
+ * AVL trees provide an alternative to using an ordered linked list. Using AVL
+ * trees will usually be faster, however they requires more storage. An ordered
+ * linked list in general requires 2 pointers in each data structure. The
+ * AVL tree implementation uses 3 pointers. The following chart gives the
+ * approximate performance of operations with the different approaches:
+ *
+ * Operation Link List AVL tree
+ * --------- -------- --------
+ * lookup O(n) O(log(n))
+ *
+ * insert 1 node constant constant
+ *
+ * delete 1 node constant between constant and O(log(n))
+ *
+ * delete all nodes O(n) O(n)
+ *
+ * visit the next
+ * or prev node constant between constant and O(log(n))
+ *
+ *
+ * The data structure nodes are anchored at an "avl_tree_t" (the equivalent
+ * of a list header) and the individual nodes will have a field of
+ * type "avl_node_t" (corresponding to list pointers).
+ *
+ * The type "avl_index_t" is used to indicate a position in the list for
+ * certain calls.
+ *
+ * The usage scenario is generally:
+ *
+ * 1. Create the list/tree with: avl_create()
+ *
+ * followed by any mixture of:
+ *
+ * 2a. Insert nodes with: avl_add(), or avl_find() and avl_insert()
+ *
+ * 2b. Visited elements with:
+ * avl_first() - returns the lowest valued node
+ * avl_last() - returns the highest valued node
+ * AVL_NEXT() - given a node go to next higher one
+ * AVL_PREV() - given a node go to previous lower one
+ *
+ * 2c. Find the node with the closest value either less than or greater
+ * than a given value with avl_nearest().
+ *
+ * 2d. Remove individual nodes from the list/tree with avl_remove().
+ *
+ * and finally when the list is being destroyed
+ *
+ * 3. Use avl_destroy_nodes() to quickly process/free up any remaining nodes.
+ * Note that once you use avl_destroy_nodes(), you can no longer
+ * use any routine except avl_destroy_nodes() and avl_destoy().
+ *
+ * 4. Use avl_destroy() to destroy the AVL tree itself.
+ *
+ * Any locking for multiple thread access is up to the user to provide, just
+ * as is needed for any linked list implementation.
+ */
+
+
+/*
+ * Type used for the root of the AVL tree.
+ */
+typedef struct avl_tree avl_tree_t;
+
+/*
+ * The data nodes in the AVL tree must have a field of this type.
+ */
+typedef struct avl_node avl_node_t;
+
+/*
+ * An opaque type used to locate a position in the tree where a node
+ * would be inserted.
+ */
+typedef uintptr_t avl_index_t;
+
+
+/*
+ * Direction constants used for avl_nearest().
+ */
+#define AVL_BEFORE (0)
+#define AVL_AFTER (1)
+
+
+/*
+ * Prototypes
+ *
+ * Where not otherwise mentioned, "void *" arguments are a pointer to the
+ * user data structure which must contain a field of type avl_node_t.
+ *
+ * Also assume the user data structures looks like:
+ * stuct my_type {
+ * ...
+ * avl_node_t my_link;
+ * ...
+ * };
+ */
+
+/*
+ * Initialize an AVL tree. Arguments are:
+ *
+ * tree - the tree to be initialized
+ * compar - function to compare two nodes, it must return exactly: -1, 0, or +1
+ * -1 for <, 0 for ==, and +1 for >
+ * size - the value of sizeof(struct my_type)
+ * offset - the value of OFFSETOF(struct my_type, my_link)
+ */
+extern void avl_create(avl_tree_t *tree,
+ int (*compar) (const void *, const void *), size_t size, size_t offset);
+
+
+/*
+ * Find a node with a matching value in the tree. Returns the matching node
+ * found. If not found, it returns NULL and then if "where" is not NULL it sets
+ * "where" for use with avl_insert() or avl_nearest().
+ *
+ * node - node that has the value being looked for
+ * where - position for use with avl_nearest() or avl_insert(), may be NULL
+ */
+extern void *avl_find(avl_tree_t *tree, const void *node, avl_index_t *where);
+
+/*
+ * Insert a node into the tree.
+ *
+ * node - the node to insert
+ * where - position as returned from avl_find()
+ */
+extern void avl_insert(avl_tree_t *tree, void *node, avl_index_t where);
+
+/*
+ * Insert "new_data" in "tree" in the given "direction" either after
+ * or before the data "here".
+ *
+ * This might be usefull for avl clients caching recently accessed
+ * data to avoid doing avl_find() again for insertion.
+ *
+ * new_data - new data to insert
+ * here - existing node in "tree"
+ * direction - either AVL_AFTER or AVL_BEFORE the data "here".
+ */
+extern void avl_insert_here(avl_tree_t *tree, void *new_data, void *here,
+ int direction);
+
+
+/*
+ * Return the first or last valued node in the tree. Will return NULL
+ * if the tree is empty.
+ *
+ */
+extern void *avl_first(avl_tree_t *tree);
+extern void *avl_last(avl_tree_t *tree);
+
+
+/*
+ * Return the next or previous valued node in the tree.
+ * AVL_NEXT() will return NULL if at the last node.
+ * AVL_PREV() will return NULL if at the first node.
+ *
+ * node - the node from which the next or previous node is found
+ */
+#define AVL_NEXT(tree, node) avl_walk(tree, node, AVL_AFTER)
+#define AVL_PREV(tree, node) avl_walk(tree, node, AVL_BEFORE)
+
+
+/*
+ * Find the node with the nearest value either greater or less than
+ * the value from a previous avl_find(). Returns the node or NULL if
+ * there isn't a matching one.
+ *
+ * where - position as returned from avl_find()
+ * direction - either AVL_BEFORE or AVL_AFTER
+ *
+ * EXAMPLE get the greatest node that is less than a given value:
+ *
+ * avl_tree_t *tree;
+ * struct my_data look_for_value = {....};
+ * struct my_data *node;
+ * struct my_data *less;
+ * avl_index_t where;
+ *
+ * node = avl_find(tree, &look_for_value, &where);
+ * if (node != NULL)
+ * less = AVL_PREV(tree, node);
+ * else
+ * less = avl_nearest(tree, where, AVL_BEFORE);
+ */
+extern void *avl_nearest(avl_tree_t *tree, avl_index_t where, int direction);
+
+
+/*
+ * Add a single node to the tree.
+ * The node must not be in the tree, and it must not
+ * compare equal to any other node already in the tree.
+ *
+ * node - the node to add
+ */
+extern void avl_add(avl_tree_t *tree, void *node);
+
+
+/*
+ * Remove a single node from the tree. The node must be in the tree.
+ *
+ * node - the node to remove
+ */
+extern void avl_remove(avl_tree_t *tree, void *node);
+
+/*
+ * Reinsert a node only if its order has changed relative to its nearest
+ * neighbors. To optimize performance avl_update_lt() checks only the previous
+ * node and avl_update_gt() checks only the next node. Use avl_update_lt() and
+ * avl_update_gt() only if you know the direction in which the order of the
+ * node may change.
+ */
+extern boolean_t avl_update(avl_tree_t *, void *);
+extern boolean_t avl_update_lt(avl_tree_t *, void *);
+extern boolean_t avl_update_gt(avl_tree_t *, void *);
+
+/*
+ * Return the number of nodes in the tree
+ */
+extern ulong_t avl_numnodes(avl_tree_t *tree);
+
+/*
+ * Return B_TRUE if there are zero nodes in the tree, B_FALSE otherwise.
+ */
+extern boolean_t avl_is_empty(avl_tree_t *tree);
+
+/*
+ * Used to destroy any remaining nodes in a tree. The cookie argument should
+ * be initialized to NULL before the first call. Returns a node that has been
+ * removed from the tree and may be free()'d. Returns NULL when the tree is
+ * empty.
+ *
+ * Once you call avl_destroy_nodes(), you can only continuing calling it and
+ * finally avl_destroy(). No other AVL routines will be valid.
+ *
+ * cookie - a "void *" used to save state between calls to avl_destroy_nodes()
+ *
+ * EXAMPLE:
+ * avl_tree_t *tree;
+ * struct my_data *node;
+ * void *cookie;
+ *
+ * cookie = NULL;
+ * while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
+ * free(node);
+ * avl_destroy(tree);
+ */
+extern void *avl_destroy_nodes(avl_tree_t *tree, void **cookie);
+
+
+/*
+ * Final destroy of an AVL tree. Arguments are:
+ *
+ * tree - the empty tree to destroy
+ */
+extern void avl_destroy(avl_tree_t *tree);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _AVL_H */
diff --git a/usr/src/uts/common/sys/avl_impl.h b/usr/src/uts/common/sys/avl_impl.h
new file mode 100644
index 0000000..620685f
--- /dev/null
+++ b/usr/src/uts/common/sys/avl_impl.h
@@ -0,0 +1,164 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _AVL_IMPL_H
+#define _AVL_IMPL_H
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * This is a private header file. Applications should not directly include
+ * this file.
+ */
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * generic AVL tree implementation for kernel use
+ *
+ * There are 5 pieces of information stored for each node in an AVL tree
+ *
+ * pointer to less than child
+ * pointer to greater than child
+ * a pointer to the parent of this node
+ * an indication [0/1] of which child I am of my parent
+ * a "balance" (-1, 0, +1) indicating which child tree is taller
+ *
+ * Since they only need 3 bits, the last two fields are packed into the
+ * bottom bits of the parent pointer on 64 bit machines to save on space.
+ */
+
+#ifndef _LP64
+
+struct avl_node {
+ struct avl_node *avl_child[2]; /* left/right children */
+ struct avl_node *avl_parent; /* this node's parent */
+ unsigned short avl_child_index; /* my index in parent's avl_child[] */
+ short avl_balance; /* balance value: -1, 0, +1 */
+};
+
+#define AVL_XPARENT(n) ((n)->avl_parent)
+#define AVL_SETPARENT(n, p) ((n)->avl_parent = (p))
+
+#define AVL_XCHILD(n) ((n)->avl_child_index)
+#define AVL_SETCHILD(n, c) ((n)->avl_child_index = (unsigned short)(c))
+
+#define AVL_XBALANCE(n) ((n)->avl_balance)
+#define AVL_SETBALANCE(n, b) ((n)->avl_balance = (short)(b))
+
+#else /* _LP64 */
+
+/*
+ * for 64 bit machines, avl_pcb contains parent pointer, balance and child_index
+ * values packed in the following manner:
+ *
+ * |63 3| 2 |1 0 |
+ * |-------------------------------------|-----------------|-------------|
+ * | avl_parent hi order bits | avl_child_index | avl_balance |
+ * | | | + 1 |
+ * |-------------------------------------|-----------------|-------------|
+ *
+ */
+struct avl_node {
+ struct avl_node *avl_child[2]; /* left/right children nodes */
+ uintptr_t avl_pcb; /* parent, child_index, balance */
+};
+
+/*
+ * macros to extract/set fields in avl_pcb
+ *
+ * pointer to the parent of the current node is the high order bits
+ */
+#define AVL_XPARENT(n) ((struct avl_node *)((n)->avl_pcb & ~7))
+#define AVL_SETPARENT(n, p) \
+ ((n)->avl_pcb = (((n)->avl_pcb & 7) | (uintptr_t)(p)))
+
+/*
+ * index of this node in its parent's avl_child[]: bit #2
+ */
+#define AVL_XCHILD(n) (((n)->avl_pcb >> 2) & 1)
+#define AVL_SETCHILD(n, c) \
+ ((n)->avl_pcb = (uintptr_t)(((n)->avl_pcb & ~4) | ((c) << 2)))
+
+/*
+ * balance indication for a node, lowest 2 bits. A valid balance is
+ * -1, 0, or +1, and is encoded by adding 1 to the value to get the
+ * unsigned values of 0, 1, 2.
+ */
+#define AVL_XBALANCE(n) ((int)(((n)->avl_pcb & 3) - 1))
+#define AVL_SETBALANCE(n, b) \
+ ((n)->avl_pcb = (uintptr_t)((((n)->avl_pcb & ~3) | ((b) + 1))))
+
+#endif /* _LP64 */
+
+
+
+/*
+ * switch between a node and data pointer for a given tree
+ * the value of "o" is tree->avl_offset
+ */
+#define AVL_NODE2DATA(n, o) ((void *)((uintptr_t)(n) - (o)))
+#define AVL_DATA2NODE(d, o) ((struct avl_node *)((uintptr_t)(d) + (o)))
+
+
+
+/*
+ * macros used to create/access an avl_index_t
+ */
+#define AVL_INDEX2NODE(x) ((avl_node_t *)((x) & ~1))
+#define AVL_INDEX2CHILD(x) ((x) & 1)
+#define AVL_MKINDEX(n, c) ((avl_index_t)(n) | (c))
+
+
+/*
+ * The tree structure. The fields avl_root, avl_compar, and avl_offset come
+ * first since they are needed for avl_find(). We want them to fit into
+ * a single 64 byte cache line to make avl_find() as fast as possible.
+ */
+struct avl_tree {
+ struct avl_node *avl_root; /* root node in tree */
+ int (*avl_compar)(const void *, const void *);
+ size_t avl_offset; /* offsetof(type, avl_link_t field) */
+ ulong_t avl_numnodes; /* number of nodes in the tree */
+ size_t avl_size; /* sizeof user type struct */
+};
+
+
+/*
+ * This will only by used via AVL_NEXT() or AVL_PREV()
+ */
+extern void *avl_walk(struct avl_tree *, void *, int);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _AVL_IMPL_H */