1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2014 by Delphix. All rights reserved.
  28  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  29  */
  30 
  31 /*
  32  * AVL - generic AVL tree implementation for kernel use
  33  *
  34  * A complete description of AVL trees can be found in many CS textbooks.
  35  *
  36  * Here is a very brief overview. An AVL tree is a binary search tree that is
  37  * almost perfectly balanced. By "almost" perfectly balanced, we mean that at
  38  * any given node, the left and right subtrees are allowed to differ in height
  39  * by at most 1 level.
  40  *
  41  * This relaxation from a perfectly balanced binary tree allows doing
  42  * insertion and deletion relatively efficiently. Searching the tree is
  43  * still a fast operation, roughly O(log(N)).
  44  *
  45  * The key to insertion and deletion is a set of tree manipulations called
  46  * rotations, which bring unbalanced subtrees back into the semi-balanced state.
  47  *
  48  * This implementation of AVL trees has the following peculiarities:
  49  *
  50  *      - The AVL specific data structures are physically embedded as fields
  51  *        in the "using" data structures.  To maintain generality the code
  52  *        must constantly translate between "avl_node_t *" and containing
  53  *        data structure "void *"s by adding/subtracting the avl_offset.
  54  *
  55  *      - Since the AVL data is always embedded in other structures, there is
  56  *        no locking or memory allocation in the AVL routines. This must be
  57  *        provided for by the enclosing data structure's semantics. Typically,
  58  *        avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
  59  *        exclusive write lock. Other operations require a read lock.
  60  *
  61  *      - The implementation uses iteration instead of explicit recursion,
  62  *        since it is intended to run on limited size kernel stacks. Since
  63  *        there is no recursion stack present to move "up" in the tree,
  64  *        there is an explicit "parent" link in the avl_node_t.
  65  *
  66  *      - The left/right children pointers of a node are in an array.
  67  *        In the code, variables (instead of constants) are used to represent
  68  *        left and right indices.  The implementation is written as if it only
  69  *        dealt with left handed manipulations.  By changing the value assigned
  70  *        to "left", the code also works for right handed trees.  The
  71  *        following variables/terms are frequently used:
  72  *
  73  *              int left;       // 0 when dealing with left children,
  74  *                              // 1 for dealing with right children
  75  *
  76  *              int left_heavy; // -1 when left subtree is taller at some node,
  77  *                              // +1 when right subtree is taller
  78  *
  79  *              int right;      // will be the opposite of left (0 or 1)
  80  *              int right_heavy;// will be the opposite of left_heavy (-1 or 1)
  81  *
  82  *              int direction;  // 0 for "<" (ie. left child); 1 for ">" (right)
  83  *
  84  *        Though it is a little more confusing to read the code, the approach
  85  *        allows using half as much code (and hence cache footprint) for tree
  86  *        manipulations and eliminates many conditional branches.
  87  *
  88  *      - The avl_index_t is an opaque "cookie" used to find nodes at or
  89  *        adjacent to where a new value would be inserted in the tree. The value
  90  *        is a modified "avl_node_t *".  The bottom bit (normally 0 for a
 *        pointer) is set to indicate that the new node has a value greater
  92  *        than the value of the indicated "avl_node_t *".
  93  *
  94  * Note - in addition to userland (e.g. libavl and libutil) and the kernel
  95  * (e.g. genunix), avl.c is compiled into ld.so and kmdb's genunix module,
  96  * which each have their own compilation environments and subsequent
  97  * requirements. Each of these environments must be considered when adding
  98  * dependencies from avl.c.
  99  */
 100 
 101 #include <sys/types.h>
 102 #include <sys/param.h>
 103 #include <sys/debug.h>
 104 #include <sys/avl.h>
 105 #include <sys/cmn_err.h>
 106 
 107 /*
 108  * Small arrays to translate between balance (or diff) values and child indices.
 109  *
 110  * Code that deals with binary tree data structures will randomly use
 111  * left and right children when examining a tree.  C "if()" statements
 112  * which evaluate randomly suffer from very poor hardware branch prediction.
 113  * In this code we avoid some of the branch mispredictions by using the
 114  * following translation arrays. They replace random branches with an
 115  * additional memory reference. Since the translation arrays are both very
 116  * small the data should remain efficiently in cache.
 117  */
 118 static const int  avl_child2balance[2]  = {-1, 1};
 119 static const int  avl_balance2child[]   = {0, 0, 1};
 120 
 121 
 122 /*
 123  * Walk from one node to the previous valued node (ie. an infix walk
 124  * towards the left). At any given node we do one of 2 things:
 125  *
 126  * - If there is a left child, go to it, then to it's rightmost descendant.
 127  *
 128  * - otherwise we return through parent nodes until we've come from a right
 129  *   child.
 130  *
 131  * Return Value:
 132  * NULL - if at the end of the nodes
 133  * otherwise next node
 134  */
 135 void *
 136 avl_walk(avl_tree_t *tree, void *oldnode, int left)
 137 {
 138         size_t off = tree->avl_offset;
 139         avl_node_t *node = AVL_DATA2NODE(oldnode, off);
 140         int right = 1 - left;
 141         int was_child;
 142 
 143 
 144         /*
 145          * nowhere to walk to if tree is empty
 146          */
 147         if (node == NULL)
 148                 return (NULL);
 149 
 150         /*
 151          * Visit the previous valued node. There are two possibilities:
 152          *
 153          * If this node has a left child, go down one left, then all
 154          * the way right.
 155          */
 156         if (node->avl_child[left] != NULL) {
 157                 for (node = node->avl_child[left];
 158                     node->avl_child[right] != NULL;
 159                     node = node->avl_child[right])
 160                         ;
 161         /*
 162          * Otherwise, return thru left children as far as we can.
 163          */
 164         } else {
 165                 for (;;) {
 166                         was_child = AVL_XCHILD(node);
 167                         node = AVL_XPARENT(node);
 168                         if (node == NULL)
 169                                 return (NULL);
 170                         if (was_child == right)
 171                                 break;
 172                 }
 173         }
 174 
 175         return (AVL_NODE2DATA(node, off));
 176 }
 177 
 178 /*
 179  * Return the lowest valued node in a tree or NULL.
 180  * (leftmost child from root of tree)
 181  */
 182 void *
 183 avl_first(avl_tree_t *tree)
 184 {
 185         avl_node_t *node;
 186         avl_node_t *prev = NULL;
 187         size_t off = tree->avl_offset;
 188 
 189         for (node = tree->avl_root; node != NULL; node = node->avl_child[0])
 190                 prev = node;
 191 
 192         if (prev != NULL)
 193                 return (AVL_NODE2DATA(prev, off));
 194         return (NULL);
 195 }
 196 
 197 /*
 198  * Return the highest valued node in a tree or NULL.
 199  * (rightmost child from root of tree)
 200  */
 201 void *
 202 avl_last(avl_tree_t *tree)
 203 {
 204         avl_node_t *node;
 205         avl_node_t *prev = NULL;
 206         size_t off = tree->avl_offset;
 207 
 208         for (node = tree->avl_root; node != NULL; node = node->avl_child[1])
 209                 prev = node;
 210 
 211         if (prev != NULL)
 212                 return (AVL_NODE2DATA(prev, off));
 213         return (NULL);
 214 }
 215 
 216 /*
 217  * Access the node immediately before or after an insertion point.
 218  *
 219  * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child
 220  *
 221  * Return value:
 222  *      NULL: no node in the given direction
 223  *      "void *"  of the found tree node
 224  */
 225 void *
 226 avl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
 227 {
 228         int child = AVL_INDEX2CHILD(where);
 229         avl_node_t *node = AVL_INDEX2NODE(where);
 230         void *data;
 231         size_t off = tree->avl_offset;
 232 
 233         if (node == NULL) {
 234                 ASSERT(tree->avl_root == NULL);
 235                 return (NULL);
 236         }
 237         data = AVL_NODE2DATA(node, off);
 238         if (child != direction)
 239                 return (data);
 240 
 241         return (avl_walk(tree, data, direction));
 242 }
 243 
 244 
 245 /*
 246  * Search for the node which contains "value".  The algorithm is a
 247  * simple binary tree search.
 248  *
 249  * return value:
 250  *      NULL: the value is not in the AVL tree
 251  *              *where (if not NULL)  is set to indicate the insertion point
 252  *      "void *"  of the found tree node
 253  */
 254 void *
 255 avl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
 256 {
 257         avl_node_t *node;
 258         avl_node_t *prev = NULL;
 259         int child = 0;
 260         int diff;
 261         size_t off = tree->avl_offset;
 262 
 263         for (node = tree->avl_root; node != NULL;
 264             node = node->avl_child[child]) {
 265 
 266                 prev = node;
 267 
 268                 diff = tree->avl_compar(value, AVL_NODE2DATA(node, off));
 269                 ASSERT(-1 <= diff && diff <= 1);
 270                 if (diff == 0) {
 271 #ifdef DEBUG
 272                         if (where != NULL)
 273                                 *where = 0;
 274 #endif
 275                         return (AVL_NODE2DATA(node, off));
 276                 }
 277                 child = avl_balance2child[1 + diff];
 278 
 279         }
 280 
 281         if (where != NULL)
 282                 *where = AVL_MKINDEX(prev, child);
 283 
 284         return (NULL);
 285 }
 286 
 287 
 288 /*
 289  * Perform a rotation to restore balance at the subtree given by depth.
 290  *
 291  * This routine is used by both insertion and deletion. The return value
 292  * indicates:
 293  *       0 : subtree did not change height
 294  *      !0 : subtree was reduced in height
 295  *
 296  * The code is written as if handling left rotations, right rotations are
 297  * symmetric and handled by swapping values of variables right/left[_heavy]
 298  *
 299  * On input balance is the "new" balance at "node". This value is either
 300  * -2 or +2.
 301  */
 302 static int
 303 avl_rotation(avl_tree_t *tree, avl_node_t *node, int balance)
 304 {
 305         int left = !(balance < 0);   /* when balance = -2, left will be 0 */
 306         int right = 1 - left;
 307         int left_heavy = balance >> 1;
 308         int right_heavy = -left_heavy;
 309         avl_node_t *parent = AVL_XPARENT(node);
 310         avl_node_t *child = node->avl_child[left];
 311         avl_node_t *cright;
 312         avl_node_t *gchild;
 313         avl_node_t *gright;
 314         avl_node_t *gleft;
 315         int which_child = AVL_XCHILD(node);
 316         int child_bal = AVL_XBALANCE(child);
 317 
 318         /* BEGIN CSTYLED */
 319         /*
 320          * case 1 : node is overly left heavy, the left child is balanced or
 321          * also left heavy. This requires the following rotation.
 322          *
 323          *                   (node bal:-2)
 324          *                    /           \
 325          *                   /             \
 326          *              (child bal:0 or -1)
 327          *              /    \
 328          *             /      \
 329          *                     cright
 330          *
 331          * becomes:
 332          *
 333          *              (child bal:1 or 0)
 334          *              /        \
 335          *             /          \
 336          *                        (node bal:-1 or 0)
 337          *                         /     \
 338          *                        /       \
 339          *                     cright
 340          *
 341          * we detect this situation by noting that child's balance is not
 342          * right_heavy.
 343          */
 344         /* END CSTYLED */
 345         if (child_bal != right_heavy) {
 346 
 347                 /*
 348                  * compute new balance of nodes
 349                  *
 350                  * If child used to be left heavy (now balanced) we reduced
 351                  * the height of this sub-tree -- used in "return...;" below
 352                  */
 353                 child_bal += right_heavy; /* adjust towards right */
 354 
 355                 /*
 356                  * move "cright" to be node's left child
 357                  */
 358                 cright = child->avl_child[right];
 359                 node->avl_child[left] = cright;
 360                 if (cright != NULL) {
 361                         AVL_SETPARENT(cright, node);
 362                         AVL_SETCHILD(cright, left);
 363                 }
 364 
 365                 /*
 366                  * move node to be child's right child
 367                  */
 368                 child->avl_child[right] = node;
 369                 AVL_SETBALANCE(node, -child_bal);
 370                 AVL_SETCHILD(node, right);
 371                 AVL_SETPARENT(node, child);
 372 
 373                 /*
 374                  * update the pointer into this subtree
 375                  */
 376                 AVL_SETBALANCE(child, child_bal);
 377                 AVL_SETCHILD(child, which_child);
 378                 AVL_SETPARENT(child, parent);
 379                 if (parent != NULL)
 380                         parent->avl_child[which_child] = child;
 381                 else
 382                         tree->avl_root = child;
 383 
 384                 return (child_bal == 0);
 385         }
 386 
 387         /* BEGIN CSTYLED */
 388         /*
 389          * case 2 : When node is left heavy, but child is right heavy we use
 390          * a different rotation.
 391          *
 392          *                   (node b:-2)
 393          *                    /   \
 394          *                   /     \
 395          *                  /       \
 396          *             (child b:+1)
 397          *              /     \
 398          *             /       \
 399          *                   (gchild b: != 0)
 400          *                     /  \
 401          *                    /    \
 402          *                 gleft   gright
 403          *
 404          * becomes:
 405          *
 406          *              (gchild b:0)
 407          *              /       \
 408          *             /         \
 409          *            /           \
 410          *        (child b:?)   (node b:?)
 411          *         /  \          /   \
 412          *        /    \        /     \
 413          *            gleft   gright
 414          *
 415          * computing the new balances is more complicated. As an example:
 416          *       if gchild was right_heavy, then child is now left heavy
 417          *              else it is balanced
 418          */
 419         /* END CSTYLED */
 420         gchild = child->avl_child[right];
 421         gleft = gchild->avl_child[left];
 422         gright = gchild->avl_child[right];
 423 
 424         /*
 425          * move gright to left child of node and
 426          *
 427          * move gleft to right child of node
 428          */
 429         node->avl_child[left] = gright;
 430         if (gright != NULL) {
 431                 AVL_SETPARENT(gright, node);
 432                 AVL_SETCHILD(gright, left);
 433         }
 434 
 435         child->avl_child[right] = gleft;
 436         if (gleft != NULL) {
 437                 AVL_SETPARENT(gleft, child);
 438                 AVL_SETCHILD(gleft, right);
 439         }
 440 
 441         /*
 442          * move child to left child of gchild and
 443          *
 444          * move node to right child of gchild and
 445          *
 446          * fixup parent of all this to point to gchild
 447          */
 448         balance = AVL_XBALANCE(gchild);
 449         gchild->avl_child[left] = child;
 450         AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0));
 451         AVL_SETPARENT(child, gchild);
 452         AVL_SETCHILD(child, left);
 453 
 454         gchild->avl_child[right] = node;
 455         AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0));
 456         AVL_SETPARENT(node, gchild);
 457         AVL_SETCHILD(node, right);
 458 
 459         AVL_SETBALANCE(gchild, 0);
 460         AVL_SETPARENT(gchild, parent);
 461         AVL_SETCHILD(gchild, which_child);
 462         if (parent != NULL)
 463                 parent->avl_child[which_child] = gchild;
 464         else
 465                 tree->avl_root = gchild;
 466 
 467         return (1);     /* the new tree is always shorter */
 468 }
 469 
 470 
 471 /*
 472  * Insert a new node into an AVL tree at the specified (from avl_find()) place.
 473  *
 474  * Newly inserted nodes are always leaf nodes in the tree, since avl_find()
 475  * searches out to the leaf positions.  The avl_index_t indicates the node
 476  * which will be the parent of the new node.
 477  *
 478  * After the node is inserted, a single rotation further up the tree may
 479  * be necessary to maintain an acceptable AVL balance.
 480  */
 481 void
 482 avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
 483 {
 484         avl_node_t *node;
 485         avl_node_t *parent = AVL_INDEX2NODE(where);
 486         int old_balance;
 487         int new_balance;
 488         int which_child = AVL_INDEX2CHILD(where);
 489         size_t off = tree->avl_offset;
 490 
 491         ASSERT(tree);
 492 #ifdef _LP64
 493         ASSERT(((uintptr_t)new_data & 0x7) == 0);
 494 #endif
 495 
 496         node = AVL_DATA2NODE(new_data, off);
 497 
 498         /*
 499          * First, add the node to the tree at the indicated position.
 500          */
 501         ++tree->avl_numnodes;
 502 
 503         node->avl_child[0] = NULL;
 504         node->avl_child[1] = NULL;
 505 
 506         AVL_SETCHILD(node, which_child);
 507         AVL_SETBALANCE(node, 0);
 508         AVL_SETPARENT(node, parent);
 509         if (parent != NULL) {
 510                 ASSERT(parent->avl_child[which_child] == NULL);
 511                 parent->avl_child[which_child] = node;
 512         } else {
 513                 ASSERT(tree->avl_root == NULL);
 514                 tree->avl_root = node;
 515         }
 516         /*
 517          * Now, back up the tree modifying the balance of all nodes above the
 518          * insertion point. If we get to a highly unbalanced ancestor, we
 519          * need to do a rotation.  If we back out of the tree we are done.
 520          * If we brought any subtree into perfect balance (0), we are also done.
 521          */
 522         for (;;) {
 523                 node = parent;
 524                 if (node == NULL)
 525                         return;
 526 
 527                 /*
 528                  * Compute the new balance
 529                  */
 530                 old_balance = AVL_XBALANCE(node);
 531                 new_balance = old_balance + avl_child2balance[which_child];
 532 
 533                 /*
 534                  * If we introduced equal balance, then we are done immediately
 535                  */
 536                 if (new_balance == 0) {
 537                         AVL_SETBALANCE(node, 0);
 538                         return;
 539                 }
 540 
 541                 /*
 542                  * If both old and new are not zero we went
 543                  * from -1 to -2 balance, do a rotation.
 544                  */
 545                 if (old_balance != 0)
 546                         break;
 547 
 548                 AVL_SETBALANCE(node, new_balance);
 549                 parent = AVL_XPARENT(node);
 550                 which_child = AVL_XCHILD(node);
 551         }
 552 
 553         /*
 554          * perform a rotation to fix the tree and return
 555          */
 556         (void) avl_rotation(tree, node, new_balance);
 557 }
 558 
 559 /*
 560  * Insert "new_data" in "tree" in the given "direction" either after or
 561  * before (AVL_AFTER, AVL_BEFORE) the data "here".
 562  *
 563  * Insertions can only be done at empty leaf points in the tree, therefore
 564  * if the given child of the node is already present we move to either
 565  * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
 566  * every other node in the tree is a leaf, this always works.
 567  *
 568  * To help developers using this interface, we assert that the new node
 569  * is correctly ordered at every step of the way in DEBUG kernels.
 570  */
 571 void
 572 avl_insert_here(
 573         avl_tree_t *tree,
 574         void *new_data,
 575         void *here,
 576         int direction)
 577 {
 578         avl_node_t *node;
 579         int child = direction;  /* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
 580 #ifdef DEBUG
 581         int diff;
 582 #endif
 583 
 584         ASSERT(tree != NULL);
 585         ASSERT(new_data != NULL);
 586         ASSERT(here != NULL);
 587         ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER);
 588 
 589         /*
 590          * If corresponding child of node is not NULL, go to the neighboring
 591          * node and reverse the insertion direction.
 592          */
 593         node = AVL_DATA2NODE(here, tree->avl_offset);
 594 
 595 #ifdef DEBUG
 596         diff = tree->avl_compar(new_data, here);
 597         ASSERT(-1 <= diff && diff <= 1);
 598         ASSERT(diff != 0);
 599         ASSERT(diff > 0 ? child == 1 : child == 0);
 600 #endif
 601 
 602         if (node->avl_child[child] != NULL) {
 603                 node = node->avl_child[child];
 604                 child = 1 - child;
 605                 while (node->avl_child[child] != NULL) {
 606 #ifdef DEBUG
 607                         diff = tree->avl_compar(new_data,
 608                             AVL_NODE2DATA(node, tree->avl_offset));
 609                         ASSERT(-1 <= diff && diff <= 1);
 610                         ASSERT(diff != 0);
 611                         ASSERT(diff > 0 ? child == 1 : child == 0);
 612 #endif
 613                         node = node->avl_child[child];
 614                 }
 615 #ifdef DEBUG
 616                 diff = tree->avl_compar(new_data,
 617                     AVL_NODE2DATA(node, tree->avl_offset));
 618                 ASSERT(-1 <= diff && diff <= 1);
 619                 ASSERT(diff != 0);
 620                 ASSERT(diff > 0 ? child == 1 : child == 0);
 621 #endif
 622         }
 623         ASSERT(node->avl_child[child] == NULL);
 624 
 625         avl_insert(tree, new_data, AVL_MKINDEX(node, child));
 626 }
 627 
 628 /*
 629  * Add a new node to an AVL tree.
 630  */
 631 void
 632 avl_add(avl_tree_t *tree, void *new_node)
 633 {
 634         avl_index_t where;
 635 
 636         /*
 637          * This is unfortunate.  We want to call panic() here, even for
 638          * non-DEBUG kernels.  In userland, however, we can't depend on anything
 639          * in libc or else the rtld build process gets confused.
 640          * Thankfully, rtld provides us with its own assfail() so we can use
 641          * that here.  We use assfail() directly to get a nice error message
 642          * in the core - much like what panic() does for crashdumps.
 643          */
 644         if (avl_find(tree, new_node, &where) != NULL)
 645 #ifdef _KERNEL
 646                 panic("avl_find() succeeded inside avl_add()");
 647 #else
 648                 (void) assfail("avl_find() succeeded inside avl_add()",
 649                     __FILE__, __LINE__);
 650 #endif
 651         avl_insert(tree, new_node, where);
 652 }
 653 
 654 /*
 655  * Delete a node from the AVL tree.  Deletion is similar to insertion, but
 656  * with 2 complications.
 657  *
 658  * First, we may be deleting an interior node. Consider the following subtree:
 659  *
 660  *     d           c            c
 661  *    / \         / \          / \
 662  *   b   e       b   e        b   e
 663  *  / \         / \          /
 664  * a   c       a            a
 665  *
 666  * When we are deleting node (d), we find and bring up an adjacent valued leaf
 667  * node, say (c), to take the interior node's place. In the code this is
 668  * handled by temporarily swapping (d) and (c) in the tree and then using
 669  * common code to delete (d) from the leaf position.
 670  *
 671  * Secondly, an interior deletion from a deep tree may require more than one
 672  * rotation to fix the balance. This is handled by moving up the tree through
 673  * parents and applying rotations as needed. The return value from
 674  * avl_rotation() is used to detect when a subtree did not change overall
 675  * height due to a rotation.
 676  */
 677 void
 678 avl_remove(avl_tree_t *tree, void *data)
 679 {
 680         avl_node_t *delete;
 681         avl_node_t *parent;
 682         avl_node_t *node;
 683         avl_node_t tmp;
 684         int old_balance;
 685         int new_balance;
 686         int left;
 687         int right;
 688         int which_child;
 689         size_t off = tree->avl_offset;
 690 
 691         ASSERT(tree);
 692 
 693         delete = AVL_DATA2NODE(data, off);
 694 
 695         /*
 696          * Deletion is easiest with a node that has at most 1 child.
 697          * We swap a node with 2 children with a sequentially valued
 698          * neighbor node. That node will have at most 1 child. Note this
 699          * has no effect on the ordering of the remaining nodes.
 700          *
 701          * As an optimization, we choose the greater neighbor if the tree
 702          * is right heavy, otherwise the left neighbor. This reduces the
 703          * number of rotations needed.
 704          */
 705         if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {
 706 
 707                 /*
 708                  * choose node to swap from whichever side is taller
 709                  */
 710                 old_balance = AVL_XBALANCE(delete);
 711                 left = avl_balance2child[old_balance + 1];
 712                 right = 1 - left;
 713 
 714                 /*
 715                  * get to the previous value'd node
 716                  * (down 1 left, as far as possible right)
 717                  */
 718                 for (node = delete->avl_child[left];
 719                     node->avl_child[right] != NULL;
 720                     node = node->avl_child[right])
 721                         ;
 722 
 723                 /*
 724                  * create a temp placeholder for 'node'
 725                  * move 'node' to delete's spot in the tree
 726                  */
 727                 tmp = *node;
 728 
 729                 *node = *delete;
 730                 if (node->avl_child[left] == node)
 731                         node->avl_child[left] = &tmp;
 732 
 733                 parent = AVL_XPARENT(node);
 734                 if (parent != NULL)
 735                         parent->avl_child[AVL_XCHILD(node)] = node;
 736                 else
 737                         tree->avl_root = node;
 738                 AVL_SETPARENT(node->avl_child[left], node);
 739                 AVL_SETPARENT(node->avl_child[right], node);
 740 
 741                 /*
 742                  * Put tmp where node used to be (just temporary).
 743                  * It always has a parent and at most 1 child.
 744                  */
 745                 delete = &tmp;
 746                 parent = AVL_XPARENT(delete);
 747                 parent->avl_child[AVL_XCHILD(delete)] = delete;
 748                 which_child = (delete->avl_child[1] != 0);
 749                 if (delete->avl_child[which_child] != NULL)
 750                         AVL_SETPARENT(delete->avl_child[which_child], delete);
 751         }
 752 
 753 
 754         /*
 755          * Here we know "delete" is at least partially a leaf node. It can
 756          * be easily removed from the tree.
 757          */
 758         ASSERT(tree->avl_numnodes > 0);
 759         --tree->avl_numnodes;
 760         parent = AVL_XPARENT(delete);
 761         which_child = AVL_XCHILD(delete);
 762         if (delete->avl_child[0] != NULL)
 763                 node = delete->avl_child[0];
 764         else
 765                 node = delete->avl_child[1];
 766 
 767         /*
 768          * Connect parent directly to node (leaving out delete).
 769          */
 770         if (node != NULL) {
 771                 AVL_SETPARENT(node, parent);
 772                 AVL_SETCHILD(node, which_child);
 773         }
 774         if (parent == NULL) {
 775                 tree->avl_root = node;
 776                 return;
 777         }
 778         parent->avl_child[which_child] = node;
 779 
 780 
 781         /*
 782          * Since the subtree is now shorter, begin adjusting parent balances
 783          * and performing any needed rotations.
 784          */
 785         do {
 786 
 787                 /*
 788                  * Move up the tree and adjust the balance
 789                  *
 790                  * Capture the parent and which_child values for the next
 791                  * iteration before any rotations occur.
 792                  */
 793                 node = parent;
 794                 old_balance = AVL_XBALANCE(node);
 795                 new_balance = old_balance - avl_child2balance[which_child];
 796                 parent = AVL_XPARENT(node);
 797                 which_child = AVL_XCHILD(node);
 798 
 799                 /*
 800                  * If a node was in perfect balance but isn't anymore then
 801                  * we can stop, since the height didn't change above this point
 802                  * due to a deletion.
 803                  */
 804                 if (old_balance == 0) {
 805                         AVL_SETBALANCE(node, new_balance);
 806                         break;
 807                 }
 808 
 809                 /*
 810                  * If the new balance is zero, we don't need to rotate
 811                  * else
 812                  * need a rotation to fix the balance.
 813                  * If the rotation doesn't change the height
 814                  * of the sub-tree we have finished adjusting.
 815                  */
 816                 if (new_balance == 0)
 817                         AVL_SETBALANCE(node, new_balance);
 818                 else if (!avl_rotation(tree, node, new_balance))
 819                         break;
 820         } while (parent != NULL);
 821 }
 822 
 823 #define AVL_REINSERT(tree, obj)         \
 824         avl_remove((tree), (obj));      \
 825         avl_add((tree), (obj))
 826 
 827 boolean_t
 828 avl_update_lt(avl_tree_t *t, void *obj)
 829 {
 830         void *neighbor;
 831 
 832         ASSERT(((neighbor = AVL_NEXT(t, obj)) == NULL) ||
 833             (t->avl_compar(obj, neighbor) <= 0));
 834 
 835         neighbor = AVL_PREV(t, obj);
 836         if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
 837                 AVL_REINSERT(t, obj);
 838                 return (B_TRUE);
 839         }
 840 
 841         return (B_FALSE);
 842 }
 843 
 844 boolean_t
 845 avl_update_gt(avl_tree_t *t, void *obj)
 846 {
 847         void *neighbor;
 848 
 849         ASSERT(((neighbor = AVL_PREV(t, obj)) == NULL) ||
 850             (t->avl_compar(obj, neighbor) >= 0));
 851 
 852         neighbor = AVL_NEXT(t, obj);
 853         if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
 854                 AVL_REINSERT(t, obj);
 855                 return (B_TRUE);
 856         }
 857 
 858         return (B_FALSE);
 859 }
 860 
 861 boolean_t
 862 avl_update(avl_tree_t *t, void *obj)
 863 {
 864         void *neighbor;
 865 
 866         neighbor = AVL_PREV(t, obj);
 867         if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
 868                 AVL_REINSERT(t, obj);
 869                 return (B_TRUE);
 870         }
 871 
 872         neighbor = AVL_NEXT(t, obj);
 873         if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
 874                 AVL_REINSERT(t, obj);
 875                 return (B_TRUE);
 876         }
 877 
 878         return (B_FALSE);
 879 }
 880 
 881 void
 882 avl_swap(avl_tree_t *tree1, avl_tree_t *tree2)
 883 {
 884         avl_node_t *temp_node;
 885         ulong_t temp_numnodes;
 886 
 887         ASSERT3P(tree1->avl_compar, ==, tree2->avl_compar);
 888         ASSERT3U(tree1->avl_offset, ==, tree2->avl_offset);
 889         ASSERT3U(tree1->avl_size, ==, tree2->avl_size);
 890 
 891         temp_node = tree1->avl_root;
 892         temp_numnodes = tree1->avl_numnodes;
 893         tree1->avl_root = tree2->avl_root;
 894         tree1->avl_numnodes = tree2->avl_numnodes;
 895         tree2->avl_root = temp_node;
 896         tree2->avl_numnodes = temp_numnodes;
 897 }
 898 
 899 /*
 900  * initialize a new AVL tree
 901  */
 902 void
 903 avl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
 904     size_t size, size_t offset)
 905 {
 906         ASSERT(tree);
 907         ASSERT(compar);
 908         ASSERT(size > 0);
 909         ASSERT(size >= offset + sizeof (avl_node_t));
 910 #ifdef _LP64
 911         ASSERT((offset & 0x7) == 0);
 912 #endif
 913 
 914         tree->avl_compar = compar;
 915         tree->avl_root = NULL;
 916         tree->avl_numnodes = 0;
 917         tree->avl_size = size;
 918         tree->avl_offset = offset;
 919 }
 920 
 921 /*
 922  * Delete a tree.
 923  */
 924 /* ARGSUSED */
 925 void
 926 avl_destroy(avl_tree_t *tree)
 927 {
 928         ASSERT(tree);
 929         ASSERT(tree->avl_numnodes == 0);
 930         ASSERT(tree->avl_root == NULL);
 931 }
 932 
 933 
 934 /*
 935  * Return the number of nodes in an AVL tree.
 936  */
 937 ulong_t
 938 avl_numnodes(avl_tree_t *tree)
 939 {
 940         ASSERT(tree);
 941         return (tree->avl_numnodes);
 942 }
 943 
 944 boolean_t
 945 avl_is_empty(avl_tree_t *tree)
 946 {
 947         ASSERT(tree);
 948         return (tree->avl_numnodes == 0);
 949 }
 950 
 951 #define CHILDBIT        (1L)
 952 
 953 /*
 954  * Post-order tree walk used to visit all tree nodes and destroy the tree
 955  * in post order. This is used for destroying a tree without paying any cost
 956  * for rebalancing it.
 957  *
 958  * example:
 959  *
 960  *      void *cookie = NULL;
 961  *      my_data_t *node;
 962  *
 963  *      while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
 964  *              free(node);
 965  *      avl_destroy(tree);
 966  *
 967  * The cookie is really an avl_node_t to the current node's parent and
 968  * an indication of which child you looked at last.
 969  *
 970  * On input, a cookie value of CHILDBIT indicates the tree is done.
 971  */
 972 void *
 973 avl_destroy_nodes(avl_tree_t *tree, void **cookie)
 974 {
 975         avl_node_t      *node;
 976         avl_node_t      *parent;
 977         int             child;
 978         void            *first;
 979         size_t          off = tree->avl_offset;
 980 
 981         /*
 982          * Initial calls go to the first node or it's right descendant.
 983          */
 984         if (*cookie == NULL) {
 985                 first = avl_first(tree);
 986 
 987                 /*
 988                  * deal with an empty tree
 989                  */
 990                 if (first == NULL) {
 991                         *cookie = (void *)CHILDBIT;
 992                         return (NULL);
 993                 }
 994 
 995                 node = AVL_DATA2NODE(first, off);
 996                 parent = AVL_XPARENT(node);
 997                 goto check_right_side;
 998         }
 999 
1000         /*
1001          * If there is no parent to return to we are done.
1002          */
1003         parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT);
1004         if (parent == NULL) {
1005                 if (tree->avl_root != NULL) {
1006                         ASSERT(tree->avl_numnodes == 1);
1007                         tree->avl_root = NULL;
1008                         tree->avl_numnodes = 0;
1009                 }
1010                 return (NULL);
1011         }
1012 
1013         /*
1014          * Remove the child pointer we just visited from the parent and tree.
1015          */
1016         child = (uintptr_t)(*cookie) & CHILDBIT;
1017         parent->avl_child[child] = NULL;
1018         ASSERT(tree->avl_numnodes > 1);
1019         --tree->avl_numnodes;
1020 
1021         /*
1022          * If we just did a right child or there isn't one, go up to parent.
1023          */
1024         if (child == 1 || parent->avl_child[1] == NULL) {
1025                 node = parent;
1026                 parent = AVL_XPARENT(parent);
1027                 goto done;
1028         }
1029 
1030         /*
1031          * Do parent's right child, then leftmost descendent.
1032          */
1033         node = parent->avl_child[1];
1034         while (node->avl_child[0] != NULL) {
1035                 parent = node;
1036                 node = node->avl_child[0];
1037         }
1038 
1039         /*
1040          * If here, we moved to a left child. It may have one
1041          * child on the right (when balance == +1).
1042          */
1043 check_right_side:
1044         if (node->avl_child[1] != NULL) {
1045                 ASSERT(AVL_XBALANCE(node) == 1);
1046                 parent = node;
1047                 node = node->avl_child[1];
1048                 ASSERT(node->avl_child[0] == NULL &&
1049                     node->avl_child[1] == NULL);
1050         } else {
1051                 ASSERT(AVL_XBALANCE(node) <= 0);
1052         }
1053 
1054 done:
1055         if (parent == NULL) {
1056                 *cookie = (void *)CHILDBIT;
1057                 ASSERT(node == tree->avl_root);
1058         } else {
1059                 *cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node));
1060         }
1061 
1062         return (AVL_NODE2DATA(node, off));
1063 }