1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 #include <sys/flock_impl.h> 33 #include <sys/vfs.h> 34 #include <sys/t_lock.h> /* for <sys/callb.h> */ 35 #include <sys/callb.h> 36 #include <sys/clconf.h> 37 #include <sys/cladm.h> 38 #include <sys/nbmlock.h> 39 #include <sys/cred.h> 40 #include <sys/policy.h> 41 42 /* 43 * The following four variables are for statistics purposes and they are 44 * not protected by locks. They may not be accurate but will at least be 45 * close to the actual value. 46 */ 47 48 int flk_lock_allocs; 49 int flk_lock_frees; 50 int edge_allocs; 51 int edge_frees; 52 int flk_proc_vertex_allocs; 53 int flk_proc_edge_allocs; 54 int flk_proc_vertex_frees; 55 int flk_proc_edge_frees; 56 57 static kmutex_t flock_lock; 58 59 #ifdef DEBUG 60 int check_debug = 0; 61 #define CHECK_ACTIVE_LOCKS(gp) if (check_debug) \ 62 check_active_locks(gp); 63 #define CHECK_SLEEPING_LOCKS(gp) if (check_debug) \ 64 check_sleeping_locks(gp); 65 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) \ 66 if (check_debug) \ 67 check_owner_locks(gp, pid, sysid, vp); 68 #define CHECK_LOCK_TRANSITION(old_state, new_state) \ 69 { \ 70 if (check_lock_transition(old_state, new_state)) { \ 71 cmn_err(CE_PANIC, "Illegal lock transition \ 72 from %d to %d", old_state, new_state); \ 73 } \ 74 } 75 #else 76 77 #define CHECK_ACTIVE_LOCKS(gp) 78 #define CHECK_SLEEPING_LOCKS(gp) 79 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp) 80 #define CHECK_LOCK_TRANSITION(old_state, new_state) 81 82 #endif /* DEBUG */ 83 84 struct kmem_cache *flk_edge_cache; 85 86 graph_t *lock_graph[HASH_SIZE]; 87 proc_graph_t pgraph; 88 89 /* 90 * Clustering. 91 * 92 * NLM REGISTRY TYPE IMPLEMENTATION 93 * 94 * Assumptions: 95 * 1. Nodes in a cluster are numbered starting at 1; always non-negative 96 * integers; maximum node id is returned by clconf_maximum_nodeid(). 97 * 2. We use this node id to identify the node an NLM server runs on. 98 */ 99 100 /* 101 * NLM registry object keeps track of NLM servers via their 102 * nlmids (which are the node ids of the node in the cluster they run on) 103 * that have requested locks at this LLM with which this registry is 104 * associated. 105 * 106 * Representation of abstraction: 107 * rep = record[ states: array[nlm_state], 108 * lock: mutex] 109 * 110 * Representation invariants: 111 * 1. 
index i of rep.states is between 0 and n - 1 where n is the number 112 * of elements in the array, which happens to be the maximum number 113 * of nodes in the cluster configuration + 1. 114 * 2. map nlmid to index i of rep.states 115 * 0 -> 0 116 * 1 -> 1 117 * 2 -> 2 118 * n-1 -> clconf_maximum_nodeid()+1 119 * 3. This 1-1 mapping is quite convenient and it avoids errors resulting 120 * from forgetting to subtract 1 from the index. 121 * 4. The reason we keep the 0th index is the following. A legitimate 122 * cluster configuration includes making a UFS file system NFS 123 * exportable. The code is structured so that if you're in a cluster 124 * you do one thing; otherwise, you do something else. The problem 125 * is what to do if you think you're in a cluster with PXFS loaded, 126 * but you're using UFS not PXFS? The upper two bytes of the sysid 127 * encode the node id of the node where NLM server runs; these bytes 128 * are zero for UFS. Since the nodeid is used to index into the 129 * registry, we can record the NLM server state information at index 130 * 0 using the same mechanism used for PXFS file locks! 131 */ 132 static flk_nlm_status_t *nlm_reg_status = NULL; /* state array 0..N-1 */ 133 static kmutex_t nlm_reg_lock; /* lock to protect array */ 134 static uint_t nlm_status_size; /* size of state array */ 135 136 /* 137 * Although we need a global lock dependency graph (and associated data 138 * structures), we also need a per-zone notion of whether the lock manager is 139 * running, and so whether to allow lock manager requests or not. 140 * 141 * Thus, on a per-zone basis we maintain a ``global'' variable 142 * (flk_lockmgr_status), protected by flock_lock, and set when the lock 143 * manager is determined to be changing state (starting or stopping). 144 * 145 * Each graph/zone pair also has a copy of this variable, which is protected by 146 * the graph's mutex. 147 * 148 * The per-graph copies are used to synchronize lock requests with shutdown 149 * requests. The global copy is used to initialize the per-graph field when a 150 * new graph is created.
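 *
 * A minimal sketch of the intended protocol (the state-change code
 * elsewhere in the lock manager is more careful about NULL graphs and
 * about waking sleepers; this is only an illustration, with fg being
 * the zone's struct flock_globals):
 *
 *	mutex_enter(&flock_lock);
 *	fg->flk_lockmgr_status = FLK_LOCKMGR_DOWN;	(global copy)
 *	mutex_exit(&flock_lock);
 *
 *	for (i = 0; i < HASH_SIZE; i++) {
 *		if (lock_graph[i] == NULL)
 *			continue;
 *		mutex_enter(&lock_graph[i]->gp_mutex);
 *		fg->lockmgr_status[i] = fg->flk_lockmgr_status;
 *		mutex_exit(&lock_graph[i]->gp_mutex);
 *	}
 *
 * The sleeping-lock path then only needs the per-graph copy, read
 * under gp_mutex, to decide whether to proceed or fail with ENOLCK.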
151 */ 152 struct flock_globals { 153 flk_lockmgr_status_t flk_lockmgr_status; 154 flk_lockmgr_status_t lockmgr_status[HASH_SIZE]; 155 }; 156 157 zone_key_t flock_zone_key; 158 159 static void create_flock(lock_descriptor_t *, flock64_t *); 160 static lock_descriptor_t *flk_get_lock(void); 161 static void flk_free_lock(lock_descriptor_t *lock); 162 static void flk_get_first_blocking_lock(lock_descriptor_t *request); 163 static int flk_process_request(lock_descriptor_t *); 164 static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int); 165 static edge_t *flk_get_edge(void); 166 static int flk_wait_execute_request(lock_descriptor_t *); 167 static int flk_relation(lock_descriptor_t *, lock_descriptor_t *); 168 static void flk_insert_active_lock(lock_descriptor_t *); 169 static void flk_delete_active_lock(lock_descriptor_t *, int); 170 static void flk_insert_sleeping_lock(lock_descriptor_t *); 171 static void flk_graph_uncolor(graph_t *); 172 static void flk_wakeup(lock_descriptor_t *, int); 173 static void flk_free_edge(edge_t *); 174 static void flk_recompute_dependencies(lock_descriptor_t *, 175 lock_descriptor_t **, int, int); 176 static int flk_find_barriers(lock_descriptor_t *); 177 static void flk_update_barriers(lock_descriptor_t *); 178 static int flk_color_reachables(lock_descriptor_t *); 179 static int flk_canceled(lock_descriptor_t *); 180 static void flk_delete_locks_by_sysid(lock_descriptor_t *); 181 static void report_blocker(lock_descriptor_t *, lock_descriptor_t *); 182 static void wait_for_lock(lock_descriptor_t *); 183 static void unlock_lockmgr_granted(struct flock_globals *); 184 static void wakeup_sleeping_lockmgr_locks(struct flock_globals *); 185 186 /* Clustering hooks */ 187 static void cl_flk_change_nlm_state_all_locks(int, flk_nlm_status_t); 188 static void cl_flk_wakeup_sleeping_nlm_locks(int); 189 static void cl_flk_unlock_nlm_granted(int); 190 191 #ifdef DEBUG 192 static int check_lock_transition(int, int); 193 static void check_sleeping_locks(graph_t *); 194 static void check_active_locks(graph_t *); 195 static int no_path(lock_descriptor_t *, lock_descriptor_t *); 196 static void path(lock_descriptor_t *, lock_descriptor_t *); 197 static void check_owner_locks(graph_t *, pid_t, int, vnode_t *); 198 static int level_one_path(lock_descriptor_t *, lock_descriptor_t *); 199 static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int); 200 #endif 201 202 /* proc_graph function definitions */ 203 static int flk_check_deadlock(lock_descriptor_t *); 204 static void flk_proc_graph_uncolor(void); 205 static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *); 206 static proc_edge_t *flk_get_proc_edge(void); 207 static void flk_proc_release(proc_vertex_t *); 208 static void flk_free_proc_edge(proc_edge_t *); 209 static void flk_update_proc_graph(edge_t *, int); 210 211 /* Non-blocking mandatory locking */ 212 static int lock_blocks_io(nbl_op_t, u_offset_t, ssize_t, int, u_offset_t, 213 u_offset_t); 214 215 static struct flock_globals * 216 flk_get_globals(void) 217 { 218 /* 219 * The KLM module had better be loaded if we're attempting to handle 220 * lockmgr requests. 
221 */ 222 ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED); 223 return (zone_getspecific(flock_zone_key, curproc->p_zone)); 224 } 225 226 static flk_lockmgr_status_t 227 flk_get_lockmgr_status(void) 228 { 229 struct flock_globals *fg; 230 231 ASSERT(MUTEX_HELD(&flock_lock)); 232 233 if (flock_zone_key == ZONE_KEY_UNINITIALIZED) { 234 /* 235 * KLM module not loaded; lock manager definitely not running. 236 */ 237 return (FLK_LOCKMGR_DOWN); 238 } 239 fg = flk_get_globals(); 240 return (fg->flk_lockmgr_status); 241 } 242 243 /* 244 * Routine called from fs_frlock in fs/fs_subr.c 245 */ 246 247 int 248 reclock(vnode_t *vp, 249 flock64_t *lckdat, 250 int cmd, 251 int flag, 252 u_offset_t offset, 253 flk_callback_t *flk_cbp) 254 { 255 lock_descriptor_t stack_lock_request; 256 lock_descriptor_t *lock_request; 257 int error = 0; 258 graph_t *gp; 259 int nlmid; 260 261 /* 262 * Check access permissions 263 */ 264 if ((cmd & SETFLCK) && 265 ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) || 266 (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0))) 267 return (EBADF); 268 269 /* 270 * for query and unlock we use the stack_lock_request 271 */ 272 273 if ((lckdat->l_type == F_UNLCK) || 274 !((cmd & INOFLCK) || (cmd & SETFLCK))) { 275 lock_request = &stack_lock_request; 276 (void) bzero((caddr_t)lock_request, 277 sizeof (lock_descriptor_t)); 278 279 /* 280 * following is added to make the assertions in 281 * flk_execute_request() to pass through 282 */ 283 284 lock_request->l_edge.edge_in_next = &lock_request->l_edge; 285 lock_request->l_edge.edge_in_prev = &lock_request->l_edge; 286 lock_request->l_edge.edge_adj_next = &lock_request->l_edge; 287 lock_request->l_edge.edge_adj_prev = &lock_request->l_edge; 288 lock_request->l_status = FLK_INITIAL_STATE; 289 } else { 290 lock_request = flk_get_lock(); 291 } 292 lock_request->l_state = 0; 293 lock_request->l_vnode = vp; 294 lock_request->l_zoneid = getzoneid(); 295 296 /* 297 * Convert the request range into the canonical start and end 298 * values. The NLM protocol supports locking over the entire 299 * 32-bit range, so there's no range checking for remote requests, 300 * but we still need to verify that local requests obey the rules. 301 */ 302 /* Clustering */ 303 if ((cmd & (RCMDLCK | PCMDLCK)) != 0) { 304 ASSERT(lckdat->l_whence == 0); 305 lock_request->l_start = lckdat->l_start; 306 lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T : 307 lckdat->l_start + (lckdat->l_len - 1); 308 } else { 309 /* check the validity of the lock range */ 310 error = flk_convert_lock_data(vp, lckdat, 311 &lock_request->l_start, &lock_request->l_end, 312 offset); 313 if (error) { 314 goto done; 315 } 316 error = flk_check_lock_data(lock_request->l_start, 317 lock_request->l_end, MAXEND); 318 if (error) { 319 goto done; 320 } 321 } 322 323 ASSERT(lock_request->l_end >= lock_request->l_start); 324 325 lock_request->l_type = lckdat->l_type; 326 if (cmd & INOFLCK) 327 lock_request->l_state |= IO_LOCK; 328 if (cmd & SLPFLCK) 329 lock_request->l_state |= WILLING_TO_SLEEP_LOCK; 330 if (cmd & RCMDLCK) 331 lock_request->l_state |= LOCKMGR_LOCK; 332 if (cmd & NBMLCK) 333 lock_request->l_state |= NBMAND_LOCK; 334 /* 335 * Clustering: set flag for PXFS locks 336 * We do not _only_ check for the PCMDLCK flag because PXFS locks could 337 * also be of type 'RCMDLCK'. 338 * We do not _only_ check the GETPXFSID() macro because local PXFS 339 * clients use a pxfsid of zero to permit deadlock detection in the LLM. 
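 *
 * As an illustration of the sysid layout assumed here (see the NLM
 * registry comment near the top of this file: the upper two bytes of
 * the sysid carry the node id), a hypothetical remote request might
 * decompose as
 *
 *	l_sysid = 0x00020007
 *	GETNLMID(l_sysid) -> 2		NLM server runs on node 2
 *
 * while GETPXFSID() pulls the PXFS identity out of the same sysid and
 * is zero both for UFS and for local PXFS clients, which is why the
 * PCMDLCK flag is also checked below.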
340 */ 341 342 if ((cmd & PCMDLCK) || (GETPXFSID(lckdat->l_sysid) != 0)) { 343 lock_request->l_state |= PXFS_LOCK; 344 } 345 if (!((cmd & SETFLCK) || (cmd & INOFLCK))) { 346 if (lock_request->l_type == F_RDLCK || 347 lock_request->l_type == F_WRLCK) 348 lock_request->l_state |= QUERY_LOCK; 349 } 350 lock_request->l_flock = (*lckdat); 351 lock_request->l_callbacks = flk_cbp; 352 353 /* 354 * We are ready for processing the request 355 */ 356 if (IS_LOCKMGR(lock_request)) { 357 /* 358 * If the lock request is an NLM server request .... 359 */ 360 if (nlm_status_size == 0) { /* not booted as cluster */ 361 mutex_enter(&flock_lock); 362 /* 363 * Bail out if this is a lock manager request and the 364 * lock manager is not supposed to be running. 365 */ 366 if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) { 367 mutex_exit(&flock_lock); 368 error = ENOLCK; 369 goto done; 370 } 371 mutex_exit(&flock_lock); 372 } else { /* booted as a cluster */ 373 nlmid = GETNLMID(lock_request->l_flock.l_sysid); 374 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 375 376 mutex_enter(&nlm_reg_lock); 377 /* 378 * If the NLM registry does not know about this 379 * NLM server making the request, add its nlmid 380 * to the registry. 381 */ 382 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, 383 nlmid)) { 384 FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid); 385 } else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status, 386 nlmid)) { 387 /* 388 * If the NLM server is already known (has made 389 * previous lock requests) and its state is 390 * not NLM_UP (means that NLM server is 391 * shutting down), then bail out with an 392 * error to deny the lock request. 393 */ 394 mutex_exit(&nlm_reg_lock); 395 error = ENOLCK; 396 goto done; 397 } 398 mutex_exit(&nlm_reg_lock); 399 } 400 } 401 402 /* Now get the lock graph for a particular vnode */ 403 gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH); 404 405 /* 406 * We drop rwlock here otherwise this might end up causing a 407 * deadlock if this IOLOCK sleeps. (bugid # 1183392). 408 */ 409 410 if (IS_IO_LOCK(lock_request)) { 411 VOP_RWUNLOCK(vp, 412 (lock_request->l_type == F_RDLCK) ? 413 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL); 414 } 415 mutex_enter(&gp->gp_mutex); 416 417 lock_request->l_state |= REFERENCED_LOCK; 418 lock_request->l_graph = gp; 419 420 switch (lock_request->l_type) { 421 case F_RDLCK: 422 case F_WRLCK: 423 if (IS_QUERY_LOCK(lock_request)) { 424 flk_get_first_blocking_lock(lock_request); 425 (*lckdat) = lock_request->l_flock; 426 break; 427 } 428 429 /* process the request now */ 430 431 error = flk_process_request(lock_request); 432 break; 433 434 case F_UNLCK: 435 /* unlock request will not block so execute it immediately */ 436 437 if (IS_LOCKMGR(lock_request) && 438 flk_canceled(lock_request)) { 439 error = 0; 440 } else { 441 error = flk_execute_request(lock_request); 442 } 443 break; 444 445 case F_UNLKSYS: 446 /* 447 * Recovery mechanism to release lock manager locks when 448 * NFS client crashes and restart. NFS server will clear 449 * old locks and grant new locks. 
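 *
 * A hedged sketch of the caller's side (the actual call site lives in
 * the NFS/NLM code and is assumed here, not defined by this file): the
 * server fills in a flock64_t naming the rebooted client and hands it
 * down through VOP_FRLOCK(), which reaches this switch via
 * fs_frlock()/reclock():
 *
 *	flock64_t fl;
 *
 *	bzero(&fl, sizeof (fl));
 *	fl.l_type = F_UNLKSYS;
 *	fl.l_whence = 0;
 *	fl.l_sysid = crashed_sysid;	(hypothetical variable)
 *
 * l_sysid must be nonzero and the caller must pass the secpolicy_nfs()
 * check below, otherwise the request fails with EINVAL or EPERM.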
450 */ 451 452 if (lock_request->l_flock.l_sysid == 0) { 453 mutex_exit(&gp->gp_mutex); 454 return (EINVAL); 455 } 456 if (secpolicy_nfs(CRED()) != 0) { 457 mutex_exit(&gp->gp_mutex); 458 return (EPERM); 459 } 460 flk_delete_locks_by_sysid(lock_request); 461 lock_request->l_state &= ~REFERENCED_LOCK; 462 flk_set_state(lock_request, FLK_DEAD_STATE); 463 flk_free_lock(lock_request); 464 mutex_exit(&gp->gp_mutex); 465 return (0); 466 467 default: 468 error = EINVAL; 469 break; 470 } 471 472 /* Clustering: For blocked PXFS locks, return */ 473 if (error == PXFS_LOCK_BLOCKED) { 474 lock_request->l_state &= ~REFERENCED_LOCK; 475 mutex_exit(&gp->gp_mutex); 476 return (error); 477 } 478 479 /* 480 * Now that we have seen the status of locks in the system for 481 * this vnode we acquire the rwlock if it is an IO_LOCK. 482 */ 483 484 if (IS_IO_LOCK(lock_request)) { 485 (void) VOP_RWLOCK(vp, 486 (lock_request->l_type == F_RDLCK) ? 487 V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL); 488 if (!error) { 489 lckdat->l_type = F_UNLCK; 490 491 /* 492 * This wake up is needed otherwise 493 * if IO_LOCK has slept the dependents on this 494 * will not be woken up at all. (bugid # 1185482). 495 */ 496 497 flk_wakeup(lock_request, 1); 498 flk_set_state(lock_request, FLK_DEAD_STATE); 499 flk_free_lock(lock_request); 500 } 501 /* 502 * else if error had occurred either flk_process_request() 503 * has returned EDEADLK in which case there will be no 504 * dependents for this lock or EINTR from flk_wait_execute_ 505 * request() in which case flk_cancel_sleeping_lock() 506 * would have been done. same is true with EBADF. 507 */ 508 } 509 510 if (lock_request == &stack_lock_request) { 511 flk_set_state(lock_request, FLK_DEAD_STATE); 512 } else { 513 lock_request->l_state &= ~REFERENCED_LOCK; 514 if ((error != 0) || IS_DELETED(lock_request)) { 515 flk_set_state(lock_request, FLK_DEAD_STATE); 516 flk_free_lock(lock_request); 517 } 518 } 519 520 mutex_exit(&gp->gp_mutex); 521 return (error); 522 523 done: 524 flk_set_state(lock_request, FLK_DEAD_STATE); 525 if (lock_request != &stack_lock_request) 526 flk_free_lock(lock_request); 527 return (error); 528 } 529 530 /* 531 * Invoke the callbacks in the given list. If before sleeping, invoke in 532 * list order. If after sleeping, invoke in reverse order. 533 * 534 * CPR (suspend/resume) support: if one of the callbacks returns a 535 * callb_cpr_t, return it. This will be used to make the thread CPR-safe 536 * while it is sleeping. There should be at most one callb_cpr_t for the 537 * thread. 538 * XXX This is unnecessarily complicated. The CPR information should just 539 * get passed in directly through VOP_FRLOCK and reclock, rather than 540 * sneaking it in via a callback. 
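 *
 * For example, with a callback list A -> B -> C, the callbacks run as
 *
 *	FLK_BEFORE_SLEEP:	A, B, C		(list order)
 *	FLK_AFTER_SLEEP:	C, B, A		(reverse order)
 *
 * and if, say, B returns a callb_cpr_t pointer, that pointer is what
 * this routine hands back so the sleeping thread can be made CPR-safe.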
541 */ 542 543 callb_cpr_t * 544 flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when) 545 { 546 callb_cpr_t *cpr_callbackp = NULL; 547 callb_cpr_t *one_result; 548 flk_callback_t *cb; 549 550 if (cblist == NULL) 551 return (NULL); 552 553 if (when == FLK_BEFORE_SLEEP) { 554 cb = cblist; 555 do { 556 one_result = (*cb->cb_callback)(when, cb->cb_data); 557 if (one_result != NULL) { 558 ASSERT(cpr_callbackp == NULL); 559 cpr_callbackp = one_result; 560 } 561 cb = cb->cb_next; 562 } while (cb != cblist); 563 } else { 564 cb = cblist->cb_prev; 565 do { 566 one_result = (*cb->cb_callback)(when, cb->cb_data); 567 if (one_result != NULL) { 568 cpr_callbackp = one_result; 569 } 570 cb = cb->cb_prev; 571 } while (cb != cblist->cb_prev); 572 } 573 574 return (cpr_callbackp); 575 } 576 577 /* 578 * Initialize a flk_callback_t to hold the given callback. 579 */ 580 581 void 582 flk_init_callback(flk_callback_t *flk_cb, 583 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata) 584 { 585 flk_cb->cb_next = flk_cb; 586 flk_cb->cb_prev = flk_cb; 587 flk_cb->cb_callback = cb_fcn; 588 flk_cb->cb_data = cbdata; 589 } 590 591 /* 592 * Initialize an flk_callback_t and then link it into the head of an 593 * existing list (which may be NULL). 594 */ 595 596 void 597 flk_add_callback(flk_callback_t *newcb, 598 callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), 599 void *cbdata, flk_callback_t *cblist) 600 { 601 flk_init_callback(newcb, cb_fcn, cbdata); 602 603 if (cblist == NULL) 604 return; 605 606 newcb->cb_prev = cblist->cb_prev; 607 newcb->cb_next = cblist; 608 cblist->cb_prev->cb_next = newcb; 609 cblist->cb_prev = newcb; 610 } 611 612 /* 613 * Initialize the flk_edge_cache data structure and create the 614 * nlm_reg_status array. 615 */ 616 617 void 618 flk_init(void) 619 { 620 uint_t i; 621 622 flk_edge_cache = kmem_cache_create("flk_edges", 623 sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0); 624 if (flk_edge_cache == NULL) { 625 cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n"); 626 } 627 /* 628 * Create the NLM registry object. 629 */ 630 631 if (cluster_bootflags & CLUSTER_BOOTED) { 632 /* 633 * This routine tells you the maximum node id that will be used 634 * in the cluster. This number will be the size of the nlm 635 * registry status array. We add 1 because we will be using 636 * all entries indexed from 0 to maxnodeid; e.g., from 0 637 * to 64, for a total of 65 entries. 638 */ 639 nlm_status_size = clconf_maximum_nodeid() + 1; 640 } else { 641 nlm_status_size = 0; 642 } 643 644 if (nlm_status_size != 0) { /* booted as a cluster */ 645 nlm_reg_status = (flk_nlm_status_t *) 646 kmem_alloc(sizeof (flk_nlm_status_t) * nlm_status_size, 647 KM_SLEEP); 648 649 /* initialize all NLM states in array to NLM_UNKNOWN */ 650 for (i = 0; i < nlm_status_size; i++) { 651 nlm_reg_status[i] = FLK_NLM_UNKNOWN; 652 } 653 } 654 } 655 656 /* 657 * Zone constructor/destructor callbacks to be executed when a zone is 658 * created/destroyed. 
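 *
 * A sketch of how these are expected to be hooked up; the actual
 * registration is done outside this file (presumably when the KLM
 * module loads), so the call below is illustrative only:
 *
 *	zone_key_create(&flock_zone_key, flk_zone_init, NULL,
 *	    flk_zone_fini);
 *
 * After that, zone_getspecific(flock_zone_key, zone) returns the
 * per-zone struct flock_globals allocated by flk_zone_init().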
659 */ 660 /* ARGSUSED */ 661 void * 662 flk_zone_init(zoneid_t zoneid) 663 { 664 struct flock_globals *fg; 665 uint_t i; 666 667 fg = kmem_alloc(sizeof (*fg), KM_SLEEP); 668 fg->flk_lockmgr_status = FLK_LOCKMGR_UP; 669 for (i = 0; i < HASH_SIZE; i++) 670 fg->lockmgr_status[i] = FLK_LOCKMGR_UP; 671 return (fg); 672 } 673 674 /* ARGSUSED */ 675 void 676 flk_zone_fini(zoneid_t zoneid, void *data) 677 { 678 struct flock_globals *fg = data; 679 680 kmem_free(fg, sizeof (*fg)); 681 } 682 683 /* 684 * Get a lock_descriptor structure with initialization of edge lists. 685 */ 686 687 static lock_descriptor_t * 688 flk_get_lock(void) 689 { 690 lock_descriptor_t *l; 691 692 l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP); 693 694 cv_init(&l->l_cv, NULL, CV_DRIVER, NULL); 695 l->l_edge.edge_in_next = &l->l_edge; 696 l->l_edge.edge_in_prev = &l->l_edge; 697 l->l_edge.edge_adj_next = &l->l_edge; 698 l->l_edge.edge_adj_prev = &l->l_edge; 699 l->pvertex = -1; 700 l->l_status = FLK_INITIAL_STATE; 701 flk_lock_allocs++; 702 return (l); 703 } 704 705 /* 706 * Free a lock_descriptor structure. Just sets the DELETED_LOCK flag 707 * when some thread has a reference to it as in reclock(). 708 */ 709 710 void 711 flk_free_lock(lock_descriptor_t *lock) 712 { 713 ASSERT(IS_DEAD(lock)); 714 if (IS_REFERENCED(lock)) { 715 lock->l_state |= DELETED_LOCK; 716 return; 717 } 718 flk_lock_frees++; 719 kmem_free((void *)lock, sizeof (lock_descriptor_t)); 720 } 721 722 void 723 flk_set_state(lock_descriptor_t *lock, int new_state) 724 { 725 /* 726 * Locks in the sleeping list may be woken up in a number of ways, 727 * and more than once. If a sleeping lock is signaled awake more 728 * than once, then it may or may not change state depending on its 729 * current state. 730 * Also note that NLM locks that are sleeping could be moved to an 731 * interrupted state more than once if the unlock request is 732 * retransmitted by the NLM client - the second time around, this is 733 * just a nop. 734 * The ordering of being signaled awake is: 735 * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE. 736 * The checks below implement this ordering. 737 */ 738 if (IS_INTERRUPTED(lock)) { 739 if ((new_state == FLK_CANCELLED_STATE) || 740 (new_state == FLK_GRANTED_STATE) || 741 (new_state == FLK_INTERRUPTED_STATE)) { 742 return; 743 } 744 } 745 if (IS_CANCELLED(lock)) { 746 if ((new_state == FLK_GRANTED_STATE) || 747 (new_state == FLK_CANCELLED_STATE)) { 748 return; 749 } 750 } 751 CHECK_LOCK_TRANSITION(lock->l_status, new_state); 752 if (IS_PXFS(lock)) { 753 cl_flk_state_transition_notify(lock, lock->l_status, new_state); 754 } 755 lock->l_status = new_state; 756 } 757 758 /* 759 * Routine that checks whether there are any blocking locks in the system. 760 * 761 * The policy followed is if a write lock is sleeping we don't allow read 762 * locks before this write lock even though there may not be any active 763 * locks corresponding to the read locks' region. 764 * 765 * flk_add_edge() function adds an edge between l1 and l2 iff there 766 * is no path between l1 and l2. This is done to have a "minimum 767 * storage representation" of the dependency graph. 768 * 769 * Another property of the graph is since only the new request throws 770 * edges to the existing locks in the graph, the graph is always topologically 771 * ordered. 
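 *
 * A small worked example of the fairness policy: an active read lock
 * R1 covers [0,99], a write request W1 for the same range is already
 * sleeping behind it, and a new read request R2 for [0,99] arrives.
 * R2 conflicts with no active lock, yet it is still made to wait: an
 * edge R2 -> W1 is added so the writer cannot be starved by a steady
 * stream of readers.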
772 */ 773 774 static int 775 flk_process_request(lock_descriptor_t *request) 776 { 777 graph_t *gp = request->l_graph; 778 lock_descriptor_t *lock; 779 int request_blocked_by_active = 0; 780 int request_blocked_by_granted = 0; 781 int request_blocked_by_sleeping = 0; 782 vnode_t *vp = request->l_vnode; 783 int error = 0; 784 int request_will_wait = 0; 785 int found_covering_lock = 0; 786 lock_descriptor_t *covered_by = NULL; 787 788 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 789 request_will_wait = IS_WILLING_TO_SLEEP(request); 790 791 /* 792 * check active locks 793 */ 794 795 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 796 797 798 if (lock) { 799 do { 800 if (BLOCKS(lock, request)) { 801 if (!request_will_wait) 802 return (EAGAIN); 803 request_blocked_by_active = 1; 804 break; 805 } 806 /* 807 * Grant lock if it is for the same owner holding active 808 * lock that covers the request. 809 */ 810 811 if (SAME_OWNER(lock, request) && 812 COVERS(lock, request) && 813 (request->l_type == F_RDLCK)) 814 return (flk_execute_request(request)); 815 lock = lock->l_next; 816 } while (lock->l_vnode == vp); 817 } 818 819 if (!request_blocked_by_active) { 820 lock_descriptor_t *lk[1]; 821 lock_descriptor_t *first_glock = NULL; 822 /* 823 * Shall we grant this?! NO!! 824 * What about those locks that were just granted and still 825 * in sleep queue. Those threads are woken up and so locks 826 * are almost active. 827 */ 828 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 829 if (lock) { 830 do { 831 if (BLOCKS(lock, request)) { 832 if (IS_GRANTED(lock)) { 833 request_blocked_by_granted = 1; 834 } else { 835 request_blocked_by_sleeping = 1; 836 } 837 } 838 839 lock = lock->l_next; 840 } while ((lock->l_vnode == vp)); 841 first_glock = lock->l_prev; 842 ASSERT(first_glock->l_vnode == vp); 843 } 844 845 if (request_blocked_by_granted) 846 goto block; 847 848 if (!request_blocked_by_sleeping) { 849 /* 850 * If the request isn't going to be blocked by a 851 * sleeping request, we know that it isn't going to 852 * be blocked; we can just execute the request -- 853 * without performing costly deadlock detection. 854 */ 855 ASSERT(!request_blocked_by_active); 856 return (flk_execute_request(request)); 857 } else if (request->l_type == F_RDLCK) { 858 /* 859 * If we have a sleeping writer in the requested 860 * lock's range, block. 861 */ 862 goto block; 863 } 864 865 lk[0] = request; 866 request->l_state |= RECOMPUTE_LOCK; 867 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 868 if (lock) { 869 do { 870 flk_recompute_dependencies(lock, lk, 1, 0); 871 lock = lock->l_next; 872 } while (lock->l_vnode == vp); 873 } 874 lock = first_glock; 875 if (lock) { 876 do { 877 if (IS_GRANTED(lock)) { 878 flk_recompute_dependencies(lock, lk, 1, 0); 879 } 880 lock = lock->l_prev; 881 } while ((lock->l_vnode == vp)); 882 } 883 request->l_state &= ~RECOMPUTE_LOCK; 884 if (!NO_DEPENDENTS(request) && flk_check_deadlock(request)) 885 return (EDEADLK); 886 return (flk_execute_request(request)); 887 } 888 889 block: 890 if (request_will_wait) 891 flk_graph_uncolor(gp); 892 893 /* check sleeping locks */ 894 895 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 896 897 /* 898 * If we find a sleeping write lock that is a superset of the 899 * region wanted by request we can be assured that by adding an 900 * edge to this write lock we have paths to all locks in the 901 * graph that blocks the request except in one case and that is why 902 * another check for SAME_OWNER in the loop below. 
The exception 903 * case is when this process that owns the sleeping write lock 'l1' 904 * has other locks l2, l3, l4 that are in the system and arrived 905 * before l1. l1 does not have path to these locks as they are from 906 * same process. We break when we find a second covering sleeping 907 * lock l5 owned by a process different from that owning l1, because 908 * there cannot be any of l2, l3, l4, etc., arrived before l5, and if 909 * it has l1 would have produced a deadlock already. 910 */ 911 912 if (lock) { 913 do { 914 if (BLOCKS(lock, request)) { 915 if (!request_will_wait) 916 return (EAGAIN); 917 if (COVERS(lock, request) && 918 lock->l_type == F_WRLCK) { 919 if (found_covering_lock && 920 !SAME_OWNER(lock, covered_by)) { 921 found_covering_lock++; 922 break; 923 } 924 found_covering_lock = 1; 925 covered_by = lock; 926 } 927 if (found_covering_lock && 928 !SAME_OWNER(lock, covered_by)) { 929 lock = lock->l_next; 930 continue; 931 } 932 if ((error = flk_add_edge(request, lock, 933 !found_covering_lock, 0))) 934 return (error); 935 } 936 lock = lock->l_next; 937 } while (lock->l_vnode == vp); 938 } 939 940 /* 941 * found_covering_lock == 2 iff at this point 'request' has paths 942 * to all locks that blocks 'request'. found_covering_lock == 1 iff at this 943 * point 'request' has paths to all locks that blocks 'request' whose owners 944 * are not same as the one that covers 'request' (covered_by above) and 945 * we can have locks whose owner is same as covered_by in the active list. 946 */ 947 948 if (request_blocked_by_active && found_covering_lock != 2) { 949 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 950 ASSERT(lock != NULL); 951 do { 952 if (BLOCKS(lock, request)) { 953 if (found_covering_lock && 954 !SAME_OWNER(lock, covered_by)) { 955 lock = lock->l_next; 956 continue; 957 } 958 if ((error = flk_add_edge(request, lock, 959 CHECK_CYCLE, 0))) 960 return (error); 961 } 962 lock = lock->l_next; 963 } while (lock->l_vnode == vp); 964 } 965 966 if (NOT_BLOCKED(request)) { 967 /* 968 * request not dependent on any other locks 969 * so execute this request 970 */ 971 return (flk_execute_request(request)); 972 } else { 973 /* 974 * check for deadlock 975 */ 976 if (flk_check_deadlock(request)) 977 return (EDEADLK); 978 /* 979 * this thread has to sleep 980 */ 981 return (flk_wait_execute_request(request)); 982 } 983 } 984 985 /* 986 * The actual execution of the request in the simple case is only to 987 * insert the 'request' in the list of active locks if it is not an 988 * UNLOCK. 989 * We have to consider the existing active locks' relation to 990 * this 'request' if they are owned by same process. flk_relation() does 991 * this job and sees to that the dependency graph information is maintained 992 * properly. 
993 */ 994 995 int 996 flk_execute_request(lock_descriptor_t *request) 997 { 998 graph_t *gp = request->l_graph; 999 vnode_t *vp = request->l_vnode; 1000 lock_descriptor_t *lock, *lock1; 1001 int done_searching = 0; 1002 1003 CHECK_SLEEPING_LOCKS(gp); 1004 CHECK_ACTIVE_LOCKS(gp); 1005 1006 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1007 1008 flk_set_state(request, FLK_START_STATE); 1009 1010 ASSERT(NOT_BLOCKED(request)); 1011 1012 /* IO_LOCK requests are only to check status */ 1013 1014 if (IS_IO_LOCK(request)) 1015 return (0); 1016 1017 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 1018 1019 if (lock == NULL && request->l_type == F_UNLCK) 1020 return (0); 1021 if (lock == NULL) { 1022 flk_insert_active_lock(request); 1023 return (0); 1024 } 1025 1026 do { 1027 lock1 = lock->l_next; 1028 if (SAME_OWNER(request, lock)) { 1029 done_searching = flk_relation(lock, request); 1030 } 1031 lock = lock1; 1032 } while (lock->l_vnode == vp && !done_searching); 1033 1034 /* 1035 * insert in active queue 1036 */ 1037 1038 if (request->l_type != F_UNLCK) 1039 flk_insert_active_lock(request); 1040 1041 return (0); 1042 } 1043 1044 /* 1045 * 'request' is blocked by some one therefore we put it into sleep queue. 1046 */ 1047 static int 1048 flk_wait_execute_request(lock_descriptor_t *request) 1049 { 1050 graph_t *gp = request->l_graph; 1051 callb_cpr_t *cprp; /* CPR info from callback */ 1052 struct flock_globals *fg; 1053 int index; 1054 1055 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1056 ASSERT(IS_WILLING_TO_SLEEP(request)); 1057 1058 flk_insert_sleeping_lock(request); 1059 1060 if (IS_LOCKMGR(request)) { 1061 index = HASH_INDEX(request->l_vnode); 1062 fg = flk_get_globals(); 1063 1064 if (nlm_status_size == 0) { /* not booted as a cluster */ 1065 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) { 1066 flk_cancel_sleeping_lock(request, 1); 1067 return (ENOLCK); 1068 } 1069 } else { /* booted as a cluster */ 1070 /* 1071 * If the request is an NLM server lock request, 1072 * and the NLM state of the lock request is not 1073 * NLM_UP (because the NLM server is shutting 1074 * down), then cancel the sleeping lock and 1075 * return error ENOLCK that will encourage the 1076 * client to retransmit. 1077 */ 1078 if (!IS_NLM_UP(request)) { 1079 flk_cancel_sleeping_lock(request, 1); 1080 return (ENOLCK); 1081 } 1082 } 1083 } 1084 1085 /* Clustering: For blocking PXFS locks, return */ 1086 if (IS_PXFS(request)) { 1087 /* 1088 * PXFS locks sleep on the client side. 1089 * The callback argument is used to wake up the sleeper 1090 * when the lock is granted. 1091 * We return -1 (rather than an errno value) to indicate 1092 * the client side should sleep 1093 */ 1094 return (PXFS_LOCK_BLOCKED); 1095 } 1096 1097 if (request->l_callbacks != NULL) { 1098 /* 1099 * To make sure the shutdown code works correctly, either 1100 * the callback must happen after putting the lock on the 1101 * sleep list, or we must check the shutdown status after 1102 * returning from the callback (and before sleeping). At 1103 * least for now, we'll use the first option. If a 1104 * shutdown or signal or whatever happened while the graph 1105 * mutex was dropped, that will be detected by 1106 * wait_for_lock(). 
1107 */ 1108 mutex_exit(&gp->gp_mutex); 1109 1110 cprp = flk_invoke_callbacks(request->l_callbacks, 1111 FLK_BEFORE_SLEEP); 1112 1113 mutex_enter(&gp->gp_mutex); 1114 1115 if (cprp == NULL) { 1116 wait_for_lock(request); 1117 } else { 1118 mutex_enter(cprp->cc_lockp); 1119 CALLB_CPR_SAFE_BEGIN(cprp); 1120 mutex_exit(cprp->cc_lockp); 1121 wait_for_lock(request); 1122 mutex_enter(cprp->cc_lockp); 1123 CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp); 1124 mutex_exit(cprp->cc_lockp); 1125 } 1126 1127 mutex_exit(&gp->gp_mutex); 1128 (void) flk_invoke_callbacks(request->l_callbacks, 1129 FLK_AFTER_SLEEP); 1130 mutex_enter(&gp->gp_mutex); 1131 } else { 1132 wait_for_lock(request); 1133 } 1134 1135 if (IS_LOCKMGR(request)) { 1136 /* 1137 * If the lock manager is shutting down, return an 1138 * error that will encourage the client to retransmit. 1139 */ 1140 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP && 1141 !IS_GRANTED(request)) { 1142 flk_cancel_sleeping_lock(request, 1); 1143 return (ENOLCK); 1144 } 1145 } 1146 1147 if (IS_INTERRUPTED(request)) { 1148 /* we got a signal, or act like we did */ 1149 flk_cancel_sleeping_lock(request, 1); 1150 return (EINTR); 1151 } 1152 1153 /* Cancelled if some other thread has closed the file */ 1154 1155 if (IS_CANCELLED(request)) { 1156 flk_cancel_sleeping_lock(request, 1); 1157 return (EBADF); 1158 } 1159 1160 request->l_state &= ~GRANTED_LOCK; 1161 REMOVE_SLEEP_QUEUE(request); 1162 return (flk_execute_request(request)); 1163 } 1164 1165 /* 1166 * This routine adds an edge between from and to because from depends 1167 * on to. If asked to check for deadlock it checks whether there are any 1168 * reachable locks from "from_lock" that are owned by the same process 1169 * as "from_lock". 1170 * NOTE: It is the caller's responsibility to make sure that the color 1171 * of the graph is consistent between the calls to flk_add_edge as done 1172 * in flk_process_request. This routine does not color and check for 1173 * deadlock explicitly. 1174 */ 1175 1176 static int 1177 flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock, 1178 int check_cycle, int update_graph) 1179 { 1180 edge_t *edge; 1181 edge_t *ep; 1182 lock_descriptor_t *vertex; 1183 lock_descriptor_t *vertex_stack; 1184 1185 STACK_INIT(vertex_stack); 1186 1187 /* 1188 * if the to vertex already has mark_color, just return; 1189 * don't add an edge, as it is already reachable from the 1190 * from vertex.
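 *
 * (For orientation: the edge built below records that from_lock waits
 * on to_lock.  It is linked onto the adjacency (out) list of from_lock
 * and onto the in list of to_lock, so FIRST_ADJ(from_lock) walks the
 * locks from_lock is blocked by, while FIRST_IN(to_lock) walks the
 * locks that are blocked by to_lock.)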
1191 */ 1192 1193 if (COLORED(to_lock)) 1194 return (0); 1195 1196 edge = flk_get_edge(); 1197 1198 /* 1199 * set the from and to vertex 1200 */ 1201 1202 edge->from_vertex = from_lock; 1203 edge->to_vertex = to_lock; 1204 1205 /* 1206 * put in adjacency list of from vertex 1207 */ 1208 1209 from_lock->l_edge.edge_adj_next->edge_adj_prev = edge; 1210 edge->edge_adj_next = from_lock->l_edge.edge_adj_next; 1211 edge->edge_adj_prev = &from_lock->l_edge; 1212 from_lock->l_edge.edge_adj_next = edge; 1213 1214 /* 1215 * put in in list of to vertex 1216 */ 1217 1218 to_lock->l_edge.edge_in_next->edge_in_prev = edge; 1219 edge->edge_in_next = to_lock->l_edge.edge_in_next; 1220 to_lock->l_edge.edge_in_next = edge; 1221 edge->edge_in_prev = &to_lock->l_edge; 1222 1223 1224 if (update_graph) { 1225 flk_update_proc_graph(edge, 0); 1226 return (0); 1227 } 1228 if (!check_cycle) { 1229 return (0); 1230 } 1231 1232 STACK_PUSH(vertex_stack, from_lock, l_stack); 1233 1234 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 1235 1236 STACK_POP(vertex_stack, l_stack); 1237 1238 for (ep = FIRST_ADJ(vertex); 1239 ep != HEAD(vertex); 1240 ep = NEXT_ADJ(ep)) { 1241 if (COLORED(ep->to_vertex)) 1242 continue; 1243 COLOR(ep->to_vertex); 1244 if (SAME_OWNER(ep->to_vertex, from_lock)) 1245 goto dead_lock; 1246 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack); 1247 } 1248 } 1249 return (0); 1250 1251 dead_lock: 1252 1253 /* 1254 * remove all edges 1255 */ 1256 1257 ep = FIRST_ADJ(from_lock); 1258 1259 while (ep != HEAD(from_lock)) { 1260 IN_LIST_REMOVE(ep); 1261 from_lock->l_sedge = NEXT_ADJ(ep); 1262 ADJ_LIST_REMOVE(ep); 1263 flk_free_edge(ep); 1264 ep = from_lock->l_sedge; 1265 } 1266 return (EDEADLK); 1267 } 1268 1269 /* 1270 * Get an edge structure for representing the dependency between two locks. 1271 */ 1272 1273 static edge_t * 1274 flk_get_edge() 1275 { 1276 edge_t *ep; 1277 1278 ASSERT(flk_edge_cache != NULL); 1279 1280 ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP); 1281 edge_allocs++; 1282 return (ep); 1283 } 1284 1285 /* 1286 * Free the edge structure. 1287 */ 1288 1289 static void 1290 flk_free_edge(edge_t *ep) 1291 { 1292 edge_frees++; 1293 kmem_cache_free(flk_edge_cache, (void *)ep); 1294 } 1295 1296 /* 1297 * Check the relationship of request with lock and perform the 1298 * recomputation of dependencies, break lock if required, and return 1299 * 1 if request cannot have any more relationship with the next 1300 * active locks. 1301 * The 'lock' and 'request' are compared and in case of overlap we 1302 * delete the 'lock' and form new locks to represent the non-overlapped 1303 * portion of original 'lock'. This function has side effects such as 1304 * 'lock' will be freed, new locks will be added to the active list. 
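 *
 * A worked example (same owner throughout): the active list holds a
 * write lock L on [0,99] and the request is an unlock of [40,59].
 * The request falls strictly inside L and the effect is FLK_UNLOCK, so
 * two new descriptors are cut from L:
 *
 *	lock1 = copy of L over [0,39]
 *	lock2 = copy of L over [60,99]
 *
 * topology[] starts out as { lock1, lock2, request }; the unlock
 * request itself is dropped from the recomputation and is never
 * inserted.  L is removed from the active list, dependencies are
 * recomputed against the new pieces, lock1 and lock2 become active,
 * sleepers that only conflicted with [40,59] are woken, and L is
 * freed.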
1305 */ 1306 1307 static int 1308 flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request) 1309 { 1310 int lock_effect; 1311 lock_descriptor_t *lock1, *lock2; 1312 lock_descriptor_t *topology[3]; 1313 int nvertex = 0; 1314 int i; 1315 edge_t *ep; 1316 graph_t *gp = (lock->l_graph); 1317 1318 1319 CHECK_SLEEPING_LOCKS(gp); 1320 CHECK_ACTIVE_LOCKS(gp); 1321 1322 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1323 1324 topology[0] = topology[1] = topology[2] = NULL; 1325 1326 if (request->l_type == F_UNLCK) 1327 lock_effect = FLK_UNLOCK; 1328 else if (request->l_type == F_RDLCK && 1329 lock->l_type == F_WRLCK) 1330 lock_effect = FLK_DOWNGRADE; 1331 else if (request->l_type == F_WRLCK && 1332 lock->l_type == F_RDLCK) 1333 lock_effect = FLK_UPGRADE; 1334 else 1335 lock_effect = FLK_STAY_SAME; 1336 1337 if (lock->l_end < request->l_start) { 1338 if (lock->l_end == request->l_start - 1 && 1339 lock_effect == FLK_STAY_SAME) { 1340 topology[0] = request; 1341 request->l_start = lock->l_start; 1342 nvertex = 1; 1343 goto recompute; 1344 } else { 1345 return (0); 1346 } 1347 } 1348 1349 if (lock->l_start > request->l_end) { 1350 if (request->l_end == lock->l_start - 1 && 1351 lock_effect == FLK_STAY_SAME) { 1352 topology[0] = request; 1353 request->l_end = lock->l_end; 1354 nvertex = 1; 1355 goto recompute; 1356 } else { 1357 return (1); 1358 } 1359 } 1360 1361 if (request->l_end < lock->l_end) { 1362 if (request->l_start > lock->l_start) { 1363 if (lock_effect == FLK_STAY_SAME) { 1364 request->l_start = lock->l_start; 1365 request->l_end = lock->l_end; 1366 topology[0] = request; 1367 nvertex = 1; 1368 } else { 1369 lock1 = flk_get_lock(); 1370 lock2 = flk_get_lock(); 1371 COPY(lock1, lock); 1372 COPY(lock2, lock); 1373 lock1->l_start = lock->l_start; 1374 lock1->l_end = request->l_start - 1; 1375 lock2->l_start = request->l_end + 1; 1376 lock2->l_end = lock->l_end; 1377 topology[0] = lock1; 1378 topology[1] = lock2; 1379 topology[2] = request; 1380 nvertex = 3; 1381 } 1382 } else if (request->l_start < lock->l_start) { 1383 if (lock_effect == FLK_STAY_SAME) { 1384 request->l_end = lock->l_end; 1385 topology[0] = request; 1386 nvertex = 1; 1387 } else { 1388 lock1 = flk_get_lock(); 1389 COPY(lock1, lock); 1390 lock1->l_start = request->l_end + 1; 1391 topology[0] = lock1; 1392 topology[1] = request; 1393 nvertex = 2; 1394 } 1395 } else { 1396 if (lock_effect == FLK_STAY_SAME) { 1397 request->l_start = lock->l_start; 1398 request->l_end = lock->l_end; 1399 topology[0] = request; 1400 nvertex = 1; 1401 } else { 1402 lock1 = flk_get_lock(); 1403 COPY(lock1, lock); 1404 lock1->l_start = request->l_end + 1; 1405 topology[0] = lock1; 1406 topology[1] = request; 1407 nvertex = 2; 1408 } 1409 } 1410 } else if (request->l_end > lock->l_end) { 1411 if (request->l_start > lock->l_start) { 1412 if (lock_effect == FLK_STAY_SAME) { 1413 request->l_start = lock->l_start; 1414 topology[0] = request; 1415 nvertex = 1; 1416 } else { 1417 lock1 = flk_get_lock(); 1418 COPY(lock1, lock); 1419 lock1->l_end = request->l_start - 1; 1420 topology[0] = lock1; 1421 topology[1] = request; 1422 nvertex = 2; 1423 } 1424 } else if (request->l_start < lock->l_start) { 1425 topology[0] = request; 1426 nvertex = 1; 1427 } else { 1428 topology[0] = request; 1429 nvertex = 1; 1430 } 1431 } else { 1432 if (request->l_start > lock->l_start) { 1433 if (lock_effect == FLK_STAY_SAME) { 1434 request->l_start = lock->l_start; 1435 topology[0] = request; 1436 nvertex = 1; 1437 } else { 1438 lock1 = flk_get_lock(); 1439 COPY(lock1, lock); 1440 
lock1->l_end = request->l_start - 1; 1441 topology[0] = lock1; 1442 topology[1] = request; 1443 nvertex = 2; 1444 } 1445 } else if (request->l_start < lock->l_start) { 1446 topology[0] = request; 1447 nvertex = 1; 1448 } else { 1449 if (lock_effect != FLK_UNLOCK) { 1450 topology[0] = request; 1451 nvertex = 1; 1452 } else { 1453 flk_delete_active_lock(lock, 0); 1454 flk_wakeup(lock, 1); 1455 flk_free_lock(lock); 1456 CHECK_SLEEPING_LOCKS(gp); 1457 CHECK_ACTIVE_LOCKS(gp); 1458 return (1); 1459 } 1460 } 1461 } 1462 1463 recompute: 1464 1465 /* 1466 * For unlock we don't send the 'request' to for recomputing 1467 * dependencies because no lock will add an edge to this. 1468 */ 1469 1470 if (lock_effect == FLK_UNLOCK) { 1471 topology[nvertex-1] = NULL; 1472 nvertex--; 1473 } 1474 for (i = 0; i < nvertex; i++) { 1475 topology[i]->l_state |= RECOMPUTE_LOCK; 1476 topology[i]->l_color = NO_COLOR; 1477 } 1478 1479 ASSERT(FIRST_ADJ(lock) == HEAD(lock)); 1480 1481 /* 1482 * we remove the adjacent edges for all vertices' to this vertex 1483 * 'lock'. 1484 */ 1485 1486 ep = FIRST_IN(lock); 1487 while (ep != HEAD(lock)) { 1488 ADJ_LIST_REMOVE(ep); 1489 ep = NEXT_IN(ep); 1490 } 1491 1492 flk_delete_active_lock(lock, 0); 1493 1494 /* We are ready for recomputing the dependencies now */ 1495 1496 flk_recompute_dependencies(lock, topology, nvertex, 1); 1497 1498 for (i = 0; i < nvertex; i++) { 1499 topology[i]->l_state &= ~RECOMPUTE_LOCK; 1500 topology[i]->l_color = NO_COLOR; 1501 } 1502 1503 1504 if (lock_effect == FLK_UNLOCK) { 1505 nvertex++; 1506 } 1507 for (i = 0; i < nvertex - 1; i++) { 1508 flk_insert_active_lock(topology[i]); 1509 } 1510 1511 1512 if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) { 1513 flk_wakeup(lock, 0); 1514 } else { 1515 ep = FIRST_IN(lock); 1516 while (ep != HEAD(lock)) { 1517 lock->l_sedge = NEXT_IN(ep); 1518 IN_LIST_REMOVE(ep); 1519 flk_update_proc_graph(ep, 1); 1520 flk_free_edge(ep); 1521 ep = lock->l_sedge; 1522 } 1523 } 1524 flk_free_lock(lock); 1525 1526 CHECK_SLEEPING_LOCKS(gp); 1527 CHECK_ACTIVE_LOCKS(gp); 1528 return (0); 1529 } 1530 1531 /* 1532 * Insert a lock into the active queue. 1533 */ 1534 1535 static void 1536 flk_insert_active_lock(lock_descriptor_t *new_lock) 1537 { 1538 graph_t *gp = new_lock->l_graph; 1539 vnode_t *vp = new_lock->l_vnode; 1540 lock_descriptor_t *first_lock, *lock; 1541 1542 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1543 1544 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 1545 first_lock = lock; 1546 1547 if (first_lock != NULL) { 1548 for (; (lock->l_vnode == vp && 1549 lock->l_start < new_lock->l_start); lock = lock->l_next) 1550 ; 1551 } else { 1552 lock = ACTIVE_HEAD(gp); 1553 } 1554 1555 lock->l_prev->l_next = new_lock; 1556 new_lock->l_next = lock; 1557 new_lock->l_prev = lock->l_prev; 1558 lock->l_prev = new_lock; 1559 1560 if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) { 1561 vp->v_filocks = (struct filock *)new_lock; 1562 } 1563 flk_set_state(new_lock, FLK_ACTIVE_STATE); 1564 new_lock->l_state |= ACTIVE_LOCK; 1565 1566 CHECK_ACTIVE_LOCKS(gp); 1567 CHECK_SLEEPING_LOCKS(gp); 1568 } 1569 1570 /* 1571 * Delete the active lock : Performs two functions depending on the 1572 * value of second parameter. One is to remove from the active lists 1573 * only and other is to both remove and free the lock. 
1574 */ 1575 1576 static void 1577 flk_delete_active_lock(lock_descriptor_t *lock, int free_lock) 1578 { 1579 vnode_t *vp = lock->l_vnode; 1580 graph_t *gp = lock->l_graph; 1581 1582 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1583 if (free_lock) 1584 ASSERT(NO_DEPENDENTS(lock)); 1585 ASSERT(NOT_BLOCKED(lock)); 1586 ASSERT(IS_ACTIVE(lock)); 1587 1588 ASSERT((vp->v_filocks != NULL)); 1589 1590 if (vp->v_filocks == (struct filock *)lock) { 1591 vp->v_filocks = (struct filock *) 1592 ((lock->l_next->l_vnode == vp) ? lock->l_next : 1593 NULL); 1594 } 1595 lock->l_next->l_prev = lock->l_prev; 1596 lock->l_prev->l_next = lock->l_next; 1597 lock->l_next = lock->l_prev = NULL; 1598 flk_set_state(lock, FLK_DEAD_STATE); 1599 lock->l_state &= ~ACTIVE_LOCK; 1600 1601 if (free_lock) 1602 flk_free_lock(lock); 1603 CHECK_ACTIVE_LOCKS(gp); 1604 CHECK_SLEEPING_LOCKS(gp); 1605 } 1606 1607 /* 1608 * Insert into the sleep queue. 1609 */ 1610 1611 static void 1612 flk_insert_sleeping_lock(lock_descriptor_t *request) 1613 { 1614 graph_t *gp = request->l_graph; 1615 vnode_t *vp = request->l_vnode; 1616 lock_descriptor_t *lock; 1617 1618 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1619 ASSERT(IS_INITIAL(request)); 1620 1621 for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks && 1622 lock->l_vnode < vp); lock = lock->l_next) 1623 ; 1624 1625 lock->l_prev->l_next = request; 1626 request->l_prev = lock->l_prev; 1627 lock->l_prev = request; 1628 request->l_next = lock; 1629 flk_set_state(request, FLK_SLEEPING_STATE); 1630 request->l_state |= SLEEPING_LOCK; 1631 } 1632 1633 /* 1634 * Cancelling a sleeping lock implies removing a vertex from the 1635 * dependency graph and therefore we should recompute the dependencies 1636 * of all vertices that have a path to this vertex, w.r.t. all 1637 * vertices reachable from this vertex. 1638 */ 1639 1640 void 1641 flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue) 1642 { 1643 graph_t *gp = request->l_graph; 1644 vnode_t *vp = request->l_vnode; 1645 lock_descriptor_t **topology = NULL; 1646 edge_t *ep; 1647 lock_descriptor_t *vertex, *lock; 1648 int nvertex = 0; 1649 int i; 1650 lock_descriptor_t *vertex_stack; 1651 1652 STACK_INIT(vertex_stack); 1653 1654 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1655 /* 1656 * count number of vertex pointers that has to be allocated 1657 * All vertices that are reachable from request. 1658 */ 1659 1660 STACK_PUSH(vertex_stack, request, l_stack); 1661 1662 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 1663 STACK_POP(vertex_stack, l_stack); 1664 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex); 1665 ep = NEXT_ADJ(ep)) { 1666 if (IS_RECOMPUTE(ep->to_vertex)) 1667 continue; 1668 ep->to_vertex->l_state |= RECOMPUTE_LOCK; 1669 STACK_PUSH(vertex_stack, ep->to_vertex, l_stack); 1670 nvertex++; 1671 } 1672 } 1673 1674 /* 1675 * allocate memory for holding the vertex pointers 1676 */ 1677 1678 if (nvertex) { 1679 topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *), 1680 KM_SLEEP); 1681 } 1682 1683 /* 1684 * one more pass to actually store the vertices in the 1685 * allocated array. 1686 * We first check sleeping locks and then active locks 1687 * so that topology array will be in a topological 1688 * order. 
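 *
 * (Edges always run from a waiting lock to the lock it waits on: a
 * sleeping lock may wait on an active lock but never the reverse, and
 * newer sleepers wait on older ones, so putting the sleeping locks
 * first keeps every edge pointing from a lower index to a higher index
 * in topology[], as flk_recompute_dependencies() expects.)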
1689 */ 1690 1691 nvertex = 0; 1692 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 1693 1694 if (lock) { 1695 do { 1696 if (IS_RECOMPUTE(lock)) { 1697 lock->l_index = nvertex; 1698 topology[nvertex++] = lock; 1699 } 1700 lock->l_color = NO_COLOR; 1701 lock = lock->l_next; 1702 } while (lock->l_vnode == vp); 1703 } 1704 1705 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 1706 1707 if (lock) { 1708 do { 1709 if (IS_RECOMPUTE(lock)) { 1710 lock->l_index = nvertex; 1711 topology[nvertex++] = lock; 1712 } 1713 lock->l_color = NO_COLOR; 1714 lock = lock->l_next; 1715 } while (lock->l_vnode == vp); 1716 } 1717 1718 /* 1719 * remove in and out edges of request 1720 * They are freed after updating proc_graph below. 1721 */ 1722 1723 for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) { 1724 ADJ_LIST_REMOVE(ep); 1725 } 1726 1727 1728 if (remove_from_queue) 1729 REMOVE_SLEEP_QUEUE(request); 1730 1731 /* we are ready to recompute */ 1732 1733 flk_recompute_dependencies(request, topology, nvertex, 1); 1734 1735 ep = FIRST_ADJ(request); 1736 while (ep != HEAD(request)) { 1737 IN_LIST_REMOVE(ep); 1738 request->l_sedge = NEXT_ADJ(ep); 1739 ADJ_LIST_REMOVE(ep); 1740 flk_update_proc_graph(ep, 1); 1741 flk_free_edge(ep); 1742 ep = request->l_sedge; 1743 } 1744 1745 1746 /* 1747 * unset the RECOMPUTE flag in those vertices 1748 */ 1749 1750 for (i = 0; i < nvertex; i++) { 1751 topology[i]->l_state &= ~RECOMPUTE_LOCK; 1752 } 1753 1754 /* 1755 * free the topology 1756 */ 1757 if (nvertex) 1758 kmem_free((void *)topology, 1759 (nvertex * sizeof (lock_descriptor_t *))); 1760 /* 1761 * Possibility of some locks unblocked now 1762 */ 1763 1764 flk_wakeup(request, 0); 1765 1766 /* 1767 * we expect to have a correctly recomputed graph now. 1768 */ 1769 flk_set_state(request, FLK_DEAD_STATE); 1770 flk_free_lock(request); 1771 CHECK_SLEEPING_LOCKS(gp); 1772 CHECK_ACTIVE_LOCKS(gp); 1773 1774 } 1775 1776 /* 1777 * Uncoloring the graph is simply to increment the mark value of the graph 1778 * And only when wrap round takes place will we color all vertices in 1779 * the graph explicitly. 1780 */ 1781 1782 static void 1783 flk_graph_uncolor(graph_t *gp) 1784 { 1785 lock_descriptor_t *lock; 1786 1787 if (gp->mark == UINT_MAX) { 1788 gp->mark = 1; 1789 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp); 1790 lock = lock->l_next) 1791 lock->l_color = 0; 1792 1793 for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp); 1794 lock = lock->l_next) 1795 lock->l_color = 0; 1796 } else { 1797 gp->mark++; 1798 } 1799 } 1800 1801 /* 1802 * Wake up locks that are blocked on the given lock. 1803 */ 1804 1805 static void 1806 flk_wakeup(lock_descriptor_t *lock, int adj_list_remove) 1807 { 1808 edge_t *ep; 1809 graph_t *gp = lock->l_graph; 1810 lock_descriptor_t *lck; 1811 1812 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1813 if (NO_DEPENDENTS(lock)) 1814 return; 1815 ep = FIRST_IN(lock); 1816 do { 1817 /* 1818 * delete the edge from the adjacency list 1819 * of from vertex. if no more adjacent edges 1820 * for this vertex wake this process. 
1821 */ 1822 lck = ep->from_vertex; 1823 if (adj_list_remove) 1824 ADJ_LIST_REMOVE(ep); 1825 flk_update_proc_graph(ep, 1); 1826 if (NOT_BLOCKED(lck)) { 1827 GRANT_WAKEUP(lck); 1828 } 1829 lock->l_sedge = NEXT_IN(ep); 1830 IN_LIST_REMOVE(ep); 1831 flk_free_edge(ep); 1832 ep = lock->l_sedge; 1833 } while (ep != HEAD(lock)); 1834 ASSERT(NO_DEPENDENTS(lock)); 1835 } 1836 1837 /* 1838 * The dependents of request, is checked for its dependency against the 1839 * locks in topology (called topology because the array is and should be in 1840 * topological order for this algorithm, if not in topological order the 1841 * inner loop below might add more edges than necessary. Topological ordering 1842 * of vertices satisfies the property that all edges will be from left to 1843 * right i.e., topology[i] can have an edge to topology[j], iff i<j) 1844 * If lock l1 in the dependent set of request is dependent (blocked by) 1845 * on lock l2 in topology but does not have a path to it, we add an edge 1846 * in the inner loop below. 1847 * 1848 * We don't want to add an edge between l1 and l2 if there exists 1849 * already a path from l1 to l2, so care has to be taken for those vertices 1850 * that have two paths to 'request'. These vertices are referred to here 1851 * as barrier locks. 1852 * 1853 * The barriers has to be found (those vertex that originally had two paths 1854 * to request) because otherwise we may end up adding edges unnecessarily 1855 * to vertices in topology, and thus barrier vertices can have an edge 1856 * to a vertex in topology as well a path to it. 1857 */ 1858 1859 static void 1860 flk_recompute_dependencies(lock_descriptor_t *request, 1861 lock_descriptor_t **topology, 1862 int nvertex, int update_graph) 1863 { 1864 lock_descriptor_t *vertex, *lock; 1865 graph_t *gp = request->l_graph; 1866 int i, count; 1867 int barrier_found = 0; 1868 edge_t *ep; 1869 lock_descriptor_t *vertex_stack; 1870 1871 STACK_INIT(vertex_stack); 1872 1873 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 1874 if (nvertex == 0) 1875 return; 1876 flk_graph_uncolor(request->l_graph); 1877 barrier_found = flk_find_barriers(request); 1878 request->l_state |= RECOMPUTE_DONE; 1879 1880 STACK_PUSH(vertex_stack, request, l_stack); 1881 request->l_sedge = FIRST_IN(request); 1882 1883 1884 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 1885 if (vertex->l_state & RECOMPUTE_DONE) { 1886 count = 0; 1887 goto next_in_edge; 1888 } 1889 if (IS_BARRIER(vertex)) { 1890 /* decrement the barrier count */ 1891 if (vertex->l_index) { 1892 vertex->l_index--; 1893 /* this guy will be pushed again anyway ? */ 1894 STACK_POP(vertex_stack, l_stack); 1895 if (vertex->l_index == 0) { 1896 /* 1897 * barrier is over we can recompute 1898 * dependencies for this lock in the 1899 * next stack pop 1900 */ 1901 vertex->l_state &= ~BARRIER_LOCK; 1902 } 1903 continue; 1904 } 1905 } 1906 vertex->l_state |= RECOMPUTE_DONE; 1907 flk_graph_uncolor(gp); 1908 count = flk_color_reachables(vertex); 1909 for (i = 0; i < nvertex; i++) { 1910 lock = topology[i]; 1911 if (COLORED(lock)) 1912 continue; 1913 if (BLOCKS(lock, vertex)) { 1914 (void) flk_add_edge(vertex, lock, 1915 NO_CHECK_CYCLE, update_graph); 1916 COLOR(lock); 1917 count++; 1918 count += flk_color_reachables(lock); 1919 } 1920 1921 } 1922 1923 next_in_edge: 1924 if (count == nvertex || 1925 vertex->l_sedge == HEAD(vertex)) { 1926 /* prune the tree below this */ 1927 STACK_POP(vertex_stack, l_stack); 1928 vertex->l_state &= ~RECOMPUTE_DONE; 1929 /* update the barrier locks below this! 
*/ 1930 if (vertex->l_sedge != HEAD(vertex) && barrier_found) { 1931 flk_graph_uncolor(gp); 1932 flk_update_barriers(vertex); 1933 } 1934 continue; 1935 } 1936 1937 ep = vertex->l_sedge; 1938 lock = ep->from_vertex; 1939 STACK_PUSH(vertex_stack, lock, l_stack); 1940 lock->l_sedge = FIRST_IN(lock); 1941 vertex->l_sedge = NEXT_IN(ep); 1942 } 1943 1944 } 1945 1946 /* 1947 * Color all reachable vertices from vertex that belongs to topology (here 1948 * those that have RECOMPUTE_LOCK set in their state) and yet uncolored. 1949 * 1950 * Note: we need to use a different stack_link l_stack1 because this is 1951 * called from flk_recompute_dependencies() that already uses a stack with 1952 * l_stack as stack_link. 1953 */ 1954 1955 static int 1956 flk_color_reachables(lock_descriptor_t *vertex) 1957 { 1958 lock_descriptor_t *ver, *lock; 1959 int count; 1960 edge_t *ep; 1961 lock_descriptor_t *vertex_stack; 1962 1963 STACK_INIT(vertex_stack); 1964 1965 STACK_PUSH(vertex_stack, vertex, l_stack1); 1966 count = 0; 1967 while ((ver = STACK_TOP(vertex_stack)) != NULL) { 1968 1969 STACK_POP(vertex_stack, l_stack1); 1970 for (ep = FIRST_ADJ(ver); ep != HEAD(ver); 1971 ep = NEXT_ADJ(ep)) { 1972 lock = ep->to_vertex; 1973 if (COLORED(lock)) 1974 continue; 1975 COLOR(lock); 1976 if (IS_RECOMPUTE(lock)) 1977 count++; 1978 STACK_PUSH(vertex_stack, lock, l_stack1); 1979 } 1980 1981 } 1982 return (count); 1983 } 1984 1985 /* 1986 * Called from flk_recompute_dependencies() this routine decrements 1987 * the barrier count of barrier vertices that are reachable from lock. 1988 */ 1989 1990 static void 1991 flk_update_barriers(lock_descriptor_t *lock) 1992 { 1993 lock_descriptor_t *vertex, *lck; 1994 edge_t *ep; 1995 lock_descriptor_t *vertex_stack; 1996 1997 STACK_INIT(vertex_stack); 1998 1999 STACK_PUSH(vertex_stack, lock, l_stack1); 2000 2001 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 2002 STACK_POP(vertex_stack, l_stack1); 2003 for (ep = FIRST_IN(vertex); ep != HEAD(vertex); 2004 ep = NEXT_IN(ep)) { 2005 lck = ep->from_vertex; 2006 if (COLORED(lck)) { 2007 if (IS_BARRIER(lck)) { 2008 ASSERT(lck->l_index > 0); 2009 lck->l_index--; 2010 if (lck->l_index == 0) 2011 lck->l_state &= ~BARRIER_LOCK; 2012 } 2013 continue; 2014 } 2015 COLOR(lck); 2016 if (IS_BARRIER(lck)) { 2017 ASSERT(lck->l_index > 0); 2018 lck->l_index--; 2019 if (lck->l_index == 0) 2020 lck->l_state &= ~BARRIER_LOCK; 2021 } 2022 STACK_PUSH(vertex_stack, lck, l_stack1); 2023 } 2024 } 2025 } 2026 2027 /* 2028 * Finds all vertices that are reachable from 'lock' more than once and 2029 * mark them as barrier vertices and increment their barrier count. 2030 * The barrier count is one minus the total number of paths from lock 2031 * to that vertex. 
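 *
 * For example, if there are three distinct paths from 'lock' to some
 * vertex v, the walk below reaches v three times: the first visit
 * colors it and zeroes v->l_index, and each later visit marks it
 * BARRIER_LOCK and bumps v->l_index, leaving a barrier count of 2
 * (one less than the number of paths, as stated above).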
2032 */ 2033 2034 static int 2035 flk_find_barriers(lock_descriptor_t *lock) 2036 { 2037 lock_descriptor_t *vertex, *lck; 2038 int found = 0; 2039 edge_t *ep; 2040 lock_descriptor_t *vertex_stack; 2041 2042 STACK_INIT(vertex_stack); 2043 2044 STACK_PUSH(vertex_stack, lock, l_stack1); 2045 2046 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 2047 STACK_POP(vertex_stack, l_stack1); 2048 for (ep = FIRST_IN(vertex); ep != HEAD(vertex); 2049 ep = NEXT_IN(ep)) { 2050 lck = ep->from_vertex; 2051 if (COLORED(lck)) { 2052 /* this is a barrier */ 2053 lck->l_state |= BARRIER_LOCK; 2054 /* index will have barrier count */ 2055 lck->l_index++; 2056 if (!found) 2057 found = 1; 2058 continue; 2059 } 2060 COLOR(lck); 2061 lck->l_index = 0; 2062 STACK_PUSH(vertex_stack, lck, l_stack1); 2063 } 2064 } 2065 return (found); 2066 } 2067 2068 /* 2069 * Finds the first lock that is mainly responsible for blocking this 2070 * request. If there is no such lock, request->l_flock.l_type is set to 2071 * F_UNLCK. Otherwise, request->l_flock is filled in with the particulars 2072 * of the blocking lock. 2073 * 2074 * Note: It is possible a request is blocked by a sleeping lock because 2075 * of the fairness policy used in flk_process_request() to construct the 2076 * dependencies. (see comments before flk_process_request()). 2077 */ 2078 2079 static void 2080 flk_get_first_blocking_lock(lock_descriptor_t *request) 2081 { 2082 graph_t *gp = request->l_graph; 2083 vnode_t *vp = request->l_vnode; 2084 lock_descriptor_t *lock, *blocker; 2085 2086 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 2087 blocker = NULL; 2088 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2089 2090 if (lock) { 2091 do { 2092 if (BLOCKS(lock, request)) { 2093 blocker = lock; 2094 break; 2095 } 2096 lock = lock->l_next; 2097 } while (lock->l_vnode == vp); 2098 } 2099 2100 if (blocker) { 2101 report_blocker(blocker, request); 2102 } else 2103 request->l_flock.l_type = F_UNLCK; 2104 } 2105 2106 /* 2107 * Get the graph_t structure associated with a vnode. 2108 * If 'initialize' is non-zero, and the graph_t structure for this vnode has 2109 * not yet been initialized, then a new element is allocated and returned. 2110 */ 2111 graph_t * 2112 flk_get_lock_graph(vnode_t *vp, int initialize) 2113 { 2114 graph_t *gp; 2115 graph_t *gp_alloc = NULL; 2116 int index = HASH_INDEX(vp); 2117 2118 if (initialize == FLK_USE_GRAPH) { 2119 mutex_enter(&flock_lock); 2120 gp = lock_graph[index]; 2121 mutex_exit(&flock_lock); 2122 return (gp); 2123 } 2124 2125 ASSERT(initialize == FLK_INIT_GRAPH); 2126 2127 if (lock_graph[index] == NULL) { 2128 2129 gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP); 2130 2131 /* Initialize the graph */ 2132 2133 gp_alloc->active_locks.l_next = 2134 gp_alloc->active_locks.l_prev = 2135 (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc); 2136 gp_alloc->sleeping_locks.l_next = 2137 gp_alloc->sleeping_locks.l_prev = 2138 (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc); 2139 gp_alloc->index = index; 2140 mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL); 2141 } 2142 2143 mutex_enter(&flock_lock); 2144 2145 gp = lock_graph[index]; 2146 2147 /* Recheck the value within flock_lock */ 2148 if (gp == NULL) { 2149 struct flock_globals *fg; 2150 2151 /* We must have previously allocated the graph_t structure */ 2152 ASSERT(gp_alloc != NULL); 2153 lock_graph[index] = gp = gp_alloc; 2154 /* 2155 * The lockmgr status is only needed if KLM is loaded. 
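 * (If flock_zone_key is still ZONE_KEY_UNINITIALIZED, the lock manager has
 * presumably never been initialized, so there is no per-zone status to
 * record for this graph.)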
2156 */ 2157 if (flock_zone_key != ZONE_KEY_UNINITIALIZED) { 2158 fg = flk_get_globals(); 2159 fg->lockmgr_status[index] = fg->flk_lockmgr_status; 2160 } 2161 } 2162 2163 mutex_exit(&flock_lock); 2164 2165 if ((gp_alloc != NULL) && (gp != gp_alloc)) { 2166 /* There was a race to allocate the graph_t and we lost */ 2167 mutex_destroy(&gp_alloc->gp_mutex); 2168 kmem_free(gp_alloc, sizeof (graph_t)); 2169 } 2170 2171 return (gp); 2172 } 2173 2174 /* 2175 * PSARC case 1997/292 2176 */ 2177 int 2178 cl_flk_has_remote_locks_for_nlmid(vnode_t *vp, int nlmid) 2179 { 2180 lock_descriptor_t *lock; 2181 int result = 0; 2182 graph_t *gp; 2183 int lock_nlmid; 2184 2185 /* 2186 * Check to see if node is booted as a cluster. If not, return. 2187 */ 2188 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 2189 return (0); 2190 } 2191 2192 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2193 if (gp == NULL) { 2194 return (0); 2195 } 2196 2197 mutex_enter(&gp->gp_mutex); 2198 2199 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2200 2201 if (lock) { 2202 while (lock->l_vnode == vp) { 2203 /* get NLM id from sysid */ 2204 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 2205 2206 /* 2207 * If NLM server request _and_ nlmid of lock matches 2208 * nlmid of argument, then we've found a remote lock. 2209 */ 2210 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 2211 result = 1; 2212 goto done; 2213 } 2214 lock = lock->l_next; 2215 } 2216 } 2217 2218 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2219 2220 if (lock) { 2221 while (lock->l_vnode == vp) { 2222 /* get NLM id from sysid */ 2223 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 2224 2225 /* 2226 * If NLM server request _and_ nlmid of lock matches 2227 * nlmid of argument, then we've found a remote lock. 2228 */ 2229 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 2230 result = 1; 2231 goto done; 2232 } 2233 lock = lock->l_next; 2234 } 2235 } 2236 2237 done: 2238 mutex_exit(&gp->gp_mutex); 2239 return (result); 2240 } 2241 2242 /* 2243 * Determine whether there are any locks for the given vnode with a remote 2244 * sysid. Returns zero if not, non-zero if there are. 2245 * 2246 * Note that the return value from this function is potentially invalid 2247 * once it has been returned. The caller is responsible for providing its 2248 * own synchronization mechanism to ensure that the return value is useful 2249 * (e.g., see nfs_lockcompletion()). 2250 */ 2251 int 2252 flk_has_remote_locks(vnode_t *vp) 2253 { 2254 lock_descriptor_t *lock; 2255 int result = 0; 2256 graph_t *gp; 2257 2258 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2259 if (gp == NULL) { 2260 return (0); 2261 } 2262 2263 mutex_enter(&gp->gp_mutex); 2264 2265 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2266 2267 if (lock) { 2268 while (lock->l_vnode == vp) { 2269 if (IS_REMOTE(lock)) { 2270 result = 1; 2271 goto done; 2272 } 2273 lock = lock->l_next; 2274 } 2275 } 2276 2277 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2278 2279 if (lock) { 2280 while (lock->l_vnode == vp) { 2281 if (IS_REMOTE(lock)) { 2282 result = 1; 2283 goto done; 2284 } 2285 lock = lock->l_next; 2286 } 2287 } 2288 2289 done: 2290 mutex_exit(&gp->gp_mutex); 2291 return (result); 2292 } 2293 2294 /* 2295 * Determine if there are any locks owned by the given sysid. 2296 * Returns zero if not, non-zero if there are. Note that this return code 2297 * could be derived from flk_get_{sleeping,active}_locks, but this routine 2298 * avoids all the memory allocations of those routines. 
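 *
 * Illustrative use (hypothetical caller, not from this file): decide whether
 * the state kept for a remote client, identified by its sysid, may be
 * discarded:
 *
 *	if (!flk_sysid_has_locks(sysid, FLK_QUERY_ACTIVE | FLK_QUERY_SLEEPING))
 *		... no locks remain on behalf of this client ...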
2299 * 2300 * This routine has the same synchronization issues as 2301 * flk_has_remote_locks. 2302 */ 2303 2304 int 2305 flk_sysid_has_locks(int sysid, int lck_type) 2306 { 2307 int has_locks = 0; 2308 lock_descriptor_t *lock; 2309 graph_t *gp; 2310 int i; 2311 2312 for (i = 0; i < HASH_SIZE && !has_locks; i++) { 2313 mutex_enter(&flock_lock); 2314 gp = lock_graph[i]; 2315 mutex_exit(&flock_lock); 2316 if (gp == NULL) { 2317 continue; 2318 } 2319 2320 mutex_enter(&gp->gp_mutex); 2321 2322 if (lck_type & FLK_QUERY_ACTIVE) { 2323 for (lock = ACTIVE_HEAD(gp)->l_next; 2324 lock != ACTIVE_HEAD(gp) && !has_locks; 2325 lock = lock->l_next) { 2326 if (lock->l_flock.l_sysid == sysid) 2327 has_locks = 1; 2328 } 2329 } 2330 2331 if (lck_type & FLK_QUERY_SLEEPING) { 2332 for (lock = SLEEPING_HEAD(gp)->l_next; 2333 lock != SLEEPING_HEAD(gp) && !has_locks; 2334 lock = lock->l_next) { 2335 if (lock->l_flock.l_sysid == sysid) 2336 has_locks = 1; 2337 } 2338 } 2339 mutex_exit(&gp->gp_mutex); 2340 } 2341 2342 return (has_locks); 2343 } 2344 2345 2346 /* 2347 * PSARC case 1997/292 2348 * 2349 * Requires: "sysid" is a pair [nlmid, sysid]. The lower half is 16-bit 2350 * quantity, the real sysid generated by the NLM server; the upper half 2351 * identifies the node of the cluster where the NLM server ran. 2352 * This routine is only called by an NLM server running in a cluster. 2353 * Effects: Remove all locks held on behalf of the client identified 2354 * by "sysid." 2355 */ 2356 void 2357 cl_flk_remove_locks_by_sysid(int sysid) 2358 { 2359 graph_t *gp; 2360 int i; 2361 lock_descriptor_t *lock, *nlock; 2362 2363 /* 2364 * Check to see if node is booted as a cluster. If not, return. 2365 */ 2366 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 2367 return; 2368 } 2369 2370 ASSERT(sysid != 0); 2371 for (i = 0; i < HASH_SIZE; i++) { 2372 mutex_enter(&flock_lock); 2373 gp = lock_graph[i]; 2374 mutex_exit(&flock_lock); 2375 2376 if (gp == NULL) 2377 continue; 2378 2379 mutex_enter(&gp->gp_mutex); /* get mutex on lock graph */ 2380 2381 /* signal sleeping requests so that they bail out */ 2382 lock = SLEEPING_HEAD(gp)->l_next; 2383 while (lock != SLEEPING_HEAD(gp)) { 2384 nlock = lock->l_next; 2385 if (lock->l_flock.l_sysid == sysid) { 2386 INTERRUPT_WAKEUP(lock); 2387 } 2388 lock = nlock; 2389 } 2390 2391 /* delete active locks */ 2392 lock = ACTIVE_HEAD(gp)->l_next; 2393 while (lock != ACTIVE_HEAD(gp)) { 2394 nlock = lock->l_next; 2395 if (lock->l_flock.l_sysid == sysid) { 2396 flk_delete_active_lock(lock, 0); 2397 flk_wakeup(lock, 1); 2398 flk_free_lock(lock); 2399 } 2400 lock = nlock; 2401 } 2402 mutex_exit(&gp->gp_mutex); /* release mutex on lock graph */ 2403 } 2404 } 2405 2406 /* 2407 * Delete all locks in the system that belongs to the sysid of the request. 
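 *
 * The caller holds the graph mutex of the request's own graph; that mutex is
 * dropped while all the lock graphs are scanned and is reacquired before
 * this function returns.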
2408 */ 2409 2410 static void 2411 flk_delete_locks_by_sysid(lock_descriptor_t *request) 2412 { 2413 int sysid = request->l_flock.l_sysid; 2414 lock_descriptor_t *lock, *nlock; 2415 graph_t *gp; 2416 int i; 2417 2418 ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex)); 2419 ASSERT(sysid != 0); 2420 2421 mutex_exit(&request->l_graph->gp_mutex); 2422 2423 for (i = 0; i < HASH_SIZE; i++) { 2424 mutex_enter(&flock_lock); 2425 gp = lock_graph[i]; 2426 mutex_exit(&flock_lock); 2427 2428 if (gp == NULL) 2429 continue; 2430 2431 mutex_enter(&gp->gp_mutex); 2432 2433 /* signal sleeping requests so that they bail out */ 2434 lock = SLEEPING_HEAD(gp)->l_next; 2435 while (lock != SLEEPING_HEAD(gp)) { 2436 nlock = lock->l_next; 2437 if (lock->l_flock.l_sysid == sysid) { 2438 INTERRUPT_WAKEUP(lock); 2439 } 2440 lock = nlock; 2441 } 2442 2443 /* delete active locks */ 2444 lock = ACTIVE_HEAD(gp)->l_next; 2445 while (lock != ACTIVE_HEAD(gp)) { 2446 nlock = lock->l_next; 2447 if (lock->l_flock.l_sysid == sysid) { 2448 flk_delete_active_lock(lock, 0); 2449 flk_wakeup(lock, 1); 2450 flk_free_lock(lock); 2451 } 2452 lock = nlock; 2453 } 2454 mutex_exit(&gp->gp_mutex); 2455 } 2456 2457 mutex_enter(&request->l_graph->gp_mutex); 2458 } 2459 2460 /* 2461 * Clustering: Deletes PXFS locks 2462 * Effects: Delete all locks on files in the given file system and with the 2463 * given PXFS id. 2464 */ 2465 void 2466 cl_flk_delete_pxfs_locks(struct vfs *vfsp, int pxfsid) 2467 { 2468 lock_descriptor_t *lock, *nlock; 2469 graph_t *gp; 2470 int i; 2471 2472 for (i = 0; i < HASH_SIZE; i++) { 2473 mutex_enter(&flock_lock); 2474 gp = lock_graph[i]; 2475 mutex_exit(&flock_lock); 2476 2477 if (gp == NULL) 2478 continue; 2479 2480 mutex_enter(&gp->gp_mutex); 2481 2482 /* signal sleeping requests so that they bail out */ 2483 lock = SLEEPING_HEAD(gp)->l_next; 2484 while (lock != SLEEPING_HEAD(gp)) { 2485 nlock = lock->l_next; 2486 if (lock->l_vnode->v_vfsp == vfsp) { 2487 ASSERT(IS_PXFS(lock)); 2488 if (GETPXFSID(lock->l_flock.l_sysid) == 2489 pxfsid) { 2490 flk_set_state(lock, 2491 FLK_CANCELLED_STATE); 2492 flk_cancel_sleeping_lock(lock, 1); 2493 } 2494 } 2495 lock = nlock; 2496 } 2497 2498 /* delete active locks */ 2499 lock = ACTIVE_HEAD(gp)->l_next; 2500 while (lock != ACTIVE_HEAD(gp)) { 2501 nlock = lock->l_next; 2502 if (lock->l_vnode->v_vfsp == vfsp) { 2503 ASSERT(IS_PXFS(lock)); 2504 if (GETPXFSID(lock->l_flock.l_sysid) == 2505 pxfsid) { 2506 flk_delete_active_lock(lock, 0); 2507 flk_wakeup(lock, 1); 2508 flk_free_lock(lock); 2509 } 2510 } 2511 lock = nlock; 2512 } 2513 mutex_exit(&gp->gp_mutex); 2514 } 2515 } 2516 2517 /* 2518 * Search for a sleeping lock manager lock which matches exactly this lock 2519 * request; if one is found, fake a signal to cancel it. 2520 * 2521 * Return 1 if a matching lock was found, 0 otherwise. 
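 * (The wakeup is delivered with INTERRUPT_WAKEUP(), so the sleeping thread
 * unwinds just as if it had been interrupted by a signal.)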
2522 */ 2523 2524 static int 2525 flk_canceled(lock_descriptor_t *request) 2526 { 2527 lock_descriptor_t *lock, *nlock; 2528 graph_t *gp = request->l_graph; 2529 vnode_t *vp = request->l_vnode; 2530 2531 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 2532 ASSERT(IS_LOCKMGR(request)); 2533 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2534 2535 if (lock) { 2536 while (lock->l_vnode == vp) { 2537 nlock = lock->l_next; 2538 if (SAME_OWNER(lock, request) && 2539 lock->l_start == request->l_start && 2540 lock->l_end == request->l_end) { 2541 INTERRUPT_WAKEUP(lock); 2542 return (1); 2543 } 2544 lock = nlock; 2545 } 2546 } 2547 return (0); 2548 } 2549 2550 /* 2551 * Remove all the locks for the vnode belonging to the given pid and sysid. 2552 */ 2553 2554 void 2555 cleanlocks(vnode_t *vp, pid_t pid, int sysid) 2556 { 2557 graph_t *gp; 2558 lock_descriptor_t *lock, *nlock; 2559 lock_descriptor_t *link_stack; 2560 2561 STACK_INIT(link_stack); 2562 2563 gp = flk_get_lock_graph(vp, FLK_USE_GRAPH); 2564 2565 if (gp == NULL) 2566 return; 2567 mutex_enter(&gp->gp_mutex); 2568 2569 CHECK_SLEEPING_LOCKS(gp); 2570 CHECK_ACTIVE_LOCKS(gp); 2571 2572 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 2573 2574 if (lock) { 2575 do { 2576 nlock = lock->l_next; 2577 if ((lock->l_flock.l_pid == pid || 2578 pid == IGN_PID) && 2579 lock->l_flock.l_sysid == sysid) { 2580 CANCEL_WAKEUP(lock); 2581 } 2582 lock = nlock; 2583 } while (lock->l_vnode == vp); 2584 } 2585 2586 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 2587 2588 if (lock) { 2589 do { 2590 nlock = lock->l_next; 2591 if ((lock->l_flock.l_pid == pid || 2592 pid == IGN_PID) && 2593 lock->l_flock.l_sysid == sysid) { 2594 flk_delete_active_lock(lock, 0); 2595 STACK_PUSH(link_stack, lock, l_stack); 2596 } 2597 lock = nlock; 2598 } while (lock->l_vnode == vp); 2599 } 2600 2601 while ((lock = STACK_TOP(link_stack)) != NULL) { 2602 STACK_POP(link_stack, l_stack); 2603 flk_wakeup(lock, 1); 2604 flk_free_lock(lock); 2605 } 2606 2607 CHECK_SLEEPING_LOCKS(gp); 2608 CHECK_ACTIVE_LOCKS(gp); 2609 CHECK_OWNER_LOCKS(gp, pid, sysid, vp); 2610 mutex_exit(&gp->gp_mutex); 2611 } 2612 2613 2614 /* 2615 * Called from 'fs' read and write routines for files that have mandatory 2616 * locking enabled. 2617 */ 2618 2619 int 2620 chklock( 2621 struct vnode *vp, 2622 int iomode, 2623 u_offset_t offset, 2624 ssize_t len, 2625 int fmode, 2626 caller_context_t *ct) 2627 { 2628 register int i; 2629 struct flock64 bf; 2630 int error = 0; 2631 2632 bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK; 2633 bf.l_whence = 0; 2634 bf.l_start = offset; 2635 bf.l_len = len; 2636 if (ct == NULL) { 2637 bf.l_pid = curproc->p_pid; 2638 bf.l_sysid = 0; 2639 } else { 2640 bf.l_pid = ct->cc_pid; 2641 bf.l_sysid = ct->cc_sysid; 2642 } 2643 i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK; 2644 if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 || 2645 bf.l_type != F_UNLCK) 2646 error = i ? i : EAGAIN; 2647 return (error); 2648 } 2649 2650 /* 2651 * convoff - converts the given data (start, whence) to the 2652 * given whence. 
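 *
 * For example, to rewrite an flock64 expressed relative to the current file
 * offset (l_whence == 1, SEEK_CUR) as an absolute range (whence 0, SEEK_SET),
 * a caller might do the following (illustrative only; 'bf' and 'fileoff' are
 * hypothetical names):
 *
 *	if ((error = convoff(vp, &bf, 0, fileoff)) != 0)
 *		return (error);
 *
 * after which bf.l_start is an absolute offset and bf.l_whence is 0.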
2653 */ 2654 int 2655 convoff(vp, lckdat, whence, offset) 2656 struct vnode *vp; 2657 struct flock64 *lckdat; 2658 int whence; 2659 offset_t offset; 2660 { 2661 int error; 2662 struct vattr vattr; 2663 2664 if ((lckdat->l_whence == 2) || (whence == 2)) { 2665 vattr.va_mask = AT_SIZE; 2666 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) 2667 return (error); 2668 } 2669 2670 switch (lckdat->l_whence) { 2671 case 1: 2672 lckdat->l_start += offset; 2673 break; 2674 case 2: 2675 lckdat->l_start += vattr.va_size; 2676 /* FALLTHRU */ 2677 case 0: 2678 break; 2679 default: 2680 return (EINVAL); 2681 } 2682 2683 if (lckdat->l_start < 0) 2684 return (EINVAL); 2685 2686 switch (whence) { 2687 case 1: 2688 lckdat->l_start -= offset; 2689 break; 2690 case 2: 2691 lckdat->l_start -= vattr.va_size; 2692 /* FALLTHRU */ 2693 case 0: 2694 break; 2695 default: 2696 return (EINVAL); 2697 } 2698 2699 lckdat->l_whence = (short)whence; 2700 return (0); 2701 } 2702 2703 2704 /* proc_graph function definitions */ 2705 2706 /* 2707 * Function checks for deadlock due to the new 'lock'. If deadlock found 2708 * edges of this lock are freed and returned. 2709 */ 2710 2711 static int 2712 flk_check_deadlock(lock_descriptor_t *lock) 2713 { 2714 proc_vertex_t *start_vertex, *pvertex; 2715 proc_vertex_t *dvertex; 2716 proc_edge_t *pep, *ppep; 2717 edge_t *ep, *nep; 2718 proc_vertex_t *process_stack; 2719 2720 STACK_INIT(process_stack); 2721 2722 mutex_enter(&flock_lock); 2723 start_vertex = flk_get_proc_vertex(lock); 2724 ASSERT(start_vertex != NULL); 2725 2726 /* construct the edges from this process to other processes */ 2727 2728 ep = FIRST_ADJ(lock); 2729 while (ep != HEAD(lock)) { 2730 proc_vertex_t *adj_proc; 2731 2732 adj_proc = flk_get_proc_vertex(ep->to_vertex); 2733 for (pep = start_vertex->edge; pep != NULL; pep = pep->next) { 2734 if (pep->to_proc == adj_proc) { 2735 ASSERT(pep->refcount); 2736 pep->refcount++; 2737 break; 2738 } 2739 } 2740 if (pep == NULL) { 2741 pep = flk_get_proc_edge(); 2742 pep->to_proc = adj_proc; 2743 pep->refcount = 1; 2744 adj_proc->incount++; 2745 pep->next = start_vertex->edge; 2746 start_vertex->edge = pep; 2747 } 2748 ep = NEXT_ADJ(ep); 2749 } 2750 2751 ep = FIRST_IN(lock); 2752 2753 while (ep != HEAD(lock)) { 2754 proc_vertex_t *in_proc; 2755 2756 in_proc = flk_get_proc_vertex(ep->from_vertex); 2757 2758 for (pep = in_proc->edge; pep != NULL; pep = pep->next) { 2759 if (pep->to_proc == start_vertex) { 2760 ASSERT(pep->refcount); 2761 pep->refcount++; 2762 break; 2763 } 2764 } 2765 if (pep == NULL) { 2766 pep = flk_get_proc_edge(); 2767 pep->to_proc = start_vertex; 2768 pep->refcount = 1; 2769 start_vertex->incount++; 2770 pep->next = in_proc->edge; 2771 in_proc->edge = pep; 2772 } 2773 ep = NEXT_IN(ep); 2774 } 2775 2776 if (start_vertex->incount == 0) { 2777 mutex_exit(&flock_lock); 2778 return (0); 2779 } 2780 2781 flk_proc_graph_uncolor(); 2782 2783 start_vertex->p_sedge = start_vertex->edge; 2784 2785 STACK_PUSH(process_stack, start_vertex, p_stack); 2786 2787 while ((pvertex = STACK_TOP(process_stack)) != NULL) { 2788 for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) { 2789 dvertex = pep->to_proc; 2790 if (!PROC_ARRIVED(dvertex)) { 2791 STACK_PUSH(process_stack, dvertex, p_stack); 2792 dvertex->p_sedge = dvertex->edge; 2793 PROC_ARRIVE(pvertex); 2794 pvertex->p_sedge = pep->next; 2795 break; 2796 } 2797 if (!PROC_DEPARTED(dvertex)) 2798 goto deadlock; 2799 } 2800 if (pep == NULL) { 2801 PROC_DEPART(pvertex); 2802 STACK_POP(process_stack, p_stack); 2803 } 2804 
} 2805 mutex_exit(&flock_lock); 2806 return (0); 2807 2808 deadlock: 2809 2810 /* we remove all lock edges and proc edges */ 2811 2812 ep = FIRST_ADJ(lock); 2813 while (ep != HEAD(lock)) { 2814 proc_vertex_t *adj_proc; 2815 adj_proc = flk_get_proc_vertex(ep->to_vertex); 2816 nep = NEXT_ADJ(ep); 2817 IN_LIST_REMOVE(ep); 2818 ADJ_LIST_REMOVE(ep); 2819 flk_free_edge(ep); 2820 ppep = start_vertex->edge; 2821 for (pep = start_vertex->edge; pep != NULL; ppep = pep, 2822 pep = ppep->next) { 2823 if (pep->to_proc == adj_proc) { 2824 pep->refcount--; 2825 if (pep->refcount == 0) { 2826 if (pep == ppep) { 2827 start_vertex->edge = pep->next; 2828 } else { 2829 ppep->next = pep->next; 2830 } 2831 adj_proc->incount--; 2832 flk_proc_release(adj_proc); 2833 flk_free_proc_edge(pep); 2834 } 2835 break; 2836 } 2837 } 2838 ep = nep; 2839 } 2840 ep = FIRST_IN(lock); 2841 while (ep != HEAD(lock)) { 2842 proc_vertex_t *in_proc; 2843 in_proc = flk_get_proc_vertex(ep->from_vertex); 2844 nep = NEXT_IN(ep); 2845 IN_LIST_REMOVE(ep); 2846 ADJ_LIST_REMOVE(ep); 2847 flk_free_edge(ep); 2848 ppep = in_proc->edge; 2849 for (pep = in_proc->edge; pep != NULL; ppep = pep, 2850 pep = ppep->next) { 2851 if (pep->to_proc == start_vertex) { 2852 pep->refcount--; 2853 if (pep->refcount == 0) { 2854 if (pep == ppep) { 2855 in_proc->edge = pep->next; 2856 } else { 2857 ppep->next = pep->next; 2858 } 2859 start_vertex->incount--; 2860 flk_proc_release(in_proc); 2861 flk_free_proc_edge(pep); 2862 } 2863 break; 2864 } 2865 } 2866 ep = nep; 2867 } 2868 flk_proc_release(start_vertex); 2869 mutex_exit(&flock_lock); 2870 return (1); 2871 } 2872 2873 /* 2874 * Get a proc vertex. If lock's pvertex value gets a correct proc vertex 2875 * from the list we return that, otherwise we allocate one. If necessary, 2876 * we grow the list of vertices also. 2877 */ 2878 2879 static proc_vertex_t * 2880 flk_get_proc_vertex(lock_descriptor_t *lock) 2881 { 2882 int i; 2883 proc_vertex_t *pv; 2884 proc_vertex_t **palloc; 2885 2886 ASSERT(MUTEX_HELD(&flock_lock)); 2887 if (lock->pvertex != -1) { 2888 ASSERT(lock->pvertex >= 0); 2889 pv = pgraph.proc[lock->pvertex]; 2890 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) { 2891 return (pv); 2892 } 2893 } 2894 for (i = 0; i < pgraph.gcount; i++) { 2895 pv = pgraph.proc[i]; 2896 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) { 2897 lock->pvertex = pv->index = i; 2898 return (pv); 2899 } 2900 } 2901 pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP); 2902 pv->pid = lock->l_flock.l_pid; 2903 pv->sysid = lock->l_flock.l_sysid; 2904 flk_proc_vertex_allocs++; 2905 if (pgraph.free != 0) { 2906 for (i = 0; i < pgraph.gcount; i++) { 2907 if (pgraph.proc[i] == NULL) { 2908 pgraph.proc[i] = pv; 2909 lock->pvertex = pv->index = i; 2910 pgraph.free--; 2911 return (pv); 2912 } 2913 } 2914 } 2915 palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) * 2916 sizeof (proc_vertex_t *), KM_SLEEP); 2917 2918 if (pgraph.proc) { 2919 bcopy(pgraph.proc, palloc, 2920 pgraph.gcount * sizeof (proc_vertex_t *)); 2921 2922 kmem_free(pgraph.proc, 2923 pgraph.gcount * sizeof (proc_vertex_t *)); 2924 } 2925 pgraph.proc = palloc; 2926 pgraph.free += (PROC_CHUNK - 1); 2927 pv->index = lock->pvertex = pgraph.gcount; 2928 pgraph.gcount += PROC_CHUNK; 2929 pgraph.proc[pv->index] = pv; 2930 return (pv); 2931 } 2932 2933 /* 2934 * Allocate a proc edge. 
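 * The allocation may sleep (KM_SLEEP); both callers in this file invoke it
 * while holding flock_lock.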
2935 */ 2936 2937 static proc_edge_t * 2938 flk_get_proc_edge() 2939 { 2940 proc_edge_t *pep; 2941 2942 pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP); 2943 flk_proc_edge_allocs++; 2944 return (pep); 2945 } 2946 2947 /* 2948 * Free the proc edge. Called whenever its reference count goes to zero. 2949 */ 2950 2951 static void 2952 flk_free_proc_edge(proc_edge_t *pep) 2953 { 2954 ASSERT(pep->refcount == 0); 2955 kmem_free((void *)pep, sizeof (proc_edge_t)); 2956 flk_proc_edge_frees++; 2957 } 2958 2959 /* 2960 * Color the graph explicitly done only when the mark value hits max value. 2961 */ 2962 2963 static void 2964 flk_proc_graph_uncolor() 2965 { 2966 int i; 2967 2968 if (pgraph.mark == UINT_MAX) { 2969 for (i = 0; i < pgraph.gcount; i++) 2970 if (pgraph.proc[i] != NULL) { 2971 pgraph.proc[i]->atime = 0; 2972 pgraph.proc[i]->dtime = 0; 2973 } 2974 pgraph.mark = 1; 2975 } else { 2976 pgraph.mark++; 2977 } 2978 } 2979 2980 /* 2981 * Release the proc vertex iff both there are no in edges and out edges 2982 */ 2983 2984 static void 2985 flk_proc_release(proc_vertex_t *proc) 2986 { 2987 ASSERT(MUTEX_HELD(&flock_lock)); 2988 if (proc->edge == NULL && proc->incount == 0) { 2989 pgraph.proc[proc->index] = NULL; 2990 pgraph.free++; 2991 kmem_free(proc, sizeof (proc_vertex_t)); 2992 flk_proc_vertex_frees++; 2993 } 2994 } 2995 2996 /* 2997 * Updates process graph to reflect change in a lock_graph. 2998 * Note: We should call this function only after we have a correctly 2999 * recomputed lock graph. Otherwise we might miss a deadlock detection. 3000 * eg: in function flk_relation() we call this function after flk_recompute_ 3001 * dependencies() otherwise if a process tries to lock a vnode hashed 3002 * into another graph it might sleep for ever. 3003 */ 3004 3005 static void 3006 flk_update_proc_graph(edge_t *ep, int delete) 3007 { 3008 proc_vertex_t *toproc, *fromproc; 3009 proc_edge_t *pep, *prevpep; 3010 3011 mutex_enter(&flock_lock); 3012 toproc = flk_get_proc_vertex(ep->to_vertex); 3013 fromproc = flk_get_proc_vertex(ep->from_vertex); 3014 3015 if (!delete) 3016 goto add; 3017 pep = prevpep = fromproc->edge; 3018 3019 ASSERT(pep != NULL); 3020 while (pep != NULL) { 3021 if (pep->to_proc == toproc) { 3022 ASSERT(pep->refcount > 0); 3023 pep->refcount--; 3024 if (pep->refcount == 0) { 3025 if (pep == prevpep) { 3026 fromproc->edge = pep->next; 3027 } else { 3028 prevpep->next = pep->next; 3029 } 3030 toproc->incount--; 3031 flk_proc_release(toproc); 3032 flk_free_proc_edge(pep); 3033 } 3034 break; 3035 } 3036 prevpep = pep; 3037 pep = pep->next; 3038 } 3039 flk_proc_release(fromproc); 3040 mutex_exit(&flock_lock); 3041 return; 3042 add: 3043 3044 pep = fromproc->edge; 3045 3046 while (pep != NULL) { 3047 if (pep->to_proc == toproc) { 3048 ASSERT(pep->refcount > 0); 3049 pep->refcount++; 3050 break; 3051 } 3052 pep = pep->next; 3053 } 3054 if (pep == NULL) { 3055 pep = flk_get_proc_edge(); 3056 pep->to_proc = toproc; 3057 pep->refcount = 1; 3058 toproc->incount++; 3059 pep->next = fromproc->edge; 3060 fromproc->edge = pep; 3061 } 3062 mutex_exit(&flock_lock); 3063 } 3064 3065 /* 3066 * Set the control status for lock manager requests. 3067 * 3068 */ 3069 3070 /* 3071 * PSARC case 1997/292 3072 * 3073 * Requires: "nlmid" must be >= 1 and <= clconf_maximum_nodeid(). 3074 * Effects: Set the state of the NLM server identified by "nlmid" 3075 * in the NLM registry to state "nlm_state." 3076 * Raises exception no_such_nlm if "nlmid" doesn't identify a known 3077 * NLM server to this LLM. 
3078 * Note that when this routine is called with NLM_SHUTTING_DOWN there 3079 * may be locks requests that have gotten started but not finished. In 3080 * particular, there may be blocking requests that are in the callback code 3081 * before sleeping (so they're not holding the lock for the graph). If 3082 * such a thread reacquires the graph's lock (to go to sleep) after 3083 * NLM state in the NLM registry is set to a non-up value, 3084 * it will notice the status and bail out. If the request gets 3085 * granted before the thread can check the NLM registry, let it 3086 * continue normally. It will get flushed when we are called with NLM_DOWN. 3087 * 3088 * Modifies: nlm_reg_obj (global) 3089 * Arguments: 3090 * nlmid (IN): id uniquely identifying an NLM server 3091 * nlm_state (IN): NLM server state to change "nlmid" to 3092 */ 3093 void 3094 cl_flk_set_nlm_status(int nlmid, flk_nlm_status_t nlm_state) 3095 { 3096 /* 3097 * Check to see if node is booted as a cluster. If not, return. 3098 */ 3099 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 3100 return; 3101 } 3102 3103 /* 3104 * Check for development/debugging. It is possible to boot a node 3105 * in non-cluster mode, and then run a special script, currently 3106 * available only to developers, to bring up the node as part of a 3107 * cluster. The problem is that running such a script does not 3108 * result in the routine flk_init() being called and hence global array 3109 * nlm_reg_status is NULL. The NLM thinks it's in cluster mode, 3110 * but the LLM needs to do an additional check to see if the global 3111 * array has been created or not. If nlm_reg_status is NULL, then 3112 * return, else continue. 3113 */ 3114 if (nlm_reg_status == NULL) { 3115 return; 3116 } 3117 3118 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 3119 mutex_enter(&nlm_reg_lock); 3120 3121 if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status, nlmid)) { 3122 /* 3123 * If the NLM server "nlmid" is unknown in the NLM registry, 3124 * add it to the registry in the nlm shutting down state. 3125 */ 3126 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, 3127 FLK_NLM_SHUTTING_DOWN); 3128 } else { 3129 /* 3130 * Change the state of the NLM server identified by "nlmid" 3131 * in the NLM registry to the argument "nlm_state." 3132 */ 3133 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, 3134 nlm_state); 3135 } 3136 3137 /* 3138 * The reason we must register the NLM server that is shutting down 3139 * with an LLM that doesn't already know about it (never sent a lock 3140 * request) is to handle correctly a race between shutdown and a new 3141 * lock request. Suppose that a shutdown request from the NLM server 3142 * invokes this routine at the LLM, and a thread is spawned to 3143 * service the request. Now suppose a new lock request is in 3144 * progress and has already passed the first line of defense in 3145 * reclock(), which denies new locks requests from NLM servers 3146 * that are not in the NLM_UP state. After the current routine 3147 * is invoked for both phases of shutdown, the routine will return, 3148 * having done nothing, and the lock request will proceed and 3149 * probably be granted. The problem is that the shutdown was ignored 3150 * by the lock request because there was no record of that NLM server 3151 * shutting down. We will be in the peculiar position of thinking 3152 * that we've shutdown the NLM server and all locks at all LLMs have 3153 * been discarded, but in fact there's still one lock held. 
3154 * The solution is to record the existence of NLM server and change 3155 * its state immediately to NLM_SHUTTING_DOWN. The lock request in 3156 * progress may proceed because the next phase NLM_DOWN will catch 3157 * this lock and discard it. 3158 */ 3159 mutex_exit(&nlm_reg_lock); 3160 3161 switch (nlm_state) { 3162 case FLK_NLM_UP: 3163 /* 3164 * Change the NLM state of all locks still held on behalf of 3165 * the NLM server identified by "nlmid" to NLM_UP. 3166 */ 3167 cl_flk_change_nlm_state_all_locks(nlmid, FLK_NLM_UP); 3168 break; 3169 3170 case FLK_NLM_SHUTTING_DOWN: 3171 /* 3172 * Wake up all sleeping locks for the NLM server identified 3173 * by "nlmid." Note that eventually all woken threads will 3174 * have their lock requests cancelled and descriptors 3175 * removed from the sleeping lock list. Note that the NLM 3176 * server state associated with each lock descriptor is 3177 * changed to FLK_NLM_SHUTTING_DOWN. 3178 */ 3179 cl_flk_wakeup_sleeping_nlm_locks(nlmid); 3180 break; 3181 3182 case FLK_NLM_DOWN: 3183 /* 3184 * Discard all active, granted locks for this NLM server 3185 * identified by "nlmid." 3186 */ 3187 cl_flk_unlock_nlm_granted(nlmid); 3188 break; 3189 3190 default: 3191 panic("cl_set_nlm_status: bad status (%d)", nlm_state); 3192 } 3193 } 3194 3195 /* 3196 * Set the control status for lock manager requests. 3197 * 3198 * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there 3199 * may be locks requests that have gotten started but not finished. In 3200 * particular, there may be blocking requests that are in the callback code 3201 * before sleeping (so they're not holding the lock for the graph). If 3202 * such a thread reacquires the graph's lock (to go to sleep) after 3203 * flk_lockmgr_status is set to a non-up value, it will notice the status 3204 * and bail out. If the request gets granted before the thread can check 3205 * flk_lockmgr_status, let it continue normally. It will get flushed when 3206 * we are called with FLK_LOCKMGR_DOWN. 3207 */ 3208 3209 void 3210 flk_set_lockmgr_status(flk_lockmgr_status_t status) 3211 { 3212 int i; 3213 graph_t *gp; 3214 struct flock_globals *fg; 3215 3216 fg = flk_get_globals(); 3217 ASSERT(fg != NULL); 3218 3219 mutex_enter(&flock_lock); 3220 fg->flk_lockmgr_status = status; 3221 mutex_exit(&flock_lock); 3222 3223 /* 3224 * If the lock manager is coming back up, all that's needed is to 3225 * propagate this information to the graphs. If the lock manager 3226 * is going down, additional action is required, and each graph's 3227 * copy of the state is updated atomically with this other action. 3228 */ 3229 switch (status) { 3230 case FLK_LOCKMGR_UP: 3231 for (i = 0; i < HASH_SIZE; i++) { 3232 mutex_enter(&flock_lock); 3233 gp = lock_graph[i]; 3234 mutex_exit(&flock_lock); 3235 if (gp == NULL) 3236 continue; 3237 mutex_enter(&gp->gp_mutex); 3238 fg->lockmgr_status[i] = status; 3239 mutex_exit(&gp->gp_mutex); 3240 } 3241 break; 3242 case FLK_WAKEUP_SLEEPERS: 3243 wakeup_sleeping_lockmgr_locks(fg); 3244 break; 3245 case FLK_LOCKMGR_DOWN: 3246 unlock_lockmgr_granted(fg); 3247 break; 3248 default: 3249 panic("flk_set_lockmgr_status: bad status (%d)", status); 3250 break; 3251 } 3252 } 3253 3254 /* 3255 * This routine returns all the locks that are active or sleeping and are 3256 * associated with a particular set of identifiers. If lock_state != 0, then 3257 * only locks that match the lock_state are returned. If lock_state == 0, then 3258 * all locks are returned. If pid == NOPID, the pid is ignored. 
If 3259 * use_sysid is FALSE, then the sysid is ignored. If vp is NULL, then the 3260 * vnode pointer is ignored. 3261 * 3262 * A list containing the vnode pointer and an flock structure 3263 * describing the lock is returned. Each element in the list is 3264 * dynamically allocated and must be freed by the caller. The 3265 * last item in the list is denoted by a NULL value in the ll_next 3266 * field. 3267 * 3268 * The vnode pointers returned are held. The caller is responsible 3269 * for releasing these. Note that the returned list is only a snapshot of 3270 * the current lock information, and that it is a snapshot of a moving 3271 * target (only one graph is locked at a time). 3272 */ 3273 3274 locklist_t * 3275 get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid, 3276 pid_t pid, const vnode_t *vp, zoneid_t zoneid) 3277 { 3278 lock_descriptor_t *lock; 3279 lock_descriptor_t *graph_head; 3280 locklist_t listhead; 3281 locklist_t *llheadp; 3282 locklist_t *llp; 3283 locklist_t *lltp; 3284 graph_t *gp; 3285 int i; 3286 int first_index; /* graph index */ 3287 int num_indexes; /* graph index */ 3288 3289 ASSERT((list_type == FLK_ACTIVE_STATE) || 3290 (list_type == FLK_SLEEPING_STATE)); 3291 3292 /* 3293 * Get a pointer to something to use as a list head while building 3294 * the rest of the list. 3295 */ 3296 llheadp = &listhead; 3297 lltp = llheadp; 3298 llheadp->ll_next = (locklist_t *)NULL; 3299 3300 /* Figure out which graphs we want to look at. */ 3301 if (vp == NULL) { 3302 first_index = 0; 3303 num_indexes = HASH_SIZE; 3304 } else { 3305 first_index = HASH_INDEX(vp); 3306 num_indexes = 1; 3307 } 3308 3309 for (i = first_index; i < first_index + num_indexes; i++) { 3310 mutex_enter(&flock_lock); 3311 gp = lock_graph[i]; 3312 mutex_exit(&flock_lock); 3313 if (gp == NULL) { 3314 continue; 3315 } 3316 3317 mutex_enter(&gp->gp_mutex); 3318 graph_head = (list_type == FLK_ACTIVE_STATE) ? 3319 ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp); 3320 for (lock = graph_head->l_next; 3321 lock != graph_head; 3322 lock = lock->l_next) { 3323 if (use_sysid && lock->l_flock.l_sysid != sysid) 3324 continue; 3325 if (pid != NOPID && lock->l_flock.l_pid != pid) 3326 continue; 3327 if (vp != NULL && lock->l_vnode != vp) 3328 continue; 3329 if (lock_state && !(lock_state & lock->l_state)) 3330 continue; 3331 if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES) 3332 continue; 3333 /* 3334 * A matching lock was found. Allocate 3335 * space for a new locklist entry and fill 3336 * it in. 3337 */ 3338 llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP); 3339 lltp->ll_next = llp; 3340 VN_HOLD(lock->l_vnode); 3341 llp->ll_vp = lock->l_vnode; 3342 create_flock(lock, &(llp->ll_flock)); 3343 llp->ll_next = (locklist_t *)NULL; 3344 lltp = llp; 3345 } 3346 mutex_exit(&gp->gp_mutex); 3347 } 3348 3349 llp = llheadp->ll_next; 3350 return (llp); 3351 } 3352 3353 /* 3354 * These two functions are simply interfaces to get_lock_list. They return 3355 * a list of sleeping or active locks for the given sysid and pid. See 3356 * get_lock_list for details. 3357 * 3358 * In either case we don't particularly care to specify the zone of interest; 3359 * the sysid-space is global across zones, so the sysid will map to exactly one 3360 * zone, and we'll return information for that zone. 
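 *
 * Illustrative use (hypothetical caller): walk the returned snapshot and then
 * free it; flk_free_locklist() also releases the vnode holds that were taken
 * on the caller's behalf:
 *
 *	locklist_t *llhead, *llp;
 *
 *	llhead = flk_get_active_locks(sysid, pid);
 *	for (llp = llhead; llp != NULL; llp = llp->ll_next)
 *		... examine llp->ll_vp and llp->ll_flock ...
 *	flk_free_locklist(llhead);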
3361 */ 3362 3363 locklist_t * 3364 flk_get_sleeping_locks(int sysid, pid_t pid) 3365 { 3366 return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL, 3367 ALL_ZONES)); 3368 } 3369 3370 locklist_t * 3371 flk_get_active_locks(int sysid, pid_t pid) 3372 { 3373 return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL, 3374 ALL_ZONES)); 3375 } 3376 3377 /* 3378 * Another interface to get_lock_list. This one returns all the active 3379 * locks for a given vnode. Again, see get_lock_list for details. 3380 * 3381 * We don't need to specify which zone's locks we're interested in. The matter 3382 * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't 3383 * be used by multiple zones, so the list of locks will all be from the right 3384 * zone. 3385 */ 3386 3387 locklist_t * 3388 flk_active_locks_for_vp(const vnode_t *vp) 3389 { 3390 return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp, 3391 ALL_ZONES)); 3392 } 3393 3394 /* 3395 * Another interface to get_lock_list. This one returns all the active 3396 * nbmand locks for a given vnode. Again, see get_lock_list for details. 3397 * 3398 * See the comment for flk_active_locks_for_vp() for why we don't care to 3399 * specify the particular zone of interest. 3400 */ 3401 locklist_t * 3402 flk_active_nbmand_locks_for_vp(const vnode_t *vp) 3403 { 3404 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE, 3405 NOPID, vp, ALL_ZONES)); 3406 } 3407 3408 /* 3409 * Another interface to get_lock_list. This one returns all the active 3410 * nbmand locks for a given pid. Again, see get_lock_list for details. 3411 * 3412 * The zone doesn't need to be specified here; the locks held by a 3413 * particular process will either be local (ie, non-NFS) or from the zone 3414 * the process is executing in. This is because other parts of the system 3415 * ensure that an NFS vnode can't be used in a zone other than that in 3416 * which it was opened. 3417 */ 3418 locklist_t * 3419 flk_active_nbmand_locks(pid_t pid) 3420 { 3421 return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE, 3422 pid, NULL, ALL_ZONES)); 3423 } 3424 3425 /* 3426 * Free up all entries in the locklist. 3427 */ 3428 void 3429 flk_free_locklist(locklist_t *llp) 3430 { 3431 locklist_t *next_llp; 3432 3433 while (llp) { 3434 next_llp = llp->ll_next; 3435 VN_RELE(llp->ll_vp); 3436 kmem_free(llp, sizeof (*llp)); 3437 llp = next_llp; 3438 } 3439 } 3440 3441 static void 3442 cl_flk_change_nlm_state_all_locks(int nlmid, flk_nlm_status_t nlm_state) 3443 { 3444 /* 3445 * For each graph "lg" in the hash table lock_graph do 3446 * a. Get the list of sleeping locks 3447 * b. For each lock descriptor in the list do 3448 * i. If the requested lock is an NLM server request AND 3449 * the nlmid is the same as the routine argument then 3450 * change the lock descriptor's state field to 3451 * "nlm_state." 3452 * c. Get the list of active locks 3453 * d. For each lock descriptor in the list do 3454 * i. If the requested lock is an NLM server request AND 3455 * the nlmid is the same as the routine argument then 3456 * change the lock descriptor's state field to 3457 * "nlm_state." 
3458 */ 3459 3460 int i; 3461 graph_t *gp; /* lock graph */ 3462 lock_descriptor_t *lock; /* lock */ 3463 lock_descriptor_t *nlock = NULL; /* next lock */ 3464 int lock_nlmid; 3465 3466 for (i = 0; i < HASH_SIZE; i++) { 3467 mutex_enter(&flock_lock); 3468 gp = lock_graph[i]; 3469 mutex_exit(&flock_lock); 3470 if (gp == NULL) { 3471 continue; 3472 } 3473 3474 /* Get list of sleeping locks in current lock graph. */ 3475 mutex_enter(&gp->gp_mutex); 3476 for (lock = SLEEPING_HEAD(gp)->l_next; 3477 lock != SLEEPING_HEAD(gp); 3478 lock = nlock) { 3479 nlock = lock->l_next; 3480 /* get NLM id */ 3481 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3482 3483 /* 3484 * If NLM server request AND nlmid of lock matches 3485 * nlmid of argument, then set the NLM state of the 3486 * lock to "nlm_state." 3487 */ 3488 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 3489 SET_NLM_STATE(lock, nlm_state); 3490 } 3491 } 3492 3493 /* Get list of active locks in current lock graph. */ 3494 for (lock = ACTIVE_HEAD(gp)->l_next; 3495 lock != ACTIVE_HEAD(gp); 3496 lock = nlock) { 3497 nlock = lock->l_next; 3498 /* get NLM id */ 3499 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3500 3501 /* 3502 * If NLM server request AND nlmid of lock matches 3503 * nlmid of argument, then set the NLM state of the 3504 * lock to "nlm_state." 3505 */ 3506 if (IS_LOCKMGR(lock) && nlmid == lock_nlmid) { 3507 ASSERT(IS_ACTIVE(lock)); 3508 SET_NLM_STATE(lock, nlm_state); 3509 } 3510 } 3511 mutex_exit(&gp->gp_mutex); 3512 } 3513 } 3514 3515 /* 3516 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid(). 3517 * Effects: Find all sleeping lock manager requests _only_ for the NLM server 3518 * identified by "nlmid." Poke those lock requests. 3519 */ 3520 static void 3521 cl_flk_wakeup_sleeping_nlm_locks(int nlmid) 3522 { 3523 lock_descriptor_t *lock; 3524 lock_descriptor_t *nlock = NULL; /* next lock */ 3525 int i; 3526 graph_t *gp; 3527 int lock_nlmid; 3528 3529 for (i = 0; i < HASH_SIZE; i++) { 3530 mutex_enter(&flock_lock); 3531 gp = lock_graph[i]; 3532 mutex_exit(&flock_lock); 3533 if (gp == NULL) { 3534 continue; 3535 } 3536 3537 mutex_enter(&gp->gp_mutex); 3538 for (lock = SLEEPING_HEAD(gp)->l_next; 3539 lock != SLEEPING_HEAD(gp); 3540 lock = nlock) { 3541 nlock = lock->l_next; 3542 /* 3543 * If NLM server request _and_ nlmid of lock matches 3544 * nlmid of argument, then set the NLM state of the 3545 * lock to NLM_SHUTTING_DOWN, and wake up sleeping 3546 * request. 3547 */ 3548 if (IS_LOCKMGR(lock)) { 3549 /* get NLM id */ 3550 lock_nlmid = 3551 GETNLMID(lock->l_flock.l_sysid); 3552 if (nlmid == lock_nlmid) { 3553 SET_NLM_STATE(lock, 3554 FLK_NLM_SHUTTING_DOWN); 3555 INTERRUPT_WAKEUP(lock); 3556 } 3557 } 3558 } 3559 mutex_exit(&gp->gp_mutex); 3560 } 3561 } 3562 3563 /* 3564 * Requires: "nlmid" >= 1 and <= clconf_maximum_nodeid() 3565 * Effects: Find all active (granted) lock manager locks _only_ for the 3566 * NLM server identified by "nlmid" and release them. 
3567 */ 3568 static void 3569 cl_flk_unlock_nlm_granted(int nlmid) 3570 { 3571 lock_descriptor_t *lock; 3572 lock_descriptor_t *nlock = NULL; /* next lock */ 3573 int i; 3574 graph_t *gp; 3575 int lock_nlmid; 3576 3577 for (i = 0; i < HASH_SIZE; i++) { 3578 mutex_enter(&flock_lock); 3579 gp = lock_graph[i]; 3580 mutex_exit(&flock_lock); 3581 if (gp == NULL) { 3582 continue; 3583 } 3584 3585 mutex_enter(&gp->gp_mutex); 3586 for (lock = ACTIVE_HEAD(gp)->l_next; 3587 lock != ACTIVE_HEAD(gp); 3588 lock = nlock) { 3589 nlock = lock->l_next; 3590 ASSERT(IS_ACTIVE(lock)); 3591 3592 /* 3593 * If it's an NLM server request _and_ nlmid of 3594 * the lock matches nlmid of argument, then 3595 * remove the active lock the list, wakup blocked 3596 * threads, and free the storage for the lock. 3597 * Note that there's no need to mark the NLM state 3598 * of this lock to NLM_DOWN because the lock will 3599 * be deleted anyway and its storage freed. 3600 */ 3601 if (IS_LOCKMGR(lock)) { 3602 /* get NLM id */ 3603 lock_nlmid = GETNLMID(lock->l_flock.l_sysid); 3604 if (nlmid == lock_nlmid) { 3605 flk_delete_active_lock(lock, 0); 3606 flk_wakeup(lock, 1); 3607 flk_free_lock(lock); 3608 } 3609 } 3610 } 3611 mutex_exit(&gp->gp_mutex); 3612 } 3613 } 3614 3615 /* 3616 * Find all sleeping lock manager requests and poke them. 3617 */ 3618 static void 3619 wakeup_sleeping_lockmgr_locks(struct flock_globals *fg) 3620 { 3621 lock_descriptor_t *lock; 3622 lock_descriptor_t *nlock = NULL; /* next lock */ 3623 int i; 3624 graph_t *gp; 3625 zoneid_t zoneid = getzoneid(); 3626 3627 for (i = 0; i < HASH_SIZE; i++) { 3628 mutex_enter(&flock_lock); 3629 gp = lock_graph[i]; 3630 mutex_exit(&flock_lock); 3631 if (gp == NULL) { 3632 continue; 3633 } 3634 3635 mutex_enter(&gp->gp_mutex); 3636 fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS; 3637 for (lock = SLEEPING_HEAD(gp)->l_next; 3638 lock != SLEEPING_HEAD(gp); 3639 lock = nlock) { 3640 nlock = lock->l_next; 3641 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) { 3642 INTERRUPT_WAKEUP(lock); 3643 } 3644 } 3645 mutex_exit(&gp->gp_mutex); 3646 } 3647 } 3648 3649 3650 /* 3651 * Find all active (granted) lock manager locks and release them. 3652 */ 3653 static void 3654 unlock_lockmgr_granted(struct flock_globals *fg) 3655 { 3656 lock_descriptor_t *lock; 3657 lock_descriptor_t *nlock = NULL; /* next lock */ 3658 int i; 3659 graph_t *gp; 3660 zoneid_t zoneid = getzoneid(); 3661 3662 for (i = 0; i < HASH_SIZE; i++) { 3663 mutex_enter(&flock_lock); 3664 gp = lock_graph[i]; 3665 mutex_exit(&flock_lock); 3666 if (gp == NULL) { 3667 continue; 3668 } 3669 3670 mutex_enter(&gp->gp_mutex); 3671 fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN; 3672 for (lock = ACTIVE_HEAD(gp)->l_next; 3673 lock != ACTIVE_HEAD(gp); 3674 lock = nlock) { 3675 nlock = lock->l_next; 3676 if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) { 3677 ASSERT(IS_ACTIVE(lock)); 3678 flk_delete_active_lock(lock, 0); 3679 flk_wakeup(lock, 1); 3680 flk_free_lock(lock); 3681 } 3682 } 3683 mutex_exit(&gp->gp_mutex); 3684 } 3685 } 3686 3687 3688 /* 3689 * Wait until a lock is granted, cancelled, or interrupted. 
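 * If cv_wait_sig() is interrupted by a signal (returns zero), the request is
 * marked FLK_INTERRUPTED_STATE, which terminates the loop.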
3690 */ 3691 3692 static void 3693 wait_for_lock(lock_descriptor_t *request) 3694 { 3695 graph_t *gp = request->l_graph; 3696 3697 ASSERT(MUTEX_HELD(&gp->gp_mutex)); 3698 3699 while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) && 3700 !(IS_INTERRUPTED(request))) { 3701 if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) { 3702 flk_set_state(request, FLK_INTERRUPTED_STATE); 3703 request->l_state |= INTERRUPTED_LOCK; 3704 } 3705 } 3706 } 3707 3708 /* 3709 * Create an flock structure from the existing lock information 3710 * 3711 * This routine is used to create flock structures for the lock manager 3712 * to use in a reclaim request. Since the lock was originated on this 3713 * host, it must be conforming to UNIX semantics, so no checking is 3714 * done to make sure it falls within the lower half of the 32-bit range. 3715 */ 3716 3717 static void 3718 create_flock(lock_descriptor_t *lp, flock64_t *flp) 3719 { 3720 ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND); 3721 ASSERT(lp->l_end >= lp->l_start); 3722 3723 flp->l_type = lp->l_type; 3724 flp->l_whence = 0; 3725 flp->l_start = lp->l_start; 3726 flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 : 3727 (lp->l_end - lp->l_start + 1); 3728 flp->l_sysid = lp->l_flock.l_sysid; 3729 flp->l_pid = lp->l_flock.l_pid; 3730 } 3731 3732 /* 3733 * Convert flock_t data describing a lock range into unsigned long starting 3734 * and ending points, which are put into lock_request. Returns 0 or an 3735 * errno value. 3736 * Large Files: max is passed by the caller and we return EOVERFLOW 3737 * as defined by LFS API. 3738 */ 3739 3740 int 3741 flk_convert_lock_data(vnode_t *vp, flock64_t *flp, 3742 u_offset_t *start, u_offset_t *end, offset_t offset) 3743 { 3744 struct vattr vattr; 3745 int error; 3746 3747 /* 3748 * Determine the starting point of the request 3749 */ 3750 switch (flp->l_whence) { 3751 case 0: /* SEEK_SET */ 3752 *start = (u_offset_t)flp->l_start; 3753 break; 3754 case 1: /* SEEK_CUR */ 3755 *start = (u_offset_t)(flp->l_start + offset); 3756 break; 3757 case 2: /* SEEK_END */ 3758 vattr.va_mask = AT_SIZE; 3759 if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) 3760 return (error); 3761 *start = (u_offset_t)(flp->l_start + vattr.va_size); 3762 break; 3763 default: 3764 return (EINVAL); 3765 } 3766 3767 /* 3768 * Determine the range covered by the request. 3769 */ 3770 if (flp->l_len == 0) 3771 *end = MAX_U_OFFSET_T; 3772 else if ((offset_t)flp->l_len > 0) { 3773 *end = (u_offset_t)(*start + (flp->l_len - 1)); 3774 } else { 3775 /* 3776 * Negative length; why do we even allow this ? 3777 * Because this allows easy specification of 3778 * the last n bytes of the file. 3779 */ 3780 *end = *start; 3781 *start += (u_offset_t)flp->l_len; 3782 (*start)++; 3783 } 3784 return (0); 3785 } 3786 3787 /* 3788 * Check the validity of lock data. This can used by the NFS 3789 * frlock routines to check data before contacting the server. The 3790 * server must support semantics that aren't as restrictive as 3791 * the UNIX API, so the NFS client is required to check. 3792 * The maximum is now passed in by the caller. 3793 */ 3794 3795 int 3796 flk_check_lock_data(u_offset_t start, u_offset_t end, offset_t max) 3797 { 3798 /* 3799 * The end (length) for local locking should never be greater 3800 * than MAXEND. However, the representation for 3801 * the entire file is MAX_U_OFFSET_T. 
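 * (Hence the check below allows end == MAX_U_OFFSET_T even when it exceeds
 * the caller-supplied maximum.)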
3802 */ 3803 if ((start > max) || 3804 ((end > max) && (end != MAX_U_OFFSET_T))) { 3805 return (EINVAL); 3806 } 3807 if (start > end) { 3808 return (EINVAL); 3809 } 3810 return (0); 3811 } 3812 3813 /* 3814 * Fill in request->l_flock with information about the lock blocking the 3815 * request. The complexity here is that lock manager requests are allowed 3816 * to see into the upper part of the 32-bit address range, whereas local 3817 * requests are only allowed to see signed values. 3818 * 3819 * What should be done when "blocker" is a lock manager lock that uses the 3820 * upper portion of the 32-bit range, but "request" is local? Since the 3821 * request has already been determined to have been blocked by the blocker, 3822 * at least some portion of "blocker" must be in the range of the request, 3823 * or the request extends to the end of file. For the first case, the 3824 * portion in the lower range is returned with the indication that it goes 3825 * "to EOF." For the second case, the last byte of the lower range is 3826 * returned with the indication that it goes "to EOF." 3827 */ 3828 3829 static void 3830 report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request) 3831 { 3832 flock64_t *flrp; /* l_flock portion of request */ 3833 3834 ASSERT(blocker != NULL); 3835 3836 flrp = &request->l_flock; 3837 flrp->l_whence = 0; 3838 flrp->l_type = blocker->l_type; 3839 flrp->l_pid = blocker->l_flock.l_pid; 3840 flrp->l_sysid = blocker->l_flock.l_sysid; 3841 3842 if (IS_LOCKMGR(request)) { 3843 flrp->l_start = blocker->l_start; 3844 if (blocker->l_end == MAX_U_OFFSET_T) 3845 flrp->l_len = 0; 3846 else 3847 flrp->l_len = blocker->l_end - blocker->l_start + 1; 3848 } else { 3849 if (blocker->l_start > MAXEND) { 3850 flrp->l_start = MAXEND; 3851 flrp->l_len = 0; 3852 } else { 3853 flrp->l_start = blocker->l_start; 3854 if (blocker->l_end == MAX_U_OFFSET_T) 3855 flrp->l_len = 0; 3856 else 3857 flrp->l_len = blocker->l_end - 3858 blocker->l_start + 1; 3859 } 3860 } 3861 } 3862 3863 /* 3864 * PSARC case 1997/292 3865 */ 3866 /* 3867 * This is the public routine exported by flock.h. 3868 */ 3869 void 3870 cl_flk_change_nlm_state_to_unknown(int nlmid) 3871 { 3872 /* 3873 * Check to see if node is booted as a cluster. If not, return. 3874 */ 3875 if ((cluster_bootflags & CLUSTER_BOOTED) == 0) { 3876 return; 3877 } 3878 3879 /* 3880 * See comment in cl_flk_set_nlm_status(). 3881 */ 3882 if (nlm_reg_status == NULL) { 3883 return; 3884 } 3885 3886 /* 3887 * protect NLM registry state with a mutex. 3888 */ 3889 ASSERT(nlmid <= nlm_status_size && nlmid >= 0); 3890 mutex_enter(&nlm_reg_lock); 3891 FLK_REGISTRY_CHANGE_NLM_STATE(nlm_reg_status, nlmid, FLK_NLM_UNKNOWN); 3892 mutex_exit(&nlm_reg_lock); 3893 } 3894 3895 /* 3896 * Return non-zero if the given I/O request conflicts with an active NBMAND 3897 * lock. 3898 * If svmand is non-zero, it means look at all active locks, not just NBMAND 3899 * locks. 
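 *
 * Illustrative use (hypothetical caller in a file system's read path; the
 * surrounding names are not from this file):
 *
 *	if (nbl_lock_conflict(vp, NBL_READ, uiop->uio_loffset,
 *	    uiop->uio_resid, svmand, ct))
 *		return (EACCES);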
3900 */ 3901 3902 int 3903 nbl_lock_conflict(vnode_t *vp, nbl_op_t op, u_offset_t offset, 3904 ssize_t length, int svmand, caller_context_t *ct) 3905 { 3906 int conflict = 0; 3907 graph_t *gp; 3908 lock_descriptor_t *lock; 3909 pid_t pid; 3910 int sysid; 3911 3912 if (ct == NULL) { 3913 pid = curproc->p_pid; 3914 sysid = 0; 3915 } else { 3916 pid = ct->cc_pid; 3917 sysid = ct->cc_sysid; 3918 } 3919 3920 mutex_enter(&flock_lock); 3921 gp = lock_graph[HASH_INDEX(vp)]; 3922 mutex_exit(&flock_lock); 3923 if (gp == NULL) 3924 return (0); 3925 3926 mutex_enter(&gp->gp_mutex); 3927 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 3928 3929 for (; lock && lock->l_vnode == vp; lock = lock->l_next) { 3930 if ((svmand || (lock->l_state & NBMAND_LOCK)) && 3931 (lock->l_flock.l_sysid != sysid || 3932 lock->l_flock.l_pid != pid) && 3933 lock_blocks_io(op, offset, length, 3934 lock->l_type, lock->l_start, lock->l_end)) { 3935 conflict = 1; 3936 break; 3937 } 3938 } 3939 mutex_exit(&gp->gp_mutex); 3940 3941 return (conflict); 3942 } 3943 3944 /* 3945 * Return non-zero if the given I/O request conflicts with the given lock. 3946 */ 3947 3948 static int 3949 lock_blocks_io(nbl_op_t op, u_offset_t offset, ssize_t length, 3950 int lock_type, u_offset_t lock_start, u_offset_t lock_end) 3951 { 3952 ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE); 3953 ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK); 3954 3955 if (op == NBL_READ && lock_type == F_RDLCK) 3956 return (0); 3957 3958 if (offset <= lock_start && lock_start < offset + length) 3959 return (1); 3960 if (lock_start <= offset && offset <= lock_end) 3961 return (1); 3962 3963 return (0); 3964 } 3965 3966 #ifdef DEBUG 3967 static void 3968 check_active_locks(graph_t *gp) 3969 { 3970 lock_descriptor_t *lock, *lock1; 3971 edge_t *ep; 3972 3973 for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp); 3974 lock = lock->l_next) { 3975 ASSERT(IS_ACTIVE(lock)); 3976 ASSERT(NOT_BLOCKED(lock)); 3977 ASSERT(!IS_BARRIER(lock)); 3978 3979 ep = FIRST_IN(lock); 3980 3981 while (ep != HEAD(lock)) { 3982 ASSERT(IS_SLEEPING(ep->from_vertex)); 3983 ASSERT(!NOT_BLOCKED(ep->from_vertex)); 3984 ep = NEXT_IN(ep); 3985 } 3986 3987 for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp); 3988 lock1 = lock1->l_next) { 3989 if (lock1->l_vnode == lock->l_vnode) { 3990 if (BLOCKS(lock1, lock)) { 3991 cmn_err(CE_PANIC, 3992 "active lock %p blocks %p", 3993 (void *)lock1, (void *)lock); 3994 } else if (BLOCKS(lock, lock1)) { 3995 cmn_err(CE_PANIC, 3996 "active lock %p blocks %p", 3997 (void *)lock, (void *)lock1); 3998 } 3999 } 4000 } 4001 } 4002 } 4003 4004 /* 4005 * Effect: This functions checks to see if the transition from 'old_state' to 4006 * 'new_state' is a valid one. It returns 0 if the transition is valid 4007 * and 1 if it is not. 
4008 * For a map of valid transitions, see sys/flock_impl.h 4009 */ 4010 static int 4011 check_lock_transition(int old_state, int new_state) 4012 { 4013 switch (old_state) { 4014 case FLK_INITIAL_STATE: 4015 if ((new_state == FLK_START_STATE) || 4016 (new_state == FLK_SLEEPING_STATE) || 4017 (new_state == FLK_ACTIVE_STATE) || 4018 (new_state == FLK_DEAD_STATE)) { 4019 return (0); 4020 } else { 4021 return (1); 4022 } 4023 case FLK_START_STATE: 4024 if ((new_state == FLK_ACTIVE_STATE) || 4025 (new_state == FLK_DEAD_STATE)) { 4026 return (0); 4027 } else { 4028 return (1); 4029 } 4030 case FLK_ACTIVE_STATE: 4031 if (new_state == FLK_DEAD_STATE) { 4032 return (0); 4033 } else { 4034 return (1); 4035 } 4036 case FLK_SLEEPING_STATE: 4037 if ((new_state == FLK_GRANTED_STATE) || 4038 (new_state == FLK_INTERRUPTED_STATE) || 4039 (new_state == FLK_CANCELLED_STATE)) { 4040 return (0); 4041 } else { 4042 return (1); 4043 } 4044 case FLK_GRANTED_STATE: 4045 if ((new_state == FLK_START_STATE) || 4046 (new_state == FLK_INTERRUPTED_STATE) || 4047 (new_state == FLK_CANCELLED_STATE)) { 4048 return (0); 4049 } else { 4050 return (1); 4051 } 4052 case FLK_CANCELLED_STATE: 4053 if ((new_state == FLK_INTERRUPTED_STATE) || 4054 (new_state == FLK_DEAD_STATE)) { 4055 return (0); 4056 } else { 4057 return (1); 4058 } 4059 case FLK_INTERRUPTED_STATE: 4060 if (new_state == FLK_DEAD_STATE) { 4061 return (0); 4062 } else { 4063 return (1); 4064 } 4065 case FLK_DEAD_STATE: 4066 /* May be set more than once */ 4067 if (new_state == FLK_DEAD_STATE) { 4068 return (0); 4069 } else { 4070 return (1); 4071 } 4072 default: 4073 return (1); 4074 } 4075 } 4076 4077 static void 4078 check_sleeping_locks(graph_t *gp) 4079 { 4080 lock_descriptor_t *lock1, *lock2; 4081 edge_t *ep; 4082 for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp); 4083 lock1 = lock1->l_next) { 4084 ASSERT(!IS_BARRIER(lock1)); 4085 for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp); 4086 lock2 = lock2->l_next) { 4087 if (lock1->l_vnode == lock2->l_vnode) { 4088 if (BLOCKS(lock2, lock1)) { 4089 ASSERT(!IS_GRANTED(lock1)); 4090 ASSERT(!NOT_BLOCKED(lock1)); 4091 path(lock1, lock2); 4092 } 4093 } 4094 } 4095 4096 for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp); 4097 lock2 = lock2->l_next) { 4098 ASSERT(!IS_BARRIER(lock1)); 4099 if (lock1->l_vnode == lock2->l_vnode) { 4100 if (BLOCKS(lock2, lock1)) { 4101 ASSERT(!IS_GRANTED(lock1)); 4102 ASSERT(!NOT_BLOCKED(lock1)); 4103 path(lock1, lock2); 4104 } 4105 } 4106 } 4107 ep = FIRST_ADJ(lock1); 4108 while (ep != HEAD(lock1)) { 4109 ASSERT(BLOCKS(ep->to_vertex, lock1)); 4110 ep = NEXT_ADJ(ep); 4111 } 4112 } 4113 } 4114 4115 static int 4116 level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path) 4117 { 4118 edge_t *ep; 4119 lock_descriptor_t *vertex; 4120 lock_descriptor_t *vertex_stack; 4121 4122 STACK_INIT(vertex_stack); 4123 4124 flk_graph_uncolor(lock1->l_graph); 4125 ep = FIRST_ADJ(lock1); 4126 ASSERT(ep != HEAD(lock1)); 4127 while (ep != HEAD(lock1)) { 4128 if (no_path) 4129 ASSERT(ep->to_vertex != lock2); 4130 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack); 4131 COLOR(ep->to_vertex); 4132 ep = NEXT_ADJ(ep); 4133 } 4134 4135 while ((vertex = STACK_TOP(vertex_stack)) != NULL) { 4136 STACK_POP(vertex_stack, l_dstack); 4137 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex); 4138 ep = NEXT_ADJ(ep)) { 4139 if (COLORED(ep->to_vertex)) 4140 continue; 4141 COLOR(ep->to_vertex); 4142 if (ep->to_vertex == lock2) 4143 return (1); 4144 4145 STACK_PUSH(vertex_stack, 
ep->to_vertex, l_dstack); 4146 } 4147 } 4148 return (0); 4149 } 4150 4151 static void 4152 check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp) 4153 { 4154 lock_descriptor_t *lock; 4155 4156 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp); 4157 4158 if (lock) { 4159 while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) { 4160 if (lock->l_flock.l_pid == pid && 4161 lock->l_flock.l_sysid == sysid) 4162 cmn_err(CE_PANIC, 4163 "owner pid %d's lock %p in active queue", 4164 pid, (void *)lock); 4165 lock = lock->l_next; 4166 } 4167 } 4168 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp); 4169 4170 if (lock) { 4171 while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) { 4172 if (lock->l_flock.l_pid == pid && 4173 lock->l_flock.l_sysid == sysid) 4174 cmn_err(CE_PANIC, 4175 "owner pid %d's lock %p in sleep queue", 4176 pid, (void *)lock); 4177 lock = lock->l_next; 4178 } 4179 } 4180 } 4181 4182 static int 4183 level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4184 { 4185 edge_t *ep = FIRST_ADJ(lock1); 4186 4187 while (ep != HEAD(lock1)) { 4188 if (ep->to_vertex == lock2) 4189 return (1); 4190 else 4191 ep = NEXT_ADJ(ep); 4192 } 4193 return (0); 4194 } 4195 4196 static int 4197 no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4198 { 4199 return (!level_two_path(lock1, lock2, 1)); 4200 } 4201 4202 static void 4203 path(lock_descriptor_t *lock1, lock_descriptor_t *lock2) 4204 { 4205 if (level_one_path(lock1, lock2)) { 4206 if (level_two_path(lock1, lock2, 0) != 0) { 4207 cmn_err(CE_WARN, 4208 "one edge one path from lock1 %p lock2 %p", 4209 (void *)lock1, (void *)lock2); 4210 } 4211 } else if (no_path(lock1, lock2)) { 4212 cmn_err(CE_PANIC, 4213 "No path from lock1 %p to lock2 %p", 4214 (void *)lock1, (void *)lock2); 4215 } 4216 } 4217 #endif /* DEBUG */