1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
  24  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
  25  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  26  */
  27 
  28 #include <sys/note.h>
  29 #include <sys/t_lock.h>
  30 #include <sys/cmn_err.h>
  31 #include <sys/instance.h>
  32 #include <sys/conf.h>
  33 #include <sys/stat.h>
  34 #include <sys/ddi.h>
  35 #include <sys/hwconf.h>
  36 #include <sys/sunddi.h>
  37 #include <sys/sunndi.h>
  38 #include <sys/ddi_impldefs.h>
  39 #include <sys/ndi_impldefs.h>
  40 #include <sys/modctl.h>
  41 #include <sys/contract/device_impl.h>
  42 #include <sys/dacf.h>
  43 #include <sys/promif.h>
  44 #include <sys/pci.h>
  45 #include <sys/cpuvar.h>
  46 #include <sys/pathname.h>
  47 #include <sys/taskq.h>
  48 #include <sys/sysevent.h>
  49 #include <sys/sunmdi.h>
  50 #include <sys/stream.h>
  51 #include <sys/strsubr.h>
  52 #include <sys/fs/snode.h>
  53 #include <sys/fs/dv_node.h>
  54 #include <sys/reboot.h>
  55 #include <sys/sysmacros.h>
  56 #include <sys/systm.h>
  57 #include <sys/fs/sdev_impl.h>
  58 #include <sys/sunldi.h>
  59 #include <sys/sunldi_impl.h>
  60 #include <sys/bootprops.h>
  61 #include <sys/varargs.h>
  62 #include <sys/modhash.h>
  63 #include <sys/instance.h>
  64 
  65 #if defined(__amd64) && !defined(__xpv)
  66 #include <sys/iommulib.h>
  67 #endif
  68 
/*
 * Global devinfo debug flag word. DEBUG kernels start with DDI_AUDIT set,
 * which makes i_ddi_node_cache_init() call da_log_init(); non-DEBUG
 * kernels start with all flags clear.
 */
#ifdef DEBUG
int ddidebug = DDI_AUDIT;
#else
int ddidebug = 0;
#endif
  74 
/* Operation selectors; presumably the values stored in mtc_op - confirm */
#define MT_CONFIG_OP    0
#define MT_UNCONFIG_OP  1

/*
 * Multi-threaded configuration
 *
 * Handle describing one multi-threaded (un)configuration operation.
 * mt_config_init() returns a pointer to one; mt_config_children(),
 * mt_config_driver() and mt_config_fini() each take it as their argument.
 */
struct mt_config_handle {
        kmutex_t mtc_lock;
        kcondvar_t mtc_cv;
        int mtc_thr_count;
        dev_info_t *mtc_pdip;   /* parent dip for mt_config_children */
        dev_info_t **mtc_fdip;  /* "a" dip where unconfigure failed */
        major_t mtc_parmajor;   /* parent major for mt_config_driver */
        major_t mtc_major;
        int mtc_flags;
        int mtc_op;             /* config or unconfig */
        int mtc_error;          /* operation error */
        struct brevq_node **mtc_brevqp; /* outstanding branch events queue */
#ifdef DEBUG
        /* timing fields, present in DEBUG kernels only */
        int total_time;
        timestruc_t start_time;
#endif /* DEBUG */
};
  96 
/*
 * One element of the nodeid map: pairs a nodeid with the dip that owns it.
 * Elements are chained through 'next' on either the in-use list or the
 * free list of struct devi_nodeid_list.
 */
struct devi_nodeid {
        pnode_t nodeid;                 /* set from ddi_get_nodeid(dip) */
        dev_info_t *dip;                /* node the nodeid belongs to */
        struct devi_nodeid *next;       /* next element on the same list */
};
 102 
/*
 * Header of the nodeid map. i_ddi_alloc_node() pre-allocates one element
 * per persistent node onto dno_free; i_ddi_add_devimap() moves an element
 * from dno_free to dno_head, and i_ddi_remove_devimap() moves it back.
 */
struct devi_nodeid_list {
        kmutex_t dno_lock;              /* Protects other fields */
        struct devi_nodeid *dno_head;   /* list of devi nodeid elements */
        struct devi_nodeid *dno_free;   /* Free list */
        uint_t dno_list_length;         /* number of dips in list */
};
 109 
/*
 * used to keep track of branch remove events to be generated
 *
 * NOTE(review): the sibling/child links suggest these nodes form a tree
 * mirroring the device branch being removed - confirm against the brevq
 * helpers, which are not visible in this section.
 */
struct brevq_node {
        char *brn_deviname;
        struct brevq_node *brn_sibling;
        struct brevq_node *brn_child;
};
 116 
/*
 * The single nodeid map instance, and the pointer through which the rest
 * of this file accesses it.
 */
static struct devi_nodeid_list devi_nodeid_list;
static struct devi_nodeid_list *devimap = &devi_nodeid_list;
 119 
/*
 * Well known nodes which are attached first at boot time.
 */
dev_info_t *top_devinfo;                /* root of device tree */
dev_info_t *options_dip;
dev_info_t *pseudo_dip;
dev_info_t *clone_dip;
dev_info_t *scsi_vhci_dip;              /* MPXIO dip */
major_t clone_major;

/*
 * A non-global zone's /dev is derived from the device tree.
 * This generation number serves to indicate when a zone's
 * /dev may need to be updated.
 */
volatile ulong_t devtree_gen;           /* generation number */

/* block all future dev_info state changes */
hrtime_t volatile devinfo_freeze = 0;

/* number of dev_info attaches/detaches currently in progress */
static ulong_t devinfo_attach_detach = 0;

/* defined outside this file */
extern int      sys_shutdown;
extern kmutex_t global_vhci_lock;       /* taken by link_node/unlink_node */

/* bitset of DS_SYSAVAIL & DS_RECONFIG - no races, no lock */
static int devname_state = 0;
 148 
/*
 * The devinfo snapshot cache and related variables.
 * The only field in the di_cache structure that needs initialization
 * is the mutex (cache_lock). However, since this is an adaptive mutex
 * (MUTEX_DEFAULT) - it is automatically initialized by being allocated
 * in zeroed memory (static storage class). Therefore no explicit
 * initialization of the di_cache structure is needed.
 *
 * NOTE(review): despite the statement above, the initializer below sets
 * the first member of struct di_cache to 1. The structure layout is not
 * visible here - confirm which field that is and reconcile this comment.
 */
struct di_cache di_cache = {1};
int             di_cache_debug = 0;
 159 
/* For ddvis, which needs pseudo children under PCI */
int pci_allow_pseudo_children = 0;

/* Allow path-oriented alias driver binding on driver.conf enumerated nodes */
int driver_conf_allow_path_alias = 1;

/*
 * The following switch is for service people, in case a
 * 3rd party driver depends on identify(9e) being called.
 */
int identify_9e = 0;

/*
 * Flag that allows the behaviour of preventing attach for retired
 * persistent nodes to be disabled.
 */
int retire_prevents_attach = 1;

int mtc_off;                                    /* turn off mt config */

int quiesce_debug = 0;

boolean_t ddi_aliases_present = B_FALSE;
ddi_alias_t ddi_aliases;
uint_t tsd_ddi_redirect;

/* presumably the size of the alias hash tables - confirm at point of use */
#define DDI_ALIAS_HASH_SIZE     (2700)

static kmem_cache_t *ddi_node_cache;            /* devinfo node cache */
static devinfo_log_header_t *devinfo_audit_log; /* devinfo log */
static int devinfo_log_size;                    /* size in pages */

boolean_t ddi_err_panic = B_FALSE;
 193 
/*
 * Forward declarations of file-local (static) helpers.
 *
 * NOTE(review): find_duplicate_child() is declared with an empty
 * parameter list, so calls to it are not type-checked; give it a full
 * prototype once its definition (not visible here) has been confirmed.
 */
static int lookup_compatible(dev_info_t *, uint_t);
static char *encode_composite_string(char **, uint_t, size_t *, uint_t);
static void link_to_driver_list(dev_info_t *);
static void unlink_from_driver_list(dev_info_t *);
static void add_to_dn_list(struct devnames *, dev_info_t *);
static void remove_from_dn_list(struct devnames *, dev_info_t *);
static dev_info_t *find_duplicate_child();
static void add_global_props(dev_info_t *);
static void remove_global_props(dev_info_t *);
static int uninit_node(dev_info_t *);
static void da_log_init(void);
static void da_log_enter(dev_info_t *);
static int walk_devs(dev_info_t *, int (*f)(dev_info_t *, void *), void *, int);
static int reset_nexus_flags(dev_info_t *, void *);
static void ddi_optimize_dtree(dev_info_t *);
static int is_leaf_node(dev_info_t *);
static struct mt_config_handle *mt_config_init(dev_info_t *, dev_info_t **,
    int, major_t, int, struct brevq_node **);
static void mt_config_children(struct mt_config_handle *);
static void mt_config_driver(struct mt_config_handle *);
static int mt_config_fini(struct mt_config_handle *);
static int devi_unconfig_common(dev_info_t *, dev_info_t **, int, major_t,
    struct brevq_node **);
static int
ndi_devi_config_obp_args(dev_info_t *parent, char *devnm,
    dev_info_t **childp, int flags);
static void i_link_vhci_node(dev_info_t *);
static void ndi_devi_exit_and_wait(dev_info_t *dip,
    int circular, clock_t end_time);
static int ndi_devi_unbind_driver(dev_info_t *dip);

static int i_ddi_check_retire(dev_info_t *dip);

static void quiesce_one_device(dev_info_t *, void *);

/* non-static prototypes; their definitions are not visible in this section */
dev_info_t *ddi_alias_redirect(char *alias);
char *ddi_curr_redirect(char *currpath);
 231 
 232 
 233 /*
 234  * dev_info cache and node management
 235  */
 236 
 237 /* initialize dev_info node cache */
 238 void
 239 i_ddi_node_cache_init()
 240 {
 241         ASSERT(ddi_node_cache == NULL);
 242         ddi_node_cache = kmem_cache_create("dev_info_node_cache",
 243             sizeof (struct dev_info), 0, NULL, NULL, NULL, NULL, NULL, 0);
 244 
 245         if (ddidebug & DDI_AUDIT)
 246                 da_log_init();
 247 }
 248 
 249 
/*
 * Allocating a dev_info node, callable from interrupt context with KM_NOSLEEP
 * The allocated node has a reference count of 0.
 *
 * pdip         parent node; stored as both devi_parent and devi_bus_ctl
 * node_name    node name (must not be NULL); a private copy is made and
 *              also used as the default binding name
 * nodeid       either one of the DEVI_*_NODEID selectors handled by the
 *              switch below, or (any other value) a prom nodeid to take
 * instance     stored as devi_instance without interpretation
 * sys_prop     optional system property list; duplicated when non-NULL
 * flag         kmem allocation flag passed to every allocation made here
 *
 * Returns the new node in state DS_PROTO, or NULL (after logging a
 * CE_NOTE) if any allocation fails. Running out of auto-assigned
 * nodeids panics instead of failing.
 */
dev_info_t *
i_ddi_alloc_node(dev_info_t *pdip, char *node_name, pnode_t nodeid,
    int instance, ddi_prop_t *sys_prop, int flag)
{
        struct dev_info *devi;
        struct devi_nodeid *elem;
        static char failed[] = "i_ddi_alloc_node: out of memory";

        ASSERT(node_name != NULL);

        if ((devi = kmem_cache_alloc(ddi_node_cache, flag)) == NULL) {
                cmn_err(CE_NOTE, failed);
                return (NULL);
        }

        /* start from all-zero so the fail path can test fields for NULL */
        bzero(devi, sizeof (struct dev_info));

        /* an audit record is attached only when the audit log exists */
        if (devinfo_audit_log) {
                devi->devi_audit = kmem_zalloc(sizeof (devinfo_audit_t), flag);
                if (devi->devi_audit == NULL)
                        goto fail;
        }

        if ((devi->devi_node_name = i_ddi_strdup(node_name, flag)) == NULL)
                goto fail;

        /* default binding name is node name */
        devi->devi_binding_name = devi->devi_node_name;
        devi->devi_major = DDI_MAJOR_T_NONE; /* unbound by default */

        /*
         * Make a copy of system property
         */
        if (sys_prop &&
            (devi->devi_sys_prop_ptr = i_ddi_prop_list_dup(sys_prop, flag))
            == NULL)
                goto fail;

        /*
         * Assign devi_nodeid, devi_node_class, devi_node_attributes
         * according to the following algorithm:
         *
         * nodeid arg                   node class              node attributes
         *
         * DEVI_PSEUDO_NODEID           DDI_NC_PSEUDO           A
         * DEVI_SID_NODEID              DDI_NC_PSEUDO           A,P
         * DEVI_SID_HIDDEN_NODEID       DDI_NC_PSEUDO           A,P,H
         * DEVI_SID_HP_NODEID           DDI_NC_PSEUDO           A,P,h
         * DEVI_SID_HP_HIDDEN_NODEID    DDI_NC_PSEUDO           A,P,H,h
         * other                        DDI_NC_PROM             P
         *
         * Where A = DDI_AUTO_ASSIGNED_NODEID (auto-assign a nodeid)
         * and   P = DDI_PERSISTENT
         * and   H = DDI_HIDDEN_NODE
         * and   h = DDI_HOTPLUG_NODE
         *
         * auto-assigned nodeids are also auto-freed.
         *
         * Every persistent (P) variant also pre-allocates 'elem', the
         * nodeid map element that is parked on the free list below.
         */
        devi->devi_node_attributes = 0;
        switch (nodeid) {
        case DEVI_SID_HIDDEN_NODEID:
                devi->devi_node_attributes |= DDI_HIDDEN_NODE;
                goto sid;

        case DEVI_SID_HP_NODEID:
                devi->devi_node_attributes |= DDI_HOTPLUG_NODE;
                goto sid;

        case DEVI_SID_HP_HIDDEN_NODEID:
                devi->devi_node_attributes |= DDI_HIDDEN_NODE;
                devi->devi_node_attributes |= DDI_HOTPLUG_NODE;
                goto sid;

        case DEVI_SID_NODEID:
                /* common tail for all SID variants, entered via goto */
sid:            devi->devi_node_attributes |= DDI_PERSISTENT;
                if ((elem = kmem_zalloc(sizeof (*elem), flag)) == NULL)
                        goto fail;
                /*FALLTHROUGH*/

        case DEVI_PSEUDO_NODEID:
                devi->devi_node_attributes |= DDI_AUTO_ASSIGNED_NODEID;
                devi->devi_node_class = DDI_NC_PSEUDO;
                if (impl_ddi_alloc_nodeid(&devi->devi_nodeid)) {
                        panic("i_ddi_alloc_node: out of nodeids");
                        /*NOTREACHED*/
                }
                break;

        default:
                if ((elem = kmem_zalloc(sizeof (*elem), flag)) == NULL)
                        goto fail;

                /*
                 * the nodetype is 'prom', try to 'take' the nodeid now.
                 * This requires memory allocation, so check for failure.
                 */
                if (impl_ddi_take_nodeid(nodeid, flag) != 0) {
                        /* the fail path does not know about elem */
                        kmem_free(elem, sizeof (*elem));
                        goto fail;
                }

                devi->devi_nodeid = nodeid;
                devi->devi_node_class = DDI_NC_PROM;
                devi->devi_node_attributes = DDI_PERSISTENT;
                break;
        }

        /*
         * Park the pre-allocated map element on the free list; it is
         * picked up from there by i_ddi_add_devimap() and released again
         * by i_ddi_free_node(). elem is only valid for persistent nodes.
         */
        if (ndi_dev_is_persistent_node((dev_info_t *)devi)) {
                mutex_enter(&devimap->dno_lock);
                elem->next = devimap->dno_free;
                devimap->dno_free = elem;
                mutex_exit(&devimap->dno_lock);
        }

        /*
         * Instance is normally initialized to -1. In a few special
         * cases, the caller may specify an instance (e.g. CPU nodes).
         */
        devi->devi_instance = instance;

        /*
         * set parent and bus_ctl parent
         */
        devi->devi_parent = DEVI(pdip);
        devi->devi_bus_ctl = DEVI(pdip);

        NDI_CONFIG_DEBUG((CE_CONT,
            "i_ddi_alloc_node: name=%s id=%d\n", node_name, devi->devi_nodeid));

        /* synchronization objects; destroyed again in i_ddi_free_node() */
        cv_init(&(devi->devi_cv), NULL, CV_DEFAULT, NULL);
        mutex_init(&(devi->devi_lock), NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&(devi->devi_pm_lock), NULL, MUTEX_DEFAULT, NULL);
        mutex_init(&(devi->devi_pm_busy_lock), NULL, MUTEX_DEFAULT, NULL);

        RIO_TRACE((CE_NOTE, "i_ddi_alloc_node: Initing contract fields: "
            "dip=%p, name=%s", (void *)devi, node_name));

        mutex_init(&(devi->devi_ct_lock), NULL, MUTEX_DEFAULT, NULL);
        cv_init(&(devi->devi_ct_cv), NULL, CV_DEFAULT, NULL);
        devi->devi_ct_count = -1;    /* counter not in use if -1 */
        list_create(&(devi->devi_ct), sizeof (cont_device_t),
            offsetof(cont_device_t, cond_next));

        i_ddi_set_node_state((dev_info_t *)devi, DS_PROTO);
        da_log_enter((dev_info_t *)devi);
        return ((dev_info_t *)devi);

fail:
        /*
         * Undo whatever was allocated so far; untouched fields are still
         * NULL from the bzero() above. node_name is the string that was
         * duplicated into devi_node_name, so its length gives the size
         * of that allocation.
         */
        if (devi->devi_sys_prop_ptr)
                i_ddi_prop_list_delete(devi->devi_sys_prop_ptr);
        if (devi->devi_node_name)
                kmem_free(devi->devi_node_name, strlen(node_name) + 1);
        if (devi->devi_audit)
                kmem_free(devi->devi_audit, sizeof (devinfo_audit_t));
        kmem_cache_free(ddi_node_cache, devi);
        cmn_err(CE_NOTE, failed);
        return (NULL);
}
 412 
 413 /*
 414  * free a dev_info structure.
 415  * NB. Not callable from interrupt since impl_ddi_free_nodeid may block.
 416  */
 417 void
 418 i_ddi_free_node(dev_info_t *dip)
 419 {
 420         struct dev_info *devi = DEVI(dip);
 421         struct devi_nodeid *elem;
 422 
 423         ASSERT(devi->devi_ref == 0);
 424         ASSERT(devi->devi_addr == NULL);
 425         ASSERT(devi->devi_node_state == DS_PROTO);
 426         ASSERT(devi->devi_child == NULL);
 427         ASSERT(devi->devi_hp_hdlp == NULL);
 428 
 429         /* free devi_addr_buf allocated by ddi_set_name_addr() */
 430         if (devi->devi_addr_buf)
 431                 kmem_free(devi->devi_addr_buf, 2 * MAXNAMELEN);
 432 
 433         if (i_ndi_dev_is_auto_assigned_node(dip))
 434                 impl_ddi_free_nodeid(DEVI(dip)->devi_nodeid);
 435 
 436         if (ndi_dev_is_persistent_node(dip)) {
 437                 mutex_enter(&devimap->dno_lock);
 438                 ASSERT(devimap->dno_free);
 439                 elem = devimap->dno_free;
 440                 devimap->dno_free = elem->next;
 441                 mutex_exit(&devimap->dno_lock);
 442                 kmem_free(elem, sizeof (*elem));
 443         }
 444 
 445         if (DEVI(dip)->devi_compat_names)
 446                 kmem_free(DEVI(dip)->devi_compat_names,
 447                     DEVI(dip)->devi_compat_length);
 448         if (DEVI(dip)->devi_rebinding_name)
 449                 kmem_free(DEVI(dip)->devi_rebinding_name,
 450                     strlen(DEVI(dip)->devi_rebinding_name) + 1);
 451 
 452         ddi_prop_remove_all(dip);       /* remove driver properties */
 453         if (devi->devi_sys_prop_ptr)
 454                 i_ddi_prop_list_delete(devi->devi_sys_prop_ptr);
 455         if (devi->devi_hw_prop_ptr)
 456                 i_ddi_prop_list_delete(devi->devi_hw_prop_ptr);
 457 
 458         if (DEVI(dip)->devi_devid_str)
 459                 ddi_devid_str_free(DEVI(dip)->devi_devid_str);
 460 
 461         i_ddi_set_node_state(dip, DS_INVAL);
 462         da_log_enter(dip);
 463         if (devi->devi_audit) {
 464                 kmem_free(devi->devi_audit, sizeof (devinfo_audit_t));
 465         }
 466         if (devi->devi_device_class)
 467                 kmem_free(devi->devi_device_class,
 468                     strlen(devi->devi_device_class) + 1);
 469         cv_destroy(&(devi->devi_cv));
 470         mutex_destroy(&(devi->devi_lock));
 471         mutex_destroy(&(devi->devi_pm_lock));
 472         mutex_destroy(&(devi->devi_pm_busy_lock));
 473 
 474         RIO_TRACE((CE_NOTE, "i_ddi_free_node: destroying contract fields: "
 475             "dip=%p", (void *)dip));
 476         contract_device_remove_dip(dip);
 477         ASSERT(devi->devi_ct_count == -1);
 478         ASSERT(list_is_empty(&(devi->devi_ct)));
 479         cv_destroy(&(devi->devi_ct_cv));
 480         list_destroy(&(devi->devi_ct));
 481         /* free this last since contract_device_remove_dip() uses it */
 482         mutex_destroy(&(devi->devi_ct_lock));
 483         RIO_TRACE((CE_NOTE, "i_ddi_free_node: destroyed all contract fields: "
 484             "dip=%p, name=%s", (void *)dip, devi->devi_node_name));
 485 
 486         kmem_free(devi->devi_node_name, strlen(devi->devi_node_name) + 1);
 487 
 488         /* free event data */
 489         if (devi->devi_ev_path)
 490                 kmem_free(devi->devi_ev_path, MAXPATHLEN);
 491 
 492         kmem_cache_free(ddi_node_cache, devi);
 493 }
 494 
 495 
 496 /*
 497  * Node state transitions
 498  */
 499 
 500 /*
 501  * Change the node name
 502  */
 503 int
 504 ndi_devi_set_nodename(dev_info_t *dip, char *name, int flags)
 505 {
 506         _NOTE(ARGUNUSED(flags))
 507         char *nname, *oname;
 508 
 509         ASSERT(dip && name);
 510 
 511         oname = DEVI(dip)->devi_node_name;
 512         if (strcmp(oname, name) == 0)
 513                 return (DDI_SUCCESS);
 514 
 515         /*
 516          * pcicfg_fix_ethernet requires a name change after node
 517          * is linked into the tree. When pcicfg is fixed, we
 518          * should only allow name change in DS_PROTO state.
 519          */
 520         if (i_ddi_node_state(dip) >= DS_BOUND) {
 521                 /*
 522                  * Don't allow name change once node is bound
 523                  */
 524                 cmn_err(CE_NOTE,
 525                     "ndi_devi_set_nodename: node already bound dip = %p,"
 526                     " %s -> %s", (void *)dip, ddi_node_name(dip), name);
 527                 return (NDI_FAILURE);
 528         }
 529 
 530         nname = i_ddi_strdup(name, KM_SLEEP);
 531         DEVI(dip)->devi_node_name = nname;
 532         i_ddi_set_binding_name(dip, nname);
 533         kmem_free(oname, strlen(oname) + 1);
 534 
 535         da_log_enter(dip);
 536         return (NDI_SUCCESS);
 537 }
 538 
 539 void
 540 i_ddi_add_devimap(dev_info_t *dip)
 541 {
 542         struct devi_nodeid *elem;
 543 
 544         ASSERT(dip);
 545 
 546         if (!ndi_dev_is_persistent_node(dip))
 547                 return;
 548 
 549         ASSERT(ddi_get_parent(dip) == NULL || (DEVI_VHCI_NODE(dip)) ||
 550             DEVI_BUSY_OWNED(ddi_get_parent(dip)));
 551 
 552         mutex_enter(&devimap->dno_lock);
 553 
 554         ASSERT(devimap->dno_free);
 555 
 556         elem = devimap->dno_free;
 557         devimap->dno_free = elem->next;
 558 
 559         elem->nodeid = ddi_get_nodeid(dip);
 560         elem->dip = dip;
 561         elem->next = devimap->dno_head;
 562         devimap->dno_head = elem;
 563 
 564         devimap->dno_list_length++;
 565 
 566         mutex_exit(&devimap->dno_lock);
 567 }
 568 
 569 static int
 570 i_ddi_remove_devimap(dev_info_t *dip)
 571 {
 572         struct devi_nodeid *prev, *elem;
 573         static const char *fcn = "i_ddi_remove_devimap";
 574 
 575         ASSERT(dip);
 576 
 577         if (!ndi_dev_is_persistent_node(dip))
 578                 return (DDI_SUCCESS);
 579 
 580         mutex_enter(&devimap->dno_lock);
 581 
 582         /*
 583          * The following check is done with dno_lock held
 584          * to prevent race between dip removal and
 585          * e_ddi_prom_node_to_dip()
 586          */
 587         if (e_ddi_devi_holdcnt(dip)) {
 588                 mutex_exit(&devimap->dno_lock);
 589                 return (DDI_FAILURE);
 590         }
 591 
 592         ASSERT(devimap->dno_head);
 593         ASSERT(devimap->dno_list_length > 0);
 594 
 595         prev = NULL;
 596         for (elem = devimap->dno_head; elem; elem = elem->next) {
 597                 if (elem->dip == dip) {
 598                         ASSERT(elem->nodeid == ddi_get_nodeid(dip));
 599                         break;
 600                 }
 601                 prev = elem;
 602         }
 603 
 604         if (elem && prev)
 605                 prev->next = elem->next;
 606         else if (elem)
 607                 devimap->dno_head = elem->next;
 608         else
 609                 panic("%s: devinfo node(%p) not found",
 610                     fcn, (void *)dip);
 611 
 612         devimap->dno_list_length--;
 613 
 614         elem->nodeid = 0;
 615         elem->dip = NULL;
 616 
 617         elem->next = devimap->dno_free;
 618         devimap->dno_free = elem;
 619 
 620         mutex_exit(&devimap->dno_lock);
 621 
 622         return (DDI_SUCCESS);
 623 }
 624 
 625 /*
 626  * Link this node into the devinfo tree and add to orphan list
 627  * Not callable from interrupt context
 628  */
 629 static void
 630 link_node(dev_info_t *dip)
 631 {
 632         struct dev_info *devi = DEVI(dip);
 633         struct dev_info *parent = devi->devi_parent;
 634         dev_info_t **dipp;
 635 
 636         ASSERT(parent); /* never called for root node */
 637 
 638         NDI_CONFIG_DEBUG((CE_CONT, "link_node: parent = %s child = %s\n",
 639             parent->devi_node_name, devi->devi_node_name));
 640 
 641         /*
 642          * Hold the global_vhci_lock before linking any direct
 643          * children of rootnex driver. This special lock protects
 644          * linking and unlinking for rootnext direct children.
 645          */
 646         if ((dev_info_t *)parent == ddi_root_node())
 647                 mutex_enter(&global_vhci_lock);
 648 
 649         /*
 650          * attach the node to end of the list unless the node is already there
 651          */
 652         dipp = (dev_info_t **)(&DEVI(parent)->devi_child);
 653         while (*dipp && (*dipp != dip)) {
 654                 dipp = (dev_info_t **)(&DEVI(*dipp)->devi_sibling);
 655         }
 656         ASSERT(*dipp == NULL);  /* node is not linked */
 657 
 658         /*
 659          * Now that we are in the tree, update the devi-nodeid map.
 660          */
 661         i_ddi_add_devimap(dip);
 662 
 663         /*
 664          * This is a temporary workaround for Bug 4618861.
 665          * We keep the scsi_vhci nexus node on the left side of the devinfo
 666          * tree (under the root nexus driver), so that virtual nodes under
 667          * scsi_vhci will be SUSPENDed first and RESUMEd last.  This ensures
 668          * that the pHCI nodes are active during times when their clients
 669          * may be depending on them.  This workaround embodies the knowledge
 670          * that system PM and CPR both traverse the tree left-to-right during
 671          * SUSPEND and right-to-left during RESUME.
 672          * Extending the workaround to IB Nexus/VHCI
 673          * driver also.
 674          */
 675         if (strcmp(devi->devi_binding_name, "scsi_vhci") == 0) {
 676                 /* Add scsi_vhci to beginning of list */
 677                 ASSERT((dev_info_t *)parent == top_devinfo);
 678                 /* scsi_vhci under rootnex */
 679                 devi->devi_sibling = parent->devi_child;
 680                 parent->devi_child = devi;
 681         } else if (strcmp(devi->devi_binding_name, "ib") == 0) {
 682                 i_link_vhci_node(dip);
 683         } else {
 684                 /* Add to end of list */
 685                 *dipp = dip;
 686                 DEVI(dip)->devi_sibling = NULL;
 687         }
 688 
 689         /*
 690          * Release the global_vhci_lock before linking any direct
 691          * children of rootnex driver.
 692          */
 693         if ((dev_info_t *)parent == ddi_root_node())
 694                 mutex_exit(&global_vhci_lock);
 695 
 696         /* persistent nodes go on orphan list */
 697         if (ndi_dev_is_persistent_node(dip))
 698                 add_to_dn_list(&orphanlist, dip);
 699 }
 700 
 701 /*
 702  * Unlink this node from the devinfo tree
 703  */
 704 static int
 705 unlink_node(dev_info_t *dip)
 706 {
 707         struct dev_info *devi = DEVI(dip);
 708         struct dev_info *parent = devi->devi_parent;
 709         dev_info_t **dipp;
 710         ddi_hp_cn_handle_t *hdlp;
 711 
 712         ASSERT(parent != NULL);
 713         ASSERT(devi->devi_node_state == DS_LINKED);
 714 
 715         NDI_CONFIG_DEBUG((CE_CONT, "unlink_node: name = %s\n",
 716             ddi_node_name(dip)));
 717 
 718         /* check references */
 719         if (devi->devi_ref || i_ddi_remove_devimap(dip) != DDI_SUCCESS)
 720                 return (DDI_FAILURE);
 721 
 722         /*
 723          * Hold the global_vhci_lock before linking any direct
 724          * children of rootnex driver.
 725          */
 726         if ((dev_info_t *)parent == ddi_root_node())
 727                 mutex_enter(&global_vhci_lock);
 728 
 729         dipp = (dev_info_t **)(&DEVI(parent)->devi_child);
 730         while (*dipp && (*dipp != dip)) {
 731                 dipp = (dev_info_t **)(&DEVI(*dipp)->devi_sibling);
 732         }
 733         if (*dipp) {
 734                 *dipp = (dev_info_t *)(devi->devi_sibling);
 735                 devi->devi_sibling = NULL;
 736         } else {
 737                 NDI_CONFIG_DEBUG((CE_NOTE, "unlink_node: %s not linked",
 738                     devi->devi_node_name));
 739         }
 740 
 741         /*
 742          * Release the global_vhci_lock before linking any direct
 743          * children of rootnex driver.
 744          */
 745         if ((dev_info_t *)parent == ddi_root_node())
 746                 mutex_exit(&global_vhci_lock);
 747 
 748         /* Remove node from orphan list */
 749         if (ndi_dev_is_persistent_node(dip)) {
 750                 remove_from_dn_list(&orphanlist, dip);
 751         }
 752 
 753         /* Update parent's hotplug handle list */
 754         for (hdlp = DEVI(parent)->devi_hp_hdlp; hdlp; hdlp = hdlp->next) {
 755                 if (hdlp->cn_info.cn_child == dip)
 756                         hdlp->cn_info.cn_child = NULL;
 757         }
 758         return (DDI_SUCCESS);
 759 }
 760 
 761 /*
 762  * Bind this devinfo node to a driver. If compat is NON-NULL, try that first.
 763  * Else, use the node-name.
 764  *
 765  * NOTE: IEEE1275 specifies that nodename should be tried before compatible.
 766  *      Solaris implementation binds nodename after compatible.
 767  *
 768  * If we find a binding,
 769  * - set the binding name to the string,
 770  * - set major number to driver major
 771  *
 772  * If we don't find a binding,
 773  * - return failure
 774  */
 775 static int
 776 bind_node(dev_info_t *dip)
 777 {
 778         char *p = NULL;
 779         major_t major = DDI_MAJOR_T_NONE;
 780         struct dev_info *devi = DEVI(dip);
 781         dev_info_t *parent = ddi_get_parent(dip);
 782 
 783         ASSERT(devi->devi_node_state == DS_LINKED);
 784 
 785         NDI_CONFIG_DEBUG((CE_CONT, "bind_node: 0x%p(name = %s)\n",
 786             (void *)dip, ddi_node_name(dip)));
 787 
 788         mutex_enter(&DEVI(dip)->devi_lock);
 789         if (DEVI(dip)->devi_flags & DEVI_NO_BIND) {
 790                 mutex_exit(&DEVI(dip)->devi_lock);
 791                 return (DDI_FAILURE);
 792         }
 793         mutex_exit(&DEVI(dip)->devi_lock);
 794 
 795         /* find the driver with most specific binding using compatible */
 796         major = ddi_compatible_driver_major(dip, &p);
 797         if (major == DDI_MAJOR_T_NONE)
 798                 return (DDI_FAILURE);
 799 
 800         devi->devi_major = major;
 801         if (p != NULL) {
 802                 i_ddi_set_binding_name(dip, p);
 803                 NDI_CONFIG_DEBUG((CE_CONT, "bind_node: %s bound to %s\n",
 804                     devi->devi_node_name, p));
 805         }
 806 
 807         /* Link node to per-driver list */
 808         link_to_driver_list(dip);
 809 
 810         /*
 811          * reset parent flag so that nexus will merge .conf props
 812          */
 813         if (ndi_dev_is_persistent_node(dip)) {
 814                 mutex_enter(&DEVI(parent)->devi_lock);
 815                 DEVI(parent)->devi_flags &=
 816                     ~(DEVI_ATTACHED_CHILDREN|DEVI_MADE_CHILDREN);
 817                 mutex_exit(&DEVI(parent)->devi_lock);
 818         }
 819         return (DDI_SUCCESS);
 820 }
 821 
 822 /*
 823  * Unbind this devinfo node
 824  * Called before the node is destroyed or driver is removed from system
 825  */
 826 static int
 827 unbind_node(dev_info_t *dip)
 828 {
 829         ASSERT(DEVI(dip)->devi_node_state == DS_BOUND);
 830         ASSERT(DEVI(dip)->devi_major != DDI_MAJOR_T_NONE);
 831 
 832         /* check references */
 833         if (DEVI(dip)->devi_ref)
 834                 return (DDI_FAILURE);
 835 
 836         NDI_CONFIG_DEBUG((CE_CONT, "unbind_node: 0x%p(name = %s)\n",
 837             (void *)dip, ddi_node_name(dip)));
 838 
 839         unlink_from_driver_list(dip);
 840 
 841         DEVI(dip)->devi_major = DDI_MAJOR_T_NONE;
 842         DEVI(dip)->devi_binding_name = DEVI(dip)->devi_node_name;
 843         return (DDI_SUCCESS);
 844 }
 845 
 846 /*
 847  * Initialize a node: calls the parent nexus' bus_ctl ops to do the operation.
 848  * Must hold parent and per-driver list while calling this function.
 849  * A successful init_node() returns with an active ndi_hold_devi() hold on
 850  * the parent.
 851  */
 852 static int
 853 init_node(dev_info_t *dip)
 854 {
 855         int error;
 856         dev_info_t *pdip = ddi_get_parent(dip);
 857         int (*f)(dev_info_t *, dev_info_t *, ddi_ctl_enum_t, void *, void *);
 858         char *path;
 859         major_t major;
 860         ddi_devid_t devid = NULL;
 861 
 862         ASSERT(i_ddi_node_state(dip) == DS_BOUND);
 863 
 864         /* should be DS_READY except for pcmcia ... */
 865         ASSERT(i_ddi_node_state(pdip) >= DS_PROBED);
 866 
 867         path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 868         (void) ddi_pathname(dip, path);
 869         NDI_CONFIG_DEBUG((CE_CONT, "init_node: entry: path %s 0x%p\n",
 870             path, (void *)dip));
 871 
 872         /*
 873          * The parent must have a bus_ctl operation.
 874          */
 875         if ((DEVI(pdip)->devi_ops->devo_bus_ops == NULL) ||
 876             (f = DEVI(pdip)->devi_ops->devo_bus_ops->bus_ctl) == NULL) {
 877                 error = DDI_FAILURE;
 878                 goto out;
 879         }
 880 
 881         add_global_props(dip);
 882 
 883         /*
 884          * Invoke the parent's bus_ctl operation with the DDI_CTLOPS_INITCHILD
 885          * command to transform the child to canonical form 1. If there
 886          * is an error, ddi_remove_child should be called, to clean up.
 887          */
 888         error = (*f)(pdip, pdip, DDI_CTLOPS_INITCHILD, dip, NULL);
 889         if (error != DDI_SUCCESS) {
 890                 NDI_CONFIG_DEBUG((CE_CONT, "init_node: %s 0x%p failed\n",
 891                     path, (void *)dip));
 892                 remove_global_props(dip);
 893 
 894                 /*
 895                  * If a nexus INITCHILD implementation calls ddi_devid_regster()
 896                  * prior to setting devi_addr, the devid is not recorded in
 897                  * the devid cache (i.e. DEVI_CACHED_DEVID is not set).
 898                  * With mpxio, while the vhci client path may be missing
 899                  * from the cache, phci pathinfo paths may have already be
 900                  * added to the cache, against the client dip, by use of
 901                  * e_devid_cache_pathinfo().  Because of this, when INITCHILD
 902                  * of the client fails, we need to purge the client dip from
 903                  * the cache even if DEVI_CACHED_DEVID is not set - if only
 904                  * devi_devid_str is set.
 905                  */
 906                 mutex_enter(&DEVI(dip)->devi_lock);
 907                 if ((DEVI(dip)->devi_flags & DEVI_CACHED_DEVID) ||
 908                     DEVI(dip)->devi_devid_str) {
 909                         DEVI(dip)->devi_flags &= ~DEVI_CACHED_DEVID;
 910                         mutex_exit(&DEVI(dip)->devi_lock);
 911                         ddi_devid_unregister(dip);
 912                 } else
 913                         mutex_exit(&DEVI(dip)->devi_lock);
 914 
 915                 /* in case nexus driver didn't clear this field */
 916                 ddi_set_name_addr(dip, NULL);
 917                 error = DDI_FAILURE;
 918                 goto out;
 919         }
 920 
 921         ndi_hold_devi(pdip);                    /* initial hold of parent */
 922 
 923         /* recompute path after initchild for @addr information */
 924         (void) ddi_pathname(dip, path);
 925 
 926         /* Check for duplicate nodes */
 927         if (find_duplicate_child(pdip, dip) != NULL) {
 928                 /*
 929                  * uninit_node() the duplicate - a successful uninit_node()
 930                  * will release inital hold of parent using ndi_rele_devi().
 931                  */
 932                 if ((error = uninit_node(dip)) != DDI_SUCCESS) {
 933                         ndi_rele_devi(pdip);    /* release initial hold */
 934                         cmn_err(CE_WARN, "init_node: uninit of duplicate "
 935                             "node %s failed", path);
 936                 }
 937                 NDI_CONFIG_DEBUG((CE_CONT, "init_node: duplicate uninit "
 938                     "%s 0x%p%s\n", path, (void *)dip,
 939                     (error == DDI_SUCCESS) ? "" : " failed"));
 940                 error = DDI_FAILURE;
 941                 goto out;
 942         }
 943 
 944         /*
 945          * If a devid was registered for a DS_BOUND node then the devid_cache
 946          * may not have captured the path. Detect this situation and ensure that
 947          * the path enters the cache now that devi_addr is established.
 948          */
 949         if (!(DEVI(dip)->devi_flags & DEVI_CACHED_DEVID) &&
 950             (ddi_devid_get(dip, &devid) == DDI_SUCCESS)) {
 951                 if (e_devid_cache_register(dip, devid) == DDI_SUCCESS) {
 952                         mutex_enter(&DEVI(dip)->devi_lock);
 953                         DEVI(dip)->devi_flags |= DEVI_CACHED_DEVID;
 954                         mutex_exit(&DEVI(dip)->devi_lock);
 955                 }
 956 
 957                 ddi_devid_free(devid);
 958         }
 959 
 960         /*
 961          * Check to see if we have a path-oriented driver alias that overrides
 962          * the current driver binding. If so, we need to rebind. This check
 963          * needs to be delayed until after a successful DDI_CTLOPS_INITCHILD,
 964          * so the unit-address is established on the last component of the path.
 965          *
 966          * NOTE: Allowing a path-oriented alias to change the driver binding
 967          * of a driver.conf node results in non-intuitive property behavior.
 968          * We provide a tunable (driver_conf_allow_path_alias) to control
 969          * this behavior. See uninit_node() for more details.
 970          *
 971          * NOTE: If you are adding a path-oriented alias for the boot device,
 972          * and there is mismatch between OBP and the kernel in regard to
 973          * generic name use, like "disk" .vs. "ssd", then you will need
 974          * to add a path-oriented alias for both paths.
 975          */
 976         major = ddi_name_to_major(path);
 977         if (driver_active(major) && (major != DEVI(dip)->devi_major) &&
 978             (ndi_dev_is_persistent_node(dip) || driver_conf_allow_path_alias)) {
 979 
 980                 /* Mark node for rebind processing. */
 981                 mutex_enter(&DEVI(dip)->devi_lock);
 982                 DEVI(dip)->devi_flags |= DEVI_REBIND;
 983                 mutex_exit(&DEVI(dip)->devi_lock);
 984 
 985                 /*
 986                  * Add an extra hold on the parent to prevent it from ever
 987                  * having a zero devi_ref during the child rebind process.
 988                  * This is necessary to ensure that the parent will never
 989                  * detach(9E) during the rebind.
 990                  */
 991                 ndi_hold_devi(pdip);            /* extra hold of parent */
 992 
 993                 /*
 994                  * uninit_node() current binding - a successful uninit_node()
 995                  * will release extra hold of parent using ndi_rele_devi().
 996                  */
 997                 if ((error = uninit_node(dip)) != DDI_SUCCESS) {
 998                         ndi_rele_devi(pdip);    /* release extra hold */
 999                         ndi_rele_devi(pdip);    /* release initial hold */
1000                         cmn_err(CE_WARN, "init_node: uninit for rebind "
1001                             "of node %s failed", path);
1002                         goto out;
1003                 }
1004 
1005                 /* Unbind: demote the node back to DS_LINKED.  */
1006                 if ((error = ndi_devi_unbind_driver(dip)) != DDI_SUCCESS) {
1007                         ndi_rele_devi(pdip);    /* release initial hold */
1008                         cmn_err(CE_WARN, "init_node: unbind for rebind "
1009                             "of node %s failed", path);
1010                         goto out;
1011                 }
1012 
1013                 /* establish rebinding name */
1014                 if (DEVI(dip)->devi_rebinding_name == NULL)
1015                         DEVI(dip)->devi_rebinding_name =
1016                             i_ddi_strdup(path, KM_SLEEP);
1017 
1018                 /*
1019                  * Now that we are demoted and marked for rebind, repromote.
1020                  * We need to do this in steps, instead of just calling
1021                  * ddi_initchild, so that we can redo the merge operation
1022                  * after we are rebound to the path-bound driver.
1023                  *
1024                  * Start by rebinding node to the path-bound driver.
1025                  */
1026                 if ((error = ndi_devi_bind_driver(dip, 0)) != DDI_SUCCESS) {
1027                         ndi_rele_devi(pdip);    /* release initial hold */
1028                         cmn_err(CE_WARN, "init_node: rebind "
1029                             "of node %s failed", path);
1030                         goto out;
1031                 }
1032 
1033                 /*
1034                  * If the node is not a driver.conf node then merge
1035                  * driver.conf properties from new path-bound driver.conf.
1036                  */
1037                 if (ndi_dev_is_persistent_node(dip))
1038                         (void) i_ndi_make_spec_children(pdip, 0);
1039 
1040                 /*
1041                  * Now that we have taken care of merge, repromote back
1042                  * to DS_INITIALIZED.
1043                  */
1044                 error = ddi_initchild(pdip, dip);
1045                 NDI_CONFIG_DEBUG((CE_CONT, "init_node: rebind "
1046                     "%s 0x%p\n", path, (void *)dip));
1047 
1048                 /*
1049                  * Release our initial hold. If ddi_initchild() was
1050                  * successful then it will return with the active hold.
1051                  */
1052                 ndi_rele_devi(pdip);
1053                 goto out;
1054         }
1055 
1056         /*
1057          * Apply multi-parent/deep-nexus optimization to the new node
1058          */
1059         DEVI(dip)->devi_instance = e_ddi_assign_instance(dip);
1060         ddi_optimize_dtree(dip);
1061         error = DDI_SUCCESS;            /* return with active hold */
1062 
1063 out:    if (error != DDI_SUCCESS) {
1064                 /* On failure ensure that DEVI_REBIND is cleared */
1065                 mutex_enter(&DEVI(dip)->devi_lock);
1066                 DEVI(dip)->devi_flags &= ~DEVI_REBIND;
1067                 mutex_exit(&DEVI(dip)->devi_lock);
1068         }
1069         kmem_free(path, MAXPATHLEN);
1070         return (error);
1071 }
1072 
1073 /*
1074  * Uninitialize node
1075  * The per-driver list must be held busy during the call.
1076  * A successful uninit_node() releases the init_node() hold on
1077  * the parent by calling ndi_rele_devi().
1078  */
1079 static int
1080 uninit_node(dev_info_t *dip)
1081 {
1082         int node_state_entry;
1083         dev_info_t *pdip;
1084         struct dev_ops *ops;
1085         int (*f)();
1086         int error;
1087         char *addr;
1088 
1089         /*
1090          * Don't check for references here or else a ref-counted
1091          * dip cannot be downgraded by the framework.
1092          */
1093         node_state_entry = i_ddi_node_state(dip);
1094         ASSERT((node_state_entry == DS_BOUND) ||
1095             (node_state_entry == DS_INITIALIZED));
1096         pdip = ddi_get_parent(dip);
1097         ASSERT(pdip);
1098 
1099         NDI_CONFIG_DEBUG((CE_CONT, "uninit_node: 0x%p(%s%d)\n",
1100             (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip)));
1101 
1102         if (((ops = ddi_get_driver(pdip)) == NULL) ||
1103             (ops->devo_bus_ops == NULL) ||
1104             ((f = ops->devo_bus_ops->bus_ctl) == NULL)) {
1105                 return (DDI_FAILURE);
1106         }
1107 
1108         /*
1109          * save the @addr prior to DDI_CTLOPS_UNINITCHILD for use in
1110          * freeing the instance if it succeeds.
1111          */
1112         if (node_state_entry == DS_INITIALIZED) {
1113                 addr = ddi_get_name_addr(dip);
1114                 if (addr)
1115                         addr = i_ddi_strdup(addr, KM_SLEEP);
1116         } else {
1117                 addr = NULL;
1118         }
1119 
1120         error = (*f)(pdip, pdip, DDI_CTLOPS_UNINITCHILD, dip, (void *)NULL);
1121         if (error == DDI_SUCCESS) {
1122                 /* ensure that devids are unregistered */
1123                 mutex_enter(&DEVI(dip)->devi_lock);
1124                 if ((DEVI(dip)->devi_flags & DEVI_CACHED_DEVID)) {
1125                         DEVI(dip)->devi_flags &= ~DEVI_CACHED_DEVID;
1126                         mutex_exit(&DEVI(dip)->devi_lock);
1127                         ddi_devid_unregister(dip);
1128                 } else
1129                         mutex_exit(&DEVI(dip)->devi_lock);
1130 
1131                 /* if uninitchild forgot to set devi_addr to NULL do it now */
1132                 ddi_set_name_addr(dip, NULL);
1133 
1134                 /*
1135                  * Free instance number. This is a no-op if instance has
1136                  * been kept by probe_node().  Avoid free when we are called
1137                  * from init_node (DS_BOUND) because the instance has not yet
1138                  * been assigned.
1139                  */
1140                 if (node_state_entry == DS_INITIALIZED) {
1141                         e_ddi_free_instance(dip, addr);
1142                         DEVI(dip)->devi_instance = -1;
1143                 }
1144 
1145                 /* release the init_node hold */
1146                 ndi_rele_devi(pdip);
1147 
1148                 remove_global_props(dip);
1149 
1150                 /*
1151                  * NOTE: The decision on whether to allow a path-oriented
1152                  * rebind of a driver.conf enumerated node is made by
1153                  * init_node() based on driver_conf_allow_path_alias. The
1154                  * rebind code below prevents deletion of system properties
1155                  * on driver.conf nodes.
1156                  *
1157                  * When driver_conf_allow_path_alias is set, property behavior
1158                  * on rebound driver.conf file is non-intuitive. For a
1159                  * driver.conf node, the unit-address properties come from
1160                  * the driver.conf file as system properties. Removing system
1161                  * properties from a driver.conf node makes the node
1162                  * useless (we get node without unit-address properties) - so
1163                  * we leave system properties in place. The result is a node
1164                  * where system properties come from the node being rebound,
1165                  * and global properties come from the driver.conf file
1166                  * of the driver we are rebinding to.  If we could determine
1167                  * that the path-oriented alias driver.conf file defined a
1168                  * node at the same unit address, it would be best to use
1169                  * that node and avoid the non-intuitive property behavior.
1170                  * Unfortunately, the current "merge" code does not support
1171                  * this, so we live with the non-intuitive property behavior.
1172                  */
1173                 if (!((ndi_dev_is_persistent_node(dip) == 0) &&
1174                     (DEVI(dip)->devi_flags & DEVI_REBIND)))
1175                         e_ddi_prop_remove_all(dip);
1176         } else {
1177                 NDI_CONFIG_DEBUG((CE_CONT, "uninit_node failed: 0x%p(%s%d)\n",
1178                     (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip)));
1179         }
1180 
1181         if (addr)
1182                 kmem_free(addr, strlen(addr) + 1);
1183         return (error);
1184 }
1185 
1186 /*
1187  * Invoke driver's probe entry point to probe for existence of hardware.
1188  * Keep instance permanent for successful probe and leaf nodes.
1189  *
1190  * Per-driver list must be held busy while calling this function.
1191  */
1192 static int
1193 probe_node(dev_info_t *dip)
1194 {
1195         int rv;
1196 
1197         ASSERT(i_ddi_node_state(dip) == DS_INITIALIZED);
1198 
1199         NDI_CONFIG_DEBUG((CE_CONT, "probe_node: 0x%p(%s%d)\n",
1200             (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip)));
1201 
1202         /* temporarily hold the driver while we probe */
1203         DEVI(dip)->devi_ops = ndi_hold_driver(dip);
1204         if (DEVI(dip)->devi_ops == NULL) {
1205                 NDI_CONFIG_DEBUG((CE_CONT,
1206                     "probe_node: 0x%p(%s%d) cannot load driver\n",
1207                     (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip)));
1208                 return (DDI_FAILURE);
1209         }
1210 
1211         if (identify_9e != 0)
1212                 (void) devi_identify(dip);
1213 
1214         rv = devi_probe(dip);
1215 
1216         /* release the driver now that probe is complete */
1217         ndi_rele_driver(dip);
1218         DEVI(dip)->devi_ops = NULL;
1219 
1220         switch (rv) {
1221         case DDI_PROBE_SUCCESS:                 /* found */
1222         case DDI_PROBE_DONTCARE:                /* ddi_dev_is_sid */
1223                 e_ddi_keep_instance(dip);       /* persist instance */
1224                 rv = DDI_SUCCESS;
1225                 break;
1226 
1227         case DDI_PROBE_PARTIAL:                 /* maybe later */
1228         case DDI_PROBE_FAILURE:                 /* not found */
1229                 NDI_CONFIG_DEBUG((CE_CONT,
1230                     "probe_node: 0x%p(%s%d) no hardware found%s\n",
1231                     (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip),
1232                     (rv == DDI_PROBE_PARTIAL) ? " yet" : ""));
1233                 rv = DDI_FAILURE;
1234                 break;
1235 
1236         default:
1237 #ifdef  DEBUG
1238                 cmn_err(CE_WARN, "probe_node: %s%d: illegal probe(9E) value",
1239                     ddi_driver_name(dip), ddi_get_instance(dip));
1240 #endif  /* DEBUG */
1241                 rv = DDI_FAILURE;
1242                 break;
1243         }
1244         return (rv);
1245 }
1246 
1247 /*
1248  * Unprobe a node. Simply reset the node state.
1249  * Per-driver list must be held busy while calling this function.
1250  */
1251 static int
1252 unprobe_node(dev_info_t *dip)
1253 {
1254         ASSERT(i_ddi_node_state(dip) == DS_PROBED);
1255 
1256         /*
1257          * Don't check for references here or else a ref-counted
1258          * dip cannot be downgraded by the framework.
1259          */
1260 
1261         NDI_CONFIG_DEBUG((CE_CONT, "unprobe_node: 0x%p(name = %s)\n",
1262             (void *)dip, ddi_node_name(dip)));
1263         return (DDI_SUCCESS);
1264 }
1265 
1266 /*
1267  * Attach devinfo node.
1268  * Per-driver list must be held busy.
1269  */
1270 static int
1271 attach_node(dev_info_t *dip)
1272 {
1273         int rv;
1274 
1275         ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(dip)));
1276         ASSERT(i_ddi_node_state(dip) == DS_PROBED);
1277 
1278         NDI_CONFIG_DEBUG((CE_CONT, "attach_node: 0x%p(%s%d)\n",
1279             (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip)));
1280 
1281         /*
1282          * Tell mpxio framework that a node is about to online.
1283          */
1284         if ((rv = mdi_devi_online(dip, 0)) != NDI_SUCCESS) {
1285                 return (DDI_FAILURE);
1286         }
1287 
1288         /* no recursive attachment */
1289         ASSERT(DEVI(dip)->devi_ops == NULL);
1290 
1291         /*
1292          * Hold driver the node is bound to.
1293          */
1294         DEVI(dip)->devi_ops = ndi_hold_driver(dip);
1295         if (DEVI(dip)->devi_ops == NULL) {
1296                 /*
1297                  * We were able to load driver for probing, so we should
1298                  * not get here unless something really bad happened.
1299                  */
1300                 cmn_err(CE_WARN, "attach_node: no driver for major %d",
1301                     DEVI(dip)->devi_major);
1302                 return (DDI_FAILURE);
1303         }
1304 
1305         if (NEXUS_DRV(DEVI(dip)->devi_ops))
1306                 DEVI(dip)->devi_taskq = ddi_taskq_create(dip,
1307                     "nexus_enum_tq", 1,
1308                     TASKQ_DEFAULTPRI, 0);
1309 
1310         mutex_enter(&(DEVI(dip)->devi_lock));
1311         DEVI_SET_ATTACHING(dip);
1312         DEVI_SET_NEED_RESET(dip);
1313         mutex_exit(&(DEVI(dip)->devi_lock));
1314 
1315         rv = devi_attach(dip, DDI_ATTACH);
1316 
1317         mutex_enter(&(DEVI(dip)->devi_lock));
1318         DEVI_CLR_ATTACHING(dip);
1319 
1320         if (rv != DDI_SUCCESS) {
1321                 DEVI_CLR_NEED_RESET(dip);
1322                 mutex_exit(&DEVI(dip)->devi_lock);
1323 
1324                 /*
1325                  * Cleanup dacf reservations
1326                  */
1327                 mutex_enter(&dacf_lock);
1328                 dacf_clr_rsrvs(dip, DACF_OPID_POSTATTACH);
1329                 dacf_clr_rsrvs(dip, DACF_OPID_PREDETACH);
1330                 mutex_exit(&dacf_lock);
1331                 if (DEVI(dip)->devi_taskq)
1332                         ddi_taskq_destroy(DEVI(dip)->devi_taskq);
1333                 ddi_remove_minor_node(dip, NULL);
1334 
1335                 /* release the driver if attach failed */
1336                 ndi_rele_driver(dip);
1337                 DEVI(dip)->devi_ops = NULL;
1338                 NDI_CONFIG_DEBUG((CE_CONT, "attach_node: 0x%p(%s%d) failed\n",
1339                     (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip)));
1340                 return (DDI_FAILURE);
1341         } else
1342                 mutex_exit(&DEVI(dip)->devi_lock);
1343 
1344         /* successful attach, return with driver held */
1345 
1346         return (DDI_SUCCESS);
1347 }
1348 
1349 /*
1350  * Detach devinfo node.
1351  * Per-driver list must be held busy.
1352  */
1353 static int
1354 detach_node(dev_info_t *dip, uint_t flag)
1355 {
1356         struct devnames *dnp;
1357         int             rv;
1358 
1359         ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(dip)));
1360         ASSERT(i_ddi_node_state(dip) == DS_ATTACHED);
1361 
1362         /* check references */
1363         if (DEVI(dip)->devi_ref)
1364                 return (DDI_FAILURE);
1365 
1366         NDI_CONFIG_DEBUG((CE_CONT, "detach_node: 0x%p(%s%d)\n",
1367             (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip)));
1368 
1369         /*
1370          * NOTE: If we are processing a pHCI node then the calling code
1371          * must detect this and ndi_devi_enter() in (vHCI, parent(pHCI))
1372          * order unless pHCI and vHCI are siblings.  Code paths leading
1373          * here that must ensure this ordering include:
1374          * unconfig_immediate_children(), devi_unconfig_one(),
1375          * ndi_devi_unconfig_one(), ndi_devi_offline().
1376          */
1377         ASSERT(!MDI_PHCI(dip) ||
1378             (ddi_get_parent(mdi_devi_get_vdip(dip)) == ddi_get_parent(dip)) ||
1379             DEVI_BUSY_OWNED(mdi_devi_get_vdip(dip)));
1380 
1381         /* Offline the device node with the mpxio framework. */
1382         if (mdi_devi_offline(dip, flag) != NDI_SUCCESS) {
1383                 return (DDI_FAILURE);
1384         }
1385 
1386         /* drain the taskq */
1387         if (DEVI(dip)->devi_taskq)
1388                 ddi_taskq_wait(DEVI(dip)->devi_taskq);
1389 
1390         rv = devi_detach(dip, DDI_DETACH);
1391 
1392         if (rv != DDI_SUCCESS) {
1393                 NDI_CONFIG_DEBUG((CE_CONT,
1394                     "detach_node: 0x%p(%s%d) failed\n",
1395                     (void *)dip, ddi_driver_name(dip), ddi_get_instance(dip)));
1396                 return (DDI_FAILURE);
1397         }
1398 
1399         mutex_enter(&(DEVI(dip)->devi_lock));
1400         DEVI_CLR_NEED_RESET(dip);
1401         mutex_exit(&(DEVI(dip)->devi_lock));
1402 
1403 #if defined(__amd64) && !defined(__xpv)
1404         /*
1405          * Close any iommulib mediated linkage to an IOMMU
1406          */
1407         if (IOMMU_USED(dip))
1408                 iommulib_nex_close(dip);
1409 #endif
1410 
1411         /* destroy the taskq */
1412         if (DEVI(dip)->devi_taskq) {
1413                 ddi_taskq_destroy(DEVI(dip)->devi_taskq);
1414                 DEVI(dip)->devi_taskq = NULL;
1415         }
1416 
1417         /* Cleanup dacf reservations */
1418         mutex_enter(&dacf_lock);
1419         dacf_clr_rsrvs(dip, DACF_OPID_POSTATTACH);
1420         dacf_clr_rsrvs(dip, DACF_OPID_PREDETACH);
1421         mutex_exit(&dacf_lock);
1422 
1423         /* remove any additional flavors that were added */
1424         if (DEVI(dip)->devi_flavorv_n > 1 && DEVI(dip)->devi_flavorv != NULL) {
1425                 kmem_free(DEVI(dip)->devi_flavorv,
1426                     (DEVI(dip)->devi_flavorv_n - 1) * sizeof (void *));
1427                 DEVI(dip)->devi_flavorv = NULL;
1428         }
1429 
1430         /* Remove properties and minor nodes in case driver forgots */
1431         ddi_remove_minor_node(dip, NULL);
1432         ddi_prop_remove_all(dip);
1433 
1434         /* a detached node can't have attached or .conf children */
1435         mutex_enter(&DEVI(dip)->devi_lock);
1436         DEVI(dip)->devi_flags &= ~(DEVI_MADE_CHILDREN|DEVI_ATTACHED_CHILDREN);
1437         mutex_exit(&DEVI(dip)->devi_lock);
1438 
1439         /*
1440          * If the instance has successfully detached in detach_driver() context,
1441          * clear DN_DRIVER_HELD for correct ddi_hold_installed_driver()
1442          * behavior. Consumers like qassociate() depend on this (via clnopen()).
1443          */
1444         if (flag & NDI_DETACH_DRIVER) {
1445                 dnp = &(devnamesp[DEVI(dip)->devi_major]);
1446                 LOCK_DEV_OPS(&dnp->dn_lock);
1447                 dnp->dn_flags &= ~DN_DRIVER_HELD;
1448                 UNLOCK_DEV_OPS(&dnp->dn_lock);
1449         }
1450 
1451         /* successful detach, release the driver */
1452         ndi_rele_driver(dip);
1453         DEVI(dip)->devi_ops = NULL;
1454         return (DDI_SUCCESS);
1455 }
1456 
1457 /*
1458  * Run dacf post_attach routines
1459  */
1460 static int
1461 postattach_node(dev_info_t *dip)
1462 {
1463         int rval;
1464 
1465         /*
1466          * For hotplug busses like USB, it's possible that devices
1467          * are removed but dip is still around. We don't want to
1468          * run dacf routines as part of detach failure recovery.
1469          *
1470          * Pretend success until we figure out how to prevent
1471          * access to such devinfo nodes.
1472          */
1473         if (DEVI_IS_DEVICE_REMOVED(dip))
1474                 return (DDI_SUCCESS);
1475 
1476         /*
1477          * if dacf_postattach failed, report it to the framework
1478          * so that it can be retried later at the open time.
1479          */
1480         mutex_enter(&dacf_lock);
1481         rval = dacfc_postattach(dip);
1482         mutex_exit(&dacf_lock);
1483 
1484         /*
1485          * Plumbing during postattach may fail because of the
1486          * underlying device is not ready. This will fail ndi_devi_config()
1487          * in dv_filldir() and a warning message is issued. The message
1488          * from here will explain what happened
1489          */
1490         if (rval != DACF_SUCCESS) {
1491                 cmn_err(CE_WARN, "Postattach failed for %s%d\n",
1492                     ddi_driver_name(dip), ddi_get_instance(dip));
1493                 return (DDI_FAILURE);
1494         }
1495 
1496         return (DDI_SUCCESS);
1497 }
1498 
1499 /*
1500  * Run dacf pre-detach routines
1501  */
1502 static int
1503 predetach_node(dev_info_t *dip, uint_t flag)
1504 {
1505         int ret;
1506 
1507         /*
1508          * Don't auto-detach if DDI_FORCEATTACH or DDI_NO_AUTODETACH
1509          * properties are set.
1510          */
1511         if (flag & NDI_AUTODETACH) {
1512                 struct devnames *dnp;
1513                 int pflag = DDI_PROP_NOTPROM | DDI_PROP_DONTPASS;
1514 
1515                 if ((ddi_prop_get_int(DDI_DEV_T_ANY, dip,
1516                     pflag, DDI_FORCEATTACH, 0) == 1) ||
1517                     (ddi_prop_get_int(DDI_DEV_T_ANY, dip,
1518                     pflag, DDI_NO_AUTODETACH, 0) == 1))
1519                         return (DDI_FAILURE);
1520 
1521                 /* check for driver global version of DDI_NO_AUTODETACH */
1522                 dnp = &devnamesp[DEVI(dip)->devi_major];
1523                 LOCK_DEV_OPS(&dnp->dn_lock);
1524                 if (dnp->dn_flags & DN_NO_AUTODETACH) {
1525                         UNLOCK_DEV_OPS(&dnp->dn_lock);
1526                         return (DDI_FAILURE);
1527                 }
1528                 UNLOCK_DEV_OPS(&dnp->dn_lock);
1529         }
1530 
1531         mutex_enter(&dacf_lock);
1532         ret = dacfc_predetach(dip);
1533         mutex_exit(&dacf_lock);
1534 
1535         return (ret);
1536 }
1537 
1538 /*
1539  * Wrapper for making multiple state transitions
1540  */
1541 
1542 /*
1543  * i_ndi_config_node: upgrade dev_info node into a specified state.
1544  * It is a bit tricky because the locking protocol changes before and
1545  * after a node is bound to a driver. All locks are held external to
1546  * this function.
1547  */
1548 int
1549 i_ndi_config_node(dev_info_t *dip, ddi_node_state_t state, uint_t flag)
1550 {
1551         _NOTE(ARGUNUSED(flag))
1552         int rv = DDI_SUCCESS;
1553 
1554         ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(dip)));
1555 
1556         while ((i_ddi_node_state(dip) < state) && (rv == DDI_SUCCESS)) {
1557 
1558                 /* don't allow any more changes to the device tree */
1559                 if (devinfo_freeze) {
1560                         rv = DDI_FAILURE;
1561                         break;
1562                 }
1563 
1564                 switch (i_ddi_node_state(dip)) {
1565                 case DS_PROTO:
1566                         /*
1567                          * only caller can reference this node, no external
1568                          * locking needed.
1569                          */
1570                         link_node(dip);
1571                         translate_devid((dev_info_t *)dip);
1572                         i_ddi_set_node_state(dip, DS_LINKED);
1573                         break;
1574                 case DS_LINKED:
1575                         /*
1576                          * Three code path may attempt to bind a node:
1577                          * - boot code
1578                          * - add_drv
1579                          * - hotplug thread
1580                          * Boot code is single threaded, add_drv synchronize
1581                          * on a userland lock, and hotplug synchronize on
1582                          * hotplug_lk. There could be a race between add_drv
1583                          * and hotplug thread. We'll live with this until the
1584                          * conversion to top-down loading.
1585                          */
1586                         if ((rv = bind_node(dip)) == DDI_SUCCESS)
1587                                 i_ddi_set_node_state(dip, DS_BOUND);
1588 
1589                         break;
1590                 case DS_BOUND:
1591                         /*
1592                          * The following transitions synchronizes on the
1593                          * per-driver busy changing flag, since we already
1594                          * have a driver.
1595                          */
1596                         if ((rv = init_node(dip)) == DDI_SUCCESS)
1597                                 i_ddi_set_node_state(dip, DS_INITIALIZED);
1598                         break;
1599                 case DS_INITIALIZED:
1600                         if ((rv = probe_node(dip)) == DDI_SUCCESS)
1601                                 i_ddi_set_node_state(dip, DS_PROBED);
1602                         break;
1603                 case DS_PROBED:
1604                         /*
1605                          * If node is retired and persistent, then prevent
1606                          * attach. We can't do this for non-persistent nodes
1607                          * as we would lose evidence that the node existed.
1608                          */
1609                         if (i_ddi_check_retire(dip) == 1 &&
1610                             ndi_dev_is_persistent_node(dip) &&
1611                             retire_prevents_attach == 1) {
1612                                 rv = DDI_FAILURE;
1613                                 break;
1614                         }
1615                         atomic_inc_ulong(&devinfo_attach_detach);
1616                         if ((rv = attach_node(dip)) == DDI_SUCCESS)
1617                                 i_ddi_set_node_state(dip, DS_ATTACHED);
1618                         atomic_dec_ulong(&devinfo_attach_detach);
1619                         break;
1620                 case DS_ATTACHED:
1621                         if ((rv = postattach_node(dip)) == DDI_SUCCESS)
1622                                 i_ddi_set_node_state(dip, DS_READY);
1623                         break;
1624                 case DS_READY:
1625                         break;
1626                 default:
1627                         /* should never reach here */
1628                         ASSERT("unknown devinfo state");
1629                 }
1630         }
1631 
1632         if (ddidebug & DDI_AUDIT)
1633                 da_log_enter(dip);
1634         return (rv);
1635 }
1636 
1637 /*
1638  * i_ndi_unconfig_node: downgrade dev_info node into a specified state.
1639  */
1640 int
1641 i_ndi_unconfig_node(dev_info_t *dip, ddi_node_state_t state, uint_t flag)
1642 {
1643         int     rv = DDI_SUCCESS;
1644 
1645         ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(dip)));
1646 
1647         while ((i_ddi_node_state(dip) > state) && (rv == DDI_SUCCESS)) {
1648 
1649                 /* don't allow any more changes to the device tree */
1650                 if (devinfo_freeze) {
1651                         rv = DDI_FAILURE;
1652                         break;
1653                 }
1654 
1655                 switch (i_ddi_node_state(dip)) {
1656                 case DS_PROTO:
1657                         break;
1658                 case DS_LINKED:
1659                         /*
1660                          * Persistent nodes are only removed by hotplug code
1661                          * .conf nodes synchronizes on per-driver list.
1662                          */
1663                         if ((rv = unlink_node(dip)) == DDI_SUCCESS)
1664                                 i_ddi_set_node_state(dip, DS_PROTO);
1665                         break;
1666                 case DS_BOUND:
1667                         /*
1668                          * The following transitions synchronizes on the
1669                          * per-driver busy changing flag, since we already
1670                          * have a driver.
1671                          */
1672                         if ((rv = unbind_node(dip)) == DDI_SUCCESS)
1673                                 i_ddi_set_node_state(dip, DS_LINKED);
1674                         break;
1675                 case DS_INITIALIZED:
1676                         if ((rv = uninit_node(dip)) == DDI_SUCCESS)
1677                                 i_ddi_set_node_state(dip, DS_BOUND);
1678                         break;
1679                 case DS_PROBED:
1680                         if ((rv = unprobe_node(dip)) == DDI_SUCCESS)
1681                                 i_ddi_set_node_state(dip, DS_INITIALIZED);
1682                         break;
1683                 case DS_ATTACHED:
1684                         atomic_inc_ulong(&devinfo_attach_detach);
1685 
1686                         mutex_enter(&(DEVI(dip)->devi_lock));
1687                         DEVI_SET_DETACHING(dip);
1688                         mutex_exit(&(DEVI(dip)->devi_lock));
1689 
1690                         membar_enter(); /* ensure visibility for hold_devi */
1691 
1692                         if ((rv = detach_node(dip, flag)) == DDI_SUCCESS)
1693                                 i_ddi_set_node_state(dip, DS_PROBED);
1694 
1695                         mutex_enter(&(DEVI(dip)->devi_lock));
1696                         DEVI_CLR_DETACHING(dip);
1697                         mutex_exit(&(DEVI(dip)->devi_lock));
1698 
1699                         atomic_dec_ulong(&devinfo_attach_detach);
1700                         break;
1701                 case DS_READY:
1702                         if ((rv = predetach_node(dip, flag)) == DDI_SUCCESS)
1703                                 i_ddi_set_node_state(dip, DS_ATTACHED);
1704                         break;
1705                 default:
1706                         ASSERT("unknown devinfo state");
1707                 }
1708         }
1709         da_log_enter(dip);
1710         return (rv);
1711 }
1712 
1713 /*
1714  * ddi_initchild: transform node to DS_INITIALIZED state
1715  */
1716 int
1717 ddi_initchild(dev_info_t *parent, dev_info_t *proto)
1718 {
1719         int ret, circ;
1720 
1721         ndi_devi_enter(parent, &circ);
1722         ret = i_ndi_config_node(proto, DS_INITIALIZED, 0);
1723         ndi_devi_exit(parent, circ);
1724 
1725         return (ret);
1726 }
1727 
1728 /*
1729  * ddi_uninitchild: transform node down to DS_BOUND state
1730  */
1731 int
1732 ddi_uninitchild(dev_info_t *dip)
1733 {
1734         int ret, circ;
1735         dev_info_t *parent = ddi_get_parent(dip);
1736         ASSERT(parent);
1737 
1738         ndi_devi_enter(parent, &circ);
1739         ret = i_ndi_unconfig_node(dip, DS_BOUND, 0);
1740         ndi_devi_exit(parent, circ);
1741 
1742         return (ret);
1743 }
1744 
1745 /*
1746  * i_ddi_attachchild: transform node to DS_READY/i_ddi_devi_attached() state
1747  */
1748 static int
1749 i_ddi_attachchild(dev_info_t *dip)
1750 {
1751         dev_info_t      *parent = ddi_get_parent(dip);
1752         int             ret;
1753 
1754         ASSERT(parent && DEVI_BUSY_OWNED(parent));
1755 
1756         if ((i_ddi_node_state(dip) < DS_BOUND) || DEVI_IS_DEVICE_OFFLINE(dip))
1757                 return (DDI_FAILURE);
1758 
1759         ret = i_ndi_config_node(dip, DS_READY, 0);
1760         if (ret == NDI_SUCCESS) {
1761                 ret = DDI_SUCCESS;
1762         } else {
1763                 /*
1764                  * Take it down to DS_INITIALIZED so pm_pre_probe is run
1765                  * on the next attach
1766                  */
1767                 (void) i_ndi_unconfig_node(dip, DS_INITIALIZED, 0);
1768                 ret = DDI_FAILURE;
1769         }
1770 
1771         return (ret);
1772 }
1773 
1774 /*
1775  * i_ddi_detachchild: transform node down to DS_PROBED state
1776  *      If it fails, put it back to DS_READY state.
1777  * NOTE: A node that fails detach may be at DS_ATTACHED instead
1778  * of DS_READY for a small amount of time - this is the source of
1779  * transient DS_READY->DS_ATTACHED->DS_READY state changes.
1780  */
1781 static int
1782 i_ddi_detachchild(dev_info_t *dip, uint_t flags)
1783 {
1784         dev_info_t      *parent = ddi_get_parent(dip);
1785         int             ret;
1786 
1787         ASSERT(parent && DEVI_BUSY_OWNED(parent));
1788 
1789         ret = i_ndi_unconfig_node(dip, DS_PROBED, flags);
1790         if (ret != DDI_SUCCESS)
1791                 (void) i_ndi_config_node(dip, DS_READY, 0);
1792         else
1793                 /* allow pm_pre_probe to reestablish pm state */
1794                 (void) i_ndi_unconfig_node(dip, DS_INITIALIZED, 0);
1795         return (ret);
1796 }
1797 
1798 /*
1799  * Add a child and bind to driver
1800  */
1801 dev_info_t *
1802 ddi_add_child(dev_info_t *pdip, char *name, uint_t nodeid, uint_t unit)
1803 {
1804         int circ;
1805         dev_info_t *dip;
1806 
1807         /* allocate a new node */
1808         dip = i_ddi_alloc_node(pdip, name, nodeid, (int)unit, NULL, KM_SLEEP);
1809 
1810         ndi_devi_enter(pdip, &circ);
1811         (void) i_ndi_config_node(dip, DS_BOUND, 0);
1812         ndi_devi_exit(pdip, circ);
1813         return (dip);
1814 }
1815 
1816 /*
1817  * ddi_remove_child: remove the dip. The parent must be attached and held
1818  */
1819 int
1820 ddi_remove_child(dev_info_t *dip, int dummy)
1821 {
1822         _NOTE(ARGUNUSED(dummy))
1823         int circ, ret;
1824         dev_info_t *parent = ddi_get_parent(dip);
1825         ASSERT(parent);
1826 
1827         ndi_devi_enter(parent, &circ);
1828 
1829         /*
1830          * If we still have children, for example SID nodes marked
1831          * as persistent but not attached, attempt to remove them.
1832          */
1833         if (DEVI(dip)->devi_child) {
1834                 ret = ndi_devi_unconfig(dip, NDI_DEVI_REMOVE);
1835                 if (ret != NDI_SUCCESS) {
1836                         ndi_devi_exit(parent, circ);
1837                         return (DDI_FAILURE);
1838                 }
1839                 ASSERT(DEVI(dip)->devi_child == NULL);
1840         }
1841 
1842         ret = i_ndi_unconfig_node(dip, DS_PROTO, 0);
1843         ndi_devi_exit(parent, circ);
1844 
1845         if (ret != DDI_SUCCESS)
1846                 return (ret);
1847 
1848         ASSERT(i_ddi_node_state(dip) == DS_PROTO);
1849         i_ddi_free_node(dip);
1850         return (DDI_SUCCESS);
1851 }
1852 
1853 /*
1854  * NDI wrappers for ref counting, node allocation, and transitions
1855  */
1856 
1857 /*
1858  * Hold/release the devinfo node itself.
1859  * Caller is assumed to prevent the devi from detaching during this call
1860  */
1861 void
1862 ndi_hold_devi(dev_info_t *dip)
1863 {
1864         mutex_enter(&DEVI(dip)->devi_lock);
1865         ASSERT(DEVI(dip)->devi_ref >= 0);
1866         DEVI(dip)->devi_ref++;
1867         membar_enter();                 /* make sure stores are flushed */
1868         mutex_exit(&DEVI(dip)->devi_lock);
1869 }
1870 
1871 void
1872 ndi_rele_devi(dev_info_t *dip)
1873 {
1874         ASSERT(DEVI(dip)->devi_ref > 0);
1875 
1876         mutex_enter(&DEVI(dip)->devi_lock);
1877         DEVI(dip)->devi_ref--;
1878         membar_enter();                 /* make sure stores are flushed */
1879         mutex_exit(&DEVI(dip)->devi_lock);
1880 }
1881 
1882 int
1883 e_ddi_devi_holdcnt(dev_info_t *dip)
1884 {
1885         return (DEVI(dip)->devi_ref);
1886 }
1887 
1888 /*
1889  * Hold/release the driver the devinfo node is bound to.
1890  */
1891 struct dev_ops *
1892 ndi_hold_driver(dev_info_t *dip)
1893 {
1894         if (i_ddi_node_state(dip) < DS_BOUND)
1895                 return (NULL);
1896 
1897         ASSERT(DEVI(dip)->devi_major != -1);
1898         return (mod_hold_dev_by_major(DEVI(dip)->devi_major));
1899 }
1900 
1901 void
1902 ndi_rele_driver(dev_info_t *dip)
1903 {
1904         ASSERT(i_ddi_node_state(dip) >= DS_BOUND);
1905         mod_rele_dev_by_major(DEVI(dip)->devi_major);
1906 }
1907 
1908 /*
1909  * Single thread entry into devinfo node for modifying its children (devinfo,
1910  * pathinfo, and minor). To verify in ASSERTS use DEVI_BUSY_OWNED macro.
1911  */
1912 void
1913 ndi_devi_enter(dev_info_t *dip, int *circular)
1914 {
1915         struct dev_info *devi = DEVI(dip);
1916         ASSERT(dip != NULL);
1917 
1918         /* for vHCI, enforce (vHCI, pHCI) ndi_deve_enter() order */
1919         ASSERT(!MDI_VHCI(dip) || (mdi_devi_pdip_entered(dip) == 0) ||
1920             DEVI_BUSY_OWNED(dip));
1921 
1922         mutex_enter(&devi->devi_lock);
1923         if (devi->devi_busy_thread == curthread) {
1924                 devi->devi_circular++;
1925         } else {
1926                 while (DEVI_BUSY_CHANGING(devi) && !panicstr)
1927                         cv_wait(&(devi->devi_cv), &(devi->devi_lock));
1928                 if (panicstr) {
1929                         mutex_exit(&devi->devi_lock);
1930                         return;
1931                 }
1932                 devi->devi_flags |= DEVI_BUSY;
1933                 devi->devi_busy_thread = curthread;
1934         }
1935         *circular = devi->devi_circular;
1936         mutex_exit(&devi->devi_lock);
1937 }
1938 
1939 /*
1940  * Release ndi_devi_enter or successful ndi_devi_tryenter.
1941  */
1942 void
1943 ndi_devi_exit(dev_info_t *dip, int circular)
1944 {
1945         struct dev_info *devi = DEVI(dip);
1946         struct dev_info *vdevi;
1947         ASSERT(dip != NULL);
1948 
1949         if (panicstr)
1950                 return;
1951 
1952         mutex_enter(&(devi->devi_lock));
1953         if (circular != 0) {
1954                 devi->devi_circular--;
1955         } else {
1956                 devi->devi_flags &= ~DEVI_BUSY;
1957                 ASSERT(devi->devi_busy_thread == curthread);
1958                 devi->devi_busy_thread = NULL;
1959                 cv_broadcast(&(devi->devi_cv));
1960         }
1961         mutex_exit(&(devi->devi_lock));
1962 
1963         /*
1964          * For pHCI exit we issue a broadcast to vHCI for ndi_devi_config_one()
1965          * doing cv_wait on vHCI.
1966          */
1967         if (MDI_PHCI(dip)) {
1968                 vdevi = DEVI(mdi_devi_get_vdip(dip));
1969                 if (vdevi) {
1970                         mutex_enter(&(vdevi->devi_lock));
1971                         if (vdevi->devi_flags & DEVI_PHCI_SIGNALS_VHCI) {
1972                                 vdevi->devi_flags &= ~DEVI_PHCI_SIGNALS_VHCI;
1973                                 cv_broadcast(&(vdevi->devi_cv));
1974                         }
1975                         mutex_exit(&(vdevi->devi_lock));
1976                 }
1977         }
1978 }
1979 
1980 /*
1981  * Release ndi_devi_enter and wait for possibility of new children, avoiding
1982  * possibility of missing broadcast before getting to cv_timedwait().
1983  */
1984 static void
1985 ndi_devi_exit_and_wait(dev_info_t *dip, int circular, clock_t end_time)
1986 {
1987         struct dev_info *devi = DEVI(dip);
1988         ASSERT(dip != NULL);
1989 
1990         if (panicstr)
1991                 return;
1992 
1993         /*
1994          * We are called to wait for of a new child, and new child can
1995          * only be added if circular is zero.
1996          */
1997         ASSERT(circular == 0);
1998 
1999         /* like ndi_devi_exit with circular of zero */
2000         mutex_enter(&(devi->devi_lock));
2001         devi->devi_flags &= ~DEVI_BUSY;
2002         ASSERT(devi->devi_busy_thread == curthread);
2003         devi->devi_busy_thread = NULL;
2004         cv_broadcast(&(devi->devi_cv));
2005 
2006         /* now wait for new children while still holding devi_lock */
2007         (void) cv_timedwait(&devi->devi_cv, &(devi->devi_lock), end_time);
2008         mutex_exit(&(devi->devi_lock));
2009 }
2010 
2011 /*
2012  * Attempt to single thread entry into devinfo node for modifying its children.
2013  */
2014 int
2015 ndi_devi_tryenter(dev_info_t *dip, int *circular)
2016 {
2017         int rval = 1;              /* assume we enter */
2018         struct dev_info *devi = DEVI(dip);
2019         ASSERT(dip != NULL);
2020 
2021         mutex_enter(&devi->devi_lock);
2022         if (devi->devi_busy_thread == (void *)curthread) {
2023                 devi->devi_circular++;
2024         } else {
2025                 if (!DEVI_BUSY_CHANGING(devi)) {
2026                         devi->devi_flags |= DEVI_BUSY;
2027                         devi->devi_busy_thread = (void *)curthread;
2028                 } else {
2029                         rval = 0;       /* devi is busy */
2030                 }
2031         }
2032         *circular = devi->devi_circular;
2033         mutex_exit(&devi->devi_lock);
2034         return (rval);
2035 }
2036 
2037 /*
2038  * Allocate and initialize a new dev_info structure.
2039  *
2040  * This routine may be called at interrupt time by a nexus in
2041  * response to a hotplug event, therefore memory allocations are
2042  * not allowed to sleep.
2043  */
2044 int
2045 ndi_devi_alloc(dev_info_t *parent, char *node_name, pnode_t nodeid,
2046     dev_info_t **ret_dip)
2047 {
2048         ASSERT(node_name != NULL);
2049         ASSERT(ret_dip != NULL);
2050 
2051         *ret_dip = i_ddi_alloc_node(parent, node_name, nodeid, -1, NULL,
2052             KM_NOSLEEP);
2053         if (*ret_dip == NULL) {
2054                 return (NDI_NOMEM);
2055         }
2056 
2057         return (NDI_SUCCESS);
2058 }
2059 
2060 /*
2061  * Allocate and initialize a new dev_info structure
2062  * This routine may sleep and should not be called at interrupt time
2063  */
2064 void
2065 ndi_devi_alloc_sleep(dev_info_t *parent, char *node_name, pnode_t nodeid,
2066     dev_info_t **ret_dip)
2067 {
2068         ASSERT(node_name != NULL);
2069         ASSERT(ret_dip != NULL);
2070 
2071         *ret_dip = i_ddi_alloc_node(parent, node_name, nodeid, -1, NULL,
2072             KM_SLEEP);
2073         ASSERT(*ret_dip);
2074 }
2075 
2076 /*
2077  * Remove an initialized (but not yet attached) dev_info
 * node from its parent.
2079  */
2080 int
2081 ndi_devi_free(dev_info_t *dip)
2082 {
2083         ASSERT(dip != NULL);
2084 
2085         if (i_ddi_node_state(dip) >= DS_INITIALIZED)
2086                 return (DDI_FAILURE);
2087 
2088         NDI_CONFIG_DEBUG((CE_CONT, "ndi_devi_free: %s%d (%p)\n",
2089             ddi_driver_name(dip), ddi_get_instance(dip), (void *)dip));
2090 
2091         (void) ddi_remove_child(dip, 0);
2092 
2093         return (NDI_SUCCESS);
2094 }
2095 
2096 /*
 * ndi_devi_bind_driver() binds a driver to a given device. If it fails
 * to bind the driver, it returns an appropriate error back. Some drivers
 * may want to know if they actually failed to bind.
2100  */
2101 int
2102 ndi_devi_bind_driver(dev_info_t *dip, uint_t flags)
2103 {
2104         int ret = NDI_FAILURE;
2105         int circ;
2106         dev_info_t *pdip = ddi_get_parent(dip);
2107         ASSERT(pdip);
2108 
2109         NDI_CONFIG_DEBUG((CE_CONT,
2110             "ndi_devi_bind_driver: %s%d (%p) flags: %x\n",
2111             ddi_driver_name(dip), ddi_get_instance(dip), (void *)dip, flags));
2112 
2113         ndi_devi_enter(pdip, &circ);
2114         if (i_ndi_config_node(dip, DS_BOUND, flags) == DDI_SUCCESS)
2115                 ret = NDI_SUCCESS;
2116         ndi_devi_exit(pdip, circ);
2117 
2118         return (ret);
2119 }
2120 
2121 /*
2122  * ndi_devi_unbind_driver: unbind the dip
2123  */
2124 static int
2125 ndi_devi_unbind_driver(dev_info_t *dip)
2126 {
2127         ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(dip)));
2128 
2129         return (i_ndi_unconfig_node(dip, DS_LINKED, 0));
2130 }
2131 
2132 /*
2133  * Misc. help routines called by framework only
2134  */
2135 
2136 /*
2137  * Get the state of node
2138  */
2139 ddi_node_state_t
2140 i_ddi_node_state(dev_info_t *dip)
2141 {
2142         return (DEVI(dip)->devi_node_state);
2143 }
2144 
2145 /*
2146  * Set the state of node
2147  */
2148 void
2149 i_ddi_set_node_state(dev_info_t *dip, ddi_node_state_t state)
2150 {
2151         DEVI(dip)->devi_node_state = state;
2152         membar_enter();                 /* make sure stores are flushed */
2153 }
2154 
2155 /*
2156  * Determine if node is attached. The implementation accommodates transient
2157  * DS_READY->DS_ATTACHED->DS_READY state changes.  Outside this file, this
 * function should be used instead of i_ddi_node_state() DS_ATTACHED/DS_READY
2159  * state checks.
2160  */
2161 int
2162 i_ddi_devi_attached(dev_info_t *dip)
2163 {
2164         return (DEVI(dip)->devi_node_state >= DS_ATTACHED);
2165 }
2166 
2167 /*
2168  * Common function for finding a node in a sibling list given name and addr.
2169  *
2170  * By default, name is matched with devi_node_name. The following
2171  * alternative match strategies are supported:
2172  *
2173  *      FIND_NODE_BY_NODENAME: Match on node name - typical use.
2174  *
2175  *      FIND_NODE_BY_DRIVER: A match on driver name bound to node is conducted.
2176  *              This support is used for support of OBP generic names and
2177  *              for the conversion from driver names to generic names. When
2178  *              more consistency in the generic name environment is achieved
2179  *              (and not needed for upgrade) this support can be removed.
2180  *
2181  *      FIND_NODE_BY_ADDR: Match on just the addr.
2182  *              This support is only used/needed during boot to match
2183  *              a node bound via a path-based driver alias.
2184  *
2185  * If a child is not named (dev_addr == NULL), there are three
2186  * possible actions:
2187  *
2188  *      (1) skip it
2189  *      (2) FIND_ADDR_BY_INIT: bring child to DS_INITIALIZED state
2190  *      (3) FIND_ADDR_BY_CALLBACK: use a caller-supplied callback function
2191  */
2192 #define FIND_NODE_BY_NODENAME   0x01
2193 #define FIND_NODE_BY_DRIVER     0x02
2194 #define FIND_NODE_BY_ADDR       0x04
2195 #define FIND_ADDR_BY_INIT       0x10
2196 #define FIND_ADDR_BY_CALLBACK   0x20
2197 
2198 static dev_info_t *
2199 find_sibling(dev_info_t *head, char *cname, char *caddr, uint_t flag,
2200     int (*callback)(dev_info_t *, char *, int))
2201 {
2202         dev_info_t      *dip;
2203         char            *addr, *buf;
2204         major_t         major;
2205         uint_t          by;
2206 
2207         /* only one way to find a node */
2208         by = flag &
2209             (FIND_NODE_BY_DRIVER | FIND_NODE_BY_NODENAME | FIND_NODE_BY_ADDR);
2210         ASSERT(by && BIT_ONLYONESET(by));
2211 
2212         /* only one way to name a node */
2213         ASSERT(((flag & FIND_ADDR_BY_INIT) == 0) ||
2214             ((flag & FIND_ADDR_BY_CALLBACK) == 0));
2215 
2216         if (by == FIND_NODE_BY_DRIVER) {
2217                 major = ddi_name_to_major(cname);
2218                 if (major == DDI_MAJOR_T_NONE)
2219                         return (NULL);
2220         }
2221 
2222         /* preallocate buffer of naming node by callback */
2223         if (flag & FIND_ADDR_BY_CALLBACK)
2224                 buf = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2225 
2226         /*
2227          * Walk the child list to find a match
2228          */
2229         if (head == NULL)
2230                 return (NULL);
2231         ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(head)));
2232         for (dip = head; dip; dip = ddi_get_next_sibling(dip)) {
2233                 if (by == FIND_NODE_BY_NODENAME) {
2234                         /* match node name */
2235                         if (strcmp(cname, DEVI(dip)->devi_node_name) != 0)
2236                                 continue;
2237                 } else if (by == FIND_NODE_BY_DRIVER) {
2238                         /* match driver major */
2239                         if (DEVI(dip)->devi_major != major)
2240                                 continue;
2241                 }
2242 
2243                 if ((addr = DEVI(dip)->devi_addr) == NULL) {
2244                         /* name the child based on the flag */
2245                         if (flag & FIND_ADDR_BY_INIT) {
2246                                 if (ddi_initchild(ddi_get_parent(dip), dip)
2247                                     != DDI_SUCCESS)
2248                                         continue;
2249                                 addr = DEVI(dip)->devi_addr;
2250                         } else if (flag & FIND_ADDR_BY_CALLBACK) {
2251                                 if ((callback == NULL) || (callback(
2252                                     dip, buf, MAXNAMELEN) != DDI_SUCCESS))
2253                                         continue;
2254                                 addr = buf;
2255                         } else {
2256                                 continue;       /* skip */
2257                         }
2258                 }
2259 
2260                 /* match addr */
2261                 ASSERT(addr != NULL);
2262                 if (strcmp(caddr, addr) == 0)
2263                         break;  /* node found */
2264 
2265         }
2266         if (flag & FIND_ADDR_BY_CALLBACK)
2267                 kmem_free(buf, MAXNAMELEN);
2268         return (dip);
2269 }
2270 
2271 /*
2272  * Find child of pdip with name: cname@caddr
2273  * Called by init_node() to look for duplicate nodes
2274  */
2275 static dev_info_t *
2276 find_duplicate_child(dev_info_t *pdip, dev_info_t *dip)
2277 {
2278         dev_info_t *dup;
2279         char *cname = DEVI(dip)->devi_node_name;
2280         char *caddr = DEVI(dip)->devi_addr;
2281 
2282         /* search nodes before dip */
2283         dup = find_sibling(ddi_get_child(pdip), cname, caddr,
2284             FIND_NODE_BY_NODENAME, NULL);
2285         if (dup != dip)
2286                 return (dup);
2287 
2288         /*
2289          * search nodes after dip; normally this is not needed,
2290          */
2291         return (find_sibling(ddi_get_next_sibling(dip), cname, caddr,
2292             FIND_NODE_BY_NODENAME, NULL));
2293 }
2294 
2295 /*
2296  * Find a child of a given name and address, using a callback to name
 * unnamed children. dname is the driver (binding) name.
2298  */
2299 dev_info_t *
2300 ndi_devi_findchild_by_callback(dev_info_t *pdip, char *dname, char *ua,
2301     int (*make_ua)(dev_info_t *, char *, int))
2302 {
2303         int     by = FIND_ADDR_BY_CALLBACK;
2304 
2305         ASSERT(DEVI_BUSY_OWNED(pdip));
2306         by |= dname ? FIND_NODE_BY_DRIVER : FIND_NODE_BY_ADDR;
2307         return (find_sibling(ddi_get_child(pdip), dname, ua, by, make_ua));
2308 }
2309 
2310 /*
2311  * Find a child of a given name and address, invoking initchild to name
2312  * unnamed children. cname is the node name.
2313  */
2314 static dev_info_t *
2315 find_child_by_name(dev_info_t *pdip, char *cname, char *caddr)
2316 {
2317         dev_info_t      *dip;
2318 
2319         /* attempt search without changing state of preceding siblings */
2320         dip = find_sibling(ddi_get_child(pdip), cname, caddr,
2321             FIND_NODE_BY_NODENAME, NULL);
2322         if (dip)
2323                 return (dip);
2324 
2325         return (find_sibling(ddi_get_child(pdip), cname, caddr,
2326             FIND_NODE_BY_NODENAME|FIND_ADDR_BY_INIT, NULL));
2327 }
2328 
2329 /*
 * Find a child of a given driver name and address, invoking initchild to
 * name unnamed children. cname is the driver name.
2332  */
2333 static dev_info_t *
2334 find_child_by_driver(dev_info_t *pdip, char *cname, char *caddr)
2335 {
2336         dev_info_t      *dip;
2337 
2338         /* attempt search without changing state of preceding siblings */
2339         dip = find_sibling(ddi_get_child(pdip), cname, caddr,
2340             FIND_NODE_BY_DRIVER, NULL);
2341         if (dip)
2342                 return (dip);
2343 
2344         return (find_sibling(ddi_get_child(pdip), cname, caddr,
2345             FIND_NODE_BY_DRIVER|FIND_ADDR_BY_INIT, NULL));
2346 }
2347 
2348 /*
 * Find a child of a given address, invoking initchild to name
 * unnamed children. The node name is not considered; only caddr is matched.
2351  *
2352  * NOTE: This function is only used during boot. One would hope that
2353  * unique sibling unit-addresses on hardware branches of the tree would
2354  * be a requirement to avoid two drivers trying to control the same
2355  * piece of hardware. Unfortunately there are some cases where this
2356  * situation exists (/ssm@0,0/pci@1c,700000 /ssm@0,0/sghsc@1c,700000).
2357  * Until unit-address uniqueness of siblings is guaranteed, use of this
2358  * interface for purposes other than boot should be avoided.
2359  */
2360 static dev_info_t *
2361 find_child_by_addr(dev_info_t *pdip, char *caddr)
2362 {
2363         dev_info_t      *dip;
2364 
2365         /* return NULL if called without a unit-address */
2366         if ((caddr == NULL) || (*caddr == '\0'))
2367                 return (NULL);
2368 
2369         /* attempt search without changing state of preceding siblings */
2370         dip = find_sibling(ddi_get_child(pdip), NULL, caddr,
2371             FIND_NODE_BY_ADDR, NULL);
2372         if (dip)
2373                 return (dip);
2374 
2375         return (find_sibling(ddi_get_child(pdip), NULL, caddr,
2376             FIND_NODE_BY_ADDR|FIND_ADDR_BY_INIT, NULL));
2377 }
2378 
2379 /*
2380  * Deleting a property list. Take care, since some property structures
2381  * may not be fully built.
2382  */
2383 void
2384 i_ddi_prop_list_delete(ddi_prop_t *prop)
2385 {
2386         while (prop) {
2387                 ddi_prop_t *next = prop->prop_next;
2388                 if (prop->prop_name)
2389                         kmem_free(prop->prop_name, strlen(prop->prop_name) + 1);
2390                 if ((prop->prop_len != 0) && prop->prop_val)
2391                         kmem_free(prop->prop_val, prop->prop_len);
2392                 kmem_free(prop, sizeof (struct ddi_prop));
2393                 prop = next;
2394         }
2395 }
2396 
2397 /*
2398  * Duplicate property list
2399  */
2400 ddi_prop_t *
2401 i_ddi_prop_list_dup(ddi_prop_t *prop, uint_t flag)
2402 {
2403         ddi_prop_t *result, *prev, *copy;
2404 
2405         if (prop == NULL)
2406                 return (NULL);
2407 
2408         result = prev = NULL;
2409         for (; prop != NULL; prop = prop->prop_next) {
2410                 ASSERT(prop->prop_name != NULL);
2411                 copy = kmem_zalloc(sizeof (struct ddi_prop), flag);
2412                 if (copy == NULL)
2413                         goto fail;
2414 
2415                 copy->prop_dev = prop->prop_dev;
2416                 copy->prop_flags = prop->prop_flags;
2417                 copy->prop_name = i_ddi_strdup(prop->prop_name, flag);
2418                 if (copy->prop_name == NULL)
2419                         goto fail;
2420 
2421                 if ((copy->prop_len = prop->prop_len) != 0) {
2422                         copy->prop_val = kmem_zalloc(prop->prop_len, flag);
2423                         if (copy->prop_val == NULL)
2424                                 goto fail;
2425 
2426                         bcopy(prop->prop_val, copy->prop_val, prop->prop_len);
2427                 }
2428 
2429                 if (prev == NULL)
2430                         result = prev = copy;
2431                 else
2432                         prev->prop_next = copy;
2433                 prev = copy;
2434         }
2435         return (result);
2436 
2437 fail:
2438         i_ddi_prop_list_delete(result);
2439         return (NULL);
2440 }
2441 
2442 /*
2443  * Create a reference property list, currently used only for
2444  * driver global properties. Created with ref count of 1.
2445  */
2446 ddi_prop_list_t *
2447 i_ddi_prop_list_create(ddi_prop_t *props)
2448 {
2449         ddi_prop_list_t *list = kmem_alloc(sizeof (*list), KM_SLEEP);
2450         list->prop_list = props;
2451         list->prop_ref = 1;
2452         return (list);
2453 }
2454 
2455 /*
2456  * Increment/decrement reference count. The reference is
2457  * protected by dn_lock. The only interfaces modifying
2458  * dn_global_prop_ptr is in impl_make[free]_parlist().
2459  */
2460 void
2461 i_ddi_prop_list_hold(ddi_prop_list_t *prop_list, struct devnames *dnp)
2462 {
2463         ASSERT(prop_list->prop_ref >= 0);
2464         ASSERT(mutex_owned(&dnp->dn_lock));
2465         prop_list->prop_ref++;
2466 }
2467 
2468 void
2469 i_ddi_prop_list_rele(ddi_prop_list_t *prop_list, struct devnames *dnp)
2470 {
2471         ASSERT(prop_list->prop_ref > 0);
2472         ASSERT(mutex_owned(&dnp->dn_lock));
2473         prop_list->prop_ref--;
2474 
2475         if (prop_list->prop_ref == 0) {
2476                 i_ddi_prop_list_delete(prop_list->prop_list);
2477                 kmem_free(prop_list, sizeof (*prop_list));
2478         }
2479 }
2480 
2481 /*
2482  * Free table of classes by drivers
2483  */
2484 void
2485 i_ddi_free_exported_classes(char **classes, int n)
2486 {
2487         if ((n == 0) || (classes == NULL))
2488                 return;
2489 
2490         kmem_free(classes, n * sizeof (char *));
2491 }
2492 
2493 /*
2494  * Get all classes exported by dip
2495  */
2496 int
2497 i_ddi_get_exported_classes(dev_info_t *dip, char ***classes)
2498 {
2499         extern void lock_hw_class_list();
2500         extern void unlock_hw_class_list();
2501         extern int get_class(const char *, char **);
2502 
2503         static char *rootclass = "root";
2504         int n = 0, nclass = 0;
2505         char **buf;
2506 
2507         ASSERT(i_ddi_node_state(dip) >= DS_BOUND);
2508 
2509         if (dip == ddi_root_node())     /* rootnode exports class "root" */
2510                 nclass = 1;
2511         lock_hw_class_list();
2512         nclass += get_class(ddi_driver_name(dip), NULL);
2513         if (nclass == 0) {
2514                 unlock_hw_class_list();
2515                 return (0);             /* no class exported */
2516         }
2517 
2518         *classes = buf = kmem_alloc(nclass * sizeof (char *), KM_SLEEP);
2519         if (dip == ddi_root_node()) {
2520                 *buf++ = rootclass;
2521                 n = 1;
2522         }
2523         n += get_class(ddi_driver_name(dip), buf);
2524         unlock_hw_class_list();
2525 
2526         ASSERT(n == nclass);    /* make sure buf wasn't overrun */
2527         return (nclass);
2528 }
2529 
2530 /*
2531  * Helper functions, returns NULL if no memory.
2532  */
2533 char *
2534 i_ddi_strdup(char *str, uint_t flag)
2535 {
2536         char *copy;
2537 
2538         if (str == NULL)
2539                 return (NULL);
2540 
2541         copy = kmem_alloc(strlen(str) + 1, flag);
2542         if (copy == NULL)
2543                 return (NULL);
2544 
2545         (void) strcpy(copy, str);
2546         return (copy);
2547 }
2548 
2549 /*
2550  * Load driver.conf file for major. Load all if major == -1.
2551  *
2552  * This is called
2553  * - early in boot after devnames array is initialized
2554  * - from vfs code when certain file systems are mounted
2555  * - from add_drv when a new driver is added
2556  */
2557 int
2558 i_ddi_load_drvconf(major_t major)
2559 {
2560         extern int modrootloaded;
2561 
2562         major_t low, high, m;
2563 
2564         if (major == DDI_MAJOR_T_NONE) {
2565                 low = 0;
2566                 high = devcnt - 1;
2567         } else {
2568                 if (major >= devcnt)
2569                         return (EINVAL);
2570                 low = high = major;
2571         }
2572 
2573         for (m = low; m <= high; m++) {
2574                 struct devnames *dnp = &devnamesp[m];
2575                 LOCK_DEV_OPS(&dnp->dn_lock);
2576                 dnp->dn_flags &= ~(DN_DRIVER_HELD|DN_DRIVER_INACTIVE);
2577                 (void) impl_make_parlist(m);
2578                 UNLOCK_DEV_OPS(&dnp->dn_lock);
2579         }
2580 
2581         if (modrootloaded) {
2582                 ddi_walk_devs(ddi_root_node(), reset_nexus_flags,
2583                     (void *)(uintptr_t)major);
2584         }
2585 
2586         /* build dn_list from old entries in path_to_inst */
2587         e_ddi_unorphan_instance_nos();
2588         return (0);
2589 }
2590 
2591 /*
2592  * Unload a specific driver.conf.
2593  * Don't support unload all because it doesn't make any sense
2594  */
2595 int
2596 i_ddi_unload_drvconf(major_t major)
2597 {
2598         int error;
2599         struct devnames *dnp;
2600 
2601         if (major >= devcnt)
2602                 return (EINVAL);
2603 
2604         /*
2605          * Take the per-driver lock while unloading driver.conf
2606          */
2607         dnp = &devnamesp[major];
2608         LOCK_DEV_OPS(&dnp->dn_lock);
2609         error = impl_free_parlist(major);
2610         UNLOCK_DEV_OPS(&dnp->dn_lock);
2611         return (error);
2612 }
2613 
/*
 * Merge a .conf node. This is called by nexus drivers to augment
 * hw node with properties specified in driver.conf file. This function
 * takes a callback routine to name nexus children.
 * The parent node must be held busy.
 *
 * dip is the .conf node to merge.  make_ua is the naming callback; it is
 * handed to ndi_devi_findchild_by_callback() to locate, under dip's parent,
 * the node with the same binding name and unit address as dip.
 *
 * It returns DDI_SUCCESS if the node is merged and DDI_FAILURE otherwise.
 * DDI_SUCCESS is also returned, without anything being merged, when the
 * node found is itself a .conf node or is no longer in a state that
 * permits merging (see the N.B. below).
 */
int
ndi_merge_node(dev_info_t *dip, int (*make_ua)(dev_info_t *, char *, int))
{
        dev_info_t *hwdip;

        /* only a named .conf (non-persistent) node may be merged */
        ASSERT(ndi_dev_is_persistent_node(dip) == 0);
        ASSERT(ddi_get_name_addr(dip) != NULL);

        hwdip = ndi_devi_findchild_by_callback(ddi_get_parent(dip),
            ddi_binding_name(dip), ddi_get_name_addr(dip), make_ua);

        /*
         * Look for the hardware node that is the target of the merge;
         * return failure if not found.  Finding only dip itself counts
         * as not found.
         */
        if ((hwdip == NULL) || (hwdip == dip)) {
                char *buf = kmem_alloc(MAXNAMELEN, KM_SLEEP);
                NDI_CONFIG_DEBUG((CE_WARN, "No HW node to merge conf node %s",
                    ddi_deviname(dip, buf)));
                kmem_free(buf, MAXNAMELEN);
                return (DDI_FAILURE);
        }

        /*
         * Make sure the hardware node is uninitialized and has no property.
         * This may not be the case if new .conf files are loaded after some
         * hardware nodes have already been initialized and attached.
         *
         * N.B. We return success here because the node was *intended*
         *      to be a merge node because there is a hw node with the name.
         */
        mutex_enter(&DEVI(hwdip)->devi_lock);
        if (ndi_dev_is_persistent_node(hwdip) == 0) {
                /* the match is another .conf node, not a hw node */
                char *buf;
                mutex_exit(&DEVI(hwdip)->devi_lock);

                buf = kmem_alloc(MAXNAMELEN, KM_SLEEP);
                NDI_CONFIG_DEBUG((CE_NOTE, "Duplicate .conf node %s",
                    ddi_deviname(dip, buf)));
                kmem_free(buf, MAXNAMELEN);
                return (DDI_SUCCESS);
        }

        /*
         * If it is possible that the hardware has already been touched
         * then don't merge.
         */
        if (i_ddi_node_state(hwdip) >= DS_INITIALIZED ||
            (DEVI(hwdip)->devi_sys_prop_ptr != NULL) ||
            (DEVI(hwdip)->devi_drv_prop_ptr != NULL)) {
                char *buf;
                mutex_exit(&DEVI(hwdip)->devi_lock);

                buf = kmem_alloc(MAXNAMELEN, KM_SLEEP);
                NDI_CONFIG_DEBUG((CE_NOTE,
                    "!Cannot merge .conf node %s with hw node %p "
                    "-- not in proper state",
                    ddi_deviname(dip, buf), (void *)hwdip));
                kmem_free(buf, MAXNAMELEN);
                return (DDI_SUCCESS);
        }

        /*
         * Hand the .conf node's system and driver property lists over to
         * the hw node; the .conf node is left with none.  hwdip's
         * devi_lock is still held from above, so the two devi_locks are
         * nested here: hwdip first, then dip.
         */
        mutex_enter(&DEVI(dip)->devi_lock);
        DEVI(hwdip)->devi_sys_prop_ptr = DEVI(dip)->devi_sys_prop_ptr;
        DEVI(hwdip)->devi_drv_prop_ptr = DEVI(dip)->devi_drv_prop_ptr;
        DEVI(dip)->devi_sys_prop_ptr = NULL;
        DEVI(dip)->devi_drv_prop_ptr = NULL;
        mutex_exit(&DEVI(dip)->devi_lock);
        mutex_exit(&DEVI(hwdip)->devi_lock);

        return (DDI_SUCCESS);
}
2694 
/*
 * Merge a "wildcard" .conf node. This is called by nexus drivers to
 * augment a set of hw nodes with properties specified in driver.conf file.
 * The parent node must be held busy.
 *
 * Every persistent child of dip's parent that is bound to the same driver
 * major as dip, has not yet reached DS_INITIALIZED and has no system or
 * driver properties receives its own copy of dip's property lists.
 *
 * There is no failure mode, since the nexus may or may not have child
 * nodes bound to the driver specified by the wildcard node.
 */
void
ndi_merge_wildcard_node(dev_info_t *dip)
{
        dev_info_t *hwdip;
        dev_info_t *pdip = ddi_get_parent(dip);
        major_t major = ddi_driver_major(dip);

        /* never attempt to merge a hw node */
        ASSERT(ndi_dev_is_persistent_node(dip) == 0);
        /* must be bound to a driver major number */
        ASSERT(major != DDI_MAJOR_T_NONE);

        /*
         * Walk the child list to find all nodes bound to major
         * and copy properties.  dip's devi_lock is held for the whole
         * walk; each candidate's devi_lock is taken inside it.
         */
        mutex_enter(&DEVI(dip)->devi_lock);
        ASSERT(DEVI_BUSY_OWNED(pdip));
        for (hwdip = ddi_get_child(pdip); hwdip;
            hwdip = ddi_get_next_sibling(hwdip)) {
                /*
                 * Skip nodes not bound to same driver
                 */
                if (ddi_driver_major(hwdip) != major)
                        continue;

                /*
                 * Skip .conf nodes
                 */
                if (ndi_dev_is_persistent_node(hwdip) == 0)
                        continue;

                /*
                 * Make sure the node is uninitialized and has no property.
                 */
                mutex_enter(&DEVI(hwdip)->devi_lock);
                if (i_ddi_node_state(hwdip) >= DS_INITIALIZED ||
                    (DEVI(hwdip)->devi_sys_prop_ptr != NULL) ||
                    (DEVI(hwdip)->devi_drv_prop_ptr != NULL)) {
                        mutex_exit(&DEVI(hwdip)->devi_lock);
                        NDI_CONFIG_DEBUG((CE_NOTE, "HW node %p state not "
                            "suitable for merging wildcard conf node %s",
                            (void *)hwdip, ddi_node_name(dip)));
                        continue;
                }

                /*
                 * Duplicate (rather than move) the lists: the wildcard
                 * node keeps its own so that further matching siblings
                 * can be given a copy as well.
                 */
                DEVI(hwdip)->devi_sys_prop_ptr =
                    i_ddi_prop_list_dup(DEVI(dip)->devi_sys_prop_ptr, KM_SLEEP);
                DEVI(hwdip)->devi_drv_prop_ptr =
                    i_ddi_prop_list_dup(DEVI(dip)->devi_drv_prop_ptr, KM_SLEEP);
                mutex_exit(&DEVI(hwdip)->devi_lock);
        }
        mutex_exit(&DEVI(dip)->devi_lock);
}
2757 
2758 /*
2759  * Return the major number based on the compatible property. This interface
2760  * may be used in situations where we are trying to detect if a better driver
2761  * now exists for a device, so it must use the 'compatible' property.  If
2762  * a non-NULL formp is specified and the binding was based on compatible then
2763  * return the pointer to the form used in *formp.
2764  */
2765 major_t
2766 ddi_compatible_driver_major(dev_info_t *dip, char **formp)
2767 {
2768         struct dev_info *devi = DEVI(dip);
2769         void            *compat;
2770         size_t          len;
2771         char            *p = NULL;
2772         major_t         major = DDI_MAJOR_T_NONE;
2773 
2774         if (formp)
2775                 *formp = NULL;
2776 
2777         if (ddi_prop_exists(DDI_DEV_T_NONE, dip, DDI_PROP_DONTPASS,
2778             "ddi-assigned")) {
2779                 major = ddi_name_to_major("nulldriver");
2780                 return (major);
2781         }
2782 
2783         /*
2784          * Highest precedence binding is a path-oriented alias. Since this
2785          * requires a 'path', this type of binding occurs via more obtuse
2786          * 'rebind'. The need for a path-oriented alias 'rebind' is detected
2787          * after a successful DDI_CTLOPS_INITCHILD to another driver: this is
2788          * is the first point at which the unit-address (or instance) of the
2789          * last component of the path is available (even though the path is
2790          * bound to the wrong driver at this point).
2791          */
2792         if (devi->devi_flags & DEVI_REBIND) {
2793                 p = devi->devi_rebinding_name;
2794                 major = ddi_name_to_major(p);
2795                 if (driver_active(major)) {
2796                         if (formp)
2797                                 *formp = p;
2798                         return (major);
2799                 }
2800 
2801                 /*
2802                  * If for some reason devi_rebinding_name no longer resolves
2803                  * to a proper driver then clear DEVI_REBIND.
2804                  */
2805                 mutex_enter(&devi->devi_lock);
2806                 devi->devi_flags &= ~DEVI_REBIND;
2807                 mutex_exit(&devi->devi_lock);
2808         }
2809 
2810         /* look up compatible property */
2811         (void) lookup_compatible(dip, KM_SLEEP);
2812         compat = (void *)(devi->devi_compat_names);
2813         len = devi->devi_compat_length;
2814 
2815         /* find the highest precedence compatible form with a driver binding */
2816         while ((p = prom_decode_composite_string(compat, len, p)) != NULL) {
2817                 major = ddi_name_to_major(p);
2818                 if (driver_active(major)) {
2819                         if (formp)
2820                                 *formp = p;
2821                         return (major);
2822                 }
2823         }
2824 
2825         /*
2826          * none of the compatible forms have a driver binding, see if
2827          * the node name has a driver binding.
2828          */
2829         major = ddi_name_to_major(ddi_node_name(dip));
2830         if (driver_active(major))
2831                 return (major);
2832 
2833         /* no driver */
2834         return (DDI_MAJOR_T_NONE);
2835 }
2836 
/*
 * Static helper functions
 */
2840 
2841 /*
2842  * lookup the "compatible" property and cache it's contents in the
2843  * device node.
2844  */
2845 static int
2846 lookup_compatible(dev_info_t *dip, uint_t flag)
2847 {
2848         int rv;
2849         int prop_flags;
2850         uint_t ncompatstrs;
2851         char **compatstrpp;
2852         char *di_compat_strp;
2853         size_t di_compat_strlen;
2854 
2855         if (DEVI(dip)->devi_compat_names) {
2856                 return (DDI_SUCCESS);
2857         }
2858 
2859         prop_flags = DDI_PROP_TYPE_STRING | DDI_PROP_DONTPASS;
2860 
2861         if (flag & KM_NOSLEEP) {
2862                 prop_flags |= DDI_PROP_DONTSLEEP;
2863         }
2864 
2865         if (ndi_dev_is_prom_node(dip) == 0) {
2866                 prop_flags |= DDI_PROP_NOTPROM;
2867         }
2868 
2869         rv = ddi_prop_lookup_common(DDI_DEV_T_ANY, dip, prop_flags,
2870             "compatible", &compatstrpp, &ncompatstrs,
2871             ddi_prop_fm_decode_strings);
2872 
2873         if (rv == DDI_PROP_NOT_FOUND) {
2874                 return (DDI_SUCCESS);
2875         }
2876 
2877         if (rv != DDI_PROP_SUCCESS) {
2878                 return (DDI_FAILURE);
2879         }
2880 
2881         /*
2882          * encode the compatible property data in the dev_info node
2883          */
2884         rv = DDI_SUCCESS;
2885         if (ncompatstrs != 0) {
2886                 di_compat_strp = encode_composite_string(compatstrpp,
2887                     ncompatstrs, &di_compat_strlen, flag);
2888                 if (di_compat_strp != NULL) {
2889                         DEVI(dip)->devi_compat_names = di_compat_strp;
2890                         DEVI(dip)->devi_compat_length = di_compat_strlen;
2891                 } else {
2892                         rv = DDI_FAILURE;
2893                 }
2894         }
2895         ddi_prop_free(compatstrpp);
2896         return (rv);
2897 }
2898 
2899 /*
2900  * Create a composite string from a list of strings.
2901  *
2902  * A composite string consists of a single buffer containing one
2903  * or more NULL terminated strings.
2904  */
2905 static char *
2906 encode_composite_string(char **strings, uint_t nstrings, size_t *retsz,
2907     uint_t flag)
2908 {
2909         uint_t index;
2910         char  **strpp;
2911         uint_t slen;
2912         size_t cbuf_sz = 0;
2913         char *cbuf_p;
2914         char *cbuf_ip;
2915 
2916         if (strings == NULL || nstrings == 0 || retsz == NULL) {
2917                 return (NULL);
2918         }
2919 
2920         for (index = 0, strpp = strings; index < nstrings; index++)
2921                 cbuf_sz += strlen(*(strpp++)) + 1;
2922 
2923         if ((cbuf_p = kmem_alloc(cbuf_sz, flag)) == NULL) {
2924                 cmn_err(CE_NOTE,
2925                     "?failed to allocate device node compatstr");
2926                 return (NULL);
2927         }
2928 
2929         cbuf_ip = cbuf_p;
2930         for (index = 0, strpp = strings; index < nstrings; index++) {
2931                 slen = strlen(*strpp);
2932                 bcopy(*(strpp++), cbuf_ip, slen);
2933                 cbuf_ip += slen;
2934                 *(cbuf_ip++) = '\0';
2935         }
2936 
2937         *retsz = cbuf_sz;
2938         return (cbuf_p);
2939 }
2940 
2941 static void
2942 link_to_driver_list(dev_info_t *dip)
2943 {
2944         major_t major = DEVI(dip)->devi_major;
2945         struct devnames *dnp;
2946 
2947         ASSERT(major != DDI_MAJOR_T_NONE);
2948 
2949         /*
2950          * Remove from orphan list
2951          */
2952         if (ndi_dev_is_persistent_node(dip)) {
2953                 dnp = &orphanlist;
2954                 remove_from_dn_list(dnp, dip);
2955         }
2956 
2957         /*
2958          * Add to per driver list
2959          */
2960         dnp = &devnamesp[major];
2961         add_to_dn_list(dnp, dip);
2962 }
2963 
2964 static void
2965 unlink_from_driver_list(dev_info_t *dip)
2966 {
2967         major_t major = DEVI(dip)->devi_major;
2968         struct devnames *dnp;
2969 
2970         ASSERT(major != DDI_MAJOR_T_NONE);
2971 
2972         /*
2973          * Remove from per-driver list
2974          */
2975         dnp = &devnamesp[major];
2976         remove_from_dn_list(dnp, dip);
2977 
2978         /*
2979          * Add to orphan list
2980          */
2981         if (ndi_dev_is_persistent_node(dip)) {
2982                 dnp = &orphanlist;
2983                 add_to_dn_list(dnp, dip);
2984         }
2985 }
2986 
2987 /*
2988  * scan the per-driver list looking for dev_info "dip"
2989  */
2990 static dev_info_t *
2991 in_dn_list(struct devnames *dnp, dev_info_t *dip)
2992 {
2993         struct dev_info *idevi;
2994 
2995         if ((idevi = DEVI(dnp->dn_head)) == NULL)
2996                 return (NULL);
2997 
2998         while (idevi) {
2999                 if (idevi == DEVI(dip))
3000                         return (dip);
3001                 idevi = idevi->devi_next;
3002         }
3003         return (NULL);
3004 }
3005 
3006 /*
3007  * insert devinfo node 'dip' into the per-driver instance list
3008  * headed by 'dnp'
3009  *
3010  * Nodes on the per-driver list are ordered: HW - SID - PSEUDO.  The order is
3011  * required for merging of .conf file data to work properly.
3012  */
3013 static void
3014 add_to_ordered_dn_list(struct devnames *dnp, dev_info_t *dip)
3015 {
3016         dev_info_t **dipp;
3017 
3018         ASSERT(mutex_owned(&(dnp->dn_lock)));
3019 
3020         dipp = &dnp->dn_head;
3021         if (ndi_dev_is_prom_node(dip)) {
3022                 /*
3023                  * Find the first non-prom node or end of list
3024                  */
3025                 while (*dipp && (ndi_dev_is_prom_node(*dipp) != 0)) {
3026                         dipp = (dev_info_t **)&DEVI(*dipp)->devi_next;
3027                 }
3028         } else if (ndi_dev_is_persistent_node(dip)) {
3029                 /*
3030                  * Find the first non-persistent node
3031                  */
3032                 while (*dipp && (ndi_dev_is_persistent_node(*dipp) != 0)) {
3033                         dipp = (dev_info_t **)&DEVI(*dipp)->devi_next;
3034                 }
3035         } else {
3036                 /*
3037                  * Find the end of the list
3038                  */
3039                 while (*dipp) {
3040                         dipp = (dev_info_t **)&DEVI(*dipp)->devi_next;
3041                 }
3042         }
3043 
3044         DEVI(dip)->devi_next = DEVI(*dipp);
3045         *dipp = dip;
3046 }
3047 
3048 /*
3049  * add a list of device nodes to the device node list in the
3050  * devnames structure
3051  */
3052 static void
3053 add_to_dn_list(struct devnames *dnp, dev_info_t *dip)
3054 {
3055         /*
3056          * Look to see if node already exists
3057          */
3058         LOCK_DEV_OPS(&(dnp->dn_lock));
3059         if (in_dn_list(dnp, dip)) {
3060                 cmn_err(CE_NOTE, "add_to_dn_list: node %s already in list",
3061                     DEVI(dip)->devi_node_name);
3062         } else {
3063                 add_to_ordered_dn_list(dnp, dip);
3064         }
3065         UNLOCK_DEV_OPS(&(dnp->dn_lock));
3066 }
3067 
3068 static void
3069 remove_from_dn_list(struct devnames *dnp, dev_info_t *dip)
3070 {
3071         dev_info_t **plist;
3072 
3073         LOCK_DEV_OPS(&(dnp->dn_lock));
3074 
3075         plist = (dev_info_t **)&dnp->dn_head;
3076         while (*plist && (*plist != dip)) {
3077                 plist = (dev_info_t **)&DEVI(*plist)->devi_next;
3078         }
3079 
3080         if (*plist != NULL) {
3081                 ASSERT(*plist == dip);
3082                 *plist = (dev_info_t *)(DEVI(dip)->devi_next);
3083                 DEVI(dip)->devi_next = NULL;
3084         } else {
3085                 NDI_CONFIG_DEBUG((CE_NOTE,
3086                     "remove_from_dn_list: node %s not found in list",
3087                     DEVI(dip)->devi_node_name));
3088         }
3089 
3090         UNLOCK_DEV_OPS(&(dnp->dn_lock));
3091 }
3092 
3093 /*
3094  * Add and remove reference driver global property list
3095  */
3096 static void
3097 add_global_props(dev_info_t *dip)
3098 {
3099         struct devnames *dnp;
3100         ddi_prop_list_t *plist;
3101 
3102         ASSERT(DEVI(dip)->devi_global_prop_list == NULL);
3103         ASSERT(DEVI(dip)->devi_major != DDI_MAJOR_T_NONE);
3104 
3105         dnp = &devnamesp[DEVI(dip)->devi_major];
3106         LOCK_DEV_OPS(&dnp->dn_lock);
3107         plist = dnp->dn_global_prop_ptr;
3108         if (plist == NULL) {
3109                 UNLOCK_DEV_OPS(&dnp->dn_lock);
3110                 return;
3111         }
3112         i_ddi_prop_list_hold(plist, dnp);
3113         UNLOCK_DEV_OPS(&dnp->dn_lock);
3114 
3115         mutex_enter(&DEVI(dip)->devi_lock);
3116         DEVI(dip)->devi_global_prop_list = plist;
3117         mutex_exit(&DEVI(dip)->devi_lock);
3118 }
3119 
3120 static void
3121 remove_global_props(dev_info_t *dip)
3122 {
3123         ddi_prop_list_t *proplist;
3124 
3125         mutex_enter(&DEVI(dip)->devi_lock);
3126         proplist = DEVI(dip)->devi_global_prop_list;
3127         DEVI(dip)->devi_global_prop_list = NULL;
3128         mutex_exit(&DEVI(dip)->devi_lock);
3129 
3130         if (proplist) {
3131                 major_t major;
3132                 struct devnames *dnp;
3133 
3134                 major = ddi_driver_major(dip);
3135                 ASSERT(major != DDI_MAJOR_T_NONE);
3136                 dnp = &devnamesp[major];
3137                 LOCK_DEV_OPS(&dnp->dn_lock);
3138                 i_ddi_prop_list_rele(proplist, dnp);
3139                 UNLOCK_DEV_OPS(&dnp->dn_lock);
3140         }
3141 }
3142 
#ifdef DEBUG
/*
 * Set this variable to '0' to disable the optimization,
 * and to 2 to print debug message.
 */
static int optimize_dtree = 1;

/*
 * Print "<devi> <service> -> <adevi>", i.e. which node 'adevi' the bus
 * operation named by 'service' has been redirected to for 'devi'.  An
 * empty name for adevi is printed as "root".  Nothing is printed unless
 * optimize_dtree is 2 or more.
 */
static void
debug_dtree(dev_info_t *devi, struct dev_info *adevi, char *service)
{
        char *deviname, *adeviname;
        char *devibuf, *adevibuf;

        /*
         * Don't print unless optimize dtree is set to 2+
         */
        if (optimize_dtree <= 1)
                return;

        /*
         * Each name gets a buffer of its own.  Both names are printed by
         * a single cmn_err() call, so formatting them into one shared
         * buffer would overwrite the first name with the second.
         */
        devibuf = kmem_alloc(MAXNAMELEN, KM_SLEEP);
        adevibuf = kmem_alloc(MAXNAMELEN, KM_SLEEP);

        deviname = ddi_deviname(devi, devibuf);
        adeviname = ddi_deviname((dev_info_t *)adevi, adevibuf);
        if (*adeviname == '\0')
                adeviname = "root";

        cmn_err(CE_CONT, "%s %s -> %s\n", deviname, service, adeviname);

        kmem_free(adevibuf, MAXNAMELEN);
        kmem_free(devibuf, MAXNAMELEN);
}
#else /* DEBUG */
#define debug_dtree(a1, a2, a3)  /* nothing */
#endif  /* DEBUG */
3174 
/*
 * Fill in devi's cached per-operation "devi_bus_*" pointers.
 *
 * Every pointer is first set to devi's immediate parent, and the cached
 * bind/unbind functions to the parent's own bus_ops entries.  Then, unless
 * the optimization has been switched off (DEBUG kernels only, by setting
 * optimize_dtree to 0), each operation whose entry in the parent's bus_ops
 * is the generic routine compared against below instead inherits whatever
 * the parent itself has cached for that operation.
 *
 * NOTE(review): presumably the generic routines (ddi_dma_allochdl(),
 * ddi_ctlops(), ...) merely pass the request further up the tree, which is
 * what would make bypassing such a parent safe -- confirm against their
 * definitions.
 */
static void
ddi_optimize_dtree(dev_info_t *devi)
{
        struct dev_info *pdevi;
        struct bus_ops *b;

        pdevi = DEVI(devi)->devi_parent;
        ASSERT(pdevi);

        /*
         * Set the unoptimized values
         */
        DEVI(devi)->devi_bus_map_fault = pdevi;
        DEVI(devi)->devi_bus_dma_allochdl = pdevi;
        DEVI(devi)->devi_bus_dma_freehdl = pdevi;
        DEVI(devi)->devi_bus_dma_bindhdl = pdevi;
        DEVI(devi)->devi_bus_dma_bindfunc =
            pdevi->devi_ops->devo_bus_ops->bus_dma_bindhdl;
        DEVI(devi)->devi_bus_dma_unbindhdl = pdevi;
        DEVI(devi)->devi_bus_dma_unbindfunc =
            pdevi->devi_ops->devo_bus_ops->bus_dma_unbindhdl;
        DEVI(devi)->devi_bus_dma_flush = pdevi;
        DEVI(devi)->devi_bus_dma_win = pdevi;
        DEVI(devi)->devi_bus_dma_ctl = pdevi;
        DEVI(devi)->devi_bus_ctl = pdevi;

#ifdef DEBUG
        /* optimization disabled: keep the unoptimized values set above */
        if (optimize_dtree == 0)
                return;
#endif /* DEBUG */

        b = pdevi->devi_ops->devo_bus_ops;

        /*
         * For each operation below: if the parent's bus_ops entry is the
         * generic routine, reuse the node the parent has cached for it.
         */
        if (i_ddi_map_fault == b->bus_map_fault) {
                DEVI(devi)->devi_bus_map_fault = pdevi->devi_bus_map_fault;
                debug_dtree(devi, DEVI(devi)->devi_bus_map_fault,
                    "bus_map_fault");
        }

        if (ddi_dma_allochdl == b->bus_dma_allochdl) {
                DEVI(devi)->devi_bus_dma_allochdl =
                    pdevi->devi_bus_dma_allochdl;
                debug_dtree(devi, DEVI(devi)->devi_bus_dma_allochdl,
                    "bus_dma_allochdl");
        }

        if (ddi_dma_freehdl == b->bus_dma_freehdl) {
                DEVI(devi)->devi_bus_dma_freehdl = pdevi->devi_bus_dma_freehdl;
                debug_dtree(devi, DEVI(devi)->devi_bus_dma_freehdl,
                    "bus_dma_freehdl");
        }

        /*
         * bind and unbind also cache the function itself; it is taken
         * from the bus_ops of the node the parent has cached.
         */
        if (ddi_dma_bindhdl == b->bus_dma_bindhdl) {
                DEVI(devi)->devi_bus_dma_bindhdl = pdevi->devi_bus_dma_bindhdl;
                DEVI(devi)->devi_bus_dma_bindfunc =
                    pdevi->devi_bus_dma_bindhdl->devi_ops->
                    devo_bus_ops->bus_dma_bindhdl;
                debug_dtree(devi, DEVI(devi)->devi_bus_dma_bindhdl,
                    "bus_dma_bindhdl");
        }

        if (ddi_dma_unbindhdl == b->bus_dma_unbindhdl) {
                DEVI(devi)->devi_bus_dma_unbindhdl =
                    pdevi->devi_bus_dma_unbindhdl;
                DEVI(devi)->devi_bus_dma_unbindfunc =
                    pdevi->devi_bus_dma_unbindhdl->devi_ops->
                    devo_bus_ops->bus_dma_unbindhdl;
                debug_dtree(devi, DEVI(devi)->devi_bus_dma_unbindhdl,
                    "bus_dma_unbindhdl");
        }

        if (ddi_dma_flush == b->bus_dma_flush) {
                DEVI(devi)->devi_bus_dma_flush = pdevi->devi_bus_dma_flush;
                debug_dtree(devi, DEVI(devi)->devi_bus_dma_flush,
                    "bus_dma_flush");
        }

        if (ddi_dma_win == b->bus_dma_win) {
                DEVI(devi)->devi_bus_dma_win = pdevi->devi_bus_dma_win;
                debug_dtree(devi, DEVI(devi)->devi_bus_dma_win,
                    "bus_dma_win");
        }

        if (ddi_dma_mctl == b->bus_dma_ctl) {
                DEVI(devi)->devi_bus_dma_ctl = pdevi->devi_bus_dma_ctl;
                debug_dtree(devi, DEVI(devi)->devi_bus_dma_ctl, "bus_dma_ctl");
        }

        if (ddi_ctlops == b->bus_ctl) {
                DEVI(devi)->devi_bus_ctl = pdevi->devi_bus_ctl;
                debug_dtree(devi, DEVI(devi)->devi_bus_ctl, "bus_ctl");
        }
}
3268 
3269 #define MIN_DEVINFO_LOG_SIZE    max_ncpus
3270 #define MAX_DEVINFO_LOG_SIZE    max_ncpus * 10
3271 
3272 static void
3273 da_log_init()
3274 {
3275         devinfo_log_header_t *dh;
3276         int logsize = devinfo_log_size;
3277 
3278         if (logsize == 0)
3279                 logsize = MIN_DEVINFO_LOG_SIZE;
3280         else if (logsize > MAX_DEVINFO_LOG_SIZE)
3281                 logsize = MAX_DEVINFO_LOG_SIZE;
3282 
3283         dh = kmem_alloc(logsize * PAGESIZE, KM_SLEEP);
3284         mutex_init(&dh->dh_lock, NULL, MUTEX_DEFAULT, NULL);
3285         dh->dh_max = ((logsize * PAGESIZE) - sizeof (*dh)) /
3286             sizeof (devinfo_audit_t) + 1;
3287         dh->dh_curr = -1;
3288         dh->dh_hits = 0;
3289 
3290         devinfo_audit_log = dh;
3291 }
3292 
3293 /*
3294  * Log the stack trace in per-devinfo audit structure and also enter
3295  * it into a system wide log for recording the time history.
3296  */
3297 static void
3298 da_log_enter(dev_info_t *dip)
3299 {
3300         devinfo_audit_t *da_log, *da = DEVI(dip)->devi_audit;
3301         devinfo_log_header_t *dh = devinfo_audit_log;
3302 
3303         if (devinfo_audit_log == NULL)
3304                 return;
3305 
3306         ASSERT(da != NULL);
3307 
3308         da->da_devinfo = dip;
3309         da->da_timestamp = gethrtime();
3310         da->da_thread = curthread;
3311         da->da_node_state = DEVI(dip)->devi_node_state;
3312         da->da_device_state = DEVI(dip)->devi_state;
3313         da->da_depth = getpcstack(da->da_stack, DDI_STACK_DEPTH);
3314 
3315         /*
3316          * Copy into common log and note the location for tracing history
3317          */
3318         mutex_enter(&dh->dh_lock);
3319         dh->dh_hits++;
3320         dh->dh_curr++;
3321         if (dh->dh_curr >= dh->dh_max)
3322                 dh->dh_curr -= dh->dh_max;
3323         da_log = &dh->dh_entry[dh->dh_curr];
3324         mutex_exit(&dh->dh_lock);
3325 
3326         bcopy(da, da_log, sizeof (devinfo_audit_t));
3327         da->da_lastlog = da_log;
3328 }
3329 
3330 static void
3331 attach_drivers()
3332 {
3333         int i;
3334         for (i = 0; i < devcnt; i++) {
3335                 struct devnames *dnp = &devnamesp[i];
3336                 if ((dnp->dn_flags & DN_FORCE_ATTACH) &&
3337                     (ddi_hold_installed_driver((major_t)i) != NULL))
3338                         ddi_rele_driver((major_t)i);
3339         }
3340 }
3341 
3342 /*
3343  * Launch a thread to force attach drivers. This avoids penalty on boot time.
3344  */
3345 void
3346 i_ddi_forceattach_drivers()
3347 {
3348 
3349         /*
3350          * Attach IB VHCI driver before the force-attach thread attaches the
3351          * IB HCA driver. IB HCA driver will fail if IB Nexus has not yet
3352          * been attached.
3353          */
3354         (void) ddi_hold_installed_driver(ddi_name_to_major("ib"));
3355 
3356         (void) thread_create(NULL, 0, (void (*)())attach_drivers, NULL, 0, &p0,
3357             TS_RUN, minclsyspri);
3358 }
3359 
3360 /*
3361  * This is a private DDI interface for optimizing boot performance.
3362  * I/O subsystem initialization is considered complete when devfsadm
3363  * is executed.
3364  *
3365  * NOTE: The start of syseventd happens to be a convenient indicator
3366  *      of the completion of I/O initialization during boot.
3367  *      The implementation should be replaced by something more robust.
3368  */
int
i_ddi_io_initialized()
{
        extern int sysevent_daemon_init;
        /* the flag is returned as-is; it is maintained outside this file */
        int io_ready = sysevent_daemon_init;

        return (io_ready);
}
3375 
3376 /*
3377  * May be used to determine system boot state
3378  * "Available" means the system is for the most part up
3379  * and initialized, with all system services either up or
3380  * capable of being started.  This state is set by devfsadm
3381  * during the boot process.  The /dev filesystem infers
3382  * from this when implicit reconfig can be performed,
3383  * ie, devfsadm can be invoked.  Please avoid making
3384  * further use of this unless it's really necessary.
3385  */
3386 int
3387 i_ddi_sysavail()
3388 {
3389         return (devname_state & DS_SYSAVAIL);
3390 }
3391 
3392 /*
3393  * May be used to determine if boot is a reconfigure boot.
3394  */
3395 int
3396 i_ddi_reconfig()
3397 {
3398         return (devname_state & DS_RECONFIG);
3399 }
3400 
3401 /*
3402  * Note system services are up, inform /dev.
3403  */
3404 void
3405 i_ddi_set_sysavail()
3406 {
3407         if ((devname_state & DS_SYSAVAIL) == 0) {
3408                 devname_state |= DS_SYSAVAIL;
3409                 sdev_devstate_change();
3410         }
3411 }
3412 
3413 /*
3414  * Note reconfiguration boot, inform /dev.
3415  */
3416 void
3417 i_ddi_set_reconfig()
3418 {
3419         if ((devname_state & DS_RECONFIG) == 0) {
3420                 devname_state |= DS_RECONFIG;
3421                 sdev_devstate_change();
3422         }
3423 }
3424 
3425 
3426 /*
3427  * device tree walking
3428  */
3429 
3430 struct walk_elem {
3431         struct walk_elem *next;
3432         dev_info_t *dip;
3433 };
3434 
3435 static void
3436 free_list(struct walk_elem *list)
3437 {
3438         while (list) {
3439                 struct walk_elem *next = list->next;
3440                 kmem_free(list, sizeof (*list));
3441                 list = next;
3442         }
3443 }
3444 
3445 static void
3446 append_node(struct walk_elem **list, dev_info_t *dip)
3447 {
3448         struct walk_elem *tail;
3449         struct walk_elem *elem = kmem_alloc(sizeof (*elem), KM_SLEEP);
3450 
3451         elem->next = NULL;
3452         elem->dip = dip;
3453 
3454         if (*list == NULL) {
3455                 *list = elem;
3456                 return;
3457         }
3458 
3459         tail = *list;
3460         while (tail->next)
3461                 tail = tail->next;
3462 
3463         tail->next = elem;
3464 }
3465 
3466 /*
3467  * The implementation of ddi_walk_devs().
3468  */
3469 static int
3470 walk_devs(dev_info_t *dip, int (*f)(dev_info_t *, void *), void *arg,
3471     int do_locking)
3472 {
3473         struct walk_elem *head = NULL;
3474 
3475         /*
3476          * Do it in two passes. First pass invoke callback on each
3477          * dip on the sibling list. Second pass invoke callback on
3478          * children of each dip.
3479          */
3480         while (dip) {
3481                 switch ((*f)(dip, arg)) {
3482                 case DDI_WALK_TERMINATE:
3483                         free_list(head);
3484                         return (DDI_WALK_TERMINATE);
3485 
3486                 case DDI_WALK_PRUNESIB:
3487                         /* ignore sibling by setting dip to NULL */
3488                         append_node(&head, dip);
3489                         dip = NULL;
3490                         break;
3491 
3492                 case DDI_WALK_PRUNECHILD:
3493                         /* don't worry about children */
3494                         dip = ddi_get_next_sibling(dip);
3495                         break;
3496 
3497                 case DDI_WALK_CONTINUE:
3498                 default:
3499                         append_node(&head, dip);
3500                         dip = ddi_get_next_sibling(dip);
3501                         break;
3502                 }
3503 
3504         }
3505 
3506         /* second pass */
3507         while (head) {
3508                 int circ;
3509                 struct walk_elem *next = head->next;
3510 
3511                 if (do_locking)
3512                         ndi_devi_enter(head->dip, &circ);
3513                 if (walk_devs(ddi_get_child(head->dip), f, arg, do_locking) ==
3514                     DDI_WALK_TERMINATE) {
3515                         if (do_locking)
3516                                 ndi_devi_exit(head->dip, circ);
3517                         free_list(head);
3518                         return (DDI_WALK_TERMINATE);
3519                 }
3520                 if (do_locking)
3521                         ndi_devi_exit(head->dip, circ);
3522                 kmem_free(head, sizeof (*head));
3523                 head = next;
3524         }
3525 
3526         return (DDI_WALK_CONTINUE);
3527 }
3528 
3529 /*
3530  * This general-purpose routine traverses the tree of dev_info nodes,
3531  * starting from the given node, and calls the given function for each
3532  * node that it finds with the current node and the pointer arg (which
3533  * can point to a structure of information that the function
3534  * needs) as arguments.
3535  *
3536  * It does the walk a layer at a time, not depth-first. The given function
3537  * must return one of the following values:
3538  *      DDI_WALK_CONTINUE
3539  *      DDI_WALK_PRUNESIB
3540  *      DDI_WALK_PRUNECHILD
3541  *      DDI_WALK_TERMINATE
3542  *
3543  * N.B. Since we walk the sibling list, the caller must ensure that
3544  *      the parent of dip is held against changes, unless the parent
3545  *      is rootnode.  ndi_devi_enter() on the parent is sufficient.
3546  *
3547  *      To avoid deadlock situations, caller must not attempt to
3548  *      configure/unconfigure/remove device node in (*f)(), nor should
3549  *      it attempt to recurse on other nodes in the system. Any
3550  *      ndi_devi_enter() done by (*f)() must occur 'at-or-below' the
3551  *      node entered prior to ddi_walk_devs(). Furthermore, if (*f)()
3552  *      does any multi-threading (in framework *or* in driver) then the
3553  *      ndi_devi_enter() calls done by dependent threads must be
3554  *      'strictly-below'.
3555  *
3556  *      This is not callable from device autoconfiguration routines.
3557  *      They include, but not limited to, _init(9e), _fini(9e), probe(9e),
3558  *      attach(9e), and detach(9e).
3559  */
3560 
3561 void
3562 ddi_walk_devs(dev_info_t *dip, int (*f)(dev_info_t *, void *), void *arg)
3563 {
3564 
3565         ASSERT(dip == NULL || ddi_get_parent(dip) == NULL ||
3566             DEVI_BUSY_OWNED(ddi_get_parent(dip)));
3567 
3568         (void) walk_devs(dip, f, arg, 1);
3569 }
3570 
3571 /*
3572  * This is a general-purpose routine traverses the per-driver list
3573  * and calls the given function for each node. must return one of
3574  * the following values:
3575  *      DDI_WALK_CONTINUE
3576  *      DDI_WALK_TERMINATE
3577  *
3578  * N.B. The same restrictions from ddi_walk_devs() apply.
3579  */
3580 
3581 void
3582 e_ddi_walk_driver(char *drv, int (*f)(dev_info_t *, void *), void *arg)
3583 {
3584         major_t major;
3585         struct devnames *dnp;
3586         dev_info_t *dip;
3587 
3588         major = ddi_name_to_major(drv);
3589         if (major == DDI_MAJOR_T_NONE)
3590                 return;
3591 
3592         dnp = &devnamesp[major];
3593         LOCK_DEV_OPS(&dnp->dn_lock);
3594         dip = dnp->dn_head;
3595         while (dip) {
3596                 ndi_hold_devi(dip);
3597                 UNLOCK_DEV_OPS(&dnp->dn_lock);
3598                 if ((*f)(dip, arg) == DDI_WALK_TERMINATE) {
3599                         ndi_rele_devi(dip);
3600                         return;
3601                 }
3602                 LOCK_DEV_OPS(&dnp->dn_lock);
3603                 ndi_rele_devi(dip);
3604                 dip = ddi_get_next(dip);
3605         }
3606         UNLOCK_DEV_OPS(&dnp->dn_lock);
3607 }
3608 
3609 /*
3610  * argument to i_find_devi, a devinfo node search callback function.
3611  */
3612 struct match_info {
3613         dev_info_t      *dip;           /* result */
3614         char            *nodename;      /* if non-null, nodename must match */
3615         int             instance;       /* if != -1, instance must match */
3616         int             attached;       /* if != 0, i_ddi_devi_attached() */
3617 };
3618 
3619 static int
3620 i_find_devi(dev_info_t *dip, void *arg)
3621 {
3622         struct match_info *info = (struct match_info *)arg;
3623 
3624         if (((info->nodename == NULL) ||
3625             (strcmp(ddi_node_name(dip), info->nodename) == 0)) &&
3626             ((info->instance == -1) ||
3627             (ddi_get_instance(dip) == info->instance)) &&
3628             ((info->attached == 0) || i_ddi_devi_attached(dip))) {
3629                 info->dip = dip;
3630                 ndi_hold_devi(dip);
3631                 return (DDI_WALK_TERMINATE);
3632         }
3633 
3634         return (DDI_WALK_CONTINUE);
3635 }
3636 
3637 /*
3638  * Find dip with a known node name and instance and return with it held
3639  */
3640 dev_info_t *
3641 ddi_find_devinfo(char *nodename, int instance, int attached)
3642 {
3643         struct match_info       info;
3644 
3645         info.nodename = nodename;
3646         info.instance = instance;
3647         info.attached = attached;
3648         info.dip = NULL;
3649 
3650         ddi_walk_devs(ddi_root_node(), i_find_devi, &info);
3651         return (info.dip);
3652 }
3653 
3654 extern ib_boot_prop_t *iscsiboot_prop;
/*
 * Variant of i_ddi_parse_name() in which only the LAST ':' of the
 * component separates the minor name; earlier colons are left in place.
 * The input string is modified: every '@' (when addrname is requested)
 * and the last ':' (when minorname is requested) are overwritten with
 * '\0'.  A ':' in the very first position is never overwritten.
 * Any of the output arguments may be NULL.
 */
static void
i_ddi_parse_iscsi_name(char *name, char **nodename, char **addrname,
    char **minorname)
{
        static char empty_addr[] = "";
        char *scan;
        char *last_colon = name;

        /* defaults for whatever the caller asked for */
        if (nodename != NULL)
                *nodename = name;
        if (addrname != NULL)
                *addrname = empty_addr;
        if (minorname != NULL)
                *minorname = NULL;

        for (scan = name; *scan != '\0'; scan++) {
                if (*scan == '@' && addrname != NULL) {
                        *addrname = scan + 1;
                        *scan = '\0';
                } else if (*scan == ':' && minorname != NULL) {
                        /* remember it; only the final one gets chopped */
                        *minorname = scan + 1;
                        last_colon = scan;
                }
        }

        if (last_colon != name)
                *last_colon = '\0';
}
3685 
/*
 * Split "name@addr:minor" in place into its node name, unit address and
 * minor name.  Any of the output arguments may be NULL, in which case the
 * corresponding separator is left untouched.
 *
 * Defaults when a part is absent: *nodename = name, *addrname = "" and
 * *minorname = NULL.  Each recognized '@' or ':' is overwritten with '\0'
 * and the matching output is pointed just past it, so if a separator
 * occurs more than once the last occurrence wins.
 */
void
i_ddi_parse_name(char *name, char **nodename, char **addrname, char **minorname)
{
        static char empty_addr[] = "";
        char *scan;

        if (nodename != NULL)
                *nodename = name;
        if (addrname != NULL)
                *addrname = empty_addr;
        if (minorname != NULL)
                *minorname = NULL;

        for (scan = name; *scan != '\0'; scan++) {
                char **field;

                /* which output, if any, does this character introduce? */
                switch (*scan) {
                case '@':
                        field = addrname;
                        break;
                case ':':
                        field = minorname;
                        break;
                default:
                        field = NULL;
                        break;
                }

                if (field != NULL) {
                        *field = scan + 1;
                        *scan = '\0';
                }
        }
}
3715 
3716 static char *
3717 child_path_to_driver(dev_info_t *parent, char *child_name, char *unit_address)
3718 {
3719         char *p, *drvname = NULL;
3720         major_t maj;
3721 
3722         /*
3723          * Construct the pathname and ask the implementation
3724          * if it can do a driver = f(pathname) for us, if not
3725          * we'll just default to using the node-name that
3726          * was given to us.  We want to do this first to
3727          * allow the platform to use 'generic' names for
3728          * legacy device drivers.
3729          */
3730         p = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
3731         (void) ddi_pathname(parent, p);
3732         (void) strcat(p, "/");
3733         (void) strcat(p, child_name);
3734         if (unit_address && *unit_address) {
3735                 (void) strcat(p, "@");
3736                 (void) strcat(p, unit_address);
3737         }
3738 
3739         /*
3740          * Get the binding. If there is none, return the child_name
3741          * and let the caller deal with it.
3742          */
3743         maj = path_to_major(p);
3744 
3745         kmem_free(p, MAXPATHLEN);
3746 
3747         if (maj != DDI_MAJOR_T_NONE)
3748                 drvname = ddi_major_to_name(maj);
3749         if (drvname == NULL)
3750                 drvname = child_name;
3751 
3752         return (drvname);
3753 }
3754 
3755 
3756 #define PCI_EX_CLASS    "pciexclass"
3757 #define PCI_EX          "pciex"
3758 #define PCI_CLASS       "pciclass"
3759 #define PCI             "pci"
3760 
3761 int
3762 ddi_is_pci_dip(dev_info_t *dip)
3763 {
3764         char    *prop = NULL;
3765 
3766         if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
3767             "compatible", &prop) == DDI_PROP_SUCCESS) {
3768                 ASSERT(prop);
3769                 if (strncmp(prop, PCI_EX_CLASS, sizeof (PCI_EX_CLASS) - 1)
3770                     == 0 ||
3771                     strncmp(prop, PCI_EX, sizeof (PCI_EX)- 1)
3772                     == 0 ||
3773                     strncmp(prop, PCI_CLASS, sizeof (PCI_CLASS) - 1)
3774                     == 0 ||
3775                     strncmp(prop, PCI, sizeof (PCI) - 1)
3776                     == 0) {
3777                         ddi_prop_free(prop);
3778                         return (1);
3779                 }
3780         }
3781 
3782         if (prop != NULL) {
3783                 ddi_prop_free(prop);
3784         }
3785 
3786         return (0);
3787 }
3788 
3789 /*
3790  * Given the pathname of a device, fill in the dev_info_t value and/or the
3791  * dev_t value and/or the spectype, depending on which parameters are non-NULL.
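 * If there is an error, this function returns a non-zero value: EINVAL for
 * a pathname that does not start with '/', the pn_get() error if the path
 * cannot be set up, or -1 if a component or minor cannot be configured.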
3793  *
3794  * NOTE: If this function returns the dev_info_t structure, then it
3795  * does so with a hold on the devi. Caller should ensure that they get
3796  * decremented via ddi_release_devi() or ndi_rele_devi();
3797  *
3798  * This function can be invoked in the boot case for a pathname without
3799  * device argument (:xxxx), traditionally treated as a minor name.
3800  * In this case, we do the following
3801  * (1) search the minor node of type DDM_DEFAULT.
3802  * (2) if no DDM_DEFAULT minor exists, then the first non-alias minor is chosen.
3803  * (3) if neither exists, a dev_t is faked with minor number = instance.
3804  * As of S9 FCS, no instance of #1 exists. #2 is used by several platforms
3805  * to default the boot partition to :a possibly by other OBP definitions.
3806  * #3 is used for booting off network interfaces, most SPARC network
3807  * drivers support Style-2 only, so only DDM_ALIAS minor exists.
3808  *
3809  * It is possible for OBP to present device args at the end of the path as
3810  * well as in the middle. For example, with IB the following strings are
3811  * valid boot paths.
3812  *      a /pci@8,700000/ib@1,2:port=1,pkey=ff,dhcp,...
3813  *      b /pci@8,700000/ib@1,1:port=1/ioc@xxxxxx,yyyyyyy:dhcp
3814  * Case (a), we first look for minor node "port=1,pkey...".
3815  * Failing that, we will pass "port=1,pkey..." to the bus_config
3816  * entry point of ib (HCA) driver.
3817  * Case (b), configure ib@1,1 as usual. Then invoke ib's bus_config
3818  * with argument "ioc@xxxxxxx,yyyyyyy:port=1". After configuring
3819  * the ioc, look for minor node dhcp. If not found, pass ":dhcp"
3820  * to ioc's bus_config entry point.
3821  */
3822 int
3823 resolve_pathname(char *pathname,
3824         dev_info_t **dipp, dev_t *devtp, int *spectypep)
3825 {
3826         int                     error;
3827         dev_info_t              *parent, *child;
3828         struct pathname         pn;
3829         char                    *component, *config_name;
3830         char                    *minorname = NULL;
3831         char                    *prev_minor = NULL;
3832         dev_t                   devt = NODEV;
3833         int                     spectype;
3834         struct ddi_minor_data   *dmn;
3835         int                     circ;
3836 
3837         if (*pathname != '/')
3838                 return (EINVAL);
3839         parent = ddi_root_node();       /* Begin at the top of the tree */
3840 
3841         if (error = pn_get(pathname, UIO_SYSSPACE, &pn))
3842                 return (error);
3843         pn_skipslash(&pn);
3844 
3845         ASSERT(i_ddi_devi_attached(parent));
3846         ndi_hold_devi(parent);
3847 
3848         component = kmem_alloc(MAXNAMELEN, KM_SLEEP);
3849         config_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
3850 
3851         while (pn_pathleft(&pn)) {
3852                 /* remember prev minor (:xxx) in the middle of path */
3853                 if (minorname)
3854                         prev_minor = i_ddi_strdup(minorname, KM_SLEEP);
3855 
3856                 /* Get component and chop off minorname */
3857                 (void) pn_getcomponent(&pn, component);
3858                 if ((iscsiboot_prop != NULL) &&
3859                     (strcmp((DEVI(parent)->devi_node_name), "iscsi") == 0)) {
3860                         i_ddi_parse_iscsi_name(component, NULL, NULL,
3861                             &minorname);
3862                 } else {
3863                         i_ddi_parse_name(component, NULL, NULL, &minorname);
3864                 }
3865                 if (prev_minor == NULL) {
3866                         (void) snprintf(config_name, MAXNAMELEN, "%s",
3867                             component);
3868                 } else {
3869                         (void) snprintf(config_name, MAXNAMELEN, "%s:%s",
3870                             component, prev_minor);
3871                         kmem_free(prev_minor, strlen(prev_minor) + 1);
3872                         prev_minor = NULL;
3873                 }
3874 
3875                 /*
3876                  * Find and configure the child
3877                  */
3878                 if (ndi_devi_config_one(parent, config_name, &child,
3879                     NDI_PROMNAME | NDI_NO_EVENT) != NDI_SUCCESS) {
3880                         ndi_rele_devi(parent);
3881                         pn_free(&pn);
3882                         kmem_free(component, MAXNAMELEN);
3883                         kmem_free(config_name, MAXNAMELEN);
3884                         return (-1);
3885                 }
3886 
3887                 ASSERT(i_ddi_devi_attached(child));
3888                 ndi_rele_devi(parent);
3889                 parent = child;
3890                 pn_skipslash(&pn);
3891         }
3892 
3893         /*
3894          * First look for a minor node matching minorname.
3895          * Failing that, try to pass minorname to bus_config().
3896          */
3897         if (minorname && i_ddi_minorname_to_devtspectype(parent,
3898             minorname, &devt, &spectype) == DDI_FAILURE) {
3899                 (void) snprintf(config_name, MAXNAMELEN, "%s", minorname);
3900                 if (ndi_devi_config_obp_args(parent,
3901                     config_name, &child, 0) != NDI_SUCCESS) {
3902                         ndi_rele_devi(parent);
3903                         pn_free(&pn);
3904                         kmem_free(component, MAXNAMELEN);
3905                         kmem_free(config_name, MAXNAMELEN);
3906                         NDI_CONFIG_DEBUG((CE_NOTE,
3907                             "%s: minor node not found\n", pathname));
3908                         return (-1);
3909                 }
3910                 minorname = NULL;       /* look for default minor */
3911                 ASSERT(i_ddi_devi_attached(child));
3912                 ndi_rele_devi(parent);
3913                 parent = child;
3914         }
3915 
3916         if (devtp || spectypep) {
3917                 if (minorname == NULL) {
3918                         /*
3919                          * Search for a default entry with an active
3920                          * ndi_devi_enter to protect the devi_minor list.
3921                          */
3922                         ndi_devi_enter(parent, &circ);
3923                         for (dmn = DEVI(parent)->devi_minor; dmn;
3924                             dmn = dmn->next) {
3925                                 if (dmn->type == DDM_DEFAULT) {
3926                                         devt = dmn->ddm_dev;
3927                                         spectype = dmn->ddm_spec_type;
3928                                         break;
3929                                 }
3930                         }
3931 
3932                         if (devt == NODEV) {
3933                                 /*
3934                                  * No default minor node, try the first one;
3935                                  * else, assume 1-1 instance-minor mapping
3936                                  */
3937                                 dmn = DEVI(parent)->devi_minor;
3938                                 if (dmn && ((dmn->type == DDM_MINOR) ||
3939                                     (dmn->type == DDM_INTERNAL_PATH))) {
3940                                         devt = dmn->ddm_dev;
3941                                         spectype = dmn->ddm_spec_type;
3942                                 } else {
3943                                         devt = makedevice(
3944                                             DEVI(parent)->devi_major,
3945                                             ddi_get_instance(parent));
3946                                         spectype = S_IFCHR;
3947                                 }
3948                         }
3949                         ndi_devi_exit(parent, circ);
3950                 }
3951                 if (devtp)
3952                         *devtp = devt;
3953                 if (spectypep)
3954                         *spectypep = spectype;
3955         }
3956 
3957         pn_free(&pn);
3958         kmem_free(component, MAXNAMELEN);
3959         kmem_free(config_name, MAXNAMELEN);
3960 
3961         /*
3962          * If there is no error, return the appropriate parameters
3963          */
3964         if (dipp != NULL)
3965                 *dipp = parent;
3966         else {
3967                 /*
3968                  * We should really keep the ref count to keep the node from
3969                  * detaching but ddi_pathname_to_dev_t() specifies a NULL dipp,
3970                  * so we have no way of passing back the held dip.  Not holding
3971                  * the dip allows detaches to occur - which can cause problems
3972                  * for subsystems which call ddi_pathname_to_dev_t (console).
3973                  *
3974                  * Instead of holding the dip, we place a ddi-no-autodetach
3975                  * property on the node to prevent auto detaching.
3976                  *
3977                  * The right fix is to remove ddi_pathname_to_dev_t and replace
3978                  * it, and all references, with a call that specifies a dipp.
3979                  * In addition, the callers of this new interfaces would then
3980                  * need to call ndi_rele_devi when the reference is complete.
3981                  *
3982                  */
3983                 (void) ddi_prop_update_int(DDI_DEV_T_NONE, parent,
3984                     DDI_NO_AUTODETACH, 1);
3985                 ndi_rele_devi(parent);
3986         }
3987 
3988         return (0);
3989 }
3990 
3991 /*
3992  * Given the pathname of a device, return the dev_t of the corresponding
3993  * device.  Returns NODEV on failure.
3994  *
3995  * Note that this call sets the DDI_NO_AUTODETACH property on the devinfo node.
3996  */
3997 dev_t
3998 ddi_pathname_to_dev_t(char *pathname)
3999 {
4000         dev_t devt;
4001         int error;
4002 
4003         error = resolve_pathname(pathname, NULL, &devt, NULL);
4004 
4005         return (error ? NODEV : devt);
4006 }
4007 
4008 /*
4009  * Translate a prom pathname to kernel devfs pathname.
4010  * Caller is assumed to allocate devfspath memory of
4011  * size at least MAXPATHLEN
4012  *
4013  * The prom pathname may not include minor name, but
4014  * devfs pathname has a minor name portion.
4015  */
4016 int
4017 i_ddi_prompath_to_devfspath(char *prompath, char *devfspath)
4018 {
4019         dev_t           devt = (dev_t)NODEV;
4020         dev_info_t      *dip = NULL;
4021         char            *minor_name = NULL;
4022         int             spectype;
4023         int             error;
4024         int             circ;
4025 
4026         error = resolve_pathname(prompath, &dip, &devt, &spectype);
4027         if (error)
4028                 return (DDI_FAILURE);
4029         ASSERT(dip && devt != NODEV);
4030 
4031         /*
4032          * Get in-kernel devfs pathname
4033          */
4034         (void) ddi_pathname(dip, devfspath);
4035 
4036         ndi_devi_enter(dip, &circ);
4037         minor_name = i_ddi_devtspectype_to_minorname(dip, devt, spectype);
4038         if (minor_name) {
4039                 (void) strcat(devfspath, ":");
4040                 (void) strcat(devfspath, minor_name);
4041         } else {
4042                 /*
4043                  * If minor_name is NULL, we have an alias minor node.
4044                  * So manufacture a path to the corresponding clone minor.
4045                  */
4046                 (void) snprintf(devfspath, MAXPATHLEN, "%s:%s",
4047                     CLONE_PATH, ddi_driver_name(dip));
4048         }
4049         ndi_devi_exit(dip, circ);
4050 
4051         /* release hold from resolve_pathname() */
4052         ndi_rele_devi(dip);
4053         return (0);
4054 }
4055 
4056 /*
4057  * This function is intended to identify drivers that must quiesce for fast
4058  * reboot to succeed.  It does not claim to have more knowledge about the device
4059  * than its driver.  If a driver has implemented quiesce(), it will be invoked;
4060  * if a so identified driver does not manage any device that needs to be
4061  * quiesced, it must explicitly set its devo_quiesce dev_op to
4062  * ddi_quiesce_not_needed.
4063  */
4064 static int skip_pseudo = 1;     /* Skip pseudo devices */
4065 static int skip_non_hw = 1;     /* Skip devices with no hardware property */
4066 static int
4067 should_implement_quiesce(dev_info_t *dip)
4068 {
4069         struct dev_info *devi = DEVI(dip);
4070         dev_info_t *pdip;
4071 
4072         /*
4073          * If dip is pseudo and skip_pseudo is set, driver doesn't have to
4074          * implement quiesce().
4075          */
4076         if (skip_pseudo &&
4077             strncmp(ddi_binding_name(dip), "pseudo", sizeof ("pseudo")) == 0)
4078                 return (0);
4079 
4080         /*
4081          * If parent dip is pseudo and skip_pseudo is set, driver doesn't have
4082          * to implement quiesce().
4083          */
4084         if (skip_pseudo && (pdip = ddi_get_parent(dip)) != NULL &&
4085             strncmp(ddi_binding_name(pdip), "pseudo", sizeof ("pseudo")) == 0)
4086                 return (0);
4087 
4088         /*
4089          * If not attached, driver doesn't have to implement quiesce().
4090          */
4091         if (!i_ddi_devi_attached(dip))
4092                 return (0);
4093 
4094         /*
4095          * If dip has no hardware property and skip_non_hw is set,
4096          * driver doesn't have to implement quiesce().
4097          */
4098         if (skip_non_hw && devi->devi_hw_prop_ptr == NULL)
4099                 return (0);
4100 
4101         return (1);
4102 }
4103 
4104 static int
4105 driver_has_quiesce(struct dev_ops *ops)
4106 {
4107         if ((ops->devo_rev >= 4) && (ops->devo_quiesce != nodev) &&
4108             (ops->devo_quiesce != NULL) && (ops->devo_quiesce != nulldev) &&
4109             (ops->devo_quiesce != ddi_quiesce_not_supported))
4110                 return (1);
4111         else
4112                 return (0);
4113 }
4114 
4115 /*
4116  * Check to see if a driver has implemented the quiesce() DDI function.
4117  */
4118 int
4119 check_driver_quiesce(dev_info_t *dip, void *arg)
4120 {
4121         struct dev_ops *ops;
4122 
4123         if (!should_implement_quiesce(dip))
4124                 return (DDI_WALK_CONTINUE);
4125 
4126         if ((ops = ddi_get_driver(dip)) == NULL)
4127                 return (DDI_WALK_CONTINUE);
4128 
4129         if (driver_has_quiesce(ops)) {
4130                 if ((quiesce_debug & 0x2) == 0x2) {
4131                         if (ops->devo_quiesce == ddi_quiesce_not_needed)
4132                                 cmn_err(CE_CONT, "%s does not need to be "
4133                                     "quiesced", ddi_driver_name(dip));
4134                         else
4135                                 cmn_err(CE_CONT, "%s has quiesce routine",
4136                                     ddi_driver_name(dip));
4137                 }
4138         } else {
4139                 if (arg != NULL)
4140                         *((int *)arg) = -1;
4141                 cmn_err(CE_WARN, "%s has no quiesce()", ddi_driver_name(dip));
4142         }
4143 
4144         return (DDI_WALK_CONTINUE);
4145 }
4146 
4147 /*
4148  * Quiesce device.
4149  */
4150 static void
4151 quiesce_one_device(dev_info_t *dip, void *arg)
4152 {
4153         struct dev_ops *ops;
4154         int should_quiesce = 0;
4155 
4156         /*
4157          * If the device is not attached it doesn't need to be quiesced.
4158          */
4159         if (!i_ddi_devi_attached(dip))
4160                 return;
4161 
4162         if ((ops = ddi_get_driver(dip)) == NULL)
4163                 return;
4164 
4165         should_quiesce = should_implement_quiesce(dip);
4166 
4167         /*
4168          * If there's an implementation of quiesce(), always call it even if
4169          * some of the drivers don't have quiesce() or quiesce() have failed
4170          * so we can do force fast reboot.  The implementation of quiesce()
4171          * should not negatively affect a regular reboot.
4172          */
4173         if (driver_has_quiesce(ops)) {
4174                 int rc = DDI_SUCCESS;
4175 
4176                 if (ops->devo_quiesce == ddi_quiesce_not_needed)
4177                         return;
4178 
4179                 rc = devi_quiesce(dip);
4180 
4181                 if (rc != DDI_SUCCESS && should_quiesce) {
4182 #ifdef DEBUG
4183                         cmn_err(CE_WARN, "quiesce() failed for %s%d",
4184                             ddi_driver_name(dip), ddi_get_instance(dip));
4185 #endif /* DEBUG */
4186                         if (arg != NULL)
4187                                 *((int *)arg) = -1;
4188                 }
4189         } else if (should_quiesce && arg != NULL) {
4190                 *((int *)arg) = -1;
4191         }
4192 }
4193 
4194 /*
4195  * Traverse the dev info tree in a breadth-first manner so that we quiesce
4196  * children first.  All subtrees under the parent of dip will be quiesced.
4197  */
4198 void
4199 quiesce_devices(dev_info_t *dip, void *arg)
4200 {
4201         /*
4202          * if we're reached here, the device tree better not be changing.
4203          * so either devinfo_freeze better be set or we better be panicing.
4204          */
4205         ASSERT(devinfo_freeze || panicstr);
4206 
4207         for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
4208                 quiesce_devices(ddi_get_child(dip), arg);
4209 
4210                 quiesce_one_device(dip, arg);
4211         }
4212 }
4213 
4214 /*
4215  * Reset all the pure leaf drivers on the system at halt time
4216  */
4217 static int
4218 reset_leaf_device(dev_info_t *dip, void *arg)
4219 {
4220         _NOTE(ARGUNUSED(arg))
4221         struct dev_ops *ops;
4222 
4223         /* if the device doesn't need to be reset then there's nothing to do */
4224         if (!DEVI_NEED_RESET(dip))
4225                 return (DDI_WALK_CONTINUE);
4226 
4227         /*
4228          * if the device isn't a char/block device or doesn't have a
4229          * reset entry point then there's nothing to do.
4230          */
4231         ops = ddi_get_driver(dip);
4232         if ((ops == NULL) || (ops->devo_cb_ops == NULL) ||
4233             (ops->devo_reset == nodev) || (ops->devo_reset == nulldev) ||
4234             (ops->devo_reset == NULL))
4235                 return (DDI_WALK_CONTINUE);
4236 
4237         if (DEVI_IS_ATTACHING(dip) || DEVI_IS_DETACHING(dip)) {
4238                 static char path[MAXPATHLEN];
4239 
4240                 /*
4241                  * bad news, this device has blocked in it's attach or
4242                  * detach routine, which means it not safe to call it's
4243                  * devo_reset() entry point.
4244                  */
4245                 cmn_err(CE_WARN, "unable to reset device: %s",
4246                     ddi_pathname(dip, path));
4247                 return (DDI_WALK_CONTINUE);
4248         }
4249 
4250         NDI_CONFIG_DEBUG((CE_NOTE, "resetting %s%d\n",
4251             ddi_driver_name(dip), ddi_get_instance(dip)));
4252 
4253         (void) devi_reset(dip, DDI_RESET_FORCE);
4254         return (DDI_WALK_CONTINUE);
4255 }
4256 
4257 void
4258 reset_leaves(void)
4259 {
4260         /*
4261          * if we're reached here, the device tree better not be changing.
4262          * so either devinfo_freeze better be set or we better be panicing.
4263          */
4264         ASSERT(devinfo_freeze || panicstr);
4265 
4266         (void) walk_devs(top_devinfo, reset_leaf_device, NULL, 0);
4267 }
4268 
4269 
4270 /*
4271  * devtree_freeze() must be called before quiesce_devices() and reset_leaves()
4272  * during a normal system shutdown.  It attempts to ensure that there are no
4273  * outstanding attach or detach operations in progress when quiesce_devices() or
4274  * reset_leaves()is invoked.  It must be called before the system becomes
4275  * single-threaded because device attach and detach are multi-threaded
4276  * operations.  (note that during system shutdown the system doesn't actually
4277  * become single-thread since other threads still exist, but the shutdown thread
4278  * will disable preemption for itself, raise it's pil, and stop all the other
4279  * cpus in the system there by effectively making the system single-threaded.)
4280  */
4281 void
4282 devtree_freeze(void)
4283 {
4284         int delayed = 0;
4285 
4286         /* if we're panicing then the device tree isn't going to be changing */
4287         if (panicstr)
4288                 return;
4289 
4290         /* stop all dev_info state changes in the device tree */
4291         devinfo_freeze = gethrtime();
4292 
4293         /*
4294          * if we're not panicing and there are on-going attach or detach
4295          * operations, wait for up to 3 seconds for them to finish.  This
4296          * is a randomly chosen interval but this should be ok because:
4297          * - 3 seconds is very small relative to the deadman timer.
4298          * - normal attach and detach operations should be very quick.
4299          * - attach and detach operations are fairly rare.
4300          */
4301         while (!panicstr && atomic_add_long_nv(&devinfo_attach_detach, 0) &&
4302             (delayed < 3)) {
4303                 delayed += 1;
4304 
4305                 /* do a sleeping wait for one second */
4306                 ASSERT(!servicing_interrupt());
4307                 delay(drv_sectohz(1));
4308         }
4309 }
4310 
4311 static int
4312 bind_dip(dev_info_t *dip, void *arg)
4313 {
4314         _NOTE(ARGUNUSED(arg))
4315         char    *path;
4316         major_t major, pmajor;
4317 
4318         /*
4319          * If the node is currently bound to the wrong driver, try to unbind
4320          * so that we can rebind to the correct driver.
4321          */
4322         if (i_ddi_node_state(dip) >= DS_BOUND) {
4323                 major = ddi_compatible_driver_major(dip, NULL);
4324                 if ((DEVI(dip)->devi_major == major) &&
4325                     (i_ddi_node_state(dip) >= DS_INITIALIZED)) {
4326                         /*
4327                          * Check for a path-oriented driver alias that
4328                          * takes precedence over current driver binding.
4329                          */
4330                         path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4331                         (void) ddi_pathname(dip, path);
4332                         pmajor = ddi_name_to_major(path);
4333                         if (driver_active(pmajor))
4334                                 major = pmajor;
4335                         kmem_free(path, MAXPATHLEN);
4336                 }
4337 
4338                 /* attempt unbind if current driver is incorrect */
4339                 if (driver_active(major) &&
4340                     (major != DEVI(dip)->devi_major))
4341                         (void) ndi_devi_unbind_driver(dip);
4342         }
4343 
4344         /* If unbound, try to bind to a driver */
4345         if (i_ddi_node_state(dip) < DS_BOUND)
4346                 (void) ndi_devi_bind_driver(dip, 0);
4347 
4348         return (DDI_WALK_CONTINUE);
4349 }
4350 
4351 void
4352 i_ddi_bind_devs(void)
4353 {
4354         /* flush devfs so that ndi_devi_unbind_driver will work when possible */
4355         (void) devfs_clean(top_devinfo, NULL, 0);
4356 
4357         ddi_walk_devs(top_devinfo, bind_dip, (void *)NULL);
4358 }
4359 
/*
 * Callback data for unbind_children_by_alias(); allocated and consumed by
 * i_ddi_unbind_devs_by_alias().
 */
typedef struct unbind_data {
        /* driver major number a child must match to be considered */
        major_t drv_major;
        /* alias compared against the child's devi_node_name */
        char    *drv_alias;
        /* count of matching children that could not be unbound */
        int     ndevs_bound;
        /*
         * Added to ndevs_bound to form the caller's return value.
         * NOTE(review): nothing in view increments this field -- confirm
         * whether it is still needed.
         */
        int     unbind_errors;
} unbind_data_t;
4367 
4368 /*
4369  * A utility function provided for testing and support convenience
4370  * Called for each device during an upgrade_drv -d bound to the alias
4371  * that cannot be unbound due to device in use.
4372  */
4373 static void
4374 unbind_alias_dev_in_use(dev_info_t *dip, char *alias)
4375 {
4376         if (moddebug & MODDEBUG_BINDING) {
4377                 cmn_err(CE_CONT, "%s%d: state %d: bound to %s\n",
4378                     ddi_driver_name(dip), ddi_get_instance(dip),
4379                     i_ddi_node_state(dip), alias);
4380         }
4381 }
4382 
4383 /*
4384  * walkdevs callback for unbind devices bound to specific driver
4385  * and alias.  Invoked within the context of update_drv -d <alias>.
4386  */
4387 static int
4388 unbind_children_by_alias(dev_info_t *dip, void *arg)
4389 {
4390         int             circ;
4391         dev_info_t      *cdip;
4392         dev_info_t      *next;
4393         unbind_data_t   *ub = (unbind_data_t *)(uintptr_t)arg;
4394         int             rv;
4395 
4396         /*
4397          * We are called from update_drv to try to unbind a specific
4398          * set of aliases for a driver.  Unbind what persistent nodes
4399          * we can, and return the number of nodes which cannot be unbound.
4400          * If not all nodes can be unbound, update_drv leaves the
4401          * state of the driver binding files unchanged, except in
4402          * the case of -f.
4403          */
4404         ndi_devi_enter(dip, &circ);
4405         for (cdip = ddi_get_child(dip); cdip; cdip = next) {
4406                 next = ddi_get_next_sibling(cdip);
4407                 if ((ddi_driver_major(cdip) != ub->drv_major) ||
4408                     (strcmp(DEVI(cdip)->devi_node_name, ub->drv_alias) != 0))
4409                         continue;
4410                 if (i_ddi_node_state(cdip) >= DS_BOUND) {
4411                         rv = ndi_devi_unbind_driver(cdip);
4412                         if (rv != DDI_SUCCESS ||
4413                             (i_ddi_node_state(cdip) >= DS_BOUND)) {
4414                                 unbind_alias_dev_in_use(cdip, ub->drv_alias);
4415                                 ub->ndevs_bound++;
4416                                 continue;
4417                         }
4418                         if (ndi_dev_is_persistent_node(cdip) == 0)
4419                                 (void) ddi_remove_child(cdip, 0);
4420                 }
4421         }
4422         ndi_devi_exit(dip, circ);
4423 
4424         return (DDI_WALK_CONTINUE);
4425 }
4426 
4427 /*
4428  * Unbind devices by driver & alias
4429  * Context: update_drv [-f] -d -i <alias> <driver>
4430  */
4431 int
4432 i_ddi_unbind_devs_by_alias(major_t major, char *alias)
4433 {
4434         unbind_data_t   *ub;
4435         int             rv;
4436 
4437         ub = kmem_zalloc(sizeof (*ub), KM_SLEEP);
4438         ub->drv_major = major;
4439         ub->drv_alias = alias;
4440         ub->ndevs_bound = 0;
4441         ub->unbind_errors = 0;
4442 
4443         /* flush devfs so that ndi_devi_unbind_driver will work when possible */
4444         (void) devfs_clean(top_devinfo, NULL, 0);
4445         ddi_walk_devs(top_devinfo, unbind_children_by_alias,
4446             (void *)(uintptr_t)ub);
4447 
4448         /* return the number of devices remaining bound to the alias */
4449         rv = ub->ndevs_bound + ub->unbind_errors;
4450         kmem_free(ub, sizeof (*ub));
4451         return (rv);
4452 }
4453 
4454 /*
4455  * walkdevs callback for unbind devices by driver
4456  */
4457 static int
4458 unbind_children_by_driver(dev_info_t *dip, void *arg)
4459 {
4460         int             circ;
4461         dev_info_t      *cdip;
4462         dev_info_t      *next;
4463         major_t         major = (major_t)(uintptr_t)arg;
4464         int             rv;
4465 
4466         /*
4467          * We are called either from rem_drv or update_drv when reloading
4468          * a driver.conf file. In either case, we unbind persistent nodes
4469          * and destroy .conf nodes. In the case of rem_drv, this will be
4470          * the final state. In the case of update_drv,  i_ddi_bind_devs()
4471          * may be invoked later to re-enumerate (new) driver.conf rebind
4472          * persistent nodes.
4473          */
4474         ndi_devi_enter(dip, &circ);
4475         for (cdip = ddi_get_child(dip); cdip; cdip = next) {
4476                 next = ddi_get_next_sibling(cdip);
4477                 if (ddi_driver_major(cdip) != major)
4478                         continue;
4479                 if (i_ddi_node_state(cdip) >= DS_BOUND) {
4480                         rv = ndi_devi_unbind_driver(cdip);
4481                         if (rv == DDI_FAILURE ||
4482                             (i_ddi_node_state(cdip) >= DS_BOUND))
4483                                 continue;
4484                         if (ndi_dev_is_persistent_node(cdip) == 0)
4485                                 (void) ddi_remove_child(cdip, 0);
4486                 }
4487         }
4488         ndi_devi_exit(dip, circ);
4489 
4490         return (DDI_WALK_CONTINUE);
4491 }
4492 
4493 /*
4494  * Unbind devices by driver
4495  * Context: rem_drv or unload driver.conf
4496  */
4497 void
4498 i_ddi_unbind_devs(major_t major)
4499 {
4500         /* flush devfs so that ndi_devi_unbind_driver will work when possible */
4501         (void) devfs_clean(top_devinfo, NULL, 0);
4502         ddi_walk_devs(top_devinfo, unbind_children_by_driver,
4503             (void *)(uintptr_t)major);
4504 }
4505 
4506 /*
4507  * I/O Hotplug control
4508  */
4509 
4510 /*
4511  * create and attach a dev_info node from a .conf file spec
4512  */
4513 static void
4514 init_spec_child(dev_info_t *pdip, struct hwc_spec *specp, uint_t flags)
4515 {
4516         _NOTE(ARGUNUSED(flags))
4517         dev_info_t *dip;
4518         char *node_name;
4519 
4520         if (((node_name = specp->hwc_devi_name) == NULL) ||
4521             (ddi_name_to_major(node_name) == DDI_MAJOR_T_NONE)) {
4522                 char *tmp = node_name;
4523                 if (tmp == NULL)
4524                         tmp = "<none>";
4525                 cmn_err(CE_CONT,
4526                     "init_spec_child: parent=%s, bad spec (%s)\n",
4527                     ddi_node_name(pdip), tmp);
4528                 return;
4529         }
4530 
4531         dip = i_ddi_alloc_node(pdip, node_name, (pnode_t)DEVI_PSEUDO_NODEID,
4532             -1, specp->hwc_devi_sys_prop_ptr, KM_SLEEP);
4533 
4534         if (dip == NULL)
4535                 return;
4536 
4537         if (ddi_initchild(pdip, dip) != DDI_SUCCESS)
4538                 (void) ddi_remove_child(dip, 0);
4539 }
4540 
4541 /*
4542  * Lookup hwc specs from hash tables and make children from the spec
4543  * Because some .conf children are "merge" nodes, we also initialize
4544  * .conf children to merge properties onto hardware nodes.
4545  *
4546  * The pdip must be held busy.
4547  */
4548 int
4549 i_ndi_make_spec_children(dev_info_t *pdip, uint_t flags)
4550 {
4551         extern struct hwc_spec *hwc_get_child_spec(dev_info_t *, major_t);
4552         int                     circ;
4553         struct hwc_spec         *list, *spec;
4554 
4555         ndi_devi_enter(pdip, &circ);
4556         if (DEVI(pdip)->devi_flags & DEVI_MADE_CHILDREN) {
4557                 ndi_devi_exit(pdip, circ);
4558                 return (DDI_SUCCESS);
4559         }
4560 
4561         list = hwc_get_child_spec(pdip, DDI_MAJOR_T_NONE);
4562         for (spec = list; spec != NULL; spec = spec->hwc_next) {
4563                 init_spec_child(pdip, spec, flags);
4564         }
4565         hwc_free_spec_list(list);
4566 
4567         mutex_enter(&DEVI(pdip)->devi_lock);
4568         DEVI(pdip)->devi_flags |= DEVI_MADE_CHILDREN;
4569         mutex_exit(&DEVI(pdip)->devi_lock);
4570         ndi_devi_exit(pdip, circ);
4571         return (DDI_SUCCESS);
4572 }
4573 
4574 /*
4575  * Run initchild on all child nodes such that instance assignment
4576  * for multiport network cards are contiguous.
4577  *
4578  * The pdip must be held busy.
4579  */
4580 static void
4581 i_ndi_init_hw_children(dev_info_t *pdip, uint_t flags)
4582 {
4583         dev_info_t *dip;
4584 
4585         ASSERT(DEVI(pdip)->devi_flags & DEVI_MADE_CHILDREN);
4586 
4587         /* contiguous instance assignment */
4588         e_ddi_enter_instance();
4589         dip = ddi_get_child(pdip);
4590         while (dip) {
4591                 if (ndi_dev_is_persistent_node(dip))
4592                         (void) i_ndi_config_node(dip, DS_INITIALIZED, flags);
4593                 dip = ddi_get_next_sibling(dip);
4594         }
4595         e_ddi_exit_instance();
4596 }
4597 
4598 /*
4599  * report device status
4600  */
4601 static void
4602 i_ndi_devi_report_status_change(dev_info_t *dip, char *path)
4603 {
4604         char *status;
4605 
4606         if (!DEVI_NEED_REPORT(dip) ||
4607             (i_ddi_node_state(dip) < DS_INITIALIZED) ||
4608             ndi_dev_is_hidden_node(dip)) {
4609                 return;
4610         }
4611 
4612         /* Invalidate the devinfo snapshot cache */
4613         i_ddi_di_cache_invalidate();
4614 
4615         if (DEVI_IS_DEVICE_REMOVED(dip)) {
4616                 status = "removed";
4617         } else if (DEVI_IS_DEVICE_OFFLINE(dip)) {
4618                 status = "offline";
4619         } else if (DEVI_IS_DEVICE_DOWN(dip)) {
4620                 status = "down";
4621         } else if (DEVI_IS_BUS_QUIESCED(dip)) {
4622                 status = "quiesced";
4623         } else if (DEVI_IS_BUS_DOWN(dip)) {
4624                 status = "down";
4625         } else if (i_ddi_devi_attached(dip)) {
4626                 status = "online";
4627         } else {
4628                 status = "unknown";
4629         }
4630 
4631         if (path == NULL) {
4632                 path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4633                 cmn_err(CE_CONT, "?%s (%s%d) %s\n",
4634                     ddi_pathname(dip, path), ddi_driver_name(dip),
4635                     ddi_get_instance(dip), status);
4636                 kmem_free(path, MAXPATHLEN);
4637         } else {
4638                 cmn_err(CE_CONT, "?%s (%s%d) %s\n",
4639                     path, ddi_driver_name(dip),
4640                     ddi_get_instance(dip), status);
4641         }
4642 
4643         mutex_enter(&(DEVI(dip)->devi_lock));
4644         DEVI_REPORT_DONE(dip);
4645         mutex_exit(&(DEVI(dip)->devi_lock));
4646 }
4647 
4648 /*
4649  * log a notification that a dev_info node has been configured.
4650  */
4651 static int
4652 i_log_devfs_add_devinfo(dev_info_t *dip, uint_t flags)
4653 {
4654         int                     se_err;
4655         char                    *pathname;
4656         sysevent_t              *ev;
4657         sysevent_id_t           eid;
4658         sysevent_value_t        se_val;
4659         sysevent_attr_list_t    *ev_attr_list = NULL;
4660         char                    *class_name;
4661         int                     no_transport = 0;
4662 
4663         ASSERT(dip && ddi_get_parent(dip) &&
4664             DEVI_BUSY_OWNED(ddi_get_parent(dip)));
4665 
4666         /* do not generate ESC_DEVFS_DEVI_ADD event during boot */
4667         if (!i_ddi_io_initialized())
4668                 return (DDI_SUCCESS);
4669 
4670         /* Invalidate the devinfo snapshot cache */
4671         i_ddi_di_cache_invalidate();
4672 
4673         ev = sysevent_alloc(EC_DEVFS, ESC_DEVFS_DEVI_ADD, EP_DDI, SE_SLEEP);
4674 
4675         pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4676 
4677         (void) ddi_pathname(dip, pathname);
4678         ASSERT(strlen(pathname));
4679 
4680         se_val.value_type = SE_DATA_TYPE_STRING;
4681         se_val.value.sv_string = pathname;
4682         if (sysevent_add_attr(&ev_attr_list, DEVFS_PATHNAME,
4683             &se_val, SE_SLEEP) != 0) {
4684                 goto fail;
4685         }
4686 
4687         /* add the device class attribute */
4688         if ((class_name = i_ddi_devi_class(dip)) != NULL) {
4689                 se_val.value_type = SE_DATA_TYPE_STRING;
4690                 se_val.value.sv_string = class_name;
4691 
4692                 if (sysevent_add_attr(&ev_attr_list,
4693                     DEVFS_DEVI_CLASS, &se_val, SE_SLEEP) != 0) {
4694                         sysevent_free_attr(ev_attr_list);
4695                         goto fail;
4696                 }
4697         }
4698 
4699         /*
4700          * must log a branch event too unless NDI_BRANCH_EVENT_OP is set,
4701          * in which case the branch event will be logged by the caller
4702          * after the entire branch has been configured.
4703          */
4704         if ((flags & NDI_BRANCH_EVENT_OP) == 0) {
4705                 /*
4706                  * Instead of logging a separate branch event just add
4707                  * DEVFS_BRANCH_EVENT attribute. It indicates devfsadmd to
4708                  * generate a EC_DEV_BRANCH event.
4709                  */
4710                 se_val.value_type = SE_DATA_TYPE_INT32;
4711                 se_val.value.sv_int32 = 1;
4712                 if (sysevent_add_attr(&ev_attr_list,
4713                     DEVFS_BRANCH_EVENT, &se_val, SE_SLEEP) != 0) {
4714                         sysevent_free_attr(ev_attr_list);
4715                         goto fail;
4716                 }
4717         }
4718 
4719         if (sysevent_attach_attributes(ev, ev_attr_list) != 0) {
4720                 sysevent_free_attr(ev_attr_list);
4721                 goto fail;
4722         }
4723 
4724         if ((se_err = log_sysevent(ev, SE_SLEEP, &eid)) != 0) {
4725                 if (se_err == SE_NO_TRANSPORT)
4726                         no_transport = 1;
4727                 goto fail;
4728         }
4729 
4730         sysevent_free(ev);
4731         kmem_free(pathname, MAXPATHLEN);
4732 
4733         return (DDI_SUCCESS);
4734 
4735 fail:
4736         cmn_err(CE_WARN, "failed to log ESC_DEVFS_DEVI_ADD event for %s%s",
4737             pathname, (no_transport) ? " (syseventd not responding)" : "");
4738 
4739         cmn_err(CE_WARN, "/dev may not be current for driver %s. "
4740             "Run devfsadm -i %s",
4741             ddi_driver_name(dip), ddi_driver_name(dip));
4742 
4743         sysevent_free(ev);
4744         kmem_free(pathname, MAXPATHLEN);
4745         return (DDI_SUCCESS);
4746 }
4747 
4748 /*
4749  * log a notification that a dev_info node has been unconfigured.
4750  */
4751 static int
4752 i_log_devfs_remove_devinfo(char *pathname, char *class_name, char *driver_name,
4753     int instance, uint_t flags)
4754 {
4755         sysevent_t              *ev;
4756         sysevent_id_t           eid;
4757         sysevent_value_t        se_val;
4758         sysevent_attr_list_t    *ev_attr_list = NULL;
4759         int                     se_err;
4760         int                     no_transport = 0;
4761 
4762         if (!i_ddi_io_initialized())
4763                 return (DDI_SUCCESS);
4764 
4765         /* Invalidate the devinfo snapshot cache */
4766         i_ddi_di_cache_invalidate();
4767 
4768         ev = sysevent_alloc(EC_DEVFS, ESC_DEVFS_DEVI_REMOVE, EP_DDI, SE_SLEEP);
4769 
4770         se_val.value_type = SE_DATA_TYPE_STRING;
4771         se_val.value.sv_string = pathname;
4772         if (sysevent_add_attr(&ev_attr_list, DEVFS_PATHNAME,
4773             &se_val, SE_SLEEP) != 0) {
4774                 goto fail;
4775         }
4776 
4777         if (class_name) {
4778                 /* add the device class, driver name and instance attributes */
4779 
4780                 se_val.value_type = SE_DATA_TYPE_STRING;
4781                 se_val.value.sv_string = class_name;
4782                 if (sysevent_add_attr(&ev_attr_list,
4783                     DEVFS_DEVI_CLASS, &se_val, SE_SLEEP) != 0) {
4784                         sysevent_free_attr(ev_attr_list);
4785                         goto fail;
4786                 }
4787 
4788                 se_val.value_type = SE_DATA_TYPE_STRING;
4789                 se_val.value.sv_string = driver_name;
4790                 if (sysevent_add_attr(&ev_attr_list,
4791                     DEVFS_DRIVER_NAME, &se_val, SE_SLEEP) != 0) {
4792                         sysevent_free_attr(ev_attr_list);
4793                         goto fail;
4794                 }
4795 
4796                 se_val.value_type = SE_DATA_TYPE_INT32;
4797                 se_val.value.sv_int32 = instance;
4798                 if (sysevent_add_attr(&ev_attr_list,
4799                     DEVFS_INSTANCE, &se_val, SE_SLEEP) != 0) {
4800                         sysevent_free_attr(ev_attr_list);
4801                         goto fail;
4802                 }
4803         }
4804 
4805         /*
4806          * must log a branch event too unless NDI_BRANCH_EVENT_OP is set,
4807          * in which case the branch event will be logged by the caller
4808          * after the entire branch has been unconfigured.
4809          */
4810         if ((flags & NDI_BRANCH_EVENT_OP) == 0) {
4811                 /*
4812                  * Instead of logging a separate branch event just add
4813                  * DEVFS_BRANCH_EVENT attribute. It indicates devfsadmd to
4814                  * generate a EC_DEV_BRANCH event.
4815                  */
4816                 se_val.value_type = SE_DATA_TYPE_INT32;
4817                 se_val.value.sv_int32 = 1;
4818                 if (sysevent_add_attr(&ev_attr_list,
4819                     DEVFS_BRANCH_EVENT, &se_val, SE_SLEEP) != 0) {
4820                         sysevent_free_attr(ev_attr_list);
4821                         goto fail;
4822                 }
4823         }
4824 
4825         if (sysevent_attach_attributes(ev, ev_attr_list) != 0) {
4826                 sysevent_free_attr(ev_attr_list);
4827                 goto fail;
4828         }
4829 
4830         if ((se_err = log_sysevent(ev, SE_SLEEP, &eid)) != 0) {
4831                 if (se_err == SE_NO_TRANSPORT)
4832                         no_transport = 1;
4833                 goto fail;
4834         }
4835 
4836         sysevent_free(ev);
4837         return (DDI_SUCCESS);
4838 
4839 fail:
4840         sysevent_free(ev);
4841         cmn_err(CE_WARN, "failed to log ESC_DEVFS_DEVI_REMOVE event for %s%s",
4842             pathname, (no_transport) ? " (syseventd not responding)" : "");
4843         return (DDI_SUCCESS);
4844 }
4845 
4846 static void
4847 i_ddi_log_devfs_device_remove(dev_info_t *dip)
4848 {
4849         char    *path;
4850 
4851         ASSERT(dip && ddi_get_parent(dip) &&
4852             DEVI_BUSY_OWNED(ddi_get_parent(dip)));
4853         ASSERT(DEVI_IS_DEVICE_REMOVED(dip));
4854 
4855         ASSERT(i_ddi_node_state(dip) >= DS_INITIALIZED);
4856         if (i_ddi_node_state(dip) < DS_INITIALIZED)
4857                 return;
4858 
4859         /* Inform LDI_EV_DEVICE_REMOVE callbacks. */
4860         ldi_invoke_finalize(dip, DDI_DEV_T_ANY, 0, LDI_EV_DEVICE_REMOVE,
4861             LDI_EV_SUCCESS, NULL);
4862 
4863         /* Generate EC_DEVFS_DEVI_REMOVE sysevent. */
4864         path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4865         (void) i_log_devfs_remove_devinfo(ddi_pathname(dip, path),
4866             i_ddi_devi_class(dip), (char *)ddi_driver_name(dip),
4867             ddi_get_instance(dip), 0);
4868         kmem_free(path, MAXPATHLEN);
4869 }
4870 
4871 static void
4872 i_ddi_log_devfs_device_insert(dev_info_t *dip)
4873 {
4874         ASSERT(dip && ddi_get_parent(dip) &&
4875             DEVI_BUSY_OWNED(ddi_get_parent(dip)));
4876         ASSERT(!DEVI_IS_DEVICE_REMOVED(dip));
4877 
4878         (void) i_log_devfs_add_devinfo(dip, 0);
4879 }
4880 
4881 
4882 /*
4883  * log an event that a dev_info branch has been configured or unconfigured.
4884  */
4885 static int
4886 i_log_devfs_branch(char *node_path, char *subclass)
4887 {
4888         int se_err;
4889         sysevent_t *ev;
4890         sysevent_id_t eid;
4891         sysevent_value_t se_val;
4892         sysevent_attr_list_t *ev_attr_list = NULL;
4893         int no_transport = 0;
4894 
4895         /* do not generate the event during boot */
4896         if (!i_ddi_io_initialized())
4897                 return (DDI_SUCCESS);
4898 
4899         /* Invalidate the devinfo snapshot cache */
4900         i_ddi_di_cache_invalidate();
4901 
4902         ev = sysevent_alloc(EC_DEVFS, subclass, EP_DDI, SE_SLEEP);
4903 
4904         se_val.value_type = SE_DATA_TYPE_STRING;
4905         se_val.value.sv_string = node_path;
4906 
4907         if (sysevent_add_attr(&ev_attr_list, DEVFS_PATHNAME,
4908             &se_val, SE_SLEEP) != 0) {
4909                 goto fail;
4910         }
4911 
4912         if (sysevent_attach_attributes(ev, ev_attr_list) != 0) {
4913                 sysevent_free_attr(ev_attr_list);
4914                 goto fail;
4915         }
4916 
4917         if ((se_err = log_sysevent(ev, SE_SLEEP, &eid)) != 0) {
4918                 if (se_err == SE_NO_TRANSPORT)
4919                         no_transport = 1;
4920                 goto fail;
4921         }
4922 
4923         sysevent_free(ev);
4924         return (DDI_SUCCESS);
4925 
4926 fail:
4927         cmn_err(CE_WARN, "failed to log %s branch event for %s%s",
4928             subclass, node_path,
4929             (no_transport) ? " (syseventd not responding)" : "");
4930 
4931         sysevent_free(ev);
4932         return (DDI_FAILURE);
4933 }
4934 
4935 /*
4936  * log an event that a dev_info tree branch has been configured.
4937  */
4938 static int
4939 i_log_devfs_branch_add(dev_info_t *dip)
4940 {
4941         char *node_path;
4942         int rv;
4943 
4944         node_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4945         (void) ddi_pathname(dip, node_path);
4946         rv = i_log_devfs_branch(node_path, ESC_DEVFS_BRANCH_ADD);
4947         kmem_free(node_path, MAXPATHLEN);
4948 
4949         return (rv);
4950 }
4951 
4952 /*
4953  * log an event that a dev_info tree branch has been unconfigured.
4954  */
4955 static int
4956 i_log_devfs_branch_remove(char *node_path)
4957 {
4958         return (i_log_devfs_branch(node_path, ESC_DEVFS_BRANCH_REMOVE));
4959 }
4960 
4961 /*
4962  * enqueue the dip's deviname on the branch event queue.
4963  */
4964 static struct brevq_node *
4965 brevq_enqueue(struct brevq_node **brevqp, dev_info_t *dip,
4966     struct brevq_node *child)
4967 {
4968         struct brevq_node *brn;
4969         char *deviname;
4970 
4971         deviname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
4972         (void) ddi_deviname(dip, deviname);
4973 
4974         brn = kmem_zalloc(sizeof (*brn), KM_SLEEP);
4975         brn->brn_deviname = i_ddi_strdup(deviname, KM_SLEEP);
4976         kmem_free(deviname, MAXNAMELEN);
4977         brn->brn_child = child;
4978         brn->brn_sibling = *brevqp;
4979         *brevqp = brn;
4980 
4981         return (brn);
4982 }
4983 
4984 /*
4985  * free the memory allocated for the elements on the branch event queue.
4986  */
4987 static void
4988 free_brevq(struct brevq_node *brevq)
4989 {
4990         struct brevq_node *brn, *next_brn;
4991 
4992         for (brn = brevq; brn != NULL; brn = next_brn) {
4993                 next_brn = brn->brn_sibling;
4994                 ASSERT(brn->brn_child == NULL);
4995                 kmem_free(brn->brn_deviname, strlen(brn->brn_deviname) + 1);
4996                 kmem_free(brn, sizeof (*brn));
4997         }
4998 }
4999 
5000 /*
5001  * log the events queued up on the branch event queue and free the
5002  * associated memory.
5003  *
5004  * node_path must have been allocated with at least MAXPATHLEN bytes.
5005  */
5006 static void
5007 log_and_free_brevq(char *node_path, struct brevq_node *brevq)
5008 {
5009         struct brevq_node *brn;
5010         char *p;
5011 
5012         p = node_path + strlen(node_path);
5013         for (brn = brevq; brn != NULL; brn = brn->brn_sibling) {
5014                 (void) strcpy(p, brn->brn_deviname);
5015                 (void) i_log_devfs_branch_remove(node_path);
5016         }
5017         *p = '\0';
5018 
5019         free_brevq(brevq);
5020 }
5021 
5022 /*
5023  * log the events queued up on the branch event queue and free the
5024  * associated memory. Same as the previous function but operates on dip.
5025  */
5026 static void
5027 log_and_free_brevq_dip(dev_info_t *dip, struct brevq_node *brevq)
5028 {
5029         char *path;
5030 
5031         path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
5032         (void) ddi_pathname(dip, path);
5033         log_and_free_brevq(path, brevq);
5034         kmem_free(path, MAXPATHLEN);
5035 }
5036 
5037 /*
5038  * log the outstanding branch remove events for the grand children of the dip
5039  * and free the associated memory.
5040  */
5041 static void
5042 log_and_free_br_events_on_grand_children(dev_info_t *dip,
5043     struct brevq_node *brevq)
5044 {
5045         struct brevq_node *brn;
5046         char *path;
5047         char *p;
5048 
5049         path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
5050         (void) ddi_pathname(dip, path);
5051         p = path + strlen(path);
5052         for (brn = brevq; brn != NULL; brn = brn->brn_sibling) {
5053                 if (brn->brn_child) {
5054                         (void) strcpy(p, brn->brn_deviname);
5055                         /* now path contains the node path to the dip's child */
5056                         log_and_free_brevq(path, brn->brn_child);
5057                         brn->brn_child = NULL;
5058                 }
5059         }
5060         kmem_free(path, MAXPATHLEN);
5061 }
5062 
5063 /*
5064  * log and cleanup branch remove events for the grand children of the dip.
5065  */
5066 static void
5067 cleanup_br_events_on_grand_children(dev_info_t *dip, struct brevq_node **brevqp)
5068 {
5069         dev_info_t *child;
5070         struct brevq_node *brevq, *brn, *prev_brn, *next_brn;
5071         char *path;
5072         int circ;
5073 
5074         path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
5075         prev_brn = NULL;
5076         brevq = *brevqp;
5077 
5078         ndi_devi_enter(dip, &circ);
5079         for (brn = brevq; brn != NULL; brn = next_brn) {
5080                 next_brn = brn->brn_sibling;
5081                 for (child = ddi_get_child(dip); child != NULL;
5082                     child = ddi_get_next_sibling(child)) {
5083                         if (i_ddi_node_state(child) >= DS_INITIALIZED) {
5084                                 (void) ddi_deviname(child, path);
5085                                 if (strcmp(path, brn->brn_deviname) == 0)
5086                                         break;
5087                         }
5088                 }
5089 
5090                 if (child != NULL && !(DEVI_EVREMOVE(child))) {
5091                         /*
5092                          * Event state is not REMOVE. So branch remove event
5093                          * is not going be generated on brn->brn_child.
5094                          * If any branch remove events were queued up on
5095                          * brn->brn_child log them and remove the brn
5096                          * from the queue.
5097                          */
5098                         if (brn->brn_child) {
5099                                 (void) ddi_pathname(dip, path);
5100                                 (void) strcat(path, brn->brn_deviname);
5101                                 log_and_free_brevq(path, brn->brn_child);
5102                         }
5103 
5104                         if (prev_brn)
5105                                 prev_brn->brn_sibling = next_brn;
5106                         else
5107                                 *brevqp = next_brn;
5108 
5109                         kmem_free(brn->brn_deviname,
5110                             strlen(brn->brn_deviname) + 1);
5111                         kmem_free(brn, sizeof (*brn));
5112                 } else {
5113                         /*
5114                          * Free up the outstanding branch remove events
5115                          * queued on brn->brn_child since brn->brn_child
5116                          * itself is eligible for branch remove event.
5117                          */
5118                         if (brn->brn_child) {
5119                                 free_brevq(brn->brn_child);
5120                                 brn->brn_child = NULL;
5121                         }
5122                         prev_brn = brn;
5123                 }
5124         }
5125 
5126         ndi_devi_exit(dip, circ);
5127         kmem_free(path, MAXPATHLEN);
5128 }
5129 
5130 static int
5131 need_remove_event(dev_info_t *dip, int flags)
5132 {
5133         if ((flags & (NDI_NO_EVENT | NDI_AUTODETACH)) == 0 &&
5134             (flags & (NDI_DEVI_OFFLINE | NDI_UNCONFIG | NDI_DEVI_REMOVE)) &&
5135             !(DEVI_EVREMOVE(dip)))
5136                 return (1);
5137         else
5138                 return (0);
5139 }
5140 
5141 /*
5142  * Unconfigure children/descendants of the dip.
5143  *
5144  * If the operation involves a branch event NDI_BRANCH_EVENT_OP is set
5145  * through out the unconfiguration. On successful return *brevqp is set to
5146  * a queue of dip's child devinames for which branch remove events need
5147  * to be generated.
5148  */
5149 static int
5150 devi_unconfig_branch(dev_info_t *dip, dev_info_t **dipp, int flags,
5151     struct brevq_node **brevqp)
5152 {
5153         int rval;
5154 
5155         *brevqp = NULL;
5156 
5157         if ((!(flags & NDI_BRANCH_EVENT_OP)) && need_remove_event(dip, flags))
5158                 flags |= NDI_BRANCH_EVENT_OP;
5159 
5160         if (flags & NDI_BRANCH_EVENT_OP) {
5161                 rval = devi_unconfig_common(dip, dipp, flags, DDI_MAJOR_T_NONE,
5162                     brevqp);
5163 
5164                 if (rval != NDI_SUCCESS && (*brevqp)) {
5165                         log_and_free_brevq_dip(dip, *brevqp);
5166                         *brevqp = NULL;
5167                 }
5168         } else
5169                 rval = devi_unconfig_common(dip, dipp, flags, DDI_MAJOR_T_NONE,
5170                     NULL);
5171 
5172         return (rval);
5173 }
5174 
5175 /*
5176  * If the dip is already bound to a driver transition to DS_INITIALIZED
5177  * in order to generate an event in the case where the node was left in
5178  * DS_BOUND state since boot (never got attached) and the node is now
5179  * being offlined.
5180  */
5181 static void
5182 init_bound_node_ev(dev_info_t *pdip, dev_info_t *dip, int flags)
5183 {
5184         if (need_remove_event(dip, flags) &&
5185             i_ddi_node_state(dip) == DS_BOUND &&
5186             i_ddi_devi_attached(pdip) && !DEVI_IS_DEVICE_OFFLINE(dip))
5187                 (void) ddi_initchild(pdip, dip);
5188 }
5189 
5190 /*
5191  * attach a node/branch with parent already held busy
5192  */
5193 static int
5194 devi_attach_node(dev_info_t *dip, uint_t flags)
5195 {
5196         dev_info_t *pdip = ddi_get_parent(dip);
5197 
5198         ASSERT(pdip && DEVI_BUSY_OWNED(pdip));
5199 
5200         mutex_enter(&(DEVI(dip)->devi_lock));
5201         if (flags & NDI_DEVI_ONLINE) {
5202                 if (!i_ddi_devi_attached(dip))
5203                         DEVI_SET_REPORT(dip);
5204                 DEVI_SET_DEVICE_ONLINE(dip);
5205         }
5206         if (DEVI_IS_DEVICE_OFFLINE(dip)) {
5207                 mutex_exit(&(DEVI(dip)->devi_lock));
5208                 return (NDI_FAILURE);
5209         }
5210         mutex_exit(&(DEVI(dip)->devi_lock));
5211 
5212         if (i_ddi_attachchild(dip) != DDI_SUCCESS) {
5213                 mutex_enter(&(DEVI(dip)->devi_lock));
5214                 DEVI_SET_EVUNINIT(dip);
5215                 mutex_exit(&(DEVI(dip)->devi_lock));
5216 
5217                 if (ndi_dev_is_persistent_node(dip))
5218                         (void) ddi_uninitchild(dip);
5219                 else {
5220                         /*
5221                          * Delete .conf nodes and nodes that are not
5222                          * well formed.
5223                          */
5224                         (void) ddi_remove_child(dip, 0);
5225                 }
5226                 return (NDI_FAILURE);
5227         }
5228 
5229         i_ndi_devi_report_status_change(dip, NULL);
5230 
5231         /*
5232          * log an event, but not during devfs lookups in which case
5233          * NDI_NO_EVENT is set.
5234          */
5235         if ((flags & NDI_NO_EVENT) == 0 && !(DEVI_EVADD(dip))) {
5236                 (void) i_log_devfs_add_devinfo(dip, flags);
5237 
5238                 mutex_enter(&(DEVI(dip)->devi_lock));
5239                 DEVI_SET_EVADD(dip);
5240                 mutex_exit(&(DEVI(dip)->devi_lock));
5241         } else if (!(flags & NDI_NO_EVENT_STATE_CHNG)) {
5242                 mutex_enter(&(DEVI(dip)->devi_lock));
5243                 DEVI_SET_EVADD(dip);
5244                 mutex_exit(&(DEVI(dip)->devi_lock));
5245         }
5246 
5247         return (NDI_SUCCESS);
5248 }
5249 
5250 /* internal function to config immediate children */
5251 static int
5252 config_immediate_children(dev_info_t *pdip, uint_t flags, major_t major)
5253 {
5254         dev_info_t      *child, *next;
5255         int             circ;
5256 
5257         ASSERT(i_ddi_devi_attached(pdip));
5258 
5259         if (!NEXUS_DRV(ddi_get_driver(pdip)))
5260                 return (NDI_SUCCESS);
5261 
5262         NDI_CONFIG_DEBUG((CE_CONT,
5263             "config_immediate_children: %s%d (%p), flags=%x\n",
5264             ddi_driver_name(pdip), ddi_get_instance(pdip),
5265             (void *)pdip, flags));
5266 
5267         ndi_devi_enter(pdip, &circ);
5268 
5269         if (flags & NDI_CONFIG_REPROBE) {
5270                 mutex_enter(&DEVI(pdip)->devi_lock);
5271                 DEVI(pdip)->devi_flags &= ~DEVI_MADE_CHILDREN;
5272                 mutex_exit(&DEVI(pdip)->devi_lock);
5273         }
5274         (void) i_ndi_make_spec_children(pdip, flags);
5275         i_ndi_init_hw_children(pdip, flags);
5276 
5277         child = ddi_get_child(pdip);
5278         while (child) {
5279                 /* NOTE: devi_attach_node() may remove the dip */
5280                 next = ddi_get_next_sibling(child);
5281 
5282                 /*
5283                  * Configure all nexus nodes or leaf nodes with
5284                  * matching driver major
5285                  */
5286                 if ((major == DDI_MAJOR_T_NONE) ||
5287                     (major == ddi_driver_major(child)) ||
5288                     ((flags & NDI_CONFIG) && (is_leaf_node(child) == 0)))
5289                         (void) devi_attach_node(child, flags);
5290                 child = next;
5291         }
5292 
5293         ndi_devi_exit(pdip, circ);
5294 
5295         return (NDI_SUCCESS);
5296 }
5297 
5298 /* internal function to config grand children */
5299 static int
5300 config_grand_children(dev_info_t *pdip, uint_t flags, major_t major)
5301 {
5302         struct mt_config_handle *hdl;
5303 
5304         /* multi-threaded configuration of child nexus */
5305         hdl = mt_config_init(pdip, NULL, flags, major, MT_CONFIG_OP, NULL);
5306         mt_config_children(hdl);
5307 
5308         return (mt_config_fini(hdl));   /* wait for threads to exit */
5309 }
5310 
5311 /*
5312  * Common function for device tree configuration,
5313  * either BUS_CONFIG_ALL or BUS_CONFIG_DRIVER.
5314  * The NDI_CONFIG flag causes recursive configuration of
5315  * grandchildren, devfs usage should not recurse.
5316  */
5317 static int
5318 devi_config_common(dev_info_t *dip, int flags, major_t major)
5319 {
5320         int error;
5321         int (*f)();
5322 
5323         if (!i_ddi_devi_attached(dip))
5324                 return (NDI_FAILURE);
5325 
5326         if (pm_pre_config(dip, NULL) != DDI_SUCCESS)
5327                 return (NDI_FAILURE);
5328 
5329         if ((DEVI(dip)->devi_ops->devo_bus_ops == NULL) ||
5330             (DEVI(dip)->devi_ops->devo_bus_ops->busops_rev < BUSO_REV_5) ||
5331             (f = DEVI(dip)->devi_ops->devo_bus_ops->bus_config) == NULL) {
5332                 error = config_immediate_children(dip, flags, major);
5333         } else {
5334                 /* call bus_config entry point */
5335                 ddi_bus_config_op_t bus_op = (major == DDI_MAJOR_T_NONE) ?
5336                     BUS_CONFIG_ALL : BUS_CONFIG_DRIVER;
5337                 error = (*f)(dip,
5338                     flags, bus_op, (void *)(uintptr_t)major, NULL, 0);
5339         }
5340 
5341         if (error) {
5342                 pm_post_config(dip, NULL);
5343                 return (error);
5344         }
5345 
5346         /*
5347          * Some callers, notably SCSI, need to mark the devfs cache
5348          * to be rebuilt together with the config operation.
5349          */
5350         if (flags & NDI_DEVFS_CLEAN)
5351                 (void) devfs_clean(dip, NULL, 0);
5352 
5353         if (flags & NDI_CONFIG)
5354                 (void) config_grand_children(dip, flags, major);
5355 
5356         pm_post_config(dip, NULL);
5357 
5358         return (NDI_SUCCESS);
5359 }
5360 
5361 /*
5362  * Framework entry point for BUS_CONFIG_ALL
5363  */
5364 int
5365 ndi_devi_config(dev_info_t *dip, int flags)
5366 {
5367         NDI_CONFIG_DEBUG((CE_CONT,
5368             "ndi_devi_config: par = %s%d (%p), flags = 0x%x\n",
5369             ddi_driver_name(dip), ddi_get_instance(dip), (void *)dip, flags));
5370 
5371         return (devi_config_common(dip, flags, DDI_MAJOR_T_NONE));
5372 }
5373 
5374 /*
5375  * Framework entry point for BUS_CONFIG_DRIVER, bound to major
5376  */
5377 int
5378 ndi_devi_config_driver(dev_info_t *dip, int flags, major_t major)
5379 {
5380         /* don't abuse this function */
5381         ASSERT(major != DDI_MAJOR_T_NONE);
5382 
5383         NDI_CONFIG_DEBUG((CE_CONT,
5384             "ndi_devi_config_driver: par = %s%d (%p), flags = 0x%x\n",
5385             ddi_driver_name(dip), ddi_get_instance(dip), (void *)dip, flags));
5386 
5387         return (devi_config_common(dip, flags, major));
5388 }
5389 
5390 /*
5391  * Called by nexus drivers to configure its children.
5392  */
5393 static int
5394 devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **cdipp,
5395     uint_t flags, clock_t timeout)
5396 {
5397         dev_info_t      *vdip = NULL;
5398         char            *drivername = NULL;
5399         int             find_by_addr = 0;
5400         char            *name, *addr;
5401         int             v_circ, p_circ;
5402         clock_t         end_time;       /* 60 sec */
5403         int             probed;
5404         dev_info_t      *cdip;
5405         mdi_pathinfo_t  *cpip;
5406 
5407         *cdipp = NULL;
5408 
5409         if (!NEXUS_DRV(ddi_get_driver(pdip)))
5410                 return (NDI_FAILURE);
5411 
5412         /* split name into "name@addr" parts */
5413         i_ddi_parse_name(devnm, &name, &addr, NULL);
5414 
5415         /*
5416          * If the nexus is a pHCI and we are not processing a pHCI from
5417          * mdi bus_config code then we need to know the vHCI.
5418          */
5419         if (MDI_PHCI(pdip))
5420                 vdip = mdi_devi_get_vdip(pdip);
5421 
5422         /*
5423          * We may have a genericname on a system that creates drivername
5424          * nodes (from .conf files).  Find the drivername by nodeid. If we
5425          * can't find a node with devnm as the node name then we search by
5426          * drivername.  This allows an implementation to supply a genericly
5427          * named boot path (disk) and locate drivename nodes (sd).  The
5428          * NDI_PROMNAME flag does not apply to /devices/pseudo paths.
5429          */
5430         if ((flags & NDI_PROMNAME) && (pdip != pseudo_dip)) {
5431                 drivername = child_path_to_driver(pdip, name, addr);
5432                 find_by_addr = 1;
5433         }
5434 
5435         /*
5436          * Determine end_time: This routine should *not* be called with a
5437          * constant non-zero timeout argument, the caller should be adjusting
5438          * the timeout argument relative to when it *started* its asynchronous
5439          * enumeration.
5440          */
5441         if (timeout > 0)
5442                 end_time = ddi_get_lbolt() + timeout;
5443 
5444         for (;;) {
5445                 /*
5446                  * For pHCI, enter (vHCI, pHCI) and search for pathinfo/client
5447                  * child - break out of for(;;) loop if child found.
5448                  * NOTE: Lock order for ndi_devi_enter is (vHCI, pHCI).
5449                  */
5450                 if (vdip) {
5451                         /* use mdi_devi_enter ordering */
5452                         ndi_devi_enter(vdip, &v_circ);
5453                         ndi_devi_enter(pdip, &p_circ);
5454                         cpip = mdi_pi_find(pdip, NULL, addr);
5455                         cdip = mdi_pi_get_client(cpip);
5456                         if (cdip)
5457                                 break;
5458                 } else
5459                         ndi_devi_enter(pdip, &p_circ);
5460 
5461                 /*
5462                  * When not a  vHCI or not all pHCI devices are required to
5463                  * enumerated under the vHCI (NDI_MDI_FALLBACK) search for
5464                  * devinfo child.
5465                  */
5466                 if ((vdip == NULL) || (flags & NDI_MDI_FALLBACK)) {
5467                         /* determine if .conf nodes already built */
5468                         probed = (DEVI(pdip)->devi_flags & DEVI_MADE_CHILDREN);
5469 
5470                         /*
5471                          * Search for child by name, if not found then search
5472                          * for a node bound to the drivername driver with the
5473                          * specified "@addr". Break out of for(;;) loop if
5474                          * child found.  To support path-oriented aliases
5475                          * binding on boot-device, we do a search_by_addr too.
5476                          */
5477 again:                  (void) i_ndi_make_spec_children(pdip, flags);
5478                         cdip = find_child_by_name(pdip, name, addr);
5479                         if ((cdip == NULL) && drivername)
5480                                 cdip = find_child_by_driver(pdip,
5481                                     drivername, addr);
5482                         if ((cdip == NULL) && find_by_addr)
5483                                 cdip = find_child_by_addr(pdip, addr);
5484                         if (cdip)
5485                                 break;
5486 
5487                         /*
5488                          * determine if we should reenumerate .conf nodes
5489                          * and look for child again.
5490                          */
5491                         if (probed &&
5492                             i_ddi_io_initialized() &&
5493                             (flags & NDI_CONFIG_REPROBE) &&
5494                             ((timeout <= 0) || (ddi_get_lbolt() >= end_time))) {
5495                                 probed = 0;
5496                                 mutex_enter(&DEVI(pdip)->devi_lock);
5497                                 DEVI(pdip)->devi_flags &= ~DEVI_MADE_CHILDREN;
5498                                 mutex_exit(&DEVI(pdip)->devi_lock);
5499                                 goto again;
5500                         }
5501                 }
5502 
5503                 /* break out of for(;;) if time expired */
5504                 if ((timeout <= 0) || (ddi_get_lbolt() >= end_time))
5505                         break;
5506 
5507                 /*
5508                  * Child not found, exit and wait for asynchronous enumeration
5509                  * to add child (or timeout). The addition of a new child (vhci
5510                  * or phci) requires the asynchronous enumeration thread to
5511                  * ndi_devi_enter/ndi_devi_exit. This exit will signal devi_cv
5512                  * and cause us to return from ndi_devi_exit_and_wait, after
5513                  * which we loop and search for the requested child again.
5514                  */
5515                 NDI_DEBUG(flags, (CE_CONT,
5516                     "%s%d: waiting for child %s@%s, timeout %ld",
5517                     ddi_driver_name(pdip), ddi_get_instance(pdip),
5518                     name, addr, timeout));
5519                 if (vdip) {
5520                         /*
5521                          * Mark vHCI for pHCI ndi_devi_exit broadcast.
5522                          */
5523                         mutex_enter(&DEVI(vdip)->devi_lock);
5524                         DEVI(vdip)->devi_flags |=
5525                             DEVI_PHCI_SIGNALS_VHCI;
5526                         mutex_exit(&DEVI(vdip)->devi_lock);
5527                         ndi_devi_exit(pdip, p_circ);
5528 
5529                         /*
5530                          * NB: There is a small race window from above
5531                          * ndi_devi_exit() of pdip to cv_wait() in
5532                          * ndi_devi_exit_and_wait() which can result in
5533                          * not immediately finding a new pHCI child
5534                          * of a pHCI that uses NDI_MDI_FAILBACK.
5535                          */
5536                         ndi_devi_exit_and_wait(vdip, v_circ, end_time);
5537                 } else {
5538                         ndi_devi_exit_and_wait(pdip, p_circ, end_time);
5539                 }
5540         }
5541 
5542         /* done with paddr, fixup i_ddi_parse_name '@'->'\0' change */
5543         if (addr && *addr != '\0')
5544                 *(addr - 1) = '@';
5545 
5546         /* attach and hold the child, returning pointer to child */
5547         if (cdip && (devi_attach_node(cdip, flags) == NDI_SUCCESS)) {
5548                 ndi_hold_devi(cdip);
5549                 *cdipp = cdip;
5550         }
5551 
5552         ndi_devi_exit(pdip, p_circ);
5553         if (vdip)
5554                 ndi_devi_exit(vdip, v_circ);
5555         return (*cdipp ? NDI_SUCCESS : NDI_FAILURE);
5556 }
5557 
5558 /*
5559  * Enumerate and attach a child specified by name 'devnm'.
5560  * Called by devfs lookup and DR to perform a BUS_CONFIG_ONE.
5561  * Note: devfs does not make use of NDI_CONFIG to configure
5562  * an entire branch.
5563  */
5564 int
5565 ndi_devi_config_one(dev_info_t *pdip, char *devnm, dev_info_t **dipp, int flags)
5566 {
5567         int error;
5568         int (*f)();
5569         char *nmdup;
5570         int duplen;
5571         int branch_event = 0;
5572 
5573         ASSERT(pdip);
5574         ASSERT(devnm);
5575         ASSERT(dipp);
5576         ASSERT(i_ddi_devi_attached(pdip));
5577 
5578         NDI_CONFIG_DEBUG((CE_CONT,
5579             "ndi_devi_config_one: par = %s%d (%p), child = %s\n",
5580             ddi_driver_name(pdip), ddi_get_instance(pdip),
5581             (void *)pdip, devnm));
5582 
5583         *dipp = NULL;
5584 
5585         if (pm_pre_config(pdip, devnm) != DDI_SUCCESS) {
5586                 cmn_err(CE_WARN, "preconfig failed: %s", devnm);
5587                 return (NDI_FAILURE);
5588         }
5589 
5590         if ((flags & (NDI_NO_EVENT | NDI_BRANCH_EVENT_OP)) == 0 &&
5591             (flags & NDI_CONFIG)) {
5592                 flags |= NDI_BRANCH_EVENT_OP;
5593                 branch_event = 1;
5594         }
5595 
5596         nmdup = strdup(devnm);
5597         duplen = strlen(devnm) + 1;
5598 
5599         if ((DEVI(pdip)->devi_ops->devo_bus_ops == NULL) ||
5600             (DEVI(pdip)->devi_ops->devo_bus_ops->busops_rev < BUSO_REV_5) ||
5601             (f = DEVI(pdip)->devi_ops->devo_bus_ops->bus_config) == NULL) {
5602                 error = devi_config_one(pdip, devnm, dipp, flags, 0);
5603         } else {
5604                 /* call bus_config entry point */
5605                 error = (*f)(pdip, flags, BUS_CONFIG_ONE, (void *)devnm, dipp);
5606         }
5607 
5608         if (error) {
5609                 *dipp = NULL;
5610         }
5611 
5612         /*
5613          * if we fail to lookup and this could be an alias, lookup currdip
5614          * To prevent recursive lookups into the same hash table, only
5615          * do the currdip lookups once the hash table init is complete.
5616          * Use tsd so that redirection doesn't recurse
5617          */
5618         if (error) {
5619                 char *alias = kmem_alloc(MAXPATHLEN, KM_NOSLEEP);
5620                 if (alias == NULL) {
5621                         ddi_err(DER_PANIC, pdip, "alias alloc failed: %s",
5622                             nmdup);
5623                 }
5624                 (void) ddi_pathname(pdip, alias);
5625                 (void) strlcat(alias, "/", MAXPATHLEN);
5626                 (void) strlcat(alias, nmdup, MAXPATHLEN);
5627 
5628                 *dipp = ddi_alias_redirect(alias);
5629                 error = (*dipp ? NDI_SUCCESS : NDI_FAILURE);
5630 
5631                 kmem_free(alias, MAXPATHLEN);
5632         }
5633         kmem_free(nmdup, duplen);
5634 
5635         if (error || !(flags & NDI_CONFIG)) {
5636                 pm_post_config(pdip, devnm);
5637                 return (error);
5638         }
5639 
5640         /*
5641          * DR usage (i.e. call with NDI_CONFIG) recursively configures
5642          * grandchildren, performing a BUS_CONFIG_ALL from the node attached
5643          * by the BUS_CONFIG_ONE.
5644          */
5645         ASSERT(*dipp);
5646         error = devi_config_common(*dipp, flags, DDI_MAJOR_T_NONE);
5647 
5648         pm_post_config(pdip, devnm);
5649 
5650         if (branch_event)
5651                 (void) i_log_devfs_branch_add(*dipp);
5652 
5653         return (error);
5654 }
5655 
5656 /*
5657  * Enumerate and attach a child specified by name 'devnm'.
5658  * Called during configure the OBP options. This configures
5659  * only one node.
5660  */
5661 static int
5662 ndi_devi_config_obp_args(dev_info_t *parent, char *devnm,
5663     dev_info_t **childp, int flags)
5664 {
5665         int error;
5666         int (*f)();
5667 
5668         ASSERT(childp);
5669         ASSERT(i_ddi_devi_attached(parent));
5670 
5671         NDI_CONFIG_DEBUG((CE_CONT, "ndi_devi_config_obp_args: "
5672             "par = %s%d (%p), child = %s\n", ddi_driver_name(parent),
5673             ddi_get_instance(parent), (void *)parent, devnm));
5674 
5675         if ((DEVI(parent)->devi_ops->devo_bus_ops == NULL) ||
5676             (DEVI(parent)->devi_ops->devo_bus_ops->busops_rev < BUSO_REV_5) ||
5677             (f = DEVI(parent)->devi_ops->devo_bus_ops->bus_config) == NULL) {
5678                 error = NDI_FAILURE;
5679         } else {
5680                 /* call bus_config entry point */
5681                 error = (*f)(parent, flags,
5682                     BUS_CONFIG_OBP_ARGS, (void *)devnm, childp);
5683         }
5684         return (error);
5685 }
5686 
5687 /*
5688  * Pay attention, the following is a bit tricky:
5689  * There are three possible cases when constraints are applied
5690  *
5691  *      - A constraint is applied and the offline is disallowed.
5692  *        Simply return failure and block the offline
5693  *
5694  *      - A constraint is applied and the offline is allowed.
5695  *        Mark the dip as having passed the constraint and allow
5696  *        offline to proceed.
5697  *
5698  *      - A constraint is not applied. Allow the offline to proceed for now.
5699  *
5700  * In the latter two cases we allow the offline to proceed. If the
5701  * offline succeeds (no users) everything is fine. It is ok for an unused
5702  * device to be offlined even if no constraints were imposed on the offline.
5703  * If the offline fails because there are users, we look at the constraint
5704  * flag on the dip. If the constraint flag is set (implying that it passed
5705  * a constraint) we allow the dip to be retired. If not, we don't allow
5706  * the retire. This ensures that we don't allow unconstrained retire.
5707  */
5708 int
5709 e_ddi_offline_notify(dev_info_t *dip)
5710 {
5711         int retval;
5712         int constraint;
5713         int failure;
5714 
5715         RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): entered: dip=%p",
5716             (void *) dip));
5717 
5718         constraint = 0;
5719         failure = 0;
5720 
5721         /*
5722          * Start with userland constraints first - applied via device contracts
5723          */
5724         retval = contract_device_offline(dip, DDI_DEV_T_ANY, 0);
5725         switch (retval) {
5726         case CT_NACK:
5727                 RIO_DEBUG((CE_NOTE, "Received NACK for dip=%p", (void *)dip));
5728                 failure = 1;
5729                 goto out;
5730         case CT_ACK:
5731                 constraint = 1;
5732                 RIO_DEBUG((CE_NOTE, "Received ACK for dip=%p", (void *)dip));
5733                 break;
5734         case CT_NONE:
5735                 /* no contracts */
5736                 RIO_DEBUG((CE_NOTE, "No contracts on dip=%p", (void *)dip));
5737                 break;
5738         default:
5739                 ASSERT(retval == CT_NONE);
5740         }
5741 
5742         /*
5743          * Next, use LDI to impose kernel constraints
5744          */
5745         retval = ldi_invoke_notify(dip, DDI_DEV_T_ANY, 0, LDI_EV_OFFLINE, NULL);
5746         switch (retval) {
5747         case LDI_EV_FAILURE:
5748                 contract_device_negend(dip, DDI_DEV_T_ANY, 0, CT_EV_FAILURE);
5749                 RIO_DEBUG((CE_NOTE, "LDI callback failed on dip=%p",
5750                     (void *)dip));
5751                 failure = 1;
5752                 goto out;
5753         case LDI_EV_SUCCESS:
5754                 constraint = 1;
5755                 RIO_DEBUG((CE_NOTE, "LDI callback success on dip=%p",
5756                     (void *)dip));
5757                 break;
5758         case LDI_EV_NONE:
5759                 /* no matching LDI callbacks */
5760                 RIO_DEBUG((CE_NOTE, "No LDI callbacks for dip=%p",
5761                     (void *)dip));
5762                 break;
5763         default:
5764                 ASSERT(retval == LDI_EV_NONE);
5765         }
5766 
5767 out:
5768         mutex_enter(&(DEVI(dip)->devi_lock));
5769         if ((DEVI(dip)->devi_flags & DEVI_RETIRING) && failure) {
5770                 RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): setting "
5771                     "BLOCKED flag. dip=%p", (void *)dip));
5772                 DEVI(dip)->devi_flags |= DEVI_R_BLOCKED;
5773                 if (DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT) {
5774                         RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): "
5775                             "blocked. clearing RCM CONSTRAINT flag. dip=%p",
5776                             (void *)dip));
5777                         DEVI(dip)->devi_flags &= ~DEVI_R_CONSTRAINT;
5778                 }
5779         } else if ((DEVI(dip)->devi_flags & DEVI_RETIRING) && constraint) {
5780                 RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): setting "
5781                     "CONSTRAINT flag. dip=%p", (void *)dip));
5782                 DEVI(dip)->devi_flags |= DEVI_R_CONSTRAINT;
5783         } else if ((DEVI(dip)->devi_flags & DEVI_RETIRING) &&
5784             ((DEVI(dip)->devi_ops != NULL &&
5785             DEVI(dip)->devi_ops->devo_bus_ops != NULL) ||
5786             DEVI(dip)->devi_ref == 0)) {
5787                 /* also allow retire if nexus or if device is not in use */
5788                 RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): device not in "
5789                     "use. Setting CONSTRAINT flag. dip=%p", (void *)dip));
5790                 DEVI(dip)->devi_flags |= DEVI_R_CONSTRAINT;
5791         } else {
5792                 /*
5793                  * Note: We cannot ASSERT here that DEVI_R_CONSTRAINT is
5794                  * not set, since other sources (such as RCM) may have
5795                  * set the flag.
5796                  */
5797                 RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): not setting "
5798                     "constraint flag. dip=%p", (void *)dip));
5799         }
5800         mutex_exit(&(DEVI(dip)->devi_lock));
5801 
5802 
5803         RIO_VERBOSE((CE_NOTE, "e_ddi_offline_notify(): exit: dip=%p",
5804             (void *) dip));
5805 
5806         return (failure ? DDI_FAILURE : DDI_SUCCESS);
5807 }
5808 
5809 void
5810 e_ddi_offline_finalize(dev_info_t *dip, int result)
5811 {
5812         RIO_DEBUG((CE_NOTE, "e_ddi_offline_finalize(): entry: result=%s, "
5813             "dip=%p", result == DDI_SUCCESS ? "SUCCESS" : "FAILURE",
5814             (void *)dip));
5815 
5816         contract_device_negend(dip, DDI_DEV_T_ANY, 0,  result == DDI_SUCCESS ?
5817             CT_EV_SUCCESS : CT_EV_FAILURE);
5818 
5819         ldi_invoke_finalize(dip, DDI_DEV_T_ANY, 0,
5820             LDI_EV_OFFLINE, result == DDI_SUCCESS ?
5821             LDI_EV_SUCCESS : LDI_EV_FAILURE, NULL);
5822 
5823         RIO_VERBOSE((CE_NOTE, "e_ddi_offline_finalize(): exit: dip=%p",
5824             (void *)dip));
5825 }
5826 
5827 void
5828 e_ddi_degrade_finalize(dev_info_t *dip)
5829 {
5830         RIO_DEBUG((CE_NOTE, "e_ddi_degrade_finalize(): entry: "
5831             "result always = DDI_SUCCESS, dip=%p", (void *)dip));
5832 
5833         contract_device_degrade(dip, DDI_DEV_T_ANY, 0);
5834         contract_device_negend(dip, DDI_DEV_T_ANY, 0, CT_EV_SUCCESS);
5835 
5836         ldi_invoke_finalize(dip, DDI_DEV_T_ANY, 0, LDI_EV_DEGRADE,
5837             LDI_EV_SUCCESS, NULL);
5838 
5839         RIO_VERBOSE((CE_NOTE, "e_ddi_degrade_finalize(): exit: dip=%p",
5840             (void *)dip));
5841 }
5842 
5843 void
5844 e_ddi_undegrade_finalize(dev_info_t *dip)
5845 {
5846         RIO_DEBUG((CE_NOTE, "e_ddi_undegrade_finalize(): entry: "
5847             "result always = DDI_SUCCESS, dip=%p", (void *)dip));
5848 
5849         contract_device_undegrade(dip, DDI_DEV_T_ANY, 0);
5850         contract_device_negend(dip, DDI_DEV_T_ANY, 0, CT_EV_SUCCESS);
5851 
5852         RIO_VERBOSE((CE_NOTE, "e_ddi_undegrade_finalize(): exit: dip=%p",
5853             (void *)dip));
5854 }
5855 
5856 /*
5857  * detach a node with parent already held busy
5858  */
5859 static int
5860 devi_detach_node(dev_info_t *dip, uint_t flags)
5861 {
5862         dev_info_t *pdip = ddi_get_parent(dip);
5863         int ret = NDI_SUCCESS;
5864         ddi_eventcookie_t cookie;
5865         char *path = NULL;
5866         char *class = NULL;
5867         char *driver = NULL;
5868         int instance = -1;
5869         int post_event = 0;
5870 
5871         ASSERT(pdip && DEVI_BUSY_OWNED(pdip));
5872 
5873         /*
5874          * Invoke notify if offlining
5875          */
5876         if (flags & NDI_DEVI_OFFLINE) {
5877                 RIO_DEBUG((CE_NOTE, "devi_detach_node: offlining dip=%p",
5878                     (void *)dip));
5879                 if (e_ddi_offline_notify(dip) != DDI_SUCCESS) {
5880                         RIO_DEBUG((CE_NOTE, "devi_detach_node: offline NACKed"
5881                             "dip=%p", (void *)dip));
5882                         return (NDI_FAILURE);
5883                 }
5884         }
5885 
5886         if (flags & NDI_POST_EVENT) {
5887                 if (i_ddi_devi_attached(pdip)) {
5888                         if (ddi_get_eventcookie(dip, DDI_DEVI_REMOVE_EVENT,
5889                             &cookie) == NDI_SUCCESS)
5890                                 (void) ndi_post_event(dip, dip, cookie, NULL);
5891                 }
5892         }
5893 
5894         /*
5895          * dv_mknod places a hold on the dev_info_t for each devfs node
5896          * created.  If we're to succeed in detaching this device, we must
5897          * first release all outstanding references held by devfs.
5898          */
5899         (void) devfs_clean(pdip, NULL, DV_CLEAN_FORCE);
5900 
5901         if (i_ddi_detachchild(dip, flags) != DDI_SUCCESS) {
5902                 if (flags & NDI_DEVI_OFFLINE) {
5903                         RIO_DEBUG((CE_NOTE, "devi_detach_node: offline failed."
5904                             " Calling e_ddi_offline_finalize with result=%d. "
5905                             "dip=%p", DDI_FAILURE, (void *)dip));
5906                         e_ddi_offline_finalize(dip, DDI_FAILURE);
5907                 }
5908                 return (NDI_FAILURE);
5909         }
5910 
5911         if (flags & NDI_DEVI_OFFLINE) {
5912                 RIO_DEBUG((CE_NOTE, "devi_detach_node: offline succeeded."
5913                     " Calling e_ddi_offline_finalize with result=%d, "
5914                     "dip=%p", DDI_SUCCESS, (void *)dip));
5915                 e_ddi_offline_finalize(dip, DDI_SUCCESS);
5916         }
5917 
5918         if (flags & NDI_AUTODETACH)
5919                 return (NDI_SUCCESS);
5920 
5921         /*
5922          * For DR, even bound nodes may need to have offline
5923          * flag set.
5924          */
5925         if (flags & NDI_DEVI_OFFLINE) {
5926                 mutex_enter(&(DEVI(dip)->devi_lock));
5927                 DEVI_SET_DEVICE_OFFLINE(dip);
5928                 mutex_exit(&(DEVI(dip)->devi_lock));
5929         }
5930 
5931         if (i_ddi_node_state(dip) == DS_INITIALIZED) {
5932                 struct dev_info *devi = DEVI(dip);
5933 
5934                 if (devi->devi_ev_path == NULL) {
5935                         devi->devi_ev_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
5936                         (void) ddi_pathname(dip, devi->devi_ev_path);
5937                 }
5938                 if (flags & NDI_DEVI_OFFLINE)
5939                         i_ndi_devi_report_status_change(dip,
5940                             devi->devi_ev_path);
5941 
5942                 if (need_remove_event(dip, flags)) {
5943                         /*
5944                          * instance and path data are lost in call to
5945                          * ddi_uninitchild
5946                          */
5947                         devi->devi_ev_instance = ddi_get_instance(dip);
5948 
5949                         mutex_enter(&(DEVI(dip)->devi_lock));
5950                         DEVI_SET_EVREMOVE(dip);
5951                         mutex_exit(&(DEVI(dip)->devi_lock));
5952                 }
5953         }
5954 
5955         if (flags & (NDI_UNCONFIG | NDI_DEVI_REMOVE)) {
5956                 ret = ddi_uninitchild(dip);
5957                 if (ret == NDI_SUCCESS) {
5958                         /*
5959                          * Remove uninitialized pseudo nodes because
5960                          * system props are lost and the node cannot be
5961                          * reattached.
5962                          */
5963                         if (!ndi_dev_is_persistent_node(dip))
5964                                 flags |= NDI_DEVI_REMOVE;
5965 
5966                         if (flags & NDI_DEVI_REMOVE) {
5967                                 /*
5968                                  * NOTE: If there is a consumer of LDI events,
5969                                  * ddi_uninitchild above would have failed
5970                                  * because of active devi_ref from ldi_open().
5971                                  */
5972 
5973                                 if (DEVI_EVREMOVE(dip)) {
5974                                         path = i_ddi_strdup(
5975                                             DEVI(dip)->devi_ev_path,
5976                                             KM_SLEEP);
5977                                         class =
5978                                             i_ddi_strdup(i_ddi_devi_class(dip),
5979                                             KM_SLEEP);
5980                                         driver =
5981                                             i_ddi_strdup(
5982                                             (char *)ddi_driver_name(dip),
5983                                             KM_SLEEP);
5984                                         instance = DEVI(dip)->devi_ev_instance;
5985                                         post_event = 1;
5986                                 }
5987 
5988                                 ret = ddi_remove_child(dip, 0);
5989                                 if (post_event && ret == NDI_SUCCESS) {
5990                                         /* Generate EC_DEVFS_DEVI_REMOVE */
5991                                         (void) i_log_devfs_remove_devinfo(path,
5992                                             class, driver, instance, flags);
5993                                 }
5994                         }
5995 
5996                 }
5997         }
5998 
5999         if (path)
6000                 strfree(path);
6001         if (class)
6002                 strfree(class);
6003         if (driver)
6004                 strfree(driver);
6005 
6006         return (ret);
6007 }
6008 
6009 /*
6010  * unconfigure immediate children of bus nexus device
6011  */
6012 static int
6013 unconfig_immediate_children(
6014         dev_info_t *dip,
6015         dev_info_t **dipp,
6016         int flags,
6017         major_t major)
6018 {
6019         int rv = NDI_SUCCESS;
6020         int circ, vcirc;
6021         dev_info_t *child;
6022         dev_info_t *vdip = NULL;
6023         dev_info_t *next;
6024 
6025         ASSERT(dipp == NULL || *dipp == NULL);
6026 
6027         /*
6028          * Scan forward to see if we will be processing a pHCI child. If we
6029          * have a child that is a pHCI and vHCI and pHCI are not siblings then
6030          * enter vHCI before parent(pHCI) to prevent deadlock with mpxio
6031          * Client power management operations.
6032          */
6033         ndi_devi_enter(dip, &circ);
6034         for (child = ddi_get_child(dip); child;
6035             child = ddi_get_next_sibling(child)) {
6036                 /* skip same nodes we skip below */
6037                 if (((major != DDI_MAJOR_T_NONE) &&
6038                     (major != ddi_driver_major(child))) ||
6039                     ((flags & NDI_AUTODETACH) && !is_leaf_node(child)))
6040                         continue;
6041 
6042                 if (MDI_PHCI(child)) {
6043                         vdip = mdi_devi_get_vdip(child);
6044                         /*
6045                          * If vHCI and vHCI is not a sibling of pHCI
6046                          * then enter in (vHCI, parent(pHCI)) order.
6047                          */
6048                         if (vdip && (ddi_get_parent(vdip) != dip)) {
6049                                 ndi_devi_exit(dip, circ);
6050 
6051                                 /* use mdi_devi_enter ordering */
6052                                 ndi_devi_enter(vdip, &vcirc);
6053                                 ndi_devi_enter(dip, &circ);
6054                                 break;
6055                         } else
6056                                 vdip = NULL;
6057                 }
6058         }
6059 
6060         child = ddi_get_child(dip);
6061         while (child) {
6062                 next = ddi_get_next_sibling(child);
6063 
6064                 if ((major != DDI_MAJOR_T_NONE) &&
6065                     (major != ddi_driver_major(child))) {
6066                         child = next;
6067                         continue;
6068                 }
6069 
6070                 /* skip nexus nodes during autodetach */
6071                 if ((flags & NDI_AUTODETACH) && !is_leaf_node(child)) {
6072                         child = next;
6073                         continue;
6074                 }
6075 
6076                 if (devi_detach_node(child, flags) != NDI_SUCCESS) {
6077                         if (dipp && *dipp == NULL) {
6078                                 ndi_hold_devi(child);
6079                                 *dipp = child;
6080                         }
6081                         rv = NDI_FAILURE;
6082                 }
6083 
6084                 /*
6085                  * Continue upon failure--best effort algorithm
6086                  */
6087                 child = next;
6088         }
6089 
6090         ndi_devi_exit(dip, circ);
6091         if (vdip)
6092                 ndi_devi_exit(vdip, vcirc);
6093 
6094         return (rv);
6095 }
6096 
6097 /*
6098  * unconfigure grand children of bus nexus device
6099  */
6100 static int
6101 unconfig_grand_children(
6102         dev_info_t *dip,
6103         dev_info_t **dipp,
6104         int flags,
6105         major_t major,
6106         struct brevq_node **brevqp)
6107 {
6108         struct mt_config_handle *hdl;
6109 
6110         if (brevqp)
6111                 *brevqp = NULL;
6112 
6113         /* multi-threaded configuration of child nexus */
6114         hdl = mt_config_init(dip, dipp, flags, major, MT_UNCONFIG_OP, brevqp);
6115         mt_config_children(hdl);
6116 
6117         return (mt_config_fini(hdl));   /* wait for threads to exit */
6118 }
6119 
/*
 * Unconfigure children/descendants of the dip.
 *
 * If brevqp is not NULL, on return *brevqp is set to a queue of dip's
 * child devinames for which branch remove events need to be generated.
 *
 * dip:   nexus whose descendants are to be unconfigured.
 * dipp:  optional out-parameter, reset to NULL on entry and passed to the
 *        grandchild and immediate-child passes.
 * flags: NDI_* flags; NDI_AUTODETACH and NDI_DEVFS_CLEAN are examined here,
 *        the rest are passed through.
 * major: DDI_MAJOR_T_NONE for all children, otherwise restrict the
 *        operation to children bound to this major.
 *
 * Grandchildren are unconfigured first, then the immediate children, either
 * directly or through the nexus' bus_unconfig entry point.  Returns the
 * NDI_* result of the last pass that ran.
 */
static int
devi_unconfig_common(
        dev_info_t *dip,
        dev_info_t **dipp,
        int flags,
        major_t major,
        struct brevq_node **brevqp)
{
        int rv;
        int pm_cookie;
        int (*f)();
        ddi_bus_config_op_t bus_op;

        /* clear the out-parameters before any path that can return */
        if (dipp)
                *dipp = NULL;
        if (brevqp)
                *brevqp = NULL;

        /*
         * Power up the dip if it is powered off.  If the flag bit
         * NDI_AUTODETACH is set and the dip is not at its full power,
         * skip the rest of the branch.
         */
        if (pm_pre_unconfig(dip, flags, &pm_cookie, NULL) != DDI_SUCCESS)
                return ((flags & NDI_AUTODETACH) ? NDI_SUCCESS :
                    NDI_FAILURE);

        /*
         * Some callers, notably SCSI, need to clear out the devfs
         * cache together with the unconfig to prevent stale entries.
         */
        if (flags & NDI_DEVFS_CLEAN)
                (void) devfs_clean(dip, NULL, 0);

        rv = unconfig_grand_children(dip, dipp, flags, major, brevqp);

        /*
         * A grandchild failure aborts the operation unless this is an
         * autodetach, in which case the immediate children are still
         * attempted.
         */
        if ((rv != NDI_SUCCESS) && ((flags & NDI_AUTODETACH) == 0)) {
                if (brevqp && *brevqp) {
                        log_and_free_br_events_on_grand_children(dip, *brevqp);
                        free_brevq(*brevqp);
                        *brevqp = NULL;
                }
                pm_post_unconfig(dip, pm_cookie, NULL);
                return (rv);
        }

        /*
         * Drop any hold left in *dipp by the grandchild pass so that the
         * pass over the immediate children starts with *dipp == NULL.
         */
        if (dipp && *dipp) {
                ndi_rele_devi(*dipp);
                *dipp = NULL;
        }

        /*
         * It is possible to have a detached nexus with children
         * and grandchildren (for example: a branch consisting
         * entirely of bound nodes.) Since the nexus is detached
         * the bus_unconfig entry point cannot be used to remove
         * or unconfigure the descendants.
         */
        if (!i_ddi_devi_attached(dip) ||
            (DEVI(dip)->devi_ops->devo_bus_ops == NULL) ||
            (DEVI(dip)->devi_ops->devo_bus_ops->busops_rev < BUSO_REV_5) ||
            (f = DEVI(dip)->devi_ops->devo_bus_ops->bus_unconfig) == NULL) {
                rv = unconfig_immediate_children(dip, dipp, flags, major);
        } else {
                /*
                 * call bus_unconfig entry point
                 * It should reset nexus flags if unconfigure succeeds.
                 */
                bus_op = (major == DDI_MAJOR_T_NONE) ?
                    BUS_UNCONFIG_ALL : BUS_UNCONFIG_DRIVER;
                rv = (*f)(dip, flags, bus_op, (void *)(uintptr_t)major);
        }

        pm_post_unconfig(dip, pm_cookie, NULL);

        if (brevqp && *brevqp)
                cleanup_br_events_on_grand_children(dip, brevqp);

        return (rv);
}
6206 
6207 /*
6208  * called by devfs/framework to unconfigure children bound to major
6209  * If NDI_AUTODETACH is specified, this is invoked by either the
6210  * moduninstall daemon or the modunload -i 0 command.
6211  */
6212 int
6213 ndi_devi_unconfig_driver(dev_info_t *dip, int flags, major_t major)
6214 {
6215         NDI_CONFIG_DEBUG((CE_CONT,
6216             "ndi_devi_unconfig_driver: par = %s%d (%p), flags = 0x%x\n",
6217             ddi_driver_name(dip), ddi_get_instance(dip), (void *)dip, flags));
6218 
6219         return (devi_unconfig_common(dip, NULL, flags, major, NULL));
6220 }
6221 
6222 int
6223 ndi_devi_unconfig(dev_info_t *dip, int flags)
6224 {
6225         NDI_CONFIG_DEBUG((CE_CONT,
6226             "ndi_devi_unconfig: par = %s%d (%p), flags = 0x%x\n",
6227             ddi_driver_name(dip), ddi_get_instance(dip), (void *)dip, flags));
6228 
6229         return (devi_unconfig_common(dip, NULL, flags, DDI_MAJOR_T_NONE, NULL));
6230 }
6231 
6232 int
6233 e_ddi_devi_unconfig(dev_info_t *dip, dev_info_t **dipp, int flags)
6234 {
6235         NDI_CONFIG_DEBUG((CE_CONT,
6236             "e_ddi_devi_unconfig: par = %s%d (%p), flags = 0x%x\n",
6237             ddi_driver_name(dip), ddi_get_instance(dip), (void *)dip, flags));
6238 
6239         return (devi_unconfig_common(dip, dipp, flags, DDI_MAJOR_T_NONE, NULL));
6240 }
6241 
6242 /*
6243  * Unconfigure child by name
6244  */
6245 static int
6246 devi_unconfig_one(dev_info_t *pdip, char *devnm, int flags)
6247 {
6248         int             rv, circ;
6249         dev_info_t      *child;
6250         dev_info_t      *vdip = NULL;
6251         int             v_circ;
6252 
6253         ndi_devi_enter(pdip, &circ);
6254         child = ndi_devi_findchild(pdip, devnm);
6255 
6256         /*
6257          * If child is pHCI and vHCI and pHCI are not siblings then enter vHCI
6258          * before parent(pHCI) to avoid deadlock with mpxio Client power
6259          * management operations.
6260          */
6261         if (child && MDI_PHCI(child)) {
6262                 vdip = mdi_devi_get_vdip(child);
6263                 if (vdip && (ddi_get_parent(vdip) != pdip)) {
6264                         ndi_devi_exit(pdip, circ);
6265 
6266                         /* use mdi_devi_enter ordering */
6267                         ndi_devi_enter(vdip, &v_circ);
6268                         ndi_devi_enter(pdip, &circ);
6269                         child = ndi_devi_findchild(pdip, devnm);
6270                 } else
6271                         vdip = NULL;
6272         }
6273 
6274         if (child) {
6275                 rv = devi_detach_node(child, flags);
6276         } else {
6277                 NDI_CONFIG_DEBUG((CE_CONT,
6278                     "devi_unconfig_one: %s not found\n", devnm));
6279                 rv = NDI_SUCCESS;
6280         }
6281 
6282         ndi_devi_exit(pdip, circ);
6283         if (vdip)
6284                 ndi_devi_exit(vdip, v_circ);
6285 
6286         return (rv);
6287 }
6288 
6289 int
6290 ndi_devi_unconfig_one(
6291         dev_info_t *pdip,
6292         char *devnm,
6293         dev_info_t **dipp,
6294         int flags)
6295 {
6296         int             (*f)();
6297         int             circ, rv;
6298         int             pm_cookie;
6299         dev_info_t      *child;
6300         dev_info_t      *vdip = NULL;
6301         int             v_circ;
6302         struct brevq_node *brevq = NULL;
6303 
6304         ASSERT(i_ddi_devi_attached(pdip));
6305 
6306         NDI_CONFIG_DEBUG((CE_CONT,
6307             "ndi_devi_unconfig_one: par = %s%d (%p), child = %s\n",
6308             ddi_driver_name(pdip), ddi_get_instance(pdip),
6309             (void *)pdip, devnm));
6310 
6311         if (pm_pre_unconfig(pdip, flags, &pm_cookie, devnm) != DDI_SUCCESS)
6312                 return (NDI_FAILURE);
6313 
6314         if (dipp)
6315                 *dipp = NULL;
6316 
6317         ndi_devi_enter(pdip, &circ);
6318         child = ndi_devi_findchild(pdip, devnm);
6319 
6320         /*
6321          * If child is pHCI and vHCI and pHCI are not siblings then enter vHCI
6322          * before parent(pHCI) to avoid deadlock with mpxio Client power
6323          * management operations.
6324          */
6325         if (child && MDI_PHCI(child)) {
6326                 vdip = mdi_devi_get_vdip(child);
6327                 if (vdip && (ddi_get_parent(vdip) != pdip)) {
6328                         ndi_devi_exit(pdip, circ);
6329 
6330                         /* use mdi_devi_enter ordering */
6331                         ndi_devi_enter(vdip, &v_circ);
6332                         ndi_devi_enter(pdip, &circ);
6333                         child = ndi_devi_findchild(pdip, devnm);
6334                 } else
6335                         vdip = NULL;
6336         }
6337 
6338         if (child == NULL) {
6339                 NDI_CONFIG_DEBUG((CE_CONT, "ndi_devi_unconfig_one: %s"
6340                     " not found\n", devnm));
6341                 rv = NDI_SUCCESS;
6342                 goto out;
6343         }
6344 
6345         /*
6346          * Unconfigure children/descendants of named child
6347          */
6348         rv = devi_unconfig_branch(child, dipp, flags | NDI_UNCONFIG, &brevq);
6349         if (rv != NDI_SUCCESS)
6350                 goto out;
6351 
6352         init_bound_node_ev(pdip, child, flags);
6353 
6354         if ((DEVI(pdip)->devi_ops->devo_bus_ops == NULL) ||
6355             (DEVI(pdip)->devi_ops->devo_bus_ops->busops_rev < BUSO_REV_5) ||
6356             (f = DEVI(pdip)->devi_ops->devo_bus_ops->bus_unconfig) == NULL) {
6357                 rv = devi_detach_node(child, flags);
6358         } else {
6359                 /* call bus_config entry point */
6360                 rv = (*f)(pdip, flags, BUS_UNCONFIG_ONE, (void *)devnm);
6361         }
6362 
6363         if (brevq) {
6364                 if (rv != NDI_SUCCESS)
6365                         log_and_free_brevq_dip(child, brevq);
6366                 else
6367                         free_brevq(brevq);
6368         }
6369 
6370         if (dipp && rv != NDI_SUCCESS) {
6371                 ndi_hold_devi(child);
6372                 ASSERT(*dipp == NULL);
6373                 *dipp = child;
6374         }
6375 
6376 out:
6377         ndi_devi_exit(pdip, circ);
6378         if (vdip)
6379                 ndi_devi_exit(vdip, v_circ);
6380 
6381         pm_post_unconfig(pdip, pm_cookie, devnm);
6382 
6383         return (rv);
6384 }
6385 
/*
 * Argument bundle handed to the taskq callbacks dispatched by
 * i_ndi_devi_async_common().  It is allocated by the dispatcher and freed
 * by the callback (or by the dispatcher if the dispatch fails).
 */
struct async_arg {
        dev_info_t *dip;        /* node the deferred operation acts on */
        uint_t flags;           /* NDI_* flags supplied by the caller */
};
6390 
6391 /*
6392  * Common async handler for:
6393  *      ndi_devi_bind_driver_async
6394  *      ndi_devi_online_async
6395  */
6396 static int
6397 i_ndi_devi_async_common(dev_info_t *dip, uint_t flags, void (*func)())
6398 {
6399         int tqflag;
6400         int kmflag;
6401         struct async_arg *arg;
6402         dev_info_t *pdip = ddi_get_parent(dip);
6403 
6404         ASSERT(pdip);
6405         ASSERT(DEVI(pdip)->devi_taskq);
6406         ASSERT(ndi_dev_is_persistent_node(dip));
6407 
6408         if (flags & NDI_NOSLEEP) {
6409                 kmflag = KM_NOSLEEP;
6410                 tqflag = TQ_NOSLEEP;
6411         } else {
6412                 kmflag = KM_SLEEP;
6413                 tqflag = TQ_SLEEP;
6414         }
6415 
6416         arg = kmem_alloc(sizeof (*arg), kmflag);
6417         if (arg == NULL)
6418                 goto fail;
6419 
6420         arg->flags = flags;
6421         arg->dip = dip;
6422         if (ddi_taskq_dispatch(DEVI(pdip)->devi_taskq, func, arg, tqflag) ==
6423             DDI_SUCCESS) {
6424                 return (NDI_SUCCESS);
6425         }
6426 
6427 fail:
6428         NDI_CONFIG_DEBUG((CE_CONT, "%s%d: ddi_taskq_dispatch failed",
6429             ddi_driver_name(pdip), ddi_get_instance(pdip)));
6430 
6431         if (arg)
6432                 kmem_free(arg, sizeof (*arg));
6433         return (NDI_FAILURE);
6434 }
6435 
6436 static void
6437 i_ndi_devi_bind_driver_cb(struct async_arg *arg)
6438 {
6439         (void) ndi_devi_bind_driver(arg->dip, arg->flags);
6440         kmem_free(arg, sizeof (*arg));
6441 }
6442 
6443 int
6444 ndi_devi_bind_driver_async(dev_info_t *dip, uint_t flags)
6445 {
6446         return (i_ndi_devi_async_common(dip, flags,
6447             (void (*)())i_ndi_devi_bind_driver_cb));
6448 }
6449 
/*
 * place the devinfo in the ONLINE state.
 *
 * dip:   node to bring online; it must have a parent.
 * flags: NDI_* flags.  NDI_DEVI_ONLINE and NDI_CONFIG are always added
 *        before the attach; NDI_NO_EVENT and NDI_BRANCH_EVENT_OP affect
 *        event generation as described below.
 *
 * Returns the failing result of i_ndi_config_node() if the node cannot be
 * bound, otherwise the result of devi_attach_node().
 */
int
ndi_devi_online(dev_info_t *dip, uint_t flags)
{
        int circ, rv;
        dev_info_t *pdip = ddi_get_parent(dip);
        int branch_event = 0;

        ASSERT(pdip);

        NDI_CONFIG_DEBUG((CE_CONT, "ndi_devi_online: %s%d (%p)\n",
            ddi_driver_name(dip), ddi_get_instance(dip), (void *)dip));

        ndi_devi_enter(pdip, &circ);
        /* bind child before merging .conf nodes */
        rv = i_ndi_config_node(dip, DS_BOUND, flags);
        if (rv != NDI_SUCCESS) {
                ndi_devi_exit(pdip, circ);
                return (rv);
        }

        /* merge .conf properties */
        (void) i_ndi_make_spec_children(pdip, flags);

        flags |= (NDI_DEVI_ONLINE | NDI_CONFIG);

        if (flags & NDI_NO_EVENT) {
                /*
                 * Caller is specifically asking for not to generate an event.
                 * Set the following flag so that devi_attach_node() don't
                 * change the event state.
                 */
                flags |= NDI_NO_EVENT_STATE_CHNG;
        }

        /*
         * If events are wanted and we are not already inside a branch
         * event operation, this call becomes the branch operation and logs
         * the branch add event itself once the attach has succeeded.
         */
        if ((flags & (NDI_NO_EVENT | NDI_BRANCH_EVENT_OP)) == 0 &&
            ((flags & NDI_CONFIG) || DEVI_NEED_NDI_CONFIG(dip))) {
                flags |= NDI_BRANCH_EVENT_OP;
                branch_event = 1;
        }

        /*
         * devi_attach_node() may remove dip on failure
         */
        if ((rv = devi_attach_node(dip, flags)) == NDI_SUCCESS) {
                if ((flags & NDI_CONFIG) || DEVI_NEED_NDI_CONFIG(dip)) {
                        /*
                         * Hold the attached dip, and exit the parent while
                         * we drive configuration of children below the
                         * attached dip.
                         */
                        ndi_hold_devi(dip);
                        ndi_devi_exit(pdip, circ);

                        (void) ndi_devi_config(dip, flags);

                        ndi_devi_enter(pdip, &circ);
                        ndi_rele_devi(dip);
                }

                if (branch_event)
                        (void) i_log_devfs_branch_add(dip);
        }

        ndi_devi_exit(pdip, circ);

        /*
         * Notify devfs that we have a new node. Devfs needs to invalidate
         * cached directory contents.
         *
         * For PCMCIA devices, it is possible the pdip is not fully
         * attached. In this case, calling back into devfs will
         * result in a loop or assertion error. Hence, the check
         * on node state.
         *
         * If we own parent lock, this is part of a branch operation.
         * We skip the devfs_clean() step because the cache invalidation
         * is done higher up in the device tree.
         */
        if (rv == NDI_SUCCESS && i_ddi_devi_attached(pdip) &&
            !DEVI_BUSY_OWNED(pdip))
                (void) devfs_clean(pdip, NULL, 0);
        return (rv);
}
6536 
6537 static void
6538 i_ndi_devi_online_cb(struct async_arg *arg)
6539 {
6540         (void) ndi_devi_online(arg->dip, arg->flags);
6541         kmem_free(arg, sizeof (*arg));
6542 }
6543 
6544 int
6545 ndi_devi_online_async(dev_info_t *dip, uint_t flags)
6546 {
6547         /* mark child as need config if requested. */
6548         if (flags & NDI_CONFIG) {
6549                 mutex_enter(&(DEVI(dip)->devi_lock));
6550                 DEVI_SET_NDI_CONFIG(dip);
6551                 mutex_exit(&(DEVI(dip)->devi_lock));
6552         }
6553 
6554         return (i_ndi_devi_async_common(dip, flags,
6555             (void (*)())i_ndi_devi_online_cb));
6556 }
6557 
/*
 * Take a device node Offline
 * To take a device Offline means to detach the device instance from
 * the driver and prevent devfs requests from re-attaching the device
 * instance.
 *
 * The flag NDI_DEVI_REMOVE causes the device node to be removed from
 * the driver list and the device tree. In this case, the device
 * is assumed to be removed from the system.
 *
 * dip:   node to offline; it must have a parent.
 * flags: NDI_* flags; NDI_DEVI_OFFLINE is always added.
 *
 * Returns NDI_FAILURE if the branch below an attached dip cannot be
 * unconfigured, otherwise the result of devi_detach_node().
 */
int
ndi_devi_offline(dev_info_t *dip, uint_t flags)
{
        int             circ, rval = 0;
        dev_info_t      *pdip = ddi_get_parent(dip);
        dev_info_t      *vdip = NULL;
        int             v_circ;
        struct brevq_node *brevq = NULL;

        ASSERT(pdip);

        flags |= NDI_DEVI_OFFLINE;

        /*
         * If child is pHCI and vHCI and pHCI are not siblings then enter vHCI
         * before parent(pHCI) to avoid deadlock with mpxio Client power
         * management operations.
         */
        if (MDI_PHCI(dip)) {
                vdip = mdi_devi_get_vdip(dip);
                if (vdip && (ddi_get_parent(vdip) != pdip))
                        ndi_devi_enter(vdip, &v_circ);
                else
                        vdip = NULL;
        }
        ndi_devi_enter(pdip, &circ);

        if (i_ddi_devi_attached(dip)) {
                /*
                 * If dip is in DS_READY state, there may be cached dv_nodes
                 * referencing this dip, so we invoke devfs code path.
                 * Note that we must release busy changing on pdip to
                 * avoid deadlock against devfs.
                 */
                char *devname = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
                (void) ddi_deviname(dip, devname);

                ndi_devi_exit(pdip, circ);
                if (vdip)
                        ndi_devi_exit(vdip, v_circ);

                /*
                 * If we are explicitly told to clean, then clean. If we own
                 * the parent lock then this is part of a branch operation,
                 * and we skip the devfs_clean() step.
                 *
                 * NOTE: A thread performing a devfs file system lookup/
                 * bus_config can't call devfs_clean to unconfig without
                 * causing rwlock problems in devfs. For ndi_devi_offline, this
                 * means that the NDI_DEVFS_CLEAN flag is safe from ioctl code
                 * or from an async hotplug thread, but is not safe from a
                 * nexus driver's bus_config implementation.
                 */
                if ((flags & NDI_DEVFS_CLEAN) ||
                    (!DEVI_BUSY_OWNED(pdip)))
                        /* devname + 1 skips the first character of the name */
                        (void) devfs_clean(pdip, devname + 1, DV_CLEAN_FORCE);

                kmem_free(devname, MAXNAMELEN + 1);

                rval = devi_unconfig_branch(dip, NULL, flags|NDI_UNCONFIG,
                    &brevq);

                /* pdip and vdip were exited above, so nothing to release */
                if (rval)
                        return (NDI_FAILURE);

                /* re-enter in the same (vHCI, parent) order as on entry */
                if (vdip)
                        ndi_devi_enter(vdip, &v_circ);
                ndi_devi_enter(pdip, &circ);
        }

        init_bound_node_ev(pdip, dip, flags);

        rval = devi_detach_node(dip, flags);
        if (brevq) {
                if (rval != NDI_SUCCESS)
                        log_and_free_brevq_dip(dip, brevq);
                else
                        free_brevq(brevq);
        }

        ndi_devi_exit(pdip, circ);
        if (vdip)
                ndi_devi_exit(vdip, v_circ);

        return (rval);
}
6654 
6655 /*
6656  * Find the child dev_info node of parent nexus 'p' whose unit address
6657  * matches "cname@caddr".  Recommend use of ndi_devi_findchild() instead.
6658  */
6659 dev_info_t *
6660 ndi_devi_find(dev_info_t *pdip, char *cname, char *caddr)
6661 {
6662         dev_info_t *child;
6663         int circ;
6664 
6665         if (pdip == NULL || cname == NULL || caddr == NULL)
6666                 return ((dev_info_t *)NULL);
6667 
6668         ndi_devi_enter(pdip, &circ);
6669         child = find_sibling(ddi_get_child(pdip), cname, caddr,
6670             FIND_NODE_BY_NODENAME, NULL);
6671         ndi_devi_exit(pdip, circ);
6672         return (child);
6673 }
6674 
6675 /*
6676  * Find the child dev_info node of parent nexus 'p' whose unit address
6677  * matches devname "name@addr".  Permits caller to hold the parent.
6678  */
6679 dev_info_t *
6680 ndi_devi_findchild(dev_info_t *pdip, char *devname)
6681 {
6682         dev_info_t *child;
6683         char    *cname, *caddr;
6684         char    *devstr;
6685 
6686         ASSERT(DEVI_BUSY_OWNED(pdip));
6687 
6688         devstr = i_ddi_strdup(devname, KM_SLEEP);
6689         i_ddi_parse_name(devstr, &cname, &caddr, NULL);
6690 
6691         if (cname == NULL || caddr == NULL) {
6692                 kmem_free(devstr, strlen(devname)+1);
6693                 return ((dev_info_t *)NULL);
6694         }
6695 
6696         child = find_sibling(ddi_get_child(pdip), cname, caddr,
6697             FIND_NODE_BY_NODENAME, NULL);
6698         kmem_free(devstr, strlen(devname)+1);
6699         return (child);
6700 }
6701 
6702 /*
6703  * Misc. routines called by framework only
6704  */
6705 
6706 /*
6707  * Clear the DEVI_MADE_CHILDREN/DEVI_ATTACHED_CHILDREN flags
6708  * if new child spec has been added.
6709  */
6710 static int
6711 reset_nexus_flags(dev_info_t *dip, void *arg)
6712 {
6713         struct hwc_spec *list;
6714         int             circ;
6715 
6716         if (((DEVI(dip)->devi_flags & DEVI_MADE_CHILDREN) == 0) ||
6717             ((list = hwc_get_child_spec(dip, (major_t)(uintptr_t)arg)) == NULL))
6718                 return (DDI_WALK_CONTINUE);
6719 
6720         hwc_free_spec_list(list);
6721 
6722         /* coordinate child state update */
6723         ndi_devi_enter(dip, &circ);
6724         mutex_enter(&DEVI(dip)->devi_lock);
6725         DEVI(dip)->devi_flags &= ~(DEVI_MADE_CHILDREN | DEVI_ATTACHED_CHILDREN);
6726         mutex_exit(&DEVI(dip)->devi_lock);
6727         ndi_devi_exit(dip, circ);
6728 
6729         return (DDI_WALK_CONTINUE);
6730 }
6731 
6732 /*
6733  * Helper functions, returns NULL if no memory.
6734  */
6735 
6736 /*
6737  * path_to_major:
6738  *
6739  * Return an alternate driver name binding for the leaf device
6740  * of the given pathname, if there is one. The purpose of this
6741  * function is to deal with generic pathnames. The default action
6742  * for platforms that can't do this (ie: x86 or any platform that
6743  * does not have prom_finddevice functionality, which matches
6744  * nodenames and unit-addresses without the drivers participation)
6745  * is to return DDI_MAJOR_T_NONE.
6746  *
6747  * Used in loadrootmodules() in the swapgeneric module to
6748  * associate a given pathname with a given leaf driver.
6749  *
6750  */
6751 major_t
6752 path_to_major(char *path)
6753 {
6754         dev_info_t *dip;
6755         char *p, *q;
6756         pnode_t nodeid;
6757         major_t major;
6758 
6759         /* check for path-oriented alias */
6760         major = ddi_name_to_major(path);
6761         if (driver_active(major)) {
6762                 NDI_CONFIG_DEBUG((CE_NOTE, "path_to_major: %s path bound %s\n",
6763                     path, ddi_major_to_name(major)));
6764                 return (major);
6765         }
6766 
6767         /*
6768          * Get the nodeid of the given pathname, if such a mapping exists.
6769          */
6770         dip = NULL;
6771         nodeid = prom_finddevice(path);
6772         if (nodeid != OBP_BADNODE) {
6773                 /*
6774                  * Find the nodeid in our copy of the device tree and return
6775                  * whatever name we used to bind this node to a driver.
6776                  */
6777                 dip = e_ddi_nodeid_to_dip(nodeid);
6778         }
6779 
6780         if (dip == NULL) {
6781                 NDI_CONFIG_DEBUG((CE_WARN,
6782                     "path_to_major: can't bind <%s>\n", path));
6783                 return (DDI_MAJOR_T_NONE);
6784         }
6785 
6786         /*
6787          * If we're bound to something other than the nodename,
6788          * note that in the message buffer and system log.
6789          */
6790         p = ddi_binding_name(dip);
6791         q = ddi_node_name(dip);
6792         if (p && q && (strcmp(p, q) != 0))
6793                 NDI_CONFIG_DEBUG((CE_NOTE, "path_to_major: %s bound to %s\n",
6794                     path, p));
6795 
6796         major = ddi_name_to_major(p);
6797 
6798         ndi_rele_devi(dip);             /* release e_ddi_nodeid_to_dip hold */
6799 
6800         return (major);
6801 }
6802 
6803 /*
6804  * Return the held dip for the specified major and instance, attempting to do
6805  * an attach if specified. Return NULL if the devi can't be found or put in
6806  * the proper state. The caller must release the hold via ddi_release_devi if
6807  * a non-NULL value is returned.
6808  *
6809  * Some callers expect to be able to perform a hold_devi() while in a context
6810  * where using ndi_devi_enter() to ensure the hold might cause deadlock (see
6811  * open-from-attach code in consconfig_dacf.c). Such special-case callers
6812  * must ensure that an ndi_devi_enter(parent)/ndi_hold_devi() from a safe
6813  * context is already active. The hold_devi() implementation must accommodate
6814  * these callers.
6815  */
6816 static dev_info_t *
6817 hold_devi(major_t major, int instance, int flags)
6818 {
6819         struct devnames *dnp;
6820         dev_info_t      *dip;
6821         char            *path;
6822         char            *vpath;
6823 
6824         if ((major >= devcnt) || (instance == -1))
6825                 return (NULL);
6826 
6827         /* try to find the instance in the per driver list */
6828         dnp = &(devnamesp[major]);
6829         LOCK_DEV_OPS(&(dnp->dn_lock));
6830         for (dip = dnp->dn_head; dip;
6831             dip = (dev_info_t *)DEVI(dip)->devi_next) {
6832                 /* skip node if instance field is not valid */
6833                 if (i_ddi_node_state(dip) < DS_INITIALIZED)
6834                         continue;
6835 
6836                 /* look for instance match */
6837                 if (DEVI(dip)->devi_instance == instance) {
6838                         /*
6839                          * To accommodate callers that can't block in
6840                          * ndi_devi_enter() we do an ndi_hold_devi(), and
6841                          * afterwards check that the node is in a state where
6842                          * the hold prevents detach(). If we did not manage to
6843                          * prevent detach then we ndi_rele_devi() and perform
6844                          * the slow path below (which can result in a blocking
6845                          * ndi_devi_enter() while driving attach top-down).
6846                          * This code depends on the ordering of
6847                          * DEVI_SET_DETACHING and the devi_ref check in the
6848                          * detach_node() code path.
6849                          */
6850                         ndi_hold_devi(dip);
6851                         if (i_ddi_devi_attached(dip) &&
6852                             !DEVI_IS_DETACHING(dip)) {
6853                                 UNLOCK_DEV_OPS(&(dnp->dn_lock));
6854                                 return (dip);   /* fast-path with devi held */
6855                         }
6856                         ndi_rele_devi(dip);
6857 
6858                         /* try slow-path */
6859                         dip = NULL;
6860                         break;
6861                 }
6862         }
6863         ASSERT(dip == NULL);
6864         UNLOCK_DEV_OPS(&(dnp->dn_lock));
6865 
6866         if (flags & E_DDI_HOLD_DEVI_NOATTACH)
6867                 return (NULL);          /* told not to drive attach */
6868 
6869         /* slow-path may block, so it should not occur from interrupt */
6870         ASSERT(!servicing_interrupt());
6871         if (servicing_interrupt())
6872                 return (NULL);
6873 
6874         /* reconstruct the path and drive attach by path through devfs. */
6875         path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6876         if (e_ddi_majorinstance_to_path(major, instance, path) == 0) {
6877                 dip = e_ddi_hold_devi_by_path(path, flags);
6878 
6879                 /*
6880                  * Verify that we got the correct device - a path_to_inst file
6881                  * with a bogus/corrupt path (or a nexus that changes its
6882                  * unit-address format) could result in an incorrect answer
6883                  *
6884                  * Verify major, instance, and path.
6885                  */
6886                 vpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
6887                 if (dip &&
6888                     ((DEVI(dip)->devi_major != major) ||
6889                     ((DEVI(dip)->devi_instance != instance)) ||
6890                     (strcmp(path, ddi_pathname(dip, vpath)) != 0))) {
6891                         ndi_rele_devi(dip);
6892                         dip = NULL;     /* no answer better than wrong answer */
6893                 }
6894                 kmem_free(vpath, MAXPATHLEN);
6895         }
6896         kmem_free(path, MAXPATHLEN);
6897         return (dip);                   /* with devi held */
6898 }
6899 
6900 /*
6901  * The {e_}ddi_hold_devi{_by_{instance|dev|path}} hold the devinfo node
6902  * associated with the specified arguments.  This hold should be released
6903  * by calling ddi_release_devi.
6904  *
 * The E_DDI_HOLD_DEVI_NOATTACH flag argument allows the caller to request
 * a failure return if the node is not already attached.
6907  *
6908  * NOTE: by the time we make e_ddi_hold_devi public, we should be able to reuse
6909  * ddi_hold_devi again.
6910  */
6911 dev_info_t *
6912 ddi_hold_devi_by_instance(major_t major, int instance, int flags)
6913 {
6914         return (hold_devi(major, instance, flags));
6915 }
6916 
6917 dev_info_t *
6918 e_ddi_hold_devi_by_dev(dev_t dev, int flags)
6919 {
6920         major_t major = getmajor(dev);
6921         dev_info_t      *dip;
6922         struct dev_ops  *ops;
6923         dev_info_t      *ddip = NULL;
6924 
6925         dip = hold_devi(major, dev_to_instance(dev), flags);
6926 
6927         /*
6928          * The rest of this routine is legacy support for drivers that
6929          * have broken DDI_INFO_DEVT2INSTANCE implementations but may have
6930          * functional DDI_INFO_DEVT2DEVINFO implementations.  This code will
6931          * diagnose inconsistency and, for maximum compatibility with legacy
6932          * drivers, give preference to the drivers DDI_INFO_DEVT2DEVINFO
6933          * implementation over the above derived dip based the driver's
6934          * DDI_INFO_DEVT2INSTANCE implementation. This legacy support should
6935          * be removed when DDI_INFO_DEVT2DEVINFO is deprecated.
6936          *
6937          * NOTE: The following code has a race condition. DEVT2DEVINFO
6938          *      returns a dip which is not held. By the time we ref ddip,
6939          *      it could have been freed. The saving grace is that for
6940          *      most drivers, the dip returned from hold_devi() is the
6941          *      same one as the one returned by DEVT2DEVINFO, so we are
6942          *      safe for drivers with the correct getinfo(9e) impl.
6943          */
6944         if (((ops = ddi_hold_driver(major)) != NULL) &&
6945             CB_DRV_INSTALLED(ops) && ops->devo_getinfo)  {
6946                 if ((*ops->devo_getinfo)(NULL, DDI_INFO_DEVT2DEVINFO,
6947                     (void *)dev, (void **)&ddip) != DDI_SUCCESS)
6948                         ddip = NULL;
6949         }
6950 
6951         /* give preference to the driver returned DEVT2DEVINFO dip */
6952         if (ddip && (dip != ddip)) {
6953 #ifdef  DEBUG
6954                 cmn_err(CE_WARN, "%s: inconsistent getinfo(9E) implementation",
6955                     ddi_driver_name(ddip));
6956 #endif  /* DEBUG */
6957                 ndi_hold_devi(ddip);
6958                 if (dip)
6959                         ndi_rele_devi(dip);
6960                 dip = ddip;
6961         }
6962 
6963         if (ops)
6964                 ddi_rele_driver(major);
6965 
6966         return (dip);
6967 }
6968 
6969 /*
6970  * For compatibility only. Do not call this function!
6971  */
6972 dev_info_t *
6973 e_ddi_get_dev_info(dev_t dev, vtype_t type)
6974 {
6975         dev_info_t *dip = NULL;
6976         if (getmajor(dev) >= devcnt)
6977                 return (NULL);
6978 
6979         switch (type) {
6980         case VCHR:
6981         case VBLK:
6982                 dip = e_ddi_hold_devi_by_dev(dev, 0);
6983         default:
6984                 break;
6985         }
6986 
6987         /*
6988          * For compatibility reasons, we can only return the dip with
6989          * the driver ref count held. This is not a safe thing to do.
6990          * For certain broken third-party software, we are willing
6991          * to venture into unknown territory.
6992          */
6993         if (dip) {
6994                 (void) ndi_hold_driver(dip);
6995                 ndi_rele_devi(dip);
6996         }
6997         return (dip);
6998 }
6999 
7000 dev_info_t *
7001 e_ddi_hold_devi_by_path(char *path, int flags)
7002 {
7003         dev_info_t      *dip;
7004 
7005         /* can't specify NOATTACH by path */
7006         ASSERT(!(flags & E_DDI_HOLD_DEVI_NOATTACH));
7007 
7008         return (resolve_pathname(path, &dip, NULL, NULL) ? NULL : dip);
7009 }
7010 
7011 void
7012 e_ddi_hold_devi(dev_info_t *dip)
7013 {
7014         ndi_hold_devi(dip);
7015 }
7016 
7017 void
7018 ddi_release_devi(dev_info_t *dip)
7019 {
7020         ndi_rele_devi(dip);
7021 }
7022 
7023 /*
7024  * Associate a streams queue with a devinfo node
7025  * NOTE: This function is called by STREAM driver's put procedure.
7026  *      It cannot block.
7027  */
7028 void
7029 ddi_assoc_queue_with_devi(queue_t *q, dev_info_t *dip)
7030 {
7031         queue_t *rq = _RD(q);
7032         struct stdata *stp;
7033         vnode_t *vp;
7034 
7035         /* set flag indicating that ddi_assoc_queue_with_devi was called */
7036         mutex_enter(QLOCK(rq));
7037         rq->q_flag |= _QASSOCIATED;
7038         mutex_exit(QLOCK(rq));
7039 
7040         /* get the vnode associated with the queue */
7041         stp = STREAM(rq);
7042         vp = stp->sd_vnode;
7043         ASSERT(vp);
7044 
7045         /* change the hardware association of the vnode */
7046         spec_assoc_vp_with_devi(vp, dip);
7047 }
7048 
7049 /*
7050  * ddi_install_driver(name)
7051  *
7052  * Driver installation is currently a byproduct of driver loading.  This
7053  * may change.
7054  */
7055 int
7056 ddi_install_driver(char *name)
7057 {
7058         major_t major = ddi_name_to_major(name);
7059 
7060         if ((major == DDI_MAJOR_T_NONE) ||
7061             (ddi_hold_installed_driver(major) == NULL)) {
7062                 return (DDI_FAILURE);
7063         }
7064         ddi_rele_driver(major);
7065         return (DDI_SUCCESS);
7066 }
7067 
7068 struct dev_ops *
7069 ddi_hold_driver(major_t major)
7070 {
7071         return (mod_hold_dev_by_major(major));
7072 }
7073 
7074 
7075 void
7076 ddi_rele_driver(major_t major)
7077 {
7078         mod_rele_dev_by_major(major);
7079 }
7080 
7081 
7082 /*
7083  * This is called during boot to force attachment order of special dips
7084  * dip must be referenced via ndi_hold_devi()
7085  */
7086 int
7087 i_ddi_attach_node_hierarchy(dev_info_t *dip)
7088 {
7089         dev_info_t      *parent;
7090         int             ret, circ;
7091 
7092         /*
7093          * Recurse up until attached parent is found.
7094          */
7095         if (i_ddi_devi_attached(dip))
7096                 return (DDI_SUCCESS);
7097         parent = ddi_get_parent(dip);
7098         if (i_ddi_attach_node_hierarchy(parent) != DDI_SUCCESS)
7099                 return (DDI_FAILURE);
7100 
7101         /*
7102          * Come top-down, expanding .conf nodes under this parent
7103          * and driving attach.
7104          */
7105         ndi_devi_enter(parent, &circ);
7106         (void) i_ndi_make_spec_children(parent, 0);
7107         ret = i_ddi_attachchild(dip);
7108         ndi_devi_exit(parent, circ);
7109 
7110         return (ret);
7111 }
7112 
7113 /* keep this function static */
7114 static int
7115 attach_driver_nodes(major_t major)
7116 {
7117         struct devnames *dnp;
7118         dev_info_t *dip;
7119         int error = DDI_FAILURE;
7120 
7121         dnp = &devnamesp[major];
7122         LOCK_DEV_OPS(&dnp->dn_lock);
7123         dip = dnp->dn_head;
7124         while (dip) {
7125                 ndi_hold_devi(dip);
7126                 UNLOCK_DEV_OPS(&dnp->dn_lock);
7127                 if (i_ddi_attach_node_hierarchy(dip) == DDI_SUCCESS)
7128                         error = DDI_SUCCESS;
7129                 /*
7130                  * Set the 'ddi-config-driver-node' property on a nexus
7131                  * node to cause attach_driver_nodes() to configure all
7132                  * immediate children of the nexus. This property should
7133                  * be set on nodes with immediate children that bind to
7134                  * the same driver as parent.
7135                  */
7136                 if ((error == DDI_SUCCESS) && (ddi_prop_exists(DDI_DEV_T_ANY,
7137                     dip, DDI_PROP_DONTPASS, "ddi-config-driver-node"))) {
7138                         (void) ndi_devi_config(dip, NDI_NO_EVENT);
7139                 }
7140                 LOCK_DEV_OPS(&dnp->dn_lock);
7141                 ndi_rele_devi(dip);
7142                 dip = ddi_get_next(dip);
7143         }
7144         if (error == DDI_SUCCESS)
7145                 dnp->dn_flags |= DN_NO_AUTODETACH;
7146         UNLOCK_DEV_OPS(&dnp->dn_lock);
7147 
7148 
7149         return (error);
7150 }
7151 
7152 /*
7153  * i_ddi_attach_hw_nodes configures and attaches all hw nodes
7154  * bound to a specific driver. This function replaces calls to
7155  * ddi_hold_installed_driver() for drivers with no .conf
7156  * enumerated nodes.
7157  *
7158  * This facility is typically called at boot time to attach
7159  * platform-specific hardware nodes, such as ppm nodes on xcal
7160  * and grover and keyswitch nodes on cherrystone. It does not
7161  * deal with .conf enumerated node. Calling it beyond the boot
7162  * process is strongly discouraged.
7163  */
7164 int
7165 i_ddi_attach_hw_nodes(char *driver)
7166 {
7167         major_t major;
7168 
7169         major = ddi_name_to_major(driver);
7170         if (major == DDI_MAJOR_T_NONE)
7171                 return (DDI_FAILURE);
7172 
7173         return (attach_driver_nodes(major));
7174 }
7175 
7176 /*
7177  * i_ddi_attach_pseudo_node configures pseudo drivers which
7178  * has a single node. The .conf nodes must be enumerated
7179  * before calling this interface. The dip is held attached
7180  * upon returning.
7181  *
7182  * This facility should only be called only at boot time
7183  * by the I/O framework.
7184  */
7185 dev_info_t *
7186 i_ddi_attach_pseudo_node(char *driver)
7187 {
7188         major_t major;
7189         dev_info_t *dip;
7190 
7191         major = ddi_name_to_major(driver);
7192         if (major == DDI_MAJOR_T_NONE)
7193                 return (NULL);
7194 
7195         if (attach_driver_nodes(major) != DDI_SUCCESS)
7196                 return (NULL);
7197 
7198         dip = devnamesp[major].dn_head;
7199         ASSERT(dip && ddi_get_next(dip) == NULL);
7200         ndi_hold_devi(dip);
7201         return (dip);
7202 }
7203 
7204 static void
7205 diplist_to_parent_major(dev_info_t *head, char parents[])
7206 {
7207         major_t major;
7208         dev_info_t *dip, *pdip;
7209 
7210         for (dip = head; dip != NULL; dip = ddi_get_next(dip)) {
7211                 pdip = ddi_get_parent(dip);
7212                 ASSERT(pdip);   /* disallow rootnex.conf nodes */
7213                 major = ddi_driver_major(pdip);
7214                 if ((major != DDI_MAJOR_T_NONE) && parents[major] == 0)
7215                         parents[major] = 1;
7216         }
7217 }
7218 
7219 /*
7220  * Call ddi_hold_installed_driver() on each parent major
7221  * and invoke mt_config_driver() to attach child major.
7222  * This is part of the implementation of ddi_hold_installed_driver.
7223  */
7224 static int
7225 attach_driver_by_parent(major_t child_major, char parents[])
7226 {
7227         major_t par_major;
7228         struct mt_config_handle *hdl;
7229         int flags = NDI_DEVI_PERSIST | NDI_NO_EVENT;
7230 
7231         hdl = mt_config_init(NULL, NULL, flags, child_major, MT_CONFIG_OP,
7232             NULL);
7233         for (par_major = 0; par_major < devcnt; par_major++) {
7234                 /* disallow recursion on the same driver */
7235                 if (parents[par_major] == 0 || par_major == child_major)
7236                         continue;
7237                 if (ddi_hold_installed_driver(par_major) == NULL)
7238                         continue;
7239                 hdl->mtc_parmajor = par_major;
7240                 mt_config_driver(hdl);
7241                 ddi_rele_driver(par_major);
7242         }
7243         (void) mt_config_fini(hdl);
7244 
7245         return (i_ddi_devs_attached(child_major));
7246 }
7247 
7248 int
7249 i_ddi_devs_attached(major_t major)
7250 {
7251         dev_info_t *dip;
7252         struct devnames *dnp;
7253         int error = DDI_FAILURE;
7254 
7255         /* check for attached instances */
7256         dnp = &devnamesp[major];
7257         LOCK_DEV_OPS(&dnp->dn_lock);
7258         for (dip = dnp->dn_head; dip != NULL; dip = ddi_get_next(dip)) {
7259                 if (i_ddi_devi_attached(dip)) {
7260                         error = DDI_SUCCESS;
7261                         break;
7262                 }
7263         }
7264         UNLOCK_DEV_OPS(&dnp->dn_lock);
7265 
7266         return (error);
7267 }
7268 
7269 int
7270 i_ddi_minor_node_count(dev_info_t *ddip, const char *node_type)
7271 {
7272         int                     circ;
7273         struct ddi_minor_data   *dp;
7274         int                     count = 0;
7275 
7276         ndi_devi_enter(ddip, &circ);
7277         for (dp = DEVI(ddip)->devi_minor; dp != NULL; dp = dp->next) {
7278                 if (strcmp(dp->ddm_node_type, node_type) == 0)
7279                         count++;
7280         }
7281         ndi_devi_exit(ddip, circ);
7282         return (count);
7283 }
7284 
7285 /*
7286  * ddi_hold_installed_driver configures and attaches all
7287  * instances of the specified driver. To accomplish this
7288  * it configures and attaches all possible parents of
7289  * the driver, enumerated both in h/w nodes and in the
7290  * driver's .conf file.
7291  *
7292  * NOTE: This facility is for compatibility purposes only and will
7293  *      eventually go away. Its usage is strongly discouraged.
7294  */
7295 static void
7296 enter_driver(struct devnames *dnp)
7297 {
7298         mutex_enter(&dnp->dn_lock);
7299         ASSERT(dnp->dn_busy_thread != curthread);
7300         while (dnp->dn_flags & DN_DRIVER_BUSY)
7301                 cv_wait(&dnp->dn_wait, &dnp->dn_lock);
7302         dnp->dn_flags |= DN_DRIVER_BUSY;
7303         dnp->dn_busy_thread = curthread;
7304         mutex_exit(&dnp->dn_lock);
7305 }
7306 
7307 static void
7308 exit_driver(struct devnames *dnp)
7309 {
7310         mutex_enter(&dnp->dn_lock);
7311         ASSERT(dnp->dn_busy_thread == curthread);
7312         dnp->dn_flags &= ~DN_DRIVER_BUSY;
7313         dnp->dn_busy_thread = NULL;
7314         cv_broadcast(&dnp->dn_wait);
7315         mutex_exit(&dnp->dn_lock);
7316 }
7317 
7318 struct dev_ops *
7319 ddi_hold_installed_driver(major_t major)
7320 {
7321         struct dev_ops *ops;
7322         struct devnames *dnp;
7323         char *parents;
7324         int error;
7325 
7326         ops = ddi_hold_driver(major);
7327         if (ops == NULL)
7328                 return (NULL);
7329 
7330         /*
7331          * Return immediately if all the attach operations associated
7332          * with a ddi_hold_installed_driver() call have already been done.
7333          */
7334         dnp = &devnamesp[major];
7335         enter_driver(dnp);
7336         ASSERT(driver_active(major));
7337 
7338         if (dnp->dn_flags & DN_DRIVER_HELD) {
7339                 exit_driver(dnp);
7340                 if (i_ddi_devs_attached(major) == DDI_SUCCESS)
7341                         return (ops);
7342                 ddi_rele_driver(major);
7343                 return (NULL);
7344         }
7345 
7346         LOCK_DEV_OPS(&dnp->dn_lock);
7347         dnp->dn_flags |= (DN_DRIVER_HELD | DN_NO_AUTODETACH);
7348         UNLOCK_DEV_OPS(&dnp->dn_lock);
7349 
7350         DCOMPATPRINTF((CE_CONT,
7351             "ddi_hold_installed_driver: %s\n", dnp->dn_name));
7352 
7353         /*
7354          * When the driver has no .conf children, it is sufficient
7355          * to attach existing nodes in the device tree. Nodes not
7356          * enumerated by the OBP are not attached.
7357          */
7358         if (dnp->dn_pl == NULL) {
7359                 if (attach_driver_nodes(major) == DDI_SUCCESS) {
7360                         exit_driver(dnp);
7361                         return (ops);
7362                 }
7363                 exit_driver(dnp);
7364                 ddi_rele_driver(major);
7365                 return (NULL);
7366         }
7367 
7368         /*
7369          * Driver has .conf nodes. We find all possible parents
7370          * and recursively all ddi_hold_installed_driver on the
7371          * parent driver; then we invoke ndi_config_driver()
7372          * on all possible parent node in parallel to speed up
7373          * performance.
7374          */
7375         parents = kmem_zalloc(devcnt * sizeof (char), KM_SLEEP);
7376 
7377         LOCK_DEV_OPS(&dnp->dn_lock);
7378         /* find .conf parents */
7379         (void) impl_parlist_to_major(dnp->dn_pl, parents);
7380         /* find hw node parents */
7381         diplist_to_parent_major(dnp->dn_head, parents);
7382         UNLOCK_DEV_OPS(&dnp->dn_lock);
7383 
7384         error = attach_driver_by_parent(major, parents);
7385         kmem_free(parents, devcnt * sizeof (char));
7386         if (error == DDI_SUCCESS) {
7387                 exit_driver(dnp);
7388                 return (ops);
7389         }
7390 
7391         exit_driver(dnp);
7392         ddi_rele_driver(major);
7393         return (NULL);
7394 }
7395 
7396 /*
7397  * Default bus_config entry point for nexus drivers
7398  */
7399 int
7400 ndi_busop_bus_config(dev_info_t *pdip, uint_t flags, ddi_bus_config_op_t op,
7401     void *arg, dev_info_t **child, clock_t timeout)
7402 {
7403         major_t major;
7404 
7405         /*
7406          * A timeout of 30 minutes or more is probably a mistake
7407          * This is intended to catch uses where timeout is in
7408          * the wrong units.  timeout must be in units of ticks.
7409          */
7410         ASSERT(timeout < SEC_TO_TICK(1800));
7411 
7412         major = DDI_MAJOR_T_NONE;
7413         switch (op) {
7414         case BUS_CONFIG_ONE:
7415                 NDI_DEBUG(flags, (CE_CONT, "%s%d: bus config %s timeout=%ld\n",
7416                     ddi_driver_name(pdip), ddi_get_instance(pdip),
7417                     (char *)arg, timeout));
7418                 return (devi_config_one(pdip, (char *)arg, child, flags,
7419                     timeout));
7420 
7421         case BUS_CONFIG_DRIVER:
7422                 major = (major_t)(uintptr_t)arg;
7423                 /*FALLTHROUGH*/
7424         case BUS_CONFIG_ALL:
7425                 NDI_DEBUG(flags, (CE_CONT, "%s%d: bus config timeout=%ld\n",
7426                     ddi_driver_name(pdip), ddi_get_instance(pdip),
7427                     timeout));
7428                 if (timeout > 0) {
7429                         NDI_DEBUG(flags, (CE_CONT,
7430                             "%s%d: bus config all timeout=%ld\n",
7431                             ddi_driver_name(pdip), ddi_get_instance(pdip),
7432                             timeout));
7433                         delay(timeout);
7434                 }
7435                 return (config_immediate_children(pdip, flags, major));
7436 
7437         default:
7438                 return (NDI_FAILURE);
7439         }
7440         /*NOTREACHED*/
7441 }
7442 
7443 /*
7444  * Default busop bus_unconfig handler for nexus drivers
7445  */
7446 int
7447 ndi_busop_bus_unconfig(dev_info_t *pdip, uint_t flags, ddi_bus_config_op_t op,
7448     void *arg)
7449 {
7450         major_t major;
7451 
7452         major = DDI_MAJOR_T_NONE;
7453         switch (op) {
7454         case BUS_UNCONFIG_ONE:
7455                 NDI_DEBUG(flags, (CE_CONT, "%s%d: bus unconfig %s\n",
7456                     ddi_driver_name(pdip), ddi_get_instance(pdip),
7457                     (char *)arg));
7458                 return (devi_unconfig_one(pdip, (char *)arg, flags));
7459 
7460         case BUS_UNCONFIG_DRIVER:
7461                 major = (major_t)(uintptr_t)arg;
7462                 /*FALLTHROUGH*/
7463         case BUS_UNCONFIG_ALL:
7464                 NDI_DEBUG(flags, (CE_CONT, "%s%d: bus unconfig all\n",
7465                     ddi_driver_name(pdip), ddi_get_instance(pdip)));
7466                 return (unconfig_immediate_children(pdip, NULL, flags, major));
7467 
7468         default:
7469                 return (NDI_FAILURE);
7470         }
7471         /*NOTREACHED*/
7472 }
7473 
7474 /*
7475  * dummy functions to be removed
7476  */
7477 void
7478 impl_rem_dev_props(dev_info_t *dip)
7479 {
7480         _NOTE(ARGUNUSED(dip))
7481         /* do nothing */
7482 }
7483 
7484 /*
7485  * Determine if a node is a leaf node. If not sure, return false (0).
7486  */
7487 static int
7488 is_leaf_node(dev_info_t *dip)
7489 {
7490         major_t major = ddi_driver_major(dip);
7491 
7492         if (major == DDI_MAJOR_T_NONE)
7493                 return (0);
7494 
7495         return (devnamesp[major].dn_flags & DN_LEAF_DRIVER);
7496 }
7497 
7498 /*
7499  * Multithreaded [un]configuration
7500  */
7501 static struct mt_config_handle *
7502 mt_config_init(dev_info_t *pdip, dev_info_t **dipp, int flags,
7503     major_t major, int op, struct brevq_node **brevqp)
7504 {
7505         struct mt_config_handle *hdl = kmem_alloc(sizeof (*hdl), KM_SLEEP);
7506 
7507         mutex_init(&hdl->mtc_lock, NULL, MUTEX_DEFAULT, NULL);
7508         cv_init(&hdl->mtc_cv, NULL, CV_DEFAULT, NULL);
7509         hdl->mtc_pdip = pdip;
7510         hdl->mtc_fdip = dipp;
7511         hdl->mtc_parmajor = DDI_MAJOR_T_NONE;
7512         hdl->mtc_flags = flags;
7513         hdl->mtc_major = major;
7514         hdl->mtc_thr_count = 0;
7515         hdl->mtc_op = op;
7516         hdl->mtc_error = 0;
7517         hdl->mtc_brevqp = brevqp;
7518 
7519 #ifdef DEBUG
7520         gethrestime(&hdl->start_time);
7521         hdl->total_time = 0;
7522 #endif /* DEBUG */
7523 
7524         return (hdl);
7525 }
7526 
#ifdef DEBUG
/*
 * Return the approximate number of milliseconds between 'start' and
 * 'end'.  Shifts by 20 bits (division by 2^20 = 1048576) stand in for
 * division by 10^6, so the result is an estimate, not an exact value.
 */
static int
time_diff_in_msec(timestruc_t start, timestruc_t end)
{
        int     secs = end.tv_sec - start.tv_sec;
        int     nsecs = end.tv_nsec - start.tv_nsec;

        /* borrow one second when the nanosecond difference is negative */
        if (nsecs < 0) {
                secs -= 1;
                nsecs += NANOSEC;
        }

        return (secs * (NANOSEC >> 20) + (nsecs >> 20));
}

#endif  /* DEBUG */
7544 
7545 static int
7546 mt_config_fini(struct mt_config_handle *hdl)
7547 {
7548         int             rv;
7549 #ifdef DEBUG
7550         int             real_time;
7551         timestruc_t     end_time;
7552 #endif /* DEBUG */
7553 
7554         mutex_enter(&hdl->mtc_lock);
7555         while (hdl->mtc_thr_count > 0)
7556                 cv_wait(&hdl->mtc_cv, &hdl->mtc_lock);
7557         rv = hdl->mtc_error;
7558         mutex_exit(&hdl->mtc_lock);
7559 
7560 #ifdef DEBUG
7561         gethrestime(&end_time);
7562         real_time = time_diff_in_msec(hdl->start_time, end_time);
7563         if ((ddidebug & DDI_MTCONFIG) && hdl->mtc_pdip)
7564                 cmn_err(CE_NOTE,
7565                     "config %s%d: total time %d msec, real time %d msec",
7566                     ddi_driver_name(hdl->mtc_pdip),
7567                     ddi_get_instance(hdl->mtc_pdip),
7568                     hdl->total_time, real_time);
7569 #endif /* DEBUG */
7570 
7571         cv_destroy(&hdl->mtc_cv);
7572         mutex_destroy(&hdl->mtc_lock);
7573         kmem_free(hdl, sizeof (*hdl));
7574 
7575         return (rv);
7576 }
7577 
/*
 * Per-node work item handed to mt_config_thread().  One is allocated by
 * mt_config_children()/mt_config_driver() for each node to process and is
 * freed by mt_config_thread() when the work is done.
 */
struct mt_config_data {
        struct mt_config_handle *mtc_hdl;       /* shared request handle */
        dev_info_t              *mtc_dip;       /* held node to process */
        major_t                 mtc_major;      /* major passed to the op */
        int                     mtc_flags;      /* NDI flags for the op */
        struct brevq_node       *mtc_brn;       /* brevq node; gets brn_child */
        struct mt_config_data   *mtc_next;      /* next item on dispatch list */
};
7586 
/*
 * Worker for one mt_config_data item: performs the configure or
 * unconfigure operation selected by hdl->mtc_op on mcd->mtc_dip, then
 * records the result in the shared handle.
 *
 * Runs either as its own thread or called directly by the dispatcher.
 * Consumes the hold on mcd->mtc_dip taken by the dispatcher, drops one
 * count from hdl->mtc_thr_count, and frees mcd.
 */
static void
mt_config_thread(void *arg)
{
        struct mt_config_data   *mcd = (struct mt_config_data *)arg;
        struct mt_config_handle *hdl = mcd->mtc_hdl;
        dev_info_t              *dip = mcd->mtc_dip;
        dev_info_t              *rdip, **dipp;
        major_t                 major = mcd->mtc_major;
        int                     flags = mcd->mtc_flags;
        int                     rv = 0;

#ifdef DEBUG
        timestruc_t start_time, end_time;
        gethrestime(&start_time);
#endif /* DEBUG */

        /*
         * Only ask the unconfig code for a dip (via the local rdip) when
         * the original caller supplied somewhere to return one.
         */
        rdip = NULL;
        dipp = hdl->mtc_fdip ? &rdip : NULL;

        switch (hdl->mtc_op) {
        case MT_CONFIG_OP:
                rv = devi_config_common(dip, flags, major);
                break;
        case MT_UNCONFIG_OP:
                if (mcd->mtc_brn) {
                        /*
                         * Collect the queue built below this node and
                         * hang it off the brevq node for this dip.
                         */
                        struct brevq_node *brevq = NULL;
                        rv = devi_unconfig_common(dip, dipp, flags, major,
                            &brevq);
                        mcd->mtc_brn->brn_child = brevq;
                } else
                        rv = devi_unconfig_common(dip, dipp, flags, major,
                            NULL);
                break;
        }

        /* everything below updates state shared with the other workers */
        mutex_enter(&hdl->mtc_lock);
#ifdef DEBUG
        gethrestime(&end_time);
        hdl->total_time += time_diff_in_msec(start_time, end_time);
#endif /* DEBUG */

        /* only the first failure is recorded in the handle */
        if ((rv != NDI_SUCCESS) && (hdl->mtc_error == 0)) {
                hdl->mtc_error = rv;
#ifdef  DEBUG
                if ((ddidebug & DDI_DEBUG) && (major != DDI_MAJOR_T_NONE)) {
                        char    *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);

                        (void) ddi_pathname(dip, path);
                        cmn_err(CE_NOTE, "mt_config_thread: "
                            "op %d.%d.%x at %s failed %d",
                            hdl->mtc_op, major, flags, path, rv);
                        kmem_free(path, MAXPATHLEN);
                }
#endif  /* DEBUG */
        }

        /*
         * Hand our rdip to the caller if no other worker has filled in
         * the caller's slot yet; ownership of rdip moves with it.
         */
        if (hdl->mtc_fdip && *hdl->mtc_fdip == NULL) {
                *hdl->mtc_fdip = rdip;
                rdip = NULL;
        }

        /* slot already taken by another worker: release our rdip */
        if (rdip) {
                ASSERT(rv != NDI_SUCCESS);
                ndi_rele_devi(rdip);
        }

        /* matches the ndi_hold_devi() done by the dispatcher */
        ndi_rele_devi(dip);

        /*
         * Wake mt_config_fini() when the last worker finishes.  hdl must
         * not be touched after the mutex is dropped, because
         * mt_config_fini() frees it once the count reaches zero.
         */
        if (--hdl->mtc_thr_count == 0)
                cv_broadcast(&hdl->mtc_cv);
        mutex_exit(&hdl->mtc_lock);
        kmem_free(mcd, sizeof (*mcd));
}
7660 
/*
 * Multi-threaded config/unconfig of child nexus
 *
 * Walks the children of hdl->mtc_pdip while holding the parent busy and
 * builds a list of work items, one per child that is not skipped.  The
 * list is processed only after ndi_devi_exit(): each item is passed to
 * mt_config_thread(), either in a new thread or, when mtc_off or
 * NDI_MTC_OFF is set, by a direct call.  The caller collects the results
 * with mt_config_fini().
 */
static void
mt_config_children(struct mt_config_handle *hdl)
{
        dev_info_t              *pdip = hdl->mtc_pdip;
        major_t                 major = hdl->mtc_major;
        dev_info_t              *dip;
        int                     circ;
        struct brevq_node       *brn;
        struct mt_config_data   *mcd_head = NULL;
        struct mt_config_data   *mcd_tail = NULL;
        struct mt_config_data   *mcd;
#ifdef DEBUG
        timestruc_t             end_time;

        /* Update total_time in handle */
        gethrestime(&end_time);
        hdl->total_time += time_diff_in_msec(hdl->start_time, end_time);
#endif

        ndi_devi_enter(pdip, &circ);
        dip = ddi_get_child(pdip);
        while (dip) {
                if (hdl->mtc_op == MT_UNCONFIG_OP && hdl->mtc_brevqp &&
                    !(DEVI_EVREMOVE(dip)) &&
                    i_ddi_node_state(dip) >= DS_INITIALIZED) {
                        /*
                         * Enqueue this dip's deviname.
                         * No need to hold a lock while enqueuing since this
                         * is the only thread doing the enqueue and no one
                         * walks the queue while we are in multithreaded
                         * unconfiguration.
                         */
                        brn = brevq_enqueue(hdl->mtc_brevqp, dip, NULL);
                } else
                        brn = NULL;

                /*
                 * Hold the child that we are processing so he does not get
                 * removed. The corresponding ndi_rele_devi() for children
                 * that are not being skipped is done at the end of
                 * mt_config_thread().
                 */
                ndi_hold_devi(dip);

                /*
                 * skip leaf nodes and (for configure) nodes not
                 * fully attached.
                 */
                if (is_leaf_node(dip) ||
                    (hdl->mtc_op == MT_CONFIG_OP &&
                    i_ddi_node_state(dip) < DS_READY)) {
                        ndi_rele_devi(dip);
                        dip = ddi_get_next_sibling(dip);
                        continue;
                }

                /* build the work item for this child */
                mcd = kmem_alloc(sizeof (*mcd), KM_SLEEP);
                mcd->mtc_dip = dip;
                mcd->mtc_hdl = hdl;
                mcd->mtc_brn = brn;

                /*
                 * Switch a 'driver' operation to an 'all' operation below a
                 * node bound to the driver.
                 */
                if ((major == DDI_MAJOR_T_NONE) ||
                    (major == ddi_driver_major(dip)))
                        mcd->mtc_major = DDI_MAJOR_T_NONE;
                else
                        mcd->mtc_major = major;

                /*
                 * The unconfig-driver to unconfig-all conversion above
                 * constitutes an autodetach for NDI_DETACH_DRIVER calls,
                 * set NDI_AUTODETACH.
                 */
                mcd->mtc_flags = hdl->mtc_flags;
                if ((mcd->mtc_flags & NDI_DETACH_DRIVER) &&
                    (hdl->mtc_op == MT_UNCONFIG_OP) &&
                    (major == ddi_driver_major(pdip)))
                        mcd->mtc_flags |= NDI_AUTODETACH;

                /*
                 * Account for the worker before it is started so that
                 * mt_config_fini() waits for it.
                 */
                mutex_enter(&hdl->mtc_lock);
                hdl->mtc_thr_count++;
                mutex_exit(&hdl->mtc_lock);

                /*
                 * Add to end of list to process after ndi_devi_exit to avoid
                 * locking differences depending on value of mtc_off.
                 */
                mcd->mtc_next = NULL;
                if (mcd_head == NULL)
                        mcd_head = mcd;
                else
                        mcd_tail->mtc_next = mcd;
                mcd_tail = mcd;

                dip = ddi_get_next_sibling(dip);
        }
        ndi_devi_exit(pdip, circ);

        /*
         * go through the list of held children; the next pointer is
         * saved first because mt_config_thread() frees mcd.
         */
        for (mcd = mcd_head; mcd; mcd = mcd_head) {
                mcd_head = mcd->mtc_next;
                if (mtc_off || (mcd->mtc_flags & NDI_MTC_OFF))
                        mt_config_thread(mcd);
                else
                        (void) thread_create(NULL, 0, mt_config_thread, mcd,
                            0, &p0, TS_RUN, minclsyspri);
        }
}
7775 
7776 static void
7777 mt_config_driver(struct mt_config_handle *hdl)
7778 {
7779         major_t                 par_major = hdl->mtc_parmajor;
7780         major_t                 major = hdl->mtc_major;
7781         struct devnames         *dnp = &devnamesp[par_major];
7782         dev_info_t              *dip;
7783         struct mt_config_data   *mcd_head = NULL;
7784         struct mt_config_data   *mcd_tail = NULL;
7785         struct mt_config_data   *mcd;
7786 #ifdef DEBUG
7787         timestruc_t             end_time;
7788 
7789         /* Update total_time in handle */
7790         gethrestime(&end_time);
7791         hdl->total_time += time_diff_in_msec(hdl->start_time, end_time);
7792 #endif
7793         ASSERT(par_major != DDI_MAJOR_T_NONE);
7794         ASSERT(major != DDI_MAJOR_T_NONE);
7795 
7796         LOCK_DEV_OPS(&dnp->dn_lock);
7797         dip = devnamesp[par_major].dn_head;
7798         while (dip) {
7799                 /*
7800                  * Hold the child that we are processing so he does not get
7801                  * removed. The corrisponding ndi_rele_devi() for children
7802                  * that are not being skipped is done at the end of
7803                  * mt_config_thread().
7804                  */
7805                 ndi_hold_devi(dip);
7806 
7807                 /* skip leaf nodes and nodes not fully attached */
7808                 if (!i_ddi_devi_attached(dip) || is_leaf_node(dip)) {
7809                         ndi_rele_devi(dip);
7810                         dip = ddi_get_next(dip);
7811                         continue;
7812                 }
7813 
7814                 mcd = kmem_alloc(sizeof (*mcd), KM_SLEEP);
7815                 mcd->mtc_dip = dip;
7816                 mcd->mtc_hdl = hdl;
7817                 mcd->mtc_major = major;
7818                 mcd->mtc_flags = hdl->mtc_flags;
7819 
7820                 mutex_enter(&hdl->mtc_lock);
7821                 hdl->mtc_thr_count++;
7822                 mutex_exit(&hdl->mtc_lock);
7823 
7824                 /*
7825                  * Add to end of list to process after UNLOCK_DEV_OPS to avoid
7826                  * locking differences depending on value of mtc_off.
7827                  */
7828                 mcd->mtc_next = NULL;
7829                 if (mcd_head == NULL)
7830                         mcd_head = mcd;
7831                 else
7832                         mcd_tail->mtc_next = mcd;
7833                 mcd_tail = mcd;
7834 
7835                 dip = ddi_get_next(dip);
7836         }
7837         UNLOCK_DEV_OPS(&dnp->dn_lock);
7838 
7839         /* go through the list of held children */
7840         for (mcd = mcd_head; mcd; mcd = mcd_head) {
7841                 mcd_head = mcd->mtc_next;
7842                 if (mtc_off || (mcd->mtc_flags & NDI_MTC_OFF))
7843                         mt_config_thread(mcd);
7844                 else
7845                         (void) thread_create(NULL, 0, mt_config_thread, mcd,
7846                             0, &p0, TS_RUN, minclsyspri);
7847         }
7848 }
7849 
7850 /*
7851  * Given the nodeid for a persistent (PROM or SID) node, return
7852  * the corresponding devinfo node
7853  * NOTE: This function will return NULL for .conf nodeids.
7854  */
7855 dev_info_t *
7856 e_ddi_nodeid_to_dip(pnode_t nodeid)
7857 {
7858         dev_info_t              *dip = NULL;
7859         struct devi_nodeid      *prev, *elem;
7860 
7861         mutex_enter(&devimap->dno_lock);
7862 
7863         prev = NULL;
7864         for (elem = devimap->dno_head; elem; elem = elem->next) {
7865                 if (elem->nodeid == nodeid) {
7866                         ndi_hold_devi(elem->dip);
7867                         dip = elem->dip;
7868                         break;
7869                 }
7870                 prev = elem;
7871         }
7872 
7873         /*
7874          * Move to head for faster lookup next time
7875          */
7876         if (elem && prev) {
7877                 prev->next = elem->next;
7878                 elem->next = devimap->dno_head;
7879                 devimap->dno_head = elem;
7880         }
7881 
7882         mutex_exit(&devimap->dno_lock);
7883         return (dip);
7884 }
7885 
7886 static void
7887 free_cache_task(void *arg)
7888 {
7889         ASSERT(arg == NULL);
7890 
7891         mutex_enter(&di_cache.cache_lock);
7892 
7893         /*
7894          * The cache can be invalidated without holding the lock
7895          * but it can be made valid again only while the lock is held.
7896          * So if the cache is invalid when the lock is held, it will
7897          * stay invalid until lock is released.
7898          */
7899         if (!di_cache.cache_valid)
7900                 i_ddi_di_cache_free(&di_cache);
7901 
7902         mutex_exit(&di_cache.cache_lock);
7903 
7904         if (di_cache_debug)
7905                 cmn_err(CE_NOTE, "system_taskq: di_cache freed");
7906 }
7907 
7908 extern int modrootloaded;
7909 
7910 void
7911 i_ddi_di_cache_free(struct di_cache *cache)
7912 {
7913         int     error;
7914         extern int sys_shutdown;
7915 
7916         ASSERT(mutex_owned(&cache->cache_lock));
7917 
7918         if (cache->cache_size) {
7919                 ASSERT(cache->cache_size > 0);
7920                 ASSERT(cache->cache_data);
7921 
7922                 kmem_free(cache->cache_data, cache->cache_size);
7923                 cache->cache_data = NULL;
7924                 cache->cache_size = 0;
7925 
7926                 if (di_cache_debug)
7927                         cmn_err(CE_NOTE, "i_ddi_di_cache_free: freed cachemem");
7928         } else {
7929                 ASSERT(cache->cache_data == NULL);
7930                 if (di_cache_debug)
7931                         cmn_err(CE_NOTE, "i_ddi_di_cache_free: NULL cache");
7932         }
7933 
7934         if (!modrootloaded || rootvp == NULL ||
7935             vn_is_readonly(rootvp) || sys_shutdown) {
7936                 if (di_cache_debug) {
7937                         cmn_err(CE_WARN, "/ not mounted/RDONLY. Skip unlink");
7938                 }
7939                 return;
7940         }
7941 
7942         error = vn_remove(DI_CACHE_FILE, UIO_SYSSPACE, RMFILE);
7943         if (di_cache_debug && error && error != ENOENT) {
7944                 cmn_err(CE_WARN, "%s: unlink failed: %d", DI_CACHE_FILE, error);
7945         } else if (di_cache_debug && !error) {
7946                 cmn_err(CE_NOTE, "i_ddi_di_cache_free: unlinked cache file");
7947         }
7948 }
7949 
7950 void
7951 i_ddi_di_cache_invalidate()
7952 {
7953         int     cache_valid;
7954 
7955         if (!modrootloaded || !i_ddi_io_initialized()) {
7956                 if (di_cache_debug)
7957                         cmn_err(CE_NOTE, "I/O not inited. Skipping invalidate");
7958                 return;
7959         }
7960 
7961         /* Increment devtree generation number. */
7962         atomic_inc_ulong(&devtree_gen);
7963 
7964         /* Invalidate the in-core cache and dispatch free on valid->invalid */
7965         cache_valid = atomic_swap_uint(&di_cache.cache_valid, 0);
7966         if (cache_valid) {
7967                 /*
7968                  * This is an optimization to start cleaning up a cached
7969                  * snapshot early.  For this reason, it is OK for
7970                  * taskq_dispatach to fail (and it is OK to not track calling
7971                  * context relative to sleep, and assume NOSLEEP).
7972                  */
7973                 (void) taskq_dispatch(system_taskq, free_cache_task, NULL,
7974                     TQ_NOSLEEP);
7975         }
7976 
7977         if (di_cache_debug) {
7978                 cmn_err(CE_NOTE, "invalidation");
7979         }
7980 }
7981 
7982 
7983 static void
7984 i_bind_vhci_node(dev_info_t *dip)
7985 {
7986         DEVI(dip)->devi_major = ddi_name_to_major(ddi_node_name(dip));
7987         i_ddi_set_node_state(dip, DS_BOUND);
7988 }
7989 
7990 static char vhci_node_addr[2];
7991 
7992 static int
7993 i_init_vhci_node(dev_info_t *dip)
7994 {
7995         add_global_props(dip);
7996         DEVI(dip)->devi_ops = ndi_hold_driver(dip);
7997         if (DEVI(dip)->devi_ops == NULL)
7998                 return (-1);
7999 
8000         DEVI(dip)->devi_instance = e_ddi_assign_instance(dip);
8001         e_ddi_keep_instance(dip);
8002         vhci_node_addr[0]       = '\0';
8003         ddi_set_name_addr(dip, vhci_node_addr);
8004         i_ddi_set_node_state(dip, DS_INITIALIZED);
8005         return (0);
8006 }
8007 
8008 static void
8009 i_link_vhci_node(dev_info_t *dip)
8010 {
8011         ASSERT(MUTEX_HELD(&global_vhci_lock));
8012 
8013         /*
8014          * scsi_vhci should be kept left most of the device tree.
8015          */
8016         if (scsi_vhci_dip) {
8017                 DEVI(dip)->devi_sibling = DEVI(scsi_vhci_dip)->devi_sibling;
8018                 DEVI(scsi_vhci_dip)->devi_sibling = DEVI(dip);
8019         } else {
8020                 DEVI(dip)->devi_sibling = DEVI(top_devinfo)->devi_child;
8021                 DEVI(top_devinfo)->devi_child = DEVI(dip);
8022         }
8023 }
8024 
8025 
8026 /*
8027  * This a special routine to enumerate vhci node (child of rootnex
8028  * node) without holding the ndi_devi_enter() lock. The device node
8029  * is allocated, initialized and brought into DS_READY state before
8030  * inserting into the device tree. The VHCI node is handcrafted
8031  * here to bring the node to DS_READY, similar to rootnex node.
8032  *
8033  * The global_vhci_lock protects linking the node into the device
8034  * as same lock is held before linking/unlinking any direct child
8035  * of rootnex children.
8036  *
8037  * This routine is a workaround to handle a possible deadlock
8038  * that occurs while trying to enumerate node in a different sub-tree
8039  * during _init/_attach entry points.
8040  */
8041 /*ARGSUSED*/
8042 dev_info_t *
8043 ndi_devi_config_vhci(char *drvname, int flags)
8044 {
8045         struct devnames         *dnp;
8046         dev_info_t              *dip;
8047         major_t                 major = ddi_name_to_major(drvname);
8048 
8049         if (major == -1)
8050                 return (NULL);
8051 
8052         /* Make sure we create the VHCI node only once */
8053         dnp = &devnamesp[major];
8054         LOCK_DEV_OPS(&dnp->dn_lock);
8055         if (dnp->dn_head) {
8056                 dip = dnp->dn_head;
8057                 UNLOCK_DEV_OPS(&dnp->dn_lock);
8058                 return (dip);
8059         }
8060         UNLOCK_DEV_OPS(&dnp->dn_lock);
8061 
8062         /* Allocate the VHCI node */
8063         ndi_devi_alloc_sleep(top_devinfo, drvname, DEVI_SID_NODEID, &dip);
8064         ndi_hold_devi(dip);
8065 
8066         /* Mark the node as VHCI */
8067         DEVI(dip)->devi_node_attributes |= DDI_VHCI_NODE;
8068 
8069         i_ddi_add_devimap(dip);
8070         i_bind_vhci_node(dip);
8071         if (i_init_vhci_node(dip) == -1) {
8072                 ndi_rele_devi(dip);
8073                 (void) ndi_devi_free(dip);
8074                 return (NULL);
8075         }
8076 
8077         mutex_enter(&(DEVI(dip)->devi_lock));
8078         DEVI_SET_ATTACHING(dip);
8079         mutex_exit(&(DEVI(dip)->devi_lock));
8080 
8081         if (devi_attach(dip, DDI_ATTACH) != DDI_SUCCESS) {
8082                 cmn_err(CE_CONT, "Could not attach %s driver", drvname);
8083                 e_ddi_free_instance(dip, vhci_node_addr);
8084                 ndi_rele_devi(dip);
8085                 (void) ndi_devi_free(dip);
8086                 return (NULL);
8087         }
8088         mutex_enter(&(DEVI(dip)->devi_lock));
8089         DEVI_CLR_ATTACHING(dip);
8090         mutex_exit(&(DEVI(dip)->devi_lock));
8091 
8092         mutex_enter(&global_vhci_lock);
8093         i_link_vhci_node(dip);
8094         mutex_exit(&global_vhci_lock);
8095         i_ddi_set_node_state(dip, DS_READY);
8096 
8097         LOCK_DEV_OPS(&dnp->dn_lock);
8098         dnp->dn_flags |= DN_DRIVER_HELD;
8099         dnp->dn_head = dip;
8100         UNLOCK_DEV_OPS(&dnp->dn_lock);
8101 
8102         i_ndi_devi_report_status_change(dip, NULL);
8103 
8104         return (dip);
8105 }
8106 
8107 /*
8108  * Maintain DEVI_DEVICE_REMOVED hotplug devi_state for remove/reinsert hotplug
8109  * of open devices. Currently, because of tight coupling between the devfs file
8110  * system and the Solaris device tree, a driver can't always make the device
8111  * tree state (esp devi_node_state) match device hardware hotplug state. Until
8112  * resolved, to overcome this deficiency we use the following interfaces that
 * maintain the DEVI_DEVICE_REMOVED devi_state status bit.  These interfaces
8114  * report current state, and drive operation (like events and cache
8115  * invalidation) when a driver changes remove/insert state of an open device.
8116  *
8117  * The ndi_devi_device_isremoved() returns 1 if the device is currently removed.
8118  *
8119  * The ndi_devi_device_remove() interface declares the device as removed, and
8120  * returns 1 if there was a state change associated with this declaration.
8121  *
8122  * The ndi_devi_device_insert() declares the device as inserted, and returns 1
8123  * if there was a state change associated with this declaration.
8124  */
8125 int
8126 ndi_devi_device_isremoved(dev_info_t *dip)
8127 {
8128         return (DEVI_IS_DEVICE_REMOVED(dip));
8129 }
8130 
8131 int
8132 ndi_devi_device_remove(dev_info_t *dip)
8133 {
8134         ASSERT(dip && ddi_get_parent(dip) &&
8135             DEVI_BUSY_OWNED(ddi_get_parent(dip)));
8136 
8137         /* Return if already marked removed. */
8138         if (ndi_devi_device_isremoved(dip))
8139                 return (0);
8140 
8141         /* Mark the device as having been physically removed. */
8142         mutex_enter(&(DEVI(dip)->devi_lock));
8143         ndi_devi_set_hidden(dip);       /* invisible: lookup/snapshot */
8144         DEVI_SET_DEVICE_REMOVED(dip);
8145         DEVI_SET_EVREMOVE(dip);         /* this clears EVADD too */
8146         mutex_exit(&(DEVI(dip)->devi_lock));
8147 
8148         /* report remove (as 'removed') */
8149         i_ndi_devi_report_status_change(dip, NULL);
8150 
8151         /*
8152          * Invalidate the cache to ensure accurate
8153          * (di_state() & DI_DEVICE_REMOVED).
8154          */
8155         i_ddi_di_cache_invalidate();
8156 
8157         /*
8158          * Generate sysevent for those interested in removal (either
8159          * directly via private EC_DEVFS or indirectly via devfsadmd
8160          * generated EC_DEV). This will generate LDI DEVICE_REMOVE
8161          * event too.
8162          */
8163         i_ddi_log_devfs_device_remove(dip);
8164 
8165         return (1);             /* DEVICE_REMOVED state changed */
8166 }
8167 
8168 int
8169 ndi_devi_device_insert(dev_info_t *dip)
8170 {
8171         ASSERT(dip && ddi_get_parent(dip) &&
8172             DEVI_BUSY_OWNED(ddi_get_parent(dip)));
8173 
8174         /* Return if not marked removed. */
8175         if (!ndi_devi_device_isremoved(dip))
8176                 return (0);
8177 
8178         /* Mark the device as having been physically reinserted. */
8179         mutex_enter(&(DEVI(dip)->devi_lock));
8180         ndi_devi_clr_hidden(dip);       /* visible: lookup/snapshot */
8181         DEVI_SET_DEVICE_REINSERTED(dip);
8182         DEVI_SET_EVADD(dip);            /* this clears EVREMOVE too */
8183         mutex_exit(&(DEVI(dip)->devi_lock));
8184 
8185         /* report insert (as 'online') */
8186         i_ndi_devi_report_status_change(dip, NULL);
8187 
8188         /*
8189          * Invalidate the cache to ensure accurate
8190          * (di_state() & DI_DEVICE_REMOVED).
8191          */
8192         i_ddi_di_cache_invalidate();
8193 
8194         /*
8195          * Generate sysevent for those interested in removal (either directly
8196          * via EC_DEVFS or indirectly via devfsadmd generated EC_DEV).
8197          */
8198         i_ddi_log_devfs_device_insert(dip);
8199 
8200         return (1);             /* DEVICE_REMOVED state changed */
8201 }
8202 
/*
 * ibt_hw_is_present() returns the current value of ib_hw_status, which
 * is 0 when there is no IB hardware actively running.  This is primarily
 * useful for modules like rpcmod which need a quick check to decide
 * whether or not they should try to use InfiniBand.
 */
int ib_hw_status = 0;
int
ibt_hw_is_present(void)
{
        return (ib_hw_status);
}
8215 
8216 /*
8217  * ASSERT that constraint flag is not set and then set the "retire attempt"
8218  * flag.
8219  */
8220 int
8221 e_ddi_mark_retiring(dev_info_t *dip, void *arg)
8222 {
8223         char    **cons_array = (char **)arg;
8224         char    *path;
8225         int     constraint;
8226         int     i;
8227 
8228         constraint = 0;
8229         if (cons_array) {
8230                 path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8231                 (void) ddi_pathname(dip, path);
8232                 for (i = 0; cons_array[i] != NULL; i++) {
8233                         if (strcmp(path, cons_array[i]) == 0) {
8234                                 constraint = 1;
8235                                 break;
8236                         }
8237                 }
8238                 kmem_free(path, MAXPATHLEN);
8239         }
8240 
8241         mutex_enter(&DEVI(dip)->devi_lock);
8242         ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT));
8243         DEVI(dip)->devi_flags |= DEVI_RETIRING;
8244         if (constraint)
8245                 DEVI(dip)->devi_flags |= DEVI_R_CONSTRAINT;
8246         mutex_exit(&DEVI(dip)->devi_lock);
8247 
8248         RIO_VERBOSE((CE_NOTE, "marked dip as undergoing retire process dip=%p",
8249             (void *)dip));
8250 
8251         if (constraint)
8252                 RIO_DEBUG((CE_NOTE, "marked dip as constrained, dip=%p",
8253                     (void *)dip));
8254 
8255         if (MDI_PHCI(dip))
8256                 mdi_phci_mark_retiring(dip, cons_array);
8257 
8258         return (DDI_WALK_CONTINUE);
8259 }
8260 
8261 static void
8262 free_array(char **cons_array)
8263 {
8264         int     i;
8265 
8266         if (cons_array == NULL)
8267                 return;
8268 
8269         for (i = 0; cons_array[i] != NULL; i++) {
8270                 kmem_free(cons_array[i], strlen(cons_array[i]) + 1);
8271         }
8272         kmem_free(cons_array, (i+1) * sizeof (char *));
8273 }
8274 
8275 /*
8276  * Walk *every* node in subtree and check if it blocks, allows or has no
8277  * comment on a proposed retire.
8278  */
8279 int
8280 e_ddi_retire_notify(dev_info_t *dip, void *arg)
8281 {
8282         int     *constraint = (int *)arg;
8283 
8284         RIO_DEBUG((CE_NOTE, "retire notify: dip = %p", (void *)dip));
8285 
8286         (void) e_ddi_offline_notify(dip);
8287 
8288         mutex_enter(&(DEVI(dip)->devi_lock));
8289         if (!(DEVI(dip)->devi_flags & DEVI_RETIRING)) {
8290                 RIO_DEBUG((CE_WARN, "retire notify: dip in retire "
8291                     "subtree is not marked: dip = %p", (void *)dip));
8292                 *constraint = 0;
8293         } else if (DEVI(dip)->devi_flags & DEVI_R_BLOCKED) {
8294                 ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT));
8295                 RIO_DEBUG((CE_NOTE, "retire notify: BLOCKED: dip = %p",
8296                     (void *)dip));
8297                 *constraint = 0;
8298         } else if (!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT)) {
8299                 RIO_DEBUG((CE_NOTE, "retire notify: NO CONSTRAINT: "
8300                     "dip = %p", (void *)dip));
8301                 *constraint = 0;
8302         } else {
8303                 RIO_DEBUG((CE_NOTE, "retire notify: CONSTRAINT set: "
8304                     "dip = %p", (void *)dip));
8305         }
8306         mutex_exit(&DEVI(dip)->devi_lock);
8307 
8308         if (MDI_PHCI(dip))
8309                 mdi_phci_retire_notify(dip, constraint);
8310 
8311         return (DDI_WALK_CONTINUE);
8312 }
8313 
/*
 * Complete a retire attempt on dip.  arg points to an int: non-zero
 * means constraints were applied and the node is to be retired, zero
 * means the retire attempt is abandoned for this node.
 *
 * Nodes not marked DEVI_RETIRING are left alone.  Otherwise the
 * retire-in-progress flags are cleared and either the node is marked
 * DEVI_RETIRED and fenced off, or the offline is finalized as failed.
 * Always returns DDI_WALK_CONTINUE.
 */
int
e_ddi_retire_finalize(dev_info_t *dip, void *arg)
{
        int constraint = *(int *)arg;
        int finalize;
        int phci_only;

        mutex_enter(&DEVI(dip)->devi_lock);
        if (!(DEVI(dip)->devi_flags & DEVI_RETIRING)) {
                /* node was never marked: none of the retire flags apply */
                RIO_DEBUG((CE_WARN,
                    "retire: unmarked dip(%p) in retire subtree",
                    (void *)dip));
                ASSERT(!(DEVI(dip)->devi_flags & DEVI_RETIRED));
                ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT));
                ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_BLOCKED));
                mutex_exit(&DEVI(dip)->devi_lock);
                return (DDI_WALK_CONTINUE);
        }

        /*
         * retire the device if constraints have been applied
         * or if the device is not in use
         */
        finalize = 0;
        if (constraint) {
                ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(dip)));

                ASSERT(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT);
                ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_BLOCKED));
                /* swap the in-progress flags for the final RETIRED flag */
                DEVI(dip)->devi_flags &= ~DEVI_R_CONSTRAINT;
                DEVI(dip)->devi_flags &= ~DEVI_RETIRING;
                DEVI(dip)->devi_flags |= DEVI_RETIRED;
                mutex_exit(&DEVI(dip)->devi_lock);
                (void) spec_fence_snode(dip, NULL);
                RIO_DEBUG((CE_NOTE, "Fenced off: dip = %p", (void *)dip));
                e_ddi_offline_finalize(dip, DDI_SUCCESS);
        } else {
                /* retire abandoned: clear whichever flags were set */
                if (DEVI(dip)->devi_flags & DEVI_R_BLOCKED) {
                        ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT));
                        DEVI(dip)->devi_flags &= ~DEVI_R_BLOCKED;
                        DEVI(dip)->devi_flags &= ~DEVI_RETIRING;
                        /* we have already finalized during notify */
                } else if (DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT) {
                        DEVI(dip)->devi_flags &= ~DEVI_R_CONSTRAINT;
                        DEVI(dip)->devi_flags &= ~DEVI_RETIRING;
                        finalize = 1;
                } else {
                        DEVI(dip)->devi_flags &= ~DEVI_RETIRING;
                        /*
                         * even if no contracts, need to call finalize
                         * to clear the contract barrier on the dip
                         */
                        finalize = 1;
                }
                mutex_exit(&DEVI(dip)->devi_lock);
                RIO_DEBUG((CE_NOTE, "finalize: NOT retired: dip = %p",
                    (void *)dip));
                if (finalize)
                        e_ddi_offline_finalize(dip, DDI_FAILURE);
        }

        /*
         * phci_only variable indicates no client checking, just
         * offline the PHCI. We set that to 0 to enable client
         * checking
         */
        phci_only = 0;
        if (MDI_PHCI(dip))
                mdi_phci_retire_finalize(dip, phci_only, arg);

        return (DDI_WALK_CONTINUE);
}
8386 
/*
 * Retire the device named by path together with its subtree.
 *
 * Three passes are made over the node itself and, unless is_leaf_node()
 * reports a leaf, over all of its descendants via ddi_walk_devs():
 * e_ddi_mark_retiring(), e_ddi_retire_notify() and
 * e_ddi_retire_finalize(). All three passes run while the parent of the
 * node is entered with ndi_devi_enter().
 *
 * Returns
 *      DDI_SUCCESS if constraints allow retire
 *      DDI_FAILURE if constraints don't allow retire.
 * cons_array is a NULL terminated array of node paths for
 * which constraints have already been applied. It is handed to
 * free_array() on every path through this function.
 */
int
e_ddi_retire_device(char *path, char **cons_array)
{
        dev_info_t      *dip;
        dev_info_t      *pdip;
        int             circ;
        int             circ2;
        int             constraint;
        char            *devnm;

        /*
         * First, lookup the device
         */
        dip = e_ddi_hold_devi_by_path(path, 0);
        if (dip == NULL) {
                /*
                 * device does not exist. This device cannot be
                 * a critical device since it is not in use. Thus
                 * this device is always retireable. Return DDI_SUCCESS
                 * to indicate this. If this device is ever
                 * instantiated, I/O framework will consult the
                 * persistent retire store, mark it as
                 * retired and fence it off.
                 */
                RIO_DEBUG((CE_NOTE, "Retire device: device doesn't exist."
                    " NOP. Just returning SUCCESS. path=%s", path));
                free_array(cons_array);
                return (DDI_SUCCESS);
        }

        RIO_DEBUG((CE_NOTE, "Retire device: found dip = %p.", (void *)dip));

        /* hold the parent; released by ndi_rele_devi(pdip) before return */
        pdip = ddi_get_parent(dip);
        ndi_hold_devi(pdip);

        /*
         * Run devfs_clean() in case dip has no constraints and is
         * not in use, so is retireable but there are dv_nodes holding
         * ref-count on the dip. Note that devfs_clean() always returns
         * success.
         *
         * devfs_clean() is given the name with its first character
         * skipped (presumably the leading '/' written by ddi_deviname()).
         */
        devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
        (void) ddi_deviname(dip, devnm);
        (void) devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
        kmem_free(devnm, MAXNAMELEN + 1);

        ndi_devi_enter(pdip, &circ);

        /* release hold from e_ddi_hold_devi_by_path */
        ndi_rele_devi(dip);

        /*
         * If it cannot make a determination, is_leaf_node() assumes
         * dip is a nexus.
         */
        (void) e_ddi_mark_retiring(dip, cons_array);
        if (!is_leaf_node(dip)) {
                ndi_devi_enter(dip, &circ2);
                ddi_walk_devs(ddi_get_child(dip), e_ddi_mark_retiring,
                    cons_array);
                ndi_devi_exit(dip, circ2);
        }
        /* the marking pass was the last user of cons_array */
        free_array(cons_array);

        /*
         * apply constraints
         */
        RIO_DEBUG((CE_NOTE, "retire: subtree retire notify: path = %s", path));

        /*
         * &constraint is the callback argument for both the notify and
         * the finalize pass, so every node visited shares this one flag.
         */
        constraint = 1; /* assume constraints allow retire */
        (void) e_ddi_retire_notify(dip, &constraint);
        if (!is_leaf_node(dip)) {
                ndi_devi_enter(dip, &circ2);
                ddi_walk_devs(ddi_get_child(dip), e_ddi_retire_notify,
                    &constraint);
                ndi_devi_exit(dip, circ2);
        }

        /*
         * Now finalize the retire
         */
        (void) e_ddi_retire_finalize(dip, &constraint);
        if (!is_leaf_node(dip)) {
                ndi_devi_enter(dip, &circ2);
                ddi_walk_devs(ddi_get_child(dip), e_ddi_retire_finalize,
                    &constraint);
                ndi_devi_exit(dip, circ2);
        }

        if (!constraint) {
                RIO_DEBUG((CE_WARN, "retire failed: path = %s", path));
        } else {
                RIO_DEBUG((CE_NOTE, "retire succeeded: path = %s", path));
        }

        ndi_devi_exit(pdip, circ);
        ndi_rele_devi(pdip);
        /* the final value of the shared flag decides the result */
        return (constraint ? DDI_SUCCESS : DDI_FAILURE);
}
8493 
8494 static int
8495 unmark_and_unfence(dev_info_t *dip, void *arg)
8496 {
8497         char    *path = (char *)arg;
8498 
8499         ASSERT(path);
8500 
8501         (void) ddi_pathname(dip, path);
8502 
8503         mutex_enter(&DEVI(dip)->devi_lock);
8504         DEVI(dip)->devi_flags &= ~DEVI_RETIRED;
8505         DEVI_SET_DEVICE_ONLINE(dip);
8506         mutex_exit(&DEVI(dip)->devi_lock);
8507 
8508         RIO_VERBOSE((CE_NOTE, "Cleared RETIRED flag: dip=%p, path=%s",
8509             (void *)dip, path));
8510 
8511         (void) spec_unfence_snode(dip);
8512         RIO_DEBUG((CE_NOTE, "Unfenced device: %s", path));
8513 
8514         if (MDI_PHCI(dip))
8515                 mdi_phci_unretire(dip);
8516 
8517         return (DDI_WALK_CONTINUE);
8518 }
8519 
/*
 * Argument block for find_dip_fcn(): a search for the node whose
 * pathname equals fd_path.
 */
struct find_dip {
        char    *fd_buf;        /* scratch buffer for each node's pathname */
        char    *fd_path;       /* pathname being searched for */
        dev_info_t *fd_dip;     /* matching node (held), set on a match */
};
8525 
8526 static int
8527 find_dip_fcn(dev_info_t *dip, void *arg)
8528 {
8529         struct find_dip *findp = (struct find_dip *)arg;
8530 
8531         (void) ddi_pathname(dip, findp->fd_buf);
8532 
8533         if (strcmp(findp->fd_path, findp->fd_buf) != 0)
8534                 return (DDI_WALK_CONTINUE);
8535 
8536         ndi_hold_devi(dip);
8537         findp->fd_dip = dip;
8538 
8539         return (DDI_WALK_TERMINATE);
8540 }
8541 
8542 int
8543 e_ddi_unretire_device(char *path)
8544 {
8545         int             circ;
8546         int             circ2;
8547         char            *path2;
8548         dev_info_t      *pdip;
8549         dev_info_t      *dip;
8550         struct find_dip  find_dip;
8551 
8552         ASSERT(path);
8553         ASSERT(*path == '/');
8554 
8555         if (strcmp(path, "/") == 0) {
8556                 cmn_err(CE_WARN, "Root node cannot be retired. Skipping "
8557                     "device unretire: %s", path);
8558                 return (0);
8559         }
8560 
8561         /*
8562          * We can't lookup the dip (corresponding to path) via
8563          * e_ddi_hold_devi_by_path() because the dip may be offline
8564          * and may not attach. Use ddi_walk_devs() instead;
8565          */
8566         find_dip.fd_buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8567         find_dip.fd_path = path;
8568         find_dip.fd_dip = NULL;
8569 
8570         pdip = ddi_root_node();
8571 
8572         ndi_devi_enter(pdip, &circ);
8573         ddi_walk_devs(ddi_get_child(pdip), find_dip_fcn, &find_dip);
8574         ndi_devi_exit(pdip, circ);
8575 
8576         kmem_free(find_dip.fd_buf, MAXPATHLEN);
8577 
8578         if (find_dip.fd_dip == NULL) {
8579                 cmn_err(CE_WARN, "Device not found in device tree. Skipping "
8580                     "device unretire: %s", path);
8581                 return (0);
8582         }
8583 
8584         dip = find_dip.fd_dip;
8585 
8586         pdip = ddi_get_parent(dip);
8587 
8588         ndi_hold_devi(pdip);
8589 
8590         ndi_devi_enter(pdip, &circ);
8591 
8592         path2 = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8593 
8594         (void) unmark_and_unfence(dip, path2);
8595         if (!is_leaf_node(dip)) {
8596                 ndi_devi_enter(dip, &circ2);
8597                 ddi_walk_devs(ddi_get_child(dip), unmark_and_unfence, path2);
8598                 ndi_devi_exit(dip, circ2);
8599         }
8600 
8601         kmem_free(path2, MAXPATHLEN);
8602 
8603         /* release hold from find_dip_fcn() */
8604         ndi_rele_devi(dip);
8605 
8606         ndi_devi_exit(pdip, circ);
8607 
8608         ndi_rele_devi(pdip);
8609 
8610         return (0);
8611 }
8612 
8613 /*
8614  * Called before attach on a dip that has been retired.
8615  */
8616 static int
8617 mark_and_fence(dev_info_t *dip, void *arg)
8618 {
8619         char    *fencepath = (char *)arg;
8620 
8621         /*
8622          * We have already decided to retire this device. The various
8623          * constraint checking should not be set.
8624          * NOTE that the retire flag may already be set due to
8625          * fenced -> detach -> fenced transitions.
8626          */
8627         mutex_enter(&DEVI(dip)->devi_lock);
8628         ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_CONSTRAINT));
8629         ASSERT(!(DEVI(dip)->devi_flags & DEVI_R_BLOCKED));
8630         ASSERT(!(DEVI(dip)->devi_flags & DEVI_RETIRING));
8631         DEVI(dip)->devi_flags |= DEVI_RETIRED;
8632         mutex_exit(&DEVI(dip)->devi_lock);
8633         RIO_VERBOSE((CE_NOTE, "marked as RETIRED dip=%p", (void *)dip));
8634 
8635         if (fencepath) {
8636                 (void) spec_fence_snode(dip, NULL);
8637                 RIO_DEBUG((CE_NOTE, "Fenced: %s",
8638                     ddi_pathname(dip, fencepath)));
8639         }
8640 
8641         return (DDI_WALK_CONTINUE);
8642 }
8643 
8644 /*
8645  * Checks the retire database and:
8646  *
8647  * - if device is present in the retire database, marks the device retired
8648  *   and fences it off.
8649  * - if device is not in retire database, allows the device to attach normally
8650  *
8651  * To be called only by framework attach code on first attach attempt.
8652  *
8653  */
8654 static int
8655 i_ddi_check_retire(dev_info_t *dip)
8656 {
8657         char            *path;
8658         dev_info_t      *pdip;
8659         int             circ;
8660         int             phci_only;
8661         int             constraint;
8662 
8663         pdip = ddi_get_parent(dip);
8664 
8665         /*
8666          * Root dip is treated special and doesn't take this code path.
8667          * Also root can never be retired.
8668          */
8669         ASSERT(pdip);
8670         ASSERT(DEVI_BUSY_OWNED(pdip));
8671         ASSERT(i_ddi_node_state(dip) < DS_ATTACHED);
8672 
8673         path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8674 
8675         (void) ddi_pathname(dip, path);
8676 
8677         RIO_VERBOSE((CE_NOTE, "Checking if dip should attach: dip=%p, path=%s",
8678             (void *)dip, path));
8679 
8680         /*
8681          * Check if this device is in the "retired" store i.e.  should
8682          * be retired. If not, we have nothing to do.
8683          */
8684         if (e_ddi_device_retired(path) == 0) {
8685                 RIO_VERBOSE((CE_NOTE, "device is NOT retired: path=%s", path));
8686                 if (DEVI(dip)->devi_flags & DEVI_RETIRED)
8687                         (void) e_ddi_unretire_device(path);
8688                 kmem_free(path, MAXPATHLEN);
8689                 return (0);
8690         }
8691 
8692         RIO_DEBUG((CE_NOTE, "attach: device is retired: path=%s", path));
8693 
8694         /*
8695          * Mark dips and fence off snodes (if any)
8696          */
8697         RIO_DEBUG((CE_NOTE, "attach: Mark and fence subtree: path=%s", path));
8698         (void) mark_and_fence(dip, path);
8699         if (!is_leaf_node(dip)) {
8700                 ndi_devi_enter(dip, &circ);
8701                 ddi_walk_devs(ddi_get_child(dip), mark_and_fence, path);
8702                 ndi_devi_exit(dip, circ);
8703         }
8704 
8705         kmem_free(path, MAXPATHLEN);
8706 
8707         /*
8708          * We don't want to check the client. We just want to
8709          * offline the PHCI
8710          */
8711         phci_only = 1;
8712         constraint = 1;
8713         if (MDI_PHCI(dip))
8714                 mdi_phci_retire_finalize(dip, phci_only, &constraint);
8715         return (1);
8716 }
8717 
8718 
/*
 * Sort keys for the alias pair arrays: the string length of element x's
 * pair_alias or pair_curr member.
 */
#define VAL_ALIAS(array, x)     (strlen(array[x].pair_alias))
#define VAL_CURR(array, x)      (strlen(array[x].pair_curr))
/*
 * Exchange elements x and y of array. The expansion is a bare brace
 * block, which is why the partition routines invoke it without a
 * trailing semicolon in front of an "else".
 */
#define SWAP(array, x, y)                       \
{                                               \
        alias_pair_t tmpair = array[x];         \
        array[x] = array[y];                    \
        array[y] = tmpair;                      \
}
8727 
8728 static int
8729 partition_curr(alias_pair_t *array, int start, int end)
8730 {
8731         int     i = start - 1;
8732         int     j = end + 1;
8733         int     pivot = start;
8734 
8735         for (;;) {
8736                 do {
8737                         j--;
8738                 } while (VAL_CURR(array, j) > VAL_CURR(array, pivot));
8739 
8740                 do {
8741                         i++;
8742                 } while (VAL_CURR(array, i) < VAL_CURR(array, pivot));
8743 
8744                 if (i < j)
8745                         SWAP(array, i, j)
8746                 else
8747                         return (j);
8748         }
8749 }
8750 
8751 static int
8752 partition_aliases(alias_pair_t *array, int start, int end)
8753 {
8754         int     i = start - 1;
8755         int     j = end + 1;
8756         int     pivot = start;
8757 
8758         for (;;) {
8759                 do {
8760                         j--;
8761                 } while (VAL_ALIAS(array, j) > VAL_ALIAS(array, pivot));
8762 
8763                 do {
8764                         i++;
8765                 } while (VAL_ALIAS(array, i) < VAL_ALIAS(array, pivot));
8766 
8767                 if (i < j)
8768                         SWAP(array, i, j)
8769                 else
8770                         return (j);
8771         }
8772 }
8773 static void
8774 sort_alias_pairs(alias_pair_t *array, int start, int end)
8775 {
8776         int mid;
8777 
8778         if (start < end) {
8779                 mid = partition_aliases(array, start, end);
8780                 sort_alias_pairs(array, start, mid);
8781                 sort_alias_pairs(array, mid + 1, end);
8782         }
8783 }
8784 
8785 static void
8786 sort_curr_pairs(alias_pair_t *array, int start, int end)
8787 {
8788         int mid;
8789 
8790         if (start < end) {
8791                 mid = partition_curr(array, start, end);
8792                 sort_curr_pairs(array, start, mid);
8793                 sort_curr_pairs(array, mid + 1, end);
8794         }
8795 }
8796 
8797 static void
8798 create_sorted_pairs(plat_alias_t *pali, int npali)
8799 {
8800         int             i;
8801         int             j;
8802         int             k;
8803         int             count;
8804 
8805         count = 0;
8806         for (i = 0; i < npali; i++) {
8807                 count += pali[i].pali_naliases;
8808         }
8809 
8810         ddi_aliases.dali_alias_pairs = kmem_zalloc(
8811             (sizeof (alias_pair_t)) * count, KM_NOSLEEP);
8812         if (ddi_aliases.dali_alias_pairs == NULL) {
8813                 cmn_err(CE_PANIC, "alias path-pair alloc failed");
8814                 /*NOTREACHED*/
8815         }
8816 
8817         ddi_aliases.dali_curr_pairs = kmem_zalloc(
8818             (sizeof (alias_pair_t)) * count, KM_NOSLEEP);
8819         if (ddi_aliases.dali_curr_pairs == NULL) {
8820                 cmn_err(CE_PANIC, "curr path-pair alloc failed");
8821                 /*NOTREACHED*/
8822         }
8823 
8824         for (i = 0, k = 0; i < npali; i++) {
8825                 for (j = 0; j < pali[i].pali_naliases; j++, k++) {
8826                         ddi_aliases.dali_alias_pairs[k].pair_curr =
8827                             ddi_aliases.dali_curr_pairs[k].pair_curr =
8828                             pali[i].pali_current;
8829                         ddi_aliases.dali_alias_pairs[k].pair_alias =
8830                             ddi_aliases.dali_curr_pairs[k].pair_alias =
8831                             pali[i].pali_aliases[j];
8832                 }
8833         }
8834 
8835         ASSERT(k == count);
8836 
8837         ddi_aliases.dali_num_pairs = count;
8838 
8839         /* Now sort the array based on length of pair_alias */
8840         sort_alias_pairs(ddi_aliases.dali_alias_pairs, 0, count - 1);
8841         sort_curr_pairs(ddi_aliases.dali_curr_pairs, 0, count - 1);
8842 }
8843 
8844 void
8845 ddi_register_aliases(plat_alias_t *pali, uint64_t npali)
8846 {
8847 
8848         ASSERT((pali == NULL) ^ (npali != 0));
8849 
8850         if (npali == 0) {
8851                 ddi_err(DER_PANIC, NULL, "npali == 0");
8852                 /*NOTREACHED*/
8853         }
8854 
8855         if (ddi_aliases_present == B_TRUE) {
8856                 ddi_err(DER_PANIC, NULL, "multiple init");
8857                 /*NOTREACHED*/
8858         }
8859 
8860         ddi_aliases.dali_alias_TLB = mod_hash_create_strhash(
8861             "ddi-alias-tlb", DDI_ALIAS_HASH_SIZE, mod_hash_null_valdtor);
8862         if (ddi_aliases.dali_alias_TLB == NULL) {
8863                 ddi_err(DER_PANIC, NULL, "alias TLB hash alloc failed");
8864                 /*NOTREACHED*/
8865         }
8866 
8867         ddi_aliases.dali_curr_TLB = mod_hash_create_strhash(
8868             "ddi-curr-tlb", DDI_ALIAS_HASH_SIZE, mod_hash_null_valdtor);
8869         if (ddi_aliases.dali_curr_TLB == NULL) {
8870                 ddi_err(DER_PANIC, NULL, "curr TLB hash alloc failed");
8871                 /*NOTREACHED*/
8872         }
8873 
8874         create_sorted_pairs(pali, npali);
8875 
8876         tsd_create(&tsd_ddi_redirect, NULL);
8877 
8878         ddi_aliases_present = B_TRUE;
8879 }
8880 
8881 static dev_info_t *
8882 path_to_dip(char *path)
8883 {
8884         dev_info_t      *currdip;
8885         int             error;
8886         char            *pdup;
8887 
8888         pdup = ddi_strdup(path, KM_NOSLEEP);
8889         if (pdup == NULL) {
8890                 cmn_err(CE_PANIC, "path strdup failed: %s", path);
8891                 /*NOTREACHED*/
8892         }
8893 
8894         error = resolve_pathname(pdup, &currdip, NULL, NULL);
8895 
8896         kmem_free(pdup, strlen(path) + 1);
8897 
8898         return (error ? NULL : currdip);
8899 }
8900 
8901 dev_info_t *
8902 ddi_alias_to_currdip(char *alias, int i)
8903 {
8904         alias_pair_t *pair;
8905         char *curr;
8906         dev_info_t *currdip = NULL;
8907         char *aliasdup;
8908         int rv, len;
8909 
8910         pair = &(ddi_aliases.dali_alias_pairs[i]);
8911         len = strlen(pair->pair_alias);
8912 
8913         curr = NULL;
8914         aliasdup = ddi_strdup(alias, KM_NOSLEEP);
8915         if (aliasdup == NULL) {
8916                 cmn_err(CE_PANIC, "aliasdup alloc failed");
8917                 /*NOTREACHED*/
8918         }
8919 
8920         if (strncmp(alias, pair->pair_alias, len)  != 0)
8921                 goto out;
8922 
8923         if (alias[len] != '/' && alias[len] != '\0')
8924                 goto out;
8925 
8926         curr = kmem_alloc(MAXPATHLEN, KM_NOSLEEP);
8927         if (curr == NULL) {
8928                 cmn_err(CE_PANIC, "curr alloc failed");
8929                 /*NOTREACHED*/
8930         }
8931         (void) strlcpy(curr, pair->pair_curr, MAXPATHLEN);
8932         if (alias[len] == '/') {
8933                 (void) strlcat(curr, "/", MAXPATHLEN);
8934                 (void) strlcat(curr, &alias[len + 1], MAXPATHLEN);
8935         }
8936 
8937         currdip = path_to_dip(curr);
8938 
8939 out:
8940         if (currdip) {
8941                 rv = mod_hash_insert(ddi_aliases.dali_alias_TLB,
8942                     (mod_hash_key_t)aliasdup, (mod_hash_val_t)curr);
8943                 if (rv != 0) {
8944                         kmem_free(curr, MAXPATHLEN);
8945                         strfree(aliasdup);
8946                 }
8947         } else {
8948                 rv = mod_hash_insert(ddi_aliases.dali_alias_TLB,
8949                     (mod_hash_key_t)aliasdup, (mod_hash_val_t)NULL);
8950                 if (rv != 0) {
8951                         strfree(aliasdup);
8952                 }
8953                 if (curr)
8954                         kmem_free(curr, MAXPATHLEN);
8955         }
8956 
8957         return (currdip);
8958 }
8959 
8960 char *
8961 ddi_curr_to_alias(char *curr, int i)
8962 {
8963         alias_pair_t    *pair;
8964         char            *alias;
8965         char            *currdup;
8966         int             len;
8967         int             rv;
8968 
8969         pair = &(ddi_aliases.dali_curr_pairs[i]);
8970 
8971         len = strlen(pair->pair_curr);
8972 
8973         alias = NULL;
8974 
8975         currdup = ddi_strdup(curr, KM_NOSLEEP);
8976         if (currdup == NULL) {
8977                 cmn_err(CE_PANIC, "currdup alloc failed");
8978                 /*NOTREACHED*/
8979         }
8980 
8981         if (strncmp(curr, pair->pair_curr, len) != 0)
8982                 goto out;
8983 
8984         if (curr[len] != '/' && curr[len] != '\0')
8985                 goto out;
8986 
8987         alias = kmem_alloc(MAXPATHLEN, KM_NOSLEEP);
8988         if (alias == NULL) {
8989                 cmn_err(CE_PANIC, "alias alloc failed");
8990                 /*NOTREACHED*/
8991         }
8992 
8993         (void) strlcpy(alias, pair->pair_alias, MAXPATHLEN);
8994         if (curr[len] == '/') {
8995                 (void) strlcat(alias, "/", MAXPATHLEN);
8996                 (void) strlcat(alias, &curr[len + 1], MAXPATHLEN);
8997         }
8998 
8999         if (e_ddi_path_to_instance(alias) == NULL) {
9000                 kmem_free(alias, MAXPATHLEN);
9001                 alias = NULL;
9002         }
9003 
9004 out:
9005         rv = mod_hash_insert(ddi_aliases.dali_curr_TLB,
9006             (mod_hash_key_t)currdup, (mod_hash_val_t)alias);
9007         if (rv != 0) {
9008                 strfree(currdup);
9009         }
9010 
9011         return (alias);
9012 }
9013 
9014 dev_info_t *
9015 ddi_alias_redirect(char *alias)
9016 {
9017         char            *curr;
9018         dev_info_t      *currdip;
9019         int             i;
9020 
9021         if (ddi_aliases_present == B_FALSE)
9022                 return (NULL);
9023 
9024         if (tsd_get(tsd_ddi_redirect))
9025                 return (NULL);
9026 
9027         (void) tsd_set(tsd_ddi_redirect, (void *)1);
9028 
9029         ASSERT(ddi_aliases.dali_alias_TLB);
9030         ASSERT(ddi_aliases.dali_alias_pairs);
9031 
9032         curr = NULL;
9033         if (mod_hash_find(ddi_aliases.dali_alias_TLB,
9034             (mod_hash_key_t)alias, (mod_hash_val_t *)&curr) == 0) {
9035                 currdip = curr ? path_to_dip(curr) : NULL;
9036                 goto out;
9037         }
9038 
9039         /* The TLB has no translation, do it the hard way */
9040         currdip = NULL;
9041         for (i = ddi_aliases.dali_num_pairs - 1; i >= 0; i--) {
9042                 currdip = ddi_alias_to_currdip(alias, i);
9043                 if (currdip)
9044                         break;
9045         }
9046 out:
9047         (void) tsd_set(tsd_ddi_redirect, NULL);
9048 
9049         return (currdip);
9050 }
9051 
9052 char *
9053 ddi_curr_redirect(char *curr)
9054 {
9055         char    *alias;
9056         int i;
9057 
9058         if (ddi_aliases_present == B_FALSE)
9059                 return (NULL);
9060 
9061         if (tsd_get(tsd_ddi_redirect))
9062                 return (NULL);
9063 
9064         (void) tsd_set(tsd_ddi_redirect, (void *)1);
9065 
9066         ASSERT(ddi_aliases.dali_curr_TLB);
9067         ASSERT(ddi_aliases.dali_curr_pairs);
9068 
9069         alias = NULL;
9070         if (mod_hash_find(ddi_aliases.dali_curr_TLB,
9071             (mod_hash_key_t)curr, (mod_hash_val_t *)&alias) == 0) {
9072                 goto out;
9073         }
9074 
9075 
9076         /* The TLB has no translation, do it the slow way */
9077         alias = NULL;
9078         for (i = ddi_aliases.dali_num_pairs - 1; i >= 0; i--) {
9079                 alias = ddi_curr_to_alias(curr, i);
9080                 if (alias)
9081                         break;
9082         }
9083 
9084 out:
9085         (void) tsd_set(tsd_ddi_redirect, NULL);
9086 
9087         return (alias);
9088 }
9089 
9090 void
9091 ddi_err(ddi_err_t ade, dev_info_t *rdip, const char *fmt, ...)
9092 {
9093         va_list ap;
9094         char strbuf[256];
9095         char *buf;
9096         size_t buflen, tlen;
9097         int ce;
9098         int de;
9099         const char *fmtbad = "Invalid arguments to ddi_err()";
9100 
9101         de = DER_CONT;
9102         strbuf[1] = '\0';
9103 
9104         switch (ade) {
9105         case DER_CONS:
9106                 strbuf[0] = '^';
9107                 break;
9108         case DER_LOG:
9109                 strbuf[0] = '!';
9110                 break;
9111         case DER_VERB:
9112                 strbuf[0] = '?';
9113                 break;
9114         default:
9115                 strbuf[0] = '\0';
9116                 de = ade;
9117                 break;
9118         }
9119 
9120         tlen = strlen(strbuf);
9121         buf = strbuf + tlen;
9122         buflen = sizeof (strbuf) - tlen;
9123 
9124         if (rdip && ddi_get_instance(rdip) == -1) {
9125                 (void) snprintf(buf, buflen, "%s: ",
9126                     ddi_driver_name(rdip));
9127         } else if (rdip) {
9128                 (void) snprintf(buf, buflen, "%s%d: ",
9129                     ddi_driver_name(rdip), ddi_get_instance(rdip));
9130         }
9131 
9132         tlen = strlen(strbuf);
9133         buf = strbuf + tlen;
9134         buflen = sizeof (strbuf) - tlen;
9135 
9136         va_start(ap, fmt);
9137         switch (de) {
9138         case DER_CONT:
9139                 (void) vsnprintf(buf, buflen, fmt, ap);
9140                 if (ade != DER_CONT) {
9141                         (void) strlcat(strbuf, "\n", sizeof (strbuf));
9142                 }
9143                 ce = CE_CONT;
9144                 break;
9145         case DER_NOTE:
9146                 (void) vsnprintf(buf, buflen, fmt, ap);
9147                 ce = CE_NOTE;
9148                 break;
9149         case DER_WARN:
9150                 (void) vsnprintf(buf, buflen, fmt, ap);
9151                 ce = CE_WARN;
9152                 break;
9153         case DER_MODE:
9154                 (void) vsnprintf(buf, buflen, fmt, ap);
9155                 if (ddi_err_panic == B_TRUE) {
9156                         ce = CE_PANIC;
9157                 } else {
9158                         ce = CE_WARN;
9159                 }
9160                 break;
9161         case DER_DEBUG:
9162                 (void) snprintf(buf, buflen, "DEBUG: ");
9163                 tlen = strlen("DEBUG: ");
9164                 (void) vsnprintf(buf + tlen, buflen - tlen, fmt, ap);
9165                 ce = CE_CONT;
9166                 break;
9167         case DER_PANIC:
9168                 (void) vsnprintf(buf, buflen, fmt, ap);
9169                 ce = CE_PANIC;
9170                 break;
9171         case DER_INVALID:
9172         default:
9173                 (void) snprintf(buf, buflen, fmtbad);
9174                 tlen = strlen(fmtbad);
9175                 (void) vsnprintf(buf + tlen, buflen - tlen, fmt, ap);
9176                 ce = CE_PANIC;
9177                 break;
9178         }
9179         va_end(ap);
9180 
9181         cmn_err(ce, strbuf);
9182 }
9183 
/*
 * Report a change of physical memory, described by addr and size.
 *
 * On x86 builds that are not __xpv the range is forwarded to
 * immu_physmem_update(); on every other build this is a no-op.
 */
/*ARGSUSED*/
void
ddi_mem_update(uint64_t addr, uint64_t size)
{
#if !defined(__x86) || defined(__xpv)
        /*LINTED*/
        ;
#else
        extern void immu_physmem_update(uint64_t addr, uint64_t size);

        immu_physmem_update(addr, size);
#endif
}