1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  24  */
  25 
  26 /*
  27  * utility routines for the /dev fs
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/param.h>
  32 #include <sys/t_lock.h>
  33 #include <sys/systm.h>
  34 #include <sys/sysmacros.h>
  35 #include <sys/user.h>
  36 #include <sys/time.h>
  37 #include <sys/vfs.h>
  38 #include <sys/vnode.h>
  39 #include <sys/file.h>
  40 #include <sys/fcntl.h>
  41 #include <sys/flock.h>
  42 #include <sys/kmem.h>
  43 #include <sys/uio.h>
  44 #include <sys/errno.h>
  45 #include <sys/stat.h>
  46 #include <sys/cred.h>
  47 #include <sys/dirent.h>
  48 #include <sys/pathname.h>
  49 #include <sys/cmn_err.h>
  50 #include <sys/debug.h>
  51 #include <sys/mode.h>
  52 #include <sys/policy.h>
  53 #include <fs/fs_subr.h>
  54 #include <sys/mount.h>
  55 #include <sys/fs/snode.h>
  56 #include <sys/fs/dv_node.h>
  57 #include <sys/fs/sdev_impl.h>
  58 #include <sys/sunndi.h>
  59 #include <sys/sunmdi.h>
  60 #include <sys/conf.h>
  61 #include <sys/proc.h>
  62 #include <sys/user.h>
  63 #include <sys/modctl.h>
  64 
  65 #ifdef DEBUG
  66 int sdev_debug = 0x00000001;
  67 int sdev_debug_cache_flags = 0;
  68 #endif
  69 
  70 /*
  71  * globals
  72  */
  73 /* prototype memory vattrs */
  74 vattr_t sdev_vattr_dir = {
  75         AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
  76         VDIR,                                   /* va_type */
  77         SDEV_DIRMODE_DEFAULT,                   /* va_mode */
  78         SDEV_UID_DEFAULT,                       /* va_uid */
  79         SDEV_GID_DEFAULT,                       /* va_gid */
  80         0,                                      /* va_fsid */
  81         0,                                      /* va_nodeid */
  82         0,                                      /* va_nlink */
  83         0,                                      /* va_size */
  84         0,                                      /* va_atime */
  85         0,                                      /* va_mtime */
  86         0,                                      /* va_ctime */
  87         0,                                      /* va_rdev */
  88         0,                                      /* va_blksize */
  89         0,                                      /* va_nblocks */
  90         0                                       /* va_vcode */
  91 };
  92 
  93 vattr_t sdev_vattr_lnk = {
  94         AT_TYPE|AT_MODE,                        /* va_mask */
  95         VLNK,                                   /* va_type */
  96         SDEV_LNKMODE_DEFAULT,                   /* va_mode */
  97         SDEV_UID_DEFAULT,                       /* va_uid */
  98         SDEV_GID_DEFAULT,                       /* va_gid */
  99         0,                                      /* va_fsid */
 100         0,                                      /* va_nodeid */
 101         0,                                      /* va_nlink */
 102         0,                                      /* va_size */
 103         0,                                      /* va_atime */
 104         0,                                      /* va_mtime */
 105         0,                                      /* va_ctime */
 106         0,                                      /* va_rdev */
 107         0,                                      /* va_blksize */
 108         0,                                      /* va_nblocks */
 109         0                                       /* va_vcode */
 110 };
 111 
 112 vattr_t sdev_vattr_blk = {
 113         AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
 114         VBLK,                                   /* va_type */
 115         S_IFBLK | SDEV_DEVMODE_DEFAULT,         /* va_mode */
 116         SDEV_UID_DEFAULT,                       /* va_uid */
 117         SDEV_GID_DEFAULT,                       /* va_gid */
 118         0,                                      /* va_fsid */
 119         0,                                      /* va_nodeid */
 120         0,                                      /* va_nlink */
 121         0,                                      /* va_size */
 122         0,                                      /* va_atime */
 123         0,                                      /* va_mtime */
 124         0,                                      /* va_ctime */
 125         0,                                      /* va_rdev */
 126         0,                                      /* va_blksize */
 127         0,                                      /* va_nblocks */
 128         0                                       /* va_vcode */
 129 };
 130 
 131 vattr_t sdev_vattr_chr = {
 132         AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
 133         VCHR,                                   /* va_type */
 134         S_IFCHR | SDEV_DEVMODE_DEFAULT,         /* va_mode */
 135         SDEV_UID_DEFAULT,                       /* va_uid */
 136         SDEV_GID_DEFAULT,                       /* va_gid */
 137         0,                                      /* va_fsid */
 138         0,                                      /* va_nodeid */
 139         0,                                      /* va_nlink */
 140         0,                                      /* va_size */
 141         0,                                      /* va_atime */
 142         0,                                      /* va_mtime */
 143         0,                                      /* va_ctime */
 144         0,                                      /* va_rdev */
 145         0,                                      /* va_blksize */
 146         0,                                      /* va_nblocks */
 147         0                                       /* va_vcode */
 148 };
 149 
 150 kmem_cache_t    *sdev_node_cache;       /* sdev_node cache */
 151 int             devtype;                /* fstype */
 152 
 153 /* static */
 154 static struct vnodeops *sdev_get_vop(struct sdev_node *);
 155 static void sdev_set_no_negcache(struct sdev_node *);
 156 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
 157 static void sdev_free_vtab(fs_operation_def_t *);
 158 
 159 static void
 160 sdev_prof_free(struct sdev_node *dv)
 161 {
 162         ASSERT(!SDEV_IS_GLOBAL(dv));
 163         if (dv->sdev_prof.dev_name)
 164                 nvlist_free(dv->sdev_prof.dev_name);
 165         if (dv->sdev_prof.dev_map)
 166                 nvlist_free(dv->sdev_prof.dev_map);
 167         if (dv->sdev_prof.dev_symlink)
 168                 nvlist_free(dv->sdev_prof.dev_symlink);
 169         if (dv->sdev_prof.dev_glob_incdir)
 170                 nvlist_free(dv->sdev_prof.dev_glob_incdir);
 171         if (dv->sdev_prof.dev_glob_excdir)
 172                 nvlist_free(dv->sdev_prof.dev_glob_excdir);
 173         bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
 174 }
 175 
 176 /* sdev_node cache constructor */
 177 /*ARGSUSED1*/
 178 static int
 179 i_sdev_node_ctor(void *buf, void *cfarg, int flag)
 180 {
 181         struct sdev_node *dv = (struct sdev_node *)buf;
 182         struct vnode *vp;
 183 
 184         bzero(buf, sizeof (struct sdev_node));
 185         vp = dv->sdev_vnode = vn_alloc(flag);
 186         if (vp == NULL) {
 187                 return (-1);
 188         }
 189         vp->v_data = dv;
 190         rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
 191         return (0);
 192 }
 193 
 194 /* sdev_node cache destructor */
 195 /*ARGSUSED1*/
 196 static void
 197 i_sdev_node_dtor(void *buf, void *arg)
 198 {
 199         struct sdev_node *dv = (struct sdev_node *)buf;
 200         struct vnode *vp = SDEVTOV(dv);
 201 
 202         rw_destroy(&dv->sdev_contents);
 203         vn_free(vp);
 204 }
 205 
 206 /* initialize sdev_node cache */
 207 void
 208 sdev_node_cache_init()
 209 {
 210         int flags = 0;
 211 
 212 #ifdef  DEBUG
 213         flags = sdev_debug_cache_flags;
 214         if (flags)
 215                 sdcmn_err(("cache debug flags 0x%x\n", flags));
 216 #endif  /* DEBUG */
 217 
 218         ASSERT(sdev_node_cache == NULL);
 219         sdev_node_cache = kmem_cache_create("sdev_node_cache",
 220             sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
 221             NULL, NULL, NULL, flags);
 222 }
 223 
 224 /* destroy sdev_node cache */
 225 void
 226 sdev_node_cache_fini()
 227 {
 228         ASSERT(sdev_node_cache != NULL);
 229         kmem_cache_destroy(sdev_node_cache);
 230         sdev_node_cache = NULL;
 231 }
 232 
 233 /*
 234  * Compare two nodes lexographically to balance avl tree
 235  */
 236 static int
 237 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
 238 {
 239         int rv;
 240         if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
 241                 return (0);
 242         return ((rv < 0) ? -1 : 1);
 243 }
 244 
 245 void
 246 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
 247 {
 248         ASSERT(dv);
 249         ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
 250         dv->sdev_state = state;
 251 }
 252 
 253 static void
 254 sdev_attr_update(struct sdev_node *dv, vattr_t *vap)
 255 {
 256         timestruc_t     now;
 257         struct vattr    *attrp;
 258         uint_t          mask;
 259 
 260         ASSERT(dv->sdev_attr);
 261         ASSERT(vap);
 262 
 263         attrp = dv->sdev_attr;
 264         mask = vap->va_mask;
 265         if (mask & AT_TYPE)
 266                 attrp->va_type = vap->va_type;
 267         if (mask & AT_MODE)
 268                 attrp->va_mode = vap->va_mode;
 269         if (mask & AT_UID)
 270                 attrp->va_uid = vap->va_uid;
 271         if (mask & AT_GID)
 272                 attrp->va_gid = vap->va_gid;
 273         if (mask & AT_RDEV)
 274                 attrp->va_rdev = vap->va_rdev;
 275 
 276         gethrestime(&now);
 277         attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now;
 278         attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now;
 279         attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now;
 280 }
 281 
 282 static void
 283 sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap)
 284 {
 285         ASSERT(dv->sdev_attr == NULL);
 286         ASSERT(vap->va_mask & AT_TYPE);
 287         ASSERT(vap->va_mask & AT_MODE);
 288 
 289         dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
 290         sdev_attr_update(dv, vap);
 291 }
 292 
 293 /* alloc and initialize a sdev_node */
 294 int
 295 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
 296     vattr_t *vap)
 297 {
 298         struct sdev_node *dv = NULL;
 299         struct vnode *vp;
 300         size_t nmlen, len;
 301         devname_handle_t  *dhl;
 302 
 303         nmlen = strlen(nm) + 1;
 304         if (nmlen > MAXNAMELEN) {
 305                 sdcmn_err9(("sdev_nodeinit: node name %s"
 306                     " too long\n", nm));
 307                 *newdv = NULL;
 308                 return (ENAMETOOLONG);
 309         }
 310 
 311         dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
 312 
 313         dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
 314         bcopy(nm, dv->sdev_name, nmlen);
 315         dv->sdev_namelen = nmlen - 1;        /* '\0' not included */
 316         len = strlen(ddv->sdev_path) + strlen(nm) + 2;
 317         dv->sdev_path = kmem_alloc(len, KM_SLEEP);
 318         (void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
 319         /* overwritten for VLNK nodes */
 320         dv->sdev_symlink = NULL;
 321 
 322         vp = SDEVTOV(dv);
 323         vn_reinit(vp);
 324         vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
 325         if (vap)
 326                 vp->v_type = vap->va_type;
 327 
 328         /*
 329          * initialized to the parent's vnodeops.
 330          * maybe overwriten for a VDIR
 331          */
 332         vn_setops(vp, vn_getops(SDEVTOV(ddv)));
 333         vn_exists(vp);
 334 
 335         dv->sdev_dotdot = NULL;
 336         dv->sdev_attrvp = NULL;
 337         if (vap) {
 338                 sdev_attr_alloc(dv, vap);
 339         } else {
 340                 dv->sdev_attr = NULL;
 341         }
 342 
 343         dv->sdev_ino = sdev_mkino(dv);
 344         dv->sdev_nlink = 0;          /* updated on insert */
 345         dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
 346         dv->sdev_flags |= SDEV_BUILD;
 347         mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
 348         cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
 349         if (SDEV_IS_GLOBAL(ddv)) {
 350                 dv->sdev_flags |= SDEV_GLOBAL;
 351                 dhl = &(dv->sdev_handle);
 352                 dhl->dh_data = dv;
 353                 dhl->dh_args = NULL;
 354                 sdev_set_no_negcache(dv);
 355                 dv->sdev_gdir_gen = 0;
 356         } else {
 357                 dv->sdev_flags &= ~SDEV_GLOBAL;
 358                 dv->sdev_origin = NULL; /* set later */
 359                 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
 360                 dv->sdev_ldir_gen = 0;
 361                 dv->sdev_devtree_gen = 0;
 362         }
 363 
 364         rw_enter(&dv->sdev_contents, RW_WRITER);
 365         sdev_set_nodestate(dv, SDEV_INIT);
 366         rw_exit(&dv->sdev_contents);
 367         *newdv = dv;
 368 
 369         return (0);
 370 }
 371 
 372 /*
 373  * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the
 374  * caller to transition the node to the SDEV_ZOMBIE state.
 375  */
 376 int
 377 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
 378     void *args, struct cred *cred)
 379 {
 380         int error = 0;
 381         struct vnode *vp = SDEVTOV(dv);
 382         vtype_t type;
 383 
 384         ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);
 385 
 386         type = vap->va_type;
 387         vp->v_type = type;
 388         vp->v_rdev = vap->va_rdev;
 389         rw_enter(&dv->sdev_contents, RW_WRITER);
 390         if (type == VDIR) {
 391                 dv->sdev_nlink = 2;
 392                 dv->sdev_flags &= ~SDEV_PERSIST;
 393                 dv->sdev_flags &= ~SDEV_DYNAMIC;
 394                 vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
 395                 ASSERT(dv->sdev_dotdot);
 396                 ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
 397                 vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
 398                 avl_create(&dv->sdev_entries,
 399                     (int (*)(const void *, const void *))sdev_compare_nodes,
 400                     sizeof (struct sdev_node),
 401                     offsetof(struct sdev_node, sdev_avllink));
 402         } else if (type == VLNK) {
 403                 ASSERT(args);
 404                 dv->sdev_nlink = 1;
 405                 dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
 406         } else {
 407                 dv->sdev_nlink = 1;
 408         }
 409 
 410         if (!(SDEV_IS_GLOBAL(dv))) {
 411                 dv->sdev_origin = (struct sdev_node *)args;
 412                 dv->sdev_flags &= ~SDEV_PERSIST;
 413         }
 414 
 415         /*
 416          * shadow node is created here OR
 417          * if failed (indicated by dv->sdev_attrvp == NULL),
 418          * created later in sdev_setattr
 419          */
 420         if (avp) {
 421                 dv->sdev_attrvp = avp;
 422         } else {
 423                 if (dv->sdev_attr == NULL) {
 424                         sdev_attr_alloc(dv, vap);
 425                 } else {
 426                         sdev_attr_update(dv, vap);
 427                 }
 428 
 429                 if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
 430                         error = sdev_shadow_node(dv, cred);
 431         }
 432 
 433         if (error == 0) {
 434                 /* transition to READY state */
 435                 sdev_set_nodestate(dv, SDEV_READY);
 436                 sdev_nc_node_exists(dv);
 437         }
 438         rw_exit(&dv->sdev_contents);
 439         return (error);
 440 }
 441 
 442 /*
 443  * Build the VROOT sdev_node.
 444  */
 445 /*ARGSUSED*/
 446 struct sdev_node *
 447 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
 448     struct vnode *avp, struct cred *cred)
 449 {
 450         struct sdev_node *dv;
 451         struct vnode *vp;
 452         char devdir[] = "/dev";
 453 
 454         ASSERT(sdev_node_cache != NULL);
 455         ASSERT(avp);
 456         dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
 457         vp = SDEVTOV(dv);
 458         vn_reinit(vp);
 459         vp->v_flag |= VROOT;
 460         vp->v_vfsp = vfsp;
 461         vp->v_type = VDIR;
 462         vp->v_rdev = devdev;
 463         vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
 464         vn_exists(vp);
 465 
 466         if (vfsp->vfs_mntpt)
 467                 dv->sdev_name = i_ddi_strdup(
 468                     (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
 469         else
 470                 /* vfs_mountdev1 set mount point later */
 471                 dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
 472         dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
 473         dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
 474         dv->sdev_ino = SDEV_ROOTINO;
 475         dv->sdev_nlink = 2;          /* name + . (no sdev_insert) */
 476         dv->sdev_dotdot = dv;                /* .. == self */
 477         dv->sdev_attrvp = avp;
 478         dv->sdev_attr = NULL;
 479         mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
 480         cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
 481         if (strcmp(dv->sdev_name, "/dev") == 0) {
 482                 dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
 483                 bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
 484                 dv->sdev_gdir_gen = 0;
 485         } else {
 486                 dv->sdev_flags = SDEV_BUILD;
 487                 dv->sdev_flags &= ~SDEV_PERSIST;
 488                 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
 489                 dv->sdev_ldir_gen = 0;
 490                 dv->sdev_devtree_gen = 0;
 491         }
 492 
 493         avl_create(&dv->sdev_entries,
 494             (int (*)(const void *, const void *))sdev_compare_nodes,
 495             sizeof (struct sdev_node),
 496             offsetof(struct sdev_node, sdev_avllink));
 497 
 498         rw_enter(&dv->sdev_contents, RW_WRITER);
 499         sdev_set_nodestate(dv, SDEV_READY);
 500         rw_exit(&dv->sdev_contents);
 501         sdev_nc_node_exists(dv);
 502         return (dv);
 503 }
 504 
 505 /* directory dependent vop table */
 506 struct sdev_vop_table {
 507         char *vt_name;                          /* subdirectory name */
 508         const fs_operation_def_t *vt_service;   /* vnodeops table */
 509         struct vnodeops *vt_vops;               /* constructed vop */
 510         struct vnodeops **vt_global_vops;       /* global container for vop */
 511         int (*vt_vtor)(struct sdev_node *);     /* validate sdev_node */
 512         int vt_flags;
 513 };
 514 
 515 /*
 516  * A nice improvement would be to provide a plug-in mechanism
 517  * for this table instead of a const table.
 518  */
 519 static struct sdev_vop_table vtab[] =
 520 {
 521         { "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate,
 522         SDEV_DYNAMIC | SDEV_VTOR },
 523 
 524         { "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate,
 525         SDEV_DYNAMIC | SDEV_VTOR },
 526 
 527         { "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops,
 528         devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR },
 529 
 530         { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE },
 531 
 532         { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate,
 533         SDEV_DYNAMIC | SDEV_VTOR },
 534 
 535         { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops,
 536         devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE },
 537 
 538         /*
 539          * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the
 540          * lofi driver controls child nodes.
 541          *
 542          * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted
 543          * stale nodes (e.g. from devfsadm -R).
 544          *
 545          * In addition, devfsadm knows not to attempt a rmdir: a zone
 546          * may hold a reference, which would zombify the node,
 547          * preventing a mkdir.
 548          */
 549 
 550         { "lofi", NULL, NULL, NULL, NULL,
 551             SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
 552         { "rlofi", NULL, NULL, NULL, NULL,
 553             SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
 554 
 555         { NULL, NULL, NULL, NULL, NULL, 0}
 556 };
 557 
 558 /*
 559  * We need to match off of the sdev_path, not the sdev_name. We are only allowed
 560  * to exist directly under /dev.
 561  */
 562 struct sdev_vop_table *
 563 sdev_match(struct sdev_node *dv)
 564 {
 565         int vlen;
 566         int i;
 567         const char *path;
 568 
 569         if (strlen(dv->sdev_path) <= 5)
 570                 return (NULL);
 571 
 572         if (strncmp(dv->sdev_path, "/dev/", 5) != 0)
 573                 return (NULL);
 574         path = dv->sdev_path + 5;
 575 
 576         for (i = 0; vtab[i].vt_name; i++) {
 577                 if (strcmp(vtab[i].vt_name, path) == 0)
 578                         return (&vtab[i]);
 579                 if (vtab[i].vt_flags & SDEV_SUBDIR) {
 580                         vlen = strlen(vtab[i].vt_name);
 581                         if ((strncmp(vtab[i].vt_name, path,
 582                             vlen - 1) == 0) && path[vlen] == '/')
 583                                 return (&vtab[i]);
 584                 }
 585 
 586         }
 587         return (NULL);
 588 }
 589 
 590 /*
 591  *  sets a directory's vnodeops if the directory is in the vtab;
 592  */
 593 static struct vnodeops *
 594 sdev_get_vop(struct sdev_node *dv)
 595 {
 596         struct sdev_vop_table *vtp;
 597         char *path;
 598 
 599         path = dv->sdev_path;
 600         ASSERT(path);
 601 
 602         /* gets the relative path to /dev/ */
 603         path += 5;
 604 
 605         /* gets the vtab entry it matches */
 606         if ((vtp = sdev_match(dv)) != NULL) {
 607                 dv->sdev_flags |= vtp->vt_flags;
 608                 if (SDEV_IS_PERSIST(dv->sdev_dotdot) &&
 609                     (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv)))
 610                         dv->sdev_flags |= SDEV_PERSIST;
 611 
 612                 if (vtp->vt_vops) {
 613                         if (vtp->vt_global_vops)
 614                                 *(vtp->vt_global_vops) = vtp->vt_vops;
 615 
 616                         return (vtp->vt_vops);
 617                 }
 618 
 619                 if (vtp->vt_service) {
 620                         fs_operation_def_t *templ;
 621                         templ = sdev_merge_vtab(vtp->vt_service);
 622                         if (vn_make_ops(vtp->vt_name,
 623                             (const fs_operation_def_t *)templ,
 624                             &vtp->vt_vops) != 0) {
 625                                 cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
 626                                     vtp->vt_name);
 627                                 /*NOTREACHED*/
 628                         }
 629                         if (vtp->vt_global_vops) {
 630                                 *(vtp->vt_global_vops) = vtp->vt_vops;
 631                         }
 632                         sdev_free_vtab(templ);
 633 
 634                         return (vtp->vt_vops);
 635                 }
 636 
 637                 return (sdev_vnodeops);
 638         }
 639 
 640         /* child inherits the persistence of the parent */
 641         if (SDEV_IS_PERSIST(dv->sdev_dotdot))
 642                 dv->sdev_flags |= SDEV_PERSIST;
 643 
 644         return (sdev_vnodeops);
 645 }
 646 
 647 static void
 648 sdev_set_no_negcache(struct sdev_node *dv)
 649 {
 650         int i;
 651         char *path;
 652 
 653         ASSERT(dv->sdev_path);
 654         path = dv->sdev_path + strlen("/dev/");
 655 
 656         for (i = 0; vtab[i].vt_name; i++) {
 657                 if (strcmp(vtab[i].vt_name, path) == 0) {
 658                         if (vtab[i].vt_flags & SDEV_NO_NCACHE)
 659                                 dv->sdev_flags |= SDEV_NO_NCACHE;
 660                         break;
 661                 }
 662         }
 663 }
 664 
 665 void *
 666 sdev_get_vtor(struct sdev_node *dv)
 667 {
 668         struct sdev_vop_table *vtp;
 669 
 670         vtp = sdev_match(dv);
 671         if (vtp)
 672                 return ((void *)vtp->vt_vtor);
 673         else
 674                 return (NULL);
 675 }
 676 
 677 /*
 678  * Build the base root inode
 679  */
 680 ino_t
 681 sdev_mkino(struct sdev_node *dv)
 682 {
 683         ino_t   ino;
 684 
 685         /*
 686          * for now, follow the lead of tmpfs here
 687          * need to someday understand the requirements here
 688          */
 689         ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
 690         ino += SDEV_ROOTINO + 1;
 691 
 692         return (ino);
 693 }
 694 
 695 int
 696 sdev_getlink(struct vnode *linkvp, char **link)
 697 {
 698         int err;
 699         char *buf;
 700         struct uio uio = {0};
 701         struct iovec iov = {0};
 702 
 703         if (linkvp == NULL)
 704                 return (ENOENT);
 705         ASSERT(linkvp->v_type == VLNK);
 706 
 707         buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
 708         iov.iov_base = buf;
 709         iov.iov_len = MAXPATHLEN;
 710         uio.uio_iov = &iov;
 711         uio.uio_iovcnt = 1;
 712         uio.uio_resid = MAXPATHLEN;
 713         uio.uio_segflg = UIO_SYSSPACE;
 714         uio.uio_llimit = MAXOFFSET_T;
 715 
 716         err = VOP_READLINK(linkvp, &uio, kcred, NULL);
 717         if (err) {
 718                 cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
 719                 kmem_free(buf, MAXPATHLEN);
 720                 return (ENOENT);
 721         }
 722 
 723         /* mission complete */
 724         *link = i_ddi_strdup(buf, KM_SLEEP);
 725         kmem_free(buf, MAXPATHLEN);
 726         return (0);
 727 }
 728 
 729 /*
 730  * A convenient wrapper to get the devfs node vnode for a device
 731  * minor functionality: readlink() of a /dev symlink
 732  * Place the link into dv->sdev_symlink
 733  */
 734 static int
 735 sdev_follow_link(struct sdev_node *dv)
 736 {
 737         int err;
 738         struct vnode *linkvp;
 739         char *link = NULL;
 740 
 741         linkvp = SDEVTOV(dv);
 742         if (linkvp == NULL)
 743                 return (ENOENT);
 744         ASSERT(linkvp->v_type == VLNK);
 745         err = sdev_getlink(linkvp, &link);
 746         if (err) {
 747                 dv->sdev_symlink = NULL;
 748                 return (ENOENT);
 749         }
 750 
 751         ASSERT(link != NULL);
 752         dv->sdev_symlink = link;
 753         return (0);
 754 }
 755 
 756 static int
 757 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
 758 {
 759         vtype_t otype = SDEVTOV(dv)->v_type;
 760 
 761         /*
 762          * existing sdev_node has a different type.
 763          */
 764         if (otype != nvap->va_type) {
 765                 sdcmn_err9(("sdev_node_check: existing node "
 766                     "  %s type %d does not match new node type %d\n",
 767                     dv->sdev_name, otype, nvap->va_type));
 768                 return (EEXIST);
 769         }
 770 
 771         /*
 772          * For a symlink, the target should be the same.
 773          */
 774         if (otype == VLNK) {
 775                 ASSERT(nargs != NULL);
 776                 ASSERT(dv->sdev_symlink != NULL);
 777                 if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
 778                         sdcmn_err9(("sdev_node_check: existing node "
 779                             " %s has different symlink %s as new node "
 780                             " %s\n", dv->sdev_name, dv->sdev_symlink,
 781                             (char *)nargs));
 782                         return (EEXIST);
 783                 }
 784         }
 785 
 786         return (0);
 787 }
 788 
 789 /*
 790  * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
 791  *
 792  * arguments:
 793  *      - ddv (parent)
 794  *      - nm (child name)
 795  *      - newdv (sdev_node for nm is returned here)
 796  *      - vap (vattr for the node to be created, va_type should be set.
 797  *      - avp (attribute vnode)
 798  *        the defaults should be used if unknown)
 799  *      - cred
 800  *      - args
 801  *          . tnm (for VLNK)
 802  *          . global sdev_node (for !SDEV_GLOBAL)
 803  *      - state: SDEV_INIT, SDEV_READY
 804  *
 805  * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
 806  *
 807  * NOTE:  directory contents writers lock needs to be held before
 808  *        calling this routine.
 809  */
 810 int
 811 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
 812     struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
 813     sdev_node_state_t state)
 814 {
 815         int error = 0;
 816         sdev_node_state_t node_state;
 817         struct sdev_node *dv = NULL;
 818 
 819         ASSERT(state != SDEV_ZOMBIE);
 820         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
 821 
 822         if (*newdv) {
 823                 dv = *newdv;
 824         } else {
 825                 /* allocate and initialize a sdev_node */
 826                 if (ddv->sdev_state == SDEV_ZOMBIE) {
 827                         sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
 828                             ddv->sdev_path));
 829                         return (ENOENT);
 830                 }
 831 
 832                 error = sdev_nodeinit(ddv, nm, &dv, vap);
 833                 if (error != 0) {
 834                         sdcmn_err9(("sdev_mknode: error %d,"
 835                             " name %s can not be initialized\n",
 836                             error, nm));
 837                         return (error);
 838                 }
 839                 ASSERT(dv);
 840 
 841                 /* insert into the directory cache */
 842                 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
 843         }
 844 
 845         ASSERT(dv);
 846         node_state = dv->sdev_state;
 847         ASSERT(node_state != SDEV_ZOMBIE);
 848 
 849         if (state == SDEV_READY) {
 850                 switch (node_state) {
 851                 case SDEV_INIT:
 852                         error = sdev_nodeready(dv, vap, avp, args, cred);
 853                         if (error) {
 854                                 sdcmn_err9(("sdev_mknode: node %s can NOT"
 855                                     " be transitioned into READY state, "
 856                                     "error %d\n", nm, error));
 857                         }
 858                         break;
 859                 case SDEV_READY:
 860                         /*
 861                          * Do some sanity checking to make sure
 862                          * the existing sdev_node is what has been
 863                          * asked for.
 864                          */
 865                         error = sdev_node_check(dv, vap, args);
 866                         break;
 867                 default:
 868                         break;
 869                 }
 870         }
 871 
 872         if (!error) {
 873                 *newdv = dv;
 874                 ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
 875         } else {
 876                 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
 877                 /*
 878                  * We created this node, it wasn't passed into us. Therefore it
 879                  * is up to us to delete it.
 880                  */
 881                 if (*newdv == NULL)
 882                         SDEV_SIMPLE_RELE(dv);
 883                 *newdv = NULL;
 884         }
 885 
 886         return (error);
 887 }
 888 
 889 /*
 890  * convenient wrapper to change vp's ATIME, CTIME and MTIME
 891  */
 892 void
 893 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
 894 {
 895         struct vattr attr;
 896         timestruc_t now;
 897         int err;
 898 
 899         ASSERT(vp);
 900         gethrestime(&now);
 901         if (mask & AT_CTIME)
 902                 attr.va_ctime = now;
 903         if (mask & AT_MTIME)
 904                 attr.va_mtime = now;
 905         if (mask & AT_ATIME)
 906                 attr.va_atime = now;
 907 
 908         attr.va_mask = (mask & AT_TIMES);
 909         err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
 910         if (err && (err != EROFS)) {
 911                 sdcmn_err(("update timestamps error %d\n", err));
 912         }
 913 }
 914 
 915 /*
 916  * the backing store vnode is released here
 917  */
 918 /*ARGSUSED1*/
 919 void
 920 sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
 921 {
 922         /* no references */
 923         ASSERT(dv->sdev_nlink == 0);
 924 
 925         if (dv->sdev_attrvp != NULLVP) {
 926                 VN_RELE(dv->sdev_attrvp);
 927                 /*
 928                  * reset the attrvp so that no more
 929                  * references can be made on this already
 930                  * vn_rele() vnode
 931                  */
 932                 dv->sdev_attrvp = NULLVP;
 933         }
 934 
 935         if (dv->sdev_attr != NULL) {
 936                 kmem_free(dv->sdev_attr, sizeof (struct vattr));
 937                 dv->sdev_attr = NULL;
 938         }
 939 
 940         if (dv->sdev_name != NULL) {
 941                 kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
 942                 dv->sdev_name = NULL;
 943         }
 944 
 945         if (dv->sdev_symlink != NULL) {
 946                 kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
 947                 dv->sdev_symlink = NULL;
 948         }
 949 
 950         if (dv->sdev_path) {
 951                 kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
 952                 dv->sdev_path = NULL;
 953         }
 954 
 955         if (!SDEV_IS_GLOBAL(dv))
 956                 sdev_prof_free(dv);
 957 
 958         if (SDEVTOV(dv)->v_type == VDIR) {
 959                 ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
 960                 avl_destroy(&dv->sdev_entries);
 961         }
 962 
 963         mutex_destroy(&dv->sdev_lookup_lock);
 964         cv_destroy(&dv->sdev_lookup_cv);
 965 
 966         /* return node to initial state as per constructor */
 967         (void) memset((void *)&dv->sdev_instance_data, 0,
 968             sizeof (dv->sdev_instance_data));
 969         vn_invalid(SDEVTOV(dv));
 970         kmem_cache_free(sdev_node_cache, dv);
 971 }
 972 
 973 /*
 974  * DIRECTORY CACHE lookup
 975  */
 976 struct sdev_node *
 977 sdev_findbyname(struct sdev_node *ddv, char *nm)
 978 {
 979         struct sdev_node *dv;
 980         struct sdev_node dvtmp;
 981         avl_index_t     where;
 982 
 983         ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
 984 
 985         dvtmp.sdev_name = nm;
 986         dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
 987         if (dv) {
 988                 ASSERT(dv->sdev_dotdot == ddv);
 989                 ASSERT(strcmp(dv->sdev_name, nm) == 0);
 990                 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
 991                 SDEV_HOLD(dv);
 992                 return (dv);
 993         }
 994         return (NULL);
 995 }
 996 
 997 /*
 998  * Inserts a new sdev_node in a parent directory
 999  */
1000 void
1001 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
1002 {
1003         avl_index_t where;
1004 
1005         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1006         ASSERT(SDEVTOV(ddv)->v_type == VDIR);
1007         ASSERT(ddv->sdev_nlink >= 2);
1008         ASSERT(dv->sdev_nlink == 0);
1009         ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1010 
1011         dv->sdev_dotdot = ddv;
1012         VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
1013         avl_insert(&ddv->sdev_entries, dv, where);
1014         ddv->sdev_nlink++;
1015 }
1016 
1017 /*
1018  * The following check is needed because while sdev_nodes are linked
1019  * in SDEV_INIT state, they have their link counts incremented only
1020  * in SDEV_READY state.
1021  */
1022 static void
1023 decr_link(struct sdev_node *dv)
1024 {
1025         VERIFY(RW_WRITE_HELD(&dv->sdev_contents));
1026         if (dv->sdev_state != SDEV_INIT) {
1027                 VERIFY(dv->sdev_nlink >= 1);
1028                 dv->sdev_nlink--;
1029         } else {
1030                 VERIFY(dv->sdev_nlink == 0);
1031         }
1032 }
1033 
1034 /*
1035  * Delete an existing dv from directory cache
1036  *
1037  * In the case of a node is still held by non-zero reference count, the node is
1038  * put into ZOMBIE state. The node is always unlinked from its parent, but it is
1039  * not destroyed via sdev_inactive until its reference count reaches "0".
1040  */
1041 static void
1042 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
1043 {
1044         struct vnode *vp;
1045         sdev_node_state_t os;
1046 
1047         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1048 
1049         vp = SDEVTOV(dv);
1050         mutex_enter(&vp->v_lock);
1051         rw_enter(&dv->sdev_contents, RW_WRITER);
1052         os = dv->sdev_state;
1053         ASSERT(os != SDEV_ZOMBIE);
1054         dv->sdev_state = SDEV_ZOMBIE;
1055 
1056         /*
1057          * unlink ourselves from the parent directory now to take care of the ..
1058          * link. However, if we're a directory, we don't remove our reference to
1059          * ourself eg. '.' until we are torn down in the inactive callback.
1060          */
1061         decr_link(ddv);
1062         avl_remove(&ddv->sdev_entries, dv);
1063         /*
1064          * sdev_inactive expects nodes to have a link to themselves when we're
1065          * tearing them down. If we're transitioning from the initial state to
1066          * zombie and not via ready, then we're not going to have this link that
1067          * comes from the node being ready. As a result, we need to increment
1068          * our link count by one to account for this.
1069          */
1070         if (os == SDEV_INIT && dv->sdev_nlink == 0)
1071                 dv->sdev_nlink++;
1072         rw_exit(&dv->sdev_contents);
1073         mutex_exit(&vp->v_lock);
1074 }
1075 
1076 /*
1077  * check if the source is in the path of the target
1078  *
1079  * source and target are different
1080  */
1081 /*ARGSUSED2*/
1082 static int
1083 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
1084 {
1085         int error = 0;
1086         struct sdev_node *dotdot, *dir;
1087 
1088         dotdot = tdv->sdev_dotdot;
1089         ASSERT(dotdot);
1090 
1091         /* fs root */
1092         if (dotdot == tdv) {
1093                 return (0);
1094         }
1095 
1096         for (;;) {
1097                 /*
1098                  * avoid error cases like
1099                  *      mv a a/b
1100                  *      mv a a/b/c
1101                  *      etc.
1102                  */
1103                 if (dotdot == sdv) {
1104                         error = EINVAL;
1105                         break;
1106                 }
1107 
1108                 dir = dotdot;
1109                 dotdot = dir->sdev_dotdot;
1110 
1111                 /* done checking because root is reached */
1112                 if (dir == dotdot) {
1113                         break;
1114                 }
1115         }
1116         return (error);
1117 }
1118 
1119 int
1120 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
1121     struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
1122     struct cred *cred)
1123 {
1124         int error = 0;
1125         struct vnode *ovp = SDEVTOV(odv);
1126         struct vnode *nvp;
1127         struct vattr vattr;
1128         int doingdir = (ovp->v_type == VDIR);
1129         char *link = NULL;
1130         int samedir = (oddv == nddv) ? 1 : 0;
1131         int bkstore = 0;
1132         struct sdev_node *idv = NULL;
1133         struct sdev_node *ndv = NULL;
1134         timestruc_t now;
1135 
1136         vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1137         error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
1138         if (error)
1139                 return (error);
1140 
1141         if (!samedir)
1142                 rw_enter(&oddv->sdev_contents, RW_WRITER);
1143         rw_enter(&nddv->sdev_contents, RW_WRITER);
1144 
1145         /*
1146          * the source may have been deleted by another thread before
1147          * we gets here.
1148          */
1149         if (odv->sdev_state != SDEV_READY) {
1150                 error = ENOENT;
1151                 goto err_out;
1152         }
1153 
1154         if (doingdir && (odv == nddv)) {
1155                 error = EINVAL;
1156                 goto err_out;
1157         }
1158 
1159         /*
1160          * If renaming a directory, and the parents are different (".." must be
1161          * changed) then the source dir must not be in the dir hierarchy above
1162          * the target since it would orphan everything below the source dir.
1163          */
1164         if (doingdir && (oddv != nddv)) {
1165                 error = sdev_checkpath(odv, nddv, cred);
1166                 if (error)
1167                         goto err_out;
1168         }
1169 
1170         /* fix the source for a symlink */
1171         if (vattr.va_type == VLNK) {
1172                 if (odv->sdev_symlink == NULL) {
1173                         error = sdev_follow_link(odv);
1174                         if (error) {
1175                                 /*
1176                                  * The underlying symlink doesn't exist. This
1177                                  * node probably shouldn't even exist. While
1178                                  * it's a bit jarring to consumers, we're going
1179                                  * to remove the node from /dev.
1180                                  */
1181                                 if (SDEV_IS_PERSIST((*ndvp)))
1182                                         bkstore = 1;
1183                                 sdev_dirdelete(oddv, odv);
1184                                 if (bkstore) {
1185                                         ASSERT(nddv->sdev_attrvp);
1186                                         error = VOP_REMOVE(nddv->sdev_attrvp,
1187                                             nnm, cred, NULL, 0);
1188                                         if (error)
1189                                                 goto err_out;
1190                                 }
1191                                 error = ENOENT;
1192                                 goto err_out;
1193                         }
1194                 }
1195                 ASSERT(odv->sdev_symlink);
1196                 link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
1197         }
1198 
1199         /* destination existing */
1200         if (*ndvp) {
1201                 nvp = SDEVTOV(*ndvp);
1202                 ASSERT(nvp);
1203 
1204                 /* handling renaming to itself */
1205                 if (odv == *ndvp) {
1206                         error = 0;
1207                         goto err_out;
1208                 }
1209 
1210                 if (nvp->v_type == VDIR) {
1211                         if (!doingdir) {
1212                                 error = EISDIR;
1213                                 goto err_out;
1214                         }
1215 
1216                         if (vn_vfswlock(nvp)) {
1217                                 error = EBUSY;
1218                                 goto err_out;
1219                         }
1220 
1221                         if (vn_mountedvfs(nvp) != NULL) {
1222                                 vn_vfsunlock(nvp);
1223                                 error = EBUSY;
1224                                 goto err_out;
1225                         }
1226 
1227                         /* in case dir1 exists in dir2 and "mv dir1 dir2" */
1228                         if ((*ndvp)->sdev_nlink > 2) {
1229                                 vn_vfsunlock(nvp);
1230                                 error = EEXIST;
1231                                 goto err_out;
1232                         }
1233                         vn_vfsunlock(nvp);
1234 
1235                         /*
1236                          * We did not place the hold on *ndvp, so even though
1237                          * we're deleting the node, we should not get rid of our
1238                          * reference.
1239                          */
1240                         sdev_dirdelete(nddv, *ndvp);
1241                         *ndvp = NULL;
1242                         ASSERT(nddv->sdev_attrvp);
1243                         error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
1244                             nddv->sdev_attrvp, cred, NULL, 0);
1245                         if (error)
1246                                 goto err_out;
1247                 } else {
1248                         if (doingdir) {
1249                                 error = ENOTDIR;
1250                                 goto err_out;
1251                         }
1252 
1253                         if (SDEV_IS_PERSIST((*ndvp))) {
1254                                 bkstore = 1;
1255                         }
1256 
1257                         /*
1258                          * Get rid of the node from the directory cache note.
1259                          * Don't forget that it's not up to us to remove the vn
1260                          * ref on the sdev node, as we did not place it.
1261                          */
1262                         sdev_dirdelete(nddv, *ndvp);
1263                         *ndvp = NULL;
1264                         if (bkstore) {
1265                                 ASSERT(nddv->sdev_attrvp);
1266                                 error = VOP_REMOVE(nddv->sdev_attrvp,
1267                                     nnm, cred, NULL, 0);
1268                                 if (error)
1269                                         goto err_out;
1270                         }
1271                 }
1272         }
1273 
1274         /*
1275          * make a fresh node from the source attrs
1276          */
1277         ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
1278         error = sdev_mknode(nddv, nnm, ndvp, &vattr,
1279             NULL, (void *)link, cred, SDEV_READY);
1280 
1281         if (link != NULL) {
1282                 kmem_free(link, strlen(link) + 1);
1283                 link = NULL;
1284         }
1285 
1286         if (error)
1287                 goto err_out;
1288         ASSERT(*ndvp);
1289         ASSERT((*ndvp)->sdev_state == SDEV_READY);
1290 
1291         /* move dir contents */
1292         if (doingdir) {
1293                 for (idv = SDEV_FIRST_ENTRY(odv); idv;
1294                     idv = SDEV_NEXT_ENTRY(odv, idv)) {
1295                         SDEV_HOLD(idv);
1296                         error = sdev_rnmnode(odv, idv,
1297                             (struct sdev_node *)(*ndvp), &ndv,
1298                             idv->sdev_name, cred);
1299                         SDEV_RELE(idv);
1300                         if (error)
1301                                 goto err_out;
1302                         ndv = NULL;
1303                 }
1304         }
1305 
1306         if ((*ndvp)->sdev_attrvp) {
1307                 sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
1308                     AT_CTIME|AT_ATIME);
1309         } else {
1310                 ASSERT((*ndvp)->sdev_attr);
1311                 gethrestime(&now);
1312                 (*ndvp)->sdev_attr->va_ctime = now;
1313                 (*ndvp)->sdev_attr->va_atime = now;
1314         }
1315 
1316         if (nddv->sdev_attrvp) {
1317                 sdev_update_timestamps(nddv->sdev_attrvp, kcred,
1318                     AT_MTIME|AT_ATIME);
1319         } else {
1320                 ASSERT(nddv->sdev_attr);
1321                 gethrestime(&now);
1322                 nddv->sdev_attr->va_mtime = now;
1323                 nddv->sdev_attr->va_atime = now;
1324         }
1325         rw_exit(&nddv->sdev_contents);
1326         if (!samedir)
1327                 rw_exit(&oddv->sdev_contents);
1328 
1329         SDEV_RELE(*ndvp);
1330         return (error);
1331 
1332 err_out:
1333         if (link != NULL) {
1334                 kmem_free(link, strlen(link) + 1);
1335                 link = NULL;
1336         }
1337 
1338         rw_exit(&nddv->sdev_contents);
1339         if (!samedir)
1340                 rw_exit(&oddv->sdev_contents);
1341         return (error);
1342 }
1343 
1344 /*
1345  * Merge sdev_node specific information into an attribute structure.
1346  *
1347  * note: sdev_node is not locked here
1348  */
1349 void
1350 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
1351 {
1352         struct vnode *vp = SDEVTOV(dv);
1353 
1354         vap->va_nlink = dv->sdev_nlink;
1355         vap->va_nodeid = dv->sdev_ino;
1356         vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
1357         vap->va_type = vp->v_type;
1358 
1359         if (vp->v_type == VDIR) {
1360                 vap->va_rdev = 0;
1361                 vap->va_fsid = vp->v_rdev;
1362         } else if (vp->v_type == VLNK) {
1363                 vap->va_rdev = 0;
1364                 vap->va_mode  &= ~S_IFMT;
1365                 vap->va_mode |= S_IFLNK;
1366         } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
1367                 vap->va_rdev = vp->v_rdev;
1368                 vap->va_mode &= ~S_IFMT;
1369                 if (vap->va_type == VCHR)
1370                         vap->va_mode |= S_IFCHR;
1371                 else
1372                         vap->va_mode |= S_IFBLK;
1373         } else {
1374                 vap->va_rdev = 0;
1375         }
1376 }
1377 
1378 struct vattr *
1379 sdev_getdefault_attr(enum vtype type)
1380 {
1381         if (type == VDIR)
1382                 return (&sdev_vattr_dir);
1383         else if (type == VCHR)
1384                 return (&sdev_vattr_chr);
1385         else if (type == VBLK)
1386                 return (&sdev_vattr_blk);
1387         else if (type == VLNK)
1388                 return (&sdev_vattr_lnk);
1389         else
1390                 return (NULL);
1391 }
1392 int
1393 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
1394 {
1395         int rv = 0;
1396         struct vnode *vp = SDEVTOV(dv);
1397 
1398         switch (vp->v_type) {
1399         case VCHR:
1400         case VBLK:
1401                 /*
1402                  * If vnode is a device, return special vnode instead
1403                  * (though it knows all about -us- via sp->s_realvp)
1404                  */
1405                 *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
1406                 VN_RELE(vp);
1407                 if (*vpp == NULLVP)
1408                         rv = ENOSYS;
1409                 break;
1410         default:        /* most types are returned as is */
1411                 *vpp = vp;
1412                 break;
1413         }
1414         return (rv);
1415 }
1416 
1417 /*
1418  * junction between devname and root file system, e.g. ufs
1419  */
1420 int
1421 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
1422 {
1423         struct vnode *rdvp = ddv->sdev_attrvp;
1424         int rval = 0;
1425 
1426         ASSERT(rdvp);
1427 
1428         rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
1429             NULL);
1430         return (rval);
1431 }
1432 
1433 static int
1434 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
1435 {
1436         struct sdev_node *dv = NULL;
1437         char    *nm;
1438         struct vnode *dirvp;
1439         int     error;
1440         vnode_t *vp;
1441         int eof;
1442         struct iovec iov;
1443         struct uio uio;
1444         struct dirent64 *dp;
1445         dirent64_t *dbuf;
1446         size_t dbuflen;
1447         struct vattr vattr;
1448         char *link = NULL;
1449 
1450         if (ddv->sdev_attrvp == NULL)
1451                 return (0);
1452         if (!(ddv->sdev_flags & SDEV_BUILD))
1453                 return (0);
1454 
1455         dirvp = ddv->sdev_attrvp;
1456         VN_HOLD(dirvp);
1457         dbuf = kmem_zalloc(dlen, KM_SLEEP);
1458 
1459         uio.uio_iov = &iov;
1460         uio.uio_iovcnt = 1;
1461         uio.uio_segflg = UIO_SYSSPACE;
1462         uio.uio_fmode = 0;
1463         uio.uio_extflg = UIO_COPY_CACHED;
1464         uio.uio_loffset = 0;
1465         uio.uio_llimit = MAXOFFSET_T;
1466 
1467         eof = 0;
1468         error = 0;
1469         while (!error && !eof) {
1470                 uio.uio_resid = dlen;
1471                 iov.iov_base = (char *)dbuf;
1472                 iov.iov_len = dlen;
1473                 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1474                 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1475                 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1476 
1477                 dbuflen = dlen - uio.uio_resid;
1478                 if (error || dbuflen == 0)
1479                         break;
1480 
1481                 if (!(ddv->sdev_flags & SDEV_BUILD))
1482                         break;
1483 
1484                 for (dp = dbuf; ((intptr_t)dp <
1485                     (intptr_t)dbuf + dbuflen);
1486                     dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1487                         nm = dp->d_name;
1488 
1489                         if (strcmp(nm, ".") == 0 ||
1490                             strcmp(nm, "..") == 0)
1491                                 continue;
1492 
1493                         vp = NULLVP;
1494                         dv = sdev_cache_lookup(ddv, nm);
1495                         if (dv) {
1496                                 VERIFY(dv->sdev_state != SDEV_ZOMBIE);
1497                                 SDEV_SIMPLE_RELE(dv);
1498                                 continue;
1499                         }
1500 
1501                         /* refill the cache if not already */
1502                         error = devname_backstore_lookup(ddv, nm, &vp);
1503                         if (error)
1504                                 continue;
1505 
1506                         vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
1507                         error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
1508                         if (error)
1509                                 continue;
1510 
1511                         if (vattr.va_type == VLNK) {
1512                                 error = sdev_getlink(vp, &link);
1513                                 if (error) {
1514                                         continue;
1515                                 }
1516                                 ASSERT(link != NULL);
1517                         }
1518 
1519                         if (!rw_tryupgrade(&ddv->sdev_contents)) {
1520                                 rw_exit(&ddv->sdev_contents);
1521                                 rw_enter(&ddv->sdev_contents, RW_WRITER);
1522                         }
1523                         error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
1524                             cred, SDEV_READY);
1525                         rw_downgrade(&ddv->sdev_contents);
1526 
1527                         if (link != NULL) {
1528                                 kmem_free(link, strlen(link) + 1);
1529                                 link = NULL;
1530                         }
1531 
1532                         if (!error) {
1533                                 ASSERT(dv);
1534                                 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1535                                 SDEV_SIMPLE_RELE(dv);
1536                         }
1537                         vp = NULL;
1538                         dv = NULL;
1539                 }
1540         }
1541 
1542 done:
1543         VN_RELE(dirvp);
1544         kmem_free(dbuf, dlen);
1545 
1546         return (error);
1547 }
1548 
1549 void
1550 sdev_filldir_dynamic(struct sdev_node *ddv)
1551 {
1552         int error;
1553         int i;
1554         struct vattr vattr;
1555         struct vattr *vap = &vattr;
1556         char *nm = NULL;
1557         struct sdev_node *dv = NULL;
1558 
1559         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1560         ASSERT((ddv->sdev_flags & SDEV_BUILD));
1561 
1562         *vap = *sdev_getdefault_attr(VDIR);     /* note structure copy here */
1563         gethrestime(&vap->va_atime);
1564         vap->va_mtime = vap->va_atime;
1565         vap->va_ctime = vap->va_atime;
1566         for (i = 0; vtab[i].vt_name != NULL; i++) {
1567                 /*
1568                  * This early, we may be in a read-only /dev environment: leave
1569                  * the creation of any nodes we'd attempt to persist to
1570                  * devfsadm. Because /dev itself is normally persistent, any
1571                  * node which is not marked dynamic will end up being marked
1572                  * persistent. However, some nodes are both dynamic and
1573                  * persistent, mostly lofi and rlofi, so we need to be careful
1574                  * in our check.
1575                  */
1576                 if ((vtab[i].vt_flags & SDEV_PERSIST) ||
1577                     !(vtab[i].vt_flags & SDEV_DYNAMIC))
1578                         continue;
1579                 nm = vtab[i].vt_name;
1580                 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1581                 dv = NULL;
1582                 error = sdev_mknode(ddv, nm, &dv, vap, NULL,
1583                     NULL, kcred, SDEV_READY);
1584                 if (error) {
1585                         cmn_err(CE_WARN, "%s/%s: error %d\n",
1586                             ddv->sdev_name, nm, error);
1587                 } else {
1588                         ASSERT(dv);
1589                         ASSERT(dv->sdev_state != SDEV_ZOMBIE);
1590                         SDEV_SIMPLE_RELE(dv);
1591                 }
1592         }
1593 }
1594 
1595 /*
1596  * Creating a backing store entry based on sdev_attr.
1597  * This is called either as part of node creation in a persistent directory
1598  * or from setattr/setsecattr to persist access attributes across reboot.
1599  */
1600 int
1601 sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
1602 {
1603         int error = 0;
1604         struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
1605         struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
1606         struct vattr *vap = dv->sdev_attr;
1607         char *nm = dv->sdev_name;
1608         struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;
1609 
1610         ASSERT(dv && dv->sdev_name && rdvp);
1611         ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);
1612 
1613 lookup:
1614         /* try to find it in the backing store */
1615         error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
1616             NULL);
1617         if (error == 0) {
1618                 if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
1619                         VN_HOLD(rrvp);
1620                         VN_RELE(*rvp);
1621                         *rvp = rrvp;
1622                 }
1623 
1624                 kmem_free(dv->sdev_attr, sizeof (vattr_t));
1625                 dv->sdev_attr = NULL;
1626                 dv->sdev_attrvp = *rvp;
1627                 return (0);
1628         }
1629 
1630         /* let's try to persist the node */
1631         gethrestime(&vap->va_atime);
1632         vap->va_mtime = vap->va_atime;
1633         vap->va_ctime = vap->va_atime;
1634         vap->va_mask |= AT_TYPE|AT_MODE;
1635         switch (vap->va_type) {
1636         case VDIR:
1637                 error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
1638                 sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
1639                     (void *)(*rvp), error));
1640                 if (!error)
1641                         VN_RELE(*rvp);
1642                 break;
1643         case VCHR:
1644         case VBLK:
1645         case VREG:
1646         case VDOOR:
1647                 error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
1648                     rvp, cred, 0, NULL, NULL);
1649                 sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
1650                     (void *)(*rvp), error));
1651                 if (!error)
1652                         VN_RELE(*rvp);
1653                 break;
1654         case VLNK:
1655                 ASSERT(dv->sdev_symlink);
1656                 error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
1657                     NULL, 0);
1658                 sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
1659                     error));
1660                 break;
1661         default:
1662                 cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
1663                     "create\n", nm);
1664                 /*NOTREACHED*/
1665         }
1666 
1667         /* go back to lookup to factor out spec node and set attrvp */
1668         if (error == 0)
1669                 goto lookup;
1670 
1671         sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
1672         return (error);
1673 }
1674 
1675 static void
1676 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
1677 {
1678         struct sdev_node *dup = NULL;
1679 
1680         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1681         if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
1682                 sdev_direnter(ddv, *dv);
1683         } else {
1684                 VERIFY(dup->sdev_state != SDEV_ZOMBIE);
1685                 SDEV_SIMPLE_RELE(*dv);
1686                 sdev_nodedestroy(*dv, 0);
1687                 *dv = dup;
1688         }
1689 }
1690 
1691 static void
1692 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
1693 {
1694         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1695         sdev_dirdelete(ddv, *dv);
1696 }
1697 
1698 /*
1699  * update the in-core directory cache
1700  */
1701 void
1702 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
1703     sdev_cache_ops_t ops)
1704 {
1705         ASSERT((SDEV_HELD(*dv)));
1706 
1707         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1708         switch (ops) {
1709         case SDEV_CACHE_ADD:
1710                 sdev_cache_add(ddv, dv, nm);
1711                 break;
1712         case SDEV_CACHE_DELETE:
1713                 sdev_cache_delete(ddv, dv);
1714                 break;
1715         default:
1716                 break;
1717         }
1718 }
1719 
1720 /*
1721  * retrieve the named entry from the directory cache
1722  */
1723 struct sdev_node *
1724 sdev_cache_lookup(struct sdev_node *ddv, char *nm)
1725 {
1726         struct sdev_node *dv = NULL;
1727 
1728         ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
1729         dv = sdev_findbyname(ddv, nm);
1730 
1731         return (dv);
1732 }
1733 
1734 /*
1735  * Implicit reconfig for nodes constructed by a link generator
1736  * Start devfsadm if needed, or if devfsadm is in progress,
1737  * prepare to block on devfsadm either completing or
1738  * constructing the desired node.  As devfsadmd is global
1739  * in scope, constructing all necessary nodes, we only
1740  * need to initiate it once.
1741  */
1742 static int
1743 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
1744 {
1745         int error = 0;
1746 
1747         if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
1748                 sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
1749                     ddv->sdev_name, nm, devfsadm_state));
1750                 mutex_enter(&dv->sdev_lookup_lock);
1751                 SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
1752                 mutex_exit(&dv->sdev_lookup_lock);
1753                 error = 0;
1754         } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
1755                 sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
1756                     ddv->sdev_name, nm, devfsadm_state));
1757 
1758                 sdev_devfsadmd_thread(ddv, dv, kcred);
1759                 mutex_enter(&dv->sdev_lookup_lock);
1760                 SDEV_BLOCK_OTHERS(dv,
1761                     (SDEV_LOOKUP | SDEV_LGWAITING));
1762                 mutex_exit(&dv->sdev_lookup_lock);
1763                 error = 0;
1764         } else {
1765                 error = -1;
1766         }
1767 
1768         return (error);
1769 }
1770 
1771 /*
1772  *  Support for specialized device naming construction mechanisms
1773  */
1774 static int
1775 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
1776     int (*callback)(struct sdev_node *, char *, void **, struct cred *,
1777     void *, char *), int flags, struct cred *cred)
1778 {
1779         int rv = 0;
1780         char *physpath = NULL;
1781         struct vattr vattr;
1782         struct vattr *vap = &vattr;
1783         struct sdev_node *dv = NULL;
1784 
1785         ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
1786         if (flags & SDEV_VLINK) {
1787                 physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1788                 rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
1789                     NULL);
1790                 if (rv) {
1791                         kmem_free(physpath, MAXPATHLEN);
1792                         return (-1);
1793                 }
1794 
1795                 *vap = *sdev_getdefault_attr(VLNK);     /* structure copy */
1796                 vap->va_size = strlen(physpath);
1797                 gethrestime(&vap->va_atime);
1798                 vap->va_mtime = vap->va_atime;
1799                 vap->va_ctime = vap->va_atime;
1800 
1801                 rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
1802                     (void *)physpath, cred, SDEV_READY);
1803                 kmem_free(physpath, MAXPATHLEN);
1804                 if (rv)
1805                         return (rv);
1806         } else if (flags & SDEV_VATTR) {
1807                 /*
1808                  * /dev/pts
1809                  *
1810                  * callback is responsible to set the basic attributes,
1811                  * e.g. va_type/va_uid/va_gid/
1812                  *    dev_t if VCHR or VBLK/
1813                  */
1814                 ASSERT(callback);
1815                 rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
1816                 if (rv) {
1817                         sdcmn_err3(("devname_lookup_func: SDEV_NONE "
1818                             "callback failed \n"));
1819                         return (-1);
1820                 }
1821 
1822                 rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
1823                     cred, SDEV_READY);
1824 
1825                 if (rv)
1826                         return (rv);
1827 
1828         } else {
1829                 impossible(("lookup: %s/%s by %s not supported (%d)\n",
1830                     SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
1831                     __LINE__));
1832                 rv = -1;
1833         }
1834 
1835         *dvp = dv;
1836         return (rv);
1837 }
1838 
/*
 * Tell whether exec_name is exactly "devfsadm".
 *
 * note: because devfsadmd -> /usr/sbin/devfsadm it is safe to use
 * "devfsadm" to capture the lookups from devfsadm and its daemon
 * version.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int
is_devfsadm_thread(char *exec_name)
{
        return (strcmp(exec_name, "devfsadm") == 0);
}
1851 
1852 /*
1853  * Lookup Order:
1854  *      sdev_node cache;
1855  *      backing store (SDEV_PERSIST);
1856  *      DBNR: a. dir_ops implemented in the loadable modules;
1857  *            b. vnode ops in vtab.
1858  */
1859 int
1860 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
1861     struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
1862     struct cred *, void *, char *), int flags)
1863 {
1864         int rv = 0, nmlen;
1865         struct vnode *rvp = NULL;
1866         struct sdev_node *dv = NULL;
1867         int     retried = 0;
1868         int     error = 0;
1869         struct vattr vattr;
1870         char *lookup_thread = curproc->p_user.u_comm;
1871         int failed_flags = 0;
1872         int (*vtor)(struct sdev_node *) = NULL;
1873         int state;
1874         int parent_state;
1875         char *link = NULL;
1876 
1877         if (SDEVTOV(ddv)->v_type != VDIR)
1878                 return (ENOTDIR);
1879 
1880         /*
1881          * Empty name or ., return node itself.
1882          */
1883         nmlen = strlen(nm);
1884         if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
1885                 *vpp = SDEVTOV(ddv);
1886                 VN_HOLD(*vpp);
1887                 return (0);
1888         }
1889 
1890         /*
1891          * .., return the parent directory
1892          */
1893         if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
1894                 *vpp = SDEVTOV(ddv->sdev_dotdot);
1895                 VN_HOLD(*vpp);
1896                 return (0);
1897         }
1898 
1899         rw_enter(&ddv->sdev_contents, RW_READER);
1900         if (ddv->sdev_flags & SDEV_VTOR) {
1901                 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
1902                 ASSERT(vtor);
1903         }
1904 
1905 tryagain:
1906         /*
1907          * (a) directory cache lookup:
1908          */
1909         ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1910         parent_state = ddv->sdev_state;
1911         dv = sdev_cache_lookup(ddv, nm);
1912         if (dv) {
1913                 state = dv->sdev_state;
1914                 switch (state) {
1915                 case SDEV_INIT:
1916                         if (is_devfsadm_thread(lookup_thread))
1917                                 break;
1918 
1919                         /* ZOMBIED parent won't allow node creation */
1920                         if (parent_state == SDEV_ZOMBIE) {
1921                                 SD_TRACE_FAILED_LOOKUP(ddv, nm,
1922                                     retried);
1923                                 goto nolock_notfound;
1924                         }
1925 
1926                         mutex_enter(&dv->sdev_lookup_lock);
1927                         /* compensate the threads started after devfsadm */
1928                         if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
1929                             !(SDEV_IS_LOOKUP(dv)))
1930                                 SDEV_BLOCK_OTHERS(dv,
1931                                     (SDEV_LOOKUP | SDEV_LGWAITING));
1932 
1933                         if (SDEV_IS_LOOKUP(dv)) {
1934                                 failed_flags |= SLF_REBUILT;
1935                                 rw_exit(&ddv->sdev_contents);
1936                                 error = sdev_wait4lookup(dv, SDEV_LOOKUP);
1937                                 mutex_exit(&dv->sdev_lookup_lock);
1938                                 rw_enter(&ddv->sdev_contents, RW_READER);
1939 
1940                                 if (error != 0) {
1941                                         SD_TRACE_FAILED_LOOKUP(ddv, nm,
1942                                             retried);
1943                                         goto nolock_notfound;
1944                                 }
1945 
1946                                 state = dv->sdev_state;
1947                                 if (state == SDEV_INIT) {
1948                                         SD_TRACE_FAILED_LOOKUP(ddv, nm,
1949                                             retried);
1950                                         goto nolock_notfound;
1951                                 } else if (state == SDEV_READY) {
1952                                         goto found;
1953                                 } else if (state == SDEV_ZOMBIE) {
1954                                         rw_exit(&ddv->sdev_contents);
1955                                         SD_TRACE_FAILED_LOOKUP(ddv, nm,
1956                                             retried);
1957                                         SDEV_RELE(dv);
1958                                         goto lookup_failed;
1959                                 }
1960                         } else {
1961                                 mutex_exit(&dv->sdev_lookup_lock);
1962                         }
1963                         break;
1964                 case SDEV_READY:
1965                         goto found;
1966                 case SDEV_ZOMBIE:
1967                         rw_exit(&ddv->sdev_contents);
1968                         SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1969                         SDEV_RELE(dv);
1970                         goto lookup_failed;
1971                 default:
1972                         rw_exit(&ddv->sdev_contents);
1973                         SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1974                         sdev_lookup_failed(ddv, nm, failed_flags);
1975                         *vpp = NULLVP;
1976                         return (ENOENT);
1977                 }
1978         }
1979         ASSERT(RW_READ_HELD(&ddv->sdev_contents));
1980 
1981         /*
1982          * ZOMBIED parent does not allow new node creation.
1983          * bail out early
1984          */
1985         if (parent_state == SDEV_ZOMBIE) {
1986                 rw_exit(&ddv->sdev_contents);
1987                 *vpp = NULLVP;
1988                 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
1989                 return (ENOENT);
1990         }
1991 
1992         /*
1993          * (b0): backing store lookup
1994          *      SDEV_PERSIST is default except:
1995          *              1) pts nodes
1996          *              2) non-chmod'ed local nodes
1997          *              3) zvol nodes
1998          */
1999         if (SDEV_IS_PERSIST(ddv)) {
2000                 error = devname_backstore_lookup(ddv, nm, &rvp);
2001 
2002                 if (!error) {
2003 
2004                         vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
2005                         error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
2006                         if (error) {
2007                                 rw_exit(&ddv->sdev_contents);
2008                                 if (dv)
2009                                         SDEV_RELE(dv);
2010                                 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2011                                 sdev_lookup_failed(ddv, nm, failed_flags);
2012                                 *vpp = NULLVP;
2013                                 return (ENOENT);
2014                         }
2015 
2016                         if (vattr.va_type == VLNK) {
2017                                 error = sdev_getlink(rvp, &link);
2018                                 if (error) {
2019                                         rw_exit(&ddv->sdev_contents);
2020                                         if (dv)
2021                                                 SDEV_RELE(dv);
2022                                         SD_TRACE_FAILED_LOOKUP(ddv, nm,
2023                                             retried);
2024                                         sdev_lookup_failed(ddv, nm,
2025                                             failed_flags);
2026                                         *vpp = NULLVP;
2027                                         return (ENOENT);
2028                                 }
2029                                 ASSERT(link != NULL);
2030                         }
2031 
2032                         if (!rw_tryupgrade(&ddv->sdev_contents)) {
2033                                 rw_exit(&ddv->sdev_contents);
2034                                 rw_enter(&ddv->sdev_contents, RW_WRITER);
2035                         }
2036                         error = sdev_mknode(ddv, nm, &dv, &vattr,
2037                             rvp, link, cred, SDEV_READY);
2038                         rw_downgrade(&ddv->sdev_contents);
2039 
2040                         if (link != NULL) {
2041                                 kmem_free(link, strlen(link) + 1);
2042                                 link = NULL;
2043                         }
2044 
2045                         if (error) {
2046                                 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2047                                 rw_exit(&ddv->sdev_contents);
2048                                 if (dv)
2049                                         SDEV_RELE(dv);
2050                                 goto lookup_failed;
2051                         } else {
2052                                 goto found;
2053                         }
2054                 } else if (retried) {
2055                         rw_exit(&ddv->sdev_contents);
2056                         sdcmn_err3(("retry of lookup of %s/%s: failed\n",
2057                             ddv->sdev_name, nm));
2058                         if (dv)
2059                                 SDEV_RELE(dv);
2060                         SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2061                         sdev_lookup_failed(ddv, nm, failed_flags);
2062                         *vpp = NULLVP;
2063                         return (ENOENT);
2064                 }
2065         }
2066 
2067 lookup_create_node:
2068         /* first thread that is doing the lookup on this node */
2069         if (callback) {
2070                 ASSERT(dv == NULL);
2071                 if (!rw_tryupgrade(&ddv->sdev_contents)) {
2072                         rw_exit(&ddv->sdev_contents);
2073                         rw_enter(&ddv->sdev_contents, RW_WRITER);
2074                 }
2075                 error = sdev_call_dircallback(ddv, &dv, nm, callback,
2076                     flags, cred);
2077                 rw_downgrade(&ddv->sdev_contents);
2078                 if (error == 0) {
2079                         goto found;
2080                 } else {
2081                         SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2082                         rw_exit(&ddv->sdev_contents);
2083                         goto lookup_failed;
2084                 }
2085         }
2086         if (!dv) {
2087                 if (!rw_tryupgrade(&ddv->sdev_contents)) {
2088                         rw_exit(&ddv->sdev_contents);
2089                         rw_enter(&ddv->sdev_contents, RW_WRITER);
2090                 }
2091                 error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
2092                     cred, SDEV_INIT);
2093                 if (!dv) {
2094                         rw_exit(&ddv->sdev_contents);
2095                         SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2096                         sdev_lookup_failed(ddv, nm, failed_flags);
2097                         *vpp = NULLVP;
2098                         return (ENOENT);
2099                 }
2100                 rw_downgrade(&ddv->sdev_contents);
2101         }
2102 
2103         /*
2104          * (b1) invoking devfsadm once per life time for devfsadm nodes
2105          */
2106         ASSERT(SDEV_HELD(dv));
2107 
2108         if (SDEV_IS_NO_NCACHE(dv))
2109                 failed_flags |= SLF_NO_NCACHE;
2110         if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
2111             SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
2112             ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
2113                 ASSERT(SDEV_HELD(dv));
2114                 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2115                 goto nolock_notfound;
2116         }
2117 
2118         /*
2119          * filter out known non-existent devices recorded
2120          * during initial reconfiguration boot for which
2121          * reconfig should not be done and lookup may
2122          * be short-circuited now.
2123          */
2124         if (sdev_lookup_filter(ddv, nm)) {
2125                 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2126                 goto nolock_notfound;
2127         }
2128 
2129         /* bypassing devfsadm internal nodes */
2130         if (is_devfsadm_thread(lookup_thread)) {
2131                 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2132                 goto nolock_notfound;
2133         }
2134 
2135         if (sdev_reconfig_disable) {
2136                 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2137                 goto nolock_notfound;
2138         }
2139 
2140         error = sdev_call_devfsadmd(ddv, dv, nm);
2141         if (error == 0) {
2142                 sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
2143                     ddv->sdev_name, nm, curproc->p_user.u_comm));
2144                 if (sdev_reconfig_verbose) {
2145                         cmn_err(CE_CONT,
2146                             "?lookup of %s/%s by %s: reconfig\n",
2147                             ddv->sdev_name, nm, curproc->p_user.u_comm);
2148                 }
2149                 retried = 1;
2150                 failed_flags |= SLF_REBUILT;
2151                 ASSERT(dv->sdev_state != SDEV_ZOMBIE);
2152                 SDEV_SIMPLE_RELE(dv);
2153                 goto tryagain;
2154         } else {
2155                 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2156                 goto nolock_notfound;
2157         }
2158 
2159 found:
2160         ASSERT(dv->sdev_state == SDEV_READY);
2161         if (vtor) {
2162                 /*
2163                  * Check validity of returned node
2164                  */
2165                 switch (vtor(dv)) {
2166                 case SDEV_VTOR_VALID:
2167                         break;
2168                 case SDEV_VTOR_STALE:
2169                         /*
2170                          * The name exists, but the cache entry is
2171                          * stale and needs to be re-created.
2172                          */
2173                         ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2174                         if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
2175                                 rw_exit(&ddv->sdev_contents);
2176                                 rw_enter(&ddv->sdev_contents, RW_WRITER);
2177                         }
2178                         sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE);
2179                         rw_downgrade(&ddv->sdev_contents);
2180                         SDEV_RELE(dv);
2181                         dv = NULL;
2182                         goto lookup_create_node;
2183                         /* FALLTHRU */
2184                 case SDEV_VTOR_INVALID:
2185                         SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2186                         sdcmn_err7(("lookup: destroy invalid "
2187                             "node: %s(%p)\n", dv->sdev_name, (void *)dv));
2188                         goto nolock_notfound;
2189                 case SDEV_VTOR_SKIP:
2190                         sdcmn_err7(("lookup: node not applicable - "
2191                             "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
2192                         rw_exit(&ddv->sdev_contents);
2193                         SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
2194                         SDEV_RELE(dv);
2195                         goto lookup_failed;
2196                 default:
2197                         cmn_err(CE_PANIC,
2198                             "dev fs: validator failed: %s(%p)\n",
2199                             dv->sdev_name, (void *)dv);
2200                         break;
2201                 }
2202         }
2203 
2204         rw_exit(&ddv->sdev_contents);
2205         rv = sdev_to_vp(dv, vpp);
2206         sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
2207             "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
2208             dv->sdev_state, nm, rv));
2209         return (rv);
2210 
2211 nolock_notfound:
2212         /*
2213          * Destroy the node that is created for synchronization purposes.
2214          */
2215         sdcmn_err3(("devname_lookup_func: %s with state %d\n",
2216             nm, dv->sdev_state));
2217         ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2218         if (dv->sdev_state == SDEV_INIT) {
2219                 if (!rw_tryupgrade(&ddv->sdev_contents)) {
2220                         rw_exit(&ddv->sdev_contents);
2221                         rw_enter(&ddv->sdev_contents, RW_WRITER);
2222                 }
2223 
2224                 /*
2225                  * Node state may have changed during the lock
2226                  * changes. Re-check.
2227                  */
2228                 if (dv->sdev_state == SDEV_INIT) {
2229                         sdev_dirdelete(ddv, dv);
2230                         rw_exit(&ddv->sdev_contents);
2231                         sdev_lookup_failed(ddv, nm, failed_flags);
2232                         SDEV_RELE(dv);
2233                         *vpp = NULL;
2234                         return (ENOENT);
2235                 }
2236         }
2237 
2238         rw_exit(&ddv->sdev_contents);
2239         SDEV_RELE(dv);
2240 
2241 lookup_failed:
2242         sdev_lookup_failed(ddv, nm, failed_flags);
2243         *vpp = NULL;
2244         return (ENOENT);
2245 }
2246 
2247 /*
2248  * Given a directory node, mark all nodes beneath as
2249  * STALE, i.e. nodes that don't exist as far as new
2250  * consumers are concerned.  Remove them from the
2251  * list of directory entries so that no lookup or
2252  * directory traversal will find them.  The node
2253  * not deallocated so existing holds are not affected.
2254  */
2255 void
2256 sdev_stale(struct sdev_node *ddv)
2257 {
2258         struct sdev_node *dv;
2259         struct vnode *vp;
2260 
2261         ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2262 
2263         rw_enter(&ddv->sdev_contents, RW_WRITER);
2264         while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) {
2265                 vp = SDEVTOV(dv);
2266                 SDEV_HOLD(dv);
2267                 if (vp->v_type == VDIR)
2268                         sdev_stale(dv);
2269 
2270                 sdev_dirdelete(ddv, dv);
2271                 SDEV_RELE(dv);
2272         }
2273         ddv->sdev_flags |= SDEV_BUILD;
2274         rw_exit(&ddv->sdev_contents);
2275 }
2276 
2277 /*
2278  * Given a directory node, clean out all the nodes beneath.
2279  * If expr is specified, clean node with names matching expr.
2280  * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
2281  *      so they are excluded from future lookups.
2282  */
2283 int
2284 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
2285 {
2286         int error = 0;
2287         int busy = 0;
2288         struct vnode *vp;
2289         struct sdev_node *dv;
2290         int bkstore = 0;
2291         int len = 0;
2292         char *bks_name = NULL;
2293 
2294         ASSERT(SDEVTOV(ddv)->v_type == VDIR);
2295 
2296         /*
2297          * We try our best to destroy all unused sdev_node's
2298          */
2299         rw_enter(&ddv->sdev_contents, RW_WRITER);
2300         while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) {
2301                 vp = SDEVTOV(dv);
2302 
2303                 if (expr && gmatch(dv->sdev_name, expr) == 0)
2304                         continue;
2305 
2306                 if (vp->v_type == VDIR &&
2307                     sdev_cleandir(dv, NULL, flags) != 0) {
2308                         sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2309                             dv->sdev_name));
2310                         busy++;
2311                         continue;
2312                 }
2313 
2314                 if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
2315                         sdcmn_err9(("sdev_cleandir: dir %s busy\n",
2316                             dv->sdev_name));
2317                         busy++;
2318                         continue;
2319                 }
2320 
2321                 /*
2322                  * at this point, either dv is not held or SDEV_ENFORCE
2323                  * is specified. In either case, dv needs to be deleted
2324                  */
2325                 SDEV_HOLD(dv);
2326 
2327                 bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
2328                 if (bkstore && (vp->v_type == VDIR))
2329                         bkstore += 1;
2330 
2331                 if (bkstore) {
2332                         len = strlen(dv->sdev_name) + 1;
2333                         bks_name = kmem_alloc(len, KM_SLEEP);
2334                         bcopy(dv->sdev_name, bks_name, len);
2335                 }
2336 
2337                 sdev_dirdelete(ddv, dv);
2338 
2339                 /* take care the backing store clean up */
2340                 if (bkstore) {
2341                         ASSERT(bks_name);
2342                         ASSERT(ddv->sdev_attrvp);
2343 
2344                         if (bkstore == 1) {
2345                                 error = VOP_REMOVE(ddv->sdev_attrvp,
2346                                     bks_name, kcred, NULL, 0);
2347                         } else if (bkstore == 2) {
2348                                 error = VOP_RMDIR(ddv->sdev_attrvp,
2349                                     bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
2350                         }
2351 
2352                         /* do not propagate the backing store errors */
2353                         if (error) {
2354                                 sdcmn_err9(("sdev_cleandir: backing store"
2355                                     "not cleaned\n"));
2356                                 error = 0;
2357                         }
2358 
2359                         bkstore = 0;
2360                         kmem_free(bks_name, len);
2361                         bks_name = NULL;
2362                         len = 0;
2363                 }
2364 
2365                 ddv->sdev_flags |= SDEV_BUILD;
2366                 SDEV_RELE(dv);
2367         }
2368 
2369         ddv->sdev_flags |= SDEV_BUILD;
2370         rw_exit(&ddv->sdev_contents);
2371 
2372         if (busy) {
2373                 error = EBUSY;
2374         }
2375 
2376         return (error);
2377 }
2378 
2379 /*
2380  * a convenient wrapper for readdir() funcs
2381  */
2382 size_t
2383 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
2384 {
2385         size_t reclen = DIRENT64_RECLEN(strlen(nm));
2386         if (reclen > size)
2387                 return (0);
2388 
2389         de->d_ino = (ino64_t)ino;
2390         de->d_off = (off64_t)off + 1;
2391         de->d_reclen = (ushort_t)reclen;
2392         (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
2393         return (reclen);
2394 }
2395 
2396 /*
2397  * sdev_mount service routines
2398  */
2399 int
2400 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
2401 {
2402         int     error;
2403 
2404         if (uap->datalen != sizeof (*args))
2405                 return (EINVAL);
2406 
2407         if (error = copyin(uap->dataptr, args, sizeof (*args))) {
2408                 cmn_err(CE_WARN, "sdev_copyin_mountargs: can not"
2409                     "get user data. error %d\n", error);
2410                 return (EFAULT);
2411         }
2412 
2413         return (0);
2414 }
2415 
/*
 * Step from the directory entry dp to the one that follows it,
 * d_reclen bytes further on.  Any earlier definition is replaced.
 */
#undef nextdp
#define nextdp(dp) \
        ((struct dirent64 *)(intptr_t)((char *)(dp) + (dp)->d_reclen))
2421 
2422 /*
2423  * readdir helper func
2424  */
2425 int
2426 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
2427     int flags)
2428 {
2429         struct sdev_node *ddv = VTOSDEV(vp);
2430         struct sdev_node *dv;
2431         dirent64_t      *dp;
2432         ulong_t         outcount = 0;
2433         size_t          namelen;
2434         ulong_t         alloc_count;
2435         void            *outbuf;
2436         struct iovec    *iovp;
2437         int             error = 0;
2438         size_t          reclen;
2439         offset_t        diroff;
2440         offset_t        soff;
2441         int             this_reclen;
2442         int (*vtor)(struct sdev_node *) = NULL;
2443         struct vattr attr;
2444         timestruc_t now;
2445 
2446         ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
2447         ASSERT(RW_READ_HELD(&ddv->sdev_contents));
2448 
2449         if (uiop->uio_loffset >= MAXOFF_T) {
2450                 if (eofp)
2451                         *eofp = 1;
2452                 return (0);
2453         }
2454 
2455         if (uiop->uio_iovcnt != 1)
2456                 return (EINVAL);
2457 
2458         if (vp->v_type != VDIR)
2459                 return (ENOTDIR);
2460 
2461         if (ddv->sdev_flags & SDEV_VTOR) {
2462                 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
2463                 ASSERT(vtor);
2464         }
2465 
2466         if (eofp != NULL)
2467                 *eofp = 0;
2468 
2469         soff = uiop->uio_loffset;
2470         iovp = uiop->uio_iov;
2471         alloc_count = iovp->iov_len;
2472         dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
2473         outcount = 0;
2474 
2475         if (ddv->sdev_state == SDEV_ZOMBIE)
2476                 goto get_cache;
2477 
2478         if (SDEV_IS_GLOBAL(ddv)) {
2479 
2480                 if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
2481                     !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
2482                     !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
2483                     ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
2484                     !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
2485                     !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
2486                     !sdev_reconfig_disable) {
2487                         /*
2488                          * invoking "devfsadm" to do system device reconfig
2489                          */
2490                         mutex_enter(&ddv->sdev_lookup_lock);
2491                         SDEV_BLOCK_OTHERS(ddv,
2492                             (SDEV_READDIR|SDEV_LGWAITING));
2493                         mutex_exit(&ddv->sdev_lookup_lock);
2494 
2495                         sdcmn_err8(("readdir of %s by %s: reconfig\n",
2496                             ddv->sdev_path, curproc->p_user.u_comm));
2497                         if (sdev_reconfig_verbose) {
2498                                 cmn_err(CE_CONT,
2499                                     "?readdir of %s by %s: reconfig\n",
2500                                     ddv->sdev_path, curproc->p_user.u_comm);
2501                         }
2502 
2503                         sdev_devfsadmd_thread(ddv, NULL, kcred);
2504                 } else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
2505                         /*
2506                          * compensate the "ls" started later than "devfsadm"
2507                          */
2508                         mutex_enter(&ddv->sdev_lookup_lock);
2509                         SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
2510                         mutex_exit(&ddv->sdev_lookup_lock);
2511                 }
2512 
2513                 /*
2514                  * release the contents lock so that
2515                  * the cache may be updated by devfsadmd
2516                  */
2517                 rw_exit(&ddv->sdev_contents);
2518                 mutex_enter(&ddv->sdev_lookup_lock);
2519                 if (SDEV_IS_READDIR(ddv))
2520                         (void) sdev_wait4lookup(ddv, SDEV_READDIR);
2521                 mutex_exit(&ddv->sdev_lookup_lock);
2522                 rw_enter(&ddv->sdev_contents, RW_READER);
2523 
2524                 sdcmn_err4(("readdir of directory %s by %s\n",
2525                     ddv->sdev_name, curproc->p_user.u_comm));
2526                 if (ddv->sdev_flags & SDEV_BUILD) {
2527                         if (SDEV_IS_PERSIST(ddv)) {
2528                                 error = sdev_filldir_from_store(ddv,
2529                                     alloc_count, cred);
2530                         }
2531                         ddv->sdev_flags &= ~SDEV_BUILD;
2532                 }
2533         }
2534 
2535 get_cache:
2536         /* handle "." and ".." */
2537         diroff = 0;
2538         if (soff == 0) {
2539                 /* first time */
2540                 this_reclen = DIRENT64_RECLEN(1);
2541                 if (alloc_count < this_reclen) {
2542                         error = EINVAL;
2543                         goto done;
2544                 }
2545 
2546                 dp->d_ino = (ino64_t)ddv->sdev_ino;
2547                 dp->d_off = (off64_t)1;
2548                 dp->d_reclen = (ushort_t)this_reclen;
2549 
2550                 (void) strncpy(dp->d_name, ".",
2551                     DIRENT64_NAMELEN(this_reclen));
2552                 outcount += dp->d_reclen;
2553                 dp = nextdp(dp);
2554         }
2555 
2556         diroff++;
2557         if (soff <= 1) {
2558                 this_reclen = DIRENT64_RECLEN(2);
2559                 if (alloc_count < outcount + this_reclen) {
2560                         error = EINVAL;
2561                         goto done;
2562                 }
2563 
2564                 dp->d_reclen = (ushort_t)this_reclen;
2565                 dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
2566                 dp->d_off = (off64_t)2;
2567 
2568                 (void) strncpy(dp->d_name, "..",
2569                     DIRENT64_NAMELEN(this_reclen));
2570                 outcount += dp->d_reclen;
2571 
2572                 dp = nextdp(dp);
2573         }
2574 
2575 
2576         /* gets the cache */
2577         diroff++;
2578         for (dv = SDEV_FIRST_ENTRY(ddv); dv;
2579             dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
2580                 sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
2581                     diroff, soff, dv->sdev_name));
2582 
2583                 /* bypassing pre-matured nodes */
2584                 if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
2585                         sdcmn_err3(("sdev_readdir: pre-mature node  "
2586                             "%s %d\n", dv->sdev_name, dv->sdev_state));
2587                         continue;
2588                 }
2589 
2590                 /*
2591                  * Check validity of node
2592                  * Drop invalid and nodes to be skipped.
2593                  * A node the validator indicates as stale needs
2594                  * to be returned as presumably the node name itself
2595                  * is valid and the node data itself will be refreshed
2596                  * on lookup.  An application performing a readdir then
2597                  * stat on each entry should thus always see consistent
2598                  * data.  In any case, it is not possible to synchronize
2599                  * with dynamic kernel state, and any view we return can
2600                  * never be anything more than a snapshot at a point in time.
2601                  */
2602                 if (vtor) {
2603                         switch (vtor(dv)) {
2604                         case SDEV_VTOR_VALID:
2605                                 break;
2606                         case SDEV_VTOR_INVALID:
2607                         case SDEV_VTOR_SKIP:
2608                                 continue;
2609                         case SDEV_VTOR_STALE:
2610                                 sdcmn_err3(("sdev_readir: %s stale\n",
2611                                     dv->sdev_name));
2612                                 break;
2613                         default:
2614                                 cmn_err(CE_PANIC,
2615                                     "dev fs: validator failed: %s(%p)\n",
2616                                     dv->sdev_name, (void *)dv);
2617                                 break;
2618                         /*NOTREACHED*/
2619                         }
2620                 }
2621 
2622                 namelen = strlen(dv->sdev_name);
2623                 reclen = DIRENT64_RECLEN(namelen);
2624                 if (outcount + reclen > alloc_count) {
2625                         goto full;
2626                 }
2627                 dp->d_reclen = (ushort_t)reclen;
2628                 dp->d_ino = (ino64_t)dv->sdev_ino;
2629                 dp->d_off = (off64_t)diroff + 1;
2630                 (void) strncpy(dp->d_name, dv->sdev_name,
2631                     DIRENT64_NAMELEN(reclen));
2632                 outcount += reclen;
2633                 dp = nextdp(dp);
2634         }
2635 
2636 full:
2637         sdcmn_err4(("sdev_readdir: moving %lu bytes: "
2638             "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
2639             (void *)dv));
2640 
2641         if (outcount)
2642                 error = uiomove(outbuf, outcount, UIO_READ, uiop);
2643 
2644         if (!error) {
2645                 uiop->uio_loffset = diroff;
2646                 if (eofp)
2647                         *eofp = dv ? 0 : 1;
2648         }
2649 
2650 
2651         if (ddv->sdev_attrvp) {
2652                 gethrestime(&now);
2653                 attr.va_ctime = now;
2654                 attr.va_atime = now;
2655                 attr.va_mask = AT_CTIME|AT_ATIME;
2656 
2657                 (void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
2658         }
2659 done:
2660         kmem_free(outbuf, alloc_count);
2661         return (error);
2662 }
2663 
2664 static int
2665 sdev_modctl_lookup(const char *path, vnode_t **r_vp)
2666 {
2667         vnode_t *vp;
2668         vnode_t *cvp;
2669         struct sdev_node *svp;
2670         char *nm;
2671         struct pathname pn;
2672         int error;
2673         int persisted = 0;
2674 
2675         ASSERT(INGLOBALZONE(curproc));
2676 
2677         if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
2678                 return (error);
2679         nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
2680 
2681         vp = rootdir;
2682         VN_HOLD(vp);
2683 
2684         while (pn_pathleft(&pn)) {
2685                 ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
2686                 (void) pn_getcomponent(&pn, nm);
2687 
2688                 /*
2689                  * Deal with the .. special case where we may be
2690                  * traversing up across a mount point, to the
2691                  * root of this filesystem or global root.
2692                  */
2693                 if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
2694 checkforroot:
2695                         if (VN_CMP(vp, rootdir)) {
2696                                 nm[1] = 0;
2697                         } else if (vp->v_flag & VROOT) {
2698                                 vfs_t *vfsp;
2699                                 cvp = vp;
2700                                 vfsp = cvp->v_vfsp;
2701                                 vfs_rlock_wait(vfsp);
2702                                 vp = cvp->v_vfsp->vfs_vnodecovered;
2703                                 if (vp == NULL ||
2704                                     (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
2705                                         vfs_unlock(vfsp);
2706                                         VN_RELE(cvp);
2707                                         error = EIO;
2708                                         break;
2709                                 }
2710                                 VN_HOLD(vp);
2711                                 vfs_unlock(vfsp);
2712                                 VN_RELE(cvp);
2713                                 cvp = NULL;
2714                                 goto checkforroot;
2715                         }
2716                 }
2717 
2718                 error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
2719                     NULL, NULL);
2720                 if (error) {
2721                         VN_RELE(vp);
2722                         break;
2723                 }
2724 
2725                 /* traverse mount points encountered on our journey */
2726                 if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
2727                         VN_RELE(vp);
2728                         VN_RELE(cvp);
2729                         break;
2730                 }
2731 
2732                 /*
2733                  * symbolic link, can be either relative and absolute
2734                  */
2735                 if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
2736                         struct pathname linkpath;
2737                         pn_alloc(&linkpath);
2738                         if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
2739                                 pn_free(&linkpath);
2740                                 break;
2741                         }
2742                         if (pn_pathleft(&linkpath) == 0)
2743                                 (void) pn_set(&linkpath, ".");
2744                         error = pn_insert(&pn, &linkpath, strlen(nm));
2745                         pn_free(&linkpath);
2746                         if (pn.pn_pathlen == 0) {
2747                                 VN_RELE(vp);
2748                                 return (ENOENT);
2749                         }
2750                         if (pn.pn_path[0] == '/') {
2751                                 pn_skipslash(&pn);
2752                                 VN_RELE(vp);
2753                                 VN_RELE(cvp);
2754                                 vp = rootdir;
2755                                 VN_HOLD(vp);
2756                         } else {
2757                                 VN_RELE(cvp);
2758                         }
2759                         continue;
2760                 }
2761 
2762                 VN_RELE(vp);
2763 
2764                 /*
2765                  * Direct the operation to the persisting filesystem
2766                  * underlying /dev.  Bail if we encounter a
2767                  * non-persistent dev entity here.
2768                  */
2769                 if (cvp->v_vfsp->vfs_fstype == devtype) {
2770 
2771                         if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) {
2772                                 error = ENOENT;
2773                                 VN_RELE(cvp);
2774                                 break;
2775                         }
2776 
2777                         if (VTOSDEV(cvp) == NULL) {
2778                                 error = ENOENT;
2779                                 VN_RELE(cvp);
2780                                 break;
2781                         }
2782                         svp = VTOSDEV(cvp);
2783                         if ((vp = svp->sdev_attrvp) == NULL) {
2784                                 error = ENOENT;
2785                                 VN_RELE(cvp);
2786                                 break;
2787                         }
2788                         persisted = 1;
2789                         VN_HOLD(vp);
2790                         VN_RELE(cvp);
2791                         cvp = vp;
2792                 }
2793 
2794                 vp = cvp;
2795                 pn_skipslash(&pn);
2796         }
2797 
2798         kmem_free(nm, MAXNAMELEN);
2799         pn_free(&pn);
2800 
2801         if (error)
2802                 return (error);
2803 
2804         /*
2805          * Only return persisted nodes in the filesystem underlying /dev.
2806          */
2807         if (!persisted) {
2808                 VN_RELE(vp);
2809                 return (ENOENT);
2810         }
2811 
2812         *r_vp = vp;
2813         return (0);
2814 }
2815 
2816 int
2817 sdev_modctl_readdir(const char *dir, char ***dirlistp,
2818         int *npathsp, int *npathsp_alloc, int checking_empty)
2819 {
2820         char    **pathlist = NULL;
2821         char    **newlist = NULL;
2822         int     npaths = 0;
2823         int     npaths_alloc = 0;
2824         dirent64_t *dbuf = NULL;
2825         int     n;
2826         char    *s;
2827         int error;
2828         vnode_t *vp;
2829         int eof;
2830         struct iovec iov;
2831         struct uio uio;
2832         struct dirent64 *dp;
2833         size_t dlen;
2834         size_t dbuflen;
2835         int ndirents = 64;
2836         char *nm;
2837 
2838         error = sdev_modctl_lookup(dir, &vp);
2839         sdcmn_err11(("modctl readdir: %s by %s: %s\n",
2840             dir, curproc->p_user.u_comm,
2841             (error == 0) ? "ok" : "failed"));
2842         if (error)
2843                 return (error);
2844 
2845         dlen = ndirents * (sizeof (*dbuf));
2846         dbuf = kmem_alloc(dlen, KM_SLEEP);
2847 
2848         uio.uio_iov = &iov;
2849         uio.uio_iovcnt = 1;
2850         uio.uio_segflg = UIO_SYSSPACE;
2851         uio.uio_fmode = 0;
2852         uio.uio_extflg = UIO_COPY_CACHED;
2853         uio.uio_loffset = 0;
2854         uio.uio_llimit = MAXOFFSET_T;
2855 
2856         eof = 0;
2857         error = 0;
2858         while (!error && !eof) {
2859                 uio.uio_resid = dlen;
2860                 iov.iov_base = (char *)dbuf;
2861                 iov.iov_len = dlen;
2862 
2863                 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
2864                 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
2865                 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
2866 
2867                 dbuflen = dlen - uio.uio_resid;
2868 
2869                 if (error || dbuflen == 0)
2870                         break;
2871 
2872                 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
2873                     dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
2874 
2875                         nm = dp->d_name;
2876 
2877                         if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
2878                                 continue;
2879                         if (npaths == npaths_alloc) {
2880                                 npaths_alloc += 64;
2881                                 newlist = (char **)
2882                                     kmem_zalloc((npaths_alloc + 1) *
2883                                     sizeof (char *), KM_SLEEP);
2884                                 if (pathlist) {
2885                                         bcopy(pathlist, newlist,
2886                                             npaths * sizeof (char *));
2887                                         kmem_free(pathlist,
2888                                             (npaths + 1) * sizeof (char *));
2889                                 }
2890                                 pathlist = newlist;
2891                         }
2892                         n = strlen(nm) + 1;
2893                         s = kmem_alloc(n, KM_SLEEP);
2894                         bcopy(nm, s, n);
2895                         pathlist[npaths++] = s;
2896                         sdcmn_err11(("  %s/%s\n", dir, s));
2897 
2898                         /* if checking empty, one entry is as good as many */
2899                         if (checking_empty) {
2900                                 eof = 1;
2901                                 break;
2902                         }
2903                 }
2904         }
2905 
2906 exit:
2907         VN_RELE(vp);
2908 
2909         if (dbuf)
2910                 kmem_free(dbuf, dlen);
2911 
2912         if (error)
2913                 return (error);
2914 
2915         *dirlistp = pathlist;
2916         *npathsp = npaths;
2917         *npathsp_alloc = npaths_alloc;
2918 
2919         return (0);
2920 }
2921 
/*
 * Release a name list produced by sdev_modctl_readdir().
 *
 * pathlist      array of name strings, or NULL when the directory
 *               yielded no entries
 * npaths        number of strings stored in the array
 * npaths_alloc  slot count reported by sdev_modctl_readdir(); the array
 *               itself was allocated with one extra slot
 *
 * Each string is freed with the size it was allocated with
 * (strlen + 1).  A NULL list is a no-op: sdev_modctl_readdir() hands
 * back NULL with npaths_alloc == 0 for a directory without entries, and
 * there is nothing to give back to kmem in that case.
 */
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
        int     i;

        if (pathlist == NULL)
                return;

        for (i = 0; i < npaths; i++)
                kmem_free(pathlist[i], strlen(pathlist[i]) + 1);

        kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}
2934 
2935 int
2936 sdev_modctl_devexists(const char *path)
2937 {
2938         vnode_t *vp;
2939         int error;
2940 
2941         error = sdev_modctl_lookup(path, &vp);
2942         sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
2943             path, curproc->p_user.u_comm,
2944             (error == 0) ? "ok" : "failed"));
2945         if (error == 0)
2946                 VN_RELE(vp);
2947 
2948         return (error);
2949 }
2950 
2951 extern int sdev_vnodeops_tbl_size;
2952 
2953 /*
2954  * construct a new template with overrides from vtab
2955  */
2956 static fs_operation_def_t *
2957 sdev_merge_vtab(const fs_operation_def_t tab[])
2958 {
2959         fs_operation_def_t *new;
2960         const fs_operation_def_t *tab_entry;
2961 
2962         /* make a copy of standard vnode ops table */
2963         new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
2964         bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);
2965 
2966         /* replace the overrides from tab */
2967         for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
2968                 fs_operation_def_t *std_entry = new;
2969                 while (std_entry->name) {
2970                         if (strcmp(tab_entry->name, std_entry->name) == 0) {
2971                                 std_entry->func = tab_entry->func;
2972                                 break;
2973                         }
2974                         std_entry++;
2975                 }
2976                 if (std_entry->name == NULL)
2977                         cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
2978                             tab_entry->name);
2979         }
2980 
2981         return (new);
2982 }
2983 
2984 /* free memory allocated by sdev_merge_vtab */
2985 static void
2986 sdev_free_vtab(fs_operation_def_t *new)
2987 {
2988         kmem_free(new, sdev_vnodeops_tbl_size);
2989 }
2990 
2991 /*
2992  * a generic setattr() function
2993  *
2994  * note: flags only supports AT_UID and AT_GID.
2995  *       Future enhancements can be done for other types, e.g. AT_MODE
2996  */
2997 int
2998 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags,
2999     struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *,
3000     int), int protocol)
3001 {
3002         struct sdev_node        *dv = VTOSDEV(vp);
3003         struct sdev_node        *parent = dv->sdev_dotdot;
3004         struct vattr            *get;
3005         uint_t                  mask = vap->va_mask;
3006         int                     error;
3007 
3008         /* some sanity checks */
3009         if (vap->va_mask & AT_NOSET)
3010                 return (EINVAL);
3011 
3012         if (vap->va_mask & AT_SIZE) {
3013                 if (vp->v_type == VDIR) {
3014                         return (EISDIR);
3015                 }
3016         }
3017 
3018         /* no need to set attribute, but do not fail either */
3019         ASSERT(parent);
3020         rw_enter(&parent->sdev_contents, RW_READER);
3021         if (dv->sdev_state == SDEV_ZOMBIE) {
3022                 rw_exit(&parent->sdev_contents);
3023                 return (0);
3024         }
3025 
3026         /* If backing store exists, just set it. */
3027         if (dv->sdev_attrvp) {
3028                 rw_exit(&parent->sdev_contents);
3029                 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3030         }
3031 
3032         /*
3033          * Otherwise, for nodes with the persistence attribute, create it.
3034          */
3035         ASSERT(dv->sdev_attr);
3036         if (SDEV_IS_PERSIST(dv) ||
3037             ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
3038                 sdev_vattr_merge(dv, vap);
3039                 rw_enter(&dv->sdev_contents, RW_WRITER);
3040                 error = sdev_shadow_node(dv, cred);
3041                 rw_exit(&dv->sdev_contents);
3042                 rw_exit(&parent->sdev_contents);
3043 
3044                 if (error)
3045                         return (error);
3046                 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
3047         }
3048 
3049 
3050         /*
3051          * sdev_attr was allocated in sdev_mknode
3052          */
3053         rw_enter(&dv->sdev_contents, RW_WRITER);
3054         error = secpolicy_vnode_setattr(cred, vp, vap,
3055             dv->sdev_attr, flags, sdev_unlocked_access, dv);
3056         if (error) {
3057                 rw_exit(&dv->sdev_contents);
3058                 rw_exit(&parent->sdev_contents);
3059                 return (error);
3060         }
3061 
3062         get = dv->sdev_attr;
3063         if (mask & AT_MODE) {
3064                 get->va_mode &= S_IFMT;
3065                 get->va_mode |= vap->va_mode & ~S_IFMT;
3066         }
3067 
3068         if ((mask & AT_UID) || (mask & AT_GID)) {
3069                 if (mask & AT_UID)
3070                         get->va_uid = vap->va_uid;
3071                 if (mask & AT_GID)
3072                         get->va_gid = vap->va_gid;
3073                 /*
3074                  * a callback must be provided if the protocol is set
3075                  */
3076                 if ((protocol & AT_UID) || (protocol & AT_GID)) {
3077                         ASSERT(callback);
3078                         error = callback(dv, get, protocol);
3079                         if (error) {
3080                                 rw_exit(&dv->sdev_contents);
3081                                 rw_exit(&parent->sdev_contents);
3082                                 return (error);
3083                         }
3084                 }
3085         }
3086 
3087         if (mask & AT_ATIME)
3088                 get->va_atime = vap->va_atime;
3089         if (mask & AT_MTIME)
3090                 get->va_mtime = vap->va_mtime;
3091         if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
3092                 gethrestime(&get->va_ctime);
3093         }
3094 
3095         sdev_vattr_merge(dv, get);
3096         rw_exit(&dv->sdev_contents);
3097         rw_exit(&parent->sdev_contents);
3098         return (0);
3099 }
3100 
3101 /*
3102  * a generic inactive() function
3103  */
3104 /*ARGSUSED*/
3105 void
3106 devname_inactive_func(struct vnode *vp, struct cred *cred,
3107     void (*callback)(struct vnode *))
3108 {
3109         int clean;
3110         struct sdev_node *dv = VTOSDEV(vp);
3111         int state;
3112 
3113         mutex_enter(&vp->v_lock);
3114         ASSERT(vp->v_count >= 1);
3115 
3116 
3117         if (vp->v_count == 1 && callback != NULL)
3118                 callback(vp);
3119 
3120         rw_enter(&dv->sdev_contents, RW_WRITER);
3121         state = dv->sdev_state;
3122 
3123         clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);
3124 
3125         /*
3126          * sdev is a rather bad public citizen. It violates the general
3127          * agreement that in memory nodes should always have a valid reference
3128          * count on their vnode. But that's not the case here. This means that
3129          * we do actually have to distinguish between getting inactive callbacks
3130          * for zombies and otherwise. This should probably be fixed.
3131          */
3132         if (clean) {
3133                 /* Remove the . entry to ourselves */
3134                 if (vp->v_type == VDIR) {
3135                         decr_link(dv);
3136                 }
3137                 VERIFY(dv->sdev_nlink == 1);
3138                 decr_link(dv);
3139                 --vp->v_count;
3140                 rw_exit(&dv->sdev_contents);
3141                 mutex_exit(&vp->v_lock);
3142                 sdev_nodedestroy(dv, 0);
3143         } else {
3144                 --vp->v_count;
3145                 rw_exit(&dv->sdev_contents);
3146                 mutex_exit(&vp->v_lock);
3147         }
3148 }