5045-use-atomic_inc_*-atomic_dec_*-instead-of-atomic_add_* Wdiff usr/src/uts/common/fs/vnode.c

Print this page

5045 use atomic_{inc,dec}_* instead of atomic_add_*

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/fs/vnode.c
          +++ new/usr/src/uts/common/fs/vnode.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  25   25   */
  26   26  
  27   27  /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
  28   28  /*        All Rights Reserved   */
  29   29  
  30   30  /*
  31   31   * University Copyright- Copyright (c) 1982, 1986, 1988
  32   32   * The Regents of the University of California
  33   33   * All Rights Reserved
  34   34   *
  35   35   * University Acknowledgment- Portions of this document are derived from
  36   36   * software developed by the University of California, Berkeley, and its
  37   37   * contributors.
  38   38   */
  39   39  
  40   40  #include <sys/types.h>
  41   41  #include <sys/param.h>
  42   42  #include <sys/t_lock.h>
  43   43  #include <sys/errno.h>
  44   44  #include <sys/cred.h>
  45   45  #include <sys/user.h>
  46   46  #include <sys/uio.h>
  47   47  #include <sys/file.h>
  48   48  #include <sys/pathname.h>
  49   49  #include <sys/vfs.h>
  50   50  #include <sys/vfs_opreg.h>
  51   51  #include <sys/vnode.h>
  52   52  #include <sys/rwstlock.h>
  53   53  #include <sys/fem.h>
  54   54  #include <sys/stat.h>
  55   55  #include <sys/mode.h>
  56   56  #include <sys/conf.h>
  57   57  #include <sys/sysmacros.h>
  58   58  #include <sys/cmn_err.h>
  59   59  #include <sys/systm.h>
  60   60  #include <sys/kmem.h>
  61   61  #include <sys/debug.h>
  62   62  #include <c2/audit.h>
  63   63  #include <sys/acl.h>
  64   64  #include <sys/nbmlock.h>
  65   65  #include <sys/fcntl.h>
  66   66  #include <fs/fs_subr.h>
  67   67  #include <sys/taskq.h>
  68   68  #include <fs/fs_reparse.h>
  69   69  
  70   70  /*
            * ISROFILE(vp): true when vp's v_type is none of VCHR, VBLK or VFIFO
            * and vn_is_readonly(vp) returns non-zero.  The argument is expanded
            * four times, so pass an expression that has no side effects.
            */
  71   71  #define ISROFILE(vp)    \
  72   72          ((vp)->v_type != VCHR && (vp)->v_type != VBLK && \
  73   73              (vp)->v_type != VFIFO && vn_is_readonly(vp))
  74   74  
  75   75  /* Tunable via /etc/system; used only by admin/install */
  76   76  int nfs_global_client_only;
  77   77  
  78   78  /*
  79   79   * Array of vopstats_t for per-FS-type vopstats.  This array has the same
  80   80   * number of entries as and parallel to the vfssw table.  (Arguably, it could
  81   81   * be part of the vfssw table.)  Once it's initialized, it's accessed using
  82   82   * the same fstype index that is used to index into the vfssw table.
  83   83   */
  84   84  vopstats_t **vopstats_fstype;
  85   85  
  86   86  /* vopstats initialization template used for fast initialization via bcopy() */
  87   87  static vopstats_t *vs_templatep;
  88   88  
  89   89  /* Kmem cache handle for vsk_anchor_t allocations */
  90   90  kmem_cache_t *vsk_anchor_cache;
  91   91  
  92   92  /* file events cleanup routine */
  93   93  extern void free_fopdata(vnode_t *);
  94   94  
  95   95  /*
  96   96   * Root of AVL tree for the kstats associated with vopstats.  Lock protects
  97   97   * updates to vskstat_tree.
  98   98   */
  99   99  avl_tree_t      vskstat_tree;
 100  100  kmutex_t        vskstat_tree_lock;
 101  101  
 102  102  /* Global variable which enables/disables the vopstats collection */
 103  103  int vopstats_enabled = 1;
 104  104  
 105  105  /*
 106  106   * forward declarations for internal vnode specific data (vsd)
 107  107   */
 108  108  static void *vsd_realloc(void *, size_t, size_t);
 109  109  
 110  110  /*
 111  111   * forward declarations for reparse point functions
 112  112   */
 113  113  static int fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr);
 114  114  
 115  115  /*
 116  116   * VSD -- VNODE SPECIFIC DATA
 117  117   * The v_data pointer is typically used by a file system to store a
 118  118   * pointer to the file system's private node (e.g. ufs inode, nfs rnode).
 119  119   * However, there are times when additional project private data needs
 120  120   * to be stored separately from the data (node) pointed to by v_data.
 121  121   * This additional data could be stored by the file system itself or
 122  122   * by a completely different kernel entity.  VSD provides a way for
 123  123   * callers to obtain a key and store a pointer to private data associated
 124  124   * with a vnode.
 125  125   *
 126  126   * Callers are responsible for protecting the vsd by holding v_vsd_lock
 127  127   * for calls to vsd_set() and vsd_get().
 128  128   */
 129  129  
 130  130  /*
 131  131   * vsd_lock protects:
 132  132   *   vsd_nkeys - creation and deletion of vsd keys
 133  133   *   vsd_list - insertion and deletion of vsd_node in the vsd_list
 134  134   *   vsd_destructor - adding and removing destructors to the list
 135  135   */
 136  136  static kmutex_t         vsd_lock;
 137  137  static uint_t           vsd_nkeys;       /* size of destructor array */
 138  138  /* list of vsd_node's */
 139  139  static list_t *vsd_list = NULL;
 140  140  /* per-key destructor funcs */
 141  141  static void             (**vsd_destructor)(void *);
 142  142  
 143  143  /*
 144  144   * The following is the common set of actions needed to update the
 145  145   * vopstats structure from a vnode op.  Both VOPSTATS_UPDATE() and
 146  146   * VOPSTATS_UPDATE_IO() do almost the same thing, except for the
 147  147   * recording of the bytes transferred.  Since the code is similar
 148  148   * but small, it is nearly a duplicate.  Consequently any changes
 149  149   * to one may need to be reflected in the other.
 150  150   * Rundown of the variables:
 151  151   * vp - Pointer to the vnode
 152  152   * counter - Partial name structure member to update in vopstats for counts
 153  153   * bytecounter - Partial name structure member to update in vopstats for bytes
 154  154   * bytesval - Value to update in vopstats for bytes
 155  155   * fstype - Index into vsanchor_fstype[], same as index into vfssw[]
 156  156   * vsp - Pointer to vopstats structure (either in vfs or vsanchor_fstype[i])
 157  157   */
 158  158  
           /*
            * VOPSTATS_UPDATE(vp, counter)
            *
            * Does nothing unless vp has a vfs with a non-NULL vfs_implp, VFS_STATS
            * set in vfs_flag, and vp is not of type VBAD.  Otherwise it:
            *   1. calls __dtrace_probe___fsinfo_<counter>() with vp, a byte count
            *      of 0 and the address of the per-vfs n<counter> statistic
            *      (the call is made before the counter is bumped);
            *   2. increments the per-vfs n<counter>;
            *   3. increments n<counter> in vfs_fstypevsp when that pointer is
            *      non-NULL.
            * The increments are plain ++ operations on the uint64_t fields.
            *
            * The expansion is a bare brace block that declares locals named vfsp,
            * vsp and stataddr, and it expands vp three times; the vp argument
            * should therefore be side-effect free and should not rely on a
            * caller variable with one of those names.
            */
 159  159  #define VOPSTATS_UPDATE(vp, counter) {                                  \
 160  160          vfs_t *vfsp = (vp)->v_vfsp;                                     \
 161  161          if (vfsp && vfsp->vfs_implp &&                                  \
 162  162              (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {     \
 163  163                  vopstats_t *vsp = &vfsp->vfs_vopstats;                  \
 164  164                  uint64_t *stataddr = &(vsp->n##counter.value.ui64);     \
 165  165                  extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
 166  166                      size_t, uint64_t *);                                \
 167  167                  __dtrace_probe___fsinfo_##counter(vp, 0, stataddr);     \
 168  168                  (*stataddr)++;                                          \
 169  169                  if ((vsp = vfsp->vfs_fstypevsp) != NULL) {              \
 170  170                          vsp->n##counter.value.ui64++;                   \
 171  171                  }                                                       \
 172  172          }                                                               \
 173  173  }
 174  174  
           /*
            * VOPSTATS_UPDATE_IO(vp, counter, bytecounter, bytesval)
            *
            * Same gating as VOPSTATS_UPDATE(): nothing happens unless vp has a vfs
            * with a non-NULL vfs_implp, VFS_STATS set, and vp is not VBAD.
            * Otherwise it:
            *   1. calls __dtrace_probe___fsinfo_<counter>() with vp, bytesval and
            *      the address of the per-vfs n<counter> statistic;
            *   2. increments the per-vfs n<counter> and adds bytesval to the
            *      per-vfs <bytecounter>;
            *   3. does the same to the vopstats in vfs_fstypevsp when that pointer
            *      is non-NULL.
            *
            * bytesval is expanded three times (unparenthesized) and vp three
            * times, so both arguments should be simple, side-effect-free
            * expressions.  The expansion is a bare brace block declaring locals
            * named vfsp, vsp and stataddr.
            */
 175  175  #define VOPSTATS_UPDATE_IO(vp, counter, bytecounter, bytesval) {        \
 176  176          vfs_t *vfsp = (vp)->v_vfsp;                                     \
 177  177          if (vfsp && vfsp->vfs_implp &&                                  \
 178  178              (vfsp->vfs_flag & VFS_STATS) && (vp)->v_type != VBAD) {     \
 179  179                  vopstats_t *vsp = &vfsp->vfs_vopstats;                  \
 180  180                  uint64_t *stataddr = &(vsp->n##counter.value.ui64);     \
 181  181                  extern void __dtrace_probe___fsinfo_##counter(vnode_t *, \
 182  182                      size_t, uint64_t *);                                \
 183  183                  __dtrace_probe___fsinfo_##counter(vp, bytesval, stataddr); \
 184  184                  (*stataddr)++;                                          \
 185  185                  vsp->bytecounter.value.ui64 += bytesval;                \
 186  186                  if ((vsp = vfsp->vfs_fstypevsp) != NULL) {              \
 187  187                          vsp->n##counter.value.ui64++;                   \
 188  188                          vsp->bytecounter.value.ui64 += bytesval;        \
 189  189                  }                                                       \
 190  190          }                                                               \
 191  191  }
 192  192  
 193  193  /*
 194  194   * VOPXID_MAP_CR(vp, cr): if the filesystem does not support XIDs, map the
            * credential.  Concretely, when vp's v_vfsp is non-NULL and VFS_XID is
            * not set in its vfs_flag, cr is overwritten with crgetmapped(cr); cr
            * must therefore be an assignable lvalue in the caller's scope.
            * When v_vfsp is NULL, cr is left untouched.
 195  195   * If the vfsp is NULL, perhaps we should also map?
 196  196   */
 197  197  #define VOPXID_MAP_CR(vp, cr)   {                                       \
 198  198          vfs_t *vfsp = (vp)->v_vfsp;                                     \
 199  199          if (vfsp != NULL && (vfsp->vfs_flag & VFS_XID) == 0)            \
 200  200                  cr = crgetmapped(cr);                                   \
 201  201          }
 202  202  
 203  203  /*
 204  204   * Convert stat(2) formats to vnode types and vice versa.  (Knows about
 205  205   * numerical order of S_IFMT and vnode types.)
 206  206   */
           /*
            * iftovt_tab: 16-entry table yielding an enum vtype; the slots that are
            * not VFIFO, VCHR, VDIR, VBLK, VREG, VLNK or VSOCK hold VNON.
            * NOTE(review): presumably indexed by the S_IFMT bits of a mode shifted
            * down into the range 0..15 -- confirm against the macro that uses it.
            */
 207  207  enum vtype iftovt_tab[] = {
 208  208          VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
 209  209          VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VNON
 210  210  };
 211  211  
           /*
            * vttoif_tab: 12-entry table yielding an S_IF* file-format value; three
            * slots hold 0.
            * NOTE(review): presumably indexed by enum vtype value, in the enum's
            * declaration order -- confirm against the enum vtype definition.
            */
 212  212  ushort_t vttoif_tab[] = {
 213  213          0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFIFO,
 214  214          S_IFDOOR, 0, S_IFSOCK, S_IFPORT, 0
 215  215  };
 216  216  
 217  217  /*
 218  218   * The system vnode cache.
 219  219   */
 220  220  
 221  221  kmem_cache_t *vn_cache;
 222  222  
 223  223  
 224  224  /*
 225  225   * Vnode operations vector.
 226  226   */
 227  227  
           /*
            * vn_ops_table: one group of four initializers per vnode operation --
            * the VOPNAME_* name, the offset of the matching member within
            * struct vnodeops, and two function pointers.  The table is terminated
            * by a NULL/0/NULL/NULL sentinel entry.
            * NOTE(review): the two function columns presumably correspond to a
            * default handler and an error handler in fs_operation_trans_def_t --
            * confirm against that type's definition, which is not visible here.
            */
 228  228  static const fs_operation_trans_def_t vn_ops_table[] = {
 229  229          VOPNAME_OPEN, offsetof(struct vnodeops, vop_open),
 230  230              fs_nosys, fs_nosys,
 231  231  
 232  232          VOPNAME_CLOSE, offsetof(struct vnodeops, vop_close),
 233  233              fs_nosys, fs_nosys,
 234  234  
 235  235          VOPNAME_READ, offsetof(struct vnodeops, vop_read),
 236  236              fs_nosys, fs_nosys,
 237  237  
 238  238          VOPNAME_WRITE, offsetof(struct vnodeops, vop_write),
 239  239              fs_nosys, fs_nosys,
 240  240  
 241  241          VOPNAME_IOCTL, offsetof(struct vnodeops, vop_ioctl),
 242  242              fs_nosys, fs_nosys,
 243  243  
 244  244          VOPNAME_SETFL, offsetof(struct vnodeops, vop_setfl),
 245  245              fs_setfl, fs_nosys,
 246  246  
 247  247          VOPNAME_GETATTR, offsetof(struct vnodeops, vop_getattr),
 248  248              fs_nosys, fs_nosys,
 249  249  
 250  250          VOPNAME_SETATTR, offsetof(struct vnodeops, vop_setattr),
 251  251              fs_nosys, fs_nosys,
 252  252  
 253  253          VOPNAME_ACCESS, offsetof(struct vnodeops, vop_access),
 254  254              fs_nosys, fs_nosys,
 255  255  
 256  256          VOPNAME_LOOKUP, offsetof(struct vnodeops, vop_lookup),
 257  257              fs_nosys, fs_nosys,
 258  258  
 259  259          VOPNAME_CREATE, offsetof(struct vnodeops, vop_create),
 260  260              fs_nosys, fs_nosys,
 261  261  
 262  262          VOPNAME_REMOVE, offsetof(struct vnodeops, vop_remove),
 263  263              fs_nosys, fs_nosys,
 264  264  
 265  265          VOPNAME_LINK, offsetof(struct vnodeops, vop_link),
 266  266              fs_nosys, fs_nosys,
 267  267  
 268  268          VOPNAME_RENAME, offsetof(struct vnodeops, vop_rename),
 269  269              fs_nosys, fs_nosys,
 270  270  
 271  271          VOPNAME_MKDIR, offsetof(struct vnodeops, vop_mkdir),
 272  272              fs_nosys, fs_nosys,
 273  273  
 274  274          VOPNAME_RMDIR, offsetof(struct vnodeops, vop_rmdir),
 275  275              fs_nosys, fs_nosys,
 276  276  
 277  277          VOPNAME_READDIR, offsetof(struct vnodeops, vop_readdir),
 278  278              fs_nosys, fs_nosys,
 279  279  
 280  280          VOPNAME_SYMLINK, offsetof(struct vnodeops, vop_symlink),
 281  281              fs_nosys, fs_nosys,
 282  282  
 283  283          VOPNAME_READLINK, offsetof(struct vnodeops, vop_readlink),
 284  284              fs_nosys, fs_nosys,
 285  285  
 286  286          VOPNAME_FSYNC, offsetof(struct vnodeops, vop_fsync),
 287  287              fs_nosys, fs_nosys,
 288  288  
 289  289          VOPNAME_INACTIVE, offsetof(struct vnodeops, vop_inactive),
 290  290              fs_nosys, fs_nosys,
 291  291  
 292  292          VOPNAME_FID, offsetof(struct vnodeops, vop_fid),
 293  293              fs_nosys, fs_nosys,
 294  294  
 295  295          VOPNAME_RWLOCK, offsetof(struct vnodeops, vop_rwlock),
 296  296              fs_rwlock, fs_rwlock,
 297  297  
 298  298          VOPNAME_RWUNLOCK, offsetof(struct vnodeops, vop_rwunlock),
 299  299              (fs_generic_func_p) fs_rwunlock,
 300  300              (fs_generic_func_p) fs_rwunlock,    /* no errors allowed */
 301  301  
 302  302          VOPNAME_SEEK, offsetof(struct vnodeops, vop_seek),
 303  303              fs_nosys, fs_nosys,
 304  304  
 305  305          VOPNAME_CMP, offsetof(struct vnodeops, vop_cmp),
 306  306              fs_cmp, fs_cmp,             /* no errors allowed */
 307  307  
 308  308          VOPNAME_FRLOCK, offsetof(struct vnodeops, vop_frlock),
 309  309              fs_frlock, fs_nosys,
 310  310  
 311  311          VOPNAME_SPACE, offsetof(struct vnodeops, vop_space),
 312  312              fs_nosys, fs_nosys,
 313  313  
 314  314          VOPNAME_REALVP, offsetof(struct vnodeops, vop_realvp),
 315  315              fs_nosys, fs_nosys,
 316  316  
 317  317          VOPNAME_GETPAGE, offsetof(struct vnodeops, vop_getpage),
 318  318              fs_nosys, fs_nosys,
 319  319  
 320  320          VOPNAME_PUTPAGE, offsetof(struct vnodeops, vop_putpage),
 321  321              fs_nosys, fs_nosys,
 322  322  
 323  323          VOPNAME_MAP, offsetof(struct vnodeops, vop_map),
 324  324              (fs_generic_func_p) fs_nosys_map,
 325  325              (fs_generic_func_p) fs_nosys_map,
 326  326  
 327  327          VOPNAME_ADDMAP, offsetof(struct vnodeops, vop_addmap),
 328  328              (fs_generic_func_p) fs_nosys_addmap,
 329  329              (fs_generic_func_p) fs_nosys_addmap,
 330  330  
 331  331          VOPNAME_DELMAP, offsetof(struct vnodeops, vop_delmap),
 332  332              fs_nosys, fs_nosys,
 333  333  
 334  334          VOPNAME_POLL, offsetof(struct vnodeops, vop_poll),
 335  335              (fs_generic_func_p) fs_poll, (fs_generic_func_p) fs_nosys_poll,
 336  336  
 337  337          VOPNAME_DUMP, offsetof(struct vnodeops, vop_dump),
 338  338              fs_nosys, fs_nosys,
 339  339  
 340  340          VOPNAME_PATHCONF, offsetof(struct vnodeops, vop_pathconf),
 341  341              fs_pathconf, fs_nosys,
 342  342  
 343  343          VOPNAME_PAGEIO, offsetof(struct vnodeops, vop_pageio),
 344  344              fs_nosys, fs_nosys,
 345  345  
 346  346          VOPNAME_DUMPCTL, offsetof(struct vnodeops, vop_dumpctl),
 347  347              fs_nosys, fs_nosys,
 348  348  
 349  349          VOPNAME_DISPOSE, offsetof(struct vnodeops, vop_dispose),
 350  350              (fs_generic_func_p) fs_dispose,
 351  351              (fs_generic_func_p) fs_nodispose,
 352  352  
 353  353          VOPNAME_SETSECATTR, offsetof(struct vnodeops, vop_setsecattr),
 354  354              fs_nosys, fs_nosys,
 355  355  
 356  356          VOPNAME_GETSECATTR, offsetof(struct vnodeops, vop_getsecattr),
 357  357              fs_fab_acl, fs_nosys,
 358  358  
 359  359          VOPNAME_SHRLOCK, offsetof(struct vnodeops, vop_shrlock),
 360  360              fs_shrlock, fs_nosys,
 361  361  
 362  362          VOPNAME_VNEVENT, offsetof(struct vnodeops, vop_vnevent),
 363  363              (fs_generic_func_p) fs_vnevent_nosupport,
 364  364              (fs_generic_func_p) fs_vnevent_nosupport,
 365  365  
 366  366          VOPNAME_REQZCBUF, offsetof(struct vnodeops, vop_reqzcbuf),
 367  367              fs_nosys, fs_nosys,
 368  368  
 369  369          VOPNAME_RETZCBUF, offsetof(struct vnodeops, vop_retzcbuf),
 370  370              fs_nosys, fs_nosys,
 371  371  
 372  372          NULL, 0, NULL, NULL
 373  373  };
 374  374  
 375  375  /* Extensible attribute (xva) routines. */
 376  376  
 377  377  /*
 378  378   * Zero out the structure, set the size of the requested/returned bitmaps,
 379  379   * set AT_XVATTR in the embedded vattr_t's va_mask, and set up the pointer
 380  380   * to the returned attributes array.
 381  381   */
 382  382  void
 383  383  xva_init(xvattr_t *xvap)
 384  384  {
 385  385          bzero(xvap, sizeof (xvattr_t));
 386  386          xvap->xva_mapsize = XVA_MAPSIZE;
 387  387          xvap->xva_magic = XVA_MAGIC;
 388  388          xvap->xva_vattr.va_mask = AT_XVATTR;
 389  389          xvap->xva_rtnattrmapp = &(xvap->xva_rtnattrmap)[0];
 390  390  }
 391  391  
 392  392  /*
 393  393   * If AT_XVATTR is set, returns a pointer to the embedded xoptattr_t
 394  394   * structure.  Otherwise, returns NULL.
 395  395   */
 396  396  xoptattr_t *
 397  397  xva_getxoptattr(xvattr_t *xvap)
 398  398  {
 399  399          xoptattr_t *xoap = NULL;
 400  400          if (xvap->xva_vattr.va_mask & AT_XVATTR)
 401  401                  xoap = &xvap->xva_xoptattrs;
 402  402          return (xoap);
 403  403  }
 404  404  
 405  405  /*
 406  406   * Used by the AVL routines to compare two vsk_anchor_t structures in the tree.
 407  407   * We use the f_fsid reported by VFS_STATVFS() since we use that for the
 408  408   * kstat name.
 409  409   */
 410  410  static int
 411  411  vska_compar(const void *n1, const void *n2)
 412  412  {
 413  413          int ret;
 414  414          ulong_t p1 = ((vsk_anchor_t *)n1)->vsk_fsid;
 415  415          ulong_t p2 = ((vsk_anchor_t *)n2)->vsk_fsid;
 416  416  
 417  417          if (p1 < p2) {
 418  418                  ret = -1;
 419  419          } else if (p1 > p2) {
 420  420                  ret = 1;
 421  421          } else {
 422  422                  ret = 0;
 423  423          }
 424  424  
 425  425          return (ret);
 426  426  }
 427  427  
 428  428  /*
 429  429   * Used to create a single template which will be bcopy()ed to a newly
 430  430   * allocated vsanchor_combo_t structure in new_vsanchor(), below.
 431  431   */
 432  432  static vopstats_t *
 433  433  create_vopstats_template()
 434  434  {
 435  435          vopstats_t              *vsp;
 436  436  
 437  437          vsp = kmem_alloc(sizeof (vopstats_t), KM_SLEEP);
 438  438          bzero(vsp, sizeof (*vsp));      /* Start fresh */
 439  439  
 440  440          /* VOP_OPEN */
 441  441          kstat_named_init(&vsp->nopen, "nopen", KSTAT_DATA_UINT64);
 442  442          /* VOP_CLOSE */
 443  443          kstat_named_init(&vsp->nclose, "nclose", KSTAT_DATA_UINT64);
 444  444          /* VOP_READ I/O */
 445  445          kstat_named_init(&vsp->nread, "nread", KSTAT_DATA_UINT64);
 446  446          kstat_named_init(&vsp->read_bytes, "read_bytes", KSTAT_DATA_UINT64);
 447  447          /* VOP_WRITE I/O */
 448  448          kstat_named_init(&vsp->nwrite, "nwrite", KSTAT_DATA_UINT64);
 449  449          kstat_named_init(&vsp->write_bytes, "write_bytes", KSTAT_DATA_UINT64);
 450  450          /* VOP_IOCTL */
 451  451          kstat_named_init(&vsp->nioctl, "nioctl", KSTAT_DATA_UINT64);
 452  452          /* VOP_SETFL */
 453  453          kstat_named_init(&vsp->nsetfl, "nsetfl", KSTAT_DATA_UINT64);
 454  454          /* VOP_GETATTR */
 455  455          kstat_named_init(&vsp->ngetattr, "ngetattr", KSTAT_DATA_UINT64);
 456  456          /* VOP_SETATTR */
 457  457          kstat_named_init(&vsp->nsetattr, "nsetattr", KSTAT_DATA_UINT64);
 458  458          /* VOP_ACCESS */
 459  459          kstat_named_init(&vsp->naccess, "naccess", KSTAT_DATA_UINT64);
 460  460          /* VOP_LOOKUP */
 461  461          kstat_named_init(&vsp->nlookup, "nlookup", KSTAT_DATA_UINT64);
 462  462          /* VOP_CREATE */
 463  463          kstat_named_init(&vsp->ncreate, "ncreate", KSTAT_DATA_UINT64);
 464  464          /* VOP_REMOVE */
 465  465          kstat_named_init(&vsp->nremove, "nremove", KSTAT_DATA_UINT64);
 466  466          /* VOP_LINK */
 467  467          kstat_named_init(&vsp->nlink, "nlink", KSTAT_DATA_UINT64);
 468  468          /* VOP_RENAME */
 469  469          kstat_named_init(&vsp->nrename, "nrename", KSTAT_DATA_UINT64);
 470  470          /* VOP_MKDIR */
 471  471          kstat_named_init(&vsp->nmkdir, "nmkdir", KSTAT_DATA_UINT64);
 472  472          /* VOP_RMDIR */
 473  473          kstat_named_init(&vsp->nrmdir, "nrmdir", KSTAT_DATA_UINT64);
 474  474          /* VOP_READDIR I/O */
 475  475          kstat_named_init(&vsp->nreaddir, "nreaddir", KSTAT_DATA_UINT64);
 476  476          kstat_named_init(&vsp->readdir_bytes, "readdir_bytes",
 477  477              KSTAT_DATA_UINT64);
 478  478          /* VOP_SYMLINK */
 479  479          kstat_named_init(&vsp->nsymlink, "nsymlink", KSTAT_DATA_UINT64);
 480  480          /* VOP_READLINK */
 481  481          kstat_named_init(&vsp->nreadlink, "nreadlink", KSTAT_DATA_UINT64);
 482  482          /* VOP_FSYNC */
 483  483          kstat_named_init(&vsp->nfsync, "nfsync", KSTAT_DATA_UINT64);
 484  484          /* VOP_INACTIVE */
 485  485          kstat_named_init(&vsp->ninactive, "ninactive", KSTAT_DATA_UINT64);
 486  486          /* VOP_FID */
 487  487          kstat_named_init(&vsp->nfid, "nfid", KSTAT_DATA_UINT64);
 488  488          /* VOP_RWLOCK */
 489  489          kstat_named_init(&vsp->nrwlock, "nrwlock", KSTAT_DATA_UINT64);
 490  490          /* VOP_RWUNLOCK */
 491  491          kstat_named_init(&vsp->nrwunlock, "nrwunlock", KSTAT_DATA_UINT64);
 492  492          /* VOP_SEEK */
 493  493          kstat_named_init(&vsp->nseek, "nseek", KSTAT_DATA_UINT64);
 494  494          /* VOP_CMP */
 495  495          kstat_named_init(&vsp->ncmp, "ncmp", KSTAT_DATA_UINT64);
 496  496          /* VOP_FRLOCK */
 497  497          kstat_named_init(&vsp->nfrlock, "nfrlock", KSTAT_DATA_UINT64);
 498  498          /* VOP_SPACE */
 499  499          kstat_named_init(&vsp->nspace, "nspace", KSTAT_DATA_UINT64);
 500  500          /* VOP_REALVP */
 501  501          kstat_named_init(&vsp->nrealvp, "nrealvp", KSTAT_DATA_UINT64);
 502  502          /* VOP_GETPAGE */
 503  503          kstat_named_init(&vsp->ngetpage, "ngetpage", KSTAT_DATA_UINT64);
 504  504          /* VOP_PUTPAGE */
 505  505          kstat_named_init(&vsp->nputpage, "nputpage", KSTAT_DATA_UINT64);
 506  506          /* VOP_MAP */
 507  507          kstat_named_init(&vsp->nmap, "nmap", KSTAT_DATA_UINT64);
 508  508          /* VOP_ADDMAP */
 509  509          kstat_named_init(&vsp->naddmap, "naddmap", KSTAT_DATA_UINT64);
 510  510          /* VOP_DELMAP */
 511  511          kstat_named_init(&vsp->ndelmap, "ndelmap", KSTAT_DATA_UINT64);
 512  512          /* VOP_POLL */
 513  513          kstat_named_init(&vsp->npoll, "npoll", KSTAT_DATA_UINT64);
 514  514          /* VOP_DUMP */
 515  515          kstat_named_init(&vsp->ndump, "ndump", KSTAT_DATA_UINT64);
 516  516          /* VOP_PATHCONF */
 517  517          kstat_named_init(&vsp->npathconf, "npathconf", KSTAT_DATA_UINT64);
 518  518          /* VOP_PAGEIO */
 519  519          kstat_named_init(&vsp->npageio, "npageio", KSTAT_DATA_UINT64);
 520  520          /* VOP_DUMPCTL */
 521  521          kstat_named_init(&vsp->ndumpctl, "ndumpctl", KSTAT_DATA_UINT64);
 522  522          /* VOP_DISPOSE */
 523  523          kstat_named_init(&vsp->ndispose, "ndispose", KSTAT_DATA_UINT64);
 524  524          /* VOP_SETSECATTR */
 525  525          kstat_named_init(&vsp->nsetsecattr, "nsetsecattr", KSTAT_DATA_UINT64);
 526  526          /* VOP_GETSECATTR */
 527  527          kstat_named_init(&vsp->ngetsecattr, "ngetsecattr", KSTAT_DATA_UINT64);
 528  528          /* VOP_SHRLOCK */
 529  529          kstat_named_init(&vsp->nshrlock, "nshrlock", KSTAT_DATA_UINT64);
 530  530          /* VOP_VNEVENT */
 531  531          kstat_named_init(&vsp->nvnevent, "nvnevent", KSTAT_DATA_UINT64);
 532  532          /* VOP_REQZCBUF */
 533  533          kstat_named_init(&vsp->nreqzcbuf, "nreqzcbuf", KSTAT_DATA_UINT64);
 534  534          /* VOP_RETZCBUF */
 535  535          kstat_named_init(&vsp->nretzcbuf, "nretzcbuf", KSTAT_DATA_UINT64);
 536  536  
 537  537          return (vsp);
 538  538  }
 539  539  
 540  540  /*
 541  541   * Creates a kstat structure associated with a vopstats structure.
 542  542   */
 543  543  kstat_t *
 544  544  new_vskstat(char *ksname, vopstats_t *vsp)
 545  545  {
 546  546          kstat_t         *ksp;
 547  547  
 548  548          if (!vopstats_enabled) {
 549  549                  return (NULL);
 550  550          }
 551  551  
 552  552          ksp = kstat_create("unix", 0, ksname, "misc", KSTAT_TYPE_NAMED,
 553  553              sizeof (vopstats_t)/sizeof (kstat_named_t),
 554  554              KSTAT_FLAG_VIRTUAL|KSTAT_FLAG_WRITABLE);
 555  555          if (ksp) {
 556  556                  ksp->ks_data = vsp;
 557  557                  kstat_install(ksp);
 558  558          }
 559  559  
 560  560          return (ksp);
 561  561  }
 562  562  
 563  563  /*
 564  564   * Called from vfsinit() to initialize the support mechanisms for vopstats
 565  565   */
 566  566  void
 567  567  vopstats_startup()
 568  568  {
 569  569          if (!vopstats_enabled)
 570  570                  return;
 571  571  
 572  572          /*
 573  573           * Creates the AVL tree which holds per-vfs vopstat anchors.  This
 574  574           * is necessary since we need to check if a kstat exists before we
 575  575           * attempt to create it.  Also, initialize its lock.
 576  576           */
 577  577          avl_create(&vskstat_tree, vska_compar, sizeof (vsk_anchor_t),
 578  578              offsetof(vsk_anchor_t, vsk_node));
 579  579          mutex_init(&vskstat_tree_lock, NULL, MUTEX_DEFAULT, NULL);
 580  580  
 581  581          vsk_anchor_cache = kmem_cache_create("vsk_anchor_cache",
 582  582              sizeof (vsk_anchor_t), sizeof (uintptr_t), NULL, NULL, NULL,
 583  583              NULL, NULL, 0);
 584  584  
 585  585          /*
 586  586           * Set up the array of pointers for the vopstats-by-FS-type.
 587  587           * The entries will be allocated/initialized as each file system
 588  588           * goes through modload/mod_installfs.
 589  589           */
 590  590          vopstats_fstype = (vopstats_t **)kmem_zalloc(
 591  591              (sizeof (vopstats_t *) * nfstype), KM_SLEEP);
 592  592  
 593  593          /* Set up the global vopstats initialization template */
 594  594          vs_templatep = create_vopstats_template();
 595  595  }
 596  596  
 597  597  /*
 598  598   * We need to have the all of the counters zeroed.
 599  599   * The initialization of the vopstats_t includes on the order of
 600  600   * 50 calls to kstat_named_init().  Rather that do that on every call,
 601  601   * we do it once in a template (vs_templatep) then bcopy it over.
 602  602   */
 603  603  void
 604  604  initialize_vopstats(vopstats_t *vsp)
 605  605  {
 606  606          if (vsp == NULL)
 607  607                  return;
 608  608  
 609  609          bcopy(vs_templatep, vsp, sizeof (vopstats_t));
 610  610  }
 611  611  
 612  612  /*
 613  613   * If possible, determine which vopstats by fstype to use and
 614  614   * return a pointer to the caller.
 615  615   */
 616  616  vopstats_t *
 617  617  get_fstype_vopstats(vfs_t *vfsp, struct vfssw *vswp)
 618  618  {
 619  619          int             fstype = 0;     /* Index into vfssw[] */
 620  620          vopstats_t      *vsp = NULL;
 621  621  
 622  622          if (vfsp == NULL || (vfsp->vfs_flag & VFS_STATS) == 0 ||
 623  623              !vopstats_enabled)
 624  624                  return (NULL);
 625  625          /*
 626  626           * Set up the fstype.  We go to so much trouble because all versions
 627  627           * of NFS use the same fstype in their vfs even though they have
 628  628           * distinct entries in the vfssw[] table.
 629  629           * NOTE: A special vfs (e.g., EIO_vfs) may not have an entry.
 630  630           */
 631  631          if (vswp) {
 632  632                  fstype = vswp - vfssw;  /* Gets us the index */
 633  633          } else {
 634  634                  fstype = vfsp->vfs_fstype;
 635  635          }
 636  636  
 637  637          /*
 638  638           * Point to the per-fstype vopstats. The only valid values are
 639  639           * non-zero positive values less than the number of vfssw[] table
 640  640           * entries.
 641  641           */
 642  642          if (fstype > 0 && fstype < nfstype) {
 643  643                  vsp = vopstats_fstype[fstype];
 644  644          }
 645  645  
 646  646          return (vsp);
 647  647  }
 648  648  
 649  649  /*
 650  650   * Generate a kstat name, create the kstat structure, and allocate a
 651  651   * vsk_anchor_t to hold it together.  Return the pointer to the vsk_anchor_t
 652  652   * to the caller.  This must only be called from a mount.
 653  653   */
 654  654  vsk_anchor_t *
 655  655  get_vskstat_anchor(vfs_t *vfsp)
 656  656  {
 657  657          char            kstatstr[KSTAT_STRLEN]; /* kstat name for vopstats */
 658  658          statvfs64_t     statvfsbuf;             /* Needed to find f_fsid */
 659  659          vsk_anchor_t    *vskp = NULL;           /* vfs <--> kstat anchor */
 660  660          kstat_t         *ksp;                   /* Ptr to new kstat */
 661  661          avl_index_t     where;                  /* Location in the AVL tree */
 662  662  
 663  663          if (vfsp == NULL || vfsp->vfs_implp == NULL ||
 664  664              (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
 665  665                  return (NULL);
 666  666  
 667  667          /* Need to get the fsid to build a kstat name */
 668  668          if (VFS_STATVFS(vfsp, &statvfsbuf) == 0) {
 669  669                  /* Create a name for our kstats based on fsid */
 670  670                  (void) snprintf(kstatstr, KSTAT_STRLEN, "%s%lx",
 671  671                      VOPSTATS_STR, statvfsbuf.f_fsid);
 672  672  
 673  673                  /* Allocate and initialize the vsk_anchor_t */
 674  674                  vskp = kmem_cache_alloc(vsk_anchor_cache, KM_SLEEP);
 675  675                  bzero(vskp, sizeof (*vskp));
 676  676                  vskp->vsk_fsid = statvfsbuf.f_fsid;
 677  677  
 678  678                  mutex_enter(&vskstat_tree_lock);
 679  679                  if (avl_find(&vskstat_tree, vskp, &where) == NULL) {
 680  680                          avl_insert(&vskstat_tree, vskp, where);
 681  681                          mutex_exit(&vskstat_tree_lock);
 682  682  
 683  683                          /*
 684  684                           * Now that we've got the anchor in the AVL
 685  685                           * tree, we can create the kstat.
 686  686                           */
 687  687                          ksp = new_vskstat(kstatstr, &vfsp->vfs_vopstats);
 688  688                          if (ksp) {
 689  689                                  vskp->vsk_ksp = ksp;
 690  690                          }
 691  691                  } else {
 692  692                          /* Oops, found one! Release memory and lock. */
 693  693                          mutex_exit(&vskstat_tree_lock);
 694  694                          kmem_cache_free(vsk_anchor_cache, vskp);
 695  695                          vskp = NULL;
 696  696                  }
 697  697          }
 698  698          return (vskp);
 699  699  }
 700  700  
 701  701  /*
 702  702   * We're in the process of tearing down the vfs and need to cleanup
 703  703   * the data structures associated with the vopstats. Must only be called
 704  704   * from dounmount().
 705  705   */
 706  706  void
 707  707  teardown_vopstats(vfs_t *vfsp)
 708  708  {
 709  709          vsk_anchor_t    *vskap;
 710  710          avl_index_t     where;
 711  711  
 712  712          if (vfsp == NULL || vfsp->vfs_implp == NULL ||
 713  713              (vfsp->vfs_flag & VFS_STATS) == 0 || !vopstats_enabled)
 714  714                  return;
 715  715  
 716  716          /* This is a safe check since VFS_STATS must be set (see above) */
 717  717          if ((vskap = vfsp->vfs_vskap) == NULL)
 718  718                  return;
 719  719  
 720  720          /* Whack the pointer right away */
 721  721          vfsp->vfs_vskap = NULL;
 722  722  
 723  723          /* Lock the tree, remove the node, and delete the kstat */
 724  724          mutex_enter(&vskstat_tree_lock);
 725  725          if (avl_find(&vskstat_tree, vskap, &where)) {
 726  726                  avl_remove(&vskstat_tree, vskap);
 727  727          }
 728  728  
 729  729          if (vskap->vsk_ksp) {
 730  730                  kstat_delete(vskap->vsk_ksp);
 731  731          }
 732  732          mutex_exit(&vskstat_tree_lock);
 733  733  
 734  734          kmem_cache_free(vsk_anchor_cache, vskap);
 735  735  }
 736  736  
 737  737  /*
 738  738   * Read or write a vnode.  Called from kernel code.
 739  739   */
 740  740  int
 741  741  vn_rdwr(
 742  742          enum uio_rw rw,
 743  743          struct vnode *vp,
 744  744          caddr_t base,
 745  745          ssize_t len,
 746  746          offset_t offset,
 747  747          enum uio_seg seg,
 748  748          int ioflag,
 749  749          rlim64_t ulimit,        /* meaningful only if rw is UIO_WRITE */
 750  750          cred_t *cr,
 751  751          ssize_t *residp)
 752  752  {
 753  753          struct uio uio;
 754  754          struct iovec iov;
 755  755          int error;
 756  756          int in_crit = 0;
 757  757  
 758  758          if (rw == UIO_WRITE && ISROFILE(vp))
 759  759                  return (EROFS);
 760  760  
 761  761          if (len < 0)
 762  762                  return (EIO);
 763  763  
 764  764          VOPXID_MAP_CR(vp, cr);
 765  765  
 766  766          iov.iov_base = base;
 767  767          iov.iov_len = len;
 768  768          uio.uio_iov = &iov;
 769  769          uio.uio_iovcnt = 1;
 770  770          uio.uio_loffset = offset;
 771  771          uio.uio_segflg = (short)seg;
 772  772          uio.uio_resid = len;
 773  773          uio.uio_llimit = ulimit;
 774  774  
 775  775          /*
 776  776           * We have to enter the critical region before calling VOP_RWLOCK
 777  777           * to avoid a deadlock with ufs.
 778  778           */
 779  779          if (nbl_need_check(vp)) {
 780  780                  int svmand;
 781  781  
 782  782                  nbl_start_crit(vp, RW_READER);
 783  783                  in_crit = 1;
 784  784                  error = nbl_svmand(vp, cr, &svmand);
 785  785                  if (error != 0)
 786  786                          goto done;
 787  787                  if (nbl_conflict(vp, rw == UIO_WRITE ? NBL_WRITE : NBL_READ,
 788  788                      uio.uio_offset, uio.uio_resid, svmand, NULL)) {
 789  789                          error = EACCES;
 790  790                          goto done;
 791  791                  }
 792  792          }
 793  793  
 794  794          (void) VOP_RWLOCK(vp,
 795  795              rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
 796  796          if (rw == UIO_WRITE) {
 797  797                  uio.uio_fmode = FWRITE;
 798  798                  uio.uio_extflg = UIO_COPY_DEFAULT;
 799  799                  error = VOP_WRITE(vp, &uio, ioflag, cr, NULL);
 800  800          } else {
 801  801                  uio.uio_fmode = FREAD;
 802  802                  uio.uio_extflg = UIO_COPY_CACHED;
 803  803                  error = VOP_READ(vp, &uio, ioflag, cr, NULL);
 804  804          }
 805  805          VOP_RWUNLOCK(vp,
 806  806              rw == UIO_WRITE ? V_WRITELOCK_TRUE : V_WRITELOCK_FALSE, NULL);
 807  807          if (residp)
 808  808                  *residp = uio.uio_resid;
 809  809          else if (uio.uio_resid)
 810  810                  error = EIO;
 811  811  
 812  812  done:
 813  813          if (in_crit)
 814  814                  nbl_end_crit(vp);
 815  815          return (error);
 816  816  }
 817  817  
 818  818  /*
 819  819   * Release a vnode.  Call VOP_INACTIVE on last reference or
 820  820   * decrement reference count.
 821  821   *
 822  822   * To avoid race conditions, the v_count is left at 1 for
 823  823   * the call to VOP_INACTIVE. This prevents another thread
 824  824   * from reclaiming and releasing the vnode *before* the
 825  825   * VOP_INACTIVE routine has a chance to destroy the vnode.
 826  826   * We can't have more than 1 thread calling VOP_INACTIVE
 827  827   * on a vnode.
 828  828   */
 829  829  void
 830  830  vn_rele(vnode_t *vp)
 831  831  {
 832  832          VERIFY(vp->v_count > 0);
 833  833          mutex_enter(&vp->v_lock);
 834  834          if (vp->v_count == 1) {
 835  835                  mutex_exit(&vp->v_lock);
 836  836                  VOP_INACTIVE(vp, CRED(), NULL);
 837  837                  return;
 838  838          }
 839  839          vp->v_count--;
 840  840          mutex_exit(&vp->v_lock);
 841  841  }
 842  842  
 843  843  /*
 844  844   * Release a vnode referenced by the DNLC. Multiple DNLC references are treated
 845  845   * as a single reference, so v_count is not decremented until the last DNLC hold
 846  846   * is released. This makes it possible to distinguish vnodes that are referenced
 847  847   * only by the DNLC.
 848  848   */
 849  849  void
 850  850  vn_rele_dnlc(vnode_t *vp)
 851  851  {
 852  852          VERIFY((vp->v_count > 0) && (vp->v_count_dnlc > 0));
 853  853          mutex_enter(&vp->v_lock);
 854  854          if (--vp->v_count_dnlc == 0) {
 855  855                  if (vp->v_count == 1) {
 856  856                          mutex_exit(&vp->v_lock);
 857  857                          VOP_INACTIVE(vp, CRED(), NULL);
 858  858                          return;
 859  859                  }
 860  860                  vp->v_count--;
 861  861          }
 862  862          mutex_exit(&vp->v_lock);
 863  863  }
 864  864  
 865  865  /*
 866  866   * Like vn_rele() except that it clears v_stream under v_lock.
 867  867   * This is used by sockfs when it dismantels the association between
 868  868   * the sockfs node and the vnode in the underlaying file system.
 869  869   * v_lock has to be held to prevent a thread coming through the lookupname
 870  870   * path from accessing a stream head that is going away.
 871  871   */
 872  872  void
 873  873  vn_rele_stream(vnode_t *vp)
 874  874  {
 875  875          VERIFY(vp->v_count > 0);
 876  876          mutex_enter(&vp->v_lock);
 877  877          vp->v_stream = NULL;
 878  878          if (vp->v_count == 1) {
 879  879                  mutex_exit(&vp->v_lock);
 880  880                  VOP_INACTIVE(vp, CRED(), NULL);
 881  881                  return;
 882  882          }
 883  883          vp->v_count--;
 884  884          mutex_exit(&vp->v_lock);
 885  885  }
 886  886  
 887  887  static void
 888  888  vn_rele_inactive(vnode_t *vp)
 889  889  {
 890  890          VOP_INACTIVE(vp, CRED(), NULL);
 891  891  }
 892  892  
 893  893  /*
 894  894   * Like vn_rele() except if we are going to call VOP_INACTIVE() then do it
 895  895   * asynchronously using a taskq. This can avoid deadlocks caused by re-entering
 896  896   * the file system as a result of releasing the vnode. Note, file systems
 897  897   * already have to handle the race where the vnode is incremented before the
 898  898   * inactive routine is called and does its locking.
 899  899   *
 900  900   * Warning: Excessive use of this routine can lead to performance problems.
 901  901   * This is because taskqs throttle back allocation if too many are created.
 902  902   */
 903  903  void
 904  904  vn_rele_async(vnode_t *vp, taskq_t *taskq)
 905  905  {
 906  906          VERIFY(vp->v_count > 0);
 907  907          mutex_enter(&vp->v_lock);
 908  908          if (vp->v_count == 1) {
 909  909                  mutex_exit(&vp->v_lock);
 910  910                  VERIFY(taskq_dispatch(taskq, (task_func_t *)vn_rele_inactive,
 911  911                      vp, TQ_SLEEP) != NULL);
 912  912                  return;
 913  913          }
 914  914          vp->v_count--;
 915  915          mutex_exit(&vp->v_lock);
 916  916  }
 917  917  
 918  918  int
 919  919  vn_open(
 920  920          char *pnamep,
 921  921          enum uio_seg seg,
 922  922          int filemode,
 923  923          int createmode,
 924  924          struct vnode **vpp,
 925  925          enum create crwhy,
 926  926          mode_t umask)
 927  927  {
 928  928          return (vn_openat(pnamep, seg, filemode, createmode, vpp, crwhy,
 929  929              umask, NULL, -1));
 930  930  }
 931  931  
 932  932  
 933  933  /*
 934  934   * Open/create a vnode.
 935  935   * This may be callable by the kernel, the only known use
 936  936   * of user context being that the current user credentials
 937  937   * are used for permissions.  crwhy is defined iff filemode & FCREAT.
 938  938   */
 939  939  int
 940  940  vn_openat(
 941  941          char *pnamep,
 942  942          enum uio_seg seg,
 943  943          int filemode,
 944  944          int createmode,
 945  945          struct vnode **vpp,
 946  946          enum create crwhy,
 947  947          mode_t umask,
 948  948          struct vnode *startvp,
 949  949          int fd)
 950  950  {
 951  951          struct vnode *vp;
 952  952          int mode;
 953  953          int accessflags;
 954  954          int error;
 955  955          int in_crit = 0;
 956  956          int open_done = 0;
 957  957          int shrlock_done = 0;
 958  958          struct vattr vattr;
 959  959          enum symfollow follow;
 960  960          int estale_retry = 0;
 961  961          struct shrlock shr;
 962  962          struct shr_locowner shr_own;
 963  963  
 964  964          mode = 0;
 965  965          accessflags = 0;
 966  966          if (filemode & FREAD)
 967  967                  mode |= VREAD;
 968  968          if (filemode & (FWRITE|FTRUNC))
 969  969                  mode |= VWRITE;
 970  970          if (filemode & (FSEARCH|FEXEC|FXATTRDIROPEN))
 971  971                  mode |= VEXEC;
 972  972  
 973  973          /* symlink interpretation */
 974  974          if (filemode & FNOFOLLOW)
 975  975                  follow = NO_FOLLOW;
 976  976          else
 977  977                  follow = FOLLOW;
 978  978  
 979  979          if (filemode & FAPPEND)
 980  980                  accessflags |= V_APPEND;
 981  981  
 982  982  top:
 983  983          if (filemode & FCREAT) {
 984  984                  enum vcexcl excl;
 985  985  
 986  986                  /*
 987  987                   * Wish to create a file.
 988  988                   */
 989  989                  vattr.va_type = VREG;
 990  990                  vattr.va_mode = createmode;
 991  991                  vattr.va_mask = AT_TYPE|AT_MODE;
 992  992                  if (filemode & FTRUNC) {
 993  993                          vattr.va_size = 0;
 994  994                          vattr.va_mask |= AT_SIZE;
 995  995                  }
 996  996                  if (filemode & FEXCL)
 997  997                          excl = EXCL;
 998  998                  else
 999  999                          excl = NONEXCL;
1000 1000  
1001 1001                  if (error =
1002 1002                      vn_createat(pnamep, seg, &vattr, excl, mode, &vp, crwhy,
1003 1003                      (filemode & ~(FTRUNC|FEXCL)), umask, startvp))
1004 1004                          return (error);
1005 1005          } else {
1006 1006                  /*
1007 1007                   * Wish to open a file.  Just look it up.
1008 1008                   */
1009 1009                  if (error = lookupnameat(pnamep, seg, follow,
1010 1010                      NULLVPP, &vp, startvp)) {
1011 1011                          if ((error == ESTALE) &&
1012 1012                              fs_need_estale_retry(estale_retry++))
1013 1013                                  goto top;
1014 1014                          return (error);
1015 1015                  }
1016 1016  
1017 1017                  /*
1018 1018                   * Get the attributes to check whether file is large.
1019 1019                   * We do this only if the FOFFMAX flag is not set and
1020 1020                   * only for regular files.
1021 1021                   */
1022 1022  
1023 1023                  if (!(filemode & FOFFMAX) && (vp->v_type == VREG)) {
1024 1024                          vattr.va_mask = AT_SIZE;
1025 1025                          if ((error = VOP_GETATTR(vp, &vattr, 0,
1026 1026                              CRED(), NULL))) {
1027 1027                                  goto out;
1028 1028                          }
1029 1029                          if (vattr.va_size > (u_offset_t)MAXOFF32_T) {
1030 1030                                  /*
1031 1031                                   * Large File API - regular open fails
1032 1032                                   * if FOFFMAX flag is set in file mode
1033 1033                                   */
1034 1034                                  error = EOVERFLOW;
1035 1035                                  goto out;
1036 1036                          }
1037 1037                  }
1038 1038                  /*
1039 1039                   * Can't write directories, active texts, or
1040 1040                   * read-only filesystems.  Can't truncate files
1041 1041                   * on which mandatory locking is in effect.
1042 1042                   */
1043 1043                  if (filemode & (FWRITE|FTRUNC)) {
1044 1044                          /*
1045 1045                           * Allow writable directory if VDIROPEN flag is set.
1046 1046                           */
1047 1047                          if (vp->v_type == VDIR && !(vp->v_flag & VDIROPEN)) {
1048 1048                                  error = EISDIR;
1049 1049                                  goto out;
1050 1050                          }
1051 1051                          if (ISROFILE(vp)) {
1052 1052                                  error = EROFS;
1053 1053                                  goto out;
1054 1054                          }
1055 1055                          /*
1056 1056                           * Can't truncate files on which
1057 1057                           * sysv mandatory locking is in effect.
1058 1058                           */
1059 1059                          if (filemode & FTRUNC) {
1060 1060                                  vnode_t *rvp;
1061 1061  
1062 1062                                  if (VOP_REALVP(vp, &rvp, NULL) != 0)
1063 1063                                          rvp = vp;
1064 1064                                  if (rvp->v_filocks != NULL) {
1065 1065                                          vattr.va_mask = AT_MODE;
1066 1066                                          if ((error = VOP_GETATTR(vp,
1067 1067                                              &vattr, 0, CRED(), NULL)) == 0 &&
1068 1068                                              MANDLOCK(vp, vattr.va_mode))
1069 1069                                                  error = EAGAIN;
1070 1070                                  }
1071 1071                          }
1072 1072                          if (error)
1073 1073                                  goto out;
1074 1074                  }
1075 1075                  /*
1076 1076                   * Check permissions.
1077 1077                   */
1078 1078                  if (error = VOP_ACCESS(vp, mode, accessflags, CRED(), NULL))
1079 1079                          goto out;
1080 1080                  /*
1081 1081                   * Require FSEARCH to return a directory.
1082 1082                   * Require FEXEC to return a regular file.
1083 1083                   */
1084 1084                  if ((filemode & FSEARCH) && vp->v_type != VDIR) {
1085 1085                          error = ENOTDIR;
1086 1086                          goto out;
1087 1087                  }
1088 1088                  if ((filemode & FEXEC) && vp->v_type != VREG) {
1089 1089                          error = ENOEXEC;        /* XXX: error code? */
1090 1090                          goto out;
1091 1091                  }
1092 1092          }
1093 1093  
1094 1094          /*
1095 1095           * Do remaining checks for FNOFOLLOW and FNOLINKS.
1096 1096           */
1097 1097          if ((filemode & FNOFOLLOW) && vp->v_type == VLNK) {
1098 1098                  error = ELOOP;
1099 1099                  goto out;
1100 1100          }
1101 1101          if (filemode & FNOLINKS) {
1102 1102                  vattr.va_mask = AT_NLINK;
1103 1103                  if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))) {
1104 1104                          goto out;
1105 1105                  }
1106 1106                  if (vattr.va_nlink != 1) {
1107 1107                          error = EMLINK;
1108 1108                          goto out;
1109 1109                  }
1110 1110          }
1111 1111  
1112 1112          /*
1113 1113           * Opening a socket corresponding to the AF_UNIX pathname
1114 1114           * in the filesystem name space is not supported.
1115 1115           * However, VSOCK nodes in namefs are supported in order
1116 1116           * to make fattach work for sockets.
1117 1117           *
1118 1118           * XXX This uses VOP_REALVP to distinguish between
1119 1119           * an unopened namefs node (where VOP_REALVP returns a
1120 1120           * different VSOCK vnode) and a VSOCK created by vn_create
1121 1121           * in some file system (where VOP_REALVP would never return
1122 1122           * a different vnode).
1123 1123           */
1124 1124          if (vp->v_type == VSOCK) {
1125 1125                  struct vnode *nvp;
1126 1126  
1127 1127                  error = VOP_REALVP(vp, &nvp, NULL);
1128 1128                  if (error != 0 || nvp == NULL || nvp == vp ||
1129 1129                      nvp->v_type != VSOCK) {
1130 1130                          error = EOPNOTSUPP;
1131 1131                          goto out;
1132 1132                  }
1133 1133          }
1134 1134  
1135 1135          if ((vp->v_type == VREG) && nbl_need_check(vp)) {
1136 1136                  /* get share reservation */
1137 1137                  shr.s_access = 0;
1138 1138                  if (filemode & FWRITE)
1139 1139                          shr.s_access |= F_WRACC;
1140 1140                  if (filemode & FREAD)
1141 1141                          shr.s_access |= F_RDACC;
1142 1142                  shr.s_deny = 0;
1143 1143                  shr.s_sysid = 0;
1144 1144                  shr.s_pid = ttoproc(curthread)->p_pid;
1145 1145                  shr_own.sl_pid = shr.s_pid;
1146 1146                  shr_own.sl_id = fd;
1147 1147                  shr.s_own_len = sizeof (shr_own);
1148 1148                  shr.s_owner = (caddr_t)&shr_own;
1149 1149                  error = VOP_SHRLOCK(vp, F_SHARE_NBMAND, &shr, filemode, CRED(),
1150 1150                      NULL);
1151 1151                  if (error)
1152 1152                          goto out;
1153 1153                  shrlock_done = 1;
1154 1154  
1155 1155                  /* nbmand conflict check if truncating file */
1156 1156                  if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
1157 1157                          nbl_start_crit(vp, RW_READER);
1158 1158                          in_crit = 1;
1159 1159  
1160 1160                          vattr.va_mask = AT_SIZE;
1161 1161                          if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
1162 1162                                  goto out;
1163 1163                          if (nbl_conflict(vp, NBL_WRITE, 0, vattr.va_size, 0,
1164 1164                              NULL)) {
1165 1165                                  error = EACCES;
1166 1166                                  goto out;
1167 1167                          }
1168 1168                  }
1169 1169          }
1170 1170  
1171 1171          /*
1172 1172           * Do opening protocol.
1173 1173           */
1174 1174          error = VOP_OPEN(&vp, filemode, CRED(), NULL);
1175 1175          if (error)
1176 1176                  goto out;
1177 1177          open_done = 1;
1178 1178  
1179 1179          /*
1180 1180           * Truncate if required.
1181 1181           */
1182 1182          if ((filemode & FTRUNC) && !(filemode & FCREAT)) {
1183 1183                  vattr.va_size = 0;
1184 1184                  vattr.va_mask = AT_SIZE;
1185 1185                  if ((error = VOP_SETATTR(vp, &vattr, 0, CRED(), NULL)) != 0)
1186 1186                          goto out;
1187 1187          }
1188 1188  out:
1189 1189          ASSERT(vp->v_count > 0);
1190 1190  
1191 1191          if (in_crit) {
1192 1192                  nbl_end_crit(vp);
1193 1193                  in_crit = 0;
1194 1194          }
1195 1195          if (error) {
1196 1196                  if (open_done) {
1197 1197                          (void) VOP_CLOSE(vp, filemode, 1, (offset_t)0, CRED(),
1198 1198                              NULL);
1199 1199                          open_done = 0;
1200 1200                          shrlock_done = 0;
1201 1201                  }
1202 1202                  if (shrlock_done) {
1203 1203                          (void) VOP_SHRLOCK(vp, F_UNSHARE, &shr, 0, CRED(),
1204 1204                              NULL);
1205 1205                          shrlock_done = 0;
1206 1206                  }
1207 1207  
1208 1208                  /*
1209 1209                   * The following clause was added to handle a problem
1210 1210                   * with NFS consistency.  It is possible that a lookup
1211 1211                   * of the file to be opened succeeded, but the file
1212 1212                   * itself doesn't actually exist on the server.  This
1213 1213                   * is chiefly due to the DNLC containing an entry for
1214 1214                   * the file which has been removed on the server.  In
1215 1215                   * this case, we just start over.  If there was some
1216 1216                   * other cause for the ESTALE error, then the lookup
1217 1217                   * of the file will fail and the error will be returned
1218 1218                   * above instead of looping around from here.
1219 1219                   */
1220 1220                  VN_RELE(vp);
1221 1221                  if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1222 1222                          goto top;
1223 1223          } else
1224 1224                  *vpp = vp;
1225 1225          return (error);
1226 1226  }
1227 1227  
1228 1228  /*
1229 1229   * The following two accessor functions are for the NFSv4 server.  Since there
1230 1230   * is no VOP_OPEN_UP/DOWNGRADE we need a way for the NFS server to keep the
1231 1231   * vnode open counts correct when a client "upgrades" an open or does an
1232 1232   * open_downgrade.  In NFS, an upgrade or downgrade can not only change the
1233 1233   * open mode (add or subtract read or write), but also change the share/deny
1234 1234   * modes.  However, share reservations are not integrated with OPEN, yet, so
1235 1235   * we need to handle each separately.  These functions are cleaner than having
1236 1236   * the NFS server manipulate the counts directly, however, nobody else should

↓ open down ↓

1236 lines elided

↑ open up ↑

1237 1237   * use these functions.
1238 1238   */
1239 1239  void
1240 1240  vn_open_upgrade(
1241 1241          vnode_t *vp,
1242 1242          int filemode)
1243 1243  {
1244 1244          ASSERT(vp->v_type == VREG);
1245 1245  
1246 1246          if (filemode & FREAD)
1247      -                atomic_add_32(&(vp->v_rdcnt), 1);
     1247 +                atomic_inc_32(&vp->v_rdcnt);
1248 1248          if (filemode & FWRITE)
1249      -                atomic_add_32(&(vp->v_wrcnt), 1);
     1249 +                atomic_inc_32(&vp->v_wrcnt);
1250 1250  
1251 1251  }
1252 1252  
1253 1253  void
1254 1254  vn_open_downgrade(
1255 1255          vnode_t *vp,
1256 1256          int filemode)
1257 1257  {
1258 1258          ASSERT(vp->v_type == VREG);
1259 1259  
1260 1260          if (filemode & FREAD) {
1261 1261                  ASSERT(vp->v_rdcnt > 0);
1262      -                atomic_add_32(&(vp->v_rdcnt), -1);
     1262 +                atomic_dec_32(&vp->v_rdcnt);
1263 1263          }
1264 1264          if (filemode & FWRITE) {
1265 1265                  ASSERT(vp->v_wrcnt > 0);
1266      -                atomic_add_32(&(vp->v_wrcnt), -1);
     1266 +                atomic_dec_32(&vp->v_wrcnt);
1267 1267          }
1268 1268  
1269 1269  }
1270 1270  
1271 1271  int
1272 1272  vn_create(
1273 1273          char *pnamep,
1274 1274          enum uio_seg seg,
1275 1275          struct vattr *vap,
1276 1276          enum vcexcl excl,

1277 1277          int mode,
1278 1278          struct vnode **vpp,
1279 1279          enum create why,
1280 1280          int flag,
1281 1281          mode_t umask)
1282 1282  {
1283 1283          return (vn_createat(pnamep, seg, vap, excl, mode, vpp, why, flag,
1284 1284              umask, NULL));
1285 1285  }
1286 1286  
1287 1287  /*
1288 1288   * Create a vnode (makenode).
1289 1289   */
1290 1290  int
1291 1291  vn_createat(
1292 1292          char *pnamep,
1293 1293          enum uio_seg seg,
1294 1294          struct vattr *vap,
1295 1295          enum vcexcl excl,
1296 1296          int mode,
1297 1297          struct vnode **vpp,
1298 1298          enum create why,
1299 1299          int flag,
1300 1300          mode_t umask,
1301 1301          struct vnode *startvp)
1302 1302  {
1303 1303          struct vnode *dvp;      /* ptr to parent dir vnode */
1304 1304          struct vnode *vp = NULL;
1305 1305          struct pathname pn;
1306 1306          int error;
1307 1307          int in_crit = 0;
1308 1308          struct vattr vattr;
1309 1309          enum symfollow follow;
1310 1310          int estale_retry = 0;
1311 1311          uint32_t auditing = AU_AUDITING();
1312 1312  
1313 1313          ASSERT((vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));
1314 1314  
1315 1315          /* symlink interpretation */
1316 1316          if ((flag & FNOFOLLOW) || excl == EXCL)
1317 1317                  follow = NO_FOLLOW;
1318 1318          else
1319 1319                  follow = FOLLOW;
1320 1320          flag &= ~(FNOFOLLOW|FNOLINKS);
1321 1321  
1322 1322  top:
1323 1323          /*
1324 1324           * Lookup directory.
1325 1325           * If new object is a file, call lower level to create it.
1326 1326           * Note that it is up to the lower level to enforce exclusive
1327 1327           * creation, if the file is already there.
1328 1328           * This allows the lower level to do whatever
1329 1329           * locking or protocol that is needed to prevent races.
1330 1330           * If the new object is directory call lower level to make
1331 1331           * the new directory, with "." and "..".
1332 1332           */
1333 1333          if (error = pn_get(pnamep, seg, &pn))
1334 1334                  return (error);
1335 1335          if (auditing)
1336 1336                  audit_vncreate_start();
1337 1337          dvp = NULL;
1338 1338          *vpp = NULL;
1339 1339          /*
1340 1340           * lookup will find the parent directory for the vnode.
1341 1341           * When it is done the pn holds the name of the entry
1342 1342           * in the directory.
1343 1343           * If this is a non-exclusive create we also find the node itself.
1344 1344           */
1345 1345          error = lookuppnat(&pn, NULL, follow, &dvp,
1346 1346              (excl == EXCL) ? NULLVPP : vpp, startvp);
1347 1347          if (error) {
1348 1348                  pn_free(&pn);
1349 1349                  if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1350 1350                          goto top;
1351 1351                  if (why == CRMKDIR && error == EINVAL)
1352 1352                          error = EEXIST;         /* SVID */
1353 1353                  return (error);
1354 1354          }
1355 1355  
1356 1356          if (why != CRMKNOD)
1357 1357                  vap->va_mode &= ~VSVTX;
1358 1358  
1359 1359          /*
1360 1360           * If default ACLs are defined for the directory don't apply the
1361 1361           * umask if umask is passed.
1362 1362           */
1363 1363  
1364 1364          if (umask) {
1365 1365  
1366 1366                  vsecattr_t vsec;
1367 1367  
1368 1368                  vsec.vsa_aclcnt = 0;
1369 1369                  vsec.vsa_aclentp = NULL;
1370 1370                  vsec.vsa_dfaclcnt = 0;
1371 1371                  vsec.vsa_dfaclentp = NULL;
1372 1372                  vsec.vsa_mask = VSA_DFACLCNT;
1373 1373                  error = VOP_GETSECATTR(dvp, &vsec, 0, CRED(), NULL);
1374 1374                  /*
1375 1375                   * If error is ENOSYS then treat it as no error
1376 1376                   * Don't want to force all file systems to support
1377 1377                   * aclent_t style of ACL's.
1378 1378                   */
1379 1379                  if (error == ENOSYS)
1380 1380                          error = 0;
1381 1381                  if (error) {
1382 1382                          if (*vpp != NULL)
1383 1383                                  VN_RELE(*vpp);
1384 1384                          goto out;
1385 1385                  } else {
1386 1386                          /*
1387 1387                           * Apply the umask if no default ACLs.
1388 1388                           */
1389 1389                          if (vsec.vsa_dfaclcnt == 0)
1390 1390                                  vap->va_mode &= ~umask;
1391 1391  
1392 1392                          /*
1393 1393                           * VOP_GETSECATTR() may have allocated memory for
1394 1394                           * ACLs we didn't request, so double-check and
1395 1395                           * free it if necessary.
1396 1396                           */
1397 1397                          if (vsec.vsa_aclcnt && vsec.vsa_aclentp != NULL)
1398 1398                                  kmem_free((caddr_t)vsec.vsa_aclentp,
1399 1399                                      vsec.vsa_aclcnt * sizeof (aclent_t));
1400 1400                          if (vsec.vsa_dfaclcnt && vsec.vsa_dfaclentp != NULL)
1401 1401                                  kmem_free((caddr_t)vsec.vsa_dfaclentp,
1402 1402                                      vsec.vsa_dfaclcnt * sizeof (aclent_t));
1403 1403                  }
1404 1404          }
1405 1405  
1406 1406          /*
1407 1407           * In general we want to generate EROFS if the file system is
1408 1408           * readonly.  However, POSIX (IEEE Std. 1003.1) section 5.3.1
1409 1409           * documents the open system call, and it says that O_CREAT has no
1410 1410           * effect if the file already exists.  Bug 1119649 states
1411 1411           * that open(path, O_CREAT, ...) fails when attempting to open an
1412 1412           * existing file on a read only file system.  Thus, the first part
1413 1413           * of the following if statement has 3 checks:
1414 1414           *      if the file exists &&
1415 1415           *              it is being open with write access &&
1416 1416           *              the file system is read only
1417 1417           *      then generate EROFS
1418 1418           */
1419 1419          if ((*vpp != NULL && (mode & VWRITE) && ISROFILE(*vpp)) ||
1420 1420              (*vpp == NULL && dvp->v_vfsp->vfs_flag & VFS_RDONLY)) {
1421 1421                  if (*vpp)
1422 1422                          VN_RELE(*vpp);
1423 1423                  error = EROFS;
1424 1424          } else if (excl == NONEXCL && *vpp != NULL) {
1425 1425                  vnode_t *rvp;
1426 1426  
1427 1427                  /*
1428 1428                   * File already exists.  If a mandatory lock has been
1429 1429                   * applied, return error.
1430 1430                   */
1431 1431                  vp = *vpp;
1432 1432                  if (VOP_REALVP(vp, &rvp, NULL) != 0)
1433 1433                          rvp = vp;
1434 1434                  if ((vap->va_mask & AT_SIZE) && nbl_need_check(vp)) {
1435 1435                          nbl_start_crit(vp, RW_READER);
1436 1436                          in_crit = 1;
1437 1437                  }
1438 1438                  if (rvp->v_filocks != NULL || rvp->v_shrlocks != NULL) {
1439 1439                          vattr.va_mask = AT_MODE|AT_SIZE;
1440 1440                          if (error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL)) {
1441 1441                                  goto out;
1442 1442                          }
1443 1443                          if (MANDLOCK(vp, vattr.va_mode)) {
1444 1444                                  error = EAGAIN;
1445 1445                                  goto out;
1446 1446                          }
1447 1447                          /*
1448 1448                           * File cannot be truncated if non-blocking mandatory
1449 1449                           * locks are currently on the file.
1450 1450                           */
1451 1451                          if ((vap->va_mask & AT_SIZE) && in_crit) {
1452 1452                                  u_offset_t offset;
1453 1453                                  ssize_t length;
1454 1454  
1455 1455                                  offset = vap->va_size > vattr.va_size ?
1456 1456                                      vattr.va_size : vap->va_size;
1457 1457                                  length = vap->va_size > vattr.va_size ?
1458 1458                                      vap->va_size - vattr.va_size :
1459 1459                                      vattr.va_size - vap->va_size;
1460 1460                                  if (nbl_conflict(vp, NBL_WRITE, offset,
1461 1461                                      length, 0, NULL)) {
1462 1462                                          error = EACCES;
1463 1463                                          goto out;
1464 1464                                  }
1465 1465                          }
1466 1466                  }
1467 1467  
1468 1468                  /*
1469 1469                   * If the file is the root of a VFS, we've crossed a
1470 1470                   * mount point and the "containing" directory that we
1471 1471                   * acquired above (dvp) is irrelevant because it's in
1472 1472                   * a different file system.  We apply VOP_CREATE to the
1473 1473                   * target itself instead of to the containing directory
1474 1474                   * and supply a null path name to indicate (conventionally)
1475 1475                   * the node itself as the "component" of interest.
1476 1476                   *
1477 1477                   * The intercession of the file system is necessary to
1478 1478                   * ensure that the appropriate permission checks are
1479 1479                   * done.
1480 1480                   */
1481 1481                  if (vp->v_flag & VROOT) {
1482 1482                          ASSERT(why != CRMKDIR);
1483 1483                          error = VOP_CREATE(vp, "", vap, excl, mode, vpp,
1484 1484                              CRED(), flag, NULL, NULL);
1485 1485                          /*
1486 1486                           * If the create succeeded, it will have created
1487 1487                           * a new reference to the vnode.  Give up the
1488 1488                           * original reference.  The assertion should not
1489 1489                           * get triggered because NBMAND locks only apply to
1490 1490                           * VREG files.  And if in_crit is non-zero for some
1491 1491                           * reason, detect that here, rather than when we
1492 1492                           * dereference a null vp.
1493 1493                           */
1494 1494                          ASSERT(in_crit == 0);
1495 1495                          VN_RELE(vp);
1496 1496                          vp = NULL;
1497 1497                          goto out;
1498 1498                  }
1499 1499  
1500 1500                  /*
1501 1501                   * Large File API - non-large open (FOFFMAX flag not set)
1502 1502                   * of regular file fails if the file size exceeds MAXOFF32_T.
1503 1503                   */
1504 1504                  if (why != CRMKDIR &&
1505 1505                      !(flag & FOFFMAX) &&
1506 1506                      (vp->v_type == VREG)) {
1507 1507                          vattr.va_mask = AT_SIZE;
1508 1508                          if ((error = VOP_GETATTR(vp, &vattr, 0,
1509 1509                              CRED(), NULL))) {
1510 1510                                  goto out;
1511 1511                          }
1512 1512                          if ((vattr.va_size > (u_offset_t)MAXOFF32_T)) {
1513 1513                                  error = EOVERFLOW;
1514 1514                                  goto out;
1515 1515                          }
1516 1516                  }
1517 1517          }
1518 1518  
1519 1519          if (error == 0) {
1520 1520                  /*
1521 1521                   * Call mkdir() if specified, otherwise create().
1522 1522                   */
1523 1523                  int must_be_dir = pn_fixslash(&pn);     /* trailing '/'? */
1524 1524  
1525 1525                  if (why == CRMKDIR)
1526 1526                          /*
1527 1527                           * N.B., if vn_createat() ever requests
1528 1528                           * case-insensitive behavior then it will need
1529 1529                           * to be passed to VOP_MKDIR().  VOP_CREATE()
1530 1530                           * will already get it via "flag"
1531 1531                           */
1532 1532                          error = VOP_MKDIR(dvp, pn.pn_path, vap, vpp, CRED(),
1533 1533                              NULL, 0, NULL);
1534 1534                  else if (!must_be_dir)
1535 1535                          error = VOP_CREATE(dvp, pn.pn_path, vap,
1536 1536                              excl, mode, vpp, CRED(), flag, NULL, NULL);
1537 1537                  else
1538 1538                          error = ENOTDIR;
1539 1539          }
1540 1540  
1541 1541  out:
1542 1542  
1543 1543          if (auditing)
1544 1544                  audit_vncreate_finish(*vpp, error);
1545 1545          if (in_crit) {
1546 1546                  nbl_end_crit(vp);
1547 1547                  in_crit = 0;
1548 1548          }
1549 1549          if (vp != NULL) {
1550 1550                  VN_RELE(vp);
1551 1551                  vp = NULL;
1552 1552          }
1553 1553          pn_free(&pn);
1554 1554          VN_RELE(dvp);
1555 1555          /*
1556 1556           * The following clause was added to handle a problem
1557 1557           * with NFS consistency.  It is possible that a lookup
1558 1558           * of the file to be created succeeded, but the file
1559 1559           * itself doesn't actually exist on the server.  This
1560 1560           * is chiefly due to the DNLC containing an entry for
1561 1561           * the file which has been removed on the server.  In
1562 1562           * this case, we just start over.  If there was some
1563 1563           * other cause for the ESTALE error, then the lookup
1564 1564           * of the file will fail and the error will be returned
1565 1565           * above instead of looping around from here.
1566 1566           */
1567 1567          if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1568 1568                  goto top;
1569 1569          return (error);
1570 1570  }
1571 1571  
1572 1572  int
1573 1573  vn_link(char *from, char *to, enum uio_seg seg)
1574 1574  {
1575 1575          return (vn_linkat(NULL, from, NO_FOLLOW, NULL, to, seg));
1576 1576  }
1577 1577  
1578 1578  int
1579 1579  vn_linkat(vnode_t *fstartvp, char *from, enum symfollow follow,
1580 1580      vnode_t *tstartvp, char *to, enum uio_seg seg)
1581 1581  {
1582 1582          struct vnode *fvp;              /* from vnode ptr */
1583 1583          struct vnode *tdvp;             /* to directory vnode ptr */
1584 1584          struct pathname pn;
1585 1585          int error;
1586 1586          struct vattr vattr;
1587 1587          dev_t fsid;
1588 1588          int estale_retry = 0;
1589 1589          uint32_t auditing = AU_AUDITING();
1590 1590  
1591 1591  top:
1592 1592          fvp = tdvp = NULL;
1593 1593          if (error = pn_get(to, seg, &pn))
1594 1594                  return (error);
1595 1595          if (auditing && fstartvp != NULL)
1596 1596                  audit_setfsat_path(1);
1597 1597          if (error = lookupnameat(from, seg, follow, NULLVPP, &fvp, fstartvp))
1598 1598                  goto out;
1599 1599          if (auditing && tstartvp != NULL)
1600 1600                  audit_setfsat_path(3);
1601 1601          if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &tdvp, NULLVPP, tstartvp))
1602 1602                  goto out;
1603 1603          /*
1604 1604           * Make sure both source vnode and target directory vnode are
1605 1605           * in the same vfs and that it is writeable.
1606 1606           */
1607 1607          vattr.va_mask = AT_FSID;
1608 1608          if (error = VOP_GETATTR(fvp, &vattr, 0, CRED(), NULL))
1609 1609                  goto out;
1610 1610          fsid = vattr.va_fsid;
1611 1611          vattr.va_mask = AT_FSID;
1612 1612          if (error = VOP_GETATTR(tdvp, &vattr, 0, CRED(), NULL))
1613 1613                  goto out;
1614 1614          if (fsid != vattr.va_fsid) {
1615 1615                  error = EXDEV;
1616 1616                  goto out;
1617 1617          }
1618 1618          if (tdvp->v_vfsp->vfs_flag & VFS_RDONLY) {
1619 1619                  error = EROFS;
1620 1620                  goto out;
1621 1621          }
1622 1622          /*
1623 1623           * Do the link.
1624 1624           */
1625 1625          (void) pn_fixslash(&pn);
1626 1626          error = VOP_LINK(tdvp, fvp, pn.pn_path, CRED(), NULL, 0);
1627 1627  out:
1628 1628          pn_free(&pn);
1629 1629          if (fvp)
1630 1630                  VN_RELE(fvp);
1631 1631          if (tdvp)
1632 1632                  VN_RELE(tdvp);
1633 1633          if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1634 1634                  goto top;
1635 1635          return (error);
1636 1636  }
1637 1637  
1638 1638  int
1639 1639  vn_rename(char *from, char *to, enum uio_seg seg)
1640 1640  {
1641 1641          return (vn_renameat(NULL, from, NULL, to, seg));
1642 1642  }
1643 1643  
1644 1644  int
1645 1645  vn_renameat(vnode_t *fdvp, char *fname, vnode_t *tdvp,
1646 1646                  char *tname, enum uio_seg seg)
1647 1647  {
1648 1648          int error;
1649 1649          struct vattr vattr;
1650 1650          struct pathname fpn;            /* from pathname */
1651 1651          struct pathname tpn;            /* to pathname */
1652 1652          dev_t fsid;
1653 1653          int in_crit_src, in_crit_targ;
1654 1654          vnode_t *fromvp, *fvp;
1655 1655          vnode_t *tovp, *targvp;
1656 1656          int estale_retry = 0;
1657 1657          uint32_t auditing = AU_AUDITING();
1658 1658  
1659 1659  top:
1660 1660          fvp = fromvp = tovp = targvp = NULL;
1661 1661          in_crit_src = in_crit_targ = 0;
1662 1662          /*
1663 1663           * Get to and from pathnames.
1664 1664           */
1665 1665          if (error = pn_get(fname, seg, &fpn))
1666 1666                  return (error);
1667 1667          if (error = pn_get(tname, seg, &tpn)) {
1668 1668                  pn_free(&fpn);
1669 1669                  return (error);
1670 1670          }
1671 1671  
1672 1672          /*
1673 1673           * First we need to resolve the correct directories
1674 1674           * The passed in directories may only be a starting point,
1675 1675           * but we need the real directories the file(s) live in.
1676 1676           * For example the fname may be something like usr/lib/sparc
1677 1677           * and we were passed in the / directory, but we need to
1678 1678           * use the lib directory for the rename.
1679 1679           */
1680 1680  
1681 1681          if (auditing && fdvp != NULL)
1682 1682                  audit_setfsat_path(1);
1683 1683          /*
1684 1684           * Lookup to and from directories.
1685 1685           */
1686 1686          if (error = lookuppnat(&fpn, NULL, NO_FOLLOW, &fromvp, &fvp, fdvp)) {
1687 1687                  goto out;
1688 1688          }
1689 1689  
1690 1690          /*
1691 1691           * Make sure there is an entry.
1692 1692           */
1693 1693          if (fvp == NULL) {
1694 1694                  error = ENOENT;
1695 1695                  goto out;
1696 1696          }
1697 1697  
1698 1698          if (auditing && tdvp != NULL)
1699 1699                  audit_setfsat_path(3);
1700 1700          if (error = lookuppnat(&tpn, NULL, NO_FOLLOW, &tovp, &targvp, tdvp)) {
1701 1701                  goto out;
1702 1702          }
1703 1703  
1704 1704          /*
1705 1705           * Make sure both the from vnode directory and the to directory
1706 1706           * are in the same vfs and the to directory is writable.
1707 1707           * We check fsid's, not vfs pointers, so loopback fs works.
1708 1708           */
1709 1709          if (fromvp != tovp) {
1710 1710                  vattr.va_mask = AT_FSID;
1711 1711                  if (error = VOP_GETATTR(fromvp, &vattr, 0, CRED(), NULL))
1712 1712                          goto out;
1713 1713                  fsid = vattr.va_fsid;
1714 1714                  vattr.va_mask = AT_FSID;
1715 1715                  if (error = VOP_GETATTR(tovp, &vattr, 0, CRED(), NULL))
1716 1716                          goto out;
1717 1717                  if (fsid != vattr.va_fsid) {
1718 1718                          error = EXDEV;
1719 1719                          goto out;
1720 1720                  }
1721 1721          }
1722 1722  
1723 1723          if (tovp->v_vfsp->vfs_flag & VFS_RDONLY) {
1724 1724                  error = EROFS;
1725 1725                  goto out;
1726 1726          }
1727 1727  
1728 1728          if (targvp && (fvp != targvp)) {
1729 1729                  nbl_start_crit(targvp, RW_READER);
1730 1730                  in_crit_targ = 1;
1731 1731                  if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
1732 1732                          error = EACCES;
1733 1733                          goto out;
1734 1734                  }
1735 1735          }
1736 1736  
1737 1737          if (nbl_need_check(fvp)) {
1738 1738                  nbl_start_crit(fvp, RW_READER);
1739 1739                  in_crit_src = 1;
1740 1740                  if (nbl_conflict(fvp, NBL_RENAME, 0, 0, 0, NULL)) {
1741 1741                          error = EACCES;
1742 1742                          goto out;
1743 1743                  }
1744 1744          }
1745 1745  
1746 1746          /*
1747 1747           * Do the rename.
1748 1748           */
1749 1749          (void) pn_fixslash(&tpn);
1750 1750          error = VOP_RENAME(fromvp, fpn.pn_path, tovp, tpn.pn_path, CRED(),
1751 1751              NULL, 0);
1752 1752  
1753 1753  out:
1754 1754          pn_free(&fpn);
1755 1755          pn_free(&tpn);
1756 1756          if (in_crit_src)
1757 1757                  nbl_end_crit(fvp);
1758 1758          if (in_crit_targ)
1759 1759                  nbl_end_crit(targvp);
1760 1760          if (fromvp)
1761 1761                  VN_RELE(fromvp);
1762 1762          if (tovp)
1763 1763                  VN_RELE(tovp);
1764 1764          if (targvp)
1765 1765                  VN_RELE(targvp);
1766 1766          if (fvp)
1767 1767                  VN_RELE(fvp);
1768 1768          if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1769 1769                  goto top;
1770 1770          return (error);
1771 1771  }
1772 1772  
1773 1773  /*
1774 1774   * Remove a file or directory.
1775 1775   */
1776 1776  int
1777 1777  vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag)
1778 1778  {
1779 1779          return (vn_removeat(NULL, fnamep, seg, dirflag));
1780 1780  }
1781 1781  
1782 1782  int
1783 1783  vn_removeat(vnode_t *startvp, char *fnamep, enum uio_seg seg, enum rm dirflag)
1784 1784  {
1785 1785          struct vnode *vp;               /* entry vnode */
1786 1786          struct vnode *dvp;              /* ptr to parent dir vnode */
1787 1787          struct vnode *coveredvp;
1788 1788          struct pathname pn;             /* name of entry */
1789 1789          enum vtype vtype;
1790 1790          int error;
1791 1791          struct vfs *vfsp;
1792 1792          struct vfs *dvfsp;      /* ptr to parent dir vfs */
1793 1793          int in_crit = 0;
1794 1794          int estale_retry = 0;
1795 1795  
1796 1796  top:
1797 1797          if (error = pn_get(fnamep, seg, &pn))
1798 1798                  return (error);
1799 1799          dvp = vp = NULL;
1800 1800          if (error = lookuppnat(&pn, NULL, NO_FOLLOW, &dvp, &vp, startvp)) {
1801 1801                  pn_free(&pn);
1802 1802                  if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1803 1803                          goto top;
1804 1804                  return (error);
1805 1805          }
1806 1806  
1807 1807          /*
1808 1808           * Make sure there is an entry.
1809 1809           */
1810 1810          if (vp == NULL) {
1811 1811                  error = ENOENT;
1812 1812                  goto out;
1813 1813          }
1814 1814  
1815 1815          vfsp = vp->v_vfsp;
1816 1816          dvfsp = dvp->v_vfsp;
1817 1817  
1818 1818          /*
1819 1819           * If the named file is the root of a mounted filesystem, fail,
1820 1820           * unless it's marked unlinkable.  In that case, unmount the
1821 1821           * filesystem and proceed to unlink the covered vnode.  (If the
1822 1822           * covered vnode is a directory, use rmdir instead of unlink,
1823 1823           * to avoid file system corruption.)
1824 1824           */
1825 1825          if (vp->v_flag & VROOT) {
1826 1826                  if ((vfsp->vfs_flag & VFS_UNLINKABLE) == 0) {
1827 1827                          error = EBUSY;
1828 1828                          goto out;
1829 1829                  }
1830 1830  
1831 1831                  /*
1832 1832                   * Namefs specific code starts here.
1833 1833                   */
1834 1834  
1835 1835                  if (dirflag == RMDIRECTORY) {
1836 1836                          /*
1837 1837                           * User called rmdir(2) on a file that has
1838 1838                           * been namefs mounted on top of.  Since
1839 1839                           * namefs doesn't allow directories to
1840 1840                           * be mounted on other files we know
1841 1841                           * vp is not of type VDIR so fail to operation.
1842 1842                           */
1843 1843                          error = ENOTDIR;
1844 1844                          goto out;
1845 1845                  }
1846 1846  
1847 1847                  /*
1848 1848                   * If VROOT is still set after grabbing vp->v_lock,
1849 1849                   * noone has finished nm_unmount so far and coveredvp
1850 1850                   * is valid.
1851 1851                   * If we manage to grab vn_vfswlock(coveredvp) before releasing
1852 1852                   * vp->v_lock, any race window is eliminated.
1853 1853                   */
1854 1854  
1855 1855                  mutex_enter(&vp->v_lock);
1856 1856                  if ((vp->v_flag & VROOT) == 0) {
1857 1857                          /* Someone beat us to the unmount */
1858 1858                          mutex_exit(&vp->v_lock);
1859 1859                          error = EBUSY;
1860 1860                          goto out;
1861 1861                  }
1862 1862                  vfsp = vp->v_vfsp;
1863 1863                  coveredvp = vfsp->vfs_vnodecovered;
1864 1864                  ASSERT(coveredvp);
1865 1865                  /*
1866 1866                   * Note: Implementation of vn_vfswlock shows that ordering of
1867 1867                   * v_lock / vn_vfswlock is not an issue here.
1868 1868                   */
1869 1869                  error = vn_vfswlock(coveredvp);
1870 1870                  mutex_exit(&vp->v_lock);
1871 1871  
1872 1872                  if (error)
1873 1873                          goto out;
1874 1874  
1875 1875                  VN_HOLD(coveredvp);
1876 1876                  VN_RELE(vp);
1877 1877                  error = dounmount(vfsp, 0, CRED());
1878 1878  
1879 1879                  /*
1880 1880                   * Unmounted the namefs file system; now get
1881 1881                   * the object it was mounted over.
1882 1882                   */
1883 1883                  vp = coveredvp;
1884 1884                  /*
1885 1885                   * If namefs was mounted over a directory, then
1886 1886                   * we want to use rmdir() instead of unlink().
1887 1887                   */
1888 1888                  if (vp->v_type == VDIR)
1889 1889                          dirflag = RMDIRECTORY;
1890 1890  
1891 1891                  if (error)
1892 1892                          goto out;
1893 1893          }
1894 1894  
1895 1895          /*
1896 1896           * Make sure filesystem is writeable.
1897 1897           * We check the parent directory's vfs in case this is an lofs vnode.
1898 1898           */
1899 1899          if (dvfsp && dvfsp->vfs_flag & VFS_RDONLY) {
1900 1900                  error = EROFS;
1901 1901                  goto out;
1902 1902          }
1903 1903  
1904 1904          vtype = vp->v_type;
1905 1905  
1906 1906          /*
1907 1907           * If there is the possibility of an nbmand share reservation, make
1908 1908           * sure it's okay to remove the file.  Keep a reference to the
1909 1909           * vnode, so that we can exit the nbl critical region after
1910 1910           * calling VOP_REMOVE.
1911 1911           * If there is no possibility of an nbmand share reservation,
1912 1912           * release the vnode reference now.  Filesystems like NFS may
1913 1913           * behave differently if there is an extra reference, so get rid of
1914 1914           * this one.  Fortunately, we can't have nbmand mounts on NFS
1915 1915           * filesystems.
1916 1916           */
1917 1917          if (nbl_need_check(vp)) {
1918 1918                  nbl_start_crit(vp, RW_READER);
1919 1919                  in_crit = 1;
1920 1920                  if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
1921 1921                          error = EACCES;
1922 1922                          goto out;
1923 1923                  }
1924 1924          } else {
1925 1925                  VN_RELE(vp);
1926 1926                  vp = NULL;
1927 1927          }
1928 1928  
1929 1929          if (dirflag == RMDIRECTORY) {
1930 1930                  /*
1931 1931                   * Caller is using rmdir(2), which can only be applied to
1932 1932                   * directories.
1933 1933                   */
1934 1934                  if (vtype != VDIR) {
1935 1935                          error = ENOTDIR;
1936 1936                  } else {
1937 1937                          vnode_t *cwd;
1938 1938                          proc_t *pp = curproc;
1939 1939  
1940 1940                          mutex_enter(&pp->p_lock);
1941 1941                          cwd = PTOU(pp)->u_cdir;
1942 1942                          VN_HOLD(cwd);
1943 1943                          mutex_exit(&pp->p_lock);
1944 1944                          error = VOP_RMDIR(dvp, pn.pn_path, cwd, CRED(),
1945 1945                              NULL, 0);
1946 1946                          VN_RELE(cwd);
1947 1947                  }
1948 1948          } else {
1949 1949                  /*
1950 1950                   * Unlink(2) can be applied to anything.
1951 1951                   */
1952 1952                  error = VOP_REMOVE(dvp, pn.pn_path, CRED(), NULL, 0);
1953 1953          }
1954 1954  
1955 1955  out:
1956 1956          pn_free(&pn);
1957 1957          if (in_crit) {
1958 1958                  nbl_end_crit(vp);
1959 1959                  in_crit = 0;
1960 1960          }
1961 1961          if (vp != NULL)
1962 1962                  VN_RELE(vp);
1963 1963          if (dvp != NULL)
1964 1964                  VN_RELE(dvp);
1965 1965          if ((error == ESTALE) && fs_need_estale_retry(estale_retry++))
1966 1966                  goto top;
1967 1967          return (error);
1968 1968  }
1969 1969  
1970 1970  /*
1971 1971   * Utility function to compare equality of vnodes.
1972 1972   * Compare the underlying real vnodes, if there are underlying vnodes.
1973 1973   * This is a more thorough comparison than the VN_CMP() macro provides.
1974 1974   */
1975 1975  int
1976 1976  vn_compare(vnode_t *vp1, vnode_t *vp2)
1977 1977  {
1978 1978          vnode_t *realvp;
1979 1979  
1980 1980          if (vp1 != NULL && VOP_REALVP(vp1, &realvp, NULL) == 0)
1981 1981                  vp1 = realvp;
1982 1982          if (vp2 != NULL && VOP_REALVP(vp2, &realvp, NULL) == 0)
1983 1983                  vp2 = realvp;
1984 1984          return (VN_CMP(vp1, vp2));
1985 1985  }
1986 1986  
1987 1987  /*
1988 1988   * The number of locks to hash into.  This value must be a power
1989 1989   * of 2 minus 1 and should probably also be prime.
            *
            * NUM_BUCKETS is used directly as the bit mask in
            * VN_VFSLOCKS_HASH(), which is why it has to have the form
            * 2^n - 1; the bucket array itself has NUM_BUCKETS + 1 entries.
            * NOTE(review): the current value, 1023 (3 * 11 * 31), is not prime.
1990 1990   */
1991 1991  #define NUM_BUCKETS     1023
1992 1992  
           /*
            * One hash bucket.  vb_lock is held while vb_list, the chain of
            * entries hashing to this bucket, is walked or modified.  pad fills
            * the structure out to 64 bytes, assuming the compiler adds no
            * padding of its own.
            */
1993 1993  struct  vn_vfslocks_bucket {
1994 1994          kmutex_t vb_lock;
1995 1995          vn_vfslocks_entry_t *vb_list;
1996 1996          char pad[64 - sizeof (kmutex_t) - sizeof (void *)];
1997 1997  };
1998 1998  
1999 1999  /*
2000 2000   * Total number of buckets will be NUM_BUCKETS + 1.
            *
            * The pragma requests 64-byte alignment for the array; presumably the
            * intent is one bucket per cache line -- TODO confirm.
2001 2001   */
2002 2002  
2003 2003  #pragma align   64(vn_vfslocks_buckets)
2004 2004  static  struct vn_vfslocks_bucket       vn_vfslocks_buckets[NUM_BUCKETS + 1];
2005 2005  
           /*
            * Map a vfs or vnode pointer to a bucket index: drop the low
            * VN_VFSLOCKS_SHIFT bits of the address and mask what is left with
            * NUM_BUCKETS.
            */
2006 2006  #define VN_VFSLOCKS_SHIFT       9
2007 2007  
2008 2008  #define VN_VFSLOCKS_HASH(vfsvpptr)      \
2009 2009          ((((intptr_t)(vfsvpptr)) >> VN_VFSLOCKS_SHIFT) & NUM_BUCKETS)
2010 2010  
2011 2011  /*
2012 2012   * vn_vfslocks_getlock() uses an HASH scheme to generate
2013 2013   * rwstlock using vfs/vnode pointer passed to it.
2014 2014   *
2015 2015   * vn_vfslocks_rele() releases a reference in the
2016 2016   * HASH table which allows the entry allocated by
2017 2017   * vn_vfslocks_getlock() to be freed at a later
2018 2018   * stage when the refcount drops to zero.
2019 2019   */
2020 2020  
2021 2021  vn_vfslocks_entry_t *
2022 2022  vn_vfslocks_getlock(void *vfsvpptr)
2023 2023  {
2024 2024          struct vn_vfslocks_bucket *bp;
2025 2025          vn_vfslocks_entry_t *vep;
2026 2026          vn_vfslocks_entry_t *tvep;
2027 2027  
2028 2028          ASSERT(vfsvpptr != NULL);
2029 2029          bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vfsvpptr)];
2030 2030  
2031 2031          mutex_enter(&bp->vb_lock);
2032 2032          for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
2033 2033                  if (vep->ve_vpvfs == vfsvpptr) {
2034 2034                          vep->ve_refcnt++;
2035 2035                          mutex_exit(&bp->vb_lock);
2036 2036                          return (vep);
2037 2037                  }
2038 2038          }
2039 2039          mutex_exit(&bp->vb_lock);
2040 2040          vep = kmem_alloc(sizeof (*vep), KM_SLEEP);
2041 2041          rwst_init(&vep->ve_lock, NULL, RW_DEFAULT, NULL);
2042 2042          vep->ve_vpvfs = (char *)vfsvpptr;
2043 2043          vep->ve_refcnt = 1;
2044 2044          mutex_enter(&bp->vb_lock);
2045 2045          for (tvep = bp->vb_list; tvep != NULL; tvep = tvep->ve_next) {
2046 2046                  if (tvep->ve_vpvfs == vfsvpptr) {
2047 2047                          tvep->ve_refcnt++;
2048 2048                          mutex_exit(&bp->vb_lock);
2049 2049  
2050 2050                          /*
2051 2051                           * There is already an entry in the hash
2052 2052                           * destroy what we just allocated.
2053 2053                           */
2054 2054                          rwst_destroy(&vep->ve_lock);
2055 2055                          kmem_free(vep, sizeof (*vep));
2056 2056                          return (tvep);
2057 2057                  }
2058 2058          }
2059 2059          vep->ve_next = bp->vb_list;
2060 2060          bp->vb_list = vep;
2061 2061          mutex_exit(&bp->vb_lock);
2062 2062          return (vep);
2063 2063  }
2064 2064  
2065 2065  void
2066 2066  vn_vfslocks_rele(vn_vfslocks_entry_t *vepent)
2067 2067  {
2068 2068          struct vn_vfslocks_bucket *bp;
2069 2069          vn_vfslocks_entry_t *vep;
2070 2070          vn_vfslocks_entry_t *pvep;
2071 2071  
2072 2072          ASSERT(vepent != NULL);
2073 2073          ASSERT(vepent->ve_vpvfs != NULL);
2074 2074  
2075 2075          bp = &vn_vfslocks_buckets[VN_VFSLOCKS_HASH(vepent->ve_vpvfs)];
2076 2076  
2077 2077          mutex_enter(&bp->vb_lock);
2078 2078          vepent->ve_refcnt--;
2079 2079  
2080 2080          if ((int32_t)vepent->ve_refcnt < 0)
2081 2081                  cmn_err(CE_PANIC, "vn_vfslocks_rele: refcount negative");
2082 2082  
2083 2083          if (vepent->ve_refcnt == 0) {
2084 2084                  for (vep = bp->vb_list; vep != NULL; vep = vep->ve_next) {
2085 2085                          if (vep->ve_vpvfs == vepent->ve_vpvfs) {
2086 2086                                  if (bp->vb_list == vep)
2087 2087                                          bp->vb_list = vep->ve_next;
2088 2088                                  else {
2089 2089                                          /* LINTED */
2090 2090                                          pvep->ve_next = vep->ve_next;
2091 2091                                  }
2092 2092                                  mutex_exit(&bp->vb_lock);
2093 2093                                  rwst_destroy(&vep->ve_lock);
2094 2094                                  kmem_free(vep, sizeof (*vep));
2095 2095                                  return;
2096 2096                          }
2097 2097                          pvep = vep;
2098 2098                  }
2099 2099                  cmn_err(CE_PANIC, "vn_vfslocks_rele: vp/vfs not found");
2100 2100          }
2101 2101          mutex_exit(&bp->vb_lock);
2102 2102  }
2103 2103  
2104 2104  /*
2105 2105   * vn_vfswlock_wait is used to implement a lock which is logically a writers
2106 2106   * lock protecting the v_vfsmountedhere field.
2107 2107   * vn_vfswlock_wait has been modified to be similar to vn_vfswlock,
2108 2108   * except that it blocks to acquire the lock VVFSLOCK.
2109 2109   *
2110 2110   * traverse() and routines re-implementing part of traverse (e.g. autofs)
2111 2111   * need to hold this lock. mount(), vn_rename(), vn_remove() and so on
2112 2112   * need the non-blocking version of the writers lock i.e. vn_vfswlock
2113 2113   */
2114 2114  int
2115 2115  vn_vfswlock_wait(vnode_t *vp)
2116 2116  {
2117 2117          int retval;
2118 2118          vn_vfslocks_entry_t *vpvfsentry;
2119 2119          ASSERT(vp != NULL);
2120 2120  
2121 2121          vpvfsentry = vn_vfslocks_getlock(vp);
2122 2122          retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_WRITER);
2123 2123  
2124 2124          if (retval == EINTR) {
2125 2125                  vn_vfslocks_rele(vpvfsentry);
2126 2126                  return (EINTR);
2127 2127          }
2128 2128          return (retval);
2129 2129  }
2130 2130  
2131 2131  int
2132 2132  vn_vfsrlock_wait(vnode_t *vp)
2133 2133  {
2134 2134          int retval;
2135 2135          vn_vfslocks_entry_t *vpvfsentry;
2136 2136          ASSERT(vp != NULL);
2137 2137  
2138 2138          vpvfsentry = vn_vfslocks_getlock(vp);
2139 2139          retval = rwst_enter_sig(&vpvfsentry->ve_lock, RW_READER);
2140 2140  
2141 2141          if (retval == EINTR) {
2142 2142                  vn_vfslocks_rele(vpvfsentry);
2143 2143                  return (EINTR);
2144 2144          }
2145 2145  
2146 2146          return (retval);
2147 2147  }
2148 2148  
2149 2149  
2150 2150  /*
2151 2151   * vn_vfswlock is used to implement a lock which is logically a writers lock
2152 2152   * protecting the v_vfsmountedhere field.
2153 2153   */
2154 2154  int
2155 2155  vn_vfswlock(vnode_t *vp)
2156 2156  {
2157 2157          vn_vfslocks_entry_t *vpvfsentry;
2158 2158  
2159 2159          /*
2160 2160           * If vp is NULL then somebody is trying to lock the covered vnode
2161 2161           * of /.  (vfs_vnodecovered is NULL for /).  This situation will
2162 2162           * only happen when unmounting /.  Since that operation will fail
2163 2163           * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2164 2164           */
2165 2165          if (vp == NULL)
2166 2166                  return (EBUSY);
2167 2167  
2168 2168          vpvfsentry = vn_vfslocks_getlock(vp);
2169 2169  
2170 2170          if (rwst_tryenter(&vpvfsentry->ve_lock, RW_WRITER))
2171 2171                  return (0);
2172 2172  
2173 2173          vn_vfslocks_rele(vpvfsentry);
2174 2174          return (EBUSY);
2175 2175  }
2176 2176  
2177 2177  int
2178 2178  vn_vfsrlock(vnode_t *vp)
2179 2179  {
2180 2180          vn_vfslocks_entry_t *vpvfsentry;
2181 2181  
2182 2182          /*
2183 2183           * If vp is NULL then somebody is trying to lock the covered vnode
2184 2184           * of /.  (vfs_vnodecovered is NULL for /).  This situation will
2185 2185           * only happen when unmounting /.  Since that operation will fail
2186 2186           * anyway, return EBUSY here instead of in VFS_UNMOUNT.
2187 2187           */
2188 2188          if (vp == NULL)
2189 2189                  return (EBUSY);
2190 2190  
2191 2191          vpvfsentry = vn_vfslocks_getlock(vp);
2192 2192  
2193 2193          if (rwst_tryenter(&vpvfsentry->ve_lock, RW_READER))
2194 2194                  return (0);
2195 2195  
2196 2196          vn_vfslocks_rele(vpvfsentry);
2197 2197          return (EBUSY);
2198 2198  }
2199 2199  
2200 2200  void
2201 2201  vn_vfsunlock(vnode_t *vp)
2202 2202  {
2203 2203          vn_vfslocks_entry_t *vpvfsentry;
2204 2204  
2205 2205          /*
2206 2206           * ve_refcnt needs to be decremented twice.
2207 2207           * 1. To release refernce after a call to vn_vfslocks_getlock()
2208 2208           * 2. To release the reference from the locking routines like
2209 2209           *    vn_vfsrlock/vn_vfswlock etc,.
2210 2210           */
2211 2211          vpvfsentry = vn_vfslocks_getlock(vp);
2212 2212          vn_vfslocks_rele(vpvfsentry);
2213 2213  
2214 2214          rwst_exit(&vpvfsentry->ve_lock);
2215 2215          vn_vfslocks_rele(vpvfsentry);
2216 2216  }
2217 2217  
2218 2218  int
2219 2219  vn_vfswlock_held(vnode_t *vp)
2220 2220  {
2221 2221          int held;
2222 2222          vn_vfslocks_entry_t *vpvfsentry;
2223 2223  
2224 2224          ASSERT(vp != NULL);
2225 2225  
2226 2226          vpvfsentry = vn_vfslocks_getlock(vp);
2227 2227          held = rwst_lock_held(&vpvfsentry->ve_lock, RW_WRITER);
2228 2228  
2229 2229          vn_vfslocks_rele(vpvfsentry);
2230 2230          return (held);
2231 2231  }
2232 2232  
2233 2233  
2234 2234  int
2235 2235  vn_make_ops(
2236 2236          const char *name,                       /* Name of file system */
2237 2237          const fs_operation_def_t *templ,        /* Operation specification */
2238 2238          vnodeops_t **actual)                    /* Return the vnodeops */
2239 2239  {
2240 2240          int unused_ops;
2241 2241          int error;
2242 2242  
2243 2243          *actual = (vnodeops_t *)kmem_alloc(sizeof (vnodeops_t), KM_SLEEP);
2244 2244  
2245 2245          (*actual)->vnop_name = name;
2246 2246  
2247 2247          error = fs_build_vector(*actual, &unused_ops, vn_ops_table, templ);
2248 2248          if (error) {
2249 2249                  kmem_free(*actual, sizeof (vnodeops_t));
2250 2250          }
2251 2251  
2252 2252  #if DEBUG
2253 2253          if (unused_ops != 0)
2254 2254                  cmn_err(CE_WARN, "vn_make_ops: %s: %d operations supplied "
2255 2255                      "but not used", name, unused_ops);
2256 2256  #endif
2257 2257  
2258 2258          return (error);
2259 2259  }
2260 2260  
2261 2261  /*
2262 2262   * Free the vnodeops created as a result of vn_make_ops()
2263 2263   */
2264 2264  void
2265 2265  vn_freevnodeops(vnodeops_t *vnops)
2266 2266  {
2267 2267          kmem_free(vnops, sizeof (vnodeops_t));
2268 2268  }
2269 2269  
2270 2270  /*
2271 2271   * Vnode cache.
2272 2272   */
2273 2273  
2274 2274  /* ARGSUSED */
2275 2275  static int
2276 2276  vn_cache_constructor(void *buf, void *cdrarg, int kmflags)
2277 2277  {
2278 2278          struct vnode *vp;
2279 2279  
2280 2280          vp = buf;
2281 2281  
2282 2282          mutex_init(&vp->v_lock, NULL, MUTEX_DEFAULT, NULL);
2283 2283          mutex_init(&vp->v_vsd_lock, NULL, MUTEX_DEFAULT, NULL);
2284 2284          cv_init(&vp->v_cv, NULL, CV_DEFAULT, NULL);
2285 2285          rw_init(&vp->v_nbllock, NULL, RW_DEFAULT, NULL);
2286 2286          vp->v_femhead = NULL;   /* Must be done before vn_reinit() */
2287 2287          vp->v_path = NULL;
2288 2288          vp->v_mpssdata = NULL;
2289 2289          vp->v_vsd = NULL;
2290 2290          vp->v_fopdata = NULL;
2291 2291  
2292 2292          return (0);
2293 2293  }
2294 2294  
2295 2295  /* ARGSUSED */
2296 2296  static void
2297 2297  vn_cache_destructor(void *buf, void *cdrarg)
2298 2298  {
2299 2299          struct vnode *vp;
2300 2300  
2301 2301          vp = buf;
2302 2302  
2303 2303          rw_destroy(&vp->v_nbllock);
2304 2304          cv_destroy(&vp->v_cv);
2305 2305          mutex_destroy(&vp->v_vsd_lock);
2306 2306          mutex_destroy(&vp->v_lock);
2307 2307  }
2308 2308  
2309 2309  void
2310 2310  vn_create_cache(void)
2311 2311  {
2312 2312          /* LINTED */
2313 2313          ASSERT((1 << VNODE_ALIGN_LOG2) ==
2314 2314              P2ROUNDUP(sizeof (struct vnode), VNODE_ALIGN));
2315 2315          vn_cache = kmem_cache_create("vn_cache", sizeof (struct vnode),
2316 2316              VNODE_ALIGN, vn_cache_constructor, vn_cache_destructor, NULL, NULL,
2317 2317              NULL, 0);
2318 2318  }
2319 2319  
2320 2320  void
2321 2321  vn_destroy_cache(void)
2322 2322  {
2323 2323          kmem_cache_destroy(vn_cache);
2324 2324  }
2325 2325  
2326 2326  /*
2327 2327   * Used by file systems when fs-specific nodes (e.g., ufs inodes) are
2328 2328   * cached by the file system and vnodes remain associated.
2329 2329   */
2330 2330  void
2331 2331  vn_recycle(vnode_t *vp)
2332 2332  {
2333 2333          ASSERT(vp->v_pages == NULL);
2334 2334  
2335 2335          /*
2336 2336           * XXX - This really belongs in vn_reinit(), but we have some issues
2337 2337           * with the counts.  Best to have it here for clean initialization.
2338 2338           */
2339 2339          vp->v_rdcnt = 0;
2340 2340          vp->v_wrcnt = 0;
2341 2341          vp->v_mmap_read = 0;
2342 2342          vp->v_mmap_write = 0;
2343 2343  
2344 2344          /*
2345 2345           * If FEM was in use, make sure everything gets cleaned up
2346 2346           * NOTE: vp->v_femhead is initialized to NULL in the vnode
2347 2347           * constructor.
2348 2348           */
2349 2349          if (vp->v_femhead) {
2350 2350                  /* XXX - There should be a free_femhead() that does all this */
2351 2351                  ASSERT(vp->v_femhead->femh_list == NULL);
2352 2352                  mutex_destroy(&vp->v_femhead->femh_lock);
2353 2353                  kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
2354 2354                  vp->v_femhead = NULL;
2355 2355          }
2356 2356          if (vp->v_path) {
2357 2357                  kmem_free(vp->v_path, strlen(vp->v_path) + 1);
2358 2358                  vp->v_path = NULL;
2359 2359          }
2360 2360  
2361 2361          if (vp->v_fopdata != NULL) {
2362 2362                  free_fopdata(vp);
2363 2363          }
2364 2364          vp->v_mpssdata = NULL;
2365 2365          vsd_free(vp);
2366 2366  }
2367 2367  
2368 2368  /*
2369 2369   * Used to reset the vnode fields including those that are directly accessible
2370 2370   * as well as those which require an accessor function.
2371 2371   *
2372 2372   * Does not initialize:
2373 2373   *      synchronization objects: v_lock, v_vsd_lock, v_nbllock, v_cv
2374 2374   *      v_data (since FS-nodes and vnodes point to each other and should
2375 2375   *              be updated simultaneously)
2376 2376   *      v_op (in case someone needs to make a VOP call on this object)
2377 2377   */
2378 2378  void
2379 2379  vn_reinit(vnode_t *vp)
2380 2380  {
2381 2381          vp->v_count = 1;
2382 2382          vp->v_count_dnlc = 0;
2383 2383          vp->v_vfsp = NULL;
2384 2384          vp->v_stream = NULL;
2385 2385          vp->v_vfsmountedhere = NULL;
2386 2386          vp->v_flag = 0;
2387 2387          vp->v_type = VNON;
2388 2388          vp->v_rdev = NODEV;
2389 2389  
2390 2390          vp->v_filocks = NULL;
2391 2391          vp->v_shrlocks = NULL;
2392 2392          vp->v_pages = NULL;
2393 2393  
2394 2394          vp->v_locality = NULL;
2395 2395          vp->v_xattrdir = NULL;
2396 2396  
2397 2397          /* Handles v_femhead, v_path, and the r/w/map counts */
2398 2398          vn_recycle(vp);
2399 2399  }
2400 2400  
2401 2401  vnode_t *
2402 2402  vn_alloc(int kmflag)
2403 2403  {
2404 2404          vnode_t *vp;
2405 2405  
2406 2406          vp = kmem_cache_alloc(vn_cache, kmflag);
2407 2407  
2408 2408          if (vp != NULL) {
2409 2409                  vp->v_femhead = NULL;   /* Must be done before vn_reinit() */
2410 2410                  vp->v_fopdata = NULL;
2411 2411                  vn_reinit(vp);
2412 2412          }
2413 2413  
2414 2414          return (vp);
2415 2415  }
2416 2416  
2417 2417  void
2418 2418  vn_free(vnode_t *vp)
2419 2419  {
2420 2420          ASSERT(vp->v_shrlocks == NULL);
2421 2421          ASSERT(vp->v_filocks == NULL);
2422 2422  
2423 2423          /*
2424 2424           * Some file systems call vn_free() with v_count of zero,
2425 2425           * some with v_count of 1.  In any case, the value should
2426 2426           * never be anything else.
2427 2427           */
2428 2428          ASSERT((vp->v_count == 0) || (vp->v_count == 1));
2429 2429          ASSERT(vp->v_count_dnlc == 0);
2430 2430          if (vp->v_path != NULL) {
2431 2431                  kmem_free(vp->v_path, strlen(vp->v_path) + 1);
2432 2432                  vp->v_path = NULL;
2433 2433          }
2434 2434  
2435 2435          /* If FEM was in use, make sure everything gets cleaned up */
2436 2436          if (vp->v_femhead) {
2437 2437                  /* XXX - There should be a free_femhead() that does all this */
2438 2438                  ASSERT(vp->v_femhead->femh_list == NULL);
2439 2439                  mutex_destroy(&vp->v_femhead->femh_lock);
2440 2440                  kmem_free(vp->v_femhead, sizeof (*(vp->v_femhead)));
2441 2441                  vp->v_femhead = NULL;
2442 2442          }
2443 2443  
2444 2444          if (vp->v_fopdata != NULL) {
2445 2445                  free_fopdata(vp);
2446 2446          }
2447 2447          vp->v_mpssdata = NULL;
2448 2448          vsd_free(vp);
2449 2449          kmem_cache_free(vn_cache, vp);
2450 2450  }
2451 2451  
2452 2452  /*
2453 2453   * vnode status changes, should define better states than 1, 0.
2454 2454   */
2455 2455  void
2456 2456  vn_reclaim(vnode_t *vp)
2457 2457  {
2458 2458          vfs_t   *vfsp = vp->v_vfsp;
2459 2459  
2460 2460          if (vfsp == NULL ||
2461 2461              vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2462 2462                  return;
2463 2463          }
2464 2464          (void) VFS_VNSTATE(vfsp, vp, VNTRANS_RECLAIMED);
2465 2465  }
2466 2466  
2467 2467  void
2468 2468  vn_idle(vnode_t *vp)
2469 2469  {
2470 2470          vfs_t   *vfsp = vp->v_vfsp;
2471 2471  
2472 2472          if (vfsp == NULL ||
2473 2473              vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2474 2474                  return;
2475 2475          }
2476 2476          (void) VFS_VNSTATE(vfsp, vp, VNTRANS_IDLED);
2477 2477  }
2478 2478  void
2479 2479  vn_exists(vnode_t *vp)
2480 2480  {
2481 2481          vfs_t   *vfsp = vp->v_vfsp;
2482 2482  
2483 2483          if (vfsp == NULL ||
2484 2484              vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2485 2485                  return;
2486 2486          }
2487 2487          (void) VFS_VNSTATE(vfsp, vp, VNTRANS_EXISTS);
2488 2488  }
2489 2489  
2490 2490  void
2491 2491  vn_invalid(vnode_t *vp)
2492 2492  {
2493 2493          vfs_t   *vfsp = vp->v_vfsp;
2494 2494  
2495 2495          if (vfsp == NULL ||
2496 2496              vfsp->vfs_implp == NULL || vfsp->vfs_femhead == NULL) {
2497 2497                  return;
2498 2498          }
2499 2499          (void) VFS_VNSTATE(vfsp, vp, VNTRANS_DESTROYED);
2500 2500  }
2501 2501  
2502 2502  /* Vnode event notification */
2503 2503  
2504 2504  int
2505 2505  vnevent_support(vnode_t *vp, caller_context_t *ct)
2506 2506  {
2507 2507          if (vp == NULL)
2508 2508                  return (EINVAL);
2509 2509  
2510 2510          return (VOP_VNEVENT(vp, VE_SUPPORT, NULL, NULL, ct));
2511 2511  }
2512 2512  
2513 2513  void
2514 2514  vnevent_rename_src(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2515 2515  {
2516 2516          if (vp == NULL || vp->v_femhead == NULL) {
2517 2517                  return;
2518 2518          }
2519 2519          (void) VOP_VNEVENT(vp, VE_RENAME_SRC, dvp, name, ct);
2520 2520  }
2521 2521  
2522 2522  void
2523 2523  vnevent_rename_dest(vnode_t *vp, vnode_t *dvp, char *name,
2524 2524      caller_context_t *ct)
2525 2525  {
2526 2526          if (vp == NULL || vp->v_femhead == NULL) {
2527 2527                  return;
2528 2528          }
2529 2529          (void) VOP_VNEVENT(vp, VE_RENAME_DEST, dvp, name, ct);
2530 2530  }
2531 2531  
2532 2532  void
2533 2533  vnevent_rename_dest_dir(vnode_t *vp, caller_context_t *ct)
2534 2534  {
2535 2535          if (vp == NULL || vp->v_femhead == NULL) {
2536 2536                  return;
2537 2537          }
2538 2538          (void) VOP_VNEVENT(vp, VE_RENAME_DEST_DIR, NULL, NULL, ct);
2539 2539  }
2540 2540  
2541 2541  void
2542 2542  vnevent_remove(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2543 2543  {
2544 2544          if (vp == NULL || vp->v_femhead == NULL) {
2545 2545                  return;
2546 2546          }
2547 2547          (void) VOP_VNEVENT(vp, VE_REMOVE, dvp, name, ct);
2548 2548  }
2549 2549  
2550 2550  void
2551 2551  vnevent_rmdir(vnode_t *vp, vnode_t *dvp, char *name, caller_context_t *ct)
2552 2552  {
2553 2553          if (vp == NULL || vp->v_femhead == NULL) {
2554 2554                  return;
2555 2555          }
2556 2556          (void) VOP_VNEVENT(vp, VE_RMDIR, dvp, name, ct);
2557 2557  }
2558 2558  
2559 2559  void
2560 2560  vnevent_create(vnode_t *vp, caller_context_t *ct)
2561 2561  {
2562 2562          if (vp == NULL || vp->v_femhead == NULL) {
2563 2563                  return;
2564 2564          }
2565 2565          (void) VOP_VNEVENT(vp, VE_CREATE, NULL, NULL, ct);
2566 2566  }
2567 2567  
2568 2568  void
2569 2569  vnevent_link(vnode_t *vp, caller_context_t *ct)
2570 2570  {
2571 2571          if (vp == NULL || vp->v_femhead == NULL) {
2572 2572                  return;
2573 2573          }
2574 2574          (void) VOP_VNEVENT(vp, VE_LINK, NULL, NULL, ct);
2575 2575  }
2576 2576  
2577 2577  void
2578 2578  vnevent_mountedover(vnode_t *vp, caller_context_t *ct)
2579 2579  {
2580 2580          if (vp == NULL || vp->v_femhead == NULL) {
2581 2581                  return;
2582 2582          }
2583 2583          (void) VOP_VNEVENT(vp, VE_MOUNTEDOVER, NULL, NULL, ct);
2584 2584  }
2585 2585  
2586 2586  void
2587 2587  vnevent_truncate(vnode_t *vp, caller_context_t *ct)
2588 2588  {
2589 2589          if (vp == NULL || vp->v_femhead == NULL) {
2590 2590                  return;
2591 2591          }
2592 2592          (void) VOP_VNEVENT(vp, VE_TRUNCATE, NULL, NULL, ct);
2593 2593  }
2594 2594  
2595 2595  /*
2596 2596   * Vnode accessors.
2597 2597   */
2598 2598  
2599 2599  int
2600 2600  vn_is_readonly(vnode_t *vp)
2601 2601  {
2602 2602          return (vp->v_vfsp->vfs_flag & VFS_RDONLY);
2603 2603  }
2604 2604  
2605 2605  int
2606 2606  vn_has_flocks(vnode_t *vp)
2607 2607  {
2608 2608          return (vp->v_filocks != NULL);
2609 2609  }
2610 2610  
2611 2611  int
2612 2612  vn_has_mandatory_locks(vnode_t *vp, int mode)
2613 2613  {
2614 2614          return ((vp->v_filocks != NULL) && (MANDLOCK(vp, mode)));
2615 2615  }
2616 2616  
2617 2617  int
2618 2618  vn_has_cached_data(vnode_t *vp)
2619 2619  {
2620 2620          return (vp->v_pages != NULL);
2621 2621  }
2622 2622  
2623 2623  /*
2624 2624   * Return 0 if the vnode in question shouldn't be permitted into a zone via
2625 2625   * zone_enter(2).
2626 2626   */
2627 2627  int
2628 2628  vn_can_change_zones(vnode_t *vp)
2629 2629  {
2630 2630          struct vfssw *vswp;
2631 2631          int allow = 1;
2632 2632          vnode_t *rvp;
2633 2633  
2634 2634          if (nfs_global_client_only != 0)
2635 2635                  return (1);
2636 2636  
2637 2637          /*
2638 2638           * We always want to look at the underlying vnode if there is one.
2639 2639           */
2640 2640          if (VOP_REALVP(vp, &rvp, NULL) != 0)
2641 2641                  rvp = vp;
2642 2642          /*
2643 2643           * Some pseudo filesystems (including doorfs) don't actually register
2644 2644           * their vfsops_t, so the following may return NULL; we happily let
2645 2645           * such vnodes switch zones.
2646 2646           */
2647 2647          vswp = vfs_getvfsswbyvfsops(vfs_getops(rvp->v_vfsp));
2648 2648          if (vswp != NULL) {
2649 2649                  if (vswp->vsw_flag & VSW_NOTZONESAFE)
2650 2650                          allow = 0;
2651 2651                  vfs_unrefvfssw(vswp);
2652 2652          }
2653 2653          return (allow);
2654 2654  }
2655 2655  
2656 2656  /*
2657 2657   * Return nonzero if the vnode is a mount point, zero if not.
2658 2658   */
2659 2659  int
2660 2660  vn_ismntpt(vnode_t *vp)
2661 2661  {
2662 2662          return (vp->v_vfsmountedhere != NULL);
2663 2663  }
2664 2664  
2665 2665  /* Retrieve the vfs (if any) mounted on this vnode */
2666 2666  vfs_t *
2667 2667  vn_mountedvfs(vnode_t *vp)
2668 2668  {
2669 2669          return (vp->v_vfsmountedhere);
2670 2670  }
2671 2671  
2672 2672  /*
2673 2673   * Return nonzero if the vnode is referenced by the dnlc, zero if not.
2674 2674   */
2675 2675  int
2676 2676  vn_in_dnlc(vnode_t *vp)
2677 2677  {
2678 2678          return (vp->v_count_dnlc > 0);
2679 2679  }
2680 2680  
2681 2681  /*
2682 2682   * vn_has_other_opens() checks whether a particular file is opened by more than
2683 2683   * just the caller and whether the open is for read and/or write.
2684 2684   * This routine is for calling after the caller has already called VOP_OPEN()
2685 2685   * and the caller wishes to know if they are the only one with it open for
2686 2686   * the mode(s) specified.
2687 2687   *
2688 2688   * Vnode counts are only kept on regular files (v_type=VREG).
2689 2689   */
2690 2690  int
2691 2691  vn_has_other_opens(
2692 2692          vnode_t *vp,
2693 2693          v_mode_t mode)
2694 2694  {
2695 2695  
2696 2696          ASSERT(vp != NULL);
2697 2697  
2698 2698          switch (mode) {
2699 2699          case V_WRITE:
2700 2700                  if (vp->v_wrcnt > 1)
2701 2701                          return (V_TRUE);
2702 2702                  break;
2703 2703          case V_RDORWR:
2704 2704                  if ((vp->v_rdcnt > 1) || (vp->v_wrcnt > 1))
2705 2705                          return (V_TRUE);
2706 2706                  break;
2707 2707          case V_RDANDWR:
2708 2708                  if ((vp->v_rdcnt > 1) && (vp->v_wrcnt > 1))
2709 2709                          return (V_TRUE);
2710 2710                  break;
2711 2711          case V_READ:
2712 2712                  if (vp->v_rdcnt > 1)
2713 2713                          return (V_TRUE);
2714 2714                  break;
2715 2715          }
2716 2716  
2717 2717          return (V_FALSE);
2718 2718  }
2719 2719  
2720 2720  /*
2721 2721   * vn_is_opened() checks whether a particular file is opened and
2722 2722   * whether the open is for read and/or write.
2723 2723   *
2724 2724   * Vnode counts are only kept on regular files (v_type=VREG).
2725 2725   */
2726 2726  int
2727 2727  vn_is_opened(
2728 2728          vnode_t *vp,
2729 2729          v_mode_t mode)
2730 2730  {
2731 2731  
2732 2732          ASSERT(vp != NULL);
2733 2733  
2734 2734          switch (mode) {
2735 2735          case V_WRITE:
2736 2736                  if (vp->v_wrcnt)
2737 2737                          return (V_TRUE);
2738 2738                  break;
2739 2739          case V_RDANDWR:
2740 2740                  if (vp->v_rdcnt && vp->v_wrcnt)
2741 2741                          return (V_TRUE);
2742 2742                  break;
2743 2743          case V_RDORWR:
2744 2744                  if (vp->v_rdcnt || vp->v_wrcnt)
2745 2745                          return (V_TRUE);
2746 2746                  break;
2747 2747          case V_READ:
2748 2748                  if (vp->v_rdcnt)
2749 2749                          return (V_TRUE);
2750 2750                  break;
2751 2751          }
2752 2752  
2753 2753          return (V_FALSE);
2754 2754  }
2755 2755  
2756 2756  /*
2757 2757   * vn_is_mapped() checks whether a particular file is mapped and whether
2758 2758   * the file is mapped read and/or write.
2759 2759   */
2760 2760  int
2761 2761  vn_is_mapped(
2762 2762          vnode_t *vp,
2763 2763          v_mode_t mode)
2764 2764  {
2765 2765  
2766 2766          ASSERT(vp != NULL);
2767 2767  
2768 2768  #if !defined(_LP64)
2769 2769          switch (mode) {
2770 2770          /*
2771 2771           * The atomic_add_64_nv functions force atomicity in the
2772 2772           * case of 32 bit architectures. Otherwise the 64 bit values
2773 2773           * require two fetches. The value of the fields may be
2774 2774           * (potentially) changed between the first fetch and the
2775 2775           * second
2776 2776           */
2777 2777          case V_WRITE:
2778 2778                  if (atomic_add_64_nv((&(vp->v_mmap_write)), 0))
2779 2779                          return (V_TRUE);
2780 2780                  break;
2781 2781          case V_RDANDWR:
2782 2782                  if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) &&
2783 2783                      (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
2784 2784                          return (V_TRUE);
2785 2785                  break;
2786 2786          case V_RDORWR:
2787 2787                  if ((atomic_add_64_nv((&(vp->v_mmap_read)), 0)) ||
2788 2788                      (atomic_add_64_nv((&(vp->v_mmap_write)), 0)))
2789 2789                          return (V_TRUE);
2790 2790                  break;
2791 2791          case V_READ:
2792 2792                  if (atomic_add_64_nv((&(vp->v_mmap_read)), 0))
2793 2793                          return (V_TRUE);
2794 2794                  break;
2795 2795          }
2796 2796  #else
2797 2797          switch (mode) {
2798 2798          case V_WRITE:
2799 2799                  if (vp->v_mmap_write)
2800 2800                          return (V_TRUE);
2801 2801                  break;
2802 2802          case V_RDANDWR:
2803 2803                  if (vp->v_mmap_read && vp->v_mmap_write)
2804 2804                          return (V_TRUE);
2805 2805                  break;
2806 2806          case V_RDORWR:
2807 2807                  if (vp->v_mmap_read || vp->v_mmap_write)
2808 2808                          return (V_TRUE);
2809 2809                  break;
2810 2810          case V_READ:
2811 2811                  if (vp->v_mmap_read)
2812 2812                          return (V_TRUE);
2813 2813                  break;
2814 2814          }
2815 2815  #endif
2816 2816  
2817 2817          return (V_FALSE);
2818 2818  }
2819 2819  
2820 2820  /*
2821 2821   * Set the operations vector for a vnode.
2822 2822   *
2823 2823   * FEM ensures that the v_femhead pointer is filled in before the
2824 2824   * v_op pointer is changed.  This means that if the v_femhead pointer
2825 2825   * is NULL, and the v_op field hasn't changed since before which checked
2826 2826   * the v_femhead pointer; then our update is ok - we are not racing with
2827 2827   * FEM.
2828 2828   */
2829 2829  void
2830 2830  vn_setops(vnode_t *vp, vnodeops_t *vnodeops)
2831 2831  {
2832 2832          vnodeops_t      *op;
2833 2833  
2834 2834          ASSERT(vp != NULL);
2835 2835          ASSERT(vnodeops != NULL);
2836 2836  
2837 2837          op = vp->v_op;
2838 2838          membar_consumer();
2839 2839          /*
2840 2840           * If vp->v_femhead == NULL, then we'll call atomic_cas_ptr() to do
2841 2841           * the compare-and-swap on vp->v_op.  If either fails, then FEM is
2842 2842           * in effect on the vnode and we need to have FEM deal with it.
2843 2843           */
2844 2844          if (vp->v_femhead != NULL || atomic_cas_ptr(&vp->v_op, op, vnodeops) !=
2845 2845              op) {
2846 2846                  fem_setvnops(vp, vnodeops);
2847 2847          }
2848 2848  }
2849 2849  
2850 2850  /*
2851 2851   * Retrieve the operations vector for a vnode
2852 2852   * As with vn_setops(above); make sure we aren't racing with FEM.
2853 2853   * FEM sets the v_op to a special, internal, vnodeops that wouldn't
2854 2854   * make sense to the callers of this routine.
2855 2855   */
2856 2856  vnodeops_t *
2857 2857  vn_getops(vnode_t *vp)
2858 2858  {
2859 2859          vnodeops_t      *op;
2860 2860  
2861 2861          ASSERT(vp != NULL);
2862 2862  
2863 2863          op = vp->v_op;
2864 2864          membar_consumer();
2865 2865          if (vp->v_femhead == NULL && op == vp->v_op) {
2866 2866                  return (op);
2867 2867          } else {
2868 2868                  return (fem_getvnops(vp));
2869 2869          }
2870 2870  }
2871 2871  
2872 2872  /*
2873 2873   * Returns non-zero (1) if the vnodeops matches that of the vnode.
2874 2874   * Returns zero (0) if not.
2875 2875   */
2876 2876  int
2877 2877  vn_matchops(vnode_t *vp, vnodeops_t *vnodeops)
2878 2878  {
2879 2879          return (vn_getops(vp) == vnodeops);
2880 2880  }
2881 2881  
2882 2882  /*
2883 2883   * Returns non-zero (1) if the specified operation matches the
2884 2884   * corresponding operation for that the vnode.
2885 2885   * Returns zero (0) if not.
2886 2886   */
2887 2887  
2888 2888  #define MATCHNAME(n1, n2) (((n1)[0] == (n2)[0]) && (strcmp((n1), (n2)) == 0))
2889 2889  
2890 2890  int
2891 2891  vn_matchopval(vnode_t *vp, char *vopname, fs_generic_func_p funcp)
2892 2892  {
2893 2893          const fs_operation_trans_def_t *otdp;
2894 2894          fs_generic_func_p *loc = NULL;
2895 2895          vnodeops_t      *vop = vn_getops(vp);
2896 2896  
2897 2897          ASSERT(vopname != NULL);
2898 2898  
2899 2899          for (otdp = vn_ops_table; otdp->name != NULL; otdp++) {
2900 2900                  if (MATCHNAME(otdp->name, vopname)) {
2901 2901                          loc = (fs_generic_func_p *)
2902 2902                              ((char *)(vop) + otdp->offset);
2903 2903                          break;
2904 2904                  }
2905 2905          }
2906 2906  
2907 2907          return ((loc != NULL) && (*loc == funcp));
2908 2908  }
2909 2909  
2910 2910  /*

↓ open down ↓

1634 lines elided

↑ open up ↑

2911 2911   * fs_new_caller_id() needs to return a unique ID on a given local system.
2912 2912   * The IDs do not need to survive across reboots.  These are primarily
2913 2913   * used so that (FEM) monitors can detect particular callers (such as
2914 2914   * the NFS server) to a given vnode/vfs operation.
2915 2915   */
2916 2916  u_longlong_t
2917 2917  fs_new_caller_id()
2918 2918  {
2919 2919          static uint64_t next_caller_id = 0LL; /* First call returns 1 */
2920 2920  
2921      -        return ((u_longlong_t)atomic_add_64_nv(&next_caller_id, 1));
     2921 +        return ((u_longlong_t)atomic_inc_64_nv(&next_caller_id));
2922 2922  }
2923 2923  
2924 2924  /*
2925 2925   * Given a starting vnode and a path, updates the path in the target vnode in
2926 2926   * a safe manner.  If the vnode already has path information embedded, then the
2927 2927   * cached path is left untouched.
2928 2928   */
2929 2929  
2930 2930  size_t max_vnode_path = 4 * MAXPATHLEN;
2931 2931

2932 2932  void
2933 2933  vn_setpath(vnode_t *rootvp, struct vnode *startvp, struct vnode *vp,
2934 2934      const char *path, size_t plen)
2935 2935  {
2936 2936          char    *rpath;
2937 2937          vnode_t *base;
2938 2938          size_t  rpathlen, rpathalloc;
2939 2939          int     doslash = 1;
2940 2940  
2941 2941          if (*path == '/') {
2942 2942                  base = rootvp;
2943 2943                  path++;
2944 2944                  plen--;
2945 2945          } else {
2946 2946                  base = startvp;
2947 2947          }
2948 2948  
2949 2949          /*
2950 2950           * We cannot grab base->v_lock while we hold vp->v_lock because of
2951 2951           * the potential for deadlock.
2952 2952           */
2953 2953          mutex_enter(&base->v_lock);
2954 2954          if (base->v_path == NULL) {
2955 2955                  mutex_exit(&base->v_lock);
2956 2956                  return;
2957 2957          }
2958 2958  
2959 2959          rpathlen = strlen(base->v_path);
2960 2960          rpathalloc = rpathlen + plen + 1;
2961 2961          /* Avoid adding a slash if there's already one there */
2962 2962          if (base->v_path[rpathlen-1] == '/')
2963 2963                  doslash = 0;
2964 2964          else
2965 2965                  rpathalloc++;
2966 2966  
2967 2967          /*
2968 2968           * We don't want to call kmem_alloc(KM_SLEEP) with kernel locks held,
2969 2969           * so we must do this dance.  If, by chance, something changes the path,
2970 2970           * just give up since there is no real harm.
2971 2971           */
2972 2972          mutex_exit(&base->v_lock);
2973 2973  
2974 2974          /* Paths should stay within reason */
2975 2975          if (rpathalloc > max_vnode_path)
2976 2976                  return;
2977 2977  
2978 2978          rpath = kmem_alloc(rpathalloc, KM_SLEEP);
2979 2979  
2980 2980          mutex_enter(&base->v_lock);
2981 2981          if (base->v_path == NULL || strlen(base->v_path) != rpathlen) {
2982 2982                  mutex_exit(&base->v_lock);
2983 2983                  kmem_free(rpath, rpathalloc);
2984 2984                  return;
2985 2985          }
2986 2986          bcopy(base->v_path, rpath, rpathlen);
2987 2987          mutex_exit(&base->v_lock);
2988 2988  
2989 2989          if (doslash)
2990 2990                  rpath[rpathlen++] = '/';
2991 2991          bcopy(path, rpath + rpathlen, plen);
2992 2992          rpath[rpathlen + plen] = '\0';
2993 2993  
2994 2994          mutex_enter(&vp->v_lock);
2995 2995          if (vp->v_path != NULL) {
2996 2996                  mutex_exit(&vp->v_lock);
2997 2997                  kmem_free(rpath, rpathalloc);
2998 2998          } else {
2999 2999                  vp->v_path = rpath;
3000 3000                  mutex_exit(&vp->v_lock);
3001 3001          }
3002 3002  }
3003 3003  
3004 3004  /*
3005 3005   * Sets the path to the vnode to be the given string, regardless of current
3006 3006   * context.  The string must be a complete path from rootdir.  This is only used
3007 3007   * by fsop_root() for setting the path based on the mountpoint.
3008 3008   */
3009 3009  void
3010 3010  vn_setpath_str(struct vnode *vp, const char *str, size_t len)
3011 3011  {
3012 3012          char *buf = kmem_alloc(len + 1, KM_SLEEP);
3013 3013  
3014 3014          mutex_enter(&vp->v_lock);
3015 3015          if (vp->v_path != NULL) {
3016 3016                  mutex_exit(&vp->v_lock);
3017 3017                  kmem_free(buf, len + 1);
3018 3018                  return;
3019 3019          }
3020 3020  
3021 3021          vp->v_path = buf;
3022 3022          bcopy(str, vp->v_path, len);
3023 3023          vp->v_path[len] = '\0';
3024 3024  
3025 3025          mutex_exit(&vp->v_lock);
3026 3026  }
3027 3027  
3028 3028  /*
3029 3029   * Called from within filesystem's vop_rename() to handle renames once the
3030 3030   * target vnode is available.
3031 3031   */
3032 3032  void
3033 3033  vn_renamepath(vnode_t *dvp, vnode_t *vp, const char *nm, size_t len)
3034 3034  {
3035 3035          char *tmp;
3036 3036  
3037 3037          mutex_enter(&vp->v_lock);
3038 3038          tmp = vp->v_path;
3039 3039          vp->v_path = NULL;
3040 3040          mutex_exit(&vp->v_lock);
3041 3041          vn_setpath(rootdir, dvp, vp, nm, len);
3042 3042          if (tmp != NULL)
3043 3043                  kmem_free(tmp, strlen(tmp) + 1);
3044 3044  }
3045 3045  
3046 3046  /*
3047 3047   * Similar to vn_setpath_str(), this function sets the path of the destination
3048 3048   * vnode to be the same as the source vnode.
3049 3049   */
3050 3050  void
3051 3051  vn_copypath(struct vnode *src, struct vnode *dst)
3052 3052  {
3053 3053          char *buf;
3054 3054          int alloc;
3055 3055  
3056 3056          mutex_enter(&src->v_lock);
3057 3057          if (src->v_path == NULL) {
3058 3058                  mutex_exit(&src->v_lock);
3059 3059                  return;
3060 3060          }
3061 3061          alloc = strlen(src->v_path) + 1;
3062 3062  
3063 3063          /* avoid kmem_alloc() with lock held */
3064 3064          mutex_exit(&src->v_lock);
3065 3065          buf = kmem_alloc(alloc, KM_SLEEP);
3066 3066          mutex_enter(&src->v_lock);
3067 3067          if (src->v_path == NULL || strlen(src->v_path) + 1 != alloc) {
3068 3068                  mutex_exit(&src->v_lock);
3069 3069                  kmem_free(buf, alloc);
3070 3070                  return;
3071 3071          }
3072 3072          bcopy(src->v_path, buf, alloc);
3073 3073          mutex_exit(&src->v_lock);
3074 3074  
3075 3075          mutex_enter(&dst->v_lock);
3076 3076          if (dst->v_path != NULL) {
3077 3077                  mutex_exit(&dst->v_lock);
3078 3078                  kmem_free(buf, alloc);
3079 3079                  return;
3080 3080          }
3081 3081          dst->v_path = buf;
3082 3082          mutex_exit(&dst->v_lock);
3083 3083  }
3084 3084  
3085 3085  /*
3086 3086   * XXX Private interface for segvn routines that handle vnode
3087 3087   * large page segments.
3088 3088   *
3089 3089   * return 1 if vp's file system VOP_PAGEIO() implementation
3090 3090   * can be safely used instead of VOP_GETPAGE() for handling
3091 3091   * pagefaults against regular non swap files. VOP_PAGEIO()
3092 3092   * interface is considered safe here if its implementation
3093 3093   * is very close to VOP_GETPAGE() implementation.
3094 3094   * e.g. It zeroes out the part of the page beyond EOF. Doesn't
3095 3095   * panic if there are file holes but instead returns an error.
3096 3096   * Doesn't assume file won't be changed by user writes, etc.
3097 3097   *
3098 3098   * return 0 otherwise.
3099 3099   *
3100 3100   * For now allow segvn to only use VOP_PAGEIO() with ufs and nfs.
3101 3101   */
3102 3102  int
3103 3103  vn_vmpss_usepageio(vnode_t *vp)
3104 3104  {
3105 3105          vfs_t   *vfsp = vp->v_vfsp;
3106 3106          char *fsname = vfssw[vfsp->vfs_fstype].vsw_name;
3107 3107          char *pageio_ok_fss[] = {"ufs", "nfs", NULL};
3108 3108          char **fsok = pageio_ok_fss;
3109 3109  
3110 3110          if (fsname == NULL) {
3111 3111                  return (0);
3112 3112          }
3113 3113  
3114 3114          for (; *fsok; fsok++) {
3115 3115                  if (strcmp(*fsok, fsname) == 0) {
3116 3116                          return (1);
3117 3117                  }
3118 3118          }
3119 3119          return (0);
3120 3120  }
3121 3121  
3122 3122  /* VOP_XXX() macros call the corresponding fop_xxx() function */
3123 3123  
3124 3124  int
3125 3125  fop_open(
3126 3126          vnode_t **vpp,
3127 3127          int mode,
3128 3128          cred_t *cr,
3129 3129          caller_context_t *ct)
3130 3130  {
3131 3131          int ret;
3132 3132          vnode_t *vp = *vpp;
3133 3133  
3134 3134          VN_HOLD(vp);
3135 3135          /*
3136 3136           * Adding to the vnode counts before calling open
3137 3137           * avoids the need for a mutex. It circumvents a race
3138 3138           * condition where a query made on the vnode counts results in a

↓ open down ↓

207 lines elided

↑ open up ↑

3139 3139           * false negative. The inquirer goes away believing the file is
3140 3140           * not open when there is an open on the file already under way.
3141 3141           *
3142 3142           * The counts are meant to prevent NFS from granting a delegation
3143 3143           * when it would be dangerous to do so.
3144 3144           *
3145 3145           * The vnode counts are only kept on regular files
3146 3146           */
3147 3147          if ((*vpp)->v_type == VREG) {
3148 3148                  if (mode & FREAD)
3149      -                        atomic_add_32(&((*vpp)->v_rdcnt), 1);
     3149 +                        atomic_inc_32(&(*vpp)->v_rdcnt);
3150 3150                  if (mode & FWRITE)
3151      -                        atomic_add_32(&((*vpp)->v_wrcnt), 1);
     3151 +                        atomic_inc_32(&(*vpp)->v_wrcnt);
3152 3152          }
3153 3153  
3154 3154          VOPXID_MAP_CR(vp, cr);
3155 3155  
3156 3156          ret = (*(*(vpp))->v_op->vop_open)(vpp, mode, cr, ct);
3157 3157  
3158 3158          if (ret) {
3159 3159                  /*
3160 3160                   * Use the saved vp just in case the vnode ptr got trashed
3161 3161                   * by the error.
3162 3162                   */
3163 3163                  VOPSTATS_UPDATE(vp, open);
3164 3164                  if ((vp->v_type == VREG) && (mode & FREAD))
3165      -                        atomic_add_32(&(vp->v_rdcnt), -1);
     3165 +                        atomic_dec_32(&vp->v_rdcnt);
3166 3166                  if ((vp->v_type == VREG) && (mode & FWRITE))
3167      -                        atomic_add_32(&(vp->v_wrcnt), -1);
     3167 +                        atomic_dec_32(&vp->v_wrcnt);
3168 3168          } else {
3169 3169                  /*
3170 3170                   * Some filesystems will return a different vnode,
3171 3171                   * but the same path was still used to open it.
3172 3172                   * So if we do change the vnode and need to
3173 3173                   * copy over the path, do so here, rather than special
3174 3174                   * casing each filesystem. Adjust the vnode counts to
3175 3175                   * reflect the vnode switch.
3176 3176                   */
3177 3177                  VOPSTATS_UPDATE(*vpp, open);
3178 3178                  if (*vpp != vp && *vpp != NULL) {
3179 3179                          vn_copypath(vp, *vpp);
3180 3180                          if (((*vpp)->v_type == VREG) && (mode & FREAD))
3181      -                                atomic_add_32(&((*vpp)->v_rdcnt), 1);
     3181 +                                atomic_inc_32(&(*vpp)->v_rdcnt);
3182 3182                          if ((vp->v_type == VREG) && (mode & FREAD))
3183      -                                atomic_add_32(&(vp->v_rdcnt), -1);
     3183 +                                atomic_dec_32(&vp->v_rdcnt);
3184 3184                          if (((*vpp)->v_type == VREG) && (mode & FWRITE))
3185      -                                atomic_add_32(&((*vpp)->v_wrcnt), 1);
     3185 +                                atomic_inc_32(&(*vpp)->v_wrcnt);
3186 3186                          if ((vp->v_type == VREG) && (mode & FWRITE))
3187      -                                atomic_add_32(&(vp->v_wrcnt), -1);
     3187 +                                atomic_dec_32(&vp->v_wrcnt);
3188 3188                  }
3189 3189          }
3190 3190          VN_RELE(vp);
3191 3191          return (ret);
3192 3192  }
3193 3193  
3194 3194  int
3195 3195  fop_close(
3196 3196          vnode_t *vp,
3197 3197          int flag,

3198 3198          int count,
3199 3199          offset_t offset,
3200 3200          cred_t *cr,
3201 3201          caller_context_t *ct)
3202 3202  {
3203 3203          int err;
3204 3204  
3205 3205          VOPXID_MAP_CR(vp, cr);

↓ open down ↓

8 lines elided

↑ open up ↑

3206 3206  
3207 3207          err = (*(vp)->v_op->vop_close)(vp, flag, count, offset, cr, ct);
3208 3208          VOPSTATS_UPDATE(vp, close);
3209 3209          /*
3210 3210           * Check passed in count to handle possible dups. Vnode counts are only
3211 3211           * kept on regular files
3212 3212           */
3213 3213          if ((vp->v_type == VREG) && (count == 1))  {
3214 3214                  if (flag & FREAD) {
3215 3215                          ASSERT(vp->v_rdcnt > 0);
3216      -                        atomic_add_32(&(vp->v_rdcnt), -1);
     3216 +                        atomic_dec_32(&vp->v_rdcnt);
3217 3217                  }
3218 3218                  if (flag & FWRITE) {
3219 3219                          ASSERT(vp->v_wrcnt > 0);
3220      -                        atomic_add_32(&(vp->v_wrcnt), -1);
     3220 +                        atomic_dec_32(&vp->v_wrcnt);
3221 3221                  }
3222 3222          }
3223 3223          return (err);
3224 3224  }
3225 3225  
3226 3226  int
3227 3227  fop_read(
3228 3228          vnode_t *vp,
3229 3229          uio_t *uiop,
3230 3230          int ioflag,

3231 3231          cred_t *cr,
3232 3232          caller_context_t *ct)
3233 3233  {
3234 3234          int     err;
3235 3235          ssize_t resid_start = uiop->uio_resid;
3236 3236  
3237 3237          VOPXID_MAP_CR(vp, cr);
3238 3238  
3239 3239          err = (*(vp)->v_op->vop_read)(vp, uiop, ioflag, cr, ct);
3240 3240          VOPSTATS_UPDATE_IO(vp, read,
3241 3241              read_bytes, (resid_start - uiop->uio_resid));
3242 3242          return (err);
3243 3243  }
3244 3244  
3245 3245  int
3246 3246  fop_write(
3247 3247          vnode_t *vp,
3248 3248          uio_t *uiop,
3249 3249          int ioflag,
3250 3250          cred_t *cr,
3251 3251          caller_context_t *ct)
3252 3252  {
3253 3253          int     err;
3254 3254          ssize_t resid_start = uiop->uio_resid;
3255 3255  
3256 3256          VOPXID_MAP_CR(vp, cr);
3257 3257  
3258 3258          err = (*(vp)->v_op->vop_write)(vp, uiop, ioflag, cr, ct);
3259 3259          VOPSTATS_UPDATE_IO(vp, write,
3260 3260              write_bytes, (resid_start - uiop->uio_resid));
3261 3261          return (err);
3262 3262  }
3263 3263  
3264 3264  int
3265 3265  fop_ioctl(
3266 3266          vnode_t *vp,
3267 3267          int cmd,
3268 3268          intptr_t arg,
3269 3269          int flag,
3270 3270          cred_t *cr,
3271 3271          int *rvalp,
3272 3272          caller_context_t *ct)
3273 3273  {
3274 3274          int     err;
3275 3275  
3276 3276          VOPXID_MAP_CR(vp, cr);
3277 3277  
3278 3278          err = (*(vp)->v_op->vop_ioctl)(vp, cmd, arg, flag, cr, rvalp, ct);
3279 3279          VOPSTATS_UPDATE(vp, ioctl);
3280 3280          return (err);
3281 3281  }
3282 3282  
3283 3283  int
3284 3284  fop_setfl(
3285 3285          vnode_t *vp,
3286 3286          int oflags,
3287 3287          int nflags,
3288 3288          cred_t *cr,
3289 3289          caller_context_t *ct)
3290 3290  {
3291 3291          int     err;
3292 3292  
3293 3293          VOPXID_MAP_CR(vp, cr);
3294 3294  
3295 3295          err = (*(vp)->v_op->vop_setfl)(vp, oflags, nflags, cr, ct);
3296 3296          VOPSTATS_UPDATE(vp, setfl);
3297 3297          return (err);
3298 3298  }
3299 3299  
3300 3300  int
3301 3301  fop_getattr(
3302 3302          vnode_t *vp,
3303 3303          vattr_t *vap,
3304 3304          int flags,
3305 3305          cred_t *cr,
3306 3306          caller_context_t *ct)
3307 3307  {
3308 3308          int     err;
3309 3309  
3310 3310          VOPXID_MAP_CR(vp, cr);
3311 3311  
3312 3312          /*
3313 3313           * If this file system doesn't understand the xvattr extensions
3314 3314           * then turn off the xvattr bit.
3315 3315           */
3316 3316          if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
3317 3317                  vap->va_mask &= ~AT_XVATTR;
3318 3318          }
3319 3319  
3320 3320          /*
3321 3321           * We're only allowed to skip the ACL check iff we used a 32 bit
3322 3322           * ACE mask with VOP_ACCESS() to determine permissions.
3323 3323           */
3324 3324          if ((flags & ATTR_NOACLCHECK) &&
3325 3325              vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3326 3326                  return (EINVAL);
3327 3327          }
3328 3328          err = (*(vp)->v_op->vop_getattr)(vp, vap, flags, cr, ct);
3329 3329          VOPSTATS_UPDATE(vp, getattr);
3330 3330          return (err);
3331 3331  }
3332 3332  
3333 3333  int
3334 3334  fop_setattr(
3335 3335          vnode_t *vp,
3336 3336          vattr_t *vap,
3337 3337          int flags,
3338 3338          cred_t *cr,
3339 3339          caller_context_t *ct)
3340 3340  {
3341 3341          int     err;
3342 3342  
3343 3343          VOPXID_MAP_CR(vp, cr);
3344 3344  
3345 3345          /*
3346 3346           * If this file system doesn't understand the xvattr extensions
3347 3347           * then turn off the xvattr bit.
3348 3348           */
3349 3349          if (vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR) == 0) {
3350 3350                  vap->va_mask &= ~AT_XVATTR;
3351 3351          }
3352 3352  
3353 3353          /*
3354 3354           * We're only allowed to skip the ACL check iff we used a 32 bit
3355 3355           * ACE mask with VOP_ACCESS() to determine permissions.
3356 3356           */
3357 3357          if ((flags & ATTR_NOACLCHECK) &&
3358 3358              vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3359 3359                  return (EINVAL);
3360 3360          }
3361 3361          err = (*(vp)->v_op->vop_setattr)(vp, vap, flags, cr, ct);
3362 3362          VOPSTATS_UPDATE(vp, setattr);
3363 3363          return (err);
3364 3364  }
3365 3365  
3366 3366  int
3367 3367  fop_access(
3368 3368          vnode_t *vp,
3369 3369          int mode,
3370 3370          int flags,
3371 3371          cred_t *cr,
3372 3372          caller_context_t *ct)
3373 3373  {
3374 3374          int     err;
3375 3375  
3376 3376          if ((flags & V_ACE_MASK) &&
3377 3377              vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
3378 3378                  return (EINVAL);
3379 3379          }
3380 3380  
3381 3381          VOPXID_MAP_CR(vp, cr);
3382 3382  
3383 3383          err = (*(vp)->v_op->vop_access)(vp, mode, flags, cr, ct);
3384 3384          VOPSTATS_UPDATE(vp, access);
3385 3385          return (err);
3386 3386  }
3387 3387  
3388 3388  int
3389 3389  fop_lookup(
3390 3390          vnode_t *dvp,
3391 3391          char *nm,
3392 3392          vnode_t **vpp,
3393 3393          pathname_t *pnp,
3394 3394          int flags,
3395 3395          vnode_t *rdir,
3396 3396          cred_t *cr,
3397 3397          caller_context_t *ct,
3398 3398          int *deflags,           /* Returned per-dirent flags */
3399 3399          pathname_t *ppnp)       /* Returned case-preserved name in directory */
3400 3400  {
3401 3401          int ret;
3402 3402  
3403 3403          /*
3404 3404           * If this file system doesn't support case-insensitive access
3405 3405           * and said access is requested, fail quickly.  It is required
3406 3406           * that if the vfs supports case-insensitive lookup, it also
3407 3407           * supports extended dirent flags.
3408 3408           */
3409 3409          if (flags & FIGNORECASE &&
3410 3410              (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3411 3411              vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3412 3412                  return (EINVAL);
3413 3413  
3414 3414          VOPXID_MAP_CR(dvp, cr);
3415 3415  
3416 3416          if ((flags & LOOKUP_XATTR) && (flags & LOOKUP_HAVE_SYSATTR_DIR) == 0) {
3417 3417                  ret = xattr_dir_lookup(dvp, vpp, flags, cr);
3418 3418          } else {
3419 3419                  ret = (*(dvp)->v_op->vop_lookup)
3420 3420                      (dvp, nm, vpp, pnp, flags, rdir, cr, ct, deflags, ppnp);
3421 3421          }
3422 3422          if (ret == 0 && *vpp) {
3423 3423                  VOPSTATS_UPDATE(*vpp, lookup);
3424 3424                  if ((*vpp)->v_path == NULL) {
3425 3425                          vn_setpath(rootdir, dvp, *vpp, nm, strlen(nm));
3426 3426                  }
3427 3427          }
3428 3428  
3429 3429          return (ret);
3430 3430  }
3431 3431  
3432 3432  int
3433 3433  fop_create(
3434 3434          vnode_t *dvp,
3435 3435          char *name,
3436 3436          vattr_t *vap,
3437 3437          vcexcl_t excl,
3438 3438          int mode,
3439 3439          vnode_t **vpp,
3440 3440          cred_t *cr,
3441 3441          int flags,
3442 3442          caller_context_t *ct,
3443 3443          vsecattr_t *vsecp)      /* ACL to set during create */
3444 3444  {
3445 3445          int ret;
3446 3446  
3447 3447          if (vsecp != NULL &&
3448 3448              vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
3449 3449                  return (EINVAL);
3450 3450          }
3451 3451          /*
3452 3452           * If this file system doesn't support case-insensitive access
3453 3453           * and said access is requested, fail quickly.
3454 3454           */
3455 3455          if (flags & FIGNORECASE &&
3456 3456              (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3457 3457              vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3458 3458                  return (EINVAL);
3459 3459  
3460 3460          VOPXID_MAP_CR(dvp, cr);
3461 3461  
3462 3462          ret = (*(dvp)->v_op->vop_create)
3463 3463              (dvp, name, vap, excl, mode, vpp, cr, flags, ct, vsecp);
3464 3464          if (ret == 0 && *vpp) {
3465 3465                  VOPSTATS_UPDATE(*vpp, create);
3466 3466                  if ((*vpp)->v_path == NULL) {
3467 3467                          vn_setpath(rootdir, dvp, *vpp, name, strlen(name));
3468 3468                  }
3469 3469          }
3470 3470  
3471 3471          return (ret);
3472 3472  }
3473 3473  
3474 3474  int
3475 3475  fop_remove(
3476 3476          vnode_t *dvp,
3477 3477          char *nm,
3478 3478          cred_t *cr,
3479 3479          caller_context_t *ct,
3480 3480          int flags)
3481 3481  {
3482 3482          int     err;
3483 3483  
3484 3484          /*
3485 3485           * If this file system doesn't support case-insensitive access
3486 3486           * and said access is requested, fail quickly.
3487 3487           */
3488 3488          if (flags & FIGNORECASE &&
3489 3489              (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3490 3490              vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3491 3491                  return (EINVAL);
3492 3492  
3493 3493          VOPXID_MAP_CR(dvp, cr);
3494 3494  
3495 3495          err = (*(dvp)->v_op->vop_remove)(dvp, nm, cr, ct, flags);
3496 3496          VOPSTATS_UPDATE(dvp, remove);
3497 3497          return (err);
3498 3498  }
3499 3499  
3500 3500  int
3501 3501  fop_link(
3502 3502          vnode_t *tdvp,
3503 3503          vnode_t *svp,
3504 3504          char *tnm,
3505 3505          cred_t *cr,
3506 3506          caller_context_t *ct,
3507 3507          int flags)
3508 3508  {
3509 3509          int     err;
3510 3510  
3511 3511          /*
3512 3512           * If the target file system doesn't support case-insensitive access
3513 3513           * and said access is requested, fail quickly.
3514 3514           */
3515 3515          if (flags & FIGNORECASE &&
3516 3516              (vfs_has_feature(tdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3517 3517              vfs_has_feature(tdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3518 3518                  return (EINVAL);
3519 3519  
3520 3520          VOPXID_MAP_CR(tdvp, cr);
3521 3521  
3522 3522          err = (*(tdvp)->v_op->vop_link)(tdvp, svp, tnm, cr, ct, flags);
3523 3523          VOPSTATS_UPDATE(tdvp, link);
3524 3524          return (err);
3525 3525  }
3526 3526  
3527 3527  int
3528 3528  fop_rename(
3529 3529          vnode_t *sdvp,
3530 3530          char *snm,
3531 3531          vnode_t *tdvp,
3532 3532          char *tnm,
3533 3533          cred_t *cr,
3534 3534          caller_context_t *ct,
3535 3535          int flags)
3536 3536  {
3537 3537          int     err;
3538 3538  
3539 3539          /*
3540 3540           * If the file system involved does not support
3541 3541           * case-insensitive access and said access is requested, fail
3542 3542           * quickly.
3543 3543           */
3544 3544          if (flags & FIGNORECASE &&
3545 3545              ((vfs_has_feature(sdvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3546 3546              vfs_has_feature(sdvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0)))
3547 3547                  return (EINVAL);
3548 3548  
3549 3549          VOPXID_MAP_CR(tdvp, cr);
3550 3550  
3551 3551          err = (*(sdvp)->v_op->vop_rename)(sdvp, snm, tdvp, tnm, cr, ct, flags);
3552 3552          VOPSTATS_UPDATE(sdvp, rename);
3553 3553          return (err);
3554 3554  }
3555 3555  
3556 3556  int
3557 3557  fop_mkdir(
3558 3558          vnode_t *dvp,
3559 3559          char *dirname,
3560 3560          vattr_t *vap,
3561 3561          vnode_t **vpp,
3562 3562          cred_t *cr,
3563 3563          caller_context_t *ct,
3564 3564          int flags,
3565 3565          vsecattr_t *vsecp)      /* ACL to set during create */
3566 3566  {
3567 3567          int ret;
3568 3568  
3569 3569          if (vsecp != NULL &&
3570 3570              vfs_has_feature(dvp->v_vfsp, VFSFT_ACLONCREATE) == 0) {
3571 3571                  return (EINVAL);
3572 3572          }
3573 3573          /*
3574 3574           * If this file system doesn't support case-insensitive access
3575 3575           * and said access is requested, fail quickly.
3576 3576           */
3577 3577          if (flags & FIGNORECASE &&
3578 3578              (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3579 3579              vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3580 3580                  return (EINVAL);
3581 3581  
3582 3582          VOPXID_MAP_CR(dvp, cr);
3583 3583  
3584 3584          ret = (*(dvp)->v_op->vop_mkdir)
3585 3585              (dvp, dirname, vap, vpp, cr, ct, flags, vsecp);
3586 3586          if (ret == 0 && *vpp) {
3587 3587                  VOPSTATS_UPDATE(*vpp, mkdir);
3588 3588                  if ((*vpp)->v_path == NULL) {
3589 3589                          vn_setpath(rootdir, dvp, *vpp, dirname,
3590 3590                              strlen(dirname));
3591 3591                  }
3592 3592          }
3593 3593  
3594 3594          return (ret);
3595 3595  }
3596 3596  
3597 3597  int
3598 3598  fop_rmdir(
3599 3599          vnode_t *dvp,
3600 3600          char *nm,
3601 3601          vnode_t *cdir,
3602 3602          cred_t *cr,
3603 3603          caller_context_t *ct,
3604 3604          int flags)
3605 3605  {
3606 3606          int     err;
3607 3607  
3608 3608          /*
3609 3609           * If this file system doesn't support case-insensitive access
3610 3610           * and said access is requested, fail quickly.
3611 3611           */
3612 3612          if (flags & FIGNORECASE &&
3613 3613              (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3614 3614              vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3615 3615                  return (EINVAL);
3616 3616  
3617 3617          VOPXID_MAP_CR(dvp, cr);
3618 3618  
3619 3619          err = (*(dvp)->v_op->vop_rmdir)(dvp, nm, cdir, cr, ct, flags);
3620 3620          VOPSTATS_UPDATE(dvp, rmdir);
3621 3621          return (err);
3622 3622  }
3623 3623  
3624 3624  int
3625 3625  fop_readdir(
3626 3626          vnode_t *vp,
3627 3627          uio_t *uiop,
3628 3628          cred_t *cr,
3629 3629          int *eofp,
3630 3630          caller_context_t *ct,
3631 3631          int flags)
3632 3632  {
3633 3633          int     err;
3634 3634          ssize_t resid_start = uiop->uio_resid;
3635 3635  
3636 3636          /*
3637 3637           * If this file system doesn't support retrieving directory
3638 3638           * entry flags and said access is requested, fail quickly.
3639 3639           */
3640 3640          if (flags & V_RDDIR_ENTFLAGS &&
3641 3641              vfs_has_feature(vp->v_vfsp, VFSFT_DIRENTFLAGS) == 0)
3642 3642                  return (EINVAL);
3643 3643  
3644 3644          VOPXID_MAP_CR(vp, cr);
3645 3645  
3646 3646          err = (*(vp)->v_op->vop_readdir)(vp, uiop, cr, eofp, ct, flags);
3647 3647          VOPSTATS_UPDATE_IO(vp, readdir,
3648 3648              readdir_bytes, (resid_start - uiop->uio_resid));
3649 3649          return (err);
3650 3650  }
3651 3651  
3652 3652  int
3653 3653  fop_symlink(
3654 3654          vnode_t *dvp,
3655 3655          char *linkname,
3656 3656          vattr_t *vap,
3657 3657          char *target,
3658 3658          cred_t *cr,
3659 3659          caller_context_t *ct,
3660 3660          int flags)
3661 3661  {
3662 3662          int     err;
3663 3663          xvattr_t xvattr;
3664 3664  
3665 3665          /*
3666 3666           * If this file system doesn't support case-insensitive access
3667 3667           * and said access is requested, fail quickly.
3668 3668           */
3669 3669          if (flags & FIGNORECASE &&
3670 3670              (vfs_has_feature(dvp->v_vfsp, VFSFT_CASEINSENSITIVE) == 0 &&
3671 3671              vfs_has_feature(dvp->v_vfsp, VFSFT_NOCASESENSITIVE) == 0))
3672 3672                  return (EINVAL);
3673 3673  
3674 3674          VOPXID_MAP_CR(dvp, cr);
3675 3675  
3676 3676          /* check for reparse point */
3677 3677          if ((vfs_has_feature(dvp->v_vfsp, VFSFT_REPARSE)) &&
3678 3678              (strncmp(target, FS_REPARSE_TAG_STR,
3679 3679              strlen(FS_REPARSE_TAG_STR)) == 0)) {
3680 3680                  if (!fs_reparse_mark(target, vap, &xvattr))
3681 3681                          vap = (vattr_t *)&xvattr;
3682 3682          }
3683 3683  
3684 3684          err = (*(dvp)->v_op->vop_symlink)
3685 3685              (dvp, linkname, vap, target, cr, ct, flags);
3686 3686          VOPSTATS_UPDATE(dvp, symlink);
3687 3687          return (err);
3688 3688  }
3689 3689  
3690 3690  int
3691 3691  fop_readlink(
3692 3692          vnode_t *vp,
3693 3693          uio_t *uiop,
3694 3694          cred_t *cr,
3695 3695          caller_context_t *ct)
3696 3696  {
3697 3697          int     err;
3698 3698  
3699 3699          VOPXID_MAP_CR(vp, cr);
3700 3700  
3701 3701          err = (*(vp)->v_op->vop_readlink)(vp, uiop, cr, ct);
3702 3702          VOPSTATS_UPDATE(vp, readlink);
3703 3703          return (err);
3704 3704  }
3705 3705  
3706 3706  int
3707 3707  fop_fsync(
3708 3708          vnode_t *vp,
3709 3709          int syncflag,
3710 3710          cred_t *cr,
3711 3711          caller_context_t *ct)
3712 3712  {
3713 3713          int     err;
3714 3714  
3715 3715          VOPXID_MAP_CR(vp, cr);
3716 3716  
3717 3717          err = (*(vp)->v_op->vop_fsync)(vp, syncflag, cr, ct);
3718 3718          VOPSTATS_UPDATE(vp, fsync);
3719 3719          return (err);
3720 3720  }
3721 3721  
3722 3722  void
3723 3723  fop_inactive(
3724 3724          vnode_t *vp,
3725 3725          cred_t *cr,
3726 3726          caller_context_t *ct)
3727 3727  {
3728 3728          /* Need to update stats before vop call since we may lose the vnode */
3729 3729          VOPSTATS_UPDATE(vp, inactive);
3730 3730  
3731 3731          VOPXID_MAP_CR(vp, cr);
3732 3732  
3733 3733          (*(vp)->v_op->vop_inactive)(vp, cr, ct);
3734 3734  }
3735 3735  
3736 3736  int
3737 3737  fop_fid(
3738 3738          vnode_t *vp,
3739 3739          fid_t *fidp,
3740 3740          caller_context_t *ct)
3741 3741  {
3742 3742          int     err;
3743 3743  
3744 3744          err = (*(vp)->v_op->vop_fid)(vp, fidp, ct);
3745 3745          VOPSTATS_UPDATE(vp, fid);
3746 3746          return (err);
3747 3747  }
3748 3748  
3749 3749  int
3750 3750  fop_rwlock(
3751 3751          vnode_t *vp,
3752 3752          int write_lock,
3753 3753          caller_context_t *ct)
3754 3754  {
3755 3755          int     ret;
3756 3756  
3757 3757          ret = ((*(vp)->v_op->vop_rwlock)(vp, write_lock, ct));
3758 3758          VOPSTATS_UPDATE(vp, rwlock);
3759 3759          return (ret);
3760 3760  }
3761 3761  
3762 3762  void
3763 3763  fop_rwunlock(
3764 3764          vnode_t *vp,
3765 3765          int write_lock,
3766 3766          caller_context_t *ct)
3767 3767  {
3768 3768          (*(vp)->v_op->vop_rwunlock)(vp, write_lock, ct);
3769 3769          VOPSTATS_UPDATE(vp, rwunlock);
3770 3770  }
3771 3771  
3772 3772  int
3773 3773  fop_seek(
3774 3774          vnode_t *vp,
3775 3775          offset_t ooff,
3776 3776          offset_t *noffp,
3777 3777          caller_context_t *ct)
3778 3778  {
3779 3779          int     err;
3780 3780  
3781 3781          err = (*(vp)->v_op->vop_seek)(vp, ooff, noffp, ct);
3782 3782          VOPSTATS_UPDATE(vp, seek);
3783 3783          return (err);
3784 3784  }
3785 3785  
3786 3786  int
3787 3787  fop_cmp(
3788 3788          vnode_t *vp1,
3789 3789          vnode_t *vp2,
3790 3790          caller_context_t *ct)
3791 3791  {
3792 3792          int     err;
3793 3793  
3794 3794          err = (*(vp1)->v_op->vop_cmp)(vp1, vp2, ct);
3795 3795          VOPSTATS_UPDATE(vp1, cmp);
3796 3796          return (err);
3797 3797  }
3798 3798  
3799 3799  int
3800 3800  fop_frlock(
3801 3801          vnode_t *vp,
3802 3802          int cmd,
3803 3803          flock64_t *bfp,
3804 3804          int flag,
3805 3805          offset_t offset,
3806 3806          struct flk_callback *flk_cbp,
3807 3807          cred_t *cr,
3808 3808          caller_context_t *ct)
3809 3809  {
3810 3810          int     err;
3811 3811  
3812 3812          VOPXID_MAP_CR(vp, cr);
3813 3813  
3814 3814          err = (*(vp)->v_op->vop_frlock)
3815 3815              (vp, cmd, bfp, flag, offset, flk_cbp, cr, ct);
3816 3816          VOPSTATS_UPDATE(vp, frlock);
3817 3817          return (err);
3818 3818  }
3819 3819  
3820 3820  int
3821 3821  fop_space(
3822 3822          vnode_t *vp,
3823 3823          int cmd,
3824 3824          flock64_t *bfp,
3825 3825          int flag,
3826 3826          offset_t offset,
3827 3827          cred_t *cr,
3828 3828          caller_context_t *ct)
3829 3829  {
3830 3830          int     err;
3831 3831  
3832 3832          VOPXID_MAP_CR(vp, cr);
3833 3833  
3834 3834          err = (*(vp)->v_op->vop_space)(vp, cmd, bfp, flag, offset, cr, ct);
3835 3835          VOPSTATS_UPDATE(vp, space);
3836 3836          return (err);
3837 3837  }
3838 3838  
3839 3839  int
3840 3840  fop_realvp(
3841 3841          vnode_t *vp,
3842 3842          vnode_t **vpp,
3843 3843          caller_context_t *ct)
3844 3844  {
3845 3845          int     err;
3846 3846  
3847 3847          err = (*(vp)->v_op->vop_realvp)(vp, vpp, ct);
3848 3848          VOPSTATS_UPDATE(vp, realvp);
3849 3849          return (err);
3850 3850  }
3851 3851  
3852 3852  int
3853 3853  fop_getpage(
3854 3854          vnode_t *vp,
3855 3855          offset_t off,
3856 3856          size_t len,
3857 3857          uint_t *protp,
3858 3858          page_t **plarr,
3859 3859          size_t plsz,
3860 3860          struct seg *seg,
3861 3861          caddr_t addr,
3862 3862          enum seg_rw rw,
3863 3863          cred_t *cr,
3864 3864          caller_context_t *ct)
3865 3865  {
3866 3866          int     err;
3867 3867  
3868 3868          VOPXID_MAP_CR(vp, cr);
3869 3869  
3870 3870          err = (*(vp)->v_op->vop_getpage)
3871 3871              (vp, off, len, protp, plarr, plsz, seg, addr, rw, cr, ct);
3872 3872          VOPSTATS_UPDATE(vp, getpage);
3873 3873          return (err);
3874 3874  }
3875 3875  
3876 3876  int
3877 3877  fop_putpage(
3878 3878          vnode_t *vp,
3879 3879          offset_t off,
3880 3880          size_t len,
3881 3881          int flags,
3882 3882          cred_t *cr,
3883 3883          caller_context_t *ct)
3884 3884  {
3885 3885          int     err;
3886 3886  
3887 3887          VOPXID_MAP_CR(vp, cr);
3888 3888  
3889 3889          err = (*(vp)->v_op->vop_putpage)(vp, off, len, flags, cr, ct);
3890 3890          VOPSTATS_UPDATE(vp, putpage);
3891 3891          return (err);
3892 3892  }
3893 3893  
3894 3894  int
3895 3895  fop_map(
3896 3896          vnode_t *vp,
3897 3897          offset_t off,
3898 3898          struct as *as,
3899 3899          caddr_t *addrp,
3900 3900          size_t len,
3901 3901          uchar_t prot,
3902 3902          uchar_t maxprot,
3903 3903          uint_t flags,
3904 3904          cred_t *cr,
3905 3905          caller_context_t *ct)
3906 3906  {
3907 3907          int     err;
3908 3908  
3909 3909          VOPXID_MAP_CR(vp, cr);
3910 3910  
3911 3911          err = (*(vp)->v_op->vop_map)
3912 3912              (vp, off, as, addrp, len, prot, maxprot, flags, cr, ct);
3913 3913          VOPSTATS_UPDATE(vp, map);
3914 3914          return (err);
3915 3915  }
3916 3916  
3917 3917  int
3918 3918  fop_addmap(
3919 3919          vnode_t *vp,
3920 3920          offset_t off,
3921 3921          struct as *as,
3922 3922          caddr_t addr,
3923 3923          size_t len,
3924 3924          uchar_t prot,
3925 3925          uchar_t maxprot,
3926 3926          uint_t flags,
3927 3927          cred_t *cr,
3928 3928          caller_context_t *ct)
3929 3929  {
3930 3930          int error;
3931 3931          u_longlong_t delta;
3932 3932  
3933 3933          VOPXID_MAP_CR(vp, cr);
3934 3934  
3935 3935          error = (*(vp)->v_op->vop_addmap)
3936 3936              (vp, off, as, addr, len, prot, maxprot, flags, cr, ct);
3937 3937  
3938 3938          if ((!error) && (vp->v_type == VREG)) {
3939 3939                  delta = (u_longlong_t)btopr(len);
3940 3940                  /*
3941 3941                   * If file is declared MAP_PRIVATE, it can't be written back
3942 3942                   * even if open for write. Handle as read.
3943 3943                   */
3944 3944                  if (flags & MAP_PRIVATE) {
3945 3945                          atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3946 3946                              (int64_t)delta);
3947 3947                  } else {
3948 3948                          /*
3949 3949                           * atomic_add_64 forces the fetch of a 64 bit value to
3950 3950                           * be atomic on 32 bit machines
3951 3951                           */
3952 3952                          if (maxprot & PROT_WRITE)
3953 3953                                  atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
3954 3954                                      (int64_t)delta);
3955 3955                          if (maxprot & PROT_READ)
3956 3956                                  atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3957 3957                                      (int64_t)delta);
3958 3958                          if (maxprot & PROT_EXEC)
3959 3959                                  atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
3960 3960                                      (int64_t)delta);
3961 3961                  }
3962 3962          }
3963 3963          VOPSTATS_UPDATE(vp, addmap);
3964 3964          return (error);
3965 3965  }
3966 3966  
3967 3967  int
3968 3968  fop_delmap(
3969 3969          vnode_t *vp,
3970 3970          offset_t off,
3971 3971          struct as *as,
3972 3972          caddr_t addr,
3973 3973          size_t len,
3974 3974          uint_t prot,
3975 3975          uint_t maxprot,
3976 3976          uint_t flags,
3977 3977          cred_t *cr,
3978 3978          caller_context_t *ct)
3979 3979  {
3980 3980          int error;
3981 3981          u_longlong_t delta;
3982 3982  
3983 3983          VOPXID_MAP_CR(vp, cr);
3984 3984  
3985 3985          error = (*(vp)->v_op->vop_delmap)
3986 3986              (vp, off, as, addr, len, prot, maxprot, flags, cr, ct);
3987 3987  
3988 3988          /*
3989 3989           * NFS calls into delmap twice, the first time
3990 3990           * it simply establishes a callback mechanism and returns EAGAIN
3991 3991           * while the real work is being done upon the second invocation.
3992 3992           * We have to detect this here and only decrement the counts upon
3993 3993           * the second delmap request.
3994 3994           */
3995 3995          if ((error != EAGAIN) && (vp->v_type == VREG)) {
3996 3996  
3997 3997                  delta = (u_longlong_t)btopr(len);
3998 3998  
3999 3999                  if (flags & MAP_PRIVATE) {
4000 4000                          atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4001 4001                              (int64_t)(-delta));
4002 4002                  } else {
4003 4003                          /*
4004 4004                           * atomic_add_64 forces the fetch of a 64 bit value
4005 4005                           * to be atomic on 32 bit machines
4006 4006                           */
4007 4007                          if (maxprot & PROT_WRITE)
4008 4008                                  atomic_add_64((uint64_t *)(&(vp->v_mmap_write)),
4009 4009                                      (int64_t)(-delta));
4010 4010                          if (maxprot & PROT_READ)
4011 4011                                  atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4012 4012                                      (int64_t)(-delta));
4013 4013                          if (maxprot & PROT_EXEC)
4014 4014                                  atomic_add_64((uint64_t *)(&(vp->v_mmap_read)),
4015 4015                                      (int64_t)(-delta));
4016 4016                  }
4017 4017          }
4018 4018          VOPSTATS_UPDATE(vp, delmap);
4019 4019          return (error);
4020 4020  }
4021 4021  
4022 4022  
4023 4023  int
4024 4024  fop_poll(
4025 4025          vnode_t *vp,
4026 4026          short events,
4027 4027          int anyyet,
4028 4028          short *reventsp,
4029 4029          struct pollhead **phpp,
4030 4030          caller_context_t *ct)
4031 4031  {
4032 4032          int     err;
4033 4033  
4034 4034          err = (*(vp)->v_op->vop_poll)(vp, events, anyyet, reventsp, phpp, ct);
4035 4035          VOPSTATS_UPDATE(vp, poll);
4036 4036          return (err);
4037 4037  }
4038 4038  
4039 4039  int
4040 4040  fop_dump(
4041 4041          vnode_t *vp,
4042 4042          caddr_t addr,
4043 4043          offset_t lbdn,
4044 4044          offset_t dblks,
4045 4045          caller_context_t *ct)
4046 4046  {
4047 4047          int     err;
4048 4048  
4049 4049          /* ensure lbdn and dblks can be passed safely to bdev_dump */
4050 4050          if ((lbdn != (daddr_t)lbdn) || (dblks != (int)dblks))
4051 4051                  return (EIO);
4052 4052  
4053 4053          err = (*(vp)->v_op->vop_dump)(vp, addr, lbdn, dblks, ct);
4054 4054          VOPSTATS_UPDATE(vp, dump);
4055 4055          return (err);
4056 4056  }
4057 4057  
4058 4058  int
4059 4059  fop_pathconf(
4060 4060          vnode_t *vp,
4061 4061          int cmd,
4062 4062          ulong_t *valp,
4063 4063          cred_t *cr,
4064 4064          caller_context_t *ct)
4065 4065  {
4066 4066          int     err;
4067 4067  
4068 4068          VOPXID_MAP_CR(vp, cr);
4069 4069  
4070 4070          err = (*(vp)->v_op->vop_pathconf)(vp, cmd, valp, cr, ct);
4071 4071          VOPSTATS_UPDATE(vp, pathconf);
4072 4072          return (err);
4073 4073  }
4074 4074  
4075 4075  int
4076 4076  fop_pageio(
4077 4077          vnode_t *vp,
4078 4078          struct page *pp,
4079 4079          u_offset_t io_off,
4080 4080          size_t io_len,
4081 4081          int flags,
4082 4082          cred_t *cr,
4083 4083          caller_context_t *ct)
4084 4084  {
4085 4085          int     err;
4086 4086  
4087 4087          VOPXID_MAP_CR(vp, cr);
4088 4088  
4089 4089          err = (*(vp)->v_op->vop_pageio)(vp, pp, io_off, io_len, flags, cr, ct);
4090 4090          VOPSTATS_UPDATE(vp, pageio);
4091 4091          return (err);
4092 4092  }
4093 4093  
4094 4094  int
4095 4095  fop_dumpctl(
4096 4096          vnode_t *vp,
4097 4097          int action,
4098 4098          offset_t *blkp,
4099 4099          caller_context_t *ct)
4100 4100  {
4101 4101          int     err;
4102 4102          err = (*(vp)->v_op->vop_dumpctl)(vp, action, blkp, ct);
4103 4103          VOPSTATS_UPDATE(vp, dumpctl);
4104 4104          return (err);
4105 4105  }
4106 4106  
4107 4107  void
4108 4108  fop_dispose(
4109 4109          vnode_t *vp,
4110 4110          page_t *pp,
4111 4111          int flag,
4112 4112          int dn,
4113 4113          cred_t *cr,
4114 4114          caller_context_t *ct)
4115 4115  {
4116 4116          /* Must do stats first since it's possible to lose the vnode */
4117 4117          VOPSTATS_UPDATE(vp, dispose);
4118 4118  
4119 4119          VOPXID_MAP_CR(vp, cr);
4120 4120  
4121 4121          (*(vp)->v_op->vop_dispose)(vp, pp, flag, dn, cr, ct);
4122 4122  }
4123 4123  
4124 4124  int
4125 4125  fop_setsecattr(
4126 4126          vnode_t *vp,
4127 4127          vsecattr_t *vsap,
4128 4128          int flag,
4129 4129          cred_t *cr,
4130 4130          caller_context_t *ct)
4131 4131  {
4132 4132          int     err;
4133 4133  
4134 4134          VOPXID_MAP_CR(vp, cr);
4135 4135  
4136 4136          /*
4137 4137           * We're only allowed to skip the ACL check iff we used a 32 bit
4138 4138           * ACE mask with VOP_ACCESS() to determine permissions.
4139 4139           */
4140 4140          if ((flag & ATTR_NOACLCHECK) &&
4141 4141              vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
4142 4142                  return (EINVAL);
4143 4143          }
4144 4144          err = (*(vp)->v_op->vop_setsecattr) (vp, vsap, flag, cr, ct);
4145 4145          VOPSTATS_UPDATE(vp, setsecattr);
4146 4146          return (err);
4147 4147  }
4148 4148  
4149 4149  int
4150 4150  fop_getsecattr(
4151 4151          vnode_t *vp,
4152 4152          vsecattr_t *vsap,
4153 4153          int flag,
4154 4154          cred_t *cr,
4155 4155          caller_context_t *ct)
4156 4156  {
4157 4157          int     err;
4158 4158  
4159 4159          /*
4160 4160           * We're only allowed to skip the ACL check iff we used a 32 bit
4161 4161           * ACE mask with VOP_ACCESS() to determine permissions.
4162 4162           */
4163 4163          if ((flag & ATTR_NOACLCHECK) &&
4164 4164              vfs_has_feature(vp->v_vfsp, VFSFT_ACEMASKONACCESS) == 0) {
4165 4165                  return (EINVAL);
4166 4166          }
4167 4167  
4168 4168          VOPXID_MAP_CR(vp, cr);
4169 4169  
4170 4170          err = (*(vp)->v_op->vop_getsecattr) (vp, vsap, flag, cr, ct);
4171 4171          VOPSTATS_UPDATE(vp, getsecattr);
4172 4172          return (err);
4173 4173  }
4174 4174  
4175 4175  int
4176 4176  fop_shrlock(
4177 4177          vnode_t *vp,
4178 4178          int cmd,
4179 4179          struct shrlock *shr,
4180 4180          int flag,
4181 4181          cred_t *cr,
4182 4182          caller_context_t *ct)
4183 4183  {
4184 4184          int     err;
4185 4185  
4186 4186          VOPXID_MAP_CR(vp, cr);
4187 4187  
4188 4188          err = (*(vp)->v_op->vop_shrlock)(vp, cmd, shr, flag, cr, ct);
4189 4189          VOPSTATS_UPDATE(vp, shrlock);
4190 4190          return (err);
4191 4191  }
4192 4192  
4193 4193  int
4194 4194  fop_vnevent(vnode_t *vp, vnevent_t vnevent, vnode_t *dvp, char *fnm,
4195 4195      caller_context_t *ct)
4196 4196  {
4197 4197          int     err;
4198 4198  
4199 4199          err = (*(vp)->v_op->vop_vnevent)(vp, vnevent, dvp, fnm, ct);
4200 4200          VOPSTATS_UPDATE(vp, vnevent);
4201 4201          return (err);
4202 4202  }
4203 4203  
4204 4204  int
4205 4205  fop_reqzcbuf(vnode_t *vp, enum uio_rw ioflag, xuio_t *uiop, cred_t *cr,
4206 4206      caller_context_t *ct)
4207 4207  {
4208 4208          int err;
4209 4209  
4210 4210          if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
4211 4211                  return (ENOTSUP);
4212 4212          err = (*(vp)->v_op->vop_reqzcbuf)(vp, ioflag, uiop, cr, ct);
4213 4213          VOPSTATS_UPDATE(vp, reqzcbuf);
4214 4214          return (err);
4215 4215  }
4216 4216  
4217 4217  int
4218 4218  fop_retzcbuf(vnode_t *vp, xuio_t *uiop, cred_t *cr, caller_context_t *ct)
4219 4219  {
4220 4220          int err;
4221 4221  
4222 4222          if (vfs_has_feature(vp->v_vfsp, VFSFT_ZEROCOPY_SUPPORTED) == 0)
4223 4223                  return (ENOTSUP);
4224 4224          err = (*(vp)->v_op->vop_retzcbuf)(vp, uiop, cr, ct);
4225 4225          VOPSTATS_UPDATE(vp, retzcbuf);
4226 4226          return (err);
4227 4227  }
4228 4228  
/*
 * Default (no-op) VSD destructor.
 *
 * A NULL entry in the destructor table means "key unused", so keys
 * created without a destructor are given this do-nothing function
 * instead.
 */
/* ARGSUSED */
void
vsd_defaultdestructor(void *value)
{
        /* intentionally empty */
}
4237 4237  
4238 4238  /*
4239 4239   * Create a key (index into per vnode array)
4240 4240   *      Locks out vsd_create, vsd_destroy, and vsd_free
4241 4241   *      May allocate memory with lock held
4242 4242   */
4243 4243  void
4244 4244  vsd_create(uint_t *keyp, void (*destructor)(void *))
4245 4245  {
4246 4246          int     i;
4247 4247          uint_t  nkeys;
4248 4248  
4249 4249          /*
4250 4250           * if key is allocated, do nothing
4251 4251           */
4252 4252          mutex_enter(&vsd_lock);
4253 4253          if (*keyp) {
4254 4254                  mutex_exit(&vsd_lock);
4255 4255                  return;
4256 4256          }
4257 4257          /*
4258 4258           * find an unused key
4259 4259           */
4260 4260          if (destructor == NULL)
4261 4261                  destructor = vsd_defaultdestructor;
4262 4262  
4263 4263          for (i = 0; i < vsd_nkeys; ++i)
4264 4264                  if (vsd_destructor[i] == NULL)
4265 4265                          break;
4266 4266  
4267 4267          /*
4268 4268           * if no unused keys, increase the size of the destructor array
4269 4269           */
4270 4270          if (i == vsd_nkeys) {
4271 4271                  if ((nkeys = (vsd_nkeys << 1)) == 0)
4272 4272                          nkeys = 1;
4273 4273                  vsd_destructor =
4274 4274                      (void (**)(void *))vsd_realloc((void *)vsd_destructor,
4275 4275                      (size_t)(vsd_nkeys * sizeof (void (*)(void *))),
4276 4276                      (size_t)(nkeys * sizeof (void (*)(void *))));
4277 4277                  vsd_nkeys = nkeys;
4278 4278          }
4279 4279  
4280 4280          /*
4281 4281           * allocate the next available unused key
4282 4282           */
4283 4283          vsd_destructor[i] = destructor;
4284 4284          *keyp = i + 1;
4285 4285  
4286 4286          /* create vsd_list, if it doesn't exist */
4287 4287          if (vsd_list == NULL) {
4288 4288                  vsd_list = kmem_alloc(sizeof (list_t), KM_SLEEP);
4289 4289                  list_create(vsd_list, sizeof (struct vsd_node),
4290 4290                      offsetof(struct vsd_node, vs_nodes));
4291 4291          }
4292 4292  
4293 4293          mutex_exit(&vsd_lock);
4294 4294  }
4295 4295  
4296 4296  /*
4297 4297   * Destroy a key
4298 4298   *
4299 4299   * Assumes that the caller is preventing vsd_set and vsd_get
4300 4300   * Locks out vsd_create, vsd_destroy, and vsd_free
4301 4301   * May free memory with lock held
4302 4302   */
4303 4303  void
4304 4304  vsd_destroy(uint_t *keyp)
4305 4305  {
4306 4306          uint_t key;
4307 4307          struct vsd_node *vsd;
4308 4308  
4309 4309          /*
4310 4310           * protect the key namespace and our destructor lists
4311 4311           */
4312 4312          mutex_enter(&vsd_lock);
4313 4313          key = *keyp;
4314 4314          *keyp = 0;
4315 4315  
4316 4316          ASSERT(key <= vsd_nkeys);
4317 4317  
4318 4318          /*
4319 4319           * if the key is valid
4320 4320           */
4321 4321          if (key != 0) {
4322 4322                  uint_t k = key - 1;
4323 4323                  /*
4324 4324                   * for every vnode with VSD, call key's destructor
4325 4325                   */
4326 4326                  for (vsd = list_head(vsd_list); vsd != NULL;
4327 4327                      vsd = list_next(vsd_list, vsd)) {
4328 4328                          /*
4329 4329                           * no VSD for key in this vnode
4330 4330                           */
4331 4331                          if (key > vsd->vs_nkeys)
4332 4332                                  continue;
4333 4333                          /*
4334 4334                           * call destructor for key
4335 4335                           */
4336 4336                          if (vsd->vs_value[k] && vsd_destructor[k])
4337 4337                                  (*vsd_destructor[k])(vsd->vs_value[k]);
4338 4338                          /*
4339 4339                           * reset value for key
4340 4340                           */
4341 4341                          vsd->vs_value[k] = NULL;
4342 4342                  }
4343 4343                  /*
4344 4344                   * actually free the key (NULL destructor == unused)
4345 4345                   */
4346 4346                  vsd_destructor[k] = NULL;
4347 4347          }
4348 4348  
4349 4349          mutex_exit(&vsd_lock);
4350 4350  }
4351 4351  
4352 4352  /*
4353 4353   * Quickly return the per vnode value that was stored with the specified key
4354 4354   * Assumes the caller is protecting key from vsd_create and vsd_destroy
4355 4355   * Assumes the caller is holding v_vsd_lock to protect the vsd.
4356 4356   */
4357 4357  void *
4358 4358  vsd_get(vnode_t *vp, uint_t key)
4359 4359  {
4360 4360          struct vsd_node *vsd;
4361 4361  
4362 4362          ASSERT(vp != NULL);
4363 4363          ASSERT(mutex_owned(&vp->v_vsd_lock));
4364 4364  
4365 4365          vsd = vp->v_vsd;
4366 4366  
4367 4367          if (key && vsd != NULL && key <= vsd->vs_nkeys)
4368 4368                  return (vsd->vs_value[key - 1]);
4369 4369          return (NULL);
4370 4370  }
4371 4371  
4372 4372  /*
4373 4373   * Set a per vnode value indexed with the specified key
4374 4374   * Assumes the caller is holding v_vsd_lock to protect the vsd.
4375 4375   */
4376 4376  int
4377 4377  vsd_set(vnode_t *vp, uint_t key, void *value)
4378 4378  {
4379 4379          struct vsd_node *vsd;
4380 4380  
4381 4381          ASSERT(vp != NULL);
4382 4382          ASSERT(mutex_owned(&vp->v_vsd_lock));
4383 4383  
4384 4384          if (key == 0)
4385 4385                  return (EINVAL);
4386 4386  
4387 4387          vsd = vp->v_vsd;
4388 4388          if (vsd == NULL)
4389 4389                  vsd = vp->v_vsd = kmem_zalloc(sizeof (*vsd), KM_SLEEP);
4390 4390  
4391 4391          /*
4392 4392           * If the vsd was just allocated, vs_nkeys will be 0, so the following
4393 4393           * code won't happen and we will continue down and allocate space for
4394 4394           * the vs_value array.
4395 4395           * If the caller is replacing one value with another, then it is up
4396 4396           * to the caller to free/rele/destroy the previous value (if needed).
4397 4397           */
4398 4398          if (key <= vsd->vs_nkeys) {
4399 4399                  vsd->vs_value[key - 1] = value;
4400 4400                  return (0);
4401 4401          }
4402 4402  
4403 4403          ASSERT(key <= vsd_nkeys);
4404 4404  
4405 4405          if (vsd->vs_nkeys == 0) {
4406 4406                  mutex_enter(&vsd_lock); /* lock out vsd_destroy() */
4407 4407                  /*
4408 4408                   * Link onto list of all VSD nodes.
4409 4409                   */
4410 4410                  list_insert_head(vsd_list, vsd);
4411 4411                  mutex_exit(&vsd_lock);
4412 4412          }
4413 4413  
4414 4414          /*
4415 4415           * Allocate vnode local storage and set the value for key
4416 4416           */
4417 4417          vsd->vs_value = vsd_realloc(vsd->vs_value,
4418 4418              vsd->vs_nkeys * sizeof (void *),
4419 4419              key * sizeof (void *));
4420 4420          vsd->vs_nkeys = key;
4421 4421          vsd->vs_value[key - 1] = value;
4422 4422  
4423 4423          return (0);
4424 4424  }
4425 4425  
4426 4426  /*
4427 4427   * Called from vn_free() to run the destructor function for each vsd
4428 4428   *      Locks out vsd_create and vsd_destroy
4429 4429   *      Assumes that the destructor *DOES NOT* use vsd
4430 4430   */
4431 4431  void
4432 4432  vsd_free(vnode_t *vp)
4433 4433  {
4434 4434          int i;
4435 4435          struct vsd_node *vsd = vp->v_vsd;
4436 4436  
4437 4437          if (vsd == NULL)
4438 4438                  return;
4439 4439  
4440 4440          if (vsd->vs_nkeys == 0) {
4441 4441                  kmem_free(vsd, sizeof (*vsd));
4442 4442                  vp->v_vsd = NULL;
4443 4443                  return;
4444 4444          }
4445 4445  
4446 4446          /*
4447 4447           * lock out vsd_create and vsd_destroy, call
4448 4448           * the destructor, and mark the value as destroyed.
4449 4449           */
4450 4450          mutex_enter(&vsd_lock);
4451 4451  
4452 4452          for (i = 0; i < vsd->vs_nkeys; i++) {
4453 4453                  if (vsd->vs_value[i] && vsd_destructor[i])
4454 4454                          (*vsd_destructor[i])(vsd->vs_value[i]);
4455 4455                  vsd->vs_value[i] = NULL;
4456 4456          }
4457 4457  
4458 4458          /*
4459 4459           * remove from linked list of VSD nodes
4460 4460           */
4461 4461          list_remove(vsd_list, vsd);
4462 4462  
4463 4463          mutex_exit(&vsd_lock);
4464 4464  
4465 4465          /*
4466 4466           * free up the VSD
4467 4467           */
4468 4468          kmem_free(vsd->vs_value, vsd->vs_nkeys * sizeof (void *));
4469 4469          kmem_free(vsd, sizeof (struct vsd_node));
4470 4470          vp->v_vsd = NULL;
4471 4471  }
4472 4472  
4473 4473  /*
4474 4474   * realloc
4475 4475   */
4476 4476  static void *
4477 4477  vsd_realloc(void *old, size_t osize, size_t nsize)
4478 4478  {
4479 4479          void *new;
4480 4480  
4481 4481          new = kmem_zalloc(nsize, KM_SLEEP);
4482 4482          if (old) {
4483 4483                  bcopy(old, new, osize);
4484 4484                  kmem_free(old, osize);
4485 4485          }
4486 4486          return (new);
4487 4487  }
4488 4488  
4489 4489  /*
4490 4490   * Setup the extensible system attribute for creating a reparse point.
4491 4491   * The symlink data 'target' is validated for proper format of a reparse
4492 4492   * string and a check also made to make sure the symlink data does not
4493 4493   * point to an existing file.
4494 4494   *
4495 4495   * return 0 if ok else -1.
4496 4496   */
4497 4497  static int
4498 4498  fs_reparse_mark(char *target, vattr_t *vap, xvattr_t *xvattr)
4499 4499  {
4500 4500          xoptattr_t *xoap;
4501 4501  
4502 4502          if ((!target) || (!vap) || (!xvattr))
4503 4503                  return (-1);
4504 4504  
4505 4505          /* validate reparse string */
4506 4506          if (reparse_validate((const char *)target))
4507 4507                  return (-1);
4508 4508  
4509 4509          xva_init(xvattr);
4510 4510          xvattr->xva_vattr = *vap;
4511 4511          xvattr->xva_vattr.va_mask |= AT_XVATTR;
4512 4512          xoap = xva_getxoptattr(xvattr);
4513 4513          ASSERT(xoap);
4514 4514          XVA_SET_REQ(xvattr, XAT_REPARSE);
4515 4515          xoap->xoa_reparse = 1;
4516 4516  
4517 4517          return (0);
4518 4518  }
4519 4519  
4520 4520  /*
4521 4521   * Function to check whether a symlink is a reparse point.
4522 4522   * Return B_TRUE if it is a reparse point, else return B_FALSE
4523 4523   */
4524 4524  boolean_t
4525 4525  vn_is_reparse(vnode_t *vp, cred_t *cr, caller_context_t *ct)
4526 4526  {
4527 4527          xvattr_t xvattr;
4528 4528          xoptattr_t *xoap;
4529 4529  
4530 4530          if ((vp->v_type != VLNK) ||
4531 4531              !(vfs_has_feature(vp->v_vfsp, VFSFT_XVATTR)))
4532 4532                  return (B_FALSE);
4533 4533  
4534 4534          xva_init(&xvattr);
4535 4535          xoap = xva_getxoptattr(&xvattr);
4536 4536          ASSERT(xoap);
4537 4537          XVA_SET_REQ(&xvattr, XAT_REPARSE);
4538 4538  
4539 4539          if (VOP_GETATTR(vp, &xvattr.xva_vattr, 0, cr, ct))
4540 4540                  return (B_FALSE);
4541 4541  
4542 4542          if ((!(xvattr.xva_vattr.va_mask & AT_XVATTR)) ||
4543 4543              (!(XVA_ISSET_RTN(&xvattr, XAT_REPARSE))))
4544 4544                  return (B_FALSE);
4545 4545  
4546 4546          return (xoap->xoa_reparse ? B_TRUE : B_FALSE);
4547 4547  }

↓ open down ↓

1317 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX