1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  29  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  30  */
  31 
  32 #include <sys/param.h>
  33 #include <sys/t_lock.h>
  34 #include <sys/systm.h>
  35 #include <sys/sysmacros.h>
  36 #include <sys/user.h>
  37 #include <sys/buf.h>
  38 #include <sys/stat.h>
  39 #include <sys/vfs.h>
  40 #include <sys/vfs_opreg.h>
  41 #include <sys/dirent.h>
  42 #include <sys/vnode.h>
  43 #include <sys/proc.h>
  44 #include <sys/file.h>
  45 #include <sys/fcntl.h>
  46 #include <sys/uio.h>
  47 #include <sys/fs/pc_label.h>
  48 #include <sys/fs/pc_fs.h>
  49 #include <sys/fs/pc_dir.h>
  50 #include <sys/fs/pc_node.h>
  51 #include <sys/mman.h>
  52 #include <sys/pathname.h>
  53 #include <sys/vmsystm.h>
  54 #include <sys/cmn_err.h>
  55 #include <sys/debug.h>
  56 #include <sys/statvfs.h>
  57 #include <sys/unistd.h>
  58 #include <sys/kmem.h>
  59 #include <sys/conf.h>
  60 #include <sys/flock.h>
  61 #include <sys/policy.h>
  62 #include <sys/sdt.h>
  63 #include <sys/sunddi.h>
  64 #include <sys/types.h>
  65 #include <sys/errno.h>
  66 
  67 #include <vm/seg.h>
  68 #include <vm/page.h>
  69 #include <vm/pvn.h>
  70 #include <vm/seg_map.h>
  71 #include <vm/seg_vn.h>
  72 #include <vm/hat.h>
  73 #include <vm/as.h>
  74 #include <vm/seg_kmem.h>
  75 
  76 #include <fs/fs_subr.h>
  77 
  78 static int pcfs_open(struct vnode **, int, struct cred *, caller_context_t *ct);
  79 static int pcfs_close(struct vnode *, int, int, offset_t, struct cred *,
  80         caller_context_t *ct);
  81 static int pcfs_read(struct vnode *, struct uio *, int, struct cred *,
  82         caller_context_t *);
  83 static int pcfs_write(struct vnode *, struct uio *, int, struct cred *,
  84         caller_context_t *);
  85 static int pcfs_getattr(struct vnode *, struct vattr *, int, struct cred *,
  86         caller_context_t *ct);
  87 static int pcfs_setattr(struct vnode *, struct vattr *, int, struct cred *,
  88         caller_context_t *);
  89 static int pcfs_access(struct vnode *, int, int, struct cred *,
  90         caller_context_t *ct);
  91 static int pcfs_lookup(struct vnode *, char *, struct vnode **,
  92         struct pathname *, int, struct vnode *, struct cred *,
  93         caller_context_t *, int *, pathname_t *);
  94 static int pcfs_create(struct vnode *, char *, struct vattr *,
  95         enum vcexcl, int mode, struct vnode **, struct cred *, int,
  96         caller_context_t *, vsecattr_t *);
  97 static int pcfs_remove(struct vnode *, char *, struct cred *,
  98         caller_context_t *, int);
  99 static int pcfs_rename(struct vnode *, char *, struct vnode *, char *,
 100         struct cred *, caller_context_t *, int);
 101 static int pcfs_mkdir(struct vnode *, char *, struct vattr *, struct vnode **,
 102         struct cred *, caller_context_t *, int, vsecattr_t *);
 103 static int pcfs_rmdir(struct vnode *, char *, struct vnode *, struct cred *,
 104         caller_context_t *, int);
 105 static int pcfs_readdir(struct vnode *, struct uio *, struct cred *, int *,
 106         caller_context_t *, int);
 107 static int pcfs_fsync(struct vnode *, int, struct cred *, caller_context_t *);
 108 static void pcfs_inactive(struct vnode *, struct cred *, caller_context_t *);
 109 static int pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *);
 110 static int pcfs_space(struct vnode *, int, struct flock64 *, int,
 111         offset_t, cred_t *, caller_context_t *);
 112 static int pcfs_getpage(struct vnode *, offset_t, size_t, uint_t *, page_t *[],
 113         size_t, struct seg *, caddr_t, enum seg_rw, struct cred *,
 114         caller_context_t *);
 115 static int pcfs_getapage(struct vnode *, u_offset_t, size_t, uint_t *,
 116         page_t *[], size_t, struct seg *, caddr_t, enum seg_rw, struct cred *);
 117 static int pcfs_putpage(struct vnode *, offset_t, size_t, int, struct cred *,
 118         caller_context_t *);
 119 static int pcfs_map(struct vnode *, offset_t, struct as *, caddr_t *, size_t,
 120         uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
 121 static int pcfs_addmap(struct vnode *, offset_t, struct as *, caddr_t,
 122         size_t, uchar_t, uchar_t, uint_t, struct cred *, caller_context_t *);
 123 static int pcfs_delmap(struct vnode *, offset_t, struct as *, caddr_t,
 124         size_t, uint_t, uint_t, uint_t, struct cred *, caller_context_t *);
 125 static int pcfs_seek(struct vnode *, offset_t, offset_t *,
 126         caller_context_t *);
 127 static int pcfs_pathconf(struct vnode *, int, ulong_t *, struct cred *,
 128         caller_context_t *);
 129 
 130 int pcfs_putapage(struct vnode *, page_t *, u_offset_t *, size_t *, int,
 131         struct cred *);
 132 static int rwpcp(struct pcnode *, struct uio *, enum uio_rw, int);
 133 static int get_long_fn_chunk(struct pcdir_lfn *ep, char *buf);
 134 
 135 extern krwlock_t pcnodes_lock;
 136 
 137 #define lround(r)       (((r)+sizeof (long long)-1)&(~(sizeof (long long)-1)))
 138 
 139 /*
 140  * vnode op vectors for files and directories.
 141  */
 142 struct vnodeops *pcfs_fvnodeops;
 143 struct vnodeops *pcfs_dvnodeops;
 144 
 145 const fs_operation_def_t pcfs_fvnodeops_template[] = {
 146         VOPNAME_OPEN,           { .vop_open = pcfs_open },
 147         VOPNAME_CLOSE,          { .vop_close = pcfs_close },
 148         VOPNAME_READ,           { .vop_read = pcfs_read },
 149         VOPNAME_WRITE,          { .vop_write = pcfs_write },
 150         VOPNAME_GETATTR,        { .vop_getattr = pcfs_getattr },
 151         VOPNAME_SETATTR,        { .vop_setattr = pcfs_setattr },
 152         VOPNAME_ACCESS,         { .vop_access = pcfs_access },
 153         VOPNAME_FSYNC,          { .vop_fsync = pcfs_fsync },
 154         VOPNAME_INACTIVE,       { .vop_inactive = pcfs_inactive },
 155         VOPNAME_FID,            { .vop_fid = pcfs_fid },
 156         VOPNAME_SEEK,           { .vop_seek = pcfs_seek },
 157         VOPNAME_SPACE,          { .vop_space = pcfs_space },
 158         VOPNAME_GETPAGE,        { .vop_getpage = pcfs_getpage },
 159         VOPNAME_PUTPAGE,        { .vop_putpage = pcfs_putpage },
 160         VOPNAME_MAP,            { .vop_map = pcfs_map },
 161         VOPNAME_ADDMAP,         { .vop_addmap = pcfs_addmap },
 162         VOPNAME_DELMAP,         { .vop_delmap = pcfs_delmap },
 163         VOPNAME_PATHCONF,       { .vop_pathconf = pcfs_pathconf },
 164         VOPNAME_VNEVENT,        { .vop_vnevent = fs_vnevent_support },
 165         NULL,                   NULL
 166 };
 167 
 168 const fs_operation_def_t pcfs_dvnodeops_template[] = {
 169         VOPNAME_OPEN,           { .vop_open = pcfs_open },
 170         VOPNAME_CLOSE,          { .vop_close = pcfs_close },
 171         VOPNAME_GETATTR,        { .vop_getattr = pcfs_getattr },
 172         VOPNAME_SETATTR,        { .vop_setattr = pcfs_setattr },
 173         VOPNAME_ACCESS,         { .vop_access = pcfs_access },
 174         VOPNAME_LOOKUP,         { .vop_lookup = pcfs_lookup },
 175         VOPNAME_CREATE,         { .vop_create = pcfs_create },
 176         VOPNAME_REMOVE,         { .vop_remove = pcfs_remove },
 177         VOPNAME_RENAME,         { .vop_rename = pcfs_rename },
 178         VOPNAME_MKDIR,          { .vop_mkdir = pcfs_mkdir },
 179         VOPNAME_RMDIR,          { .vop_rmdir = pcfs_rmdir },
 180         VOPNAME_READDIR,        { .vop_readdir = pcfs_readdir },
 181         VOPNAME_FSYNC,          { .vop_fsync = pcfs_fsync },
 182         VOPNAME_INACTIVE,       { .vop_inactive = pcfs_inactive },
 183         VOPNAME_FID,            { .vop_fid = pcfs_fid },
 184         VOPNAME_SEEK,           { .vop_seek = pcfs_seek },
 185         VOPNAME_PATHCONF,       { .vop_pathconf = pcfs_pathconf },
 186         VOPNAME_VNEVENT,        { .vop_vnevent = fs_vnevent_support },
 187         NULL,                   NULL
 188 };
 189 
 190 
 191 /*ARGSUSED*/
 192 static int
 193 pcfs_open(
 194         struct vnode **vpp,
 195         int flag,
 196         struct cred *cr,
 197         caller_context_t *ct)
 198 {
 199         return (0);
 200 }
 201 
 202 /*
 203  * files are sync'ed on close to keep floppy up to date
 204  */
 205 
 206 /*ARGSUSED*/
 207 static int
 208 pcfs_close(
 209         struct vnode *vp,
 210         int flag,
 211         int count,
 212         offset_t offset,
 213         struct cred *cr,
 214         caller_context_t *ct)
 215 {
 216         return (0);
 217 }
 218 
 219 /*ARGSUSED*/
 220 static int
 221 pcfs_read(
 222         struct vnode *vp,
 223         struct uio *uiop,
 224         int ioflag,
 225         struct cred *cr,
 226         struct caller_context *ct)
 227 {
 228         struct pcfs *fsp;
 229         struct pcnode *pcp;
 230         int error;
 231 
 232         fsp = VFSTOPCFS(vp->v_vfsp);
 233         if (error = pc_verify(fsp))
 234                 return (error);
 235         error = pc_lockfs(fsp, 0, 0);
 236         if (error)
 237                 return (error);
 238         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
 239                 pc_unlockfs(fsp);
 240                 return (EIO);
 241         }
 242         error = rwpcp(pcp, uiop, UIO_READ, ioflag);
 243         if ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0) {
 244                 pc_mark_acc(fsp, pcp);
 245         }
 246         pc_unlockfs(fsp);
 247         if (error) {
 248                 PC_DPRINTF1(1, "pcfs_read: io error = %d\n", error);
 249         }
 250         return (error);
 251 }
 252 
 253 /*ARGSUSED*/
 254 static int
 255 pcfs_write(
 256         struct vnode *vp,
 257         struct uio *uiop,
 258         int ioflag,
 259         struct cred *cr,
 260         struct caller_context *ct)
 261 {
 262         struct pcfs *fsp;
 263         struct pcnode *pcp;
 264         int error;
 265 
 266         fsp = VFSTOPCFS(vp->v_vfsp);
 267         if (error = pc_verify(fsp))
 268                 return (error);
 269         error = pc_lockfs(fsp, 0, 0);
 270         if (error)
 271                 return (error);
 272         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
 273                 pc_unlockfs(fsp);
 274                 return (EIO);
 275         }
 276         if (ioflag & FAPPEND) {
 277                 /*
 278                  * in append mode start at end of file.
 279                  */
 280                 uiop->uio_loffset = pcp->pc_size;
 281         }
 282         error = rwpcp(pcp, uiop, UIO_WRITE, ioflag);
 283         pcp->pc_flags |= PC_MOD;
 284         pc_mark_mod(fsp, pcp);
 285         if (ioflag & (FSYNC|FDSYNC))
 286                 (void) pc_nodeupdate(pcp);
 287 
 288         pc_unlockfs(fsp);
 289         if (error) {
 290                 PC_DPRINTF1(1, "pcfs_write: io error = %d\n", error);
 291         }
 292         return (error);
 293 }
 294 
 295 /*
 296  * read or write a vnode
 297  */
 298 static int
 299 rwpcp(
 300         struct pcnode *pcp,
 301         struct uio *uio,
 302         enum uio_rw rw,
 303         int ioflag)
 304 {
 305         struct vnode *vp = PCTOV(pcp);
 306         struct pcfs *fsp;
 307         daddr_t bn;                     /* phys block number */
 308         int n;
 309         offset_t off;
 310         caddr_t base;
 311         int mapon, pagecreate;
 312         int newpage;
 313         int error = 0;
 314         rlim64_t limit = uio->uio_llimit;
 315         int oresid = uio->uio_resid;
 316 
 317         /*
 318          * If the filesystem was umounted by force, return immediately.
 319          */
 320         if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
 321                 return (EIO);
 322 
 323         PC_DPRINTF4(5, "rwpcp pcp=%p off=%lld resid=%ld size=%u\n", (void *)pcp,
 324             uio->uio_loffset, uio->uio_resid, pcp->pc_size);
 325 
 326         ASSERT(rw == UIO_READ || rw == UIO_WRITE);
 327         ASSERT(vp->v_type == VREG);
 328 
 329         if (uio->uio_loffset >= UINT32_MAX && rw == UIO_READ) {
 330                 return (0);
 331         }
 332 
 333         if (uio->uio_loffset < 0)
 334                 return (EINVAL);
 335 
 336         if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
 337                 limit = MAXOFFSET_T;
 338 
 339         if (uio->uio_loffset >= limit && rw == UIO_WRITE) {
 340                 proc_t *p = ttoproc(curthread);
 341 
 342                 mutex_enter(&p->p_lock);
 343                 (void) rctl_action(rctlproc_legacy[RLIMIT_FSIZE], p->p_rctls,
 344                     p, RCA_UNSAFE_SIGINFO);
 345                 mutex_exit(&p->p_lock);
 346                 return (EFBIG);
 347         }
 348 
 349         /* the following condition will occur only for write */
 350 
 351         if (uio->uio_loffset >= UINT32_MAX)
 352                 return (EFBIG);
 353 
 354         if (uio->uio_resid == 0)
 355                 return (0);
 356 
 357         if (limit > UINT32_MAX)
 358                 limit = UINT32_MAX;
 359 
 360         fsp = VFSTOPCFS(vp->v_vfsp);
 361         if (fsp->pcfs_flags & PCFS_IRRECOV)
 362                 return (EIO);
 363 
 364         do {
 365                 /*
 366                  * Assignments to "n" in this block may appear
 367                  * to overflow in some cases.  However, after careful
 368                  * analysis it was determined that all assignments to
 369                  * "n" serve only to make "n" smaller.  Since "n"
 370                  * starts out as no larger than MAXBSIZE, "int" is
 371                  * safe.
 372                  */
 373                 off = uio->uio_loffset & MAXBMASK;
 374                 mapon = (int)(uio->uio_loffset & MAXBOFFSET);
 375                 n = MIN(MAXBSIZE - mapon, uio->uio_resid);
 376                 if (rw == UIO_READ) {
 377                         offset_t diff;
 378 
 379                         diff = pcp->pc_size - uio->uio_loffset;
 380                         if (diff <= 0)
 381                                 return (0);
 382                         if (diff < n)
 383                                 n = (int)diff;
 384                 }
 385                 /*
 386                  * Compare limit with the actual offset + n, not the
 387                  * rounded down offset "off" or we will overflow
 388                  * the maximum file size after all.
 389                  */
 390                 if (rw == UIO_WRITE && uio->uio_loffset + n >= limit) {
 391                         if (uio->uio_loffset >= limit) {
 392                                 error = EFBIG;
 393                                 break;
 394                         }
 395                         n = (int)(limit - uio->uio_loffset);
 396                 }
 397 
 398                 /*
 399                  * Touch the page and fault it in if it is not in
 400                  * core before segmap_getmapflt can lock it. This
 401                  * is to avoid the deadlock if the buffer is mapped
 402                  * to the same file through mmap which we want to
 403                  * write to.
 404                  */
 405                 uio_prefaultpages((long)n, uio);
 406 
 407                 base = segmap_getmap(segkmap, vp, (u_offset_t)off);
 408                 pagecreate = 0;
 409                 newpage = 0;
 410                 if (rw == UIO_WRITE) {
 411                         /*
 412                          * If PAGESIZE < MAXBSIZE, perhaps we ought to deal
 413                          * with one page at a time, instead of one MAXBSIZE
 414                          * at a time, so we can fully explore pagecreate
 415                          * optimization??
 416                          */
 417                         if (uio->uio_loffset + n > pcp->pc_size) {
 418                                 uint_t ncl, lcn;
 419 
 420                                 ncl = (uint_t)howmany((offset_t)pcp->pc_size,
 421                                     fsp->pcfs_clsize);
 422                                 if (uio->uio_loffset > pcp->pc_size &&
 423                                     ncl < (uint_t)howmany(uio->uio_loffset,
 424                                     fsp->pcfs_clsize)) {
 425                                         /*
 426                                          * Allocate and zerofill skipped
 427                                          * clusters. This may not be worth the
 428                                          * effort since a small lseek beyond
 429                                          * eof but still within the cluster
 430                                          * will not be zeroed out.
 431                                          */
 432                                         lcn = pc_lblkno(fsp, uio->uio_loffset);
 433                                         error = pc_balloc(pcp, (daddr_t)lcn,
 434                                             1, &bn);
 435                                         ncl = lcn + 1;
 436                                 }
 437                                 if (!error &&
 438                                     ncl < (uint_t)howmany(uio->uio_loffset + n,
 439                                     fsp->pcfs_clsize))
 440                                         /*
 441                                          * allocate clusters w/o zerofill
 442                                          */
 443                                         error = pc_balloc(pcp,
 444                                             (daddr_t)pc_lblkno(fsp,
 445                                             uio->uio_loffset + n - 1),
 446                                             0, &bn);
 447 
 448                                 pcp->pc_flags |= PC_CHG;
 449 
 450                                 if (error) {
 451                                         pc_cluster32_t ncl;
 452                                         int nerror;
 453 
 454                                         /*
 455                                          * figure out new file size from
 456                                          * cluster chain length. If this
 457                                          * is detected to loop, the chain
 458                                          * is corrupted and we'd better
 459                                          * keep our fingers off that file.
 460                                          */
 461                                         nerror = pc_fileclsize(fsp,
 462                                             pcp->pc_scluster, &ncl);
 463                                         if (nerror) {
 464                                                 PC_DPRINTF1(2,
 465                                                     "cluster chain "
 466                                                     "corruption, "
 467                                                     "scluster=%d\n",
 468                                                     pcp->pc_scluster);
 469                                                 pcp->pc_size = 0;
 470                                                 pcp->pc_flags |= PC_INVAL;
 471                                                 error = nerror;
 472                                                 (void) segmap_release(segkmap,
 473                                                     base, 0);
 474                                                 break;
 475                                         }
 476                                         pcp->pc_size = fsp->pcfs_clsize * ncl;
 477 
 478                                         if (error == ENOSPC &&
 479                                             (pcp->pc_size - uio->uio_loffset)
 480                                             > 0) {
 481                                                 PC_DPRINTF3(2, "rwpcp ENOSPC "
 482                                                     "off=%lld n=%d size=%d\n",
 483                                                     uio->uio_loffset,
 484                                                     n, pcp->pc_size);
 485                                                 n = (int)(pcp->pc_size -
 486                                                     uio->uio_loffset);
 487                                         } else {
 488                                                 PC_DPRINTF1(1,
 489                                                     "rwpcp error1=%d\n", error);
 490                                                 (void) segmap_release(segkmap,
 491                                                     base, 0);
 492                                                 break;
 493                                         }
 494                                 } else {
 495                                         pcp->pc_size =
 496                                             (uint_t)(uio->uio_loffset + n);
 497                                 }
 498                                 if (mapon == 0) {
 499                                         newpage = segmap_pagecreate(segkmap,
 500                                             base, (size_t)n, 0);
 501                                         pagecreate = 1;
 502                                 }
 503                         } else if (n == MAXBSIZE) {
 504                                 newpage = segmap_pagecreate(segkmap, base,
 505                                     (size_t)n, 0);
 506                                 pagecreate = 1;
 507                         }
 508                 }
 509                 error = uiomove(base + mapon, (size_t)n, rw, uio);
 510 
 511                 if (pagecreate && uio->uio_loffset <
 512                     roundup(off + mapon + n, PAGESIZE)) {
 513                         offset_t nzero, nmoved;
 514 
 515                         nmoved = uio->uio_loffset - (off + mapon);
 516                         nzero = roundup(mapon + n, PAGESIZE) - nmoved;
 517                         (void) kzero(base + mapon + nmoved, (size_t)nzero);
 518                 }
 519 
 520                 /*
 521                  * Unlock the pages which have been allocated by
 522                  * page_create_va() in segmap_pagecreate().
 523                  */
 524                 if (newpage) {
 525                         segmap_pageunlock(segkmap, base, (size_t)n,
 526                             rw == UIO_WRITE ? S_WRITE : S_READ);
 527                 }
 528 
 529                 if (error) {
 530                         PC_DPRINTF1(1, "rwpcp error2=%d\n", error);
 531                         /*
 532                          * If we failed on a write, we may have already
 533                          * allocated file blocks as well as pages.  It's hard
 534                          * to undo the block allocation, but we must be sure
 535                          * to invalidate any pages that may have been
 536                          * allocated.
 537                          */
 538                         if (rw == UIO_WRITE)
 539                                 (void) segmap_release(segkmap, base, SM_INVAL);
 540                         else
 541                                 (void) segmap_release(segkmap, base, 0);
 542                 } else {
 543                         uint_t flags = 0;
 544 
 545                         if (rw == UIO_READ) {
 546                                 if (n + mapon == MAXBSIZE ||
 547                                     uio->uio_loffset == pcp->pc_size)
 548                                         flags = SM_DONTNEED;
 549                         } else if (ioflag & (FSYNC|FDSYNC)) {
 550                                 flags = SM_WRITE;
 551                         } else if (n + mapon == MAXBSIZE) {
 552                                 flags = SM_WRITE|SM_ASYNC|SM_DONTNEED;
 553                         }
 554                         error = segmap_release(segkmap, base, flags);
 555                 }
 556 
 557         } while (error == 0 && uio->uio_resid > 0 && n != 0);
 558 
 559         if (oresid != uio->uio_resid)
 560                 error = 0;
 561         return (error);
 562 }
 563 
 564 /*ARGSUSED*/
 565 static int
 566 pcfs_getattr(
 567         struct vnode *vp,
 568         struct vattr *vap,
 569         int flags,
 570         struct cred *cr,
 571         caller_context_t *ct)
 572 {
 573         struct pcnode *pcp;
 574         struct pcfs *fsp;
 575         int error;
 576         char attr;
 577         struct pctime atime;
 578         int64_t unixtime;
 579 
 580         PC_DPRINTF1(8, "pcfs_getattr: vp=%p\n", (void *)vp);
 581 
 582         fsp = VFSTOPCFS(vp->v_vfsp);
 583         error = pc_lockfs(fsp, 0, 0);
 584         if (error)
 585                 return (error);
 586 
 587         /*
 588          * Note that we don't check for "invalid node" (PC_INVAL) here
 589          * only in order to make stat() succeed. We allow no I/O on such
 590          * a node, but do allow to check for its existence.
 591          */
 592         if ((pcp = VTOPC(vp)) == NULL) {
 593                 pc_unlockfs(fsp);
 594                 return (EIO);
 595         }
 596         /*
 597          * Copy from pcnode.
 598          */
 599         vap->va_type = vp->v_type;
 600         attr = pcp->pc_entry.pcd_attr;
 601         if (PCA_IS_HIDDEN(fsp, attr))
 602                 vap->va_mode = 0;
 603         else if (attr & PCA_LABEL)
 604                 vap->va_mode = 0444;
 605         else if (attr & PCA_RDONLY)
 606                 vap->va_mode = 0555;
 607         else if (fsp->pcfs_flags & PCFS_BOOTPART) {
 608                 vap->va_mode = 0755;
 609         } else {
 610                 vap->va_mode = 0777;
 611         }
 612 
 613         if (attr & PCA_DIR)
 614                 vap->va_mode |= S_IFDIR;
 615         else
 616                 vap->va_mode |= S_IFREG;
 617         if (fsp->pcfs_flags & PCFS_BOOTPART) {
 618                 vap->va_uid = 0;
 619                 vap->va_gid = 0;
 620         } else {
 621                 vap->va_uid = crgetuid(cr);
 622                 vap->va_gid = crgetgid(cr);
 623         }
 624         vap->va_fsid = vp->v_vfsp->vfs_dev;
 625         vap->va_nodeid = (ino64_t)pc_makenodeid(pcp->pc_eblkno,
 626             pcp->pc_eoffset, pcp->pc_entry.pcd_attr,
 627             pc_getstartcluster(fsp, &pcp->pc_entry), pc_direntpersec(fsp));
 628         vap->va_nlink = 1;
 629         vap->va_size = (u_offset_t)pcp->pc_size;
 630         vap->va_rdev = 0;
 631         vap->va_nblocks =
 632             (fsblkcnt64_t)howmany((offset_t)pcp->pc_size, DEV_BSIZE);
 633         vap->va_blksize = fsp->pcfs_clsize;
 634 
 635         /*
 636          * FAT root directories have no timestamps. In order not to return
 637          * "time zero" (1/1/1970), we record the time of the mount and give
 638          * that. This breaks less expectations.
 639          */
 640         if (vp->v_flag & VROOT) {
 641                 vap->va_mtime = fsp->pcfs_mounttime;
 642                 vap->va_atime = fsp->pcfs_mounttime;
 643                 vap->va_ctime = fsp->pcfs_mounttime;
 644                 pc_unlockfs(fsp);
 645                 return (0);
 646         }
 647 
 648         pc_pcttotv(&pcp->pc_entry.pcd_mtime, &unixtime);
 649         if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
 650                 if (unixtime > INT32_MAX)
 651                         DTRACE_PROBE1(pcfs__mtimeclamped, int64_t, unixtime);
 652                 unixtime = MIN(unixtime, INT32_MAX);
 653         } else if (unixtime > INT32_MAX &&
 654             get_udatamodel() == DATAMODEL_ILP32) {
 655                 pc_unlockfs(fsp);
 656                 DTRACE_PROBE1(pcfs__mtimeoverflowed, int64_t, unixtime);
 657                 return (EOVERFLOW);
 658         }
 659 
 660         vap->va_mtime.tv_sec = (time_t)unixtime;
 661         vap->va_mtime.tv_nsec = 0;
 662 
 663         /*
 664          * FAT doesn't know about POSIX ctime.
 665          * Best approximation is to always set it to mtime.
 666          */
 667         vap->va_ctime = vap->va_mtime;
 668 
 669         /*
 670          * FAT only stores "last access date". If that's the
 671          * same as the date of last modification then the time
 672          * of last access is known. Otherwise, use midnight.
 673          */
 674         atime.pct_date = pcp->pc_entry.pcd_ladate;
 675         if (atime.pct_date == pcp->pc_entry.pcd_mtime.pct_date)
 676                 atime.pct_time = pcp->pc_entry.pcd_mtime.pct_time;
 677         else
 678                 atime.pct_time = 0;
 679         pc_pcttotv(&atime, &unixtime);
 680         if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0) {
 681                 if (unixtime > INT32_MAX)
 682                         DTRACE_PROBE1(pcfs__atimeclamped, int64_t, unixtime);
 683                 unixtime = MIN(unixtime, INT32_MAX);
 684         } else if (unixtime > INT32_MAX &&
 685             get_udatamodel() == DATAMODEL_ILP32) {
 686                 pc_unlockfs(fsp);
 687                 DTRACE_PROBE1(pcfs__atimeoverflowed, int64_t, unixtime);
 688                 return (EOVERFLOW);
 689         }
 690 
 691         vap->va_atime.tv_sec = (time_t)unixtime;
 692         vap->va_atime.tv_nsec = 0;
 693 
 694         pc_unlockfs(fsp);
 695         return (0);
 696 }
 697 
 698 
 699 /*ARGSUSED*/
 700 static int
 701 pcfs_setattr(
 702         struct vnode *vp,
 703         struct vattr *vap,
 704         int flags,
 705         struct cred *cr,
 706         caller_context_t *ct)
 707 {
 708         struct pcnode *pcp;
 709         mode_t mask = vap->va_mask;
 710         int error;
 711         struct pcfs *fsp;
 712         timestruc_t now, *timep;
 713 
 714         PC_DPRINTF2(6, "pcfs_setattr: vp=%p mask=%x\n", (void *)vp, (int)mask);
 715         /*
 716          * cannot set these attributes
 717          */
 718         if (mask & (AT_NOSET | AT_UID | AT_GID)) {
 719                 return (EINVAL);
 720         }
 721         /*
 722          * pcfs_setattr is now allowed on directories to avoid silly warnings
 723          * from 'tar' when it tries to set times on a directory, and console
 724          * printf's on the NFS server when it gets EINVAL back on such a
 725          * request. One possible problem with that since a directory entry
 726          * identifies a file, '.' and all the '..' entries in subdirectories
 727          * may get out of sync when the directory is updated since they're
 728          * treated like separate files. We could fix that by looking for
 729          * '.' and giving it the same attributes, and then looking for
 730          * all the subdirectories and updating '..', but that's pretty
 731          * expensive for something that doesn't seem likely to matter.
 732          */
 733         /* can't do some ops on directories anyway */
 734         if ((vp->v_type == VDIR) &&
 735             (mask & AT_SIZE)) {
 736                 return (EINVAL);
 737         }
 738 
 739         fsp = VFSTOPCFS(vp->v_vfsp);
 740         error = pc_lockfs(fsp, 0, 0);
 741         if (error)
 742                 return (error);
 743         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
 744                 pc_unlockfs(fsp);
 745                 return (EIO);
 746         }
 747 
 748         if (fsp->pcfs_flags & PCFS_BOOTPART) {
 749                 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
 750                         pc_unlockfs(fsp);
 751                         return (EACCES);
 752                 }
 753         }
 754 
 755         /*
 756          * Change file access modes.
 757          * If nobody has write permission, file is marked readonly.
 758          * Otherwise file is writable by anyone.
 759          */
 760         if ((mask & AT_MODE) && (vap->va_mode != (mode_t)-1)) {
 761                 if ((vap->va_mode & 0222) == 0)
 762                         pcp->pc_entry.pcd_attr |= PCA_RDONLY;
 763                 else
 764                         pcp->pc_entry.pcd_attr &= ~PCA_RDONLY;
 765                 pcp->pc_flags |= PC_CHG;
 766         }
 767         /*
 768          * Truncate file. Must have write permission.
 769          */
 770         if ((mask & AT_SIZE) && (vap->va_size != (u_offset_t)-1)) {
 771                 if (pcp->pc_entry.pcd_attr & PCA_RDONLY) {
 772                         error = EACCES;
 773                         goto out;
 774                 }
 775                 if (vap->va_size > UINT32_MAX) {
 776                         error = EFBIG;
 777                         goto out;
 778                 }
 779                 error = pc_truncate(pcp, (uint_t)vap->va_size);
 780 
 781                 if (error)
 782                         goto out;
 783 
 784                 if (vap->va_size == 0)
 785                         vnevent_truncate(vp, ct);
 786         }
 787         /*
 788          * Change file modified times.
 789          */
 790         if (mask & (AT_MTIME | AT_CTIME)) {
 791                 /*
 792                  * If SysV-compatible option to set access and
 793                  * modified times if privileged, owner, or write access,
 794                  * use current time rather than va_mtime.
 795                  *
 796                  * XXX - va_mtime.tv_sec == -1 flags this.
 797                  */
 798                 timep = &vap->va_mtime;
 799                 if (vap->va_mtime.tv_sec == -1) {
 800                         gethrestime(&now);
 801                         timep = &now;
 802                 }
 803                 if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
 804                     timep->tv_sec > INT32_MAX) {
 805                         error = EOVERFLOW;
 806                         goto out;
 807                 }
 808                 error = pc_tvtopct(timep, &pcp->pc_entry.pcd_mtime);
 809                 if (error)
 810                         goto out;
 811                 pcp->pc_flags |= PC_CHG;
 812         }
 813         /*
 814          * Change file access times.
 815          */
 816         if (mask & AT_ATIME) {
 817                 /*
 818                  * If SysV-compatible option to set access and
 819                  * modified times if privileged, owner, or write access,
 820                  * use current time rather than va_mtime.
 821                  *
 822                  * XXX - va_atime.tv_sec == -1 flags this.
 823                  */
 824                 struct pctime   atime;
 825 
 826                 timep = &vap->va_atime;
 827                 if (vap->va_atime.tv_sec == -1) {
 828                         gethrestime(&now);
 829                         timep = &now;
 830                 }
 831                 if ((fsp->pcfs_flags & PCFS_NOCLAMPTIME) == 0 &&
 832                     timep->tv_sec > INT32_MAX) {
 833                         error = EOVERFLOW;
 834                         goto out;
 835                 }
 836                 error = pc_tvtopct(timep, &atime);
 837                 if (error)
 838                         goto out;
 839                 pcp->pc_entry.pcd_ladate = atime.pct_date;
 840                 pcp->pc_flags |= PC_CHG;
 841         }
 842 out:
 843         pc_unlockfs(fsp);
 844         return (error);
 845 }
 846 
 847 
 848 /*ARGSUSED*/
 849 static int
 850 pcfs_access(
 851         struct vnode *vp,
 852         int mode,
 853         int flags,
 854         struct cred *cr,
 855         caller_context_t *ct)
 856 {
 857         struct pcnode *pcp;
 858         struct pcfs *fsp;
 859 
 860 
 861         fsp = VFSTOPCFS(vp->v_vfsp);
 862 
 863         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
 864                 return (EIO);
 865         if ((mode & VWRITE) && (pcp->pc_entry.pcd_attr & PCA_RDONLY))
 866                 return (EACCES);
 867 
 868         /*
 869          * If this is a boot partition, privileged users have full access while
 870          * others have read-only access.
 871          */
 872         if (fsp->pcfs_flags & PCFS_BOOTPART) {
 873                 if ((mode & VWRITE) &&
 874                     secpolicy_pcfs_modify_bootpartition(cr) != 0)
 875                         return (EACCES);
 876         }
 877         return (0);
 878 }
 879 
 880 
 881 /*ARGSUSED*/
 882 static int
 883 pcfs_fsync(
 884         struct vnode *vp,
 885         int syncflag,
 886         struct cred *cr,
 887         caller_context_t *ct)
 888 {
 889         struct pcfs *fsp;
 890         struct pcnode *pcp;
 891         int error;
 892 
 893         fsp = VFSTOPCFS(vp->v_vfsp);
 894         if (error = pc_verify(fsp))
 895                 return (error);
 896         error = pc_lockfs(fsp, 0, 0);
 897         if (error)
 898                 return (error);
 899         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
 900                 pc_unlockfs(fsp);
 901                 return (EIO);
 902         }
 903         rw_enter(&pcnodes_lock, RW_WRITER);
 904         error = pc_nodesync(pcp);
 905         rw_exit(&pcnodes_lock);
 906         pc_unlockfs(fsp);
 907         return (error);
 908 }
 909 
 910 
 911 /*ARGSUSED*/
 912 static void
 913 pcfs_inactive(
 914         struct vnode *vp,
 915         struct cred *cr,
 916         caller_context_t *ct)
 917 {
 918         struct pcnode *pcp;
 919         struct pcfs *fsp;
 920         int error;
 921 
 922         fsp = VFSTOPCFS(vp->v_vfsp);
 923         error = pc_lockfs(fsp, 0, 1);
 924 
 925         /*
 926          * If the filesystem was umounted by force, all dirty
 927          * pages associated with this vnode are invalidated
 928          * and then the vnode will be freed.
 929          */
 930         if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED) {
 931                 pcp = VTOPC(vp);
 932                 if (vn_has_cached_data(vp)) {
 933                         (void) pvn_vplist_dirty(vp, (u_offset_t)0,
 934                             pcfs_putapage, B_INVAL, (struct cred *)NULL);
 935                 }
 936                 remque(pcp);
 937                 if (error == 0)
 938                         pc_unlockfs(fsp);
 939                 vn_free(vp);
 940                 kmem_free(pcp, sizeof (struct pcnode));
 941                 VFS_RELE(PCFSTOVFS(fsp));
 942                 return;
 943         }
 944 
 945         mutex_enter(&vp->v_lock);
 946         ASSERT(vp->v_count >= 1);
 947         if (vp->v_count > 1) {
 948                 vp->v_count--;  /* release our hold from vn_rele */
 949                 mutex_exit(&vp->v_lock);
 950                 pc_unlockfs(fsp);
 951                 return;
 952         }
 953         mutex_exit(&vp->v_lock);
 954 
 955         /*
 956          * Check again to confirm that no intervening I/O error
 957          * with a subsequent pc_diskchanged() call has released
 958          * the pcnode. If it has then release the vnode as above.
 959          */
 960         pcp = VTOPC(vp);
 961         if (pcp == NULL || pcp->pc_flags & PC_INVAL) {
 962                 if (vn_has_cached_data(vp))
 963                         (void) pvn_vplist_dirty(vp, (u_offset_t)0,
 964                             pcfs_putapage, B_INVAL | B_TRUNC,
 965                             (struct cred *)NULL);
 966         }
 967 
 968         if (pcp == NULL) {
 969                 vn_free(vp);
 970         } else {
 971                 pc_rele(pcp);
 972         }
 973 
 974         if (!error)
 975                 pc_unlockfs(fsp);
 976 }
 977 
 978 /*ARGSUSED*/
 979 static int
 980 pcfs_lookup(
 981         struct vnode *dvp,
 982         char *nm,
 983         struct vnode **vpp,
 984         struct pathname *pnp,
 985         int flags,
 986         struct vnode *rdir,
 987         struct cred *cr,
 988         caller_context_t *ct,
 989         int *direntflags,
 990         pathname_t *realpnp)
 991 {
 992         struct pcfs *fsp;
 993         struct pcnode *pcp;
 994         int error;
 995 
 996         /*
 997          * If the filesystem was umounted by force, return immediately.
 998          */
 999         if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1000                 return (EIO);
1001 
1002         /*
1003          * verify that the dvp is still valid on the disk
1004          */
1005         fsp = VFSTOPCFS(dvp->v_vfsp);
1006         if (error = pc_verify(fsp))
1007                 return (error);
1008         error = pc_lockfs(fsp, 0, 0);
1009         if (error)
1010                 return (error);
1011         if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1012                 pc_unlockfs(fsp);
1013                 return (EIO);
1014         }
1015         /*
1016          * Null component name is a synonym for directory being searched.
1017          */
1018         if (*nm == '\0') {
1019                 VN_HOLD(dvp);
1020                 *vpp = dvp;
1021                 pc_unlockfs(fsp);
1022                 return (0);
1023         }
1024 
1025         error = pc_dirlook(VTOPC(dvp), nm, &pcp);
1026         if (!error) {
1027                 *vpp = PCTOV(pcp);
1028                 pcp->pc_flags |= PC_EXTERNAL;
1029         }
1030         pc_unlockfs(fsp);
1031         return (error);
1032 }
1033 
1034 
1035 /*ARGSUSED*/
1036 static int
1037 pcfs_create(
1038         struct vnode *dvp,
1039         char *nm,
1040         struct vattr *vap,
1041         enum vcexcl exclusive,
1042         int mode,
1043         struct vnode **vpp,
1044         struct cred *cr,
1045         int flag,
1046         caller_context_t *ct,
1047         vsecattr_t *vsecp)
1048 {
1049         int error;
1050         struct pcnode *pcp;
1051         struct vnode *vp;
1052         struct pcfs *fsp;
1053 
1054         /*
1055          * can't create directories. use pcfs_mkdir.
1056          * can't create anything other than files.
1057          */
1058         if (vap->va_type == VDIR)
1059                 return (EISDIR);
1060         else if (vap->va_type != VREG)
1061                 return (EINVAL);
1062 
1063         pcp = NULL;
1064         fsp = VFSTOPCFS(dvp->v_vfsp);
1065         error = pc_lockfs(fsp, 0, 0);
1066         if (error)
1067                 return (error);
1068         if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1069                 pc_unlockfs(fsp);
1070                 return (EIO);
1071         }
1072 
1073         if (fsp->pcfs_flags & PCFS_BOOTPART) {
1074                 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1075                         pc_unlockfs(fsp);
1076                         return (EACCES);
1077                 }
1078         }
1079 
1080         if (*nm == '\0') {
1081                 /*
1082                  * Null component name refers to the directory itself.
1083                  */
1084                 VN_HOLD(dvp);
1085                 pcp = VTOPC(dvp);
1086                 error = EEXIST;
1087         } else {
1088                 error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1089         }
1090         /*
1091          * if file exists and this is a nonexclusive create,
1092          * check for access permissions
1093          */
1094         if (error == EEXIST) {
1095                 vp = PCTOV(pcp);
1096                 if (exclusive == NONEXCL) {
1097                         if (vp->v_type == VDIR) {
1098                                 error = EISDIR;
1099                         } else if (mode) {
1100                                 error = pcfs_access(PCTOV(pcp), mode, 0,
1101                                     cr, ct);
1102                         } else {
1103                                 error = 0;
1104                         }
1105                 }
1106                 if (error) {
1107                         VN_RELE(PCTOV(pcp));
1108                 } else if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) &&
1109                     (vap->va_size == 0)) {
1110                         error = pc_truncate(pcp, 0L);
1111                         if (error) {
1112                                 VN_RELE(PCTOV(pcp));
1113                         } else {
1114                                 vnevent_create(PCTOV(pcp), ct);
1115                         }
1116                 }
1117         }
1118         if (error) {
1119                 pc_unlockfs(fsp);
1120                 return (error);
1121         }
1122         *vpp = PCTOV(pcp);
1123         pcp->pc_flags |= PC_EXTERNAL;
1124         pc_unlockfs(fsp);
1125         return (error);
1126 }
1127 
1128 /*ARGSUSED*/
1129 static int
1130 pcfs_remove(
1131         struct vnode *vp,
1132         char *nm,
1133         struct cred *cr,
1134         caller_context_t *ct,
1135         int flags)
1136 {
1137         struct pcfs *fsp;
1138         struct pcnode *pcp;
1139         int error;
1140 
1141         fsp = VFSTOPCFS(vp->v_vfsp);
1142         if (error = pc_verify(fsp))
1143                 return (error);
1144         error = pc_lockfs(fsp, 0, 0);
1145         if (error)
1146                 return (error);
1147         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
1148                 pc_unlockfs(fsp);
1149                 return (EIO);
1150         }
1151         if (fsp->pcfs_flags & PCFS_BOOTPART) {
1152                 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1153                         pc_unlockfs(fsp);
1154                         return (EACCES);
1155                 }
1156         }
1157         error = pc_dirremove(pcp, nm, (struct vnode *)0, VREG, ct);
1158         pc_unlockfs(fsp);
1159         return (error);
1160 }
1161 
1162 /*
1163  * Rename a file or directory
1164  * This rename is restricted to only rename files within a directory.
1165  * XX should make rename more general
1166  */
1167 /*ARGSUSED*/
1168 static int
1169 pcfs_rename(
1170         struct vnode *sdvp,             /* old (source) parent vnode */
1171         char *snm,                      /* old (source) entry name */
1172         struct vnode *tdvp,             /* new (target) parent vnode */
1173         char *tnm,                      /* new (target) entry name */
1174         struct cred *cr,
1175         caller_context_t *ct,
1176         int flags)
1177 {
1178         struct pcfs *fsp;
1179         struct pcnode *dp;      /* parent pcnode */
1180         struct pcnode *tdp;
1181         int error;
1182 
1183         fsp = VFSTOPCFS(sdvp->v_vfsp);
1184         if (error = pc_verify(fsp))
1185                 return (error);
1186 
1187         /*
1188          * make sure we can muck with this directory.
1189          */
1190         error = pcfs_access(sdvp, VWRITE, 0, cr, ct);
1191         if (error) {
1192                 return (error);
1193         }
1194         error = pc_lockfs(fsp, 0, 0);
1195         if (error)
1196                 return (error);
1197         if (((dp = VTOPC(sdvp)) == NULL) || ((tdp = VTOPC(tdvp)) == NULL) ||
1198             (dp->pc_flags & PC_INVAL) || (tdp->pc_flags & PC_INVAL)) {
1199                 pc_unlockfs(fsp);
1200                 return (EIO);
1201         }
1202         error = pc_rename(dp, tdp, snm, tnm, ct);
1203         pc_unlockfs(fsp);
1204         return (error);
1205 }
1206 
1207 /*ARGSUSED*/
1208 static int
1209 pcfs_mkdir(
1210         struct vnode *dvp,
1211         char *nm,
1212         struct vattr *vap,
1213         struct vnode **vpp,
1214         struct cred *cr,
1215         caller_context_t *ct,
1216         int flags,
1217         vsecattr_t *vsecp)
1218 {
1219         struct pcfs *fsp;
1220         struct pcnode *pcp;
1221         int error;
1222 
1223         fsp = VFSTOPCFS(dvp->v_vfsp);
1224         if (error = pc_verify(fsp))
1225                 return (error);
1226         error = pc_lockfs(fsp, 0, 0);
1227         if (error)
1228                 return (error);
1229         if (VTOPC(dvp) == NULL || VTOPC(dvp)->pc_flags & PC_INVAL) {
1230                 pc_unlockfs(fsp);
1231                 return (EIO);
1232         }
1233 
1234         if (fsp->pcfs_flags & PCFS_BOOTPART) {
1235                 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1236                         pc_unlockfs(fsp);
1237                         return (EACCES);
1238                 }
1239         }
1240 
1241         error = pc_direnter(VTOPC(dvp), nm, vap, &pcp);
1242 
1243         if (!error) {
1244                 pcp -> pc_flags |= PC_EXTERNAL;
1245                 *vpp = PCTOV(pcp);
1246         } else if (error == EEXIST) {
1247                 VN_RELE(PCTOV(pcp));
1248         }
1249         pc_unlockfs(fsp);
1250         return (error);
1251 }
1252 
1253 /*ARGSUSED*/
1254 static int
1255 pcfs_rmdir(
1256         struct vnode *dvp,
1257         char *nm,
1258         struct vnode *cdir,
1259         struct cred *cr,
1260         caller_context_t *ct,
1261         int flags)
1262 {
1263         struct pcfs *fsp;
1264         struct pcnode *pcp;
1265         int error;
1266 
1267         fsp = VFSTOPCFS(dvp -> v_vfsp);
1268         if (error = pc_verify(fsp))
1269                 return (error);
1270         if (error = pc_lockfs(fsp, 0, 0))
1271                 return (error);
1272 
1273         if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1274                 pc_unlockfs(fsp);
1275                 return (EIO);
1276         }
1277 
1278         if (fsp->pcfs_flags & PCFS_BOOTPART) {
1279                 if (secpolicy_pcfs_modify_bootpartition(cr) != 0) {
1280                         pc_unlockfs(fsp);
1281                         return (EACCES);
1282                 }
1283         }
1284 
1285         error = pc_dirremove(pcp, nm, cdir, VDIR, ct);
1286         pc_unlockfs(fsp);
1287         return (error);
1288 }
1289 
1290 /*
1291  * read entries in a directory.
1292  * we must convert pc format to unix format
1293  */
1294 
1295 /*ARGSUSED*/
1296 static int
1297 pcfs_readdir(
1298         struct vnode *dvp,
1299         struct uio *uiop,
1300         struct cred *cr,
1301         int *eofp,
1302         caller_context_t *ct,
1303         int flags)
1304 {
1305         struct pcnode *pcp;
1306         struct pcfs *fsp;
1307         struct pcdir *ep;
1308         struct buf *bp = NULL;
1309         offset_t offset;
1310         int boff;
1311         struct pc_dirent lbp;
1312         struct pc_dirent *ld = &lbp;
1313         int error;
1314 
1315         /*
1316          * If the filesystem was umounted by force, return immediately.
1317          */
1318         if (dvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1319                 return (EIO);
1320 
1321         if ((uiop->uio_iovcnt != 1) ||
1322             (uiop->uio_loffset % sizeof (struct pcdir)) != 0) {
1323                 return (EINVAL);
1324         }
1325         fsp = VFSTOPCFS(dvp->v_vfsp);
1326         /*
1327          * verify that the dp is still valid on the disk
1328          */
1329         if (error = pc_verify(fsp)) {
1330                 return (error);
1331         }
1332         error = pc_lockfs(fsp, 0, 0);
1333         if (error)
1334                 return (error);
1335         if ((pcp = VTOPC(dvp)) == NULL || pcp->pc_flags & PC_INVAL) {
1336                 pc_unlockfs(fsp);
1337                 return (EIO);
1338         }
1339 
1340         bzero(ld, sizeof (*ld));
1341 
1342         if (eofp != NULL)
1343                 *eofp = 0;
1344         offset = uiop->uio_loffset;
1345 
1346         if (dvp->v_flag & VROOT) {
1347                 /*
1348                  * kludge up entries for "." and ".." in the root.
1349                  */
1350                 if (offset == 0) {
1351                         (void) strcpy(ld->d_name, ".");
1352                         ld->d_reclen = DIRENT64_RECLEN(1);
1353                         ld->d_off = (off64_t)sizeof (struct pcdir);
1354                         ld->d_ino = (ino64_t)UINT_MAX;
1355                         if (ld->d_reclen > uiop->uio_resid) {
1356                                 pc_unlockfs(fsp);
1357                                 return (ENOSPC);
1358                         }
1359                         (void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1360                         uiop->uio_loffset = ld->d_off;
1361                         offset = uiop->uio_loffset;
1362                 }
1363                 if (offset == sizeof (struct pcdir)) {
1364                         (void) strcpy(ld->d_name, "..");
1365                         ld->d_reclen = DIRENT64_RECLEN(2);
1366                         if (ld->d_reclen > uiop->uio_resid) {
1367                                 pc_unlockfs(fsp);
1368                                 return (ENOSPC);
1369                         }
1370                         ld->d_off = (off64_t)(uiop->uio_loffset +
1371                             sizeof (struct pcdir));
1372                         ld->d_ino = (ino64_t)UINT_MAX;
1373                         (void) uiomove(ld, ld->d_reclen, UIO_READ, uiop);
1374                         uiop->uio_loffset = ld->d_off;
1375                         offset = uiop->uio_loffset;
1376                 }
1377                 offset -= 2 * sizeof (struct pcdir);
1378                 /* offset now has the real offset value into directory file */
1379         }
1380 
1381         for (;;) {
1382                 boff = pc_blkoff(fsp, offset);
1383                 if (boff == 0 || bp == NULL || boff >= bp->b_bcount) {
1384                         if (bp != NULL) {
1385                                 brelse(bp);
1386                                 bp = NULL;
1387                         }
1388                         error = pc_blkatoff(pcp, offset, &bp, &ep);
1389                         if (error) {
1390                                 if (error == ENOENT) {
1391                                         error = 0;
1392                                         if (eofp)
1393                                                 *eofp = 1;
1394                                 }
1395                                 break;
1396                         }
1397                 }
1398                 if (ep->pcd_filename[0] == PCD_UNUSED) {
1399                         if (eofp)
1400                                 *eofp = 1;
1401                         break;
1402                 }
1403                 /*
1404                  * Don't display label because it may contain funny characters.
1405                  */
1406                 if (ep->pcd_filename[0] == PCD_ERASED) {
1407                         uiop->uio_loffset += sizeof (struct pcdir);
1408                         offset += sizeof (struct pcdir);
1409                         ep++;
1410                         continue;
1411                 }
1412                 if (PCDL_IS_LFN(ep)) {
1413                         if (pc_read_long_fn(dvp, uiop, ld, &ep, &offset, &bp) !=
1414                             0)
1415                                 break;
1416                         continue;
1417                 }
1418 
1419                 if (pc_read_short_fn(dvp, uiop, ld, &ep, &offset, &bp) != 0)
1420                         break;
1421         }
1422         if (bp)
1423                 brelse(bp);
1424         pc_unlockfs(fsp);
1425         return (error);
1426 }
1427 
1428 
1429 /*
1430  * Called from pvn_getpages to get a particular page.  When we are called
1431  * the pcfs is already locked.
1432  */
1433 /*ARGSUSED*/
1434 static int
1435 pcfs_getapage(
1436         struct vnode *vp,
1437         u_offset_t off,
1438         size_t len,
1439         uint_t *protp,
1440         page_t *pl[],           /* NULL if async IO is requested */
1441         size_t plsz,
1442         struct seg *seg,
1443         caddr_t addr,
1444         enum seg_rw rw,
1445         struct cred *cr)
1446 {
1447         struct pcnode *pcp;
1448         struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1449         struct vnode *devvp;
1450         page_t *pp;
1451         page_t *pagefound;
1452         int err;
1453 
1454         /*
1455          * If the filesystem was umounted by force, return immediately.
1456          */
1457         if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1458                 return (EIO);
1459 
1460         PC_DPRINTF3(5, "pcfs_getapage: vp=%p off=%lld len=%lu\n",
1461             (void *)vp, off, len);
1462 
1463         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL)
1464                 return (EIO);
1465         devvp = fsp->pcfs_devvp;
1466 
1467         /* pcfs doesn't do readaheads */
1468         if (pl == NULL)
1469                 return (0);
1470 
1471         pl[0] = NULL;
1472         err = 0;
1473         /*
1474          * If the accessed time on the pcnode has not already been
1475          * set elsewhere (e.g. for read/setattr) we set the time now.
1476          * This gives us approximate modified times for mmap'ed files
1477          * which are accessed via loads in the user address space.
1478          */
1479         if ((pcp->pc_flags & PC_ACC) == 0 &&
1480             ((fsp->pcfs_vfs->vfs_flag & VFS_RDONLY) == 0)) {
1481                 pc_mark_acc(fsp, pcp);
1482         }
1483 reread:
1484         if ((pagefound = page_exists(vp, off)) == NULL) {
1485                 /*
1486                  * Need to really do disk IO to get the page(s).
1487                  */
1488                 struct buf *bp;
1489                 daddr_t lbn, bn;
1490                 u_offset_t io_off;
1491                 size_t io_len;
1492                 u_offset_t lbnoff, xferoffset;
1493                 u_offset_t pgoff;
1494                 uint_t  xfersize;
1495                 int err1;
1496 
1497                 lbn = pc_lblkno(fsp, off);
1498                 lbnoff = off & ~(fsp->pcfs_clsize - 1);
1499                 xferoffset = off & ~(fsp->pcfs_secsize - 1);
1500 
1501                 pp = pvn_read_kluster(vp, off, seg, addr, &io_off, &io_len,
1502                     off, (size_t)MIN(pc_blksize(fsp, pcp, off), PAGESIZE), 0);
1503                 if (pp == NULL)
1504                         /*
1505                          * XXX - If pcfs is made MT-hot, this should go
1506                          * back to reread.
1507                          */
1508                         panic("pcfs_getapage pvn_read_kluster");
1509 
1510                 for (pgoff = 0; pgoff < PAGESIZE && xferoffset < pcp->pc_size;
1511                     pgoff += xfersize,
1512                     lbn +=  howmany(xfersize, fsp->pcfs_clsize),
1513                     lbnoff += xfersize, xferoffset += xfersize) {
1514                         /*
1515                          * read as many contiguous blocks as possible to
1516                          * fill this page
1517                          */
1518                         xfersize = PAGESIZE - pgoff;
1519                         err1 = pc_bmap(pcp, lbn, &bn, &xfersize);
1520                         if (err1) {
1521                                 PC_DPRINTF1(1, "pc_getapage err=%d", err1);
1522                                 err = err1;
1523                                 goto out;
1524                         }
1525                         bp = pageio_setup(pp, xfersize, devvp, B_READ);
1526                         bp->b_edev = devvp->v_rdev;
1527                         bp->b_dev = cmpdev(devvp->v_rdev);
1528                         bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1529                         bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1530                         bp->b_file = vp;
1531                         bp->b_offset = (offset_t)(off + pgoff);
1532 
1533                         (void) bdev_strategy(bp);
1534 
1535                         lwp_stat_update(LWP_STAT_INBLK, 1);
1536 
1537                         if (err == 0)
1538                                 err = biowait(bp);
1539                         else
1540                                 (void) biowait(bp);
1541                         pageio_done(bp);
1542                         if (err)
1543                                 goto out;
1544                 }
1545                 if (pgoff < PAGESIZE) {
1546                         pagezero(pp->p_prev, pgoff, PAGESIZE - pgoff);
1547                 }
1548                 pvn_plist_init(pp, pl, plsz, off, io_len, rw);
1549         }
1550 out:
1551         if (err) {
1552                 if (pp != NULL)
1553                         pvn_read_done(pp, B_ERROR);
1554                 return (err);
1555         }
1556 
1557         if (pagefound) {
1558                 /*
1559                  * Page exists in the cache, acquire the "shared"
1560                  * lock.  If this fails, go back to reread.
1561                  */
1562                 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
1563                         goto reread;
1564                 }
1565                 pl[0] = pp;
1566                 pl[1] = NULL;
1567         }
1568         return (err);
1569 }
1570 
1571 /*
1572  * Return all the pages from [off..off+len] in given file
1573  */
1574 /* ARGSUSED */
1575 static int
1576 pcfs_getpage(
1577         struct vnode *vp,
1578         offset_t off,
1579         size_t len,
1580         uint_t *protp,
1581         page_t *pl[],
1582         size_t plsz,
1583         struct seg *seg,
1584         caddr_t addr,
1585         enum seg_rw rw,
1586         struct cred *cr,
1587         caller_context_t *ct)
1588 {
1589         struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1590         int err;
1591 
1592         PC_DPRINTF0(6, "pcfs_getpage\n");
1593         if (err = pc_verify(fsp))
1594                 return (err);
1595         if (vp->v_flag & VNOMAP)
1596                 return (ENOSYS);
1597         ASSERT(off <= UINT32_MAX);
1598         err = pc_lockfs(fsp, 0, 0);
1599         if (err)
1600                 return (err);
1601         if (protp != NULL)
1602                 *protp = PROT_ALL;
1603 
1604         ASSERT((off & PAGEOFFSET) == 0);
1605         err = pvn_getpages(pcfs_getapage, vp, off, len, protp, pl, plsz,
1606             seg, addr, rw, cr);
1607 
1608         pc_unlockfs(fsp);
1609         return (err);
1610 }
1611 
1612 
1613 /*
1614  * Flags are composed of {B_INVAL, B_FREE, B_DONTNEED, B_FORCE}
1615  * If len == 0, do from off to EOF.
1616  *
1617  * The normal cases should be len == 0 & off == 0 (entire vp list),
1618  * len == MAXBSIZE (from segmap_release actions), and len == PAGESIZE
1619  * (from pageout).
1620  *
1621  */
1622 /*ARGSUSED*/
1623 static int
1624 pcfs_putpage(
1625         struct vnode *vp,
1626         offset_t off,
1627         size_t len,
1628         int flags,
1629         struct cred *cr,
1630         caller_context_t *ct)
1631 {
1632         struct pcnode *pcp;
1633         page_t *pp;
1634         struct pcfs *fsp;
1635         u_offset_t io_off;
1636         size_t io_len;
1637         offset_t eoff;
1638         int err;
1639 
1640         /*
1641          * If the filesystem was umounted by force, return immediately.
1642          */
1643         if (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)
1644                 return (EIO);
1645 
1646         PC_DPRINTF1(6, "pcfs_putpage vp=0x%p\n", (void *)vp);
1647         if (vp->v_flag & VNOMAP)
1648                 return (ENOSYS);
1649 
1650         fsp = VFSTOPCFS(vp->v_vfsp);
1651 
1652         if (err = pc_verify(fsp))
1653                 return (err);
1654         if ((pcp = VTOPC(vp)) == NULL) {
1655                 PC_DPRINTF1(3, "pcfs_putpage NULL vp=0x%p\n", (void *)vp);
1656                 return (EIO);
1657         }
1658         if (pcp->pc_flags & PC_INVAL)
1659                 return (EIO);
1660 
1661         if (curproc == proc_pageout) {
1662                 /*
1663                  * XXX - This is a quick hack to avoid blocking
1664                  * pageout. Also to avoid pcfs_getapage deadlocking
1665                  * with putpage when memory is running out,
1666                  * since we only have one global lock and we don't
1667                  * support async putpage.
1668                  * It should be fixed someday.
1669                  *
1670                  * Interestingly, this used to be a test of NOMEMWAIT().
1671                  * We only ever got here once pcfs started supporting
1672                  * NFS sharing, and then only because the NFS server
1673                  * threads seem to do writes in sched's process context.
1674                  * Since everyone else seems to just care about pageout,
1675                  * the test was changed to look for pageout directly.
1676                  */
1677                 return (ENOMEM);
1678         }
1679 
1680         ASSERT(off <= UINT32_MAX);
1681 
1682         flags &= ~B_ASYNC;  /* XXX should fix this later */
1683 
1684         err = pc_lockfs(fsp, 0, 0);
1685         if (err)
1686                 return (err);
1687         if (!vn_has_cached_data(vp) || off >= pcp->pc_size) {
1688                 pc_unlockfs(fsp);
1689                 return (0);
1690         }
1691 
1692         if (len == 0) {
1693                 /*
1694                  * Search the entire vp list for pages >= off
1695                  */
1696                 err = pvn_vplist_dirty(vp, off,
1697                     pcfs_putapage, flags, cr);
1698         } else {
1699                 eoff = off + len;
1700 
1701                 for (io_off = off; io_off < eoff &&
1702                     io_off < pcp->pc_size; io_off += io_len) {
1703                         /*
1704                          * If we are not invalidating, synchronously
1705                          * freeing or writing pages use the routine
1706                          * page_lookup_nowait() to prevent reclaiming
1707                          * them from the free list.
1708                          */
1709                         if ((flags & B_INVAL) || ((flags & B_ASYNC) == 0)) {
1710                                 pp = page_lookup(vp, io_off,
1711                                     (flags & (B_INVAL | B_FREE)) ?
1712                                     SE_EXCL : SE_SHARED);
1713                         } else {
1714                                 pp = page_lookup_nowait(vp, io_off,
1715                                     (flags & B_FREE) ? SE_EXCL : SE_SHARED);
1716                         }
1717 
1718                         if (pp == NULL || pvn_getdirty(pp, flags) == 0)
1719                                 io_len = PAGESIZE;
1720                         else {
1721                                 err = pcfs_putapage(vp, pp, &io_off, &io_len,
1722                                     flags, cr);
1723                                 if (err != 0)
1724                                         break;
1725                                 /*
1726                                  * "io_off" and "io_len" are returned as
1727                                  * the range of pages we actually wrote.
1728                                  * This allows us to skip ahead more quickly
1729                                  * since several pages may've been dealt
1730                                  * with by this iteration of the loop.
1731                                  */
1732                         }
1733                 }
1734         }
1735         if (err == 0 && (flags & B_INVAL) &&
1736             off == 0 && len == 0 && vn_has_cached_data(vp)) {
1737                 /*
1738                  * If doing "invalidation", make sure that
1739                  * all pages on the vnode list are actually
1740                  * gone.
1741                  */
1742                 cmn_err(CE_PANIC,
1743                     "pcfs_putpage: B_INVAL, pages not gone");
1744         } else if (err) {
1745                 PC_DPRINTF1(1, "pcfs_putpage err=%d\n", err);
1746         }
1747         pc_unlockfs(fsp);
1748         return (err);
1749 }
1750 
1751 /*
1752  * Write out a single page, possibly klustering adjacent dirty pages.
1753  */
1754 /*ARGSUSED*/
1755 int
1756 pcfs_putapage(
1757         struct vnode *vp,
1758         page_t *pp,
1759         u_offset_t *offp,
1760         size_t *lenp,
1761         int flags,
1762         struct cred *cr)
1763 {
1764         struct pcnode *pcp;
1765         struct pcfs *fsp;
1766         struct vnode *devvp;
1767         size_t io_len;
1768         daddr_t bn;
1769         u_offset_t lbn, lbnoff, xferoffset;
1770         uint_t pgoff, xfersize;
1771         int err = 0;
1772         u_offset_t io_off;
1773 
1774         pcp = VTOPC(vp);
1775         fsp = VFSTOPCFS(vp->v_vfsp);
1776         devvp = fsp->pcfs_devvp;
1777 
1778         /*
1779          * If the modified time on the inode has not already been
1780          * set elsewhere (e.g. for write/setattr) and this is not
1781          * a call from msync (B_FORCE) we set the time now.
1782          * This gives us approximate modified times for mmap'ed files
1783          * which are modified via stores in the user address space.
1784          */
1785         if ((pcp->pc_flags & PC_MOD) == 0 || (flags & B_FORCE)) {
1786                 pcp->pc_flags |= PC_MOD;
1787                 pc_mark_mod(fsp, pcp);
1788         }
1789         pp = pvn_write_kluster(vp, pp, &io_off, &io_len, pp->p_offset,
1790             PAGESIZE, flags);
1791 
1792         if (fsp->pcfs_flags & PCFS_IRRECOV) {
1793                 goto out;
1794         }
1795 
1796         PC_DPRINTF1(7, "pc_putpage writing dirty page off=%llu\n", io_off);
1797 
1798         lbn = pc_lblkno(fsp, io_off);
1799         lbnoff = io_off & ~(fsp->pcfs_clsize - 1);
1800         xferoffset = io_off & ~(fsp->pcfs_secsize - 1);
1801 
1802         for (pgoff = 0; pgoff < io_len && xferoffset < pcp->pc_size;
1803             pgoff += xfersize,
1804             lbn += howmany(xfersize, fsp->pcfs_clsize),
1805             lbnoff += xfersize, xferoffset += xfersize) {
1806 
1807                 struct buf *bp;
1808                 int err1;
1809 
1810                 /*
1811                  * write as many contiguous blocks as possible from this page
1812                  */
1813                 xfersize = io_len - pgoff;
1814                 err1 = pc_bmap(pcp, (daddr_t)lbn, &bn, &xfersize);
1815                 if (err1) {
1816                         err = err1;
1817                         goto out;
1818                 }
1819                 bp = pageio_setup(pp, xfersize, devvp, B_WRITE | flags);
1820                 bp->b_edev = devvp->v_rdev;
1821                 bp->b_dev = cmpdev(devvp->v_rdev);
1822                 bp->b_blkno = bn + btodt(xferoffset - lbnoff);
1823                 bp->b_un.b_addr = (caddr_t)(uintptr_t)pgoff;
1824                 bp->b_file = vp;
1825                 bp->b_offset = (offset_t)(io_off + pgoff);
1826 
1827                 (void) bdev_strategy(bp);
1828 
1829                 lwp_stat_update(LWP_STAT_OUBLK, 1);
1830 
1831                 if (err == 0)
1832                         err = biowait(bp);
1833                 else
1834                         (void) biowait(bp);
1835                 pageio_done(bp);
1836         }
1837         pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
1838         pp = NULL;
1839 
1840 out:
1841         if ((fsp->pcfs_flags & PCFS_IRRECOV) && pp != NULL) {
1842                 pvn_write_done(pp, B_WRITE | flags);
1843         } else if (err != 0 && pp != NULL) {
1844                 pvn_write_done(pp, B_ERROR | B_WRITE | flags);
1845         }
1846 
1847         if (offp)
1848                 *offp = io_off;
1849         if (lenp)
1850                 *lenp = io_len;
1851                 PC_DPRINTF4(4, "pcfs_putapage: vp=%p pp=%p off=%lld len=%lu\n",
1852                     (void *)vp, (void *)pp, io_off, io_len);
1853         if (err) {
1854                 PC_DPRINTF1(1, "pcfs_putapage err=%d", err);
1855         }
1856         return (err);
1857 }
1858 
1859 /*ARGSUSED*/
1860 static int
1861 pcfs_map(
1862         struct vnode *vp,
1863         offset_t off,
1864         struct as *as,
1865         caddr_t *addrp,
1866         size_t len,
1867         uchar_t prot,
1868         uchar_t maxprot,
1869         uint_t flags,
1870         struct cred *cr,
1871         caller_context_t *ct)
1872 {
1873         struct segvn_crargs vn_a;
1874         int error;
1875 
1876         PC_DPRINTF0(6, "pcfs_map\n");
1877         if (vp->v_flag & VNOMAP)
1878                 return (ENOSYS);
1879 
1880         if (off > UINT32_MAX || off + len > UINT32_MAX)
1881                 return (ENXIO);
1882 
1883         as_rangelock(as);
1884         error = choose_addr(as, addrp, len, off, ADDR_VACALIGN, flags);
1885         if (error != 0) {
1886                 as_rangeunlock(as);
1887                 return (error);
1888         }
1889 
1890         vn_a.vp = vp;
1891         vn_a.offset = off;
1892         vn_a.type = flags & MAP_TYPE;
1893         vn_a.prot = prot;
1894         vn_a.maxprot = maxprot;
1895         vn_a.flags = flags & ~MAP_TYPE;
1896         vn_a.cred = cr;
1897         vn_a.amp = NULL;
1898         vn_a.szc = 0;
1899         vn_a.lgrp_mem_policy_flags = 0;
1900 
1901         error = as_map(as, *addrp, len, segvn_create, &vn_a);
1902         as_rangeunlock(as);
1903         return (error);
1904 }
1905 
1906 /* ARGSUSED */
1907 static int
1908 pcfs_seek(
1909         struct vnode *vp,
1910         offset_t ooff,
1911         offset_t *noffp,
1912         caller_context_t *ct)
1913 {
1914         if (*noffp < 0)
1915                 return (EINVAL);
1916         else if (*noffp > MAXOFFSET_T)
1917                 return (EINVAL);
1918         else
1919                 return (0);
1920 }
1921 
1922 /* ARGSUSED */
1923 static int
1924 pcfs_addmap(
1925         struct vnode *vp,
1926         offset_t off,
1927         struct as *as,
1928         caddr_t addr,
1929         size_t len,
1930         uchar_t prot,
1931         uchar_t maxprot,
1932         uint_t flags,
1933         struct cred *cr,
1934         caller_context_t *ct)
1935 {
1936         if (vp->v_flag & VNOMAP)
1937                 return (ENOSYS);
1938         return (0);
1939 }
1940 
1941 /*ARGSUSED*/
1942 static int
1943 pcfs_delmap(
1944         struct vnode *vp,
1945         offset_t off,
1946         struct as *as,
1947         caddr_t addr,
1948         size_t len,
1949         uint_t prot,
1950         uint_t maxprot,
1951         uint_t flags,
1952         struct cred *cr,
1953         caller_context_t *ct)
1954 {
1955         if (vp->v_flag & VNOMAP)
1956                 return (ENOSYS);
1957         return (0);
1958 }
1959 
1960 /*
1961  * POSIX pathconf() support.
1962  */
1963 /* ARGSUSED */
1964 static int
1965 pcfs_pathconf(
1966         struct vnode *vp,
1967         int cmd,
1968         ulong_t *valp,
1969         struct cred *cr,
1970         caller_context_t *ct)
1971 {
1972         struct pcfs *fsp = VFSTOPCFS(vp->v_vfsp);
1973 
1974         switch (cmd) {
1975         case _PC_LINK_MAX:
1976                 *valp = 1;
1977                 return (0);
1978 
1979         case _PC_CASE_BEHAVIOR:
1980                 return (EINVAL);
1981 
1982         case _PC_FILESIZEBITS:
1983                 /*
1984                  * Both FAT16 and FAT32 support 4GB - 1 byte for file size.
1985                  * FAT12 can only go up to the maximum filesystem capacity
1986                  * which is ~509MB.
1987                  */
1988                 *valp = IS_FAT12(fsp) ? 30 : 33;
1989                 return (0);
1990 
1991         case _PC_TIMESTAMP_RESOLUTION:
1992                 /*
1993                  * PCFS keeps track of modification times, it its own
1994                  * internal format, to a resolution of 2 seconds.
1995                  * Since 2000 million is representable in an int32_t
1996                  * without overflow (or becoming negative), we allow
1997                  * this value to be returned.
1998                  */
1999                 *valp = 2000000000L;
2000                 return (0);
2001 
2002         default:
2003                 return (fs_pathconf(vp, cmd, valp, cr, ct));
2004         }
2005 
2006 }
2007 
2008 /* ARGSUSED */
2009 static int
2010 pcfs_space(
2011         struct vnode *vp,
2012         int cmd,
2013         struct flock64 *bfp,
2014         int flag,
2015         offset_t offset,
2016         cred_t *cr,
2017         caller_context_t *ct)
2018 {
2019         struct vattr vattr;
2020         int error;
2021 
2022         if (cmd != F_FREESP)
2023                 return (EINVAL);
2024 
2025         if ((error = convoff(vp, bfp, 0, offset)) == 0) {
2026                 if ((bfp->l_start > UINT32_MAX) || (bfp->l_len > UINT32_MAX))
2027                         return (EFBIG);
2028                 /*
2029                  * we only support the special case of l_len == 0,
2030                  * meaning free to end of file at this moment.
2031                  */
2032                 if (bfp->l_len != 0)
2033                         return (EINVAL);
2034                 vattr.va_mask = AT_SIZE;
2035                 vattr.va_size = bfp->l_start;
2036                 error = VOP_SETATTR(vp, (vattr_t *)&vattr, 0, cr, ct);
2037         }
2038         return (error);
2039 }
2040 
2041 /*
2042  * Break up 'len' chars from 'buf' into a long file name chunk.
2043  * Pad with '0xff' to make Norton Disk Doctor and Microsoft ScanDisk happy.
2044  */
2045 void
2046 set_long_fn_chunk(struct pcdir_lfn *ep, char *buf, int len)
2047 {
2048         int     i;
2049 
2050         ASSERT(buf != NULL);
2051 
2052         for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2) {
2053                 if (len > 0) {
2054                         ep->pcdl_firstfilename[i] = *buf++;
2055                         ep->pcdl_firstfilename[i + 1] = *buf++;
2056                         len -= 2;
2057                 } else {
2058                         ep->pcdl_firstfilename[i] = (uchar_t)0xff;
2059                         ep->pcdl_firstfilename[i + 1] = (uchar_t)0xff;
2060                 }
2061         }
2062 
2063         for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2) {
2064                 if (len > 0) {
2065                         ep->pcdl_secondfilename[i] = *buf++;
2066                         ep->pcdl_secondfilename[i + 1] = *buf++;
2067                         len -= 2;
2068                 } else {
2069                         ep->pcdl_secondfilename[i] = (uchar_t)0xff;
2070                         ep->pcdl_secondfilename[i + 1] = (uchar_t)0xff;
2071                 }
2072         }
2073         for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2) {
2074                 if (len > 0) {
2075                         ep->pcdl_thirdfilename[i] = *buf++;
2076                         ep->pcdl_thirdfilename[i + 1] = *buf++;
2077                         len -= 2;
2078                 } else {
2079                         ep->pcdl_thirdfilename[i] = (uchar_t)0xff;
2080                         ep->pcdl_thirdfilename[i + 1] = (uchar_t)0xff;
2081                 }
2082         }
2083 }
2084 
2085 /*
2086  * Extract the characters from the long filename chunk into 'buf'.
2087  * Return the number of characters extracted.
2088  */
2089 static int
2090 get_long_fn_chunk(struct pcdir_lfn *ep, char *buf)
2091 {
2092         char    *tmp = buf;
2093         int     i;
2094 
2095         /* Copy all the names, no filtering now */
2096 
2097         for (i = 0; i < PCLF_FIRSTNAMESIZE; i += 2, tmp += 2) {
2098                 *tmp = ep->pcdl_firstfilename[i];
2099                 *(tmp + 1) = ep->pcdl_firstfilename[i + 1];
2100 
2101                 if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2102                         return (tmp - buf);
2103         }
2104         for (i = 0; i < PCLF_SECONDNAMESIZE; i += 2, tmp += 2) {
2105                 *tmp = ep->pcdl_secondfilename[i];
2106                 *(tmp + 1) = ep->pcdl_secondfilename[i + 1];
2107 
2108                 if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2109                         return (tmp - buf);
2110         }
2111         for (i = 0; i < PCLF_THIRDNAMESIZE; i += 2, tmp += 2) {
2112                 *tmp = ep->pcdl_thirdfilename[i];
2113                 *(tmp + 1) = ep->pcdl_thirdfilename[i + 1];
2114 
2115                 if ((*tmp == '\0') && (*(tmp+1) == '\0'))
2116                         return (tmp - buf);
2117         }
2118         return (tmp - buf);
2119 }
2120 
2121 
/*
 * Checksum the passed in short filename (8 bytes of name followed by
 * 3 bytes of extension).
 * Each long name component carries this checksum; comparing it against
 * the short entry shows whether the long name is still valid (it hasn't
 * been "detached" from the short filename). This algorithm was found in
 * FreeBSD.
 * (sys/fs/msdosfs/msdosfs_conv.c:winChksum(), Wolfgang Solfrank)
 *
 * For every byte after the first, the running 8-bit sum is rotated right
 * by one bit and the byte is added to it.
 */

uchar_t
pc_checksum_long_fn(char *name, char *ext)
{
        char    sfn[11];
        uchar_t sum;
        int     i;

        bcopy(name, sfn, 8);
        bcopy(ext, sfn + 8, 3);

        sum = sfn[0];
        for (i = 1; i < 11; i++)
                sum = ((sum << 7) | (sum >> 1)) + sfn[i];

        return (sum);
}
2153 
2154 /*
2155  * Read a chunk of long filename entries into 'namep'.
2156  * Return with offset pointing to short entry (on success), or next
2157  * entry to read (if this wasn't a valid lfn really).
2158  * Uses the passed-in buffer if it can, otherwise kmem_allocs() room for
2159  * a long filename.
2160  *
2161  * Can also be called with a NULL namep, in which case it just returns
2162  * whether this was really a valid long filename and consumes it
2163  * (used by pc_dirempty()).
2164  */
2165 int
2166 pc_extract_long_fn(struct pcnode *pcp, char *namep,
2167     struct pcdir **epp, offset_t *offset, struct buf **bp)
2168 {
2169         struct pcdir *ep = *epp;
2170         struct pcdir_lfn *lep = (struct pcdir_lfn *)ep;
2171         struct vnode *dvp = PCTOV(pcp);
2172         struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2173         char    *lfn;
2174         char    *lfn_base;
2175         int     boff;
2176         int     i, cs;
2177         char    *buf;
2178         uchar_t cksum;
2179         int     detached = 0;
2180         int     error = 0;
2181         int     foldcase;
2182         int     count = 0;
2183         size_t  u16l = 0, u8l = 0;
2184         char    *outbuf;
2185         size_t  ret, inlen, outlen;
2186 
2187         foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2188         lfn_base = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2189         lfn = lfn_base + PCMAXNAM_UTF16 - sizeof (uint16_t);
2190         *lfn = '\0';
2191         *(lfn + 1) = '\0';
2192         cksum = lep->pcdl_checksum;
2193 
2194         buf = kmem_alloc(PCMAXNAM_UTF16, KM_SLEEP);
2195         for (i = (lep->pcdl_ordinal & ~0xc0); i > 0; i--) {
2196                 /* read next block if necessary */
2197                 boff = pc_blkoff(fsp, *offset);
2198                 if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2199                         if (*bp != NULL) {
2200                                 brelse(*bp);
2201                                 *bp = NULL;
2202                         }
2203                         error = pc_blkatoff(pcp, *offset, bp, &ep);
2204                         if (error) {
2205                                 kmem_free(lfn_base, PCMAXNAM_UTF16);
2206                                 kmem_free(buf, PCMAXNAM_UTF16);
2207                                 return (error);
2208                         }
2209                         lep = (struct pcdir_lfn *)ep;
2210                 }
2211                 /* can this happen? Bad fs? */
2212                 if (!PCDL_IS_LFN((struct pcdir *)lep)) {
2213                         detached = 1;
2214                         break;
2215                 }
2216                 if (cksum != lep->pcdl_checksum)
2217                         detached = 1;
2218                 /* process current entry */
2219                 cs = get_long_fn_chunk(lep, buf);
2220                 count += cs;
2221                 for (; cs > 0; cs--) {
2222                         /* see if we underflow */
2223                         if (lfn >= lfn_base)
2224                                 *--lfn = buf[cs - 1];
2225                         else
2226                                 detached = 1;
2227                 }
2228                 lep++;
2229                 *offset += sizeof (struct pcdir);
2230         }
2231         kmem_free(buf, PCMAXNAM_UTF16);
2232         /* read next block if necessary */
2233         boff = pc_blkoff(fsp, *offset);
2234         ep = (struct pcdir *)lep;
2235         if (boff == 0 || *bp == NULL || boff >= (*bp)->b_bcount) {
2236                 if (*bp != NULL) {
2237                         brelse(*bp);
2238                         *bp = NULL;
2239                 }
2240                 error = pc_blkatoff(pcp, *offset, bp, &ep);
2241                 if (error) {
2242                         kmem_free(lfn_base, PCMAXNAM_UTF16);
2243                         return (error);
2244                 }
2245         }
2246         /* should be on the short one */
2247         if (PCDL_IS_LFN(ep) || ((ep->pcd_filename[0] == PCD_UNUSED) ||
2248             (ep->pcd_filename[0] == PCD_ERASED))) {
2249                 detached = 1;
2250         }
2251         if (detached ||
2252             (cksum != pc_checksum_long_fn(ep->pcd_filename, ep->pcd_ext)) ||
2253             !pc_valid_long_fn(lfn, 0)) {
2254                 /*
2255                  * process current entry again. This may end up another lfn
2256                  * or a short name.
2257                  */
2258                 *epp = ep;
2259                 kmem_free(lfn_base, PCMAXNAM_UTF16);
2260                 return (EINVAL);
2261         }
2262         if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2263                 /*
2264                  * Don't display label because it may contain
2265                  * funny characters.
2266                  */
2267                 *offset += sizeof (struct pcdir);
2268                 ep++;
2269                 *epp = ep;
2270                 kmem_free(lfn_base, PCMAXNAM_UTF16);
2271                 return (EINVAL);
2272         }
2273         if (namep) {
2274                 u16l = count / 2;
2275                 u8l = PCMAXNAMLEN;
2276                 error = uconv_u16tou8((const uint16_t *)lfn, &u16l,
2277                     (uchar_t *)namep, &u8l, UCONV_IN_LITTLE_ENDIAN);
2278                 /*
2279                  * uconv_u16tou8() will catch conversion errors including
2280                  * the case where there is not enough room to write the
2281                  * converted result and the u8l will never go over the given
2282                  * PCMAXNAMLEN.
2283                  */
2284                 if (error != 0) {
2285                         kmem_free(lfn_base, PCMAXNAM_UTF16);
2286                         return (EINVAL);
2287                 }
2288                 namep[u8l] = '\0';
2289                 if (foldcase) {
2290                         inlen = strlen(namep);
2291                         outlen = PCMAXNAMLEN;
2292                         outbuf = kmem_alloc(PCMAXNAMLEN + 1, KM_SLEEP);
2293                         ret = u8_textprep_str(namep, &inlen, outbuf,
2294                             &outlen, U8_TEXTPREP_TOLOWER, U8_UNICODE_LATEST,
2295                             &error);
2296                         if (ret == -1) {
2297                                 kmem_free(outbuf, PCMAXNAMLEN + 1);
2298                                 kmem_free(lfn_base, PCMAXNAM_UTF16);
2299                                 return (EINVAL);
2300                         }
2301                         outbuf[PCMAXNAMLEN - outlen] = '\0';
2302                         (void) strncpy(namep, outbuf, PCMAXNAMLEN + 1);
2303                         kmem_free(outbuf, PCMAXNAMLEN + 1);
2304                 }
2305         }
2306         kmem_free(lfn_base, PCMAXNAM_UTF16);
2307         *epp = ep;
2308         return (0);
2309 }
2310 /*
2311  * Read a long filename into the pc_dirent structure and copy it out.
2312  */
2313 int
2314 pc_read_long_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2315     struct pcdir **epp, offset_t *offset, struct buf **bp)
2316 {
2317         struct pcdir *ep;
2318         struct pcnode *pcp = VTOPC(dvp);
2319         struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2320         offset_t uiooffset = uiop->uio_loffset;
2321         int     error = 0;
2322         offset_t oldoffset;
2323 
2324         oldoffset = *offset;
2325         error = pc_extract_long_fn(pcp, ld->d_name, epp, offset, bp);
2326         if (error) {
2327                 if (error == EINVAL) {
2328                         uiop->uio_loffset += *offset - oldoffset;
2329                         return (0);
2330                 } else
2331                         return (error);
2332         }
2333 
2334         ep = *epp;
2335         uiop->uio_loffset += *offset - oldoffset;
2336         ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2337         if (ld->d_reclen > uiop->uio_resid) {
2338                 uiop->uio_loffset = uiooffset;
2339                 return (ENOSPC);
2340         }
2341         ld->d_off = uiop->uio_loffset + sizeof (struct pcdir);
2342         ld->d_ino = pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2343             pc_blkoff(fsp, *offset), ep->pcd_attr,
2344             pc_getstartcluster(fsp, ep), pc_direntpersec(fsp));
2345         (void) uiomove((caddr_t)ld, ld->d_reclen, UIO_READ, uiop);
2346         uiop->uio_loffset = ld->d_off;
2347         *offset += sizeof (struct pcdir);
2348         ep++;
2349         *epp = ep;
2350         return (0);
2351 }
2352 
2353 /*
2354  * Read a short filename into the pc_dirent structure and copy it out.
2355  */
2356 int
2357 pc_read_short_fn(struct vnode *dvp, struct uio *uiop, struct pc_dirent *ld,
2358     struct pcdir **epp, offset_t *offset, struct buf **bp)
2359 {
2360         struct pcfs *fsp = VFSTOPCFS(dvp->v_vfsp);
2361         int     boff = pc_blkoff(fsp, *offset);
2362         struct pcdir *ep = *epp;
2363         offset_t        oldoffset = uiop->uio_loffset;
2364         int     error;
2365         int     foldcase;
2366 
2367         if (PCA_IS_HIDDEN(fsp, ep->pcd_attr)) {
2368                 uiop->uio_loffset += sizeof (struct pcdir);
2369                 *offset += sizeof (struct pcdir);
2370                 ep++;
2371                 *epp = ep;
2372                 return (0);
2373         }
2374         ld->d_ino = (ino64_t)pc_makenodeid(pc_daddrdb(fsp, (*bp)->b_blkno),
2375             boff, ep->pcd_attr, pc_getstartcluster(fsp, ep),
2376             pc_direntpersec(fsp));
2377         foldcase = (fsp->pcfs_flags & PCFS_FOLDCASE);
2378         error = pc_fname_ext_to_name(&ld->d_name[0], &ep->pcd_filename[0],
2379             &ep->pcd_ext[0], foldcase);
2380         if (error == 0) {
2381                 ld->d_reclen = DIRENT64_RECLEN(strlen(ld->d_name));
2382                 if (ld->d_reclen > uiop->uio_resid) {
2383                         uiop->uio_loffset = oldoffset;
2384                         return (ENOSPC);
2385                 }
2386                 ld->d_off = (off64_t)(uiop->uio_loffset +
2387                     sizeof (struct pcdir));
2388                 (void) uiomove((caddr_t)ld,
2389                     ld->d_reclen, UIO_READ, uiop);
2390                 uiop->uio_loffset = ld->d_off;
2391         } else {
2392                 uiop->uio_loffset += sizeof (struct pcdir);
2393         }
2394         *offset += sizeof (struct pcdir);
2395         ep++;
2396         *epp = ep;
2397         return (0);
2398 }
2399 
2400 /* ARGSUSED */
2401 static int
2402 pcfs_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct)
2403 {
2404         struct pc_fid *pcfid;
2405         struct pcnode *pcp;
2406         struct pcfs     *fsp;
2407         int     error;
2408 
2409         fsp = VFSTOPCFS(vp->v_vfsp);
2410         if (fsp == NULL)
2411                 return (EIO);
2412         error = pc_lockfs(fsp, 0, 0);
2413         if (error)
2414                 return (error);
2415         if ((pcp = VTOPC(vp)) == NULL || pcp->pc_flags & PC_INVAL) {
2416                 pc_unlockfs(fsp);
2417                 return (EIO);
2418         }
2419         if (fidp->fid_len < (sizeof (struct pc_fid) - sizeof (ushort_t))) {
2420                 fidp->fid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2421                 pc_unlockfs(fsp);
2422                 return (ENOSPC);
2423         }
2424 
2425         pcfid = (struct pc_fid *)fidp;
2426         bzero(pcfid, sizeof (struct pc_fid));
2427         pcfid->pcfid_len = sizeof (struct pc_fid) - sizeof (ushort_t);
2428         if (vp->v_flag & VROOT) {
2429                 pcfid->pcfid_block = 0;
2430                 pcfid->pcfid_offset = 0;
2431                 pcfid->pcfid_ctime = 0;
2432         } else {
2433                 pcfid->pcfid_block = pcp->pc_eblkno;
2434                 pcfid->pcfid_offset = pcp->pc_eoffset;
2435                 pcfid->pcfid_ctime = pcp->pc_entry.pcd_crtime.pct_time;
2436         }
2437         pc_unlockfs(fsp);
2438         return (0);
2439 }