Print this page
5382 pvn_getpages handles lengths <= PAGESIZE just fine
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/swapfs/swap_vnops.c
+++ new/usr/src/uts/common/fs/swapfs/swap_vnops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 + * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
23 24 */
24 25
25 26 #include <sys/types.h>
26 27 #include <sys/param.h>
27 28 #include <sys/systm.h>
28 29 #include <sys/buf.h>
29 30 #include <sys/cred.h>
30 31 #include <sys/errno.h>
31 32 #include <sys/vnode.h>
32 33 #include <sys/vfs_opreg.h>
33 34 #include <sys/cmn_err.h>
34 35 #include <sys/swap.h>
35 36 #include <sys/mman.h>
36 37 #include <sys/vmsystm.h>
37 38 #include <sys/vtrace.h>
38 39 #include <sys/debug.h>
39 40 #include <sys/sysmacros.h>
40 41 #include <sys/vm.h>
41 42
42 43 #include <sys/fs/swapnode.h>
43 44
44 45 #include <vm/seg.h>
45 46 #include <vm/page.h>
46 47 #include <vm/pvn.h>
47 48 #include <fs/fs_subr.h>
48 49
49 50 #include <vm/seg_kp.h>
50 51
51 52 /*
52 53 * Define the routines within this file.
53 54 */
54 55 static int swap_getpage(struct vnode *vp, offset_t off, size_t len,
55 56 uint_t *protp, struct page **plarr, size_t plsz, struct seg *seg,
56 57 caddr_t addr, enum seg_rw rw, struct cred *cr, caller_context_t *ct);
57 58 static int swap_putpage(struct vnode *vp, offset_t off, size_t len,
58 59 int flags, struct cred *cr, caller_context_t *ct);
59 60 static void swap_inactive(struct vnode *vp, struct cred *cr,
60 61 caller_context_t *ct);
61 62 static void swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn,
62 63 cred_t *cr, caller_context_t *ct);
63 64
64 65 static int swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
65 66 uint_t *protp, page_t **plarr, size_t plsz,
66 67 struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);
67 68
68 69 int swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
69 70 uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
70 71 uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
71 72 enum seg_rw rw, struct cred *cr);
72 73
73 74 static int swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
74 75 size_t *lenp, int flags, struct cred *cr);
75 76
76 77 const fs_operation_def_t swap_vnodeops_template[] = {
77 78 VOPNAME_INACTIVE, { .vop_inactive = swap_inactive },
78 79 VOPNAME_GETPAGE, { .vop_getpage = swap_getpage },
79 80 VOPNAME_PUTPAGE, { .vop_putpage = swap_putpage },
80 81 VOPNAME_DISPOSE, { .vop_dispose = swap_dispose },
81 82 VOPNAME_SETFL, { .error = fs_error },
82 83 VOPNAME_POLL, { .error = fs_error },
83 84 VOPNAME_PATHCONF, { .error = fs_error },
84 85 VOPNAME_GETSECATTR, { .error = fs_error },
85 86 VOPNAME_SHRLOCK, { .error = fs_error },
86 87 NULL, NULL
87 88 };
88 89
89 90 vnodeops_t *swap_vnodeops;
90 91
91 92 /* ARGSUSED */
92 93 static void
93 94 swap_inactive(
94 95 struct vnode *vp,
95 96 struct cred *cr,
96 97 caller_context_t *ct)
97 98 {
98 99 SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %x\n", vp, 0, 0, 0, 0);
99 100 }
100 101
101 102 /*
102 103 * Return all the pages from [off..off+len] in given file
103 104 */
104 105 /*ARGSUSED*/
105 106 static int
106 107 swap_getpage(
107 108 struct vnode *vp,
108 109 offset_t off,
↓ open down ↓ |
76 lines elided |
↑ open up ↑ |
109 110 size_t len,
110 111 uint_t *protp,
111 112 page_t *pl[],
112 113 size_t plsz,
113 114 struct seg *seg,
114 115 caddr_t addr,
115 116 enum seg_rw rw,
116 117 struct cred *cr,
117 118 caller_context_t *ct)
118 119 {
119 - int err;
120 -
121 120 SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n",
122 121 (void *)vp, off, len, 0, 0);
123 122
124 123 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE,
125 124 "swapfs getpage:vp %p off %llx len %ld",
126 125 (void *)vp, off, len);
127 126
128 - if (len <= PAGESIZE) {
129 - err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
130 - seg, addr, rw, cr);
131 - } else {
132 - err = pvn_getpages(swap_getapage, vp, (u_offset_t)off, len,
133 - protp, pl, plsz, seg, addr, rw, cr);
134 - }
135 -
136 - return (err);
127 + return (pvn_getpages(swap_getapage, vp, (u_offset_t)off, len, protp,
128 + pl, plsz, seg, addr, rw, cr));
137 129 }
138 130
139 131 /*
140 - * Called from pvn_getpages or swap_getpage to get a particular page.
132 + * Called from pvn_getpages to get a particular page.
141 133 */
142 134 /*ARGSUSED*/
143 135 static int
144 136 swap_getapage(
145 137 struct vnode *vp,
146 138 u_offset_t off,
147 139 size_t len,
148 140 uint_t *protp,
149 141 page_t *pl[],
150 142 size_t plsz,
151 143 struct seg *seg,
152 144 caddr_t addr,
153 145 enum seg_rw rw,
154 146 struct cred *cr)
155 147 {
156 148 struct page *pp, *rpp;
157 149 int flags;
158 150 int err = 0;
159 151 struct vnode *pvp = NULL;
160 152 u_offset_t poff;
161 153 int flag_noreloc;
162 154 se_t lock;
163 155 extern int kcage_on;
164 156 int upgrade = 0;
165 157
166 158 SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n",
167 159 vp, off, len, 0, 0);
168 160
169 161 /*
170 162 * Until there is a call-back mechanism to cause SEGKP
171 163 * pages to be unlocked, make them non-relocatable.
172 164 */
173 165 if (SEG_IS_SEGKP(seg))
174 166 flag_noreloc = PG_NORELOC;
175 167 else
176 168 flag_noreloc = 0;
177 169
178 170 if (protp != NULL)
179 171 *protp = PROT_ALL;
180 172
181 173 lock = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
182 174
183 175 again:
184 176 if (pp = page_lookup(vp, off, lock)) {
185 177 /*
186 178 * In very rare instances, a segkp page may have been
187 179 * relocated outside of the kernel by the kernel cage
188 180 * due to the window between page_unlock() and
189 181 * VOP_PUTPAGE() in segkp_unlock(). Due to the
 190 182 	 * rareness of these occurrences, the solution is to
191 183 * relocate the page to a P_NORELOC page.
192 184 */
193 185 if (flag_noreloc != 0) {
194 186 if (!PP_ISNORELOC(pp) && kcage_on) {
195 187 if (lock != SE_EXCL) {
196 188 upgrade = 1;
197 189 if (!page_tryupgrade(pp)) {
198 190 page_unlock(pp);
199 191 lock = SE_EXCL;
200 192 goto again;
201 193 }
202 194 }
203 195
204 196 if (page_relocate_cage(&pp, &rpp) != 0)
205 197 panic("swap_getapage: "
206 198 "page_relocate_cage failed");
207 199
208 200 pp = rpp;
209 201 }
210 202 }
211 203
212 204 if (pl) {
213 205 if (upgrade)
214 206 page_downgrade(pp);
215 207
216 208 pl[0] = pp;
217 209 pl[1] = NULL;
218 210 } else {
219 211 page_unlock(pp);
220 212 }
221 213 } else {
222 214 pp = page_create_va(vp, off, PAGESIZE,
223 215 PG_WAIT | PG_EXCL | flag_noreloc,
224 216 seg, addr);
225 217 /*
226 218 * Someone raced in and created the page after we did the
227 219 * lookup but before we did the create, so go back and
228 220 * try to look it up again.
229 221 */
230 222 if (pp == NULL)
231 223 goto again;
232 224 if (rw != S_CREATE) {
233 225 err = swap_getphysname(vp, off, &pvp, &poff);
234 226 if (pvp) {
235 227 struct anon *ap;
236 228 kmutex_t *ahm;
237 229
238 230 flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
239 231 err = VOP_PAGEIO(pvp, pp, poff,
240 232 PAGESIZE, flags, cr, NULL);
241 233
242 234 if (!err) {
243 235 ahm = AH_MUTEX(vp, off);
244 236 mutex_enter(ahm);
245 237
246 238 ap = swap_anon(vp, off);
247 239 if (ap == NULL) {
248 240 panic("swap_getapage:"
249 241 " null anon");
250 242 }
251 243
252 244 if (ap->an_pvp == pvp &&
253 245 ap->an_poff == poff) {
254 246 swap_phys_free(pvp, poff,
255 247 PAGESIZE);
256 248 ap->an_pvp = NULL;
257 249 ap->an_poff = NULL;
258 250 hat_setmod(pp);
259 251 }
260 252
261 253 mutex_exit(ahm);
262 254 }
263 255 } else {
264 256 if (!err)
265 257 pagezero(pp, 0, PAGESIZE);
266 258
267 259 /*
268 260 * If it's a fault ahead, release page_io_lock
269 261 * and SE_EXCL we grabbed in page_create_va
270 262 *
271 263 * If we are here, we haven't called VOP_PAGEIO
272 264 * and thus calling pvn_read_done(pp, B_READ)
273 265 * below may mislead that we tried i/o. Besides,
274 266 * in case of async, pvn_read_done() should
275 267 * not be called by *getpage()
276 268 */
277 269 if (pl == NULL) {
278 270 /*
279 271 * swap_getphysname can return error
280 272 * only when we are getting called from
281 273 * swapslot_free which passes non-NULL
282 274 * pl to VOP_GETPAGE.
283 275 */
284 276 ASSERT(err == 0);
285 277 page_io_unlock(pp);
286 278 page_unlock(pp);
287 279 }
288 280 }
289 281 }
290 282
291 283 ASSERT(pp != NULL);
292 284
293 285 if (err && pl)
294 286 pvn_read_done(pp, B_ERROR);
295 287
296 288 if (!err && pl)
297 289 pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
298 290 }
299 291 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
300 292 "swapfs getapage:pp %p vp %p off %llx", pp, vp, off);
301 293 return (err);
302 294 }
303 295
304 296 /*
305 297 * Called from large page anon routines only! This is an ugly hack where
306 298 * the anon layer directly calls into swapfs with a preallocated large page.
307 299 * Another method would have been to change to VOP and add an extra arg for
308 300 * the preallocated large page. This all could be cleaned up later when we
 309 301 	 * solve the anonymous naming problem and no longer need to loop across
310 302 * the VOP in PAGESIZE increments to fill in or initialize a large page as
 311 303 	 * is done today. I think the latter is better since it avoids a change to
312 304 * the VOP interface that could later be avoided.
313 305 */
314 306 int
315 307 swap_getconpage(
316 308 struct vnode *vp,
317 309 u_offset_t off,
318 310 size_t len,
319 311 uint_t *protp,
320 312 page_t *pl[],
321 313 size_t plsz,
322 314 page_t *conpp,
323 315 uint_t *pszc,
324 316 spgcnt_t *nreloc,
325 317 struct seg *seg,
326 318 caddr_t addr,
327 319 enum seg_rw rw,
328 320 struct cred *cr)
329 321 {
330 322 struct page *pp;
331 323 int err = 0;
332 324 struct vnode *pvp = NULL;
333 325 u_offset_t poff;
334 326
335 327 ASSERT(len == PAGESIZE);
336 328 ASSERT(pl != NULL);
337 329 ASSERT(plsz == PAGESIZE);
338 330 ASSERT(protp == NULL);
339 331 ASSERT(nreloc != NULL);
340 332 ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */
341 333 SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n",
342 334 vp, off, len, 0, 0);
343 335
344 336 /*
345 337 * If we are not using a preallocated page then we know one already
346 338 * exists. So just let the old code handle it.
347 339 */
348 340 if (conpp == NULL) {
349 341 err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
350 342 seg, addr, rw, cr);
351 343 return (err);
352 344 }
353 345 ASSERT(conpp->p_szc != 0);
354 346 ASSERT(PAGE_EXCL(conpp));
355 347
356 348
357 349 ASSERT(conpp->p_next == conpp);
358 350 ASSERT(conpp->p_prev == conpp);
359 351 ASSERT(!PP_ISAGED(conpp));
360 352 ASSERT(!PP_ISFREE(conpp));
361 353
362 354 *nreloc = 0;
363 355 pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0);
364 356
365 357 /*
366 358 * If existing page is found we may need to relocate.
367 359 */
368 360 if (pp != conpp) {
369 361 ASSERT(rw != S_CREATE);
370 362 ASSERT(pszc != NULL);
371 363 ASSERT(PAGE_SHARED(pp));
372 364 if (pp->p_szc < conpp->p_szc) {
373 365 *pszc = pp->p_szc;
374 366 page_unlock(pp);
375 367 err = -1;
376 368 } else if (pp->p_szc > conpp->p_szc &&
377 369 seg->s_szc > conpp->p_szc) {
378 370 *pszc = MIN(pp->p_szc, seg->s_szc);
379 371 page_unlock(pp);
380 372 err = -2;
381 373 } else {
382 374 pl[0] = pp;
383 375 pl[1] = NULL;
384 376 if (page_pptonum(pp) &
385 377 (page_get_pagecnt(conpp->p_szc) - 1))
386 378 cmn_err(CE_PANIC, "swap_getconpage: no root");
387 379 }
388 380 return (err);
389 381 }
390 382
391 383 ASSERT(PAGE_EXCL(pp));
392 384
393 385 if (*nreloc != 0) {
394 386 ASSERT(rw != S_CREATE);
395 387 pl[0] = pp;
396 388 pl[1] = NULL;
397 389 return (0);
398 390 }
399 391
400 392 *nreloc = 1;
401 393
402 394 /*
403 395 * If necessary do the page io.
404 396 */
405 397 if (rw != S_CREATE) {
406 398 /*
407 399 * Since we are only called now on behalf of an
408 400 * address space operation it's impossible for
 409 401 		 * us to fail unlike swap_getapage() which
410 402 * also gets called from swapslot_free().
411 403 */
412 404 if (swap_getphysname(vp, off, &pvp, &poff)) {
413 405 cmn_err(CE_PANIC,
414 406 "swap_getconpage: swap_getphysname failed!");
415 407 }
416 408
417 409 if (pvp != NULL) {
418 410 err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ,
419 411 cr, NULL);
420 412 if (err == 0) {
421 413 struct anon *ap;
422 414 kmutex_t *ahm;
423 415
424 416 ahm = AH_MUTEX(vp, off);
425 417 mutex_enter(ahm);
426 418 ap = swap_anon(vp, off);
427 419 if (ap == NULL)
428 420 panic("swap_getconpage: null anon");
429 421 if (ap->an_pvp != pvp || ap->an_poff != poff)
430 422 panic("swap_getconpage: bad anon");
431 423
432 424 swap_phys_free(pvp, poff, PAGESIZE);
433 425 ap->an_pvp = NULL;
434 426 ap->an_poff = NULL;
435 427 hat_setmod(pp);
436 428 mutex_exit(ahm);
437 429 }
438 430 } else {
439 431 pagezero(pp, 0, PAGESIZE);
440 432 }
441 433 }
442 434
443 435 /*
444 436 * Normally we would let pvn_read_done() destroy
445 437 * the page on IO error. But since this is a preallocated
446 438 * page we'll let the anon layer handle it.
447 439 */
448 440 page_io_unlock(pp);
449 441 if (err != 0)
450 442 page_hashout(pp, NULL);
451 443 ASSERT(pp->p_next == pp);
452 444 ASSERT(pp->p_prev == pp);
453 445
454 446 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
455 447 "swapfs getconpage:pp %p vp %p off %llx", pp, vp, off);
456 448
457 449 pl[0] = pp;
458 450 pl[1] = NULL;
459 451 return (err);
460 452 }
461 453
462 454 /* Async putpage klustering stuff */
463 455 int sw_pending_size;
464 456 extern int klustsize;
465 457 extern struct async_reqs *sw_getreq();
466 458 extern void sw_putreq(struct async_reqs *);
467 459 extern void sw_putbackreq(struct async_reqs *);
468 460 extern struct async_reqs *sw_getfree();
469 461 extern void sw_putfree(struct async_reqs *);
470 462
471 463 static size_t swap_putpagecnt, swap_pagespushed;
472 464 static size_t swap_otherfail, swap_otherpages;
473 465 static size_t swap_klustfail, swap_klustpages;
474 466 static size_t swap_getiofail, swap_getiopages;
475 467
476 468 /*
477 469 * Flags are composed of {B_INVAL, B_DIRTY B_FREE, B_DONTNEED}.
478 470 * If len == 0, do from off to EOF.
479 471 */
480 472 static int swap_nopage = 0; /* Don't do swap_putpage's if set */
481 473
482 474 /* ARGSUSED */
483 475 static int
484 476 swap_putpage(
485 477 struct vnode *vp,
486 478 offset_t off,
487 479 size_t len,
488 480 int flags,
489 481 struct cred *cr,
490 482 caller_context_t *ct)
491 483 {
492 484 page_t *pp;
493 485 u_offset_t io_off;
494 486 size_t io_len = 0;
495 487 int err = 0;
496 488 int nowait;
497 489 struct async_reqs *arg;
498 490
499 491 if (swap_nopage)
500 492 return (0);
501 493
502 494 ASSERT(vp->v_count != 0);
503 495
504 496 nowait = flags & B_PAGE_NOWAIT;
505 497
506 498 /*
507 499 * Clear force flag so that p_lckcnt pages are not invalidated.
508 500 */
509 501 flags &= ~(B_FORCE | B_PAGE_NOWAIT);
510 502
511 503 SWAPFS_PRINT(SWAP_VOPS,
512 504 "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
513 505 (void *)vp, off, len, flags, 0);
514 506 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
515 507 "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);
516 508
517 509 if (vp->v_flag & VNOMAP)
518 510 return (ENOSYS);
519 511
520 512 if (!vn_has_cached_data(vp))
521 513 return (0);
522 514
523 515 if (len == 0) {
524 516 if (curproc == proc_pageout)
525 517 cmn_err(CE_PANIC, "swapfs: pageout can't block");
526 518
527 519 /* Search the entire vp list for pages >= off. */
528 520 err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
529 521 flags, cr);
530 522 } else {
531 523 u_offset_t eoff;
532 524
533 525 /*
534 526 * Loop over all offsets in the range [off...off + len]
535 527 * looking for pages to deal with.
536 528 */
537 529 eoff = off + len;
538 530 for (io_off = (u_offset_t)off; io_off < eoff;
539 531 io_off += io_len) {
540 532 /*
541 533 * If we run out of the async req slot, put the page
542 534 * now instead of queuing.
543 535 */
544 536 if (flags == (B_ASYNC | B_FREE) &&
545 537 sw_pending_size < klustsize &&
546 538 (arg = sw_getfree())) {
547 539 /*
548 540 * If we are clustering, we should allow
549 541 * pageout to feed us more pages because # of
550 542 * pushes is limited by # of I/Os, and one
551 543 * cluster is considered to be one I/O.
552 544 */
553 545 if (pushes)
554 546 pushes--;
555 547
556 548 arg->a_vp = vp;
557 549 arg->a_off = io_off;
558 550 arg->a_len = PAGESIZE;
559 551 arg->a_flags = B_ASYNC | B_FREE;
560 552 arg->a_cred = kcred;
561 553 sw_putreq(arg);
562 554 io_len = PAGESIZE;
563 555 continue;
564 556 }
565 557 /*
566 558 * If we are not invalidating pages, use the
567 559 * routine page_lookup_nowait() to prevent
568 560 * reclaiming them from the free list.
569 561 */
570 562 if (!nowait && ((flags & B_INVAL) ||
571 563 (flags & (B_ASYNC | B_FREE)) == B_FREE))
572 564 pp = page_lookup(vp, io_off, SE_EXCL);
573 565 else
574 566 pp = page_lookup_nowait(vp, io_off,
575 567 (flags & (B_FREE | B_INVAL)) ?
576 568 SE_EXCL : SE_SHARED);
577 569
578 570 if (pp == NULL || pvn_getdirty(pp, flags) == 0)
579 571 io_len = PAGESIZE;
580 572 else {
581 573 err = swap_putapage(vp, pp, &io_off, &io_len,
582 574 flags, cr);
583 575 if (err != 0)
584 576 break;
585 577 }
586 578 }
587 579 }
588 580 /* If invalidating, verify all pages on vnode list are gone. */
589 581 if (err == 0 && off == 0 && len == 0 &&
590 582 (flags & B_INVAL) && vn_has_cached_data(vp)) {
591 583 cmn_err(CE_WARN,
592 584 "swap_putpage: B_INVAL, pages not gone");
593 585 }
594 586 return (err);
595 587 }
596 588
597 589 /*
598 590 * Write out a single page.
599 591 * For swapfs this means choose a physical swap slot and write the page
600 592 * out using VOP_PAGEIO.
601 593 * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
602 594 * swapfs pages, a bunch of contiguous swap slots and then write them
603 595 * all out in one clustered i/o.
604 596 */
605 597 /*ARGSUSED*/
606 598 static int
607 599 swap_putapage(
608 600 struct vnode *vp,
609 601 page_t *pp,
610 602 u_offset_t *offp,
611 603 size_t *lenp,
612 604 int flags,
613 605 struct cred *cr)
614 606 {
615 607 int err;
616 608 struct vnode *pvp;
617 609 u_offset_t poff, off;
618 610 u_offset_t doff;
619 611 size_t dlen;
620 612 size_t klsz = 0;
621 613 u_offset_t klstart = 0;
622 614 struct vnode *klvp = NULL;
623 615 page_t *pplist;
624 616 se_t se;
625 617 struct async_reqs *arg;
626 618 size_t swap_klustsize;
627 619
628 620 /*
629 621 * This check is added for callers who access swap_putpage with len = 0.
630 622 * swap_putpage calls swap_putapage page-by-page via pvn_vplist_dirty.
631 623 * And it's necessary to do the same queuing if users have the same
632 624 * B_ASYNC|B_FREE flags on.
633 625 */
634 626 if (flags == (B_ASYNC | B_FREE) &&
635 627 sw_pending_size < klustsize && (arg = sw_getfree())) {
636 628
637 629 hat_setmod(pp);
638 630 page_io_unlock(pp);
639 631 page_unlock(pp);
640 632
641 633 arg->a_vp = vp;
642 634 arg->a_off = pp->p_offset;
643 635 arg->a_len = PAGESIZE;
644 636 arg->a_flags = B_ASYNC | B_FREE;
645 637 arg->a_cred = kcred;
646 638 sw_putreq(arg);
647 639
648 640 return (0);
649 641 }
650 642
651 643 SWAPFS_PRINT(SWAP_PUTP,
652 644 "swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
653 645 pp, vp, pp->p_offset, flags, 0);
654 646
655 647 ASSERT(PAGE_LOCKED(pp));
656 648
657 649 off = pp->p_offset;
658 650
659 651 doff = off;
660 652 dlen = PAGESIZE;
661 653
662 654 if (err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff)) {
663 655 err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
664 656 hat_setmod(pp);
665 657 page_io_unlock(pp);
666 658 page_unlock(pp);
667 659 goto out;
668 660 }
669 661
670 662 klvp = pvp;
671 663 klstart = poff;
672 664 pplist = pp;
673 665 /*
674 666 * If this is ASYNC | FREE and we've accumulated a bunch of such
675 667 * pending requests, kluster.
676 668 */
677 669 if (flags == (B_ASYNC | B_FREE))
678 670 swap_klustsize = klustsize;
679 671 else
680 672 swap_klustsize = PAGESIZE;
681 673 se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
682 674 klsz = PAGESIZE;
683 675 while (klsz < swap_klustsize) {
684 676 if ((arg = sw_getreq()) == NULL) {
685 677 swap_getiofail++;
686 678 swap_getiopages += btop(klsz);
687 679 break;
688 680 }
689 681 ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
690 682 vp = arg->a_vp;
691 683 off = arg->a_off;
692 684
693 685 if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
694 686 swap_otherfail++;
695 687 swap_otherpages += btop(klsz);
696 688 sw_putfree(arg);
697 689 break;
698 690 }
699 691 if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
700 692 sw_putfree(arg);
701 693 continue;
702 694 }
703 695 /* Get new physical backing store for the page */
704 696 doff = off;
705 697 dlen = PAGESIZE;
706 698 if (err = swap_newphysname(vp, off, &doff, &dlen,
707 699 &pvp, &poff)) {
708 700 swap_otherfail++;
709 701 swap_otherpages += btop(klsz);
710 702 hat_setmod(pp);
711 703 page_io_unlock(pp);
712 704 page_unlock(pp);
713 705 sw_putbackreq(arg);
714 706 break;
715 707 }
716 708 /* Try to cluster new physical name with previous ones */
717 709 if (klvp == pvp && poff == klstart + klsz) {
718 710 klsz += PAGESIZE;
719 711 page_add(&pplist, pp);
720 712 pplist = pplist->p_next;
721 713 sw_putfree(arg);
722 714 } else if (klvp == pvp && poff == klstart - PAGESIZE) {
723 715 klsz += PAGESIZE;
724 716 klstart -= PAGESIZE;
725 717 page_add(&pplist, pp);
726 718 sw_putfree(arg);
727 719 } else {
728 720 swap_klustfail++;
729 721 swap_klustpages += btop(klsz);
730 722 hat_setmod(pp);
731 723 page_io_unlock(pp);
732 724 page_unlock(pp);
733 725 sw_putbackreq(arg);
734 726 break;
735 727 }
736 728 }
737 729
738 730 err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
739 731 B_WRITE | flags, cr, NULL);
740 732
741 733 if ((flags & B_ASYNC) == 0)
742 734 pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
743 735
744 736 /* Statistics */
745 737 if (!err) {
746 738 swap_putpagecnt++;
747 739 swap_pagespushed += btop(klsz);
748 740 }
749 741 out:
750 742 TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
751 743 "swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
752 744 vp, klvp, klstart, klsz);
753 745 if (err && err != ENOMEM)
754 746 cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
755 747 if (lenp)
756 748 *lenp = PAGESIZE;
757 749 return (err);
758 750 }
759 751
760 752 static void
761 753 swap_dispose(
762 754 vnode_t *vp,
763 755 page_t *pp,
764 756 int fl,
765 757 int dn,
766 758 cred_t *cr,
767 759 caller_context_t *ct)
768 760 {
769 761 int err;
770 762 u_offset_t off = pp->p_offset;
771 763 vnode_t *pvp;
772 764 u_offset_t poff;
773 765
774 766 ASSERT(PAGE_EXCL(pp));
775 767
776 768 /*
777 769 * The caller will free/invalidate large page in one shot instead of
778 770 * one small page at a time.
779 771 */
780 772 if (pp->p_szc != 0) {
781 773 page_unlock(pp);
782 774 return;
783 775 }
784 776
785 777 err = swap_getphysname(vp, off, &pvp, &poff);
786 778 if (!err && pvp != NULL)
787 779 VOP_DISPOSE(pvp, pp, fl, dn, cr, ct);
788 780 else
789 781 fs_dispose(vp, pp, fl, dn, cr, ct);
790 782 }
↓ open down ↓ |
640 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX