6583-remove-whole-process-swapping Wdiff usr/src/uts/sparc/v9/vm/seg_nf.c

Print this page

6583 remove whole-process swapping

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/sparc/v9/vm/seg_nf.c
          +++ new/usr/src/uts/sparc/v9/vm/seg_nf.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  
  26   26  /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  27   27  /* All Rights Reserved */
  28   28  
  29   29  /*
  30   30   * Portions of this source code were derived from Berkeley 4.3 BSD
  31   31   * under license from the Regents of the University of California.
  32   32   */
  33   33  
  34   34  /*
  35   35   * VM - segment for non-faulting loads.
  36   36   */
  37   37  
  38   38  #include <sys/types.h>
  39   39  #include <sys/t_lock.h>
  40   40  #include <sys/param.h>
  41   41  #include <sys/mman.h>
  42   42  #include <sys/errno.h>
  43   43  #include <sys/kmem.h>
  44   44  #include <sys/cmn_err.h>
  45   45  #include <sys/vnode.h>
  46   46  #include <sys/proc.h>
  47   47  #include <sys/conf.h>
  48   48  #include <sys/debug.h>
  49   49  #include <sys/archsystm.h>
  50   50  #include <sys/lgrp.h>
  51   51  
  52   52  #include <vm/page.h>
  53   53  #include <vm/hat.h>
  54   54  #include <vm/as.h>
  55   55  #include <vm/seg.h>
  56   56  #include <vm/vpage.h>
  57   57  
  58   58  /*
  59   59   * Private seg op routines.
            *
            * Forward declarations so that the segnf_ops table below can name
            * these routines before their definitions later in the file.
  60   60   */
  61   61  static int      segnf_dup(struct seg *seg, struct seg *newseg);
  62   62  static int      segnf_unmap(struct seg *seg, caddr_t addr, size_t len);
  63   63  static void     segnf_free(struct seg *seg);
  64   64  static faultcode_t segnf_nomap(void);
  65   65  static int      segnf_setprot(struct seg *seg, caddr_t addr,
  66   66                      size_t len, uint_t prot);
  67   67  static int      segnf_checkprot(struct seg *seg, caddr_t addr,
  68   68                      size_t len, uint_t prot);
  69   69  static void     segnf_badop(void);
  70   70  static int      segnf_nop(void);
  71   71  static int      segnf_getprot(struct seg *seg, caddr_t addr,
  72   72                      size_t len, uint_t *protv);
  73   73  static u_offset_t segnf_getoffset(struct seg *seg, caddr_t addr);
  74   74  static int      segnf_gettype(struct seg *seg, caddr_t addr);
  75   75  static int      segnf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
  76   76  static void     segnf_dump(struct seg *seg);
  77   77  static int      segnf_pagelock(struct seg *seg, caddr_t addr, size_t len,
  78   78                      struct page ***ppp, enum lock_type type, enum seg_rw rw);
  79   79  static int      segnf_setpagesize(struct seg *seg, caddr_t addr, size_t len,
  80   80                      uint_t szc);
  81   81  static int      segnf_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
  82   82  static lgrp_mem_policy_info_t   *segnf_getpolicy(struct seg *seg,
  83   83      caddr_t addr);
  84   84  
  85   85  
  86   86  struct seg_ops segnf_ops = {
                   /*
                    * Operations vector for no-fault segments.  The initializer
                    * is positional.  Several slots share one generic routine
                    * (segnf_nomap, segnf_nop or segnf_badop); the cast in front
                    * of each such entry converts the argument-less routine to
                    * the function-pointer type of that slot.
                    */
  87   87          segnf_dup,

↓ open down ↓

87 lines elided

↑ open up ↑

  88   88          segnf_unmap,
  89   89          segnf_free,
  90   90          (faultcode_t (*)(struct hat *, struct seg *, caddr_t, size_t,
  91   91              enum fault_type, enum seg_rw))
  92   92                  segnf_nomap,            /* fault */
  93   93          (faultcode_t (*)(struct seg *, caddr_t))
  94   94                  segnf_nomap,            /* faulta */
  95   95          segnf_setprot,
  96   96          segnf_checkprot,
  97   97          (int (*)())segnf_badop,         /* kluster */
  98      -        (size_t (*)(struct seg *))NULL, /* swapout */
  99   98          (int (*)(struct seg *, caddr_t, size_t, int, uint_t))
 100   99                  segnf_nop,              /* sync */
 101  100          (size_t (*)(struct seg *, caddr_t, size_t, char *))
 102  101                  segnf_nop,              /* incore */
 103  102          (int (*)(struct seg *, caddr_t, size_t, int, int, ulong_t *, size_t))
 104  103                  segnf_nop,              /* lockop */
 105  104          segnf_getprot,
 106  105          segnf_getoffset,
 107  106          segnf_gettype,
 108  107          segnf_getvp,

 109  108          (int (*)(struct seg *, caddr_t, size_t, uint_t))
 110  109                  segnf_nop,              /* advise */
 111  110          segnf_dump,
 112  111          segnf_pagelock,
 113  112          segnf_setpagesize,
 114  113          segnf_getmemid,
 115  114          segnf_getpolicy,
 116  115  };
 117  116  
 118  117  /*
 119  118   * vnode and page for the page of zeros we use for the nf mappings.
            *
            * nfpp is allocated and filled in on the first call to segnf_create(),
            * under segnf_lock: one zeroed page per virtual color, or a single
            * page when shm_alignment does not exceed PAGESIZE.  nfvp is the
            * vnode those pages are created against.
 120  119   */
 121  120  static kmutex_t segnf_lock;
 122  121  static struct vnode nfvp;
 123  122  static struct page **nfpp;
 124  123  
           /*
            * Map a virtual address to its virtual color: the page index of addr
            * within an shm_alignment-sized window, or 0 when shm_alignment is 0.
            * The result is used to index nfpp[].  The whole expansion is
            * parenthesized so the conditional cannot bind to operators that
            * surround the macro at its point of use.
            */
 125  124  #define addr_to_vcolor(addr)                                            \
 126  125          ((shm_alignment) ?                                              \
 127  126          ((int)(((uintptr_t)(addr) & (shm_alignment - 1)) >> PAGESHIFT)) : 0)
 128  127  
 129  128  /*
 130  129   * We try to limit the number of Non-fault segments created.
 131  130   * Non fault segments are created to optimize sparc V9 code which uses
 132  131   * the sparc nonfaulting load ASI (ASI_PRIMARY_NOFAULT).
 133  132   *
 134  133   * There are several reasons why creating too many non-fault segments
 135  134   * could cause problems.
 136  135   *
 137  136   *      First, excessive allocation of kernel resources for the seg
 138  137   *      structures and the HAT data to map the zero pages.
 139  138   *
 140  139   *      Secondly, creating nofault segments actually uses up user virtual
 141  140   *      address space. This makes it unavailable for subsequent mmap(0, ...)
 142  141   *      calls which use as_gap() to find empty va regions.  Creation of too
 143  142   *      many nofault segments could thus interfere with the ability of the
 144  143   *      runtime linker to load a shared object.
            *
            * MAXSEGFORNF is the segment count of the address space (as reported
            * by avl_numnodes() on a_segtree) above which segnf_create() tries to
            * unmap another no-fault segment.  MAXNFSEARCH bounds how many
            * neighboring segments segnf_create() inspects while looking for one.
 145  144   */
 146  145  #define MAXSEGFORNF     (10000)
 147  146  #define MAXNFSEARCH     (5)
 148  147  
 149  148  
 150  149  /*
 151  150   * Must be called from startup()
            *
            * Initializes segnf_lock, the mutex segnf_create() holds while it
            * allocates and populates the shared zero-page array nfpp.
 152  151   */
 153  152  void
 154  153  segnf_init()
 155  154  {
 156  155          mutex_init(&segnf_lock, NULL, MUTEX_DEFAULT, NULL);
 157  156  }
 158  157  
 159  158  
 160  159  /*
 161  160   * Create a no-fault segment.
 162  161   *
 163  162   * The no-fault segment is not technically necessary, as the code in
 164  163   * nfload() in trap.c will emulate the SPARC instruction and load
 165  164   * a value of zero in the destination register.
 166  165   *
 167  166   * However, this code tries to put a page of zeros at the nofault address
 168  167   * so that subsequent non-faulting loads to the same page will not
 169  168   * trap with a tlb miss.
 170  169   *
 171  170   * In order to help limit the number of segments we merge adjacent nofault
 172  171   * segments into a single segment.  If we get a large number of segments
 173  172   * we'll also try to delete a random other nf segment.
            *
            * seg is the segment to turn into a no-fault segment; its address
            * space must be held as writer (asserted below).  argsp is unused.
            * Always returns 0.
 174  173   */
 175  174  /* ARGSUSED */
 176  175  int
 177  176  segnf_create(struct seg *seg, void *argsp)
 178  177  {
 179  178          uint_t prot;
 180  179          pgcnt_t vacpgs;
 181  180          u_offset_t off = 0;
 182  181          caddr_t vaddr = NULL;
 183  182          int i, color;
 184  183          struct seg *s1;
 185  184          struct seg *s2;
 186  185          size_t size;
 187  186          struct as *as = seg->s_as;
 188  187  
 189  188          ASSERT(as && AS_WRITE_HELD(as));
 190  189  
 191  190          /*
 192  191           * Need a page per virtual color or just 1 if no vac.
                   * The array is built once, on the first call, under segnf_lock.
 193  192           */
 194  193          mutex_enter(&segnf_lock);
 195  194          if (nfpp == NULL) {
 196  195                  struct seg kseg;
 197  196  
 198  197                  vacpgs = 1;
 199  198                  if (shm_alignment > PAGESIZE) {
 200  199                          vacpgs = shm_alignment >> PAGESHIFT;
 201  200                  }
 202  201  
 203  202                  nfpp = kmem_alloc(sizeof (*nfpp) * vacpgs, KM_SLEEP);
 204  203  
                           /* only s_as of this dummy segment is initialized */
 205  204                  kseg.s_as = &kas;
 206  205                  for (i = 0; i < vacpgs; i++, off += PAGESIZE,
 207  206                      vaddr += PAGESIZE) {
 208  207                          nfpp[i] = page_create_va(&nfvp, off, PAGESIZE,
 209  208                              PG_WAIT | PG_NORELOC, &kseg, vaddr);
 210  209                          page_io_unlock(nfpp[i]);
 211  210                          page_downgrade(nfpp[i]);
 212  211                          pagezero(nfpp[i], 0, PAGESIZE);
 213  212                  }
 214  213          }
 215  214          mutex_exit(&segnf_lock);
 216  215  
 217  216          hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
 218  217  
 219  218          /*
 220  219           * s_data can't be NULL because of ASSERTS in the common vm code.
 221  220           */
 222  221          seg->s_ops = &segnf_ops;
 223  222          seg->s_data = seg;
 224  223          seg->s_flags |= S_PURGE;
 225  224  
 226  225          mutex_enter(&as->a_contents);
 227  226          as->a_flags |= AS_NEEDSPURGE;
 228  227          mutex_exit(&as->a_contents);
 229  228  
                   /*
                    * Map the zero page of the matching color read-only at the
                    * base of the segment; user address spaces also get PROT_USER.
                    */
 230  229          prot = PROT_READ;
 231  230          color = addr_to_vcolor(seg->s_base);
 232  231          if (as != &kas)
 233  232                  prot |= PROT_USER;
 234  233          hat_memload(as->a_hat, seg->s_base, nfpp[color],
 235  234              prot | HAT_NOFAULT, HAT_LOAD);
 236  235  
 237  236          /*
 238  237           * At this point see if we can concatenate a segment to
 239  238           * a non-fault segment immediately before and/or after it.
 240  239           */
 241  240          if ((s1 = AS_SEGPREV(as, seg)) != NULL &&
 242  241              s1->s_ops == &segnf_ops &&
 243  242              s1->s_base + s1->s_size == seg->s_base) {
 244  243                  size = s1->s_size;
 245  244                  seg_free(s1);
 246  245                  seg->s_base -= size;
 247  246                  seg->s_size += size;
 248  247          }
 249  248  
 250  249          if ((s2 = AS_SEGNEXT(as, seg)) != NULL &&
 251  250              s2->s_ops == &segnf_ops &&
 252  251              seg->s_base + seg->s_size == s2->s_base) {
 253  252                  size = s2->s_size;
 254  253                  seg_free(s2);
 255  254                  seg->s_size += size;
 256  255          }
 257  256  
 258  257          /*
 259  258           * if we already have a lot of segments, try to delete some other
 260  259           * nofault segment to reduce the probability of uncontrolled segment
 261  260           * creation.
 262  261           *
 263  262           * the code looks around quickly (no more than MAXNFSEARCH segments
 264  263           * each way) for another NF segment and then deletes it.
 265  264           */
 266  265          if (avl_numnodes(&as->a_segtree) > MAXSEGFORNF) {
 267  266                  size = 0;
 268  267                  s2 = NULL;
 269  268                  s1 = AS_SEGPREV(as, seg);
 270  269                  while (size++ < MAXNFSEARCH && s1 != NULL) {
 271  270                          if (s1->s_ops == &segnf_ops)
 272  271                                  s2 = s1;
                                   /*
                                    * Step from s1, not from seg, so that each
                                    * pass looks at a different neighbor.
                                    */
 273  272                          s1 = AS_SEGPREV(as, s1);
 274  273                  }
 275  274                  if (s2 == NULL) {
                                   /*
                                    * Nothing found going backwards; scan forward.
                                    * size still holds the count left by the loop
                                    * above and bounds this scan.
                                    */
 276  275                          s1 = AS_SEGNEXT(as, seg);
 277  276                          while (size-- > 0 && s1 != NULL) {
 278  277                                  if (s1->s_ops == &segnf_ops)
 279  278                                          s2 = s1;
 280  279                                  s1 = AS_SEGNEXT(as, s1);
 281  280                          }
 282  281                  }
 283  282                  if (s2 != NULL)
 284  283                          seg_unmap(s2);
 285  284          }
 286  285  
 287  286          return (0);
 288  287  }
 289  288  
 290  289  /*
 291  290   * Never really need "No fault" segments, so they aren't dup'd.
            *
            * Reaching this routine is treated as fatal: it panics.  The return
            * statement after the panic() call only satisfies the int return type.
 292  291   */
 293  292  /* ARGSUSED */
 294  293  static int
 295  294  segnf_dup(struct seg *seg, struct seg *newseg)
 296  295  {
 297  296          panic("segnf_dup");
 298  297          return (0);
 299  298  }
 300  299  
 301  300  /*
 302  301   * Split a segment at addr for length len.
            *
            * Removes the range [addr, addr + len) from seg.  Depending on where
            * the range lies, the segment is freed entirely, trimmed at its start
            * or end, or split in two around the hole.  The address space must be
            * held as writer.  A range outside the segment, or an addr or len that
            * is not page aligned, causes a panic.  Returns 0.
 303  302   */
 304  303  static int
 305  304  segnf_unmap(struct seg *seg, caddr_t addr, size_t len)
 306  305  {
 307  306          ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
 308  307  
 309  308          /*
 310  309           * Check for bad sizes.
 311  310           */
 312  311          if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
 313  312              (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET)) {
 314  313                  cmn_err(CE_PANIC, "segnf_unmap: bad unmap size");
 315  314          }
 316  315  
 317  316          /*
 318  317           * Unload any hardware translations in the range to be taken out.
 319  318           */
 320  319          hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
 321  320  
 322  321          if (addr == seg->s_base && len == seg->s_size) {
 323  322                  /*
 324  323                   * Freeing entire segment.
 325  324                   */
 326  325                  seg_free(seg);
 327  326          } else if (addr == seg->s_base) {
 328  327                  /*
 329  328                   * Freeing the beginning of the segment.
 330  329                   */
 331  330                  seg->s_base += len;
 332  331                  seg->s_size -= len;
 333  332          } else if (addr + len == seg->s_base + seg->s_size) {
 334  333                  /*
 335  334                   * Freeing the end of the segment.
 336  335                   */
 337  336                  seg->s_size -= len;
 338  337          } else {
 339  338                  /*
 340  339                   * The section to go is in the middle of the segment, so we
 341  340                   * have to cut it into two segments.  We shrink the existing
 342  341                   * "seg" at the low end, and create "nseg" for the high end.
 343  342                   */
 344  343                  caddr_t nbase = addr + len;
 345  344                  size_t nsize = (seg->s_base + seg->s_size) - nbase;
 346  345                  struct seg *nseg;
 347  346  
 348  347                  /*
 349  348                   * Trim down "seg" before trying to stick "nseg" into the as.
 350  349                   */
 351  350                  seg->s_size = addr - seg->s_base;
 352  351                  nseg = seg_alloc(seg->s_as, nbase, nsize);
 353  352                  if (nseg == NULL)
 354  353                          cmn_err(CE_PANIC, "segnf_unmap: seg_alloc failed");
 355  354  
 356  355                  /*
 357  356                   * s_data can't be NULL because of ASSERTs in common VM code.
                           * The new segment is set up the same way segnf_create()
                           * sets up a fresh one: same ops vector, s_data pointing
                           * at itself, S_PURGE set, and AS_NEEDSPURGE raised on the
                           * address space.
 358  357                   */
 359  358                  nseg->s_ops = seg->s_ops;
 360  359                  nseg->s_data = nseg;
 361  360                  nseg->s_flags |= S_PURGE;
 362  361                  mutex_enter(&seg->s_as->a_contents);
 363  362                  seg->s_as->a_flags |= AS_NEEDSPURGE;
 364  363                  mutex_exit(&seg->s_as->a_contents);
 365  364          }
 366  365  
 367  366          return (0);
 368  367  }
 369  368  
 370  369  /*
 371  370   * Free a segment.
            *
            * This routine releases nothing itself; it only asserts that the
            * address space is held as writer.
 372  371   */
 373  372  static void
 374  373  segnf_free(struct seg *seg)
 375  374  {
 376  375          ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
 377  376  }
 378  377  
 379  378  /*
 380  379   * No faults allowed on segnf.
            *
            * Installed (through casts) as both the fault and faulta entries of
            * segnf_ops; always returns FC_NOMAP.
 381  380   */
 382  381  static faultcode_t
 383  382  segnf_nomap(void)
 384  383  {
 385  384          return (FC_NOMAP);
 386  385  }
 387  386  
           /*
            * Protections on a no-fault segment cannot be changed: every request
            * is rejected with EACCES.  The address space lock must be held.
            */
 388  387  /* ARGSUSED */
 389  388  static int
 390  389  segnf_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
 391  390  {
 392  391          ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 393  392          return (EACCES);
 394  393  }
 395  394  
           /*
            * Check whether the access described by prot is permitted.
            *
            * The segment allows PROT_READ in the kernel address space and
            * PROT_READ|PROT_USER in any other.  Returns 0 when every bit in prot
            * is within that set, EACCES otherwise.  addr and len are not examined.
            */
 396  395  /* ARGSUSED */
 397  396  static int
 398  397  segnf_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
 399  398  {
 400  399          uint_t sprot;
 401  400          ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 402  401  
 403  402          sprot = seg->s_as == &kas ?  PROT_READ : PROT_READ|PROT_USER;
 404  403          return ((prot & sprot) == prot ? 0 : EACCES);
 405  404  }
 406  405  
           /*
            * Handler for operations that must never be invoked on a no-fault
            * segment; it panics.  Installed (through a cast) as the kluster
            * entry of segnf_ops.
            */
 407  406  static void
 408  407  segnf_badop(void)
 409  408  {
 410  409          panic("segnf_badop");
 411  410          /*NOTREACHED*/
 412  411  }
 413  412  
           /*
            * Do-nothing handler that reports success (0).  Installed (through
            * casts) as the sync, incore, lockop and advise entries of segnf_ops.
            */
 414  413  static int
 415  414  segnf_nop(void)
 416  415  {
 417  416          return (0);
 418  417  }
 419  418  
           /*
            * Report the protections of the pages in [addr, addr + len].
            *
            * Stores PROT_READ into protv[] once per page; the page count is the
            * difference between the seg_page() indices of the two end addresses,
            * plus one.  The address space lock must be held.  Returns 0.
            */
 420  419  static int
 421  420  segnf_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
 422  421  {
 423  422          size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
 424  423          size_t p;
 425  424          ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 426  425  
 427  426          for (p = 0; p < pgno; ++p)
 428  427                  protv[p] = PROT_READ;
 429  428          return (0);
 430  429  }
 431  430  
           /*
            * Report the backing-object offset for addr: always 0 for a no-fault
            * segment.  The address space lock must be held.
            */
 432  431  /* ARGSUSED */
 433  432  static u_offset_t
 434  433  segnf_getoffset(struct seg *seg, caddr_t addr)
 435  434  {
 436  435          ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 437  436  
 438  437          return ((u_offset_t)0);
 439  438  }
 440  439  
           /*
            * Report the mapping type for addr: always MAP_SHARED.  The address
            * space lock must be held.
            */
 441  440  /* ARGSUSED */
 442  441  static int
 443  442  segnf_gettype(struct seg *seg, caddr_t addr)
 444  443  {
 445  444          ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 446  445  
 447  446          return (MAP_SHARED);
 448  447  }
 449  448  
           /*
            * Report the vnode behind addr: stores the address of the file-private
            * vnode nfvp in *vpp and returns 0.  The address space lock must be
            * held.
            */
 450  449  /* ARGSUSED */
 451  450  static int
 452  451  segnf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
 453  452  {
 454  453          ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 455  454  
 456  455          *vpp = &nfvp;
 457  456          return (0);
 458  457  }
 459  458  
 460  459  /*
 461  460   * segnf pages are not dumped, so we just return
            *
            * The body is intentionally empty.
 462  461   */
 463  462  /* ARGSUSED */
 464  463  static void
 465  464  segnf_dump(struct seg *seg)
 466  465  {}
 467  466  
           /*
            * Page locking is not supported on no-fault segments; every request
            * fails with ENOTSUP and none of the arguments is examined.
            */
 468  467  /*ARGSUSED*/
 469  468  static int
 470  469  segnf_pagelock(struct seg *seg, caddr_t addr, size_t len,
 471  470      struct page ***ppp, enum lock_type type, enum seg_rw rw)
 472  471  {
 473  472          return (ENOTSUP);
 474  473  }
 475  474  
           /*
            * Changing the page size is not supported on no-fault segments; every
            * request fails with ENOTSUP and none of the arguments is examined.
            */
 476  475  /*ARGSUSED*/
 477  476  static int
 478  477  segnf_setpagesize(struct seg *seg, caddr_t addr, size_t len,
 479  478      uint_t szc)
 480  479  {
 481  480          return (ENOTSUP);
 482  481  }
 483  482  
           /*
            * No memory id is available for a no-fault segment: always returns
            * ENODEV and leaves *memidp untouched.
            */
 484  483  /*ARGSUSED*/
 485  484  static int
 486  485  segnf_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
 487  486  {
 488  487          return (ENODEV);
 489  488  }
 490  489  
           /*
            * No lgroup memory policy is kept for a no-fault segment: always
            * returns NULL.
            */
 491  490  /*ARGSUSED*/
 492  491  static lgrp_mem_policy_info_t *
 493  492  segnf_getpolicy(struct seg *seg, caddr_t addr)
 494  493  {
 495  494          return (NULL);
 496  495  }

↓ open down ↓

388 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX