6146 seg_inherit_notsup is redundant
--- old/usr/src/uts/common/vm/seg_map.c
+++ new/usr/src/uts/common/vm/seg_map.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 27 /* All Rights Reserved */
28 28
29 29 /*
30 30 * Portions of this source code were derived from Berkeley 4.3 BSD
31 31 * under license from the Regents of the University of California.
32 32 */
33 33
34 34 /*
35 35 * VM - generic vnode mapping segment.
36 36 *
37 37 * The segmap driver is used only by the kernel to get faster (than seg_vn)
38 38 * mappings [lower routine overhead; more persistent cache] to random
39 39 * vnode/offsets. Note that the kernel may (and does) use seg_vn as well.
40 40 */
41 41
42 42 #include <sys/types.h>
43 43 #include <sys/t_lock.h>
44 44 #include <sys/param.h>
45 45 #include <sys/sysmacros.h>
46 46 #include <sys/buf.h>
47 47 #include <sys/systm.h>
48 48 #include <sys/vnode.h>
49 49 #include <sys/mman.h>
50 50 #include <sys/errno.h>
51 51 #include <sys/cred.h>
52 52 #include <sys/kmem.h>
53 53 #include <sys/vtrace.h>
54 54 #include <sys/cmn_err.h>
55 55 #include <sys/debug.h>
56 56 #include <sys/thread.h>
57 57 #include <sys/dumphdr.h>
58 58 #include <sys/bitmap.h>
59 59 #include <sys/lgrp.h>
60 60
61 61 #include <vm/seg_kmem.h>
62 62 #include <vm/hat.h>
63 63 #include <vm/as.h>
64 64 #include <vm/seg.h>
65 65 #include <vm/seg_kpm.h>
66 66 #include <vm/seg_map.h>
67 67 #include <vm/page.h>
68 68 #include <vm/pvn.h>
69 69 #include <vm/rm.h>
70 70
71 71 /*
72 72 * Private seg op routines.
73 73 */
74 74 static void segmap_free(struct seg *seg);
75 75 faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
76 76 size_t len, enum fault_type type, enum seg_rw rw);
77 77 static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
78 78 static int segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
79 79 uint_t prot);
80 80 static int segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
81 81 static int segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
82 82 uint_t *protv);
83 83 static u_offset_t segmap_getoffset(struct seg *seg, caddr_t addr);
84 84 static int segmap_gettype(struct seg *seg, caddr_t addr);
85 85 static int segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
86 86 static void segmap_dump(struct seg *seg);
87 87 static int segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
88 88 struct page ***ppp, enum lock_type type,
89 89 enum seg_rw rw);
90 90 static void segmap_badop(void);
91 91 static int segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
92 92 static lgrp_mem_policy_info_t *segmap_getpolicy(struct seg *seg,
93 93 caddr_t addr);
94 94 static int segmap_capable(struct seg *seg, segcapability_t capability);
95 95
96 96 /* segkpm support */
97 97 static caddr_t segmap_pagecreate_kpm(struct seg *, vnode_t *, u_offset_t,
98 98 struct smap *, enum seg_rw);
99 99 struct smap *get_smap_kpm(caddr_t, page_t **);
100 100
101 101 #define SEGMAP_BADOP(t) (t(*)())segmap_badop
102 102
103 103 static struct seg_ops segmap_ops = {
104 104 .dup = SEGMAP_BADOP(int),
105 105 .unmap = SEGMAP_BADOP(int),
106 106 .free = segmap_free,
107 107 .fault = segmap_fault,
108 108 .faulta = segmap_faulta,
109 109 .setprot = SEGMAP_BADOP(int),
110 110 .checkprot = segmap_checkprot,
111 111 .kluster = segmap_kluster,
112 112 .swapout = SEGMAP_BADOP(size_t),
113 113 .sync = SEGMAP_BADOP(int),
114 114 .incore = SEGMAP_BADOP(size_t),
115 115 .lockop = SEGMAP_BADOP(int),
116 116 .getprot = segmap_getprot,
117 117 .getoffset = segmap_getoffset,
118 118 .gettype = segmap_gettype,
119 119 .getvp = segmap_getvp,
120 120 .advise = SEGMAP_BADOP(int),
121 121 .dump = segmap_dump,
122 122 .pagelock = segmap_pagelock,
123 123 .setpagesize = SEGMAP_BADOP(int),
124 124 .getmemid = segmap_getmemid,
125 125 .getpolicy = segmap_getpolicy,
126 126 .capable = segmap_capable,
127 - .inherit = seg_inherit_notsup,
128 127 };
129 128
130 129 /*
131 130 * Private segmap routines.
132 131 */
133 132 static void segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
134 133 size_t len, enum seg_rw rw, struct smap *smp);
135 134 static void segmap_smapadd(struct smap *smp);
136 135 static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
137 136 u_offset_t off, int hashid);
138 137 static void segmap_hashout(struct smap *smp);
139 138
140 139
141 140 /*
142 141 * Statistics for segmap operations.
143 142 *
144 143 * No explicit locking to protect these stats.
145 144 */
146 145 struct segmapcnt segmapcnt = {
147 146 { "fault", KSTAT_DATA_ULONG },
148 147 { "faulta", KSTAT_DATA_ULONG },
149 148 { "getmap", KSTAT_DATA_ULONG },
150 149 { "get_use", KSTAT_DATA_ULONG },
151 150 { "get_reclaim", KSTAT_DATA_ULONG },
152 151 { "get_reuse", KSTAT_DATA_ULONG },
153 152 { "get_unused", KSTAT_DATA_ULONG },
154 153 { "get_nofree", KSTAT_DATA_ULONG },
155 154 { "rel_async", KSTAT_DATA_ULONG },
156 155 { "rel_write", KSTAT_DATA_ULONG },
157 156 { "rel_free", KSTAT_DATA_ULONG },
158 157 { "rel_abort", KSTAT_DATA_ULONG },
159 158 { "rel_dontneed", KSTAT_DATA_ULONG },
160 159 { "release", KSTAT_DATA_ULONG },
161 160 { "pagecreate", KSTAT_DATA_ULONG },
162 161 { "free_notfree", KSTAT_DATA_ULONG },
163 162 { "free_dirty", KSTAT_DATA_ULONG },
164 163 { "free", KSTAT_DATA_ULONG },
165 164 { "stolen", KSTAT_DATA_ULONG },
166 165 { "get_nomtx", KSTAT_DATA_ULONG }
167 166 };
168 167
169 168 kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
170 169 uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);
171 170
172 171 /*
173 172 * Return number of map pages in segment.
174 173 */
175 174 #define MAP_PAGES(seg) ((seg)->s_size >> MAXBSHIFT)
176 175
177 176 /*
178 177 * Translate addr into smap number within segment.
179 178 */
180 179 #define MAP_PAGE(seg, addr) (((addr) - (seg)->s_base) >> MAXBSHIFT)
181 180
182 181 /*
183 182 * Translate addr in seg into struct smap pointer.
184 183 */
185 184 #define GET_SMAP(seg, addr) \
186 185 &(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])
187 186
188 187 /*
189 188 * Bit in map (16 bit bitmap).
190 189 */
191 190 #define SMAP_BIT_MASK(bitindex) (1 << ((bitindex) & 0xf))
192 191
193 192 static int smd_colormsk = 0;
194 193 static int smd_ncolor = 0;
195 194 static int smd_nfree = 0;
196 195 static int smd_freemsk = 0;
197 196 #ifdef DEBUG
198 197 static int *colors_used;
199 198 #endif
200 199 static struct smap *smd_smap;
201 200 static struct smaphash *smd_hash;
202 201 #ifdef SEGMAP_HASHSTATS
203 202 static unsigned int *smd_hash_len;
204 203 #endif
205 204 static struct smfree *smd_free;
206 205 static ulong_t smd_hashmsk = 0;
207 206
208 207 #define SEGMAP_MAXCOLOR 2
209 208 #define SEGMAP_CACHE_PAD 64
210 209
211 210 union segmap_cpu {
212 211 struct {
213 212 uint32_t scpu_free_ndx[SEGMAP_MAXCOLOR];
214 213 struct smap *scpu_last_smap;
215 214 ulong_t scpu_getmap;
216 215 ulong_t scpu_release;
217 216 ulong_t scpu_get_reclaim;
218 217 ulong_t scpu_fault;
219 218 ulong_t scpu_pagecreate;
220 219 ulong_t scpu_get_reuse;
221 220 } scpu;
222 221 char scpu_pad[SEGMAP_CACHE_PAD];
223 222 };
224 223 static union segmap_cpu *smd_cpu;
225 224
226 225 /*
227 226 * There are three locks in seg_map:
228 227 * - per freelist mutexes
229 228 * - per hashchain mutexes
230 229 * - per smap mutexes
231 230 *
232 231 * The lock ordering is to get the smap mutex to lock down the slot
233 232 * first, then the hash lock (for hash in/out (vp, off) list) or the
234 233 * freelist lock to put the slot back on the free list.
235 234 *
236 235 * The hash search is done holding only the hashchain lock; when a wanted
237 236 * slot is found, we drop the hashchain lock and then lock the slot, so
238 237 * there is no overlapping of hashchain and smap locks. After the slot is
239 238 * locked, we verify again that the slot is still what we are looking
240 239 * for.
241 240 *
242 241 * Allocation of a free slot is done by holding the freelist lock,
243 242 * then locking the smap slot at the head of the freelist. This is
244 243 * in reversed lock order so mutex_tryenter() is used.
245 244 *
246 245 * The smap lock protects all fields in smap structure except for
247 246 * the link fields for hash/free lists which are protected by
248 247 * hashchain and freelist locks.
249 248 */
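/*
 * Illustrative sketch, not part of the file under review: the
 * reverse-order allocation described above. The freelist lock is taken
 * first, so the smap mutex of the slot at the head must be acquired
 * with mutex_tryenter() and the attempt abandoned (to be retried) if
 * it fails. The helper name is hypothetical; the real logic lives in
 * get_free_smp() below.
 */
static struct smap *
try_lock_free_head(struct smfree *sm)
{
	struct sm_freeq *allocq = sm->sm_allocq;
	struct smap *smp;

	mutex_enter(&allocq->smq_mtx);
	smp = allocq->smq_free;
	if (smp == NULL || !mutex_tryenter(SMAPMTX(smp))) {
		/* reverse lock order: must not block on the smap mutex */
		mutex_exit(&allocq->smq_mtx);
		return (NULL);			/* caller retries */
	}
	mutex_exit(&allocq->smq_mtx);
	return (smp);				/* smap mutex held */
}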
250 249
251 250 #define SHASHMTX(hashid) (&smd_hash[hashid].sh_mtx)
252 251
253 252 #define SMP2SMF(smp) (&smd_free[(smp - smd_smap) & smd_freemsk])
254 253 #define SMP2SMF_NDX(smp) (ushort_t)((smp - smd_smap) & smd_freemsk)
255 254
256 255 #define SMAPMTX(smp) (&smp->sm_mtx)
257 256
258 257 #define SMAP_HASHFUNC(vp, off, hashid) \
259 258 { \
260 259 hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
261 260 ((off) >> MAXBSHIFT)) & smd_hashmsk); \
262 261 }
263 262
264 263 /*
265 264 * The most frequently updated kstat counters are kept in the
266 265 * per cpu array to avoid hot cache blocks. The update function
267 266 * sums the cpu local counters to update the global counters.
268 267 */
269 268
270 269 /* ARGSUSED */
271 270 int
272 271 segmap_kstat_update(kstat_t *ksp, int rw)
273 272 {
274 273 int i;
275 274 ulong_t getmap, release, get_reclaim;
276 275 ulong_t fault, pagecreate, get_reuse;
277 276
278 277 if (rw == KSTAT_WRITE)
279 278 return (EACCES);
280 279 getmap = release = get_reclaim = (ulong_t)0;
281 280 fault = pagecreate = get_reuse = (ulong_t)0;
282 281 for (i = 0; i < max_ncpus; i++) {
283 282 getmap += smd_cpu[i].scpu.scpu_getmap;
284 283 release += smd_cpu[i].scpu.scpu_release;
285 284 get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
286 285 fault += smd_cpu[i].scpu.scpu_fault;
287 286 pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
288 287 get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
289 288 }
290 289 segmapcnt.smp_getmap.value.ul = getmap;
291 290 segmapcnt.smp_release.value.ul = release;
292 291 segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
293 292 segmapcnt.smp_fault.value.ul = fault;
294 293 segmapcnt.smp_pagecreate.value.ul = pagecreate;
295 294 segmapcnt.smp_get_reuse.value.ul = get_reuse;
296 295 return (0);
297 296 }
298 297
299 298 int
300 299 segmap_create(struct seg *seg, void *argsp)
301 300 {
302 301 struct segmap_data *smd;
303 302 struct smap *smp;
304 303 struct smfree *sm;
305 304 struct segmap_crargs *a = (struct segmap_crargs *)argsp;
306 305 struct smaphash *shashp;
307 306 union segmap_cpu *scpu;
308 307 long i, npages;
309 308 size_t hashsz;
310 309 uint_t nfreelist;
311 310 extern void prefetch_smap_w(void *);
312 311 extern int max_ncpus;
313 312
314 313 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
315 314
316 315 if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
317 316 panic("segkmap not MAXBSIZE aligned");
318 317 /*NOTREACHED*/
319 318 }
320 319
321 320 smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);
322 321
323 322 seg->s_data = (void *)smd;
324 323 seg->s_ops = &segmap_ops;
325 324 smd->smd_prot = a->prot;
326 325
327 326 /*
328 327 * Scale the number of smap freelists to be
329 328 * proportional to max_ncpus * number of virtual colors.
330 329 * The caller can over-ride this scaling by providing
331 330 * a non-zero a->nfreelist argument.
332 331 */
333 332 nfreelist = a->nfreelist;
334 333 if (nfreelist == 0)
335 334 nfreelist = max_ncpus;
336 335 else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
337 336 cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
338 337 "%d, using %d", nfreelist, max_ncpus);
339 338 nfreelist = max_ncpus;
340 339 }
341 340 if (!ISP2(nfreelist)) {
342 341 /* round up nfreelist to the next power of two. */
343 342 nfreelist = 1 << (highbit(nfreelist));
344 343 }
345 344
346 345 /*
347 346 * Get the number of virtual colors - must be a power of 2.
348 347 */
349 348 if (a->shmsize)
350 349 smd_ncolor = a->shmsize >> MAXBSHIFT;
351 350 else
352 351 smd_ncolor = 1;
353 352 ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
354 353 ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
355 354 smd_colormsk = smd_ncolor - 1;
356 355 smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
357 356 smd_freemsk = smd_nfree - 1;
358 357
359 358 /*
360 359 * Allocate and initialize the freelist headers.
361 360 * Note that sm_freeq[1] starts out as the release queue. This
362 361 * is known when the smap structures are initialized below.
363 362 */
364 363 smd_free = smd->smd_free =
365 364 kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
366 365 for (i = 0; i < smd_nfree; i++) {
367 366 sm = &smd->smd_free[i];
368 367 mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
369 368 mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
370 369 sm->sm_allocq = &sm->sm_freeq[0];
371 370 sm->sm_releq = &sm->sm_freeq[1];
372 371 }
373 372
374 373 /*
375 374 * Allocate and initialize the smap hash chain headers.
376 375 * Compute hash size rounding down to the next power of two.
377 376 */
378 377 npages = MAP_PAGES(seg);
379 378 smd->smd_npages = npages;
380 379 hashsz = npages / SMAP_HASHAVELEN;
381 380 hashsz = 1 << (highbit(hashsz)-1);
382 381 smd_hashmsk = hashsz - 1;
383 382 smd_hash = smd->smd_hash =
384 383 kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
385 384 #ifdef SEGMAP_HASHSTATS
386 385 smd_hash_len =
387 386 kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
388 387 #endif
389 388 for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
390 389 shashp->sh_hash_list = NULL;
391 390 mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
392 391 }
393 392
394 393 /*
395 394 * Allocate and initialize the smap structures.
396 395 * Link all slots onto the appropriate freelist.
397 396 * The smap array is large enough to affect boot time
398 397 * on large systems, so use memory prefetching and only
399 398 * go through the array 1 time. Inline an optimized version
400 399 * of segmap_smapadd to add structures to freelists with
401 400 * knowledge that no locks are needed here.
402 401 */
403 402 smd_smap = smd->smd_sm =
404 403 kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);
405 404
406 405 for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
407 406 smp >= smd->smd_sm; smp--) {
408 407 struct smap *smpfreelist;
409 408 struct sm_freeq *releq;
410 409
411 410 prefetch_smap_w((char *)smp);
412 411
413 412 smp->sm_vp = NULL;
414 413 smp->sm_hash = NULL;
415 414 smp->sm_off = 0;
416 415 smp->sm_bitmap = 0;
417 416 smp->sm_refcnt = 0;
418 417 mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
419 418 smp->sm_free_ndx = SMP2SMF_NDX(smp);
420 419
421 420 sm = SMP2SMF(smp);
422 421 releq = sm->sm_releq;
423 422
424 423 smpfreelist = releq->smq_free;
425 424 if (smpfreelist == 0) {
426 425 releq->smq_free = smp->sm_next = smp->sm_prev = smp;
427 426 } else {
428 427 smp->sm_next = smpfreelist;
429 428 smp->sm_prev = smpfreelist->sm_prev;
430 429 smpfreelist->sm_prev = smp;
431 430 smp->sm_prev->sm_next = smp;
432 431 releq->smq_free = smp->sm_next;
433 432 }
434 433
435 434 /*
436 435 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
437 436 */
438 437 smp->sm_flags = 0;
439 438
440 439 #ifdef SEGKPM_SUPPORT
441 440 /*
442 441 * Due to the fragile prefetch loop no
443 442 * separate function is used here.
444 443 */
445 444 smp->sm_kpme_next = NULL;
446 445 smp->sm_kpme_prev = NULL;
447 446 smp->sm_kpme_page = NULL;
448 447 #endif
449 448 }
450 449
451 450 /*
452 451 * Allocate the per color indices that distribute allocation
453 452 * requests over the free lists. Each cpu will have a private
454 453 * rotor index to spread the allocations evenly across the available
455 454 * smap freelists. Init the scpu_last_smap field to the first
456 455 * smap element so there is no need to check for NULL.
457 456 */
458 457 smd_cpu =
459 458 kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
460 459 for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
461 460 int j;
462 461 for (j = 0; j < smd_ncolor; j++)
463 462 scpu->scpu.scpu_free_ndx[j] = j;
464 463 scpu->scpu.scpu_last_smap = smd_smap;
465 464 }
466 465
467 466 vpm_init();
468 467
469 468 #ifdef DEBUG
470 469 /*
471 470 * Keep track of which colors are used more often.
472 471 */
473 472 colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
474 473 #endif /* DEBUG */
475 474
476 475 return (0);
477 476 }
478 477
479 478 static void
480 479 segmap_free(seg)
481 480 struct seg *seg;
482 481 {
483 482 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
484 483 }
485 484
486 485 /*
487 486 * Do a F_SOFTUNLOCK call over the range requested.
488 487 * The range must have already been F_SOFTLOCK'ed.
489 488 */
490 489 static void
491 490 segmap_unlock(
492 491 struct hat *hat,
493 492 struct seg *seg,
494 493 caddr_t addr,
495 494 size_t len,
496 495 enum seg_rw rw,
497 496 struct smap *smp)
498 497 {
499 498 page_t *pp;
500 499 caddr_t adr;
501 500 u_offset_t off;
502 501 struct vnode *vp;
503 502 kmutex_t *smtx;
504 503
505 504 ASSERT(smp->sm_refcnt > 0);
506 505
507 506 #ifdef lint
508 507 seg = seg;
509 508 #endif
510 509
511 510 if (segmap_kpm && IS_KPM_ADDR(addr)) {
512 511
513 512 /*
514 513 * We're called only from segmap_fault, and for a kpm based
515 514 * smap that call is a NOP, so dangerous things must have
516 515 * happened in the meantime. Pages are prefaulted
517 516 * and locked in segmap_getmapflt and they will not be
518 517 * unlocked until segmap_release.
519 518 */
520 519 panic("segmap_unlock: called with kpm addr %p", (void *)addr);
521 520 /*NOTREACHED*/
522 521 }
523 522
524 523 vp = smp->sm_vp;
525 524 off = smp->sm_off + (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
526 525
527 526 hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
528 527 for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
529 528 ushort_t bitmask;
530 529
531 530 /*
532 531 * Use page_find() instead of page_lookup() to
533 532 * find the page since we know that it has
534 533 * "shared" lock.
535 534 */
536 535 pp = page_find(vp, off);
537 536 if (pp == NULL) {
538 537 panic("segmap_unlock: page not found");
539 538 /*NOTREACHED*/
540 539 }
541 540
542 541 if (rw == S_WRITE) {
543 542 hat_setrefmod(pp);
544 543 } else if (rw != S_OTHER) {
545 544 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
546 545 "segmap_fault:pp %p vp %p offset %llx", pp, vp, off);
547 546 hat_setref(pp);
548 547 }
549 548
550 549 /*
551 550 * Clear bitmap, if the bit corresponding to "off" is set,
552 551 * since the page and translation are being unlocked.
553 552 */
554 553 bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);
555 554
556 555 /*
557 556 * Large Files: Following assertion is to verify
558 557 * the correctness of the cast to (int) above.
559 558 */
560 559 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
561 560 smtx = SMAPMTX(smp);
562 561 mutex_enter(smtx);
563 562 if (smp->sm_bitmap & bitmask) {
564 563 smp->sm_bitmap &= ~bitmask;
565 564 }
566 565 mutex_exit(smtx);
567 566
568 567 page_unlock(pp);
569 568 }
570 569 }
571 570
572 571 #define MAXPPB (MAXBSIZE/4096) /* assumes minimum page size of 4k */
573 572
574 573 /*
575 574 * This routine is called via a machine specific fault handling
576 575 * routine. It is also called by software routines wishing to
577 576 * lock or unlock a range of addresses.
578 577 *
579 578 * Note that this routine expects a page-aligned "addr".
580 579 */
581 580 faultcode_t
582 581 segmap_fault(
583 582 struct hat *hat,
584 583 struct seg *seg,
585 584 caddr_t addr,
586 585 size_t len,
587 586 enum fault_type type,
588 587 enum seg_rw rw)
589 588 {
590 589 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
591 590 struct smap *smp;
592 591 page_t *pp, **ppp;
593 592 struct vnode *vp;
594 593 u_offset_t off;
595 594 page_t *pl[MAXPPB + 1];
596 595 uint_t prot;
597 596 u_offset_t addroff;
598 597 caddr_t adr;
599 598 int err;
600 599 u_offset_t sm_off;
601 600 int hat_flag;
602 601
603 602 if (segmap_kpm && IS_KPM_ADDR(addr)) {
604 603 int newpage;
605 604 kmutex_t *smtx;
606 605
607 606 /*
608 607 * Pages are successfully prefaulted and locked in
609 608 * segmap_getmapflt and can't be unlocked until
610 609 * segmap_release. No hat mappings have to be locked
611 610 * and they also can't be unlocked as long as the
612 611 * caller owns an active kpm addr.
613 612 */
614 613 #ifndef DEBUG
615 614 if (type != F_SOFTUNLOCK)
616 615 return (0);
617 616 #endif
618 617
619 618 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
620 619 panic("segmap_fault: smap not found "
621 620 "for addr %p", (void *)addr);
622 621 /*NOTREACHED*/
623 622 }
624 623
625 624 smtx = SMAPMTX(smp);
626 625 #ifdef DEBUG
627 626 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
628 627 if (newpage) {
629 628 cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
630 629 (void *)smp);
631 630 }
632 631
633 632 if (type != F_SOFTUNLOCK) {
634 633 mutex_exit(smtx);
635 634 return (0);
636 635 }
637 636 #endif
638 637 mutex_exit(smtx);
639 638 vp = smp->sm_vp;
640 639 sm_off = smp->sm_off;
641 640
642 641 if (vp == NULL)
643 642 return (FC_MAKE_ERR(EIO));
644 643
645 644 ASSERT(smp->sm_refcnt > 0);
646 645
647 646 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
648 647 if (addroff + len > MAXBSIZE)
649 648 panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
650 649 (void *)(addr + len));
651 650
652 651 off = sm_off + addroff;
653 652
654 653 pp = page_find(vp, off);
655 654
656 655 if (pp == NULL)
657 656 panic("segmap_fault: softunlock page not found");
658 657
659 658 /*
660 659 * Set ref bit also here in case of S_OTHER to avoid the
661 660 * overhead of supporting other cases than F_SOFTUNLOCK
662 661 * with segkpm. We can do this because the underlying
663 662 * pages are locked anyway.
664 663 */
665 664 if (rw == S_WRITE) {
666 665 hat_setrefmod(pp);
667 666 } else {
668 667 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
669 668 "segmap_fault:pp %p vp %p offset %llx",
670 669 pp, vp, off);
671 670 hat_setref(pp);
672 671 }
673 672
674 673 return (0);
675 674 }
676 675
677 676 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
678 677 smp = GET_SMAP(seg, addr);
679 678 vp = smp->sm_vp;
680 679 sm_off = smp->sm_off;
681 680
682 681 if (vp == NULL)
683 682 return (FC_MAKE_ERR(EIO));
684 683
685 684 ASSERT(smp->sm_refcnt > 0);
686 685
687 686 addroff = (u_offset_t)((uintptr_t)addr & MAXBOFFSET);
688 687 if (addroff + len > MAXBSIZE) {
689 688 panic("segmap_fault: endaddr %p "
690 689 "exceeds MAXBSIZE chunk", (void *)(addr + len));
691 690 /*NOTREACHED*/
692 691 }
693 692 off = sm_off + addroff;
694 693
695 694 /*
696 695 * First handle the easy stuff
697 696 */
698 697 if (type == F_SOFTUNLOCK) {
699 698 segmap_unlock(hat, seg, addr, len, rw, smp);
700 699 return (0);
701 700 }
702 701
703 702 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
704 703 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
705 704 err = VOP_GETPAGE(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
706 705 seg, addr, rw, CRED(), NULL);
707 706
708 707 if (err)
709 708 return (FC_MAKE_ERR(err));
710 709
711 710 prot &= smd->smd_prot;
712 711
713 712 /*
714 713 * Handle all pages returned in the pl[] array.
715 714 * This loop is coded on the assumption that if
716 715 * there was no error from the VOP_GETPAGE routine,
717 716 * the page list returned will contain all the
718 717 * needed pages for the vp from [off..off + len].
719 718 */
720 719 ppp = pl;
721 720 while ((pp = *ppp++) != NULL) {
722 721 u_offset_t poff;
723 722 ASSERT(pp->p_vnode == vp);
724 723 hat_flag = HAT_LOAD;
725 724
726 725 /*
727 726 * Verify that the pages returned are within the range
728 727 * of this segmap region. Note that it is theoretically
729 728 * possible for pages outside this range to be returned,
730 729 * but it is not very likely. If we cannot use the
731 730 * page here, just release it and go on to the next one.
732 731 */
733 732 if (pp->p_offset < sm_off ||
734 733 pp->p_offset >= sm_off + MAXBSIZE) {
735 734 (void) page_release(pp, 1);
736 735 continue;
737 736 }
738 737
739 738 ASSERT(hat == kas.a_hat);
740 739 poff = pp->p_offset;
741 740 adr = addr + (poff - off);
742 741 if (adr >= addr && adr < addr + len) {
743 742 hat_setref(pp);
744 743 TRACE_3(TR_FAC_VM, TR_SEGMAP_FAULT,
745 744 "segmap_fault:pp %p vp %p offset %llx",
746 745 pp, vp, poff);
747 746 if (type == F_SOFTLOCK)
748 747 hat_flag = HAT_LOAD_LOCK;
749 748 }
750 749
751 750 /*
752 751 * Deal with VMODSORT pages here. If we know this is a write
753 752 * do the setmod now and allow write protection.
754 753 * As long as it's modified or not S_OTHER, remove write
755 754 * protection. With S_OTHER it's up to the FS to deal with this.
756 755 */
757 756 if (IS_VMODSORT(vp)) {
758 757 if (rw == S_WRITE)
759 758 hat_setmod(pp);
760 759 else if (rw != S_OTHER && !hat_ismod(pp))
761 760 prot &= ~PROT_WRITE;
762 761 }
763 762
764 763 hat_memload(hat, adr, pp, prot, hat_flag);
765 764 if (hat_flag != HAT_LOAD_LOCK)
766 765 page_unlock(pp);
767 766 }
768 767 return (0);
769 768 }
770 769
771 770 /*
772 771 * This routine is used to start I/O on pages asynchronously.
773 772 */
774 773 static faultcode_t
775 774 segmap_faulta(struct seg *seg, caddr_t addr)
776 775 {
777 776 struct smap *smp;
778 777 struct vnode *vp;
779 778 u_offset_t off;
780 779 int err;
781 780
782 781 if (segmap_kpm && IS_KPM_ADDR(addr)) {
783 782 int newpage;
784 783 kmutex_t *smtx;
785 784
786 785 /*
787 786 * Pages are successfully prefaulted and locked in
788 787 * segmap_getmapflt and can't be unlocked until
789 788 * segmap_release. No hat mappings have to be locked
790 789 * and they also can't be unlocked as long as the
791 790 * caller owns an active kpm addr.
792 791 */
793 792 #ifdef DEBUG
794 793 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
795 794 panic("segmap_faulta: smap not found "
796 795 "for addr %p", (void *)addr);
797 796 /*NOTREACHED*/
798 797 }
799 798
800 799 smtx = SMAPMTX(smp);
801 800 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
802 801 mutex_exit(smtx);
803 802 if (newpage)
804 803 cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
805 804 (void *)smp);
806 805 #endif
807 806 return (0);
808 807 }
809 808
810 809 segmapcnt.smp_faulta.value.ul++;
811 810 smp = GET_SMAP(seg, addr);
812 811
813 812 ASSERT(smp->sm_refcnt > 0);
814 813
815 814 vp = smp->sm_vp;
816 815 off = smp->sm_off;
817 816
818 817 if (vp == NULL) {
819 818 cmn_err(CE_WARN, "segmap_faulta - no vp");
820 819 return (FC_MAKE_ERR(EIO));
821 820 }
822 821
823 822 TRACE_3(TR_FAC_VM, TR_SEGMAP_GETPAGE,
824 823 "segmap_getpage:seg %p addr %p vp %p", seg, addr, vp);
825 824
826 825 err = VOP_GETPAGE(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
827 826 & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
828 827 seg, addr, S_READ, CRED(), NULL);
829 828
830 829 if (err)
831 830 return (FC_MAKE_ERR(err));
832 831 return (0);
833 832 }
834 833
835 834 /*ARGSUSED*/
836 835 static int
837 836 segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
838 837 {
839 838 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
840 839
841 840 ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));
842 841
843 842 /*
844 843 * Need not acquire the segment lock since
845 844 * "smd_prot" is a read-only field.
846 845 */
847 846 return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
848 847 }
849 848
850 849 static int
851 850 segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
852 851 {
853 852 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
854 853 size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
855 854
856 855 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
857 856
858 857 if (pgno != 0) {
859 858 do {
860 859 protv[--pgno] = smd->smd_prot;
861 860 } while (pgno != 0);
862 861 }
863 862 return (0);
864 863 }
865 864
866 865 static u_offset_t
867 866 segmap_getoffset(struct seg *seg, caddr_t addr)
868 867 {
869 868 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
870 869
871 870 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
872 871
873 872 return ((u_offset_t)smd->smd_sm->sm_off + (addr - seg->s_base));
874 873 }
875 874
876 875 /*ARGSUSED*/
877 876 static int
878 877 segmap_gettype(struct seg *seg, caddr_t addr)
879 878 {
880 879 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
881 880
882 881 return (MAP_SHARED);
883 882 }
884 883
885 884 /*ARGSUSED*/
886 885 static int
887 886 segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
888 887 {
889 888 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
890 889
891 890 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
892 891
893 892 /* XXX - This doesn't make any sense */
894 893 *vpp = smd->smd_sm->sm_vp;
895 894 return (0);
896 895 }
897 896
898 897 /*
899 898 * Check to see if it makes sense to do kluster/read ahead to
900 899 * addr + delta relative to the mapping at addr. We assume here
901 900 * that delta is a signed PAGESIZE'd multiple (which can be negative).
902 901 *
903 902 * For segmap we always "approve" of this action from our standpoint.
904 903 */
905 904 /*ARGSUSED*/
906 905 static int
907 906 segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
908 907 {
909 908 return (0);
910 909 }
911 910
912 911 static void
913 912 segmap_badop()
914 913 {
915 914 panic("segmap_badop");
916 915 /*NOTREACHED*/
917 916 }
918 917
919 918 /*
920 919 * Special private segmap operations
921 920 */
922 921
923 922 /*
924 923 * Add smap to the appropriate free list.
925 924 */
926 925 static void
927 926 segmap_smapadd(struct smap *smp)
928 927 {
929 928 struct smfree *sm;
930 929 struct smap *smpfreelist;
931 930 struct sm_freeq *releq;
932 931
933 932 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
934 933
935 934 if (smp->sm_refcnt != 0) {
936 935 panic("segmap_smapadd");
937 936 /*NOTREACHED*/
938 937 }
939 938
940 939 sm = &smd_free[smp->sm_free_ndx];
941 940 /*
942 941 * Add to the tail of the release queue
943 942 * Note that sm_releq and sm_allocq could toggle
944 943 * before we get the lock. This does not affect
945 944 * correctness as the 2 queues are only maintained
946 945 * to reduce lock pressure.
947 946 */
948 947 releq = sm->sm_releq;
949 948 if (releq == &sm->sm_freeq[0])
950 949 smp->sm_flags |= SM_QNDX_ZERO;
951 950 else
952 951 smp->sm_flags &= ~SM_QNDX_ZERO;
953 952 mutex_enter(&releq->smq_mtx);
954 953 smpfreelist = releq->smq_free;
955 954 if (smpfreelist == 0) {
956 955 int want;
957 956
958 957 releq->smq_free = smp->sm_next = smp->sm_prev = smp;
959 958 /*
960 959 * Both queue mutexes held to set sm_want;
961 960 * snapshot the value before dropping releq mutex.
962 961 * If sm_want appears after the releq mutex is dropped,
963 962 * then the smap just freed is already gone.
964 963 */
965 964 want = sm->sm_want;
966 965 mutex_exit(&releq->smq_mtx);
967 966 /*
968 967 * See if there was a waiter before dropping the releq mutex,
969 968 * then recheck after obtaining the sm_freeq[0] mutex, as
970 969 * another thread may have already signaled.
971 970 */
972 971 if (want) {
973 972 mutex_enter(&sm->sm_freeq[0].smq_mtx);
974 973 if (sm->sm_want)
975 974 cv_signal(&sm->sm_free_cv);
976 975 mutex_exit(&sm->sm_freeq[0].smq_mtx);
977 976 }
978 977 } else {
979 978 smp->sm_next = smpfreelist;
980 979 smp->sm_prev = smpfreelist->sm_prev;
981 980 smpfreelist->sm_prev = smp;
982 981 smp->sm_prev->sm_next = smp;
983 982 mutex_exit(&releq->smq_mtx);
984 983 }
985 984 }
986 985
987 986
988 987 static struct smap *
989 988 segmap_hashin(struct smap *smp, struct vnode *vp, u_offset_t off, int hashid)
990 989 {
991 990 struct smap **hpp;
992 991 struct smap *tmp;
993 992 kmutex_t *hmtx;
994 993
995 994 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
996 995 ASSERT(smp->sm_vp == NULL);
997 996 ASSERT(smp->sm_hash == NULL);
998 997 ASSERT(smp->sm_prev == NULL);
999 998 ASSERT(smp->sm_next == NULL);
1000 999 ASSERT(hashid >= 0 && hashid <= smd_hashmsk);
1001 1000
1002 1001 hmtx = SHASHMTX(hashid);
1003 1002
1004 1003 mutex_enter(hmtx);
1005 1004 /*
1006 1005 * First we need to verify that no one has created a smp
1007 1006 * with (vp,off) as its tag before we us.
1008 1007 */
1009 1008 for (tmp = smd_hash[hashid].sh_hash_list;
1010 1009 tmp != NULL; tmp = tmp->sm_hash)
1011 1010 if (tmp->sm_vp == vp && tmp->sm_off == off)
1012 1011 break;
1013 1012
1014 1013 if (tmp == NULL) {
1015 1014 /*
1016 1015 * No one created one yet.
1017 1016 *
1018 1017 * Funniness here - we don't increment the ref count on the
1019 1018 * vnode * even though we have another pointer to it here.
1020 1019 * The reason for this is that we don't want the fact that
1021 1020 * a seg_map entry somewhere refers to a vnode to prevent the
1022 1021 * vnode * itself from going away. This is because this
1023 1022 * reference to the vnode is a "soft one". In the case where
1024 1023 * a mapping is being used by a rdwr [or directory routine?]
1025 1024 * there already has to be a non-zero ref count on the vnode.
1026 1025 * In the case where the vp has been freed and the smap
1027 1026 * structure is on the free list, there are no pages in memory
1028 1027 * that can refer to the vnode. Thus even if we reuse the same
1029 1028 * vnode/smap structure for a vnode which has the same
1030 1029 * address but represents a different object, we are ok.
1031 1030 */
1032 1031 smp->sm_vp = vp;
1033 1032 smp->sm_off = off;
1034 1033
1035 1034 hpp = &smd_hash[hashid].sh_hash_list;
1036 1035 smp->sm_hash = *hpp;
1037 1036 *hpp = smp;
1038 1037 #ifdef SEGMAP_HASHSTATS
1039 1038 smd_hash_len[hashid]++;
1040 1039 #endif
1041 1040 }
1042 1041 mutex_exit(hmtx);
1043 1042
1044 1043 return (tmp);
1045 1044 }
1046 1045
1047 1046 static void
1048 1047 segmap_hashout(struct smap *smp)
1049 1048 {
1050 1049 struct smap **hpp, *hp;
1051 1050 struct vnode *vp;
1052 1051 kmutex_t *mtx;
1053 1052 int hashid;
1054 1053 u_offset_t off;
1055 1054
1056 1055 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
1057 1056
1058 1057 vp = smp->sm_vp;
1059 1058 off = smp->sm_off;
1060 1059
1061 1060 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
1062 1061 mtx = SHASHMTX(hashid);
1063 1062 mutex_enter(mtx);
1064 1063
1065 1064 hpp = &smd_hash[hashid].sh_hash_list;
1066 1065 for (;;) {
1067 1066 hp = *hpp;
1068 1067 if (hp == NULL) {
1069 1068 panic("segmap_hashout");
1070 1069 /*NOTREACHED*/
1071 1070 }
1072 1071 if (hp == smp)
1073 1072 break;
1074 1073 hpp = &hp->sm_hash;
1075 1074 }
1076 1075
1077 1076 *hpp = smp->sm_hash;
1078 1077 smp->sm_hash = NULL;
1079 1078 #ifdef SEGMAP_HASHSTATS
1080 1079 smd_hash_len[hashid]--;
1081 1080 #endif
1082 1081 mutex_exit(mtx);
1083 1082
1084 1083 smp->sm_vp = NULL;
1085 1084 smp->sm_off = (u_offset_t)0;
1086 1085
1087 1086 }
1088 1087
1089 1088 /*
1090 1089 * Attempt to free unmodified, unmapped, and unlocked segmap
1091 1090 * pages.
1092 1091 */
1093 1092 void
1094 1093 segmap_pagefree(struct vnode *vp, u_offset_t off)
1095 1094 {
1096 1095 u_offset_t pgoff;
1097 1096 page_t *pp;
1098 1097
1099 1098 for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {
1100 1099
1101 1100 if ((pp = page_lookup_nowait(vp, pgoff, SE_EXCL)) == NULL)
1102 1101 continue;
1103 1102
1104 1103 switch (page_release(pp, 1)) {
1105 1104 case PGREL_NOTREL:
1106 1105 segmapcnt.smp_free_notfree.value.ul++;
1107 1106 break;
1108 1107 case PGREL_MOD:
1109 1108 segmapcnt.smp_free_dirty.value.ul++;
1110 1109 break;
1111 1110 case PGREL_CLEAN:
1112 1111 segmapcnt.smp_free.value.ul++;
1113 1112 break;
1114 1113 }
1115 1114 }
1116 1115 }
1117 1116
1118 1117 /*
1119 1118 * Locks held on entry: smap lock
1120 1119 * Locks held on exit : smap lock.
1121 1120 */
1122 1121
1123 1122 static void
1124 1123 grab_smp(struct smap *smp, page_t *pp)
1125 1124 {
1126 1125 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
1127 1126 ASSERT(smp->sm_refcnt == 0);
1128 1127
1129 1128 if (smp->sm_vp != (struct vnode *)NULL) {
1130 1129 struct vnode *vp = smp->sm_vp;
1131 1130 u_offset_t off = smp->sm_off;
1132 1131 /*
1133 1132 * Destroy old vnode association and
1134 1133 * unload any hardware translations to
1135 1134 * the old object.
1136 1135 */
1137 1136 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
1138 1137 segmap_hashout(smp);
1139 1138
1140 1139 /*
1141 1140 * This node is off freelist and hashlist,
1142 1141 * so there is no reason to drop/reacquire sm_mtx
1143 1142 * across calls to hat_unload.
1144 1143 */
1145 1144 if (segmap_kpm) {
1146 1145 caddr_t vaddr;
1147 1146 int hat_unload_needed = 0;
1148 1147
1149 1148 /*
1150 1149 * unload kpm mapping
1151 1150 */
1152 1151 if (pp != NULL) {
1153 1152 vaddr = hat_kpm_page2va(pp, 1);
1154 1153 hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
1155 1154 page_unlock(pp);
1156 1155 }
1157 1156
1158 1157 /*
1159 1158 * Check if we have (also) the rare case of a
1160 1159 * non kpm mapping.
1161 1160 */
1162 1161 if (smp->sm_flags & SM_NOTKPM_RELEASED) {
1163 1162 hat_unload_needed = 1;
1164 1163 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
1165 1164 }
1166 1165
1167 1166 if (hat_unload_needed) {
1168 1167 hat_unload(kas.a_hat, segkmap->s_base +
1169 1168 ((smp - smd_smap) * MAXBSIZE),
1170 1169 MAXBSIZE, HAT_UNLOAD);
1171 1170 }
1172 1171
1173 1172 } else {
1174 1173 ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
1175 1174 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
1176 1175 hat_unload(kas.a_hat, segkmap->s_base +
1177 1176 ((smp - smd_smap) * MAXBSIZE),
1178 1177 MAXBSIZE, HAT_UNLOAD);
1179 1178 }
1180 1179 segmap_pagefree(vp, off);
1181 1180 }
1182 1181 }
1183 1182
1184 1183 static struct smap *
1185 1184 get_free_smp(int free_ndx)
1186 1185 {
1187 1186 struct smfree *sm;
1188 1187 kmutex_t *smtx;
1189 1188 struct smap *smp, *first;
1190 1189 struct sm_freeq *allocq, *releq;
1191 1190 struct kpme *kpme;
1192 1191 page_t *pp = NULL;
1193 1192 int end_ndx, page_locked = 0;
1194 1193
1195 1194 end_ndx = free_ndx;
1196 1195 sm = &smd_free[free_ndx];
1197 1196
1198 1197 retry_queue:
1199 1198 allocq = sm->sm_allocq;
1200 1199 mutex_enter(&allocq->smq_mtx);
1201 1200
1202 1201 if ((smp = allocq->smq_free) == NULL) {
1203 1202
1204 1203 skip_queue:
1205 1204 /*
1206 1205 * The alloc list is empty or this queue is being skipped;
1207 1206 * first see if the allocq toggled.
1208 1207 */
1209 1208 if (sm->sm_allocq != allocq) {
1210 1209 /* queue changed */
1211 1210 mutex_exit(&allocq->smq_mtx);
1212 1211 goto retry_queue;
1213 1212 }
1214 1213 releq = sm->sm_releq;
1215 1214 if (!mutex_tryenter(&releq->smq_mtx)) {
1216 1215 /* cannot get releq; a free smp may be there now */
1217 1216 mutex_exit(&allocq->smq_mtx);
1218 1217
1219 1218 /*
1220 1219 * This loop could spin forever if this thread has
1221 1220 * higher priority than the thread that is holding
1222 1221 * releq->smq_mtx. In order to force the other thread
1223 1222 * to run, we'll lock/unlock the mutex which is safe
1224 1223 * since we just unlocked the allocq mutex.
1225 1224 */
1226 1225 mutex_enter(&releq->smq_mtx);
1227 1226 mutex_exit(&releq->smq_mtx);
1228 1227 goto retry_queue;
1229 1228 }
1230 1229 if (releq->smq_free == NULL) {
1231 1230 /*
1232 1231 * This freelist is empty.
1233 1232 * This should not happen unless clients
1234 1233 * are failing to release the segmap
1235 1234 * window after accessing the data.
1236 1235 * Before resorting to sleeping, try
1237 1236 * the next list of the same color.
1238 1237 */
1239 1238 free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
1240 1239 if (free_ndx != end_ndx) {
1241 1240 mutex_exit(&releq->smq_mtx);
1242 1241 mutex_exit(&allocq->smq_mtx);
1243 1242 sm = &smd_free[free_ndx];
1244 1243 goto retry_queue;
1245 1244 }
1246 1245 /*
1247 1246 * Tried all freelists of the same color once,
1248 1247 * wait on this list and hope something gets freed.
1249 1248 */
1250 1249 segmapcnt.smp_get_nofree.value.ul++;
1251 1250 sm->sm_want++;
1252 1251 mutex_exit(&sm->sm_freeq[1].smq_mtx);
1253 1252 cv_wait(&sm->sm_free_cv,
1254 1253 &sm->sm_freeq[0].smq_mtx);
1255 1254 sm->sm_want--;
1256 1255 mutex_exit(&sm->sm_freeq[0].smq_mtx);
1257 1256 sm = &smd_free[free_ndx];
1258 1257 goto retry_queue;
1259 1258 } else {
1260 1259 /*
1261 1260 * Something on the rele queue; flip the alloc
1262 1261 * and rele queues and retry.
1263 1262 */
1264 1263 sm->sm_allocq = releq;
1265 1264 sm->sm_releq = allocq;
1266 1265 mutex_exit(&allocq->smq_mtx);
1267 1266 mutex_exit(&releq->smq_mtx);
1268 1267 if (page_locked) {
1269 1268 delay(hz >> 2);
1270 1269 page_locked = 0;
1271 1270 }
1272 1271 goto retry_queue;
1273 1272 }
1274 1273 } else {
1275 1274 /*
1276 1275 * Fastpath the case where we get the smap mutex
1277 1276 * on the first try.
1278 1277 */
1279 1278 first = smp;
1280 1279 next_smap:
1281 1280 smtx = SMAPMTX(smp);
1282 1281 if (!mutex_tryenter(smtx)) {
1283 1282 /*
1284 1283 * Another thread is trying to reclaim this slot.
1285 1284 * Skip to the next queue or smap.
1286 1285 */
1287 1286 if ((smp = smp->sm_next) == first) {
1288 1287 goto skip_queue;
1289 1288 } else {
1290 1289 goto next_smap;
1291 1290 }
1292 1291 } else {
1293 1292 /*
1294 1293 * if kpme exists, get shared lock on the page
1295 1294 */
1296 1295 if (segmap_kpm && smp->sm_vp != NULL) {
1297 1296
1298 1297 kpme = GET_KPME(smp);
1299 1298 pp = kpme->kpe_page;
1300 1299
1301 1300 if (pp != NULL) {
1302 1301 if (!page_trylock(pp, SE_SHARED)) {
1303 1302 smp = smp->sm_next;
1304 1303 mutex_exit(smtx);
1305 1304 page_locked = 1;
1306 1305
1307 1306 pp = NULL;
1308 1307
1309 1308 if (smp == first) {
1310 1309 goto skip_queue;
1311 1310 } else {
1312 1311 goto next_smap;
1313 1312 }
1314 1313 } else {
1315 1314 if (kpme->kpe_page == NULL) {
1316 1315 page_unlock(pp);
1317 1316 pp = NULL;
1318 1317 }
1319 1318 }
1320 1319 }
1321 1320 }
1322 1321
1323 1322 /*
1324 1323 * At this point, we've selected smp. Remove smp
1325 1324 * from its freelist. If smp is the first one in
1326 1325 * the freelist, update the head of the freelist.
1327 1326 */
1328 1327 if (first == smp) {
1329 1328 ASSERT(first == allocq->smq_free);
1330 1329 allocq->smq_free = smp->sm_next;
1331 1330 }
1332 1331
1333 1332 /*
1334 1333 * if the head of the freelist still points to smp,
1335 1334 * then there are no more free smaps in that list.
1336 1335 */
1337 1336 if (allocq->smq_free == smp)
1338 1337 /*
1339 1338 * Took the last one
1340 1339 */
1341 1340 allocq->smq_free = NULL;
1342 1341 else {
1343 1342 smp->sm_prev->sm_next = smp->sm_next;
1344 1343 smp->sm_next->sm_prev = smp->sm_prev;
1345 1344 }
1346 1345 mutex_exit(&allocq->smq_mtx);
1347 1346 smp->sm_prev = smp->sm_next = NULL;
1348 1347
1349 1348 /*
1350 1349 * if pp != NULL, pp must have been locked;
1351 1350 * grab_smp() unlocks pp.
1352 1351 */
1353 1352 ASSERT((pp == NULL) || PAGE_LOCKED(pp));
1354 1353 grab_smp(smp, pp);
1355 1354 /* return smp locked. */
1356 1355 ASSERT(SMAPMTX(smp) == smtx);
1357 1356 ASSERT(MUTEX_HELD(smtx));
1358 1357 return (smp);
1359 1358 }
1360 1359 }
1361 1360 }
1362 1361
1363 1362 /*
1364 1363 * Special public segmap operations
1365 1364 */
1366 1365
1367 1366 /*
1368 1367 * Create pages (without using VOP_GETPAGE) and load up translations to them.
1369 1368 * If softlock is TRUE, then set things up so that it looks like a call
1370 1369 * to segmap_fault with F_SOFTLOCK.
1371 1370 *
1372 1371 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
1373 1372 *
1374 1373 * All fields in the generic segment (struct seg) are considered to be
1375 1374 * read-only for "segmap" even though the kernel address space (kas) may
1376 1375 * not be locked, hence no lock is needed to access them.
1377 1376 */
1378 1377 int
1379 1378 segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
1380 1379 {
1381 1380 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
1382 1381 page_t *pp;
1383 1382 u_offset_t off;
1384 1383 struct smap *smp;
1385 1384 struct vnode *vp;
1386 1385 caddr_t eaddr;
1387 1386 int newpage = 0;
1388 1387 uint_t prot;
1389 1388 kmutex_t *smtx;
1390 1389 int hat_flag;
1391 1390
1392 1391 ASSERT(seg->s_as == &kas);
1393 1392
1394 1393 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1395 1394 /*
1396 1395 * Pages are successfully prefaulted and locked in
1397 1396 * segmap_getmapflt and can't be unlocked until
1398 1397 * segmap_release. The SM_KPM_NEWPAGE flag is set
1399 1398 * in segmap_pagecreate_kpm when new pages are created,
1400 1399 * and it is returned as the "newpage" indication here.
1401 1400 */
1402 1401 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
1403 1402 panic("segmap_pagecreate: smap not found "
1404 1403 "for addr %p", (void *)addr);
1405 1404 /*NOTREACHED*/
1406 1405 }
1407 1406
1408 1407 smtx = SMAPMTX(smp);
1409 1408 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
1410 1409 smp->sm_flags &= ~SM_KPM_NEWPAGE;
1411 1410 mutex_exit(smtx);
1412 1411
1413 1412 return (newpage);
1414 1413 }
1415 1414
1416 1415 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
1417 1416
1418 1417 eaddr = addr + len;
1419 1418 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1420 1419
1421 1420 smp = GET_SMAP(seg, addr);
1422 1421
1423 1422 /*
1424 1423 * We don't grab smp mutex here since we assume the smp
1425 1424 * has a refcnt set already which prevents the slot from
1426 1425 * changing its id.
1427 1426 */
1428 1427 ASSERT(smp->sm_refcnt > 0);
1429 1428
1430 1429 vp = smp->sm_vp;
1431 1430 off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
1432 1431 prot = smd->smd_prot;
1433 1432
1434 1433 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
1435 1434 hat_flag = HAT_LOAD;
1436 1435 pp = page_lookup(vp, off, SE_SHARED);
1437 1436 if (pp == NULL) {
1438 1437 ushort_t bitindex;
1439 1438
1440 1439 if ((pp = page_create_va(vp, off,
1441 1440 PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
1442 1441 panic("segmap_pagecreate: page_create failed");
1443 1442 /*NOTREACHED*/
1444 1443 }
1445 1444 newpage = 1;
1446 1445 page_io_unlock(pp);
1447 1446
1448 1447 /*
1449 1448 * Since pages created here do not contain valid
1450 1449 * data until the caller writes into them, the
1451 1450 * "exclusive" lock will not be dropped to prevent
1452 1451 * other users from accessing the page. We also
1453 1452 * have to lock the translation to prevent a fault
1454 1453 * from occurring when the virtual address mapped by
1455 1454 * this page is written into. This is necessary to
1456 1455 * avoid a deadlock since we haven't dropped the
1457 1456 * "exclusive" lock.
1458 1457 */
1459 1458 bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);
1460 1459
1461 1460 /*
1462 1461 * Large Files: The following assertion is to
1463 1462 * verify the cast above.
1464 1463 */
1465 1464 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
1466 1465 smtx = SMAPMTX(smp);
1467 1466 mutex_enter(smtx);
1468 1467 smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
1469 1468 mutex_exit(smtx);
1470 1469
1471 1470 hat_flag = HAT_LOAD_LOCK;
1472 1471 } else if (softlock) {
1473 1472 hat_flag = HAT_LOAD_LOCK;
1474 1473 }
1475 1474
1476 1475 if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
1477 1476 hat_setmod(pp);
1478 1477
1479 1478 hat_memload(kas.a_hat, addr, pp, prot, hat_flag);
1480 1479
1481 1480 if (hat_flag != HAT_LOAD_LOCK)
1482 1481 page_unlock(pp);
1483 1482
1484 1483 TRACE_5(TR_FAC_VM, TR_SEGMAP_PAGECREATE,
1485 1484 "segmap_pagecreate:seg %p addr %p pp %p vp %p offset %llx",
1486 1485 seg, addr, pp, vp, off);
1487 1486 }
1488 1487
1489 1488 return (newpage);
1490 1489 }
1491 1490
1492 1491 void
1493 1492 segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
1494 1493 {
1495 1494 struct smap *smp;
1496 1495 ushort_t bitmask;
1497 1496 page_t *pp;
1498 1497 struct vnode *vp;
1499 1498 u_offset_t off;
1500 1499 caddr_t eaddr;
1501 1500 kmutex_t *smtx;
1502 1501
1503 1502 ASSERT(seg->s_as == &kas);
1504 1503
1505 1504 eaddr = addr + len;
1506 1505 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1507 1506
1508 1507 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1509 1508 /*
1510 1509 * Pages are successfully prefaulted and locked in
1511 1510 * segmap_getmapflt and can't be unlocked until
1512 1511 * segmap_release, so no pages or hat mappings have
1513 1512 * to be unlocked at this point.
1514 1513 */
1515 1514 #ifdef DEBUG
1516 1515 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
1517 1516 panic("segmap_pageunlock: smap not found "
1518 1517 "for addr %p", (void *)addr);
1519 1518 /*NOTREACHED*/
1520 1519 }
1521 1520
1522 1521 ASSERT(smp->sm_refcnt > 0);
1523 1522 mutex_exit(SMAPMTX(smp));
1524 1523 #endif
1525 1524 return;
1526 1525 }
1527 1526
1528 1527 smp = GET_SMAP(seg, addr);
1529 1528 smtx = SMAPMTX(smp);
1530 1529
1531 1530 ASSERT(smp->sm_refcnt > 0);
1532 1531
1533 1532 vp = smp->sm_vp;
1534 1533 off = smp->sm_off + ((u_offset_t)((uintptr_t)addr & MAXBOFFSET));
1535 1534
1536 1535 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
1537 1536 bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);
1538 1537
1539 1538 /*
1540 1539 * Large Files: Following assertion is to verify
1541 1540 * the correctness of the cast to (int) above.
1542 1541 */
1543 1542 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
1544 1543
1545 1544 /*
1546 1545 * If the bit corresponding to "off" is set,
1547 1546 * clear this bit in the bitmap, unlock translations,
1548 1547 * and release the "exclusive" lock on the page.
1549 1548 */
1550 1549 if (smp->sm_bitmap & bitmask) {
1551 1550 mutex_enter(smtx);
1552 1551 smp->sm_bitmap &= ~bitmask;
1553 1552 mutex_exit(smtx);
1554 1553
1555 1554 hat_unlock(kas.a_hat, addr, PAGESIZE);
1556 1555
1557 1556 /*
1558 1557 * Use page_find() instead of page_lookup() to
1559 1558 * find the page since we know that it has
1560 1559 * "exclusive" lock.
1561 1560 */
1562 1561 pp = page_find(vp, off);
1563 1562 if (pp == NULL) {
1564 1563 panic("segmap_pageunlock: page not found");
1565 1564 /*NOTREACHED*/
1566 1565 }
1567 1566 if (rw == S_WRITE) {
1568 1567 hat_setrefmod(pp);
1569 1568 } else if (rw != S_OTHER) {
1570 1569 hat_setref(pp);
1571 1570 }
1572 1571
1573 1572 page_unlock(pp);
1574 1573 }
1575 1574 }
1576 1575 }
1577 1576
1578 1577 caddr_t
1579 1578 segmap_getmap(struct seg *seg, struct vnode *vp, u_offset_t off)
1580 1579 {
1581 1580 return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
1582 1581 }
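/*
 * Illustrative sketch, not part of the file under review: the classic
 * consumer pattern for segmap_getmap()/segmap_release(), as used by
 * file system read paths. Everything except the segmap and uiomove
 * calls is hypothetical.
 */
static int
fs_read_chunk(struct vnode *vp, uio_t *uiop)
{
	u_offset_t off = uiop->uio_loffset;
	size_t mapon = off & MAXBOFFSET;	/* offset within the slot */
	size_t n = MIN(MAXBSIZE - mapon, uiop->uio_resid);
	caddr_t base;
	int error;

	/* returned address is MAXBSIZE aligned; add mapon to index it */
	base = segmap_getmap(segkmap, vp, off & (offset_t)MAXBMASK);
	error = uiomove(base + mapon, n, UIO_READ, uiop);
	if (error == 0)
		error = segmap_release(segkmap, base, 0);
	else
		(void) segmap_release(segkmap, base, SM_INVAL);
	return (error);
}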
1583 1582
1584 1583 /*
1585 1584 * This is the magic virtual address that offset 0 of an ELF
1586 1585 * file gets mapped to in user space. This is used to pick
1587 1586 * the vac color on the freelist.
1588 1587 */
1589 1588 #define ELF_OFFZERO_VA (0x10000)
1590 1589 /*
1591 1590 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
1592 1591 * in the range [off, off + len). off doesn't need to be MAXBSIZE aligned.
1593 1592 * The return address is always MAXBSIZE aligned.
1594 1593 *
1595 1594 * If forcefault is nonzero and the MMU translations haven't yet been created,
1596 1595 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
1597 1596 */
1598 1597 caddr_t
1599 1598 segmap_getmapflt(
1600 1599 struct seg *seg,
1601 1600 struct vnode *vp,
1602 1601 u_offset_t off,
1603 1602 size_t len,
1604 1603 int forcefault,
1605 1604 enum seg_rw rw)
1606 1605 {
1607 1606 struct smap *smp, *nsmp;
1608 1607 extern struct vnode *common_specvp();
1609 1608 caddr_t baseaddr; /* MAXBSIZE aligned */
1610 1609 u_offset_t baseoff;
1611 1610 int newslot;
1612 1611 caddr_t vaddr;
1613 1612 int color, hashid;
1614 1613 kmutex_t *hashmtx, *smapmtx;
1615 1614 struct smfree *sm;
1616 1615 page_t *pp;
1617 1616 struct kpme *kpme;
1618 1617 uint_t prot;
1619 1618 caddr_t base;
1620 1619 page_t *pl[MAXPPB + 1];
1621 1620 int error;
1622 1621 int is_kpm = 1;
1623 1622
1624 1623 ASSERT(seg->s_as == &kas);
1625 1624 ASSERT(seg == segkmap);
1626 1625
1627 1626 baseoff = off & (offset_t)MAXBMASK;
1628 1627 if (off + len > baseoff + MAXBSIZE) {
1629 1628 panic("segmap_getmap bad len");
1630 1629 /*NOTREACHED*/
1631 1630 }
1632 1631
1633 1632 /*
1634 1633 * If this is a block device we have to be sure to use the
1635 1634 * "common" block device vnode for the mapping.
1636 1635 */
1637 1636 if (vp->v_type == VBLK)
1638 1637 vp = common_specvp(vp);
1639 1638
1640 1639 smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;
1641 1640
1642 1641 if (segmap_kpm == 0 ||
1643 1642 (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
1644 1643 is_kpm = 0;
1645 1644 }
1646 1645
1647 1646 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
1648 1647 hashmtx = SHASHMTX(hashid);
1649 1648
1650 1649 retry_hash:
1651 1650 mutex_enter(hashmtx);
1652 1651 for (smp = smd_hash[hashid].sh_hash_list;
1653 1652 smp != NULL; smp = smp->sm_hash)
1654 1653 if (smp->sm_vp == vp && smp->sm_off == baseoff)
1655 1654 break;
1656 1655 mutex_exit(hashmtx);
1657 1656
1658 1657 vrfy_smp:
1659 1658 if (smp != NULL) {
1660 1659
1661 1660 ASSERT(vp->v_count != 0);
1662 1661
1663 1662 /*
1664 1663 * Get smap lock and recheck its tag. The hash lock
1665 1664 * is dropped since the hash is based on (vp, off)
1666 1665 * and (vp, off) won't change when we have smap mtx.
1667 1666 */
1668 1667 smapmtx = SMAPMTX(smp);
1669 1668 mutex_enter(smapmtx);
1670 1669 if (smp->sm_vp != vp || smp->sm_off != baseoff) {
1671 1670 mutex_exit(smapmtx);
1672 1671 goto retry_hash;
1673 1672 }
1674 1673
1675 1674 if (smp->sm_refcnt == 0) {
1676 1675
1677 1676 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;
1678 1677
1679 1678 /*
1680 1679 * Could still be on the free list. However, this
1681 1680 * could also be an smp that is transitioning from
1682 1681 * the free list when we have too much contention
1683 1682 * for the smapmtx's. In this case, we have an
1684 1683 * unlocked smp that is not on the free list any
1685 1684 * longer, but still has a 0 refcnt. The only way
1686 1685 * to be sure is to check the freelist pointers.
1687 1686 * Since we now have the smapmtx, we are guaranteed
1688 1687 * that the (vp, off) won't change, so we are safe
1689 1688 * to reclaim it. get_free_smp() knows that this
1690 1689 * can happen, and it will check the refcnt.
1691 1690 */
1692 1691
1693 1692 if ((smp->sm_next != NULL)) {
1694 1693 struct sm_freeq *freeq;
1695 1694
1696 1695 ASSERT(smp->sm_prev != NULL);
1697 1696 sm = &smd_free[smp->sm_free_ndx];
1698 1697
1699 1698 if (smp->sm_flags & SM_QNDX_ZERO)
1700 1699 freeq = &sm->sm_freeq[0];
1701 1700 else
1702 1701 freeq = &sm->sm_freeq[1];
1703 1702
1704 1703 mutex_enter(&freeq->smq_mtx);
1705 1704 if (freeq->smq_free != smp) {
1706 1705 /*
1707 1706 * fastpath normal case
1708 1707 */
1709 1708 smp->sm_prev->sm_next = smp->sm_next;
1710 1709 smp->sm_next->sm_prev = smp->sm_prev;
1711 1710 } else if (smp == smp->sm_next) {
1712 1711 /*
1713 1712 * Taking the last smap on freelist
1714 1713 */
1715 1714 freeq->smq_free = NULL;
1716 1715 } else {
1717 1716 /*
1718 1717 * Reclaiming 1st smap on list
1719 1718 */
1720 1719 freeq->smq_free = smp->sm_next;
1721 1720 smp->sm_prev->sm_next = smp->sm_next;
1722 1721 smp->sm_next->sm_prev = smp->sm_prev;
1723 1722 }
1724 1723 mutex_exit(&freeq->smq_mtx);
1725 1724 smp->sm_prev = smp->sm_next = NULL;
1726 1725 } else {
1727 1726 ASSERT(smp->sm_prev == NULL);
1728 1727 segmapcnt.smp_stolen.value.ul++;
1729 1728 }
1730 1729
1731 1730 } else {
1732 1731 segmapcnt.smp_get_use.value.ul++;
1733 1732 }
1734 1733 smp->sm_refcnt++; /* another user */
1735 1734
1736 1735 /*
1737 1736 * We don't invoke segmap_fault via TLB miss, so we set ref
1738 1737 * and mod bits in advance. For S_OTHER we set them in
1739 1738 * segmap_fault F_SOFTUNLOCK.
1740 1739 */
1741 1740 if (is_kpm) {
1742 1741 if (rw == S_WRITE) {
1743 1742 smp->sm_flags |= SM_WRITE_DATA;
1744 1743 } else if (rw == S_READ) {
1745 1744 smp->sm_flags |= SM_READ_DATA;
1746 1745 }
1747 1746 }
1748 1747 mutex_exit(smapmtx);
1749 1748
1750 1749 newslot = 0;
1751 1750 } else {
1752 1751
1753 1752 uint32_t free_ndx, *free_ndxp;
1754 1753 union segmap_cpu *scpu;
1755 1754
1756 1755 /*
1757 1756 * On a PAC machine or a machine with anti-alias
1758 1757 * hardware, smd_colormsk will be zero.
1759 1758 *
1760 1759 		 * On a VAC machine, pick color by offset in the file
1761 1760 		 * so we won't get VAC conflicts on elf files.
1762 1761 		 * On data files, color does not matter, but we
1763 1762 		 * don't know what kind of file it is, so we always
1764 1763 		 * pick color by offset. This causes the color
1765 1764 		 * corresponding to file offset zero to be used more
1766 1765 		 * heavily.
1767 1766 */
1768 1767 color = (baseoff >> MAXBSHIFT) & smd_colormsk;
1769 1768 scpu = smd_cpu+CPU->cpu_seqid;
1770 1769 free_ndxp = &scpu->scpu.scpu_free_ndx[color];
1771 1770 free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
1772 1771 #ifdef DEBUG
1773 1772 colors_used[free_ndx]++;
1774 1773 #endif /* DEBUG */
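		/*
		 * Worked example (illustrative numbers, not taken from
		 * this file): with an 8K MAXBSIZE and an smd_colormsk of
		 * 0x3, file offsets 0, 8K, 16K and 24K yield colors 0,
		 * 1, 2 and 3, so consecutive MAXBSIZE windows of one
		 * file rotate through the virtual colors.
		 */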
1775 1774
1776 1775 /*
1777 1776 * Get a locked smp slot from the free list.
1778 1777 */
1779 1778 smp = get_free_smp(free_ndx);
1780 1779 smapmtx = SMAPMTX(smp);
1781 1780
1782 1781 ASSERT(smp->sm_vp == NULL);
1783 1782
1784 1783 if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
1785 1784 /*
1786 1785 * Failed to hashin, there exists one now.
1787 1786 * Return the smp we just allocated.
1788 1787 */
1789 1788 segmap_smapadd(smp);
1790 1789 mutex_exit(smapmtx);
1791 1790
1792 1791 smp = nsmp;
1793 1792 goto vrfy_smp;
1794 1793 }
1795 1794 smp->sm_refcnt++; /* another user */
1796 1795
1797 1796 /*
1798 1797 * We don't invoke segmap_fault via TLB miss, so we set ref
1799 1798 * and mod bits in advance. For S_OTHER we set them in
1800 1799 * segmap_fault F_SOFTUNLOCK.
1801 1800 */
1802 1801 if (is_kpm) {
1803 1802 if (rw == S_WRITE) {
1804 1803 smp->sm_flags |= SM_WRITE_DATA;
1805 1804 } else if (rw == S_READ) {
1806 1805 smp->sm_flags |= SM_READ_DATA;
1807 1806 }
1808 1807 }
1809 1808 mutex_exit(smapmtx);
1810 1809
1811 1810 newslot = 1;
1812 1811 }
1813 1812
1814 1813 if (!is_kpm)
1815 1814 goto use_segmap_range;
1816 1815
1817 1816 /*
1818 1817 * Use segkpm
1819 1818 */
1820 1819 /* Lint directive required until 6746211 is fixed */
1821 1820 /*CONSTCOND*/
1822 1821 ASSERT(PAGESIZE == MAXBSIZE);
1823 1822
1824 1823 /*
1825 1824 * remember the last smp faulted on this cpu.
1826 1825 */
1827 1826 (smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;
1828 1827
1829 1828 if (forcefault == SM_PAGECREATE) {
1830 1829 baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
1831 1830 return (baseaddr);
1832 1831 }
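	/*
	 * Note: an SM_PAGECREATE caller is expected to follow up with
	 * segmap_pagecreate, which consumes the SM_KPM_NEWPAGE flag set
	 * by segmap_pagecreate_kpm; segmap_release clears the flag when
	 * segmap_pagecreate is never called (see below).
	 */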
1833 1832
1834 1833 if (newslot == 0 &&
1835 1834 (pp = GET_KPME(smp)->kpe_page) != NULL) {
1836 1835
1837 1836 /* fastpath */
1838 1837 switch (rw) {
1839 1838 case S_READ:
1840 1839 case S_WRITE:
1841 1840 if (page_trylock(pp, SE_SHARED)) {
1842 1841 if (PP_ISFREE(pp) ||
1843 1842 !(pp->p_vnode == vp &&
1844 1843 pp->p_offset == baseoff)) {
1845 1844 page_unlock(pp);
1846 1845 pp = page_lookup(vp, baseoff,
1847 1846 SE_SHARED);
1848 1847 }
1849 1848 } else {
1850 1849 pp = page_lookup(vp, baseoff, SE_SHARED);
1851 1850 }
1852 1851
1853 1852 if (pp == NULL) {
1854 1853 ASSERT(GET_KPME(smp)->kpe_page == NULL);
1855 1854 break;
1856 1855 }
1857 1856
1858 1857 if (rw == S_WRITE &&
1859 1858 hat_page_getattr(pp, P_MOD | P_REF) !=
1860 1859 (P_MOD | P_REF)) {
1861 1860 page_unlock(pp);
1862 1861 break;
1863 1862 }
1864 1863
1865 1864 /*
1866 1865 * We have the p_selock as reader, grab_smp
1867 1866 * can't hit us, we have bumped the smap
1868 1867 * refcnt and hat_pageunload needs the
1869 1868 * p_selock exclusive.
1870 1869 */
1871 1870 kpme = GET_KPME(smp);
1872 1871 if (kpme->kpe_page == pp) {
1873 1872 baseaddr = hat_kpm_page2va(pp, 0);
1874 1873 } else if (kpme->kpe_page == NULL) {
1875 1874 baseaddr = hat_kpm_mapin(pp, kpme);
1876 1875 } else {
1877 1876 panic("segmap_getmapflt: stale "
1878 1877 "kpme page, kpme %p", (void *)kpme);
1879 1878 /*NOTREACHED*/
1880 1879 }
1881 1880
1882 1881 /*
1883 1882 * We don't invoke segmap_fault via TLB miss,
1884 1883 * so we set ref and mod bits in advance.
1885 1884 			 * For S_OTHER we set them in segmap_fault
1886 1885 			 * F_SOFTUNLOCK.
1887 1886 */
1888 1887 if (rw == S_READ && !hat_isref(pp))
1889 1888 hat_setref(pp);
1890 1889
1891 1890 return (baseaddr);
1892 1891 default:
1893 1892 break;
1894 1893 }
1895 1894 }
1896 1895
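	/*
	 * No usable cached kpm mapping; take the slow path, ask the
	 * filesystem for the page, then map it through segkpm.
	 */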
1897 1896 base = segkpm_create_va(baseoff);
1898 1897 error = VOP_GETPAGE(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
1899 1898 seg, base, rw, CRED(), NULL);
1900 1899
1901 1900 pp = pl[0];
1902 1901 if (error || pp == NULL) {
1903 1902 /*
1904 1903 * Use segmap address slot and let segmap_fault deal
1905 1904 * with the error cases. There is no error return
1906 1905 * possible here.
1907 1906 */
1908 1907 goto use_segmap_range;
1909 1908 }
1910 1909
1911 1910 ASSERT(pl[1] == NULL);
1912 1911
1913 1912 /*
1914 1913 * When prot is not returned w/ PROT_ALL the returned pages
1915 1914 * are not backed by fs blocks. For most of the segmap users
1916 1915 * this is no problem, they don't write to the pages in the
1917 1916 * same request and therefore don't rely on a following
1918 1917 	 * trap driven segmap_fault. For SM_LOCKPROTO users it
1919 1918 	 * is safer to use segkmap addresses so that protection
1920 1919 	 * faults can be handled by segmap_fault.
1921 1920 */
1922 1921 if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
1923 1922 /*
1924 1923 * Use segmap address slot and let segmap_fault
1925 1924 * do the error return.
1926 1925 */
1927 1926 ASSERT(rw != S_WRITE);
1928 1927 ASSERT(PAGE_LOCKED(pp));
1929 1928 page_unlock(pp);
1930 1929 forcefault = 0;
1931 1930 goto use_segmap_range;
1932 1931 }
1933 1932
1934 1933 /*
1935 1934 * We have the p_selock as reader, grab_smp can't hit us, we
1936 1935 * have bumped the smap refcnt and hat_pageunload needs the
1937 1936 * p_selock exclusive.
1938 1937 */
1939 1938 kpme = GET_KPME(smp);
1940 1939 if (kpme->kpe_page == pp) {
1941 1940 baseaddr = hat_kpm_page2va(pp, 0);
1942 1941 } else if (kpme->kpe_page == NULL) {
1943 1942 baseaddr = hat_kpm_mapin(pp, kpme);
1944 1943 } else {
1945 1944 panic("segmap_getmapflt: stale kpme page after "
1946 1945 "VOP_GETPAGE, kpme %p", (void *)kpme);
1947 1946 /*NOTREACHED*/
1948 1947 }
1949 1948
1950 1949 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
1951 1950
1952 1951 return (baseaddr);
1953 1952
1954 1953
1955 1954 use_segmap_range:
1956 1955 baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
1957 1956 TRACE_4(TR_FAC_VM, TR_SEGMAP_GETMAP,
1958 1957 "segmap_getmap:seg %p addr %p vp %p offset %llx",
1959 1958 seg, baseaddr, vp, baseoff);
1960 1959
1961 1960 /*
1962 1961 * Prefault the translations
1963 1962 */
1964 1963 vaddr = baseaddr + (off - baseoff);
1965 1964 if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {
1966 1965
1967 1966 caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
1968 1967 (uintptr_t)PAGEMASK);
1969 1968
1970 1969 (void) segmap_fault(kas.a_hat, seg, pgaddr,
1971 1970 (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
1972 1971 F_INVAL, rw);
1973 1972 }
1974 1973
1975 1974 return (baseaddr);
1976 1975 }
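
For orientation, here is a minimal sketch of how a filesystem read path typically consumes this interface. fs_read_sketch() is a hypothetical name and the loop is a simplification of the usual VOP_READ pattern; only segmap_getmapflt(), uiomove() and segmap_release() are the real calls.

/*
 * Sketch only: a simplified VOP_READ-style consumer of segkmap.
 * fs_read_sketch() is hypothetical; real callers add locking,
 * rlimit and EOF handling.
 */
static int
fs_read_sketch(vnode_t *vp, uio_t *uio)
{
	int error = 0;

	while (error == 0 && uio->uio_resid > 0) {
		u_offset_t off = uio->uio_loffset;
		ulong_t mapon = off & (offset_t)MAXBOFFSET;
		size_t n = MIN(MAXBSIZE - mapon, (size_t)uio->uio_resid);
		caddr_t base;

		/* Map the MAXBSIZE window; forcefault != 0 prefaults it. */
		base = segmap_getmapflt(segkmap, vp, off, n, 1, S_READ);

		/* Copy to the user buffer through the kernel mapping. */
		error = uiomove(base + mapon, n, UIO_READ, uio);

		/* Drop the window; flags == 0 means no VOP_PUTPAGE. */
		if (error == 0)
			error = segmap_release(segkmap, base, 0);
		else
			(void) segmap_release(segkmap, base, 0);
	}
	return (error);
}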
1977 1976
1978 1977 int
1979 1978 segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
1980 1979 {
1981 1980 struct smap *smp;
1982 1981 int error;
1983 1982 int bflags = 0;
1984 1983 struct vnode *vp;
1985 1984 u_offset_t offset;
1986 1985 kmutex_t *smtx;
1987 1986 int is_kpm = 0;
1988 1987 page_t *pp;
1989 1988
1990 1989 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1991 1990
1992 1991 if (((uintptr_t)addr & MAXBOFFSET) != 0) {
1993 1992 panic("segmap_release: addr %p not "
1994 1993 "MAXBSIZE aligned", (void *)addr);
1995 1994 /*NOTREACHED*/
1996 1995 }
1997 1996
1998 1997 if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
1999 1998 panic("segmap_release: smap not found "
2000 1999 "for addr %p", (void *)addr);
2001 2000 /*NOTREACHED*/
2002 2001 }
2003 2002
2004 2003 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
2005 2004 "segmap_relmap:seg %p addr %p smp %p",
2006 2005 seg, addr, smp);
2007 2006
2008 2007 smtx = SMAPMTX(smp);
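		/* get_smap_kpm() returned smp with this mutex already held. */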
2009 2008
2010 2009 /*
2011 2010 * For compatibility reasons segmap_pagecreate_kpm sets this
2012 2011 		 * flag so that a following segmap_pagecreate can report
2013 2012 		 * it via its "newpage" return value. When segmap_pagecreate
2014 2013 		 * is not called at all, we clear it now.
2015 2014 */
2016 2015 smp->sm_flags &= ~SM_KPM_NEWPAGE;
2017 2016 is_kpm = 1;
2018 2017 if (smp->sm_flags & SM_WRITE_DATA) {
2019 2018 hat_setrefmod(pp);
2020 2019 } else if (smp->sm_flags & SM_READ_DATA) {
2021 2020 hat_setref(pp);
2022 2021 }
2023 2022 } else {
2024 2023 if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
2025 2024 ((uintptr_t)addr & MAXBOFFSET) != 0) {
2026 2025 panic("segmap_release: bad addr %p", (void *)addr);
2027 2026 /*NOTREACHED*/
2028 2027 }
2029 2028 smp = GET_SMAP(seg, addr);
2030 2029
2031 2030 TRACE_3(TR_FAC_VM, TR_SEGMAP_RELMAP,
2032 2031 "segmap_relmap:seg %p addr %p smp %p",
2033 2032 seg, addr, smp);
2034 2033
2035 2034 smtx = SMAPMTX(smp);
2036 2035 mutex_enter(smtx);
2037 2036 smp->sm_flags |= SM_NOTKPM_RELEASED;
2038 2037 }
2039 2038
2040 2039 ASSERT(smp->sm_refcnt > 0);
2041 2040
2042 2041 /*
2043 2042 * Need to call VOP_PUTPAGE() if any flags (except SM_DONTNEED)
2044 2043 * are set.
2045 2044 */
2046 2045 if ((flags & ~SM_DONTNEED) != 0) {
2047 2046 if (flags & SM_WRITE)
2048 2047 segmapcnt.smp_rel_write.value.ul++;
2049 2048 if (flags & SM_ASYNC) {
2050 2049 bflags |= B_ASYNC;
2051 2050 segmapcnt.smp_rel_async.value.ul++;
2052 2051 }
2053 2052 if (flags & SM_INVAL) {
2054 2053 bflags |= B_INVAL;
2055 2054 segmapcnt.smp_rel_abort.value.ul++;
2056 2055 }
2057 2056 if (flags & SM_DESTROY) {
2058 2057 bflags |= (B_INVAL|B_TRUNC);
2059 2058 segmapcnt.smp_rel_abort.value.ul++;
2060 2059 }
2061 2060 if (smp->sm_refcnt == 1) {
2062 2061 /*
2063 2062 * We only bother doing the FREE and DONTNEED flags
2064 2063 * if no one else is still referencing this mapping.
2065 2064 */
2066 2065 if (flags & SM_FREE) {
2067 2066 bflags |= B_FREE;
2068 2067 segmapcnt.smp_rel_free.value.ul++;
2069 2068 }
2070 2069 if (flags & SM_DONTNEED) {
2071 2070 bflags |= B_DONTNEED;
2072 2071 segmapcnt.smp_rel_dontneed.value.ul++;
2073 2072 }
2074 2073 }
2075 2074 } else {
2076 2075 smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
2077 2076 }
2078 2077
2079 2078 vp = smp->sm_vp;
2080 2079 offset = smp->sm_off;
2081 2080
2082 2081 if (--smp->sm_refcnt == 0) {
2083 2082
2084 2083 smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);
2085 2084
2086 2085 if (flags & (SM_INVAL|SM_DESTROY)) {
2087 2086 segmap_hashout(smp); /* remove map info */
2088 2087 if (is_kpm) {
2089 2088 hat_kpm_mapout(pp, GET_KPME(smp), addr);
2090 2089 if (smp->sm_flags & SM_NOTKPM_RELEASED) {
2091 2090 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
2092 2091 hat_unload(kas.a_hat, segkmap->s_base +
2093 2092 ((smp - smd_smap) * MAXBSIZE),
2094 2093 MAXBSIZE, HAT_UNLOAD);
2095 2094 }
2096 2095
2097 2096 } else {
2098 2097 if (segmap_kpm)
2099 2098 segkpm_mapout_validkpme(GET_KPME(smp));
2100 2099
2101 2100 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
2102 2101 hat_unload(kas.a_hat, addr, MAXBSIZE,
2103 2102 HAT_UNLOAD);
2104 2103 }
2105 2104 }
2106 2105 segmap_smapadd(smp); /* add to free list */
2107 2106 }
2108 2107
2109 2108 mutex_exit(smtx);
2110 2109
2111 2110 if (is_kpm)
2112 2111 page_unlock(pp);
2113 2112 /*
2114 2113 * Now invoke VOP_PUTPAGE() if any flags (except SM_DONTNEED)
2115 2114 * are set.
2116 2115 */
2117 2116 if ((flags & ~SM_DONTNEED) != 0) {
2118 2117 error = VOP_PUTPAGE(vp, offset, MAXBSIZE,
2119 2118 bflags, CRED(), NULL);
2120 2119 } else {
2121 2120 error = 0;
2122 2121 }
2123 2122
2124 2123 return (error);
2125 2124 }
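
Usage note: given the flag handling above, a consumer that dirtied the window through the mapping picks its flush behavior at release time. A sketch, with base as returned by an earlier S_WRITE segmap_getmapflt():

/*
 * SM_WRITE forces a VOP_PUTPAGE of the MAXBSIZE window; adding
 * SM_ASYNC makes that putpage B_ASYNC (non-blocking). SM_DONTNEED
 * alone triggers no putpage at all.
 */
error = segmap_release(segkmap, base, SM_WRITE | SM_ASYNC);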
2126 2125
2127 2126 /*
2128 2127 * Dump the pages belonging to this segmap segment.
2129 2128 */
2130 2129 static void
2131 2130 segmap_dump(struct seg *seg)
2132 2131 {
2133 2132 struct segmap_data *smd;
2134 2133 struct smap *smp, *smp_end;
2135 2134 page_t *pp;
2136 2135 pfn_t pfn;
2137 2136 u_offset_t off;
2138 2137 caddr_t addr;
2139 2138
2140 2139 smd = (struct segmap_data *)seg->s_data;
2141 2140 addr = seg->s_base;
2142 2141 for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
2143 2142 smp < smp_end; smp++) {
2144 2143
2145 2144 if (smp->sm_refcnt) {
2146 2145 for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
2147 2146 int we_own_it = 0;
2148 2147
2149 2148 				/*
2150 2149 				 * page_lookup_nowait() returns NULL when
2151 2150 				 * the page either does not exist or is
2152 2151 				 * exclusively locked, so if it fails we
2153 2152 				 * fall back to page_exists() to find it.
2154 2153 				 */
2155 2154 if ((pp = page_lookup_nowait(smp->sm_vp,
2156 2155 smp->sm_off + off, SE_SHARED)))
2157 2156 we_own_it = 1;
2158 2157 else
2159 2158 pp = page_exists(smp->sm_vp,
2160 2159 smp->sm_off + off);
2161 2160
2162 2161 if (pp) {
2163 2162 pfn = page_pptonum(pp);
2164 2163 dump_addpage(seg->s_as,
2165 2164 addr + off, pfn);
2166 2165 if (we_own_it)
2167 2166 page_unlock(pp);
2168 2167 }
2169 2168 dump_timeleft = dump_timeout;
2170 2169 }
2171 2170 }
2172 2171 addr += MAXBSIZE;
2173 2172 }
2174 2173 }
2175 2174
2176 2175 /*ARGSUSED*/
2177 2176 static int
2178 2177 segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
2179 2178 struct page ***ppp, enum lock_type type, enum seg_rw rw)
2180 2179 {
2181 2180 return (ENOTSUP);
2182 2181 }
2183 2182
2184 2183 static int
2185 2184 segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
2186 2185 {
2187 2186 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
2188 2187
2189 2188 memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
2190 2189 memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
2191 2190 return (0);
2192 2191 }
2193 2192
2194 2193 /*ARGSUSED*/
2195 2194 static lgrp_mem_policy_info_t *
2196 2195 segmap_getpolicy(struct seg *seg, caddr_t addr)
2197 2196 {
2198 2197 return (NULL);
2199 2198 }
2200 2199
2201 2200 /*ARGSUSED*/
2202 2201 static int
2203 2202 segmap_capable(struct seg *seg, segcapability_t capability)
2204 2203 {
2205 2204 return (0);
2206 2205 }
2207 2206
2208 2207
2209 2208 #ifdef SEGKPM_SUPPORT
2210 2209
2211 2210 /*
2212 2211 * segkpm support routines
2213 2212 */
2214 2213
2215 2214 static caddr_t
2216 2215 segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
2217 2216 struct smap *smp, enum seg_rw rw)
2218 2217 {
2219 2218 caddr_t base;
2220 2219 page_t *pp;
2221 2220 int newpage = 0;
2222 2221 struct kpme *kpme;
2223 2222
2224 2223 ASSERT(smp->sm_refcnt > 0);
2225 2224
2226 2225 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL) {
2227 2226 kmutex_t *smtx;
2228 2227
2229 2228 base = segkpm_create_va(off);
2230 2229
2231 2230 if ((pp = page_create_va(vp, off, PAGESIZE, PG_WAIT,
2232 2231 seg, base)) == NULL) {
2233 2232 panic("segmap_pagecreate_kpm: "
2234 2233 "page_create failed");
2235 2234 /*NOTREACHED*/
2236 2235 }
2237 2236
2238 2237 newpage = 1;
2239 2238 page_io_unlock(pp);
2240 2239 ASSERT((u_offset_t)(off - smp->sm_off) <= INT_MAX);
2241 2240
2242 2241 /*
2243 2242 * Mark this here until the following segmap_pagecreate
2244 2243 * or segmap_release.
2245 2244 */
2246 2245 smtx = SMAPMTX(smp);
2247 2246 mutex_enter(smtx);
2248 2247 smp->sm_flags |= SM_KPM_NEWPAGE;
2249 2248 mutex_exit(smtx);
2250 2249 }
2251 2250
2252 2251 kpme = GET_KPME(smp);
2253 2252 if (!newpage && kpme->kpe_page == pp)
2254 2253 base = hat_kpm_page2va(pp, 0);
2255 2254 else
2256 2255 base = hat_kpm_mapin(pp, kpme);
2257 2256
2258 2257 /*
2259 2258 * FS code may decide not to call segmap_pagecreate and we
2260 2259 * don't invoke segmap_fault via TLB miss, so we have to set
2261 2260 * ref and mod bits in advance.
2262 2261 */
2263 2262 if (rw == S_WRITE) {
2264 2263 hat_setrefmod(pp);
2265 2264 } else {
2266 2265 ASSERT(rw == S_READ);
2267 2266 hat_setref(pp);
2268 2267 }
2269 2268
2270 2269 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
2271 2270
2272 2271 return (base);
2273 2272 }
2274 2273
2275 2274 /*
2276 2275 * Find the smap structure corresponding to the
2277 2276 * KPM addr and return it locked.
2278 2277 */
2279 2278 struct smap *
2280 2279 get_smap_kpm(caddr_t addr, page_t **ppp)
2281 2280 {
2282 2281 struct smap *smp;
2283 2282 struct vnode *vp;
2284 2283 u_offset_t offset;
2285 2284 caddr_t baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
2286 2285 int hashid;
2287 2286 kmutex_t *hashmtx;
2288 2287 page_t *pp;
2289 2288 union segmap_cpu *scpu;
2290 2289
2291 2290 pp = hat_kpm_vaddr2page(baseaddr);
2292 2291
2293 2292 ASSERT(pp && !PP_ISFREE(pp));
2294 2293 ASSERT(PAGE_LOCKED(pp));
2295 2294 ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);
2296 2295
2297 2296 vp = pp->p_vnode;
2298 2297 offset = pp->p_offset;
2299 2298 ASSERT(vp != NULL);
2300 2299
2301 2300 /*
2302 2301 * Assume the last smap used on this cpu is the one needed.
2303 2302 */
2304 2303 scpu = smd_cpu+CPU->cpu_seqid;
2305 2304 smp = scpu->scpu.scpu_last_smap;
2306 2305 mutex_enter(&smp->sm_mtx);
2307 2306 if (smp->sm_vp == vp && smp->sm_off == offset) {
2308 2307 ASSERT(smp->sm_refcnt > 0);
2309 2308 } else {
2310 2309 /*
2311 2310 * Assumption wrong, find the smap on the hash chain.
2312 2311 */
2313 2312 mutex_exit(&smp->sm_mtx);
2314 2313 SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
2315 2314 hashmtx = SHASHMTX(hashid);
2316 2315
2317 2316 mutex_enter(hashmtx);
2318 2317 smp = smd_hash[hashid].sh_hash_list;
2319 2318 for (; smp != NULL; smp = smp->sm_hash) {
2320 2319 if (smp->sm_vp == vp && smp->sm_off == offset)
2321 2320 break;
2322 2321 }
2323 2322 mutex_exit(hashmtx);
2324 2323 if (smp) {
2325 2324 mutex_enter(&smp->sm_mtx);
2326 2325 ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
2327 2326 }
2328 2327 }
2329 2328
2330 2329 if (ppp)
2331 2330 *ppp = smp ? pp : NULL;
2332 2331
2333 2332 return (smp);
2334 2333 }
2335 2334
2336 2335 #else /* SEGKPM_SUPPORT */
2337 2336
2338 2337 /* segkpm stubs */
2339 2338
2340 2339 /*ARGSUSED*/
2341 2340 static caddr_t
2342 2341 segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, u_offset_t off,
2343 2342 struct smap *smp, enum seg_rw rw)
2344 2343 {
2345 2344 return (NULL);
2346 2345 }
2347 2346
2348 2347 /*ARGSUSED*/
2349 2348 struct smap *
2350 2349 get_smap_kpm(caddr_t addr, page_t **ppp)
2351 2350 {
2352 2351 return (NULL);
2353 2352 }
2354 2353
2355 2354 #endif /* SEGKPM_SUPPORT */