1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
  27 /* All Rights Reserved */
  28 
  29 /*
  30  * Portions of this source code were derived from Berkeley 4.3 BSD
  31  * under license from the Regents of the University of California.
  32  */
  33 
  34 #pragma ident   "%Z%%M% %I%     %E% SMI"
  35 
  36 /*
  37  * VM - segment for non-faulting loads.
  38  */
  39 
  40 #include <sys/types.h>
  41 #include <sys/t_lock.h>
  42 #include <sys/param.h>
  43 #include <sys/mman.h>
  44 #include <sys/errno.h>
  45 #include <sys/kmem.h>
  46 #include <sys/cmn_err.h>
  47 #include <sys/vnode.h>
  48 #include <sys/proc.h>
  49 #include <sys/conf.h>
  50 #include <sys/debug.h>
  51 #include <sys/archsystm.h>
  52 #include <sys/lgrp.h>
  53 
  54 #include <vm/page.h>
  55 #include <vm/hat.h>
  56 #include <vm/as.h>
  57 #include <vm/seg.h>
  58 #include <vm/vpage.h>
  59 
/*
 * Private seg op routines.
 *
 * Forward declarations for the handlers that are installed in the
 * segnf_ops vector below.  segnf_nomap(), segnf_badop() and segnf_nop()
 * take no arguments; they are cast to the required function type at the
 * point where they are placed in the vector.
 */
static int      segnf_dup(struct seg *seg, struct seg *newseg);
static int      segnf_unmap(struct seg *seg, caddr_t addr, size_t len);
static void     segnf_free(struct seg *seg);
static faultcode_t segnf_nomap(void);
static int      segnf_setprot(struct seg *seg, caddr_t addr,
                    size_t len, uint_t prot);
static int      segnf_checkprot(struct seg *seg, caddr_t addr,
                    size_t len, uint_t prot);
static void     segnf_badop(void);
static int      segnf_nop(void);
static int      segnf_getprot(struct seg *seg, caddr_t addr,
                    size_t len, uint_t *protv);
static u_offset_t segnf_getoffset(struct seg *seg, caddr_t addr);
static int      segnf_gettype(struct seg *seg, caddr_t addr);
static int      segnf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
static void     segnf_dump(struct seg *seg);
static int      segnf_pagelock(struct seg *seg, caddr_t addr, size_t len,
                    struct page ***ppp, enum lock_type type, enum seg_rw rw);
static int      segnf_setpagesize(struct seg *seg, caddr_t addr, size_t len,
                    uint_t szc);
static int      segnf_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
static lgrp_mem_policy_info_t   *segnf_getpolicy(struct seg *seg,
    caddr_t addr);
  86 
  87 
/*
 * Segment operations vector for no-fault segments.
 *
 * The initializer is positional, so every entry has to stay in the order
 * in which struct seg_ops declares its members.  Slots with no
 * segnf-specific behaviour are filled with one of three argument-less
 * stubs, cast to the slot's function type:
 *
 *      segnf_nomap     returns FC_NOMAP        (fault, faulta)
 *      segnf_nop       returns 0               (sync, incore, lockop, advise)
 *      segnf_badop     panics                  (kluster)
 *
 * The swapout slot is left NULL.
 */
struct seg_ops segnf_ops = {
        segnf_dup,
        segnf_unmap,
        segnf_free,
        (faultcode_t (*)(struct hat *, struct seg *, caddr_t, size_t,
            enum fault_type, enum seg_rw))
                segnf_nomap,            /* fault */
        (faultcode_t (*)(struct seg *, caddr_t))
                segnf_nomap,            /* faulta */
        segnf_setprot,
        segnf_checkprot,
        (int (*)())segnf_badop,         /* kluster */
        (size_t (*)(struct seg *))NULL, /* swapout */
        (int (*)(struct seg *, caddr_t, size_t, int, uint_t))
                segnf_nop,              /* sync */
        (size_t (*)(struct seg *, caddr_t, size_t, char *))
                segnf_nop,              /* incore */
        (int (*)(struct seg *, caddr_t, size_t, int, int, ulong_t *, size_t))
                segnf_nop,              /* lockop */
        segnf_getprot,
        segnf_getoffset,
        segnf_gettype,
        segnf_getvp,
        (int (*)(struct seg *, caddr_t, size_t, uint_t))
                segnf_nop,              /* advise */
        segnf_dump,
        segnf_pagelock,
        segnf_setpagesize,
        segnf_getmemid,
        segnf_getpolicy,
};
 119 
/*
 * vnode and pages for the pages of zeros we use for the nf mappings.
 *
 * segnf_lock   serializes the one-time allocation of nfpp[] performed by
 *              the first call to segnf_create().
 * nfvp         vnode the zero pages are created against; also the vnode
 *              reported by segnf_getvp().
 * nfpp         array of zero-filled pages, one per virtual color (a single
 *              entry when there is no VAC).  NULL until first use.
 */
static kmutex_t segnf_lock;
static struct vnode nfvp;
static struct page **nfpp;
 126 
 127 #define addr_to_vcolor(addr)                                            \
 128         (shm_alignment) ?                                               \
 129         ((int)(((uintptr_t)(addr) & (shm_alignment - 1)) >> PAGESHIFT)) : 0
 130 
 131 /*
 132  * We try to limit the number of Non-fault segments created.
 133  * Non fault segments are created to optimize sparc V9 code which uses
 134  * the sparc nonfaulting load ASI (ASI_PRIMARY_NOFAULT).
 135  *
 136  * There are several reasons why creating too many non-fault segments
 137  * could cause problems.
 138  *
 139  *      First, excessive allocation of kernel resources for the seg
 140  *      structures and the HAT data to map the zero pages.
 141  *
 142  *      Secondly, creating nofault segments actually uses up user virtual
 143  *      address space. This makes it unavailable for subsequent mmap(0, ...)
 144  *      calls which use as_gap() to find empty va regions.  Creation of too
 145  *      many nofault segments could thus interfere with the ability of the
 146  *      runtime linker to load a shared object.
 147  */
/*
 * MAXSEGFORNF: once the address space's segment tree holds more than this
 *      many segments (of any kind), segnf_create() tries to unmap another
 *      nofault segment.
 * MAXNFSEARCH: bounds the iterations of the neighbour search that
 *      segnf_create() performs when looking for a segment to unmap.
 */
#define MAXSEGFORNF     (10000)
#define MAXNFSEARCH     (5)
 150 
 151 
 152 /*
 153  * Must be called from startup()
 154  */
 155 void
 156 segnf_init()
 157 {
 158         mutex_init(&segnf_lock, NULL, MUTEX_DEFAULT, NULL);
 159 }
 160 
 161 
 162 /*
 163  * Create a no-fault segment.
 164  *
 165  * The no-fault segment is not technically necessary, as the code in
 166  * nfload() in trap.c will emulate the SPARC instruction and load
 167  * a value of zero in the destination register.
 168  *
 169  * However, this code tries to put a page of zero's at the nofault address
 170  * so that subsequent non-faulting loads to the same page will not
 171  * trap with a tlb miss.
 172  *
 173  * In order to help limit the number of segments we merge adjacent nofault
 174  * segments into a single segment.  If we get a large number of segments
 175  * we'll also try to delete a random other nf segment.
 176  */
 177 /* ARGSUSED */
 178 int
 179 segnf_create(struct seg *seg, void *argsp)
 180 {
 181         uint_t prot;
 182         pgcnt_t vacpgs;
 183         u_offset_t off = 0;
 184         caddr_t vaddr = NULL;
 185         int i, color;
 186         struct seg *s1;
 187         struct seg *s2;
 188         size_t size;
 189         struct as *as = seg->s_as;
 190 
 191         ASSERT(as && AS_WRITE_HELD(as));
 192 
 193         /*
 194          * Need a page per virtual color or just 1 if no vac.
 195          */
 196         mutex_enter(&segnf_lock);
 197         if (nfpp == NULL) {
 198                 struct seg kseg;
 199 
 200                 vacpgs = 1;
 201                 if (shm_alignment > PAGESIZE) {
 202                         vacpgs = shm_alignment >> PAGESHIFT;
 203                 }
 204 
 205                 nfpp = kmem_alloc(sizeof (*nfpp) * vacpgs, KM_SLEEP);
 206 
 207                 kseg.s_as = &kas;
 208                 for (i = 0; i < vacpgs; i++, off += PAGESIZE,
 209                     vaddr += PAGESIZE) {
 210                         nfpp[i] = page_create_va(&nfvp, off, PAGESIZE,
 211                             PG_WAIT | PG_NORELOC, &kseg, vaddr);
 212                         page_io_unlock(nfpp[i]);
 213                         page_downgrade(nfpp[i]);
 214                         pagezero(nfpp[i], 0, PAGESIZE);
 215                 }
 216         }
 217         mutex_exit(&segnf_lock);
 218 
 219         hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
 220 
 221         /*
 222          * s_data can't be NULL because of ASSERTS in the common vm code.
 223          */
 224         seg->s_ops = &segnf_ops;
 225         seg->s_data = seg;
 226         seg->s_flags |= S_PURGE;
 227 
 228         mutex_enter(&as->a_contents);
 229         as->a_flags |= AS_NEEDSPURGE;
 230         mutex_exit(&as->a_contents);
 231 
 232         prot = PROT_READ;
 233         color = addr_to_vcolor(seg->s_base);
 234         if (as != &kas)
 235                 prot |= PROT_USER;
 236         hat_memload(as->a_hat, seg->s_base, nfpp[color],
 237             prot | HAT_NOFAULT, HAT_LOAD);
 238 
 239         /*
 240          * At this point see if we can concatenate a segment to
 241          * a non-fault segment immediately before and/or after it.
 242          */
 243         if ((s1 = AS_SEGPREV(as, seg)) != NULL &&
 244             s1->s_ops == &segnf_ops &&
 245             s1->s_base + s1->s_size == seg->s_base) {
 246                 size = s1->s_size;
 247                 seg_free(s1);
 248                 seg->s_base -= size;
 249                 seg->s_size += size;
 250         }
 251 
 252         if ((s2 = AS_SEGNEXT(as, seg)) != NULL &&
 253             s2->s_ops == &segnf_ops &&
 254             seg->s_base + seg->s_size == s2->s_base) {
 255                 size = s2->s_size;
 256                 seg_free(s2);
 257                 seg->s_size += size;
 258         }
 259 
 260         /*
 261          * if we already have a lot of segments, try to delete some other
 262          * nofault segment to reduce the probability of uncontrolled segment
 263          * creation.
 264          *
 265          * the code looks around quickly (no more than MAXNFSEARCH segments
 266          * each way) for another NF segment and then deletes it.
 267          */
 268         if (avl_numnodes(&as->a_segtree) > MAXSEGFORNF) {
 269                 size = 0;
 270                 s2 = NULL;
 271                 s1 = AS_SEGPREV(as, seg);
 272                 while (size++ < MAXNFSEARCH && s1 != NULL) {
 273                         if (s1->s_ops == &segnf_ops)
 274                                 s2 = s1;
 275                         s1 = AS_SEGPREV(s1->s_as, seg);
 276                 }
 277                 if (s2 == NULL) {
 278                         s1 = AS_SEGNEXT(as, seg);
 279                         while (size-- > 0 && s1 != NULL) {
 280                                 if (s1->s_ops == &segnf_ops)
 281                                         s2 = s1;
 282                                 s1 = AS_SEGNEXT(as, seg);
 283                         }
 284                 }
 285                 if (s2 != NULL)
 286                         seg_unmap(s2);
 287         }
 288 
 289         return (0);
 290 }
 291 
/*
 * Never really need "No fault" segments, so they aren't dup'd.
 *
 * This handler is not expected to be reached: it panics unconditionally.
 * The return statement only exists to satisfy the int return type.
 */
/* ARGSUSED */
static int
segnf_dup(struct seg *seg, struct seg *newseg)
{
        panic("segnf_dup");
        return (0);
}
 302 
 303 /*
 304  * Split a segment at addr for length len.
 305  */
 306 static int
 307 segnf_unmap(struct seg *seg, caddr_t addr, size_t len)
 308 {
 309         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
 310 
 311         /*
 312          * Check for bad sizes.
 313          */
 314         if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
 315             (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET)) {
 316                 cmn_err(CE_PANIC, "segnf_unmap: bad unmap size");
 317         }
 318 
 319         /*
 320          * Unload any hardware translations in the range to be taken out.
 321          */
 322         hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
 323 
 324         if (addr == seg->s_base && len == seg->s_size) {
 325                 /*
 326                  * Freeing entire segment.
 327                  */
 328                 seg_free(seg);
 329         } else if (addr == seg->s_base) {
 330                 /*
 331                  * Freeing the beginning of the segment.
 332                  */
 333                 seg->s_base += len;
 334                 seg->s_size -= len;
 335         } else if (addr + len == seg->s_base + seg->s_size) {
 336                 /*
 337                  * Freeing the end of the segment.
 338                  */
 339                 seg->s_size -= len;
 340         } else {
 341                 /*
 342                  * The section to go is in the middle of the segment, so we
 343                  * have to cut it into two segments.  We shrink the existing
 344                  * "seg" at the low end, and create "nseg" for the high end.
 345                  */
 346                 caddr_t nbase = addr + len;
 347                 size_t nsize = (seg->s_base + seg->s_size) - nbase;
 348                 struct seg *nseg;
 349 
 350                 /*
 351                  * Trim down "seg" before trying to stick "nseg" into the as.
 352                  */
 353                 seg->s_size = addr - seg->s_base;
 354                 nseg = seg_alloc(seg->s_as, nbase, nsize);
 355                 if (nseg == NULL)
 356                         cmn_err(CE_PANIC, "segnf_unmap: seg_alloc failed");
 357 
 358                 /*
 359                  * s_data can't be NULL because of ASSERTs in common VM code.
 360                  */
 361                 nseg->s_ops = seg->s_ops;
 362                 nseg->s_data = nseg;
 363                 nseg->s_flags |= S_PURGE;
 364                 mutex_enter(&seg->s_as->a_contents);
 365                 seg->s_as->a_flags |= AS_NEEDSPURGE;
 366                 mutex_exit(&seg->s_as->a_contents);
 367         }
 368 
 369         return (0);
 370 }
 371 
/*
 * Free a segment.
 *
 * There is nothing to release: segnf_create() and segnf_unmap() point
 * s_data back at the seg itself rather than at separately allocated
 * private data.  The function only asserts AS_WRITE_HELD on the owning
 * address space.
 */
static void
segnf_free(struct seg *seg)
{
        ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
}
 380 
/*
 * No faults allowed on segnf.
 *
 * Installed (through casts) in both the fault and faulta slots of
 * segnf_ops; always returns FC_NOMAP.
 */
static faultcode_t
segnf_nomap(void)
{
        return (FC_NOMAP);
}
 389 
/*
 * Protections of a nofault segment cannot be changed: every request is
 * refused with EACCES, whatever addr, len and prot are.
 */
/* ARGSUSED */
static int
segnf_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
{
        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
        return (EACCES);
}
 397 
 398 /* ARGSUSED */
 399 static int
 400 segnf_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
 401 {
 402         uint_t sprot;
 403         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 404 
 405         sprot = seg->s_as == &kas ?  PROT_READ : PROT_READ|PROT_USER;
 406         return ((prot & sprot) == prot ? 0 : EACCES);
 407 }
 408 
/*
 * Placeholder for an operation that must never be invoked on a nofault
 * segment (it fills the kluster slot of segnf_ops).  Panics.
 */
static void
segnf_badop(void)
{
        panic("segnf_badop");
        /*NOTREACHED*/
}
 415 
/*
 * Generic do-nothing handler that reports success (0).  segnf_ops uses it,
 * through casts, for the sync, incore, lockop and advise slots.
 */
static int
segnf_nop(void)
{
        return (0);
}
 421 
 422 static int
 423 segnf_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
 424 {
 425         size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
 426         size_t p;
 427         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
 428 
 429         for (p = 0; p < pgno; ++p)
 430                 protv[p] = PROT_READ;
 431         return (0);
 432 }
 433 
/*
 * Report the offset backing addr.  A nofault segment always reports
 * offset 0, regardless of addr.
 */
/* ARGSUSED */
static u_offset_t
segnf_getoffset(struct seg *seg, caddr_t addr)
{
        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

        return ((u_offset_t)0);
}
 442 
/*
 * Report the mapping type at addr.  A nofault segment is always reported
 * as MAP_SHARED, regardless of addr.
 */
/* ARGSUSED */
static int
segnf_gettype(struct seg *seg, caddr_t addr)
{
        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

        return (MAP_SHARED);
}
 451 
/*
 * Report the vnode backing addr.  Stores the address of nfvp, the vnode
 * the shared zero pages are created against, in *vpp and returns 0.
 */
/* ARGSUSED */
static int
segnf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
{
        ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));

        *vpp = &nfvp;
        return (0);
}
 461 
/*
 * segnf pages are not dumped, so we just return.
 * The body is intentionally empty.
 */
/* ARGSUSED */
static void
segnf_dump(struct seg *seg)
{}
 469 
/*
 * Page locking is not supported on nofault segments: always returns
 * ENOTSUP without looking at any argument.
 */
/*ARGSUSED*/
static int
segnf_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
        return (ENOTSUP);
}
 477 
/*
 * Changing the page size is not supported on nofault segments: always
 * returns ENOTSUP without looking at any argument.
 */
/*ARGSUSED*/
static int
segnf_setpagesize(struct seg *seg, caddr_t addr, size_t len,
    uint_t szc)
{
        return (ENOTSUP);
}
 485 
/*
 * Nofault segments do not provide a memory id: always returns ENODEV and
 * leaves *memidp untouched.
 */
/*ARGSUSED*/
static int
segnf_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
{
        return (ENODEV);
}
 492 
/*
 * Nofault segments carry no lgroup memory policy information: always
 * returns NULL.
 */
/*ARGSUSED*/
static lgrp_mem_policy_info_t *
segnf_getpolicy(struct seg *seg, caddr_t addr)
{
        return (NULL);
}