1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #pragma ident   "%Z%%M% %I%     %E% SMI"
  27 
  28 #include <sys/types.h>
  29 #include <sys/systm.h>
  30 #include <sys/archsystm.h>
  31 #include <sys/machsystm.h>
  32 #include <sys/t_lock.h>
  33 #include <sys/vmem.h>
  34 #include <sys/mman.h>
  35 #include <sys/vm.h>
  36 #include <sys/cpu.h>
  37 #include <sys/cmn_err.h>
  38 #include <sys/cpuvar.h>
  39 #include <sys/atomic.h>
  40 #include <vm/as.h>
  41 #include <vm/hat.h>
  42 #include <vm/as.h>
  43 #include <vm/page.h>
  44 #include <vm/seg.h>
  45 #include <vm/seg_kmem.h>
  46 #include <vm/seg_kpm.h>
  47 #include <vm/hat_sfmmu.h>
  48 #include <sys/debug.h>
  49 #include <sys/cpu_module.h>
  50 
  51 /*
  52  * A quick way to generate a cache consistent address to map in a page.
  53  * users: ppcopy, pagezero, /proc, dev/mem
  54  *
  55  * The ppmapin/ppmapout routines provide a quick way of generating a cache
  56  * consistent address by reserving a given amount of kernel address space.
  57  * The base is PPMAPBASE and its size is PPMAPSIZE.  This memory is divided
  58  * into x number of sets, where x is the number of colors for the virtual
 * cache. The number of colors is how many times a page can be mapped
 * simultaneously in the cache.  For direct map caches this translates to
  61  * the number of pages in the cache.
  62  * Each set will be assigned a group of virtual pages from the reserved memory
  63  * depending on its virtual color.
  64  * When trying to assign a virtual address we will find out the color for the
  65  * physical page in question (if applicable).  Then we will try to find an
 * available virtual page from the set of the appropriate color.
  67  */
  68 
/* Number of per-CPU fast-mapping slots; small default, tuned by cpu module */
int pp_slots = 4;               /* small default, tuned by cpu module */

/* tuned by cpu module, default is "safe" */
int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;

/* One reusable kernel VA per page of the reserved PPMAP range; NULL = in use */
static caddr_t  ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
static int      nsets;                  /* number of sets */
static int      ppmap_shift;            /* set selector */

#ifdef PPDEBUG
#define         MAXCOLORS       16      /* for debug only */
static int      ppalloc_noslot = 0;     /* # of allocations from kernelmap */
static int      align_hits;             /* # of requests satisfied from a slot */
static int      pp_allocs;              /* # of ppmapin requests */
#endif /* PPDEBUG */

/*
 * There are only 64 TLB entries on spitfire, 16 on cheetah
 * (fully-associative TLB) so we allow the cpu module to tune the
 * number to use here via pp_slots.
 */
static struct ppmap_va {
        caddr_t ppmap_slots[MAXPP_SLOTS];
} ppmap_va[NCPU];

/* prevent compilation with VAC defined */
#ifdef VAC
#error "sun4v ppmapin and ppmapout do not support VAC"
#endif
  98 
  99 void
 100 ppmapinit(void)
 101 {
 102         int nset;
 103         caddr_t va;
 104 
 105         ASSERT(pp_slots <= MAXPP_SLOTS);
 106 
 107         va = (caddr_t)PPMAPBASE;
 108 
 109         /*
 110          * sun4v does not have a virtual indexed cache and simply
 111          * has only one set containing all pages.
 112          */
 113         nsets = mmu_btop(PPMAPSIZE);
 114         ppmap_shift = MMU_PAGESHIFT;
 115 
 116         for (nset = 0; nset < nsets; nset++) {
 117                 ppmap_vaddrs[nset] =
 118                     (caddr_t)((uintptr_t)va + (nset * MMU_PAGESIZE));
 119         }
 120 }
 121 
 122 /*
 123  * Allocate a cache consistent virtual address to map a page, pp,
 124  * with protection, vprot; and map it in the MMU, using the most
 125  * efficient means possible.  The argument avoid is a virtual address
 126  * hint which when masked yields an offset into a virtual cache
 127  * that should be avoided when allocating an address to map in a
 128  * page.  An avoid arg of -1 means you don't care, for instance pagezero.
 129  *
 130  * machine dependent, depends on virtual address space layout,
 131  * understands that all kernel addresses have bit 31 set.
 132  *
 133  * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
 134  * that found in other architectures.  In other architectures the hint
 135  * (called avoid) was used to ask ppmapin to NOT use the specified cache color.
 * This was used to avoid virtual cache thrashing in the bcopy.  Unfortunately
 * in the case of a COW,  this later on caused a cache aliasing conflict.  In
 * sun4, the bcopy routine uses the block ld/st instructions so we don't have
 * to worry about virtual cache thrashing.  Actually, by using the hint to choose
 140  * the right color we can almost guarantee a cache conflict will not occur.
 141  */
 142 
 143 /*ARGSUSED2*/
 144 caddr_t
 145 ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
 146 {
 147         int nset;
 148         caddr_t va;
 149 
 150 #ifdef PPDEBUG
 151         pp_allocs++;
 152 #endif /* PPDEBUG */
 153 
 154         /*
 155          * For sun4v caches are physical caches, we can pick any address
 156          * we want.
 157          */
 158         for (nset = 0; nset < nsets; nset++) {
 159                 va = ppmap_vaddrs[nset];
 160                 if (va != NULL) {
 161 #ifdef PPDEBUG
 162                         align_hits++;
 163 #endif /* PPDEBUG */
 164                         if (casptr(&ppmap_vaddrs[nset], va, NULL) == va) {
 165                                 hat_memload(kas.a_hat, va, pp,
 166                                     vprot | HAT_NOSYNC,
 167                                     HAT_LOAD_LOCK);
 168                                 return (va);
 169                         }
 170                 }
 171         }
 172 
 173 #ifdef PPDEBUG
 174         ppalloc_noslot++;
 175 #endif /* PPDEBUG */
 176 
 177         /*
 178          * No free slots; get a random one from the kernel heap area.
 179          */
 180         va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
 181 
 182         hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);
 183 
 184         return (va);
 185 
 186 }
 187 
 188 void
 189 ppmapout(caddr_t va)
 190 {
 191         int nset;
 192 
 193         if (va >= kernelheap && va < ekernelheap) {
 194                 /*
 195                  * Space came from kernelmap, flush the page and
 196                  * return the space.
 197                  */
 198                 hat_unload(kas.a_hat, va, PAGESIZE,
 199                     (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
 200                 vmem_free(heap_arena, va, PAGESIZE);
 201         } else {
 202                 /*
 203                  * Space came from ppmap_vaddrs[], give it back.
 204                  */
 205                 nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
 206                 hat_unload(kas.a_hat, va, PAGESIZE,
 207                     (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
 208 
 209                 ASSERT(ppmap_vaddrs[nset] == NULL);
 210                 ppmap_vaddrs[nset] = va;
 211         }
 212 }
 213 
/*
 * Debug-only statistics counters for the fast mapping paths below;
 * PP_STAT_ADD compiles to nothing in non-DEBUG kernels.
 */
#ifdef DEBUG
#define PP_STAT_ADD(stat)       (stat)++
uint_t pload, ploadfail;
uint_t ppzero, ppzero_short;
#else
#define PP_STAT_ADD(stat)
#endif /* DEBUG */
 221 
/*
 * Tear down a per-CPU fast-mapping slot: flush the TLB entry for va
 * (tagged with the kernel hat, ksfmmup) and then mark the slot free.
 * The slot must still hold va when called (asserted below).
 */
static void
pp_unload_tlb(caddr_t *pslot, caddr_t va)
{
        ASSERT(*pslot == va);

        vtag_flushpage(va, (uint64_t)ksfmmup);
        *pslot = NULL;                          /* release the slot */
}
 230 
 231 /*
 232  * Routine to copy kernel pages during relocation.  It will copy one
 233  * PAGESIZE page to another PAGESIZE page.  This function may be called
 234  * above LOCK_LEVEL so it should not grab any locks.
 235  */
 236 void
 237 ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
 238 {
 239         uint64_t fm_pa, to_pa;
 240         size_t nbytes;
 241 
 242         fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
 243         to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;
 244 
 245         nbytes = MMU_PAGESIZE;
 246 
 247         for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
 248                 hw_pa_bcopy32(fm_pa, to_pa);
 249 }
 250 
 251 /*
 252  * Copy the data from the physical page represented by "frompp" to
 253  * that represented by "topp".
 254  *
 * Try to map both pages via KPM first; if that fails, fall back to
 * ppmapin to load them.
 257  * Returns one on success or zero on some sort of fault while doing the copy.
 258  */
int
ppcopy(page_t *fm_pp, page_t *to_pp)
{
        caddr_t fm_va = NULL;   /* NULL lets us detect a partial KPM mapin */
        caddr_t to_va;
        boolean_t fast;         /* B_TRUE when both pages mapped via KPM */
        label_t ljb;
        int ret = 1;

        /* Callers must hold both pages locked for the duration */
        ASSERT(PAGE_LOCKED(fm_pp));
        ASSERT(PAGE_LOCKED(to_pp));

        /*
         * Try to map using KPM if enabled.  If it fails, fall
         * back to ppmapin/ppmapout.  If only the source mapped via
         * KPM, undo that mapping before switching to ppmapin so both
         * pages are torn down the same way below.
         */
        if ((kpm_enable == 0) ||
            (fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL ||
            (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) {
                if (fm_va != NULL)
                        hat_kpm_mapout(fm_pp, NULL, fm_va);
                fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
                to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
                fast = B_FALSE;
        } else
                fast = B_TRUE;

        /*
         * Arm a fault handler around the copy: if the bcopy faults,
         * control returns here with on_fault() nonzero and we report
         * failure (ret == 0) instead of panicking.
         */
        if (on_fault(&ljb)) {
                ret = 0;
                goto faulted;
        }
        bcopy(fm_va, to_va, PAGESIZE);
        no_fault();
faulted:

        /* Unmap both pages by whichever mechanism mapped them */
        if (fast) {
                hat_kpm_mapout(fm_pp, NULL, fm_va);
                hat_kpm_mapout(to_pp, NULL, to_va);
        } else {
                ppmapout(fm_va);
                ppmapout(to_va);
        }
        return (ret);
}
 304 
 305 /*
 306  * Zero the physical page from off to off + len given by `pp'
 307  * without changing the reference and modified bits of page.
 308  *
 * Again, we'll try the KPM mapping first.
 310  */
 311 
void
pagezero(page_t *pp, uint_t off, uint_t len)
{
        caddr_t va;
        extern int hwblkclr(void *, size_t);
        extern int use_hw_bzero;
        boolean_t fast;         /* B_TRUE when the page is mapped via KPM */

        /* Range must be non-empty and lie entirely within the page */
        ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
        ASSERT(PAGE_LOCKED(pp));

        PP_STAT_ADD(ppzero);

        /* Count partial-page or software-path zeroes (stat only) */
        if (len != MMU_PAGESIZE || !use_hw_bzero) {
                PP_STAT_ADD(ppzero_short);
        }

        /*
         * Disable preemption so we stay on this CPU while the
         * temporary mapping and hardware clear are in progress.
         */
        kpreempt_disable();

        /*
         * Try to use KPM if enabled.  If that fails, fall back to
         * ppmapin/ppmapout.
         */

        if (kpm_enable != 0) {
                fast = B_TRUE;
                va = hat_kpm_mapin(pp, NULL);
        } else
                va = NULL;

        if (va == NULL) {
                fast = B_FALSE;
                va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
        }

        if (!use_hw_bzero) {
                /* Software path: plain bzero plus explicit I-$ flush */
                bzero(va + off, len);
                sync_icache(va + off, len);
        } else if (hwblkclr(va + off, len)) {
                /*
                 * We may not have used block commit asi.
                 * So flush the I-$ manually
                 */
                sync_icache(va + off, len);
        } else {
                /*
                 * We have used blk commit, and flushed the I-$.
                 * However we still may have an instruction in the
                 * pipeline. Only a flush will invalidate that.
                 */
                doflush(va);
        }

        /* Tear down the mapping by whichever mechanism created it */
        if (fast) {
                hat_kpm_mapout(pp, NULL, va);
        } else {
                ppmapout(va);
        }
        kpreempt_enable();
}