64 #include <vm/seg_kmem.h>
65 #include <vm/seg_kpm.h>
66 #include <vm/rm.h>
67 #include <sys/t_lock.h>
68 #include <sys/obpdefs.h>
69 #include <sys/vm_machparam.h>
70 #include <sys/var.h>
71 #include <sys/trap.h>
72 #include <sys/machtrap.h>
73 #include <sys/scb.h>
74 #include <sys/bitmap.h>
75 #include <sys/machlock.h>
76 #include <sys/membar.h>
77 #include <sys/atomic.h>
78 #include <sys/cpu_module.h>
79 #include <sys/prom_debug.h>
80 #include <sys/ksynch.h>
81 #include <sys/mem_config.h>
82 #include <sys/mem_cage.h>
83 #include <vm/vm_dep.h>
84 #include <vm/xhat_sfmmu.h>
85 #include <sys/fpu/fpusystm.h>
86 #include <vm/mach_kpm.h>
87 #include <sys/callb.h>
88
89 #ifdef DEBUG
90 #define SFMMU_VALIDATE_HMERID(hat, rid, saddr, len) \
91 if (SFMMU_IS_SHMERID_VALID(rid)) { \
92 caddr_t _eaddr = (saddr) + (len); \
93 sf_srd_t *_srdp; \
94 sf_region_t *_rgnp; \
95 ASSERT((rid) < SFMMU_MAX_HME_REGIONS); \
96 ASSERT(SF_RGNMAP_TEST(hat->sfmmu_hmeregion_map, rid)); \
97 ASSERT((hat) != ksfmmup); \
98 _srdp = (hat)->sfmmu_srdp; \
99 ASSERT(_srdp != NULL); \
100 ASSERT(_srdp->srd_refcnt != 0); \
101 _rgnp = _srdp->srd_hmergnp[(rid)]; \
102 ASSERT(_rgnp != NULL && _rgnp->rgn_id == rid); \
103 ASSERT(_rgnp->rgn_refcnt != 0); \
104 ASSERT(!(_rgnp->rgn_flags & SFMMU_REGION_FREE)); \
1333 * Reserve some kernel virtual address space for the locked TTEs
1334 * that allow us to probe the TSB from TL>0.
1335 */
1336 utsb_vabase = vmem_xalloc(heap_arena, tsb_slab_size, tsb_slab_size,
1337 0, 0, NULL, NULL, VM_SLEEP);
1338 utsb4m_vabase = vmem_xalloc(heap_arena, tsb_slab_size, tsb_slab_size,
1339 0, 0, NULL, NULL, VM_SLEEP);
1340 #endif
1341
1342 #ifdef VAC
1343 /*
1344 * The big page VAC handling code assumes VAC
1345 * will not be bigger than the smallest big
1346 * page- which is 64K.
1347 */
1348 if (TTEPAGES(TTE64K) < CACHE_NUM_COLOR) {
1349 cmn_err(CE_PANIC, "VAC too big!");
1350 }
1351 #endif
1352
1353 (void) xhat_init();
1354
1355 uhme_hash_pa = va_to_pa(uhme_hash);
1356 khme_hash_pa = va_to_pa(khme_hash);
1357
1358 /*
1359 * Initialize relocation locks. kpr_suspendlock is held
1360 * at PIL_MAX to prevent interrupts from pinning the holder
1361 * of a suspended TTE which may access it leading to a
1362 * deadlock condition.
1363 */
1364 mutex_init(&kpr_mutex, NULL, MUTEX_DEFAULT, NULL);
1365 mutex_init(&kpr_suspendlock, NULL, MUTEX_SPIN, (void *)PIL_MAX);
1366
1367 /*
1368 * If Shared context support is disabled via /etc/system
1369 * set shctx_on to 0 here if it was set to 1 earlier in boot
1370 * sequence by cpu module initialization code.
1371 */
1372 if (shctx_on && disable_shctx) {
1373 shctx_on = 0;
1374 }
1522 for (i = 0; i < max_mmu_page_sizes; i++) {
1523 sfmmup->sfmmu_ttecnt[i] = 0;
1524 sfmmup->sfmmu_scdrttecnt[i] = 0;
1525 sfmmup->sfmmu_ismttecnt[i] = 0;
1526 sfmmup->sfmmu_scdismttecnt[i] = 0;
1527 sfmmup->sfmmu_pgsz[i] = TTE8K;
1528 }
1529 sfmmup->sfmmu_tsb0_4minflcnt = 0;
1530 sfmmup->sfmmu_iblk = NULL;
1531 sfmmup->sfmmu_ismhat = 0;
1532 sfmmup->sfmmu_scdhat = 0;
1533 sfmmup->sfmmu_ismblkpa = (uint64_t)-1;
1534 if (sfmmup == ksfmmup) {
1535 CPUSET_ALL(sfmmup->sfmmu_cpusran);
1536 } else {
1537 CPUSET_ZERO(sfmmup->sfmmu_cpusran);
1538 }
1539 sfmmup->sfmmu_free = 0;
1540 sfmmup->sfmmu_rmstat = 0;
1541 sfmmup->sfmmu_clrbin = sfmmup->sfmmu_clrstart;
1542 sfmmup->sfmmu_xhat_provider = NULL;
1543 cv_init(&sfmmup->sfmmu_tsb_cv, NULL, CV_DEFAULT, NULL);
1544 sfmmup->sfmmu_srdp = NULL;
1545 SF_RGNMAP_ZERO(sfmmup->sfmmu_region_map);
1546 bzero(sfmmup->sfmmu_hmeregion_links, SFMMU_L1_HMERLINKS_SIZE);
1547 sfmmup->sfmmu_scdp = NULL;
1548 sfmmup->sfmmu_scd_link.next = NULL;
1549 sfmmup->sfmmu_scd_link.prev = NULL;
1550 return (sfmmup);
1551 }
1552
1553 /*
1554 * Create per-MMU context domain kstats for a given MMU ctx.
1555 */
1556 static void
1557 sfmmu_mmu_kstat_create(mmu_ctx_t *mmu_ctxp)
1558 {
1559 mmu_ctx_stat_t stat;
1560 kstat_t *mmu_kstat;
1561
1562 ASSERT(MUTEX_HELD(&cpu_lock));
1902 * INVALID_CONTEXT to it.
1903 * Compatibility Note: hw takes care of MMU_SCONTEXT1
1904 */
1905 sfmmu_setctx_sec(INVALID_CONTEXT);
1906 sfmmu_clear_utsbinfo();
1907
1908 kpreempt_enable();
1909 sfmmu_hat_exit(hatlockp);
1910 }
1911 }
1912
1913 /*
1914 * Free all the translation resources for the specified address space.
1915 * Called from as_free when an address space is being destroyed.
1916 */
1917 void
1918 hat_free_start(struct hat *sfmmup)
1919 {
1920 ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
1921 ASSERT(sfmmup != ksfmmup);
1922 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
1923
1924 sfmmup->sfmmu_free = 1;
1925 if (sfmmup->sfmmu_scdp != NULL) {
1926 sfmmu_leave_scd(sfmmup, 0);
1927 }
1928
1929 ASSERT(sfmmup->sfmmu_scdp == NULL);
1930 }
1931
1932 void
1933 hat_free_end(struct hat *sfmmup)
1934 {
1935 int i;
1936
1937 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
1938 ASSERT(sfmmup->sfmmu_free == 1);
1939 ASSERT(sfmmup->sfmmu_ttecnt[TTE8K] == 0);
1940 ASSERT(sfmmup->sfmmu_ttecnt[TTE64K] == 0);
1941 ASSERT(sfmmup->sfmmu_ttecnt[TTE512K] == 0);
1942 ASSERT(sfmmup->sfmmu_ttecnt[TTE4M] == 0);
1943 ASSERT(sfmmup->sfmmu_ttecnt[TTE32M] == 0);
1944 ASSERT(sfmmup->sfmmu_ttecnt[TTE256M] == 0);
1945
1946 if (sfmmup->sfmmu_rmstat) {
1947 hat_freestat(sfmmup->sfmmu_as, NULL);
1948 }
1949
1950 while (sfmmup->sfmmu_tsb != NULL) {
1951 struct tsb_info *next = sfmmup->sfmmu_tsb->tsb_next;
1952 sfmmu_tsbinfo_free(sfmmup->sfmmu_tsb);
1953 sfmmup->sfmmu_tsb = next;
1954 }
1955
1956 if (sfmmup->sfmmu_srdp != NULL) {
1957 sfmmu_leave_srd(sfmmup);
1966 }
1967 sfmmu_free_sfmmu(sfmmup);
1968
1969 #ifdef DEBUG
1970 for (i = 0; i < SFMMU_L1_HMERLINKS; i++) {
1971 ASSERT(sfmmup->sfmmu_hmeregion_links[i] == NULL);
1972 }
1973 #endif
1974
1975 kmem_cache_free(sfmmuid_cache, sfmmup);
1976 }
1977
/*
 * Set up any translation structures, for the specified address space,
 * that are needed or preferred when the process is being swapped in.
 *
 * For sfmmu this is deliberately a no-op beyond the sanity check:
 * the function body performs no setup work.
 */
/* ARGSUSED */
void
hat_swapin(struct hat *hat)
{
	/* Only CPU HATs (no XHAT provider) are expected here. */
	ASSERT(hat->sfmmu_xhat_provider == NULL);
}
1988
1989 /*
1990 * Free all of the translation resources, for the specified address space,
1991 * that can be freed while the process is swapped out. Called from as_swapout.
1992 * Also, free up the ctx that this process was using.
1993 */
1994 void
1995 hat_swapout(struct hat *sfmmup)
1996 {
1997 struct hmehash_bucket *hmebp;
1998 struct hme_blk *hmeblkp;
1999 struct hme_blk *pr_hblk = NULL;
2000 struct hme_blk *nx_hblk;
2001 int i;
2002 struct hme_blk *list = NULL;
2003 hatlock_t *hatlockp;
2004 struct tsb_info *tsbinfop;
2005 struct free_tsb {
2006 struct free_tsb *next;
2007 struct tsb_info *tsbinfop;
2008 }; /* free list of TSBs */
2009 struct free_tsb *freelist, *last, *next;
2010
2011 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
2012 SFMMU_STAT(sf_swapout);
2013
2014 /*
2015 * There is no way to go from an as to all its translations in sfmmu.
2016 * Here is one of the times when we take the big hit and traverse
2017 * the hash looking for hme_blks to free up. Not only do we free up
2018 * this as hme_blks but all those that are free. We are obviously
2019 * swapping because we need memory so let's free up as much
2020 * as we can.
2021 *
2022 * Note that we don't flush TLB/TSB here -- it's not necessary
2023 * because:
2024 * 1) we free the ctx we're using and throw away the TSB(s);
2025 * 2) processes aren't runnable while being swapped out.
2026 */
2027 ASSERT(sfmmup != KHATID);
2028 for (i = 0; i <= UHMEHASH_SZ; i++) {
2029 hmebp = &uhme_hash[i];
2030 SFMMU_HASH_LOCK(hmebp);
2031 hmeblkp = hmebp->hmeblkp;
2032 pr_hblk = NULL;
2033 while (hmeblkp) {
2034
2035 ASSERT(!hmeblkp->hblk_xhat_bit);
2036
2037 if ((hmeblkp->hblk_tag.htag_id == sfmmup) &&
2038 !hmeblkp->hblk_shw_bit && !hmeblkp->hblk_lckcnt) {
2039 ASSERT(!hmeblkp->hblk_shared);
2040 (void) sfmmu_hblk_unload(sfmmup, hmeblkp,
2041 (caddr_t)get_hblk_base(hmeblkp),
2042 get_hblk_endaddr(hmeblkp),
2043 NULL, HAT_UNLOAD);
2044 }
2045 nx_hblk = hmeblkp->hblk_next;
2046 if (!hmeblkp->hblk_vcnt && !hmeblkp->hblk_hmecnt) {
2047 ASSERT(!hmeblkp->hblk_lckcnt);
2048 sfmmu_hblk_hash_rm(hmebp, hmeblkp, pr_hblk,
2049 &list, 0);
2050 } else {
2051 pr_hblk = hmeblkp;
2052 }
2053 hmeblkp = nx_hblk;
2054 }
2055 SFMMU_HASH_UNLOCK(hmebp);
2056 }
2118 sfmmu_hat_exit(hatlockp);
2119 for (; freelist != NULL; freelist = next) {
2120 next = freelist->next;
2121 sfmmu_tsb_free(freelist->tsbinfop);
2122 }
2123 }
2124
2125 /*
2126 * Duplicate the translations of an as into another newas
2127 */
2128 /* ARGSUSED */
2129 int
2130 hat_dup(struct hat *hat, struct hat *newhat, caddr_t addr, size_t len,
2131 uint_t flag)
2132 {
2133 sf_srd_t *srdp;
2134 sf_scd_t *scdp;
2135 int i;
2136 extern uint_t get_color_start(struct as *);
2137
2138 ASSERT(hat->sfmmu_xhat_provider == NULL);
2139 ASSERT((flag == 0) || (flag == HAT_DUP_ALL) || (flag == HAT_DUP_COW) ||
2140 (flag == HAT_DUP_SRD));
2141 ASSERT(hat != ksfmmup);
2142 ASSERT(newhat != ksfmmup);
2143 ASSERT(flag != HAT_DUP_ALL || hat->sfmmu_srdp == newhat->sfmmu_srdp);
2144
2145 if (flag == HAT_DUP_COW) {
2146 panic("hat_dup: HAT_DUP_COW not supported");
2147 }
2148
2149 if (flag == HAT_DUP_SRD && ((srdp = hat->sfmmu_srdp) != NULL)) {
2150 ASSERT(srdp->srd_evp != NULL);
2151 VN_HOLD(srdp->srd_evp);
2152 ASSERT(srdp->srd_refcnt > 0);
2153 newhat->sfmmu_srdp = srdp;
2154 atomic_inc_32((volatile uint_t *)&srdp->srd_refcnt);
2155 }
2156
2157 /*
2158 * HAT_DUP_ALL flag is used after as duplication is done.
2188 if (flag == HAT_DUP_ALL && consistent_coloring == 0 &&
2189 update_proc_pgcolorbase_after_fork != 0) {
2190 hat->sfmmu_clrbin = get_color_start(hat->sfmmu_as);
2191 }
2192 return (0);
2193 }
2194
2195 void
2196 hat_memload(struct hat *hat, caddr_t addr, struct page *pp,
2197 uint_t attr, uint_t flags)
2198 {
2199 hat_do_memload(hat, addr, pp, attr, flags,
2200 SFMMU_INVALID_SHMERID);
2201 }
2202
2203 void
2204 hat_memload_region(struct hat *hat, caddr_t addr, struct page *pp,
2205 uint_t attr, uint_t flags, hat_region_cookie_t rcookie)
2206 {
2207 uint_t rid;
2208 if (rcookie == HAT_INVALID_REGION_COOKIE ||
2209 hat->sfmmu_xhat_provider != NULL) {
2210 hat_do_memload(hat, addr, pp, attr, flags,
2211 SFMMU_INVALID_SHMERID);
2212 return;
2213 }
2214 rid = (uint_t)((uint64_t)rcookie);
2215 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
2216 hat_do_memload(hat, addr, pp, attr, flags, rid);
2217 }
2218
2219 /*
2220 * Set up addr to map to page pp with protection prot.
2221 * As an optimization we also load the TSB with the
2222 * corresponding tte but it is no big deal if the tte gets kicked out.
2223 */
2224 static void
2225 hat_do_memload(struct hat *hat, caddr_t addr, struct page *pp,
2226 uint_t attr, uint_t flags, uint_t rid)
2227 {
2228 tte_t tte;
2229
2230
2231 ASSERT(hat != NULL);
2232 ASSERT(PAGE_LOCKED(pp));
2233 ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
2234 ASSERT(!(flags & ~SFMMU_LOAD_ALLFLAG));
2235 ASSERT(!(attr & ~SFMMU_LOAD_ALLATTR));
2236 SFMMU_VALIDATE_HMERID(hat, rid, addr, MMU_PAGESIZE);
2237
2238 if (PP_ISFREE(pp)) {
2239 panic("hat_memload: loading a mapping to free page %p",
2240 (void *)pp);
2241 }
2242
2243 if (hat->sfmmu_xhat_provider) {
2244 /* no regions for xhats */
2245 ASSERT(!SFMMU_IS_SHMERID_VALID(rid));
2246 XHAT_MEMLOAD(hat, addr, pp, attr, flags);
2247 return;
2248 }
2249
2250 ASSERT((hat == ksfmmup) ||
2251 AS_LOCK_HELD(hat->sfmmu_as, &hat->sfmmu_as->a_lock));
2252
2253 if (flags & ~SFMMU_LOAD_ALLFLAG)
2254 cmn_err(CE_NOTE, "hat_memload: unsupported flags %d",
2255 flags & ~SFMMU_LOAD_ALLFLAG);
2256
2257 if (hat->sfmmu_rmstat)
2258 hat_resvstat(MMU_PAGESIZE, hat->sfmmu_as, addr);
2259
2260 #if defined(SF_ERRATA_57)
2261 if ((hat != ksfmmup) && AS_TYPE_64BIT(hat->sfmmu_as) &&
2262 (addr < errata57_limit) && (attr & PROT_EXEC) &&
2263 !(flags & HAT_LOAD_SHARE)) {
2264 cmn_err(CE_WARN, "hat_memload: illegal attempt to make user "
2265 " page executable");
2266 attr &= ~PROT_EXEC;
2267 }
2268 #endif
2269
2279 }
2280
2281 /*
2282 * hat_devload can be called to map real memory (e.g.
2283 * /dev/kmem) and even though hat_devload will determine pf is
2284 * for memory, it will be unable to get a shared lock on the
2285 * page (because someone else has it exclusively) and will
2286 * pass dp = NULL. If tteload doesn't get a non-NULL
2287 * page pointer it can't cache memory.
2288 */
2289 void
2290 hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn,
2291 uint_t attr, int flags)
2292 {
2293 tte_t tte;
2294 struct page *pp = NULL;
2295 int use_lgpg = 0;
2296
2297 ASSERT(hat != NULL);
2298
2299 if (hat->sfmmu_xhat_provider) {
2300 XHAT_DEVLOAD(hat, addr, len, pfn, attr, flags);
2301 return;
2302 }
2303
2304 ASSERT(!(flags & ~SFMMU_LOAD_ALLFLAG));
2305 ASSERT(!(attr & ~SFMMU_LOAD_ALLATTR));
2306 ASSERT((hat == ksfmmup) ||
2307 AS_LOCK_HELD(hat->sfmmu_as, &hat->sfmmu_as->a_lock));
2308 if (len == 0)
2309 panic("hat_devload: zero len");
2310 if (flags & ~SFMMU_LOAD_ALLFLAG)
2311 cmn_err(CE_NOTE, "hat_devload: unsupported flags %d",
2312 flags & ~SFMMU_LOAD_ALLFLAG);
2313
2314 #if defined(SF_ERRATA_57)
2315 if ((hat != ksfmmup) && AS_TYPE_64BIT(hat->sfmmu_as) &&
2316 (addr < errata57_limit) && (attr & PROT_EXEC) &&
2317 !(flags & HAT_LOAD_SHARE)) {
2318 cmn_err(CE_WARN, "hat_devload: illegal attempt to make user "
2319 " page executable");
2320 attr &= ~PROT_EXEC;
2321 }
2322 #endif
2323
2430 */
2431 if ((flags & HAT_LOAD_SHARE) == 0) {
2432 sfmmu_check_page_sizes(hat, 1);
2433 }
2434 }
2435
2436 void
2437 hat_memload_array(struct hat *hat, caddr_t addr, size_t len,
2438 struct page **pps, uint_t attr, uint_t flags)
2439 {
2440 hat_do_memload_array(hat, addr, len, pps, attr, flags,
2441 SFMMU_INVALID_SHMERID);
2442 }
2443
2444 void
2445 hat_memload_array_region(struct hat *hat, caddr_t addr, size_t len,
2446 struct page **pps, uint_t attr, uint_t flags,
2447 hat_region_cookie_t rcookie)
2448 {
2449 uint_t rid;
2450 if (rcookie == HAT_INVALID_REGION_COOKIE ||
2451 hat->sfmmu_xhat_provider != NULL) {
2452 hat_do_memload_array(hat, addr, len, pps, attr, flags,
2453 SFMMU_INVALID_SHMERID);
2454 return;
2455 }
2456 rid = (uint_t)((uint64_t)rcookie);
2457 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
2458 hat_do_memload_array(hat, addr, len, pps, attr, flags, rid);
2459 }
2460
2461 /*
 * Map the largest extent possible out of the page array. The array may NOT
2463 * be in order. The largest possible mapping a page can have
2464 * is specified in the p_szc field. The p_szc field
2465 * cannot change as long as there any mappings (large or small)
2466 * to any of the pages that make up the large page. (ie. any
2467 * promotion/demotion of page size is not up to the hat but up to
2468 * the page free list manager). The array
 * should consist of properly aligned contiguous pages that are
2470 * part of a big page for a large mapping to be created.
2471 */
2472 static void
2473 hat_do_memload_array(struct hat *hat, caddr_t addr, size_t len,
2474 struct page **pps, uint_t attr, uint_t flags, uint_t rid)
2475 {
2476 int ttesz;
2477 size_t mapsz;
2478 pgcnt_t numpg, npgs;
2479 tte_t tte;
2480 page_t *pp;
2481 uint_t large_pages_disable;
2482
2483 ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
2484 SFMMU_VALIDATE_HMERID(hat, rid, addr, len);
2485
2486 if (hat->sfmmu_xhat_provider) {
2487 ASSERT(!SFMMU_IS_SHMERID_VALID(rid));
2488 XHAT_MEMLOAD_ARRAY(hat, addr, len, pps, attr, flags);
2489 return;
2490 }
2491
2492 if (hat->sfmmu_rmstat)
2493 hat_resvstat(len, hat->sfmmu_as, addr);
2494
2495 #if defined(SF_ERRATA_57)
2496 if ((hat != ksfmmup) && AS_TYPE_64BIT(hat->sfmmu_as) &&
2497 (addr < errata57_limit) && (attr & PROT_EXEC) &&
2498 !(flags & HAT_LOAD_SHARE)) {
2499 cmn_err(CE_WARN, "hat_memload_array: illegal attempt to make "
2500 "user page executable");
2501 attr &= ~PROT_EXEC;
2502 }
2503 #endif
2504
2505 /* Get number of pages */
2506 npgs = len >> MMU_PAGESHIFT;
2507
2508 if (flags & HAT_LOAD_SHARE) {
2509 large_pages_disable = disable_ism_large_pages;
2510 } else {
2511 large_pages_disable = disable_large_pages;
3954 rsz, rgnp->rgn_obj,
3955 rgnp->rgn_objoff);
3956 }
3957 ttesz--;
3958 }
3959 }
3960
3961 /*
3962 * Release one hardware address translation lock on the given address range.
3963 */
3964 void
3965 hat_unlock(struct hat *sfmmup, caddr_t addr, size_t len)
3966 {
3967 struct hmehash_bucket *hmebp;
3968 hmeblk_tag hblktag;
3969 int hmeshift, hashno = 1;
3970 struct hme_blk *hmeblkp, *list = NULL;
3971 caddr_t endaddr;
3972
3973 ASSERT(sfmmup != NULL);
3974 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
3975
3976 ASSERT((sfmmup == ksfmmup) ||
3977 AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
3978 ASSERT((len & MMU_PAGEOFFSET) == 0);
3979 endaddr = addr + len;
3980 hblktag.htag_id = sfmmup;
3981 hblktag.htag_rid = SFMMU_INVALID_SHMERID;
3982
3983 /*
3984 * Spitfire supports 4 page sizes.
3985 * Most pages are expected to be of the smallest page size (8K) and
3986 * these will not need to be rehashed. 64K pages also don't need to be
3987 * rehashed because an hmeblk spans 64K of address space. 512K pages
 * might need 1 rehash and 4M pages might need 2 rehashes.
3989 */
3990 while (addr < endaddr) {
3991 hmeshift = HME_HASH_SHIFT(hashno);
3992 hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift);
3993 hblktag.htag_rehash = hashno;
3994 hmebp = HME_HASH_FUNCTION(sfmmup, addr, hmeshift);
4039 {
4040 sf_srd_t *srdp;
4041 sf_region_t *rgnp;
4042 int ttesz;
4043 uint_t rid;
4044 caddr_t eaddr;
4045 caddr_t va;
4046 int hmeshift;
4047 hmeblk_tag hblktag;
4048 struct hmehash_bucket *hmebp;
4049 struct hme_blk *hmeblkp;
4050 struct hme_blk *pr_hblk;
4051 struct hme_blk *list;
4052
4053 if (rcookie == HAT_INVALID_REGION_COOKIE) {
4054 hat_unlock(sfmmup, addr, len);
4055 return;
4056 }
4057
4058 ASSERT(sfmmup != NULL);
4059 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
4060 ASSERT(sfmmup != ksfmmup);
4061
4062 srdp = sfmmup->sfmmu_srdp;
4063 rid = (uint_t)((uint64_t)rcookie);
4064 VERIFY3U(rid, <, SFMMU_MAX_HME_REGIONS);
4065 eaddr = addr + len;
4066 va = addr;
4067 list = NULL;
4068 rgnp = srdp->srd_hmergnp[rid];
4069 SFMMU_VALIDATE_HMERID(sfmmup, rid, addr, len);
4070
4071 ASSERT(IS_P2ALIGNED(addr, TTEBYTES(rgnp->rgn_pgszc)));
4072 ASSERT(IS_P2ALIGNED(len, TTEBYTES(rgnp->rgn_pgszc)));
4073 if (rgnp->rgn_pgszc < HBLK_MIN_TTESZ) {
4074 ttesz = HBLK_MIN_TTESZ;
4075 } else {
4076 ttesz = rgnp->rgn_pgszc;
4077 }
4078 while (va < eaddr) {
4079 while (ttesz < rgnp->rgn_pgszc &&
4751 page_unlock(pp);
4752 }
4753
4754 /*
4755 * hat_probe returns 1 if the translation for the address 'addr' is
4756 * loaded, zero otherwise.
4757 *
4758 * hat_probe should be used only for advisorary purposes because it may
4759 * occasionally return the wrong value. The implementation must guarantee that
4760 * returning the wrong value is a very rare event. hat_probe is used
4761 * to implement optimizations in the segment drivers.
4762 *
4763 */
4764 int
4765 hat_probe(struct hat *sfmmup, caddr_t addr)
4766 {
4767 pfn_t pfn;
4768 tte_t tte;
4769
4770 ASSERT(sfmmup != NULL);
4771 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
4772
4773 ASSERT((sfmmup == ksfmmup) ||
4774 AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
4775
4776 if (sfmmup == ksfmmup) {
4777 while ((pfn = sfmmu_vatopfn(addr, sfmmup, &tte))
4778 == PFN_SUSPENDED) {
4779 sfmmu_vatopfn_suspended(addr, sfmmup, &tte);
4780 }
4781 } else {
4782 pfn = sfmmu_uvatopfn(addr, sfmmup, NULL);
4783 }
4784
4785 if (pfn != PFN_INVALID)
4786 return (1);
4787 else
4788 return (0);
4789 }
4790
4791 ssize_t
4792 hat_getpagesize(struct hat *sfmmup, caddr_t addr)
4793 {
4794 tte_t tte;
4795
4796 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
4797
4798 if (sfmmup == ksfmmup) {
4799 if (sfmmu_vatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
4800 return (-1);
4801 }
4802 } else {
4803 if (sfmmu_uvatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
4804 return (-1);
4805 }
4806 }
4807
4808 ASSERT(TTE_IS_VALID(&tte));
4809 return (TTEBYTES(TTE_CSZ(&tte)));
4810 }
4811
4812 uint_t
4813 hat_getattr(struct hat *sfmmup, caddr_t addr, uint_t *attr)
4814 {
4815 tte_t tte;
4816
4817 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
4818
4819 if (sfmmup == ksfmmup) {
4820 if (sfmmu_vatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
4821 tte.ll = 0;
4822 }
4823 } else {
4824 if (sfmmu_uvatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
4825 tte.ll = 0;
4826 }
4827 }
4828 if (TTE_IS_VALID(&tte)) {
4829 *attr = sfmmu_ptov_attr(&tte);
4830 return (0);
4831 }
4832 *attr = 0;
4833 return ((uint_t)0xffffffff);
4834 }
4835
4836 /*
4837 * Enables more attributes on specified address range (ie. logical OR)
4838 */
4839 void
4840 hat_setattr(struct hat *hat, caddr_t addr, size_t len, uint_t attr)
4841 {
4842 if (hat->sfmmu_xhat_provider) {
4843 XHAT_SETATTR(hat, addr, len, attr);
4844 return;
4845 } else {
4846 /*
4847 * This must be a CPU HAT. If the address space has
4848 * XHATs attached, change attributes for all of them,
4849 * just in case
4850 */
4851 ASSERT(hat->sfmmu_as != NULL);
4852 if (hat->sfmmu_as->a_xhat != NULL)
4853 xhat_setattr_all(hat->sfmmu_as, addr, len, attr);
4854 }
4855
4856 sfmmu_chgattr(hat, addr, len, attr, SFMMU_SETATTR);
4857 }
4858
4859 /*
4860 * Assigns attributes to the specified address range. All the attributes
4861 * are specified.
4862 */
4863 void
4864 hat_chgattr(struct hat *hat, caddr_t addr, size_t len, uint_t attr)
4865 {
4866 if (hat->sfmmu_xhat_provider) {
4867 XHAT_CHGATTR(hat, addr, len, attr);
4868 return;
4869 } else {
4870 /*
4871 * This must be a CPU HAT. If the address space has
4872 * XHATs attached, change attributes for all of them,
4873 * just in case
4874 */
4875 ASSERT(hat->sfmmu_as != NULL);
4876 if (hat->sfmmu_as->a_xhat != NULL)
4877 xhat_chgattr_all(hat->sfmmu_as, addr, len, attr);
4878 }
4879
4880 sfmmu_chgattr(hat, addr, len, attr, SFMMU_CHGATTR);
4881 }
4882
4883 /*
4884 * Remove attributes on the specified address range (ie. loginal NAND)
4885 */
4886 void
4887 hat_clrattr(struct hat *hat, caddr_t addr, size_t len, uint_t attr)
4888 {
4889 if (hat->sfmmu_xhat_provider) {
4890 XHAT_CLRATTR(hat, addr, len, attr);
4891 return;
4892 } else {
4893 /*
4894 * This must be a CPU HAT. If the address space has
4895 * XHATs attached, change attributes for all of them,
4896 * just in case
4897 */
4898 ASSERT(hat->sfmmu_as != NULL);
4899 if (hat->sfmmu_as->a_xhat != NULL)
4900 xhat_clrattr_all(hat->sfmmu_as, addr, len, attr);
4901 }
4902
4903 sfmmu_chgattr(hat, addr, len, attr, SFMMU_CLRATTR);
4904 }
4905
4906 /*
4907 * Change attributes on an address range to that specified by attr and mode.
4908 */
4909 static void
4910 sfmmu_chgattr(struct hat *sfmmup, caddr_t addr, size_t len, uint_t attr,
4911 int mode)
4912 {
4913 struct hmehash_bucket *hmebp;
4914 hmeblk_tag hblktag;
4915 int hmeshift, hashno = 1;
4916 struct hme_blk *hmeblkp, *list = NULL;
4917 caddr_t endaddr;
4918 cpuset_t cpuset;
4919 demap_range_t dmr;
4920
4921 CPUSET_ZERO(cpuset);
5234 * Change the protections in the virtual address range
5235 * given to the specified virtual protection. If vprot is ~PROT_WRITE,
5236 * then remove write permission, leaving the other
5237 * permissions unchanged. If vprot is ~PROT_USER, remove user permissions.
5238 *
5239 */
5240 void
5241 hat_chgprot(struct hat *sfmmup, caddr_t addr, size_t len, uint_t vprot)
5242 {
5243 struct hmehash_bucket *hmebp;
5244 hmeblk_tag hblktag;
5245 int hmeshift, hashno = 1;
5246 struct hme_blk *hmeblkp, *list = NULL;
5247 caddr_t endaddr;
5248 cpuset_t cpuset;
5249 demap_range_t dmr;
5250
5251 ASSERT((len & MMU_PAGEOFFSET) == 0);
5252 ASSERT(((uintptr_t)addr & MMU_PAGEOFFSET) == 0);
5253
5254 if (sfmmup->sfmmu_xhat_provider) {
5255 XHAT_CHGPROT(sfmmup, addr, len, vprot);
5256 return;
5257 } else {
5258 /*
5259 * This must be a CPU HAT. If the address space has
5260 * XHATs attached, change attributes for all of them,
5261 * just in case
5262 */
5263 ASSERT(sfmmup->sfmmu_as != NULL);
5264 if (sfmmup->sfmmu_as->a_xhat != NULL)
5265 xhat_chgprot_all(sfmmup->sfmmu_as, addr, len, vprot);
5266 }
5267
5268 CPUSET_ZERO(cpuset);
5269
5270 if ((vprot != (uint_t)~PROT_WRITE) && (vprot & PROT_USER) &&
5271 ((addr + len) > (caddr_t)USERLIMIT)) {
5272 panic("user addr %p vprot %x in kernel space",
5273 (void *)addr, vprot);
5274 }
5275 endaddr = addr + len;
5276 hblktag.htag_id = sfmmup;
5277 hblktag.htag_rid = SFMMU_INVALID_SHMERID;
5278 DEMAP_RANGE_INIT(sfmmup, &dmr);
5279
5280 while (addr < endaddr) {
5281 hmeshift = HME_HASH_SHIFT(hashno);
5282 hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift);
5283 hblktag.htag_rehash = hashno;
5284 hmebp = HME_HASH_FUNCTION(sfmmup, addr, hmeshift);
5285
5286 SFMMU_HASH_LOCK(hmebp);
5681 hat_unload_callback(
5682 struct hat *sfmmup,
5683 caddr_t addr,
5684 size_t len,
5685 uint_t flags,
5686 hat_callback_t *callback)
5687 {
5688 struct hmehash_bucket *hmebp;
5689 hmeblk_tag hblktag;
5690 int hmeshift, hashno, iskernel;
5691 struct hme_blk *hmeblkp, *pr_hblk, *list = NULL;
5692 caddr_t endaddr;
5693 cpuset_t cpuset;
5694 int addr_count = 0;
5695 int a;
5696 caddr_t cb_start_addr[MAX_CB_ADDR];
5697 caddr_t cb_end_addr[MAX_CB_ADDR];
5698 int issegkmap = ISSEGKMAP(sfmmup, addr);
5699 demap_range_t dmr, *dmrp;
5700
5701 if (sfmmup->sfmmu_xhat_provider) {
5702 XHAT_UNLOAD_CALLBACK(sfmmup, addr, len, flags, callback);
5703 return;
5704 } else {
5705 /*
5706 * This must be a CPU HAT. If the address space has
5707 * XHATs attached, unload the mappings for all of them,
5708 * just in case
5709 */
5710 ASSERT(sfmmup->sfmmu_as != NULL);
5711 if (sfmmup->sfmmu_as->a_xhat != NULL)
5712 xhat_unload_callback_all(sfmmup->sfmmu_as, addr,
5713 len, flags, callback);
5714 }
5715
5716 ASSERT((sfmmup == ksfmmup) || (flags & HAT_UNLOAD_OTHER) || \
5717 AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
5718
5719 ASSERT(sfmmup != NULL);
5720 ASSERT((len & MMU_PAGEOFFSET) == 0);
5721 ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
5722
5723 /*
5724 * Probing through a large VA range (say 63 bits) will be slow, even
5725 * at 4 Meg steps between the probes. So, when the virtual address range
5726 * is very large, search the HME entries for what to unload.
5727 *
5728 * len >> TTE_PAGE_SHIFT(TTE4M) is the # of 4Meg probes we'd need
5729 *
5730 * UHMEHASH_SZ is number of hash buckets to examine
5731 *
5732 */
5733 if (sfmmup != KHATID && (len >> TTE_PAGE_SHIFT(TTE4M)) > UHMEHASH_SZ) {
5734 hat_unload_large_virtual(sfmmup, addr, len, flags, callback);
5971 callback->hcb_start_addr = cb_start_addr[a];
5972 callback->hcb_end_addr = cb_end_addr[a];
5973 callback->hcb_function(callback);
5974 }
5975 }
5976
5977 /*
5978 * Check TSB and TLB page sizes if the process isn't exiting.
5979 */
5980 if (!sfmmup->sfmmu_free)
5981 sfmmu_check_page_sizes(sfmmup, 0);
5982 }
5983
5984 /*
5985 * Unload all the mappings in the range [addr..addr+len). addr and len must
5986 * be MMU_PAGESIZE aligned.
5987 */
5988 void
5989 hat_unload(struct hat *sfmmup, caddr_t addr, size_t len, uint_t flags)
5990 {
5991 if (sfmmup->sfmmu_xhat_provider) {
5992 XHAT_UNLOAD(sfmmup, addr, len, flags);
5993 return;
5994 }
5995 hat_unload_callback(sfmmup, addr, len, flags, NULL);
5996 }
5997
5998
5999 /*
6000 * Find the largest mapping size for this page.
6001 */
6002 int
6003 fnd_mapping_sz(page_t *pp)
6004 {
6005 int sz;
6006 int p_index;
6007
6008 p_index = PP_MAPINDEX(pp);
6009
6010 sz = 0;
6011 p_index >>= 1; /* don't care about 8K bit */
6012 for (; p_index; p_index >>= 1) {
6013 sz++;
6014 }
6314 va += sz;
6315 }
6316 }
6317
6318 /*
6319 * Synchronize all the mappings in the range [addr..addr+len).
6320 * Can be called with clearflag having two states:
6321 * HAT_SYNC_DONTZERO means just return the rm stats
6322 * HAT_SYNC_ZERORM means zero rm bits in the tte and return the stats
6323 */
6324 void
6325 hat_sync(struct hat *sfmmup, caddr_t addr, size_t len, uint_t clearflag)
6326 {
6327 struct hmehash_bucket *hmebp;
6328 hmeblk_tag hblktag;
6329 int hmeshift, hashno = 1;
6330 struct hme_blk *hmeblkp, *list = NULL;
6331 caddr_t endaddr;
6332 cpuset_t cpuset;
6333
6334 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
6335 ASSERT((sfmmup == ksfmmup) ||
6336 AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
6337 ASSERT((len & MMU_PAGEOFFSET) == 0);
6338 ASSERT((clearflag == HAT_SYNC_DONTZERO) ||
6339 (clearflag == HAT_SYNC_ZERORM));
6340
6341 CPUSET_ZERO(cpuset);
6342
6343 endaddr = addr + len;
6344 hblktag.htag_id = sfmmup;
6345 hblktag.htag_rid = SFMMU_INVALID_SHMERID;
6346
6347 /*
6348 * Spitfire supports 4 page sizes.
6349 * Most pages are expected to be of the smallest page
6350 * size (8K) and these will not need to be rehashed. 64K
 * pages also don't need to be rehashed because an hmeblk
 * spans 64K of address space. 512K pages might need 1 rehash
 * and 4M pages 2 rehashes.
6354 */
7121 return; /* non-fatal */
7122 }
7123 panic("pa_hment leaked: 0x%p", (void *)pahmep);
7124 }
7125
/*
 * Remove all mappings to page 'pp'.
 *
 * Walks pp's mapping list (and the mapping lists of any larger pages
 * it participates in, per PP_MAPINDEX) unloading each hment.  XHAT
 * mappings are delegated to their providers and may require a full
 * restart (retry_xhat).  pa_hments are counted on the first pass and,
 * unless we are relocating kernel pages, forcibly reclaimed at the end.
 *
 * forceflag == SFMMU_KERNEL_RELOC means we were called from
 * hat_page_relocate(): locked kernel mappings are left in place to be
 * suspended later rather than unloaded.
 *
 * The page must be held EXCL by the caller.  Always returns 0.
 */
int
hat_pageunload(struct page *pp, uint_t forceflag)
{
	struct page *origpp = pp;
	struct sf_hment *sfhme, *tmphme;
	struct hme_blk *hmeblkp;
	kmutex_t *pml;
#ifdef VAC
	kmutex_t *pmtx;
#endif
	cpuset_t cpuset, tset;
	int index, cons;
	int xhme_blks;
	int pa_hments;

	ASSERT(PAGE_EXCL(pp));

retry_xhat:
	/* Reset per-pass state; we may come back here after XHAT work. */
	tmphme = NULL;
	xhme_blks = 0;
	pa_hments = 0;
	CPUSET_ZERO(cpuset);

	pml = sfmmu_mlist_enter(pp);

#ifdef VAC
	/* Tear down any kpm mapping of the page first. */
	if (pp->p_kpmref)
		sfmmu_kpm_pageunload(pp);
	ASSERT(!PP_ISMAPPED_KPM(pp));
#endif
	/*
	 * Clear vpm reference. Since the page is exclusively locked
	 * vpm cannot be referencing it.
	 */
	if (vpm_enable) {
		pp->p_vpmref = 0;
	}

	index = PP_MAPINDEX(pp);
	cons = TTE8K;
retry:
	/*
	 * tmphme caches the next hment because sfmmu_pageunload()
	 * below may remove the current one from the list.
	 */
	for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
		tmphme = sfhme->hme_next;

		if (IS_PAHME(sfhme)) {
			/* pa_hments are handled after the main pass. */
			ASSERT(sfhme->hme_data != NULL);
			pa_hments++;
			continue;
		}

		hmeblkp = sfmmu_hmetohblk(sfhme);
		if (hmeblkp->hblk_xhat_bit) {
			/* Foreign mapping: let the XHAT provider unload it. */
			struct xhat_hme_blk *xblk =
			    (struct xhat_hme_blk *)hmeblkp;

			(void) XHAT_PAGEUNLOAD(xblk->xhat_hme_blk_hat,
			    pp, forceflag, XBLK2PROVBLK(xblk));

			xhme_blks = 1;
			continue;
		}

		/*
		 * If there are kernel mappings don't unload them, they will
		 * be suspended.
		 */
		if (forceflag == SFMMU_KERNEL_RELOC && hmeblkp->hblk_lckcnt &&
		    hmeblkp->hblk_tag.htag_id == ksfmmup)
			continue;

		/* Accumulate the CPUs that need a cross-trap sync. */
		tset = sfmmu_pageunload(pp, sfhme, cons);
		CPUSET_OR(cpuset, tset);
	}

	/*
	 * Walk up through the larger page sizes this page belongs to
	 * (one bit per size in the map index) and unload those
	 * mappings from the group leader's list as well.
	 */
	while (index != 0) {
		index = index >> 1;
		if (index != 0)
			cons++;
		if (index & 0x1) {
			/* Go to leading page */
			pp = PP_GROUPLEADER(pp, cons);
			ASSERT(sfmmu_mlist_held(pp));
			goto retry;
		}
	}

	/*
	 * cpuset may be empty if the page was only mapped by segkpm,
	 * in which case we won't actually cross-trap.
	 */
	xt_sync(cpuset);

	/*
	 * The page should have no mappings at this point, unless
	 * we were called from hat_page_relocate() in which case we
	 * leave the locked mappings which will be suspended later.
	 */
	ASSERT(!PP_ISMAPPED(origpp) || xhme_blks || pa_hments ||
	    (forceflag == SFMMU_KERNEL_RELOC));

#ifdef VAC
	/* Clear the temporary-noncacheable state now mappings are gone. */
	if (PP_ISTNC(pp)) {
		if (cons == TTE8K) {
			pmtx = sfmmu_page_enter(pp);
			PP_CLRTNC(pp);
			sfmmu_page_exit(pmtx);
		} else {
			conv_tnc(pp, cons);
		}
	}
#endif	/* VAC */

	if (pa_hments && forceflag != SFMMU_KERNEL_RELOC) {
		/*
		 * Unlink any pa_hments and free them, calling back
		 * the responsible subsystem to notify it of the error.
		 * This can occur in situations such as drivers leaking
		 * DMA handles: naughty, but common enough that we'd like
		 * to keep the system running rather than bringing it
		 * down with an obscure error like "pa_hment leaked"
		 * which doesn't aid the user in debugging their driver.
		 */
		for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
			tmphme = sfhme->hme_next;
			if (IS_PAHME(sfhme)) {
				struct pa_hment *pahmep = sfhme->hme_data;
				sfmmu_pahment_leaked(pahmep);
				HME_SUB(sfhme, pp);
				kmem_cache_free(pa_hment_cache, pahmep);
			}
		}

		ASSERT(!PP_ISMAPPED(origpp) || xhme_blks);
	}

	sfmmu_mlist_exit(pml);

	/*
	 * XHAT may not have finished unloading pages
	 * because some other thread was waiting for
	 * mlist lock and XHAT_PAGEUNLOAD let it do
	 * the job.
	 */
	if (xhme_blks) {
		pp = origpp;
		goto retry_xhat;
	}

	return (0);
}
7279
7280 cpuset_t
7281 sfmmu_pageunload(page_t *pp, struct sf_hment *sfhme, int cons)
7282 {
7283 struct hme_blk *hmeblkp;
7284 sfmmu_t *sfmmup;
7285 tte_t tte, ttemod;
7286 #ifdef DEBUG
7287 tte_t orig_old;
7288 #endif /* DEBUG */
7289 caddr_t addr;
7290 int ttesz;
7291 int ret;
7292 cpuset_t cpuset;
7293
7294 ASSERT(pp != NULL);
7295 ASSERT(sfmmu_mlist_held(pp));
7296 ASSERT(!PP_ISKAS(pp));
7537
7538 clearflag &= ~HAT_SYNC_STOPON_SHARED;
7539 pml = sfmmu_mlist_enter(pp);
7540 index = PP_MAPINDEX(pp);
7541 cons = TTE8K;
7542 retry:
7543 for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
7544 /*
7545 * We need to save the next hment on the list since
7546 * it is possible for pagesync to remove an invalid hment
7547 * from the list.
7548 */
7549 tmphme = sfhme->hme_next;
7550 if (IS_PAHME(sfhme))
7551 continue;
7552 /*
7553 * If we are looking for large mappings and this hme doesn't
7554 * reach the range we are seeking, just ignore it.
7555 */
7556 hmeblkp = sfmmu_hmetohblk(sfhme);
7557 if (hmeblkp->hblk_xhat_bit)
7558 continue;
7559
7560 if (hme_size(sfhme) < cons)
7561 continue;
7562
7563 if (stop_on_sh) {
7564 if (hmeblkp->hblk_shared) {
7565 sf_srd_t *srdp = hblktosrd(hmeblkp);
7566 uint_t rid = hmeblkp->hblk_tag.htag_rid;
7567 sf_region_t *rgnp;
7568 ASSERT(SFMMU_IS_SHMERID_VALID(rid));
7569 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
7570 ASSERT(srdp != NULL);
7571 rgnp = srdp->srd_hmergnp[rid];
7572 SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp,
7573 rgnp, rid);
7574 shcnt += rgnp->rgn_refcnt;
7575 } else {
7576 shcnt++;
7577 }
7578 if (shcnt > po_share) {
7698 {
7699 caddr_t addr;
7700 tte_t tte;
7701 tte_t ttemod;
7702 struct hme_blk *hmeblkp;
7703 int ret;
7704 sfmmu_t *sfmmup;
7705 cpuset_t cpuset;
7706
7707 ASSERT(pp != NULL);
7708 ASSERT(sfmmu_mlist_held(pp));
7709
7710 CPUSET_ZERO(cpuset);
7711 SFMMU_STAT(sf_clrwrt);
7712
7713 retry:
7714
7715 sfmmu_copytte(&sfhme->hme_tte, &tte);
7716 if (TTE_IS_VALID(&tte) && TTE_IS_WRITABLE(&tte)) {
7717 hmeblkp = sfmmu_hmetohblk(sfhme);
7718
7719 /*
7720 * xhat mappings should never be to a VMODSORT page.
7721 */
7722 ASSERT(hmeblkp->hblk_xhat_bit == 0);
7723
7724 sfmmup = hblktosfmmu(hmeblkp);
7725 addr = tte_to_vaddr(hmeblkp, tte);
7726
7727 ttemod = tte;
7728 TTE_CLR_WRT(&ttemod);
7729 TTE_CLR_MOD(&ttemod);
7730 ret = sfmmu_modifytte_try(&tte, &ttemod, &sfhme->hme_tte);
7731
7732 /*
7733 * if cas failed and the new value is not what
7734 * we want retry
7735 */
7736 if (ret < 0)
7737 goto retry;
7738
7739 /* we win the cas */
7740 if (ret > 0) {
7741 if (hmeblkp->hblk_shared) {
7742 sf_srd_t *srdp = (sf_srd_t *)sfmmup;
7743 uint_t rid = hmeblkp->hblk_tag.htag_rid;
7968 * Returns a page frame number for a given virtual address.
7969 * Returns PFN_INVALID to indicate an invalid mapping
7970 */
7971 pfn_t
7972 hat_getpfnum(struct hat *hat, caddr_t addr)
7973 {
7974 pfn_t pfn;
7975 tte_t tte;
7976
7977 /*
7978 * We would like to
7979 * ASSERT(AS_LOCK_HELD(as, &as->a_lock));
7980 * but we can't because the iommu driver will call this
7981 * routine at interrupt time and it can't grab the as lock
7982 * or it will deadlock: A thread could have the as lock
7983 * and be waiting for io. The io can't complete
7984 * because the interrupt thread is blocked trying to grab
7985 * the as lock.
7986 */
7987
7988 ASSERT(hat->sfmmu_xhat_provider == NULL);
7989
7990 if (hat == ksfmmup) {
7991 if (IS_KMEM_VA_LARGEPAGE(addr)) {
7992 ASSERT(segkmem_lpszc > 0);
7993 pfn = sfmmu_kvaszc2pfn(addr, segkmem_lpszc);
7994 if (pfn != PFN_INVALID) {
7995 sfmmu_check_kpfn(pfn);
7996 return (pfn);
7997 }
7998 } else if (segkpm && IS_KPM_ADDR(addr)) {
7999 return (sfmmu_kpm_vatopfn(addr));
8000 }
8001 while ((pfn = sfmmu_vatopfn(addr, ksfmmup, &tte))
8002 == PFN_SUSPENDED) {
8003 sfmmu_vatopfn_suspended(addr, ksfmmup, &tte);
8004 }
8005 sfmmu_check_kpfn(pfn);
8006 return (pfn);
8007 } else {
8008 return (sfmmu_uvatopfn(addr, hat, NULL));
8009 }
8153 SFMMU_HASH_UNLOCK(hmebp);
8154 pfn = PFN_INVALID;
8155 return (pfn);
8156 }
8157 }
8158 SFMMU_HASH_UNLOCK(hmebp);
8159 hashno++;
8160 } while (hashno <= mmu_hashcnt);
8161 return (PFN_INVALID);
8162 }
8163
8164
/*
 * For compatibility with AT&T and later optimizations.
 *
 * On sfmmu this is a no-op: there are no translation structures to
 * pre-build for the range, so only the arguments are sanity-checked
 * (under DEBUG).
 */
/* ARGSUSED */
void
hat_map(struct hat *hat, caddr_t addr, size_t len, uint_t flags)
{
	ASSERT(hat != NULL);
	/* This entry point is for CPU HATs only, never XHAT providers. */
	ASSERT(hat->sfmmu_xhat_provider == NULL);
}
8175
8176 /*
8177 * Return the number of mappings to a particular page. This number is an
8178 * approximation of the number of people sharing the page.
8179 *
8180 * shared hmeblks or ism hmeblks are counted as 1 mapping here.
8181 * hat_page_checkshare() can be used to compare threshold to share
8182 * count that reflects the number of region sharers albeit at higher cost.
8183 */
8184 ulong_t
8185 hat_page_getshare(page_t *pp)
8186 {
8187 page_t *spp = pp; /* start page */
8188 kmutex_t *pml;
8189 ulong_t cnt;
8190 int index, sz = TTE64K;
8191
8192 /*
8193 * We need to grab the mlist lock to make sure any outstanding
8246
8247 if (vpm_enable && pp->p_vpmref) {
8248 cnt += 1;
8249 }
8250
8251 if (pp->p_share + cnt > sh_thresh) {
8252 sfmmu_mlist_exit(pml);
8253 return (1);
8254 }
8255
8256 index = PP_MAPINDEX(pp);
8257
8258 again:
8259 for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
8260 tmphme = sfhme->hme_next;
8261 if (IS_PAHME(sfhme)) {
8262 continue;
8263 }
8264
8265 hmeblkp = sfmmu_hmetohblk(sfhme);
8266 if (hmeblkp->hblk_xhat_bit) {
8267 cnt++;
8268 if (cnt > sh_thresh) {
8269 sfmmu_mlist_exit(pml);
8270 return (1);
8271 }
8272 continue;
8273 }
8274 if (hme_size(sfhme) != sz) {
8275 continue;
8276 }
8277
8278 if (hmeblkp->hblk_shared) {
8279 sf_srd_t *srdp = hblktosrd(hmeblkp);
8280 uint_t rid = hmeblkp->hblk_tag.htag_rid;
8281 sf_region_t *rgnp;
8282 ASSERT(SFMMU_IS_SHMERID_VALID(rid));
8283 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
8284 ASSERT(srdp != NULL);
8285 rgnp = srdp->srd_hmergnp[rid];
8286 SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp,
8287 rgnp, rid);
8288 cnt += rgnp->rgn_refcnt;
8289 } else {
8290 cnt++;
8291 }
8292 if (cnt > sh_thresh) {
8293 sfmmu_mlist_exit(pml);
8368 CPUSET_ZERO(cpuset);
8369 sz = TTE64K;
8370 sync = 1;
8371 }
8372
8373 while (index) {
8374 if (!(index & 0x1)) {
8375 index >>= 1;
8376 sz++;
8377 continue;
8378 }
8379 ASSERT(sz <= pszc);
8380 rootpp = PP_GROUPLEADER(pp, sz);
8381 for (sfhme = rootpp->p_mapping; sfhme; sfhme = tmphme) {
8382 tmphme = sfhme->hme_next;
8383 ASSERT(!IS_PAHME(sfhme));
8384 hmeblkp = sfmmu_hmetohblk(sfhme);
8385 if (hme_size(sfhme) != sz) {
8386 continue;
8387 }
8388 if (hmeblkp->hblk_xhat_bit) {
8389 cmn_err(CE_PANIC,
8390 "hat_page_demote: xhat hmeblk");
8391 }
8392 tset = sfmmu_pageunload(rootpp, sfhme, sz);
8393 CPUSET_OR(cpuset, tset);
8394 }
8395 if (index >>= 1) {
8396 sz++;
8397 }
8398 }
8399
8400 ASSERT(!PP_ISMAPPED_LARGE(pp));
8401
8402 if (sync) {
8403 xt_sync(cpuset);
8404 #ifdef VAC
8405 if (PP_ISTNC(pp)) {
8406 conv_tnc(rootpp, sz);
8407 }
8408 #endif /* VAC */
8409 }
8410
8411 pmtx = sfmmu_page_enter(pp);
8499 * This is currently implemented as the number of bytes that have active
8500 * hardware translations that have page structures. Therefore, it can
8501 * underestimate the traditional resident set size, eg, if the
8502 * physical page is present and the hardware translation is missing;
8503 * and it can overestimate the rss, eg, if there are active
8504 * translations to a frame buffer with page structs.
8505 * Also, it does not take sharing into account.
8506 *
8507 * Note that we don't acquire locks here since this function is most often
8508 * called from the clock thread.
8509 */
8510 size_t
8511 hat_get_mapped_size(struct hat *hat)
8512 {
8513 size_t assize = 0;
8514 int i;
8515
8516 if (hat == NULL)
8517 return (0);
8518
8519 ASSERT(hat->sfmmu_xhat_provider == NULL);
8520
8521 for (i = 0; i < mmu_page_sizes; i++)
8522 assize += ((pgcnt_t)hat->sfmmu_ttecnt[i] +
8523 (pgcnt_t)hat->sfmmu_scdrttecnt[i]) * TTEBYTES(i);
8524
8525 if (hat->sfmmu_iblk == NULL)
8526 return (assize);
8527
8528 for (i = 0; i < mmu_page_sizes; i++)
8529 assize += ((pgcnt_t)hat->sfmmu_ismttecnt[i] +
8530 (pgcnt_t)hat->sfmmu_scdismttecnt[i]) * TTEBYTES(i);
8531
8532 return (assize);
8533 }
8534
8535 int
8536 hat_stats_enable(struct hat *hat)
8537 {
8538 hatlock_t *hatlockp;
8539
8540 ASSERT(hat->sfmmu_xhat_provider == NULL);
8541
8542 hatlockp = sfmmu_hat_enter(hat);
8543 hat->sfmmu_rmstat++;
8544 sfmmu_hat_exit(hatlockp);
8545 return (1);
8546 }
8547
8548 void
8549 hat_stats_disable(struct hat *hat)
8550 {
8551 hatlock_t *hatlockp;
8552
8553 ASSERT(hat->sfmmu_xhat_provider == NULL);
8554
8555 hatlockp = sfmmu_hat_enter(hat);
8556 hat->sfmmu_rmstat--;
8557 sfmmu_hat_exit(hatlockp);
8558 }
8559
8560 /*
8561 * Routines for entering or removing ourselves from the
8562 * ism_hat's mapping list. This is used for both private and
8563 * SCD hats.
8564 */
8565 static void
8566 iment_add(struct ism_ment *iment, struct hat *ism_hat)
8567 {
8568 ASSERT(MUTEX_HELD(&ism_mlist_lock));
8569
8570 iment->iment_prev = NULL;
8571 iment->iment_next = ism_hat->sfmmu_iment;
8572 if (ism_hat->sfmmu_iment) {
8573 ism_hat->sfmmu_iment->iment_prev = iment;
8574 }
8636 sf_scd_t *old_scdp;
8637
8638 #ifdef DEBUG
8639 caddr_t eaddr = addr + len;
8640 #endif /* DEBUG */
8641
8642 ASSERT(ism_hatid != NULL && sfmmup != NULL);
8643 ASSERT(sptaddr == ISMID_STARTADDR);
8644 /*
8645 * Check the alignment.
8646 */
8647 if (!ISM_ALIGNED(ismshift, addr) || !ISM_ALIGNED(ismshift, sptaddr))
8648 return (EINVAL);
8649
8650 /*
8651 * Check size alignment.
8652 */
8653 if (!ISM_ALIGNED(ismshift, len))
8654 return (EINVAL);
8655
8656 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
8657
8658 /*
8659 * Allocate ism_ment for the ism_hat's mapping list, and an
8660 * ism map blk in case we need one. We must do our
8661 * allocations before acquiring locks to prevent a deadlock
8662 * in the kmem allocator on the mapping list lock.
8663 */
8664 new_iblk = kmem_cache_alloc(ism_blk_cache, KM_SLEEP);
8665 ism_ment = kmem_cache_alloc(ism_ment_cache, KM_SLEEP);
8666
8667 /*
8668 * Serialize ISM mappings with the ISM busy flag, and also the
8669 * trap handlers.
8670 */
8671 sfmmu_ismhat_enter(sfmmup, 0);
8672
8673 /*
8674 * Allocate an ism map blk if necessary.
8675 */
8676 if (sfmmup->sfmmu_iblk == NULL) {
8677 sfmmup->sfmmu_iblk = new_iblk;
8849 void
8850 hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc)
8851 {
8852 ism_map_t *ism_map;
8853 ism_ment_t *free_ment = NULL;
8854 ism_blk_t *ism_blkp;
8855 struct hat *ism_hatid;
8856 int found, i;
8857 hatlock_t *hatlockp;
8858 struct tsb_info *tsbinfo;
8859 uint_t ismshift = page_get_shift(ismszc);
8860 size_t sh_size = ISM_SHIFT(ismshift, len);
8861 uchar_t ism_rid;
8862 sf_scd_t *old_scdp;
8863
8864 ASSERT(ISM_ALIGNED(ismshift, addr));
8865 ASSERT(ISM_ALIGNED(ismshift, len));
8866 ASSERT(sfmmup != NULL);
8867 ASSERT(sfmmup != ksfmmup);
8868
8869 if (sfmmup->sfmmu_xhat_provider) {
8870 XHAT_UNSHARE(sfmmup, addr, len);
8871 return;
8872 } else {
8873 /*
8874 * This must be a CPU HAT. If the address space has
8875 * XHATs attached, inform all XHATs that ISM segment
8876 * is going away
8877 */
8878 ASSERT(sfmmup->sfmmu_as != NULL);
8879 if (sfmmup->sfmmu_as->a_xhat != NULL)
8880 xhat_unshare_all(sfmmup->sfmmu_as, addr, len);
8881 }
8882
8883 /*
8884 * Make sure that during the entire time ISM mappings are removed,
8885 * the trap handlers serialize behind us, and that no one else
8886 * can be mucking with ISM mappings. This also lets us get away
8887 * with not doing expensive cross calls to flush the TLB -- we
8888 * just discard the context, flush the entire TSB, and call it
8889 * a day.
8890 */
8891 sfmmu_ismhat_enter(sfmmup, 0);
8892
8893 /*
8894 * Remove the mapping.
8895 *
8896 * We can't have any holes in the ism map.
8897 * The tsb miss code while searching the ism map will
8898 * stop on an empty map slot. So we must move
8899 * everyone past the hole up 1 if any.
8900 *
8901 * Also empty ism map blks are not freed until the
9315 * Always convert all mappings to TNC.
9316 */
9317 sz = fnd_mapping_sz(pp);
9318 pp = PP_GROUPLEADER(pp, sz);
9319 SFMMU_STAT_ADD(sf_uncache_conflict, TTEPAGES(sz));
9320 sfmmu_page_cache_array(pp, HAT_TMPNC, CACHE_FLUSH,
9321 TTEPAGES(sz));
9322
9323 return;
9324 }
9325
9326 /*
9327 * check if any mapping is in same as or if it is locked
9328 * since in that case we need to uncache.
9329 */
9330 for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
9331 tmphme = sfhmep->hme_next;
9332 if (IS_PAHME(sfhmep))
9333 continue;
9334 hmeblkp = sfmmu_hmetohblk(sfhmep);
9335 if (hmeblkp->hblk_xhat_bit)
9336 continue;
9337 tmphat = hblktosfmmu(hmeblkp);
9338 sfmmu_copytte(&sfhmep->hme_tte, &tte);
9339 ASSERT(TTE_IS_VALID(&tte));
9340 if (hmeblkp->hblk_shared || tmphat == hat ||
9341 hmeblkp->hblk_lckcnt) {
9342 /*
9343 * We have an uncache conflict
9344 */
9345 SFMMU_STAT(sf_uncache_conflict);
9346 sfmmu_page_cache_array(pp, HAT_TMPNC, CACHE_FLUSH, 1);
9347 return;
9348 }
9349 }
9350
9351 /*
9352 * We have an unload conflict
9353 * We have already checked for LARGE mappings, therefore
9354 * the remaining mapping(s) must be TTE8K.
9355 */
9356 SFMMU_STAT(sf_unload_conflict);
9357
9358 for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
9359 tmphme = sfhmep->hme_next;
9360 if (IS_PAHME(sfhmep))
9361 continue;
9362 hmeblkp = sfmmu_hmetohblk(sfhmep);
9363 if (hmeblkp->hblk_xhat_bit)
9364 continue;
9365 ASSERT(!hmeblkp->hblk_shared);
9366 (void) sfmmu_pageunload(pp, sfhmep, TTE8K);
9367 }
9368
9369 if (PP_ISMAPPED_KPM(pp))
9370 sfmmu_kpm_vac_unload(pp, addr);
9371
9372 /*
9373 * Unloads only do TLB flushes so we need to flush the
9374 * cache here.
9375 */
9376 sfmmu_cache_flush(pp->p_pagenum, PP_GET_VCOLOR(pp));
9377 PP_SET_VCOLOR(pp, vcolor);
9378 }
9379
9380 /*
9381 * Whenever a mapping is unloaded and the page is in TNC state,
9382 * we see if the page can be made cacheable again. 'pp' is
9383 * the page that we just unloaded a mapping from, the size
9384 * of mapping that was unloaded is 'ottesz'.
9492
9493 if (PP_ISPNC(pp)) {
9494 return (0);
9495 }
9496
9497 clr_valid = 0;
9498 if (PP_ISMAPPED_KPM(pp)) {
9499 caddr_t kpmvaddr;
9500
9501 ASSERT(kpm_enable);
9502 kpmvaddr = hat_kpm_page2va(pp, 1);
9503 ASSERT(!(npages > 1 && IS_KPM_ALIAS_RANGE(kpmvaddr)));
9504 color1 = addr_to_vcolor(kpmvaddr);
9505 clr_valid = 1;
9506 }
9507
9508 for (sfhme = pp->p_mapping; sfhme; sfhme = sfhme->hme_next) {
9509 if (IS_PAHME(sfhme))
9510 continue;
9511 hmeblkp = sfmmu_hmetohblk(sfhme);
9512 if (hmeblkp->hblk_xhat_bit)
9513 continue;
9514
9515 sfmmu_copytte(&sfhme->hme_tte, &tte);
9516 ASSERT(TTE_IS_VALID(&tte));
9517
9518 vaddr = tte_to_vaddr(hmeblkp, tte);
9519 color = addr_to_vcolor(vaddr);
9520
9521 if (npages > 1) {
9522 /*
9523 * If there is a big mapping, make sure
9524 * 8K mapping is consistent with the big
9525 * mapping.
9526 */
9527 bcolor = i % ncolors;
9528 if (color != bcolor) {
9529 return (0);
9530 }
9531 }
9532 if (!clr_valid) {
9533 clr_valid = 1;
9641 static void
9642 sfmmu_page_cache(page_t *pp, int flags, int cache_flush_flag, int bcolor)
9643 {
9644 struct sf_hment *sfhme;
9645 struct hme_blk *hmeblkp;
9646 sfmmu_t *sfmmup;
9647 tte_t tte, ttemod;
9648 caddr_t vaddr;
9649 int ret, color;
9650 pfn_t pfn;
9651
9652 color = bcolor;
9653 pfn = pp->p_pagenum;
9654
9655 for (sfhme = pp->p_mapping; sfhme; sfhme = sfhme->hme_next) {
9656
9657 if (IS_PAHME(sfhme))
9658 continue;
9659 hmeblkp = sfmmu_hmetohblk(sfhme);
9660
9661 if (hmeblkp->hblk_xhat_bit)
9662 continue;
9663
9664 sfmmu_copytte(&sfhme->hme_tte, &tte);
9665 ASSERT(TTE_IS_VALID(&tte));
9666 vaddr = tte_to_vaddr(hmeblkp, tte);
9667 color = addr_to_vcolor(vaddr);
9668
9669 #ifdef DEBUG
9670 if ((flags & HAT_CACHE) && bcolor != NO_VCOLOR) {
9671 ASSERT(color == bcolor);
9672 }
9673 #endif
9674
9675 ASSERT(flags != HAT_TMPNC || color == PP_GET_VCOLOR(pp));
9676
9677 ttemod = tte;
9678 if (flags & (HAT_UNCACHE | HAT_TMPNC)) {
9679 TTE_CLR_VCACHEABLE(&ttemod);
9680 } else { /* flags & HAT_CACHE */
9681 TTE_SET_VCACHEABLE(&ttemod);
9682 }
9683 ret = sfmmu_modifytte_try(&tte, &ttemod, &sfhme->hme_tte);
13391 for (; i <= (size - hme1blk_sz); i += hme1blk_sz, k++) {
13392 hmeblkp = (struct hme_blk *)addr;
13393 addr += hme1blk_sz;
13394 hmeblkp->hblk_nuc_bit = 1;
13395 hmeblkp->hblk_nextpa = cached_va_to_pa((caddr_t)hmeblkp);
13396 }
13397 ASSERT(k >= nhblk1);
13398 nucleus_hblk1.len = k;
13399 SFMMU_STAT_ADD(sf_hblk1_ncreate, k);
13400 }
13401
/*
 * Soft-lock a range of an address space (see hat.c and hat_srmmu.c for
 * the intended semantics).  This operation is not supported on the
 * sfmmu HAT; callers always get FC_NOSUPPORT back.
 */
/* ARGSUSED */
faultcode_t
hat_softlock(struct hat *hat, caddr_t addr, size_t *lenp, page_t **ppp,
	uint_t flags)
{
	/* Only CPU HATs should reach this entry point, never XHATs. */
	ASSERT(hat->sfmmu_xhat_provider == NULL);
	return (FC_NOSUPPORT);
}
13414
13415 /*
13416 * Searchs the mapping list of the page for a mapping of the same size. If not
13417 * found the corresponding bit is cleared in the p_index field. When large
13418 * pages are more prevalent in the system, we can maintain the mapping list
13419 * in order and we don't have to traverse the list each time. Just check the
13420 * next and prev entries, and if both are of different size, we clear the bit.
13421 */
13422 static void
13423 sfmmu_rm_large_mappings(page_t *pp, int ttesz)
13424 {
13425 struct sf_hment *sfhmep;
13426 struct hme_blk *hmeblkp;
13427 int index;
13428 pgcnt_t npgs;
13429
13430 ASSERT(ttesz > TTE8K);
13431
13432 ASSERT(sfmmu_mlist_held(pp));
13433
13434 ASSERT(PP_ISMAPPED_LARGE(pp));
13435
13436 /*
13437 * Traverse mapping list looking for another mapping of same size.
13438 * since we only want to clear index field if all mappings of
13439 * that size are gone.
13440 */
13441
13442 for (sfhmep = pp->p_mapping; sfhmep; sfhmep = sfhmep->hme_next) {
13443 if (IS_PAHME(sfhmep))
13444 continue;
13445 hmeblkp = sfmmu_hmetohblk(sfhmep);
13446 if (hmeblkp->hblk_xhat_bit)
13447 continue;
13448 if (hme_size(sfhmep) == ttesz) {
13449 /*
13450 * another mapping of the same size. don't clear index.
13451 */
13452 return;
13453 }
13454 }
13455
13456 /*
13457 * Clear the p_index bit for large page.
13458 */
13459 index = PAGESZ_TO_INDEX(ttesz);
13460 npgs = TTEPAGES(ttesz);
13461 while (npgs-- > 0) {
13462 ASSERT(pp->p_index & index);
13463 pp->p_index &= ~index;
13464 pp = PP_PAGENEXT(pp);
13465 }
13466 }
13467
14012 uint_t rhash;
14013 uint_t rid;
14014 hatlock_t *hatlockp;
14015 sf_region_t *rgnp;
14016 sf_region_t *new_rgnp = NULL;
14017 int i;
14018 uint16_t *nextidp;
14019 sf_region_t **freelistp;
14020 int maxids;
14021 sf_region_t **rarrp;
14022 uint16_t *busyrgnsp;
14023 ulong_t rttecnt;
14024 uchar_t tteflag;
14025 uchar_t r_type = flags & HAT_REGION_TYPE_MASK;
14026 int text = (r_type == HAT_REGION_TEXT);
14027
14028 if (srdp == NULL || r_size == 0) {
14029 return (HAT_INVALID_REGION_COOKIE);
14030 }
14031
14032 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
14033 ASSERT(sfmmup != ksfmmup);
14034 ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
14035 ASSERT(srdp->srd_refcnt > 0);
14036 ASSERT(!(flags & ~HAT_REGION_TYPE_MASK));
14037 ASSERT(flags == HAT_REGION_TEXT || flags == HAT_REGION_ISM);
14038 ASSERT(r_pgszc < mmu_page_sizes);
14039 if (!IS_P2ALIGNED(r_saddr, TTEBYTES(r_pgszc)) ||
14040 !IS_P2ALIGNED(r_size, TTEBYTES(r_pgszc))) {
14041 panic("hat_join_region: region addr or size is not aligned\n");
14042 }
14043
14044
14045 r_type = (r_type == HAT_REGION_ISM) ? SFMMU_REGION_ISM :
14046 SFMMU_REGION_HME;
14047 /*
14048 * Currently only support shared hmes for the read only main text
14049 * region.
14050 */
14051 if (r_type == SFMMU_REGION_HME && ((r_obj != srdp->srd_evp) ||
14052 (r_perm & PROT_WRITE))) {
14318 ASSERT(flags == HAT_REGION_TEXT || flags == HAT_REGION_ISM);
14319 ASSERT(!sfmmup->sfmmu_free || sfmmup->sfmmu_scdp == NULL);
14320
14321 r_type = (r_type == HAT_REGION_ISM) ? SFMMU_REGION_ISM :
14322 SFMMU_REGION_HME;
14323
14324 if (r_type == SFMMU_REGION_ISM) {
14325 ASSERT(SFMMU_IS_ISMRID_VALID(rid));
14326 ASSERT(rid < SFMMU_MAX_ISM_REGIONS);
14327 rgnp = srdp->srd_ismrgnp[rid];
14328 } else {
14329 ASSERT(SFMMU_IS_SHMERID_VALID(rid));
14330 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
14331 rgnp = srdp->srd_hmergnp[rid];
14332 }
14333 ASSERT(rgnp != NULL);
14334 ASSERT(rgnp->rgn_id == rid);
14335 ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type);
14336 ASSERT(!(rgnp->rgn_flags & SFMMU_REGION_FREE));
14337 ASSERT(AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
14338
14339 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
14340 if (r_type == SFMMU_REGION_HME && sfmmup->sfmmu_as->a_xhat != NULL) {
14341 xhat_unload_callback_all(sfmmup->sfmmu_as, rgnp->rgn_saddr,
14342 rgnp->rgn_size, 0, NULL);
14343 }
14344
14345 if (sfmmup->sfmmu_free) {
14346 ulong_t rttecnt;
14347 r_pgszc = rgnp->rgn_pgszc;
14348 r_size = rgnp->rgn_size;
14349
14350 ASSERT(sfmmup->sfmmu_scdp == NULL);
14351 if (r_type == SFMMU_REGION_ISM) {
14352 SF_RGNMAP_DEL(sfmmup->sfmmu_ismregion_map, rid);
14353 } else {
14354 /* update shme rgns ttecnt in sfmmu_ttecnt */
14355 rttecnt = r_size >> TTE_PAGE_SHIFT(r_pgszc);
14356 ASSERT(sfmmup->sfmmu_ttecnt[r_pgszc] >= rttecnt);
14357
14358 atomic_add_long(&sfmmup->sfmmu_ttecnt[r_pgszc],
14359 -rttecnt);
14360
14361 SF_RGNMAP_DEL(sfmmup->sfmmu_hmeregion_map, rid);
14362 }
14363 } else if (r_type == SFMMU_REGION_ISM) {
|
64 #include <vm/seg_kmem.h>
65 #include <vm/seg_kpm.h>
66 #include <vm/rm.h>
67 #include <sys/t_lock.h>
68 #include <sys/obpdefs.h>
69 #include <sys/vm_machparam.h>
70 #include <sys/var.h>
71 #include <sys/trap.h>
72 #include <sys/machtrap.h>
73 #include <sys/scb.h>
74 #include <sys/bitmap.h>
75 #include <sys/machlock.h>
76 #include <sys/membar.h>
77 #include <sys/atomic.h>
78 #include <sys/cpu_module.h>
79 #include <sys/prom_debug.h>
80 #include <sys/ksynch.h>
81 #include <sys/mem_config.h>
82 #include <sys/mem_cage.h>
83 #include <vm/vm_dep.h>
84 #include <sys/fpu/fpusystm.h>
85 #include <vm/mach_kpm.h>
86 #include <sys/callb.h>
87
88 #ifdef DEBUG
89 #define SFMMU_VALIDATE_HMERID(hat, rid, saddr, len) \
90 if (SFMMU_IS_SHMERID_VALID(rid)) { \
91 caddr_t _eaddr = (saddr) + (len); \
92 sf_srd_t *_srdp; \
93 sf_region_t *_rgnp; \
94 ASSERT((rid) < SFMMU_MAX_HME_REGIONS); \
95 ASSERT(SF_RGNMAP_TEST(hat->sfmmu_hmeregion_map, rid)); \
96 ASSERT((hat) != ksfmmup); \
97 _srdp = (hat)->sfmmu_srdp; \
98 ASSERT(_srdp != NULL); \
99 ASSERT(_srdp->srd_refcnt != 0); \
100 _rgnp = _srdp->srd_hmergnp[(rid)]; \
101 ASSERT(_rgnp != NULL && _rgnp->rgn_id == rid); \
102 ASSERT(_rgnp->rgn_refcnt != 0); \
103 ASSERT(!(_rgnp->rgn_flags & SFMMU_REGION_FREE)); \
1332 * Reserve some kernel virtual address space for the locked TTEs
1333 * that allow us to probe the TSB from TL>0.
1334 */
1335 utsb_vabase = vmem_xalloc(heap_arena, tsb_slab_size, tsb_slab_size,
1336 0, 0, NULL, NULL, VM_SLEEP);
1337 utsb4m_vabase = vmem_xalloc(heap_arena, tsb_slab_size, tsb_slab_size,
1338 0, 0, NULL, NULL, VM_SLEEP);
1339 #endif
1340
1341 #ifdef VAC
1342 /*
1343 * The big page VAC handling code assumes VAC
1344 * will not be bigger than the smallest big
1345 * page- which is 64K.
1346 */
1347 if (TTEPAGES(TTE64K) < CACHE_NUM_COLOR) {
1348 cmn_err(CE_PANIC, "VAC too big!");
1349 }
1350 #endif
1351
1352 uhme_hash_pa = va_to_pa(uhme_hash);
1353 khme_hash_pa = va_to_pa(khme_hash);
1354
1355 /*
1356 * Initialize relocation locks. kpr_suspendlock is held
1357 * at PIL_MAX to prevent interrupts from pinning the holder
1358 * of a suspended TTE which may access it leading to a
1359 * deadlock condition.
1360 */
1361 mutex_init(&kpr_mutex, NULL, MUTEX_DEFAULT, NULL);
1362 mutex_init(&kpr_suspendlock, NULL, MUTEX_SPIN, (void *)PIL_MAX);
1363
1364 /*
1365 * If Shared context support is disabled via /etc/system
1366 * set shctx_on to 0 here if it was set to 1 earlier in boot
1367 * sequence by cpu module initialization code.
1368 */
1369 if (shctx_on && disable_shctx) {
1370 shctx_on = 0;
1371 }
1519 for (i = 0; i < max_mmu_page_sizes; i++) {
1520 sfmmup->sfmmu_ttecnt[i] = 0;
1521 sfmmup->sfmmu_scdrttecnt[i] = 0;
1522 sfmmup->sfmmu_ismttecnt[i] = 0;
1523 sfmmup->sfmmu_scdismttecnt[i] = 0;
1524 sfmmup->sfmmu_pgsz[i] = TTE8K;
1525 }
1526 sfmmup->sfmmu_tsb0_4minflcnt = 0;
1527 sfmmup->sfmmu_iblk = NULL;
1528 sfmmup->sfmmu_ismhat = 0;
1529 sfmmup->sfmmu_scdhat = 0;
1530 sfmmup->sfmmu_ismblkpa = (uint64_t)-1;
1531 if (sfmmup == ksfmmup) {
1532 CPUSET_ALL(sfmmup->sfmmu_cpusran);
1533 } else {
1534 CPUSET_ZERO(sfmmup->sfmmu_cpusran);
1535 }
1536 sfmmup->sfmmu_free = 0;
1537 sfmmup->sfmmu_rmstat = 0;
1538 sfmmup->sfmmu_clrbin = sfmmup->sfmmu_clrstart;
1539 cv_init(&sfmmup->sfmmu_tsb_cv, NULL, CV_DEFAULT, NULL);
1540 sfmmup->sfmmu_srdp = NULL;
1541 SF_RGNMAP_ZERO(sfmmup->sfmmu_region_map);
1542 bzero(sfmmup->sfmmu_hmeregion_links, SFMMU_L1_HMERLINKS_SIZE);
1543 sfmmup->sfmmu_scdp = NULL;
1544 sfmmup->sfmmu_scd_link.next = NULL;
1545 sfmmup->sfmmu_scd_link.prev = NULL;
1546 return (sfmmup);
1547 }
1548
1549 /*
1550 * Create per-MMU context domain kstats for a given MMU ctx.
1551 */
1552 static void
1553 sfmmu_mmu_kstat_create(mmu_ctx_t *mmu_ctxp)
1554 {
1555 mmu_ctx_stat_t stat;
1556 kstat_t *mmu_kstat;
1557
1558 ASSERT(MUTEX_HELD(&cpu_lock));
1898 * INVALID_CONTEXT to it.
1899 * Compatibility Note: hw takes care of MMU_SCONTEXT1
1900 */
1901 sfmmu_setctx_sec(INVALID_CONTEXT);
1902 sfmmu_clear_utsbinfo();
1903
1904 kpreempt_enable();
1905 sfmmu_hat_exit(hatlockp);
1906 }
1907 }
1908
1909 /*
1910 * Free all the translation resources for the specified address space.
1911 * Called from as_free when an address space is being destroyed.
1912 */
1913 void
1914 hat_free_start(struct hat *sfmmup)
1915 {
1916 ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
1917 ASSERT(sfmmup != ksfmmup);
1918
1919 sfmmup->sfmmu_free = 1;
1920 if (sfmmup->sfmmu_scdp != NULL) {
1921 sfmmu_leave_scd(sfmmup, 0);
1922 }
1923
1924 ASSERT(sfmmup->sfmmu_scdp == NULL);
1925 }
1926
1927 void
1928 hat_free_end(struct hat *sfmmup)
1929 {
1930 int i;
1931
1932 ASSERT(sfmmup->sfmmu_free == 1);
1933 ASSERT(sfmmup->sfmmu_ttecnt[TTE8K] == 0);
1934 ASSERT(sfmmup->sfmmu_ttecnt[TTE64K] == 0);
1935 ASSERT(sfmmup->sfmmu_ttecnt[TTE512K] == 0);
1936 ASSERT(sfmmup->sfmmu_ttecnt[TTE4M] == 0);
1937 ASSERT(sfmmup->sfmmu_ttecnt[TTE32M] == 0);
1938 ASSERT(sfmmup->sfmmu_ttecnt[TTE256M] == 0);
1939
1940 if (sfmmup->sfmmu_rmstat) {
1941 hat_freestat(sfmmup->sfmmu_as, NULL);
1942 }
1943
1944 while (sfmmup->sfmmu_tsb != NULL) {
1945 struct tsb_info *next = sfmmup->sfmmu_tsb->tsb_next;
1946 sfmmu_tsbinfo_free(sfmmup->sfmmu_tsb);
1947 sfmmup->sfmmu_tsb = next;
1948 }
1949
1950 if (sfmmup->sfmmu_srdp != NULL) {
1951 sfmmu_leave_srd(sfmmup);
1960 }
1961 sfmmu_free_sfmmu(sfmmup);
1962
1963 #ifdef DEBUG
1964 for (i = 0; i < SFMMU_L1_HMERLINKS; i++) {
1965 ASSERT(sfmmup->sfmmu_hmeregion_links[i] == NULL);
1966 }
1967 #endif
1968
1969 kmem_cache_free(sfmmuid_cache, sfmmup);
1970 }
1971
/*
 * Set up any translation structures, for the specified address space,
 * that are needed or preferred when the process is being swapped in.
 *
 * Nothing to do on sfmmu -- no structures are pre-built here.
 */
/* ARGSUSED */
void
hat_swapin(struct hat *hat)
{
}
1981
1982 /*
1983 * Free all of the translation resources, for the specified address space,
1984 * that can be freed while the process is swapped out. Called from as_swapout.
1985 * Also, free up the ctx that this process was using.
1986 */
1987 void
1988 hat_swapout(struct hat *sfmmup)
1989 {
1990 struct hmehash_bucket *hmebp;
1991 struct hme_blk *hmeblkp;
1992 struct hme_blk *pr_hblk = NULL;
1993 struct hme_blk *nx_hblk;
1994 int i;
1995 struct hme_blk *list = NULL;
1996 hatlock_t *hatlockp;
1997 struct tsb_info *tsbinfop;
1998 struct free_tsb {
1999 struct free_tsb *next;
2000 struct tsb_info *tsbinfop;
2001 }; /* free list of TSBs */
2002 struct free_tsb *freelist, *last, *next;
2003
2004 SFMMU_STAT(sf_swapout);
2005
2006 /*
2007 * There is no way to go from an as to all its translations in sfmmu.
2008 * Here is one of the times when we take the big hit and traverse
2009 * the hash looking for hme_blks to free up. Not only do we free up
2010 * this as hme_blks but all those that are free. We are obviously
2011 * swapping because we need memory so let's free up as much
2012 * as we can.
2013 *
2014 * Note that we don't flush TLB/TSB here -- it's not necessary
2015 * because:
2016 * 1) we free the ctx we're using and throw away the TSB(s);
2017 * 2) processes aren't runnable while being swapped out.
2018 */
2019 ASSERT(sfmmup != KHATID);
2020 for (i = 0; i <= UHMEHASH_SZ; i++) {
2021 hmebp = &uhme_hash[i];
2022 SFMMU_HASH_LOCK(hmebp);
2023 hmeblkp = hmebp->hmeblkp;
2024 pr_hblk = NULL;
2025 while (hmeblkp) {
2026
2027 if ((hmeblkp->hblk_tag.htag_id == sfmmup) &&
2028 !hmeblkp->hblk_shw_bit && !hmeblkp->hblk_lckcnt) {
2029 ASSERT(!hmeblkp->hblk_shared);
2030 (void) sfmmu_hblk_unload(sfmmup, hmeblkp,
2031 (caddr_t)get_hblk_base(hmeblkp),
2032 get_hblk_endaddr(hmeblkp),
2033 NULL, HAT_UNLOAD);
2034 }
2035 nx_hblk = hmeblkp->hblk_next;
2036 if (!hmeblkp->hblk_vcnt && !hmeblkp->hblk_hmecnt) {
2037 ASSERT(!hmeblkp->hblk_lckcnt);
2038 sfmmu_hblk_hash_rm(hmebp, hmeblkp, pr_hblk,
2039 &list, 0);
2040 } else {
2041 pr_hblk = hmeblkp;
2042 }
2043 hmeblkp = nx_hblk;
2044 }
2045 SFMMU_HASH_UNLOCK(hmebp);
2046 }
2108 sfmmu_hat_exit(hatlockp);
2109 for (; freelist != NULL; freelist = next) {
2110 next = freelist->next;
2111 sfmmu_tsb_free(freelist->tsbinfop);
2112 }
2113 }
2114
2115 /*
2116 * Duplicate the translations of an as into another newas
2117 */
2118 /* ARGSUSED */
2119 int
2120 hat_dup(struct hat *hat, struct hat *newhat, caddr_t addr, size_t len,
2121 uint_t flag)
2122 {
2123 sf_srd_t *srdp;
2124 sf_scd_t *scdp;
2125 int i;
2126 extern uint_t get_color_start(struct as *);
2127
2128 ASSERT((flag == 0) || (flag == HAT_DUP_ALL) || (flag == HAT_DUP_COW) ||
2129 (flag == HAT_DUP_SRD));
2130 ASSERT(hat != ksfmmup);
2131 ASSERT(newhat != ksfmmup);
2132 ASSERT(flag != HAT_DUP_ALL || hat->sfmmu_srdp == newhat->sfmmu_srdp);
2133
2134 if (flag == HAT_DUP_COW) {
2135 panic("hat_dup: HAT_DUP_COW not supported");
2136 }
2137
2138 if (flag == HAT_DUP_SRD && ((srdp = hat->sfmmu_srdp) != NULL)) {
2139 ASSERT(srdp->srd_evp != NULL);
2140 VN_HOLD(srdp->srd_evp);
2141 ASSERT(srdp->srd_refcnt > 0);
2142 newhat->sfmmu_srdp = srdp;
2143 atomic_inc_32((volatile uint_t *)&srdp->srd_refcnt);
2144 }
2145
2146 /*
2147 * HAT_DUP_ALL flag is used after as duplication is done.
2177 if (flag == HAT_DUP_ALL && consistent_coloring == 0 &&
2178 update_proc_pgcolorbase_after_fork != 0) {
2179 hat->sfmmu_clrbin = get_color_start(hat->sfmmu_as);
2180 }
2181 return (0);
2182 }
2183
/*
 * Load a translation for page pp at addr with attributes attr.
 * Thin wrapper around hat_do_memload() passing no shared-region id,
 * i.e. this mapping does not belong to an SRD region.
 */
void
hat_memload(struct hat *hat, caddr_t addr, struct page *pp,
	uint_t attr, uint_t flags)
{
	hat_do_memload(hat, addr, pp, attr, flags,
	    SFMMU_INVALID_SHMERID);
}
2191
2192 void
2193 hat_memload_region(struct hat *hat, caddr_t addr, struct page *pp,
2194 uint_t attr, uint_t flags, hat_region_cookie_t rcookie)
2195 {
2196 uint_t rid;
2197 if (rcookie == HAT_INVALID_REGION_COOKIE) {
2198 hat_do_memload(hat, addr, pp, attr, flags,
2199 SFMMU_INVALID_SHMERID);
2200 return;
2201 }
2202 rid = (uint_t)((uint64_t)rcookie);
2203 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
2204 hat_do_memload(hat, addr, pp, attr, flags, rid);
2205 }
2206
2207 /*
2208 * Set up addr to map to page pp with protection prot.
2209 * As an optimization we also load the TSB with the
2210 * corresponding tte but it is no big deal if the tte gets kicked out.
2211 */
static void
hat_do_memload(struct hat *hat, caddr_t addr, struct page *pp,
	uint_t attr, uint_t flags, uint_t rid)
{
	tte_t tte;


	/* Sanity: page-aligned address, known flag/attr bits, valid region. */
	ASSERT(hat != NULL);
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
	ASSERT(!(flags & ~SFMMU_LOAD_ALLFLAG));
	ASSERT(!(attr & ~SFMMU_LOAD_ALLATTR));
	SFMMU_VALIDATE_HMERID(hat, rid, addr, MMU_PAGESIZE);

	if (PP_ISFREE(pp)) {
		panic("hat_memload: loading a mapping to free page %p",
		    (void *)pp);
	}

	ASSERT((hat == ksfmmup) ||
	    AS_LOCK_HELD(hat->sfmmu_as, &hat->sfmmu_as->a_lock));

	if (flags & ~SFMMU_LOAD_ALLFLAG)
		cmn_err(CE_NOTE, "hat_memload: unsupported flags %d",
		    flags & ~SFMMU_LOAD_ALLFLAG);

	/* Resident-memory statistics, when enabled for this hat. */
	if (hat->sfmmu_rmstat)
		hat_resvstat(MMU_PAGESIZE, hat->sfmmu_as, addr);

#if defined(SF_ERRATA_57)
	/* Errata workaround: strip user-exec below errata57_limit. */
	if ((hat != ksfmmup) && AS_TYPE_64BIT(hat->sfmmu_as) &&
	    (addr < errata57_limit) && (attr & PROT_EXEC) &&
	    !(flags & HAT_LOAD_SHARE)) {
		cmn_err(CE_WARN, "hat_memload: illegal attempt to make user "
		    " page executable");
		attr &= ~PROT_EXEC;
	}
#endif

}
2261
2262 /*
2263 * hat_devload can be called to map real memory (e.g.
2264 * /dev/kmem) and even though hat_devload will determine pf is
2265 * for memory, it will be unable to get a shared lock on the
2266 * page (because someone else has it exclusively) and will
2267 * pass dp = NULL. If tteload doesn't get a non-NULL
2268 * page pointer it can't cache memory.
2269 */
void
hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn,
	uint_t attr, int flags)
{
	tte_t tte;
	struct page *pp = NULL;
	int use_lgpg = 0;

	ASSERT(hat != NULL);

	/* Reject flag/attr bits the sfmmu load path does not understand. */
	ASSERT(!(flags & ~SFMMU_LOAD_ALLFLAG));
	ASSERT(!(attr & ~SFMMU_LOAD_ALLATTR));
	ASSERT((hat == ksfmmup) ||
	    AS_LOCK_HELD(hat->sfmmu_as, &hat->sfmmu_as->a_lock));
	if (len == 0)
		panic("hat_devload: zero len");
	if (flags & ~SFMMU_LOAD_ALLFLAG)
		cmn_err(CE_NOTE, "hat_devload: unsupported flags %d",
		    flags & ~SFMMU_LOAD_ALLFLAG);

#if defined(SF_ERRATA_57)
	/* Errata workaround: strip user-exec below errata57_limit. */
	if ((hat != ksfmmup) && AS_TYPE_64BIT(hat->sfmmu_as) &&
	    (addr < errata57_limit) && (attr & PROT_EXEC) &&
	    !(flags & HAT_LOAD_SHARE)) {
		cmn_err(CE_WARN, "hat_devload: illegal attempt to make user "
		    " page executable");
		attr &= ~PROT_EXEC;
	}
#endif

	 */
	if ((flags & HAT_LOAD_SHARE) == 0) {
		sfmmu_check_page_sizes(hat, 1);
	}
}
2411
2412 void
2413 hat_memload_array(struct hat *hat, caddr_t addr, size_t len,
2414 struct page **pps, uint_t attr, uint_t flags)
2415 {
2416 hat_do_memload_array(hat, addr, len, pps, attr, flags,
2417 SFMMU_INVALID_SHMERID);
2418 }
2419
2420 void
2421 hat_memload_array_region(struct hat *hat, caddr_t addr, size_t len,
2422 struct page **pps, uint_t attr, uint_t flags,
2423 hat_region_cookie_t rcookie)
2424 {
2425 uint_t rid;
2426 if (rcookie == HAT_INVALID_REGION_COOKIE) {
2427 hat_do_memload_array(hat, addr, len, pps, attr, flags,
2428 SFMMU_INVALID_SHMERID);
2429 return;
2430 }
2431 rid = (uint_t)((uint64_t)rcookie);
2432 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
2433 hat_do_memload_array(hat, addr, len, pps, attr, flags, rid);
2434 }
2435
2436 /*
2437 * Map the largest extend possible out of the page array. The array may NOT
2438 * be in order. The largest possible mapping a page can have
2439 * is specified in the p_szc field. The p_szc field
2440 * cannot change as long as there any mappings (large or small)
2441 * to any of the pages that make up the large page. (ie. any
2442 * promotion/demotion of page size is not up to the hat but up to
2443 * the page free list manager). The array
2444 * should consist of properly aligned contigous pages that are
2445 * part of a big page for a large mapping to be created.
2446 */
2447 static void
2448 hat_do_memload_array(struct hat *hat, caddr_t addr, size_t len,
2449 struct page **pps, uint_t attr, uint_t flags, uint_t rid)
2450 {
2451 int ttesz;
2452 size_t mapsz;
2453 pgcnt_t numpg, npgs;
2454 tte_t tte;
2455 page_t *pp;
2456 uint_t large_pages_disable;
2457
2458 ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
2459 SFMMU_VALIDATE_HMERID(hat, rid, addr, len);
2460
2461 if (hat->sfmmu_rmstat)
2462 hat_resvstat(len, hat->sfmmu_as, addr);
2463
2464 #if defined(SF_ERRATA_57)
2465 if ((hat != ksfmmup) && AS_TYPE_64BIT(hat->sfmmu_as) &&
2466 (addr < errata57_limit) && (attr & PROT_EXEC) &&
2467 !(flags & HAT_LOAD_SHARE)) {
2468 cmn_err(CE_WARN, "hat_memload_array: illegal attempt to make "
2469 "user page executable");
2470 attr &= ~PROT_EXEC;
2471 }
2472 #endif
2473
2474 /* Get number of pages */
2475 npgs = len >> MMU_PAGESHIFT;
2476
2477 if (flags & HAT_LOAD_SHARE) {
2478 large_pages_disable = disable_ism_large_pages;
2479 } else {
2480 large_pages_disable = disable_large_pages;
3923 rsz, rgnp->rgn_obj,
3924 rgnp->rgn_objoff);
3925 }
3926 ttesz--;
3927 }
3928 }
3929
3930 /*
3931 * Release one hardware address translation lock on the given address range.
3932 */
3933 void
3934 hat_unlock(struct hat *sfmmup, caddr_t addr, size_t len)
3935 {
3936 struct hmehash_bucket *hmebp;
3937 hmeblk_tag hblktag;
3938 int hmeshift, hashno = 1;
3939 struct hme_blk *hmeblkp, *list = NULL;
3940 caddr_t endaddr;
3941
3942 ASSERT(sfmmup != NULL);
3943
3944 ASSERT((sfmmup == ksfmmup) ||
3945 AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
3946 ASSERT((len & MMU_PAGEOFFSET) == 0);
3947 endaddr = addr + len;
3948 hblktag.htag_id = sfmmup;
3949 hblktag.htag_rid = SFMMU_INVALID_SHMERID;
3950
3951 /*
3952 * Spitfire supports 4 page sizes.
3953 * Most pages are expected to be of the smallest page size (8K) and
3954 * these will not need to be rehashed. 64K pages also don't need to be
3955 * rehashed because an hmeblk spans 64K of address space. 512K pages
3956 * might need 1 rehash and and 4M pages might need 2 rehashes.
3957 */
3958 while (addr < endaddr) {
3959 hmeshift = HME_HASH_SHIFT(hashno);
3960 hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift);
3961 hblktag.htag_rehash = hashno;
3962 hmebp = HME_HASH_FUNCTION(sfmmup, addr, hmeshift);
4007 {
4008 sf_srd_t *srdp;
4009 sf_region_t *rgnp;
4010 int ttesz;
4011 uint_t rid;
4012 caddr_t eaddr;
4013 caddr_t va;
4014 int hmeshift;
4015 hmeblk_tag hblktag;
4016 struct hmehash_bucket *hmebp;
4017 struct hme_blk *hmeblkp;
4018 struct hme_blk *pr_hblk;
4019 struct hme_blk *list;
4020
4021 if (rcookie == HAT_INVALID_REGION_COOKIE) {
4022 hat_unlock(sfmmup, addr, len);
4023 return;
4024 }
4025
4026 ASSERT(sfmmup != NULL);
4027 ASSERT(sfmmup != ksfmmup);
4028
4029 srdp = sfmmup->sfmmu_srdp;
4030 rid = (uint_t)((uint64_t)rcookie);
4031 VERIFY3U(rid, <, SFMMU_MAX_HME_REGIONS);
4032 eaddr = addr + len;
4033 va = addr;
4034 list = NULL;
4035 rgnp = srdp->srd_hmergnp[rid];
4036 SFMMU_VALIDATE_HMERID(sfmmup, rid, addr, len);
4037
4038 ASSERT(IS_P2ALIGNED(addr, TTEBYTES(rgnp->rgn_pgszc)));
4039 ASSERT(IS_P2ALIGNED(len, TTEBYTES(rgnp->rgn_pgszc)));
4040 if (rgnp->rgn_pgszc < HBLK_MIN_TTESZ) {
4041 ttesz = HBLK_MIN_TTESZ;
4042 } else {
4043 ttesz = rgnp->rgn_pgszc;
4044 }
4045 while (va < eaddr) {
4046 while (ttesz < rgnp->rgn_pgszc &&
4718 page_unlock(pp);
4719 }
4720
4721 /*
4722 * hat_probe returns 1 if the translation for the address 'addr' is
4723 * loaded, zero otherwise.
4724 *
 * hat_probe should be used only for advisory purposes because it may
4726 * occasionally return the wrong value. The implementation must guarantee that
4727 * returning the wrong value is a very rare event. hat_probe is used
4728 * to implement optimizations in the segment drivers.
4729 *
4730 */
4731 int
4732 hat_probe(struct hat *sfmmup, caddr_t addr)
4733 {
4734 pfn_t pfn;
4735 tte_t tte;
4736
4737 ASSERT(sfmmup != NULL);
4738
4739 ASSERT((sfmmup == ksfmmup) ||
4740 AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
4741
4742 if (sfmmup == ksfmmup) {
4743 while ((pfn = sfmmu_vatopfn(addr, sfmmup, &tte))
4744 == PFN_SUSPENDED) {
4745 sfmmu_vatopfn_suspended(addr, sfmmup, &tte);
4746 }
4747 } else {
4748 pfn = sfmmu_uvatopfn(addr, sfmmup, NULL);
4749 }
4750
4751 if (pfn != PFN_INVALID)
4752 return (1);
4753 else
4754 return (0);
4755 }
4756
4757 ssize_t
4758 hat_getpagesize(struct hat *sfmmup, caddr_t addr)
4759 {
4760 tte_t tte;
4761
4762 if (sfmmup == ksfmmup) {
4763 if (sfmmu_vatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
4764 return (-1);
4765 }
4766 } else {
4767 if (sfmmu_uvatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
4768 return (-1);
4769 }
4770 }
4771
4772 ASSERT(TTE_IS_VALID(&tte));
4773 return (TTEBYTES(TTE_CSZ(&tte)));
4774 }
4775
4776 uint_t
4777 hat_getattr(struct hat *sfmmup, caddr_t addr, uint_t *attr)
4778 {
4779 tte_t tte;
4780
4781 if (sfmmup == ksfmmup) {
4782 if (sfmmu_vatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
4783 tte.ll = 0;
4784 }
4785 } else {
4786 if (sfmmu_uvatopfn(addr, sfmmup, &tte) == PFN_INVALID) {
4787 tte.ll = 0;
4788 }
4789 }
4790 if (TTE_IS_VALID(&tte)) {
4791 *attr = sfmmu_ptov_attr(&tte);
4792 return (0);
4793 }
4794 *attr = 0;
4795 return ((uint_t)0xffffffff);
4796 }
4797
4798 /*
4799 * Enables more attributes on specified address range (ie. logical OR)
4800 */
4801 void
4802 hat_setattr(struct hat *hat, caddr_t addr, size_t len, uint_t attr)
4803 {
4804 ASSERT(hat->sfmmu_as != NULL);
4805
4806 sfmmu_chgattr(hat, addr, len, attr, SFMMU_SETATTR);
4807 }
4808
4809 /*
4810 * Assigns attributes to the specified address range. All the attributes
4811 * are specified.
4812 */
4813 void
4814 hat_chgattr(struct hat *hat, caddr_t addr, size_t len, uint_t attr)
4815 {
4816 ASSERT(hat->sfmmu_as != NULL);
4817
4818 sfmmu_chgattr(hat, addr, len, attr, SFMMU_CHGATTR);
4819 }
4820
4821 /*
4822 * Remove attributes on the specified address range (ie. loginal NAND)
4823 */
4824 void
4825 hat_clrattr(struct hat *hat, caddr_t addr, size_t len, uint_t attr)
4826 {
4827 ASSERT(hat->sfmmu_as != NULL);
4828
4829 sfmmu_chgattr(hat, addr, len, attr, SFMMU_CLRATTR);
4830 }
4831
4832 /*
4833 * Change attributes on an address range to that specified by attr and mode.
4834 */
4835 static void
4836 sfmmu_chgattr(struct hat *sfmmup, caddr_t addr, size_t len, uint_t attr,
4837 int mode)
4838 {
4839 struct hmehash_bucket *hmebp;
4840 hmeblk_tag hblktag;
4841 int hmeshift, hashno = 1;
4842 struct hme_blk *hmeblkp, *list = NULL;
4843 caddr_t endaddr;
4844 cpuset_t cpuset;
4845 demap_range_t dmr;
4846
4847 CPUSET_ZERO(cpuset);
5160 * Change the protections in the virtual address range
5161 * given to the specified virtual protection. If vprot is ~PROT_WRITE,
5162 * then remove write permission, leaving the other
5163 * permissions unchanged. If vprot is ~PROT_USER, remove user permissions.
5164 *
5165 */
5166 void
5167 hat_chgprot(struct hat *sfmmup, caddr_t addr, size_t len, uint_t vprot)
5168 {
5169 struct hmehash_bucket *hmebp;
5170 hmeblk_tag hblktag;
5171 int hmeshift, hashno = 1;
5172 struct hme_blk *hmeblkp, *list = NULL;
5173 caddr_t endaddr;
5174 cpuset_t cpuset;
5175 demap_range_t dmr;
5176
5177 ASSERT((len & MMU_PAGEOFFSET) == 0);
5178 ASSERT(((uintptr_t)addr & MMU_PAGEOFFSET) == 0);
5179
5180 ASSERT(sfmmup->sfmmu_as != NULL);
5181
5182 CPUSET_ZERO(cpuset);
5183
5184 if ((vprot != (uint_t)~PROT_WRITE) && (vprot & PROT_USER) &&
5185 ((addr + len) > (caddr_t)USERLIMIT)) {
5186 panic("user addr %p vprot %x in kernel space",
5187 (void *)addr, vprot);
5188 }
5189 endaddr = addr + len;
5190 hblktag.htag_id = sfmmup;
5191 hblktag.htag_rid = SFMMU_INVALID_SHMERID;
5192 DEMAP_RANGE_INIT(sfmmup, &dmr);
5193
5194 while (addr < endaddr) {
5195 hmeshift = HME_HASH_SHIFT(hashno);
5196 hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift);
5197 hblktag.htag_rehash = hashno;
5198 hmebp = HME_HASH_FUNCTION(sfmmup, addr, hmeshift);
5199
5200 SFMMU_HASH_LOCK(hmebp);
5595 hat_unload_callback(
5596 struct hat *sfmmup,
5597 caddr_t addr,
5598 size_t len,
5599 uint_t flags,
5600 hat_callback_t *callback)
5601 {
5602 struct hmehash_bucket *hmebp;
5603 hmeblk_tag hblktag;
5604 int hmeshift, hashno, iskernel;
5605 struct hme_blk *hmeblkp, *pr_hblk, *list = NULL;
5606 caddr_t endaddr;
5607 cpuset_t cpuset;
5608 int addr_count = 0;
5609 int a;
5610 caddr_t cb_start_addr[MAX_CB_ADDR];
5611 caddr_t cb_end_addr[MAX_CB_ADDR];
5612 int issegkmap = ISSEGKMAP(sfmmup, addr);
5613 demap_range_t dmr, *dmrp;
5614
5615 ASSERT(sfmmup->sfmmu_as != NULL);
5616
5617 ASSERT((sfmmup == ksfmmup) || (flags & HAT_UNLOAD_OTHER) || \
5618 AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
5619
5620 ASSERT(sfmmup != NULL);
5621 ASSERT((len & MMU_PAGEOFFSET) == 0);
5622 ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
5623
5624 /*
5625 * Probing through a large VA range (say 63 bits) will be slow, even
5626 * at 4 Meg steps between the probes. So, when the virtual address range
5627 * is very large, search the HME entries for what to unload.
5628 *
5629 * len >> TTE_PAGE_SHIFT(TTE4M) is the # of 4Meg probes we'd need
5630 *
5631 * UHMEHASH_SZ is number of hash buckets to examine
5632 *
5633 */
5634 if (sfmmup != KHATID && (len >> TTE_PAGE_SHIFT(TTE4M)) > UHMEHASH_SZ) {
5635 hat_unload_large_virtual(sfmmup, addr, len, flags, callback);
5872 callback->hcb_start_addr = cb_start_addr[a];
5873 callback->hcb_end_addr = cb_end_addr[a];
5874 callback->hcb_function(callback);
5875 }
5876 }
5877
5878 /*
5879 * Check TSB and TLB page sizes if the process isn't exiting.
5880 */
5881 if (!sfmmup->sfmmu_free)
5882 sfmmu_check_page_sizes(sfmmup, 0);
5883 }
5884
5885 /*
5886 * Unload all the mappings in the range [addr..addr+len). addr and len must
5887 * be MMU_PAGESIZE aligned.
5888 */
5889 void
5890 hat_unload(struct hat *sfmmup, caddr_t addr, size_t len, uint_t flags)
5891 {
5892 hat_unload_callback(sfmmup, addr, len, flags, NULL);
5893 }
5894
5895
5896 /*
5897 * Find the largest mapping size for this page.
5898 */
5899 int
5900 fnd_mapping_sz(page_t *pp)
5901 {
5902 int sz;
5903 int p_index;
5904
5905 p_index = PP_MAPINDEX(pp);
5906
5907 sz = 0;
5908 p_index >>= 1; /* don't care about 8K bit */
5909 for (; p_index; p_index >>= 1) {
5910 sz++;
5911 }
6211 va += sz;
6212 }
6213 }
6214
6215 /*
6216 * Synchronize all the mappings in the range [addr..addr+len).
6217 * Can be called with clearflag having two states:
6218 * HAT_SYNC_DONTZERO means just return the rm stats
6219 * HAT_SYNC_ZERORM means zero rm bits in the tte and return the stats
6220 */
6221 void
6222 hat_sync(struct hat *sfmmup, caddr_t addr, size_t len, uint_t clearflag)
6223 {
6224 struct hmehash_bucket *hmebp;
6225 hmeblk_tag hblktag;
6226 int hmeshift, hashno = 1;
6227 struct hme_blk *hmeblkp, *list = NULL;
6228 caddr_t endaddr;
6229 cpuset_t cpuset;
6230
6231 ASSERT((sfmmup == ksfmmup) ||
6232 AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
6233 ASSERT((len & MMU_PAGEOFFSET) == 0);
6234 ASSERT((clearflag == HAT_SYNC_DONTZERO) ||
6235 (clearflag == HAT_SYNC_ZERORM));
6236
6237 CPUSET_ZERO(cpuset);
6238
6239 endaddr = addr + len;
6240 hblktag.htag_id = sfmmup;
6241 hblktag.htag_rid = SFMMU_INVALID_SHMERID;
6242
6243 /*
6244 * Spitfire supports 4 page sizes.
6245 * Most pages are expected to be of the smallest page
6246 * size (8K) and these will not need to be rehashed. 64K
6247 * pages also don't need to be rehashed because the an hmeblk
6248 * spans 64K of address space. 512K pages might need 1 rehash and
6249 * and 4M pages 2 rehashes.
6250 */
7017 return; /* non-fatal */
7018 }
7019 panic("pa_hment leaked: 0x%p", (void *)pahmep);
7020 }
7021
7022 /*
7023 * Remove all mappings to page 'pp'.
7024 */
int
hat_pageunload(struct page *pp, uint_t forceflag)
{
	struct page *origpp = pp;
	struct sf_hment *sfhme, *tmphme;
	struct hme_blk *hmeblkp;
	kmutex_t *pml;		/* mapping-list lock for pp */
#ifdef VAC
	kmutex_t *pmtx;
#endif
	cpuset_t cpuset, tset;	/* CPUs needing a TLB cross-trap */
	int index, cons;
	int pa_hments;		/* count of pa_hments seen on the list */

	/* Caller must hold the page exclusively locked. */
	ASSERT(PAGE_EXCL(pp));

	tmphme = NULL;
	pa_hments = 0;
	CPUSET_ZERO(cpuset);

	pml = sfmmu_mlist_enter(pp);

#ifdef VAC
	/* Tear down any kpm mapping first so only hme mappings remain. */
	if (pp->p_kpmref)
		sfmmu_kpm_pageunload(pp);
	ASSERT(!PP_ISMAPPED_KPM(pp));
#endif
	/*
	 * Clear vpm reference. Since the page is exclusively locked
	 * vpm cannot be referencing it.
	 */
	if (vpm_enable) {
		pp->p_vpmref = 0;
	}

	index = PP_MAPINDEX(pp);
	cons = TTE8K;
retry:
	/*
	 * Walk the mapping list unloading each hme at the current
	 * constituent size.  tmphme is saved up front because
	 * sfmmu_pageunload() can remove sfhme from the list.
	 */
	for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
		tmphme = sfhme->hme_next;

		/* pa_hments are counted here and handled separately below. */
		if (IS_PAHME(sfhme)) {
			ASSERT(sfhme->hme_data != NULL);
			pa_hments++;
			continue;
		}

		hmeblkp = sfmmu_hmetohblk(sfhme);

		/*
		 * If there are kernel mappings don't unload them, they will
		 * be suspended.
		 */
		if (forceflag == SFMMU_KERNEL_RELOC && hmeblkp->hblk_lckcnt &&
		    hmeblkp->hblk_tag.htag_id == ksfmmup)
			continue;

		tset = sfmmu_pageunload(pp, sfhme, cons);
		CPUSET_OR(cpuset, tset);
	}

	/*
	 * The map index has one bit per larger page size that also maps
	 * this page; for each remaining bit, step up to that size's
	 * group-leader page and unload its mappings too.
	 */
	while (index != 0) {
		index = index >> 1;
		if (index != 0)
			cons++;
		if (index & 0x1) {
			/* Go to leading page */
			pp = PP_GROUPLEADER(pp, cons);
			ASSERT(sfmmu_mlist_held(pp));
			goto retry;
		}
	}

	/*
	 * cpuset may be empty if the page was only mapped by segkpm,
	 * in which case we won't actually cross-trap.
	 */
	xt_sync(cpuset);

	/*
	 * The page should have no mappings at this point, unless
	 * we were called from hat_page_relocate() in which case we
	 * leave the locked mappings which will be suspended later.
	 */
	ASSERT(!PP_ISMAPPED(origpp) || pa_hments ||
	    (forceflag == SFMMU_KERNEL_RELOC));

#ifdef VAC
	/* With the mappings gone, the temporary-noncached state can end. */
	if (PP_ISTNC(pp)) {
		if (cons == TTE8K) {
			pmtx = sfmmu_page_enter(pp);
			PP_CLRTNC(pp);
			sfmmu_page_exit(pmtx);
		} else {
			conv_tnc(pp, cons);
		}
	}
#endif	/* VAC */

	if (pa_hments && forceflag != SFMMU_KERNEL_RELOC) {
		/*
		 * Unlink any pa_hments and free them, calling back
		 * the responsible subsystem to notify it of the error.
		 * This can occur in situations such as drivers leaking
		 * DMA handles: naughty, but common enough that we'd like
		 * to keep the system running rather than bringing it
		 * down with an obscure error like "pa_hment leaked"
		 * which doesn't aid the user in debugging their driver.
		 */
		for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
			tmphme = sfhme->hme_next;
			if (IS_PAHME(sfhme)) {
				struct pa_hment *pahmep = sfhme->hme_data;
				sfmmu_pahment_leaked(pahmep);
				HME_SUB(sfhme, pp);
				kmem_cache_free(pa_hment_cache, pahmep);
			}
		}

		ASSERT(!PP_ISMAPPED(origpp));
	}

	sfmmu_mlist_exit(pml);

	return (0);
}
7151
7152 cpuset_t
7153 sfmmu_pageunload(page_t *pp, struct sf_hment *sfhme, int cons)
7154 {
7155 struct hme_blk *hmeblkp;
7156 sfmmu_t *sfmmup;
7157 tte_t tte, ttemod;
7158 #ifdef DEBUG
7159 tte_t orig_old;
7160 #endif /* DEBUG */
7161 caddr_t addr;
7162 int ttesz;
7163 int ret;
7164 cpuset_t cpuset;
7165
7166 ASSERT(pp != NULL);
7167 ASSERT(sfmmu_mlist_held(pp));
7168 ASSERT(!PP_ISKAS(pp));
7409
7410 clearflag &= ~HAT_SYNC_STOPON_SHARED;
7411 pml = sfmmu_mlist_enter(pp);
7412 index = PP_MAPINDEX(pp);
7413 cons = TTE8K;
7414 retry:
7415 for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
7416 /*
7417 * We need to save the next hment on the list since
7418 * it is possible for pagesync to remove an invalid hment
7419 * from the list.
7420 */
7421 tmphme = sfhme->hme_next;
7422 if (IS_PAHME(sfhme))
7423 continue;
7424 /*
7425 * If we are looking for large mappings and this hme doesn't
7426 * reach the range we are seeking, just ignore it.
7427 */
7428 hmeblkp = sfmmu_hmetohblk(sfhme);
7429
7430 if (hme_size(sfhme) < cons)
7431 continue;
7432
7433 if (stop_on_sh) {
7434 if (hmeblkp->hblk_shared) {
7435 sf_srd_t *srdp = hblktosrd(hmeblkp);
7436 uint_t rid = hmeblkp->hblk_tag.htag_rid;
7437 sf_region_t *rgnp;
7438 ASSERT(SFMMU_IS_SHMERID_VALID(rid));
7439 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
7440 ASSERT(srdp != NULL);
7441 rgnp = srdp->srd_hmergnp[rid];
7442 SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp,
7443 rgnp, rid);
7444 shcnt += rgnp->rgn_refcnt;
7445 } else {
7446 shcnt++;
7447 }
7448 if (shcnt > po_share) {
7568 {
7569 caddr_t addr;
7570 tte_t tte;
7571 tte_t ttemod;
7572 struct hme_blk *hmeblkp;
7573 int ret;
7574 sfmmu_t *sfmmup;
7575 cpuset_t cpuset;
7576
7577 ASSERT(pp != NULL);
7578 ASSERT(sfmmu_mlist_held(pp));
7579
7580 CPUSET_ZERO(cpuset);
7581 SFMMU_STAT(sf_clrwrt);
7582
7583 retry:
7584
7585 sfmmu_copytte(&sfhme->hme_tte, &tte);
7586 if (TTE_IS_VALID(&tte) && TTE_IS_WRITABLE(&tte)) {
7587 hmeblkp = sfmmu_hmetohblk(sfhme);
7588 sfmmup = hblktosfmmu(hmeblkp);
7589 addr = tte_to_vaddr(hmeblkp, tte);
7590
7591 ttemod = tte;
7592 TTE_CLR_WRT(&ttemod);
7593 TTE_CLR_MOD(&ttemod);
7594 ret = sfmmu_modifytte_try(&tte, &ttemod, &sfhme->hme_tte);
7595
7596 /*
7597 * if cas failed and the new value is not what
7598 * we want retry
7599 */
7600 if (ret < 0)
7601 goto retry;
7602
7603 /* we win the cas */
7604 if (ret > 0) {
7605 if (hmeblkp->hblk_shared) {
7606 sf_srd_t *srdp = (sf_srd_t *)sfmmup;
7607 uint_t rid = hmeblkp->hblk_tag.htag_rid;
7832 * Returns a page frame number for a given virtual address.
7833 * Returns PFN_INVALID to indicate an invalid mapping
7834 */
7835 pfn_t
7836 hat_getpfnum(struct hat *hat, caddr_t addr)
7837 {
7838 pfn_t pfn;
7839 tte_t tte;
7840
7841 /*
7842 * We would like to
7843 * ASSERT(AS_LOCK_HELD(as, &as->a_lock));
7844 * but we can't because the iommu driver will call this
7845 * routine at interrupt time and it can't grab the as lock
7846 * or it will deadlock: A thread could have the as lock
7847 * and be waiting for io. The io can't complete
7848 * because the interrupt thread is blocked trying to grab
7849 * the as lock.
7850 */
7851
7852 if (hat == ksfmmup) {
7853 if (IS_KMEM_VA_LARGEPAGE(addr)) {
7854 ASSERT(segkmem_lpszc > 0);
7855 pfn = sfmmu_kvaszc2pfn(addr, segkmem_lpszc);
7856 if (pfn != PFN_INVALID) {
7857 sfmmu_check_kpfn(pfn);
7858 return (pfn);
7859 }
7860 } else if (segkpm && IS_KPM_ADDR(addr)) {
7861 return (sfmmu_kpm_vatopfn(addr));
7862 }
7863 while ((pfn = sfmmu_vatopfn(addr, ksfmmup, &tte))
7864 == PFN_SUSPENDED) {
7865 sfmmu_vatopfn_suspended(addr, ksfmmup, &tte);
7866 }
7867 sfmmu_check_kpfn(pfn);
7868 return (pfn);
7869 } else {
7870 return (sfmmu_uvatopfn(addr, hat, NULL));
7871 }
8015 SFMMU_HASH_UNLOCK(hmebp);
8016 pfn = PFN_INVALID;
8017 return (pfn);
8018 }
8019 }
8020 SFMMU_HASH_UNLOCK(hmebp);
8021 hashno++;
8022 } while (hashno <= mmu_hashcnt);
8023 return (PFN_INVALID);
8024 }
8025
8026
8027 /*
8028 * For compatability with AT&T and later optimizations
8029 */
/* ARGSUSED */
void
hat_map(struct hat *hat, caddr_t addr, size_t len, uint_t flags)
{
	/* No work is needed here on sfmmu; only sanity-check the hat. */
	ASSERT(hat != NULL);
}
8036
8037 /*
8038 * Return the number of mappings to a particular page. This number is an
8039 * approximation of the number of people sharing the page.
8040 *
8041 * shared hmeblks or ism hmeblks are counted as 1 mapping here.
8042 * hat_page_checkshare() can be used to compare threshold to share
8043 * count that reflects the number of region sharers albeit at higher cost.
8044 */
8045 ulong_t
8046 hat_page_getshare(page_t *pp)
8047 {
8048 page_t *spp = pp; /* start page */
8049 kmutex_t *pml;
8050 ulong_t cnt;
8051 int index, sz = TTE64K;
8052
8053 /*
8054 * We need to grab the mlist lock to make sure any outstanding
8107
8108 if (vpm_enable && pp->p_vpmref) {
8109 cnt += 1;
8110 }
8111
8112 if (pp->p_share + cnt > sh_thresh) {
8113 sfmmu_mlist_exit(pml);
8114 return (1);
8115 }
8116
8117 index = PP_MAPINDEX(pp);
8118
8119 again:
8120 for (sfhme = pp->p_mapping; sfhme; sfhme = tmphme) {
8121 tmphme = sfhme->hme_next;
8122 if (IS_PAHME(sfhme)) {
8123 continue;
8124 }
8125
8126 hmeblkp = sfmmu_hmetohblk(sfhme);
8127 if (hme_size(sfhme) != sz) {
8128 continue;
8129 }
8130
8131 if (hmeblkp->hblk_shared) {
8132 sf_srd_t *srdp = hblktosrd(hmeblkp);
8133 uint_t rid = hmeblkp->hblk_tag.htag_rid;
8134 sf_region_t *rgnp;
8135 ASSERT(SFMMU_IS_SHMERID_VALID(rid));
8136 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
8137 ASSERT(srdp != NULL);
8138 rgnp = srdp->srd_hmergnp[rid];
8139 SFMMU_VALIDATE_SHAREDHBLK(hmeblkp, srdp,
8140 rgnp, rid);
8141 cnt += rgnp->rgn_refcnt;
8142 } else {
8143 cnt++;
8144 }
8145 if (cnt > sh_thresh) {
8146 sfmmu_mlist_exit(pml);
8221 CPUSET_ZERO(cpuset);
8222 sz = TTE64K;
8223 sync = 1;
8224 }
8225
8226 while (index) {
8227 if (!(index & 0x1)) {
8228 index >>= 1;
8229 sz++;
8230 continue;
8231 }
8232 ASSERT(sz <= pszc);
8233 rootpp = PP_GROUPLEADER(pp, sz);
8234 for (sfhme = rootpp->p_mapping; sfhme; sfhme = tmphme) {
8235 tmphme = sfhme->hme_next;
8236 ASSERT(!IS_PAHME(sfhme));
8237 hmeblkp = sfmmu_hmetohblk(sfhme);
8238 if (hme_size(sfhme) != sz) {
8239 continue;
8240 }
8241 tset = sfmmu_pageunload(rootpp, sfhme, sz);
8242 CPUSET_OR(cpuset, tset);
8243 }
8244 if (index >>= 1) {
8245 sz++;
8246 }
8247 }
8248
8249 ASSERT(!PP_ISMAPPED_LARGE(pp));
8250
8251 if (sync) {
8252 xt_sync(cpuset);
8253 #ifdef VAC
8254 if (PP_ISTNC(pp)) {
8255 conv_tnc(rootpp, sz);
8256 }
8257 #endif /* VAC */
8258 }
8259
8260 pmtx = sfmmu_page_enter(pp);
8348 * This is currently implemented as the number of bytes that have active
8349 * hardware translations that have page structures. Therefore, it can
8350 * underestimate the traditional resident set size, eg, if the
8351 * physical page is present and the hardware translation is missing;
8352 * and it can overestimate the rss, eg, if there are active
8353 * translations to a frame buffer with page structs.
8354 * Also, it does not take sharing into account.
8355 *
8356 * Note that we don't acquire locks here since this function is most often
8357 * called from the clock thread.
8358 */
8359 size_t
8360 hat_get_mapped_size(struct hat *hat)
8361 {
8362 size_t assize = 0;
8363 int i;
8364
8365 if (hat == NULL)
8366 return (0);
8367
8368 for (i = 0; i < mmu_page_sizes; i++)
8369 assize += ((pgcnt_t)hat->sfmmu_ttecnt[i] +
8370 (pgcnt_t)hat->sfmmu_scdrttecnt[i]) * TTEBYTES(i);
8371
8372 if (hat->sfmmu_iblk == NULL)
8373 return (assize);
8374
8375 for (i = 0; i < mmu_page_sizes; i++)
8376 assize += ((pgcnt_t)hat->sfmmu_ismttecnt[i] +
8377 (pgcnt_t)hat->sfmmu_scdismttecnt[i]) * TTEBYTES(i);
8378
8379 return (assize);
8380 }
8381
8382 int
8383 hat_stats_enable(struct hat *hat)
8384 {
8385 hatlock_t *hatlockp;
8386
8387 hatlockp = sfmmu_hat_enter(hat);
8388 hat->sfmmu_rmstat++;
8389 sfmmu_hat_exit(hatlockp);
8390 return (1);
8391 }
8392
8393 void
8394 hat_stats_disable(struct hat *hat)
8395 {
8396 hatlock_t *hatlockp;
8397
8398 hatlockp = sfmmu_hat_enter(hat);
8399 hat->sfmmu_rmstat--;
8400 sfmmu_hat_exit(hatlockp);
8401 }
8402
8403 /*
8404 * Routines for entering or removing ourselves from the
8405 * ism_hat's mapping list. This is used for both private and
8406 * SCD hats.
8407 */
8408 static void
8409 iment_add(struct ism_ment *iment, struct hat *ism_hat)
8410 {
8411 ASSERT(MUTEX_HELD(&ism_mlist_lock));
8412
8413 iment->iment_prev = NULL;
8414 iment->iment_next = ism_hat->sfmmu_iment;
8415 if (ism_hat->sfmmu_iment) {
8416 ism_hat->sfmmu_iment->iment_prev = iment;
8417 }
8479 sf_scd_t *old_scdp;
8480
8481 #ifdef DEBUG
8482 caddr_t eaddr = addr + len;
8483 #endif /* DEBUG */
8484
8485 ASSERT(ism_hatid != NULL && sfmmup != NULL);
8486 ASSERT(sptaddr == ISMID_STARTADDR);
8487 /*
8488 * Check the alignment.
8489 */
8490 if (!ISM_ALIGNED(ismshift, addr) || !ISM_ALIGNED(ismshift, sptaddr))
8491 return (EINVAL);
8492
8493 /*
8494 * Check size alignment.
8495 */
8496 if (!ISM_ALIGNED(ismshift, len))
8497 return (EINVAL);
8498
8499 /*
8500 * Allocate ism_ment for the ism_hat's mapping list, and an
8501 * ism map blk in case we need one. We must do our
8502 * allocations before acquiring locks to prevent a deadlock
8503 * in the kmem allocator on the mapping list lock.
8504 */
8505 new_iblk = kmem_cache_alloc(ism_blk_cache, KM_SLEEP);
8506 ism_ment = kmem_cache_alloc(ism_ment_cache, KM_SLEEP);
8507
8508 /*
8509 * Serialize ISM mappings with the ISM busy flag, and also the
8510 * trap handlers.
8511 */
8512 sfmmu_ismhat_enter(sfmmup, 0);
8513
8514 /*
8515 * Allocate an ism map blk if necessary.
8516 */
8517 if (sfmmup->sfmmu_iblk == NULL) {
8518 sfmmup->sfmmu_iblk = new_iblk;
8690 void
8691 hat_unshare(struct hat *sfmmup, caddr_t addr, size_t len, uint_t ismszc)
8692 {
8693 ism_map_t *ism_map;
8694 ism_ment_t *free_ment = NULL;
8695 ism_blk_t *ism_blkp;
8696 struct hat *ism_hatid;
8697 int found, i;
8698 hatlock_t *hatlockp;
8699 struct tsb_info *tsbinfo;
8700 uint_t ismshift = page_get_shift(ismszc);
8701 size_t sh_size = ISM_SHIFT(ismshift, len);
8702 uchar_t ism_rid;
8703 sf_scd_t *old_scdp;
8704
8705 ASSERT(ISM_ALIGNED(ismshift, addr));
8706 ASSERT(ISM_ALIGNED(ismshift, len));
8707 ASSERT(sfmmup != NULL);
8708 ASSERT(sfmmup != ksfmmup);
8709
8710 ASSERT(sfmmup->sfmmu_as != NULL);
8711
8712 /*
8713 * Make sure that during the entire time ISM mappings are removed,
8714 * the trap handlers serialize behind us, and that no one else
8715 * can be mucking with ISM mappings. This also lets us get away
8716 * with not doing expensive cross calls to flush the TLB -- we
8717 * just discard the context, flush the entire TSB, and call it
8718 * a day.
8719 */
8720 sfmmu_ismhat_enter(sfmmup, 0);
8721
8722 /*
8723 * Remove the mapping.
8724 *
8725 * We can't have any holes in the ism map.
8726 * The tsb miss code while searching the ism map will
8727 * stop on an empty map slot. So we must move
8728 * everyone past the hole up 1 if any.
8729 *
8730 * Also empty ism map blks are not freed until the
9144 * Always convert all mappings to TNC.
9145 */
9146 sz = fnd_mapping_sz(pp);
9147 pp = PP_GROUPLEADER(pp, sz);
9148 SFMMU_STAT_ADD(sf_uncache_conflict, TTEPAGES(sz));
9149 sfmmu_page_cache_array(pp, HAT_TMPNC, CACHE_FLUSH,
9150 TTEPAGES(sz));
9151
9152 return;
9153 }
9154
9155 /*
9156 * check if any mapping is in same as or if it is locked
9157 * since in that case we need to uncache.
9158 */
9159 for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
9160 tmphme = sfhmep->hme_next;
9161 if (IS_PAHME(sfhmep))
9162 continue;
9163 hmeblkp = sfmmu_hmetohblk(sfhmep);
9164 tmphat = hblktosfmmu(hmeblkp);
9165 sfmmu_copytte(&sfhmep->hme_tte, &tte);
9166 ASSERT(TTE_IS_VALID(&tte));
9167 if (hmeblkp->hblk_shared || tmphat == hat ||
9168 hmeblkp->hblk_lckcnt) {
9169 /*
9170 * We have an uncache conflict
9171 */
9172 SFMMU_STAT(sf_uncache_conflict);
9173 sfmmu_page_cache_array(pp, HAT_TMPNC, CACHE_FLUSH, 1);
9174 return;
9175 }
9176 }
9177
9178 /*
9179 * We have an unload conflict
9180 * We have already checked for LARGE mappings, therefore
9181 * the remaining mapping(s) must be TTE8K.
9182 */
9183 SFMMU_STAT(sf_unload_conflict);
9184
9185 for (sfhmep = pp->p_mapping; sfhmep; sfhmep = tmphme) {
9186 tmphme = sfhmep->hme_next;
9187 if (IS_PAHME(sfhmep))
9188 continue;
9189 hmeblkp = sfmmu_hmetohblk(sfhmep);
9190 ASSERT(!hmeblkp->hblk_shared);
9191 (void) sfmmu_pageunload(pp, sfhmep, TTE8K);
9192 }
9193
9194 if (PP_ISMAPPED_KPM(pp))
9195 sfmmu_kpm_vac_unload(pp, addr);
9196
9197 /*
9198 * Unloads only do TLB flushes so we need to flush the
9199 * cache here.
9200 */
9201 sfmmu_cache_flush(pp->p_pagenum, PP_GET_VCOLOR(pp));
9202 PP_SET_VCOLOR(pp, vcolor);
9203 }
9204
9205 /*
9206 * Whenever a mapping is unloaded and the page is in TNC state,
9207 * we see if the page can be made cacheable again. 'pp' is
9208 * the page that we just unloaded a mapping from, the size
9209 * of mapping that was unloaded is 'ottesz'.
9317
9318 if (PP_ISPNC(pp)) {
9319 return (0);
9320 }
9321
9322 clr_valid = 0;
9323 if (PP_ISMAPPED_KPM(pp)) {
9324 caddr_t kpmvaddr;
9325
9326 ASSERT(kpm_enable);
9327 kpmvaddr = hat_kpm_page2va(pp, 1);
9328 ASSERT(!(npages > 1 && IS_KPM_ALIAS_RANGE(kpmvaddr)));
9329 color1 = addr_to_vcolor(kpmvaddr);
9330 clr_valid = 1;
9331 }
9332
9333 for (sfhme = pp->p_mapping; sfhme; sfhme = sfhme->hme_next) {
9334 if (IS_PAHME(sfhme))
9335 continue;
9336 hmeblkp = sfmmu_hmetohblk(sfhme);
9337
9338 sfmmu_copytte(&sfhme->hme_tte, &tte);
9339 ASSERT(TTE_IS_VALID(&tte));
9340
9341 vaddr = tte_to_vaddr(hmeblkp, tte);
9342 color = addr_to_vcolor(vaddr);
9343
9344 if (npages > 1) {
9345 /*
9346 * If there is a big mapping, make sure
9347 * 8K mapping is consistent with the big
9348 * mapping.
9349 */
9350 bcolor = i % ncolors;
9351 if (color != bcolor) {
9352 return (0);
9353 }
9354 }
9355 if (!clr_valid) {
9356 clr_valid = 1;
9464 static void
9465 sfmmu_page_cache(page_t *pp, int flags, int cache_flush_flag, int bcolor)
9466 {
9467 struct sf_hment *sfhme;
9468 struct hme_blk *hmeblkp;
9469 sfmmu_t *sfmmup;
9470 tte_t tte, ttemod;
9471 caddr_t vaddr;
9472 int ret, color;
9473 pfn_t pfn;
9474
9475 color = bcolor;
9476 pfn = pp->p_pagenum;
9477
9478 for (sfhme = pp->p_mapping; sfhme; sfhme = sfhme->hme_next) {
9479
9480 if (IS_PAHME(sfhme))
9481 continue;
9482 hmeblkp = sfmmu_hmetohblk(sfhme);
9483
9484 sfmmu_copytte(&sfhme->hme_tte, &tte);
9485 ASSERT(TTE_IS_VALID(&tte));
9486 vaddr = tte_to_vaddr(hmeblkp, tte);
9487 color = addr_to_vcolor(vaddr);
9488
9489 #ifdef DEBUG
9490 if ((flags & HAT_CACHE) && bcolor != NO_VCOLOR) {
9491 ASSERT(color == bcolor);
9492 }
9493 #endif
9494
9495 ASSERT(flags != HAT_TMPNC || color == PP_GET_VCOLOR(pp));
9496
9497 ttemod = tte;
9498 if (flags & (HAT_UNCACHE | HAT_TMPNC)) {
9499 TTE_CLR_VCACHEABLE(&ttemod);
9500 } else { /* flags & HAT_CACHE */
9501 TTE_SET_VCACHEABLE(&ttemod);
9502 }
9503 ret = sfmmu_modifytte_try(&tte, &ttemod, &sfhme->hme_tte);
13211 for (; i <= (size - hme1blk_sz); i += hme1blk_sz, k++) {
13212 hmeblkp = (struct hme_blk *)addr;
13213 addr += hme1blk_sz;
13214 hmeblkp->hblk_nuc_bit = 1;
13215 hmeblkp->hblk_nextpa = cached_va_to_pa((caddr_t)hmeblkp);
13216 }
13217 ASSERT(k >= nhblk1);
13218 nucleus_hblk1.len = k;
13219 SFMMU_STAT_ADD(sf_hblk1_ncreate, k);
13220 }
13221
13222 /*
13223 * This function is currently not supported on this platform. For what
13224 * it's supposed to do, see hat.c and hat_srmmu.c
13225 */
/* ARGSUSED */
faultcode_t
hat_softlock(struct hat *hat, caddr_t addr, size_t *lenp, page_t **ppp,
    uint_t flags)
{
	/*
	 * Stub: software page locking is not implemented for the sfmmu
	 * HAT; always report the operation as unsupported so callers
	 * fall back to their non-softlock path.
	 */
	return (FC_NOSUPPORT);
}
13233
13234 /*
13235 * Searchs the mapping list of the page for a mapping of the same size. If not
13236 * found the corresponding bit is cleared in the p_index field. When large
13237 * pages are more prevalent in the system, we can maintain the mapping list
13238 * in order and we don't have to traverse the list each time. Just check the
13239 * next and prev entries, and if both are of different size, we clear the bit.
13240 */
13241 static void
13242 sfmmu_rm_large_mappings(page_t *pp, int ttesz)
13243 {
13244 struct sf_hment *sfhmep;
13245 struct hme_blk *hmeblkp;
13246 int index;
13247 pgcnt_t npgs;
13248
13249 ASSERT(ttesz > TTE8K);
13250
13251 ASSERT(sfmmu_mlist_held(pp));
13252
13253 ASSERT(PP_ISMAPPED_LARGE(pp));
13254
13255 /*
13256 * Traverse mapping list looking for another mapping of same size.
13257 * since we only want to clear index field if all mappings of
13258 * that size are gone.
13259 */
13260
13261 for (sfhmep = pp->p_mapping; sfhmep; sfhmep = sfhmep->hme_next) {
13262 if (IS_PAHME(sfhmep))
13263 continue;
13264 hmeblkp = sfmmu_hmetohblk(sfhmep);
13265 if (hme_size(sfhmep) == ttesz) {
13266 /*
13267 * another mapping of the same size. don't clear index.
13268 */
13269 return;
13270 }
13271 }
13272
13273 /*
13274 * Clear the p_index bit for large page.
13275 */
13276 index = PAGESZ_TO_INDEX(ttesz);
13277 npgs = TTEPAGES(ttesz);
13278 while (npgs-- > 0) {
13279 ASSERT(pp->p_index & index);
13280 pp->p_index &= ~index;
13281 pp = PP_PAGENEXT(pp);
13282 }
13283 }
13284
13829 uint_t rhash;
13830 uint_t rid;
13831 hatlock_t *hatlockp;
13832 sf_region_t *rgnp;
13833 sf_region_t *new_rgnp = NULL;
13834 int i;
13835 uint16_t *nextidp;
13836 sf_region_t **freelistp;
13837 int maxids;
13838 sf_region_t **rarrp;
13839 uint16_t *busyrgnsp;
13840 ulong_t rttecnt;
13841 uchar_t tteflag;
13842 uchar_t r_type = flags & HAT_REGION_TYPE_MASK;
13843 int text = (r_type == HAT_REGION_TEXT);
13844
13845 if (srdp == NULL || r_size == 0) {
13846 return (HAT_INVALID_REGION_COOKIE);
13847 }
13848
13849 ASSERT(sfmmup != ksfmmup);
13850 ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
13851 ASSERT(srdp->srd_refcnt > 0);
13852 ASSERT(!(flags & ~HAT_REGION_TYPE_MASK));
13853 ASSERT(flags == HAT_REGION_TEXT || flags == HAT_REGION_ISM);
13854 ASSERT(r_pgszc < mmu_page_sizes);
13855 if (!IS_P2ALIGNED(r_saddr, TTEBYTES(r_pgszc)) ||
13856 !IS_P2ALIGNED(r_size, TTEBYTES(r_pgszc))) {
13857 panic("hat_join_region: region addr or size is not aligned\n");
13858 }
13859
13860
13861 r_type = (r_type == HAT_REGION_ISM) ? SFMMU_REGION_ISM :
13862 SFMMU_REGION_HME;
13863 /*
13864 * Currently only support shared hmes for the read only main text
13865 * region.
13866 */
13867 if (r_type == SFMMU_REGION_HME && ((r_obj != srdp->srd_evp) ||
13868 (r_perm & PROT_WRITE))) {
14134 ASSERT(flags == HAT_REGION_TEXT || flags == HAT_REGION_ISM);
14135 ASSERT(!sfmmup->sfmmu_free || sfmmup->sfmmu_scdp == NULL);
14136
14137 r_type = (r_type == HAT_REGION_ISM) ? SFMMU_REGION_ISM :
14138 SFMMU_REGION_HME;
14139
14140 if (r_type == SFMMU_REGION_ISM) {
14141 ASSERT(SFMMU_IS_ISMRID_VALID(rid));
14142 ASSERT(rid < SFMMU_MAX_ISM_REGIONS);
14143 rgnp = srdp->srd_ismrgnp[rid];
14144 } else {
14145 ASSERT(SFMMU_IS_SHMERID_VALID(rid));
14146 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
14147 rgnp = srdp->srd_hmergnp[rid];
14148 }
14149 ASSERT(rgnp != NULL);
14150 ASSERT(rgnp->rgn_id == rid);
14151 ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type);
14152 ASSERT(!(rgnp->rgn_flags & SFMMU_REGION_FREE));
14153 ASSERT(AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
14154
14155 if (sfmmup->sfmmu_free) {
14156 ulong_t rttecnt;
14157 r_pgszc = rgnp->rgn_pgszc;
14158 r_size = rgnp->rgn_size;
14159
14160 ASSERT(sfmmup->sfmmu_scdp == NULL);
14161 if (r_type == SFMMU_REGION_ISM) {
14162 SF_RGNMAP_DEL(sfmmup->sfmmu_ismregion_map, rid);
14163 } else {
14164 /* update shme rgns ttecnt in sfmmu_ttecnt */
14165 rttecnt = r_size >> TTE_PAGE_SHIFT(r_pgszc);
14166 ASSERT(sfmmup->sfmmu_ttecnt[r_pgszc] >= rttecnt);
14167
14168 atomic_add_long(&sfmmup->sfmmu_ttecnt[r_pgszc],
14169 -rttecnt);
14170
14171 SF_RGNMAP_DEL(sfmmup->sfmmu_hmeregion_map, rid);
14172 }
14173 } else if (r_type == SFMMU_REGION_ISM) {
|