1301 sfmmu1_cache = kmem_cache_create("sfmmu1_cache", HME1BLK_SZ,
1302 HMEBLK_ALIGN, sfmmu_hblkcache_constructor,
1303 sfmmu_hblkcache_destructor,
1304 NULL, (void *)HME1BLK_SZ,
1305 hat_memload1_arena, KMC_NOHASH);
1306
1307 pa_hment_cache = kmem_cache_create("pa_hment_cache", PAHME_SZ,
1308 0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
1309
1310 ism_blk_cache = kmem_cache_create("ism_blk_cache",
1311 sizeof (ism_blk_t), ecache_alignsize, NULL, NULL,
1312 NULL, NULL, static_arena, KMC_NOHASH);
1313
1314 ism_ment_cache = kmem_cache_create("ism_ment_cache",
1315 sizeof (ism_ment_t), 0, NULL, NULL,
1316 NULL, NULL, NULL, 0);
1317
1318 /*
1319 * We grab the first hat for the kernel.
1320 */
1321 AS_LOCK_ENTER(&kas, &kas.a_lock, RW_WRITER);
1322 kas.a_hat = hat_alloc(&kas);
1323 AS_LOCK_EXIT(&kas, &kas.a_lock);
1324
1325 /*
1326 * Initialize hblk_reserve.
1327 */
1328 ((struct hme_blk *)hblk_reserve)->hblk_nextpa =
1329 va_to_pa((caddr_t)hblk_reserve);
1330
1331 #ifndef UTSB_PHYS
1332 /*
1333 * Reserve some kernel virtual address space for the locked TTEs
1334 * that allow us to probe the TSB from TL>0.
1335 */
1336 utsb_vabase = vmem_xalloc(heap_arena, tsb_slab_size, tsb_slab_size,
1337 0, 0, NULL, NULL, VM_SLEEP);
1338 utsb4m_vabase = vmem_xalloc(heap_arena, tsb_slab_size, tsb_slab_size,
1339 0, 0, NULL, NULL, VM_SLEEP);
1340 #endif
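/*
 * Editor's note (not part of the original source): a reading of the
 * vmem_xalloc() calls above, assuming the standard vmem interface
 * vmem_xalloc(vmp, size, align, phase, nocross, minaddr, maxaddr, vmflag).
 * Each call carves tsb_slab_size bytes out of heap_arena, aligned to
 * tsb_slab_size so a TSB slab never straddles its own natural boundary;
 * phase, nocross and the address-range bounds are left unconstrained, and
 * VM_SLEEP makes the allocation block until it can succeed rather than fail.
 */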
1341
1342 #ifdef VAC
1343 /*
1446 for (i = 0; i < SFMMU_NUM_LOCK; i++)
1447 mutex_init(HATLOCK_MUTEXP(&hat_lock[i]), NULL, MUTEX_DEFAULT,
1448 NULL);
1449 }
1450
1451 #define SFMMU_KERNEL_MAXVA \
1452 (kmem64_base ? (uintptr_t)kmem64_end : (SYSLIMIT))
1453
1454 /*
1455 * Allocate a hat structure.
1456 * Called when an address space first uses a hat.
1457 */
1458 struct hat *
1459 hat_alloc(struct as *as)
1460 {
1461 sfmmu_t *sfmmup;
1462 int i;
1463 uint64_t cnum;
1464 extern uint_t get_color_start(struct as *);
1465
1466 ASSERT(AS_WRITE_HELD(as, &as->a_lock));
1467 sfmmup = kmem_cache_alloc(sfmmuid_cache, KM_SLEEP);
1468 sfmmup->sfmmu_as = as;
1469 sfmmup->sfmmu_flags = 0;
1470 sfmmup->sfmmu_tteflags = 0;
1471 sfmmup->sfmmu_rtteflags = 0;
1472 LOCK_INIT_CLEAR(&sfmmup->sfmmu_ctx_lock);
1473
1474 if (as == &kas) {
1475 ksfmmup = sfmmup;
1476 sfmmup->sfmmu_cext = 0;
1477 cnum = KCONTEXT;
1478
1479 sfmmup->sfmmu_clrstart = 0;
1480 sfmmup->sfmmu_tsb = NULL;
1481 /*
1482 * hat_kern_setup() will call sfmmu_init_ktsbinfo()
1483 * to set up tsb_info for ksfmmup.
1484 */
1485 } else {
1486
1900 * sfmmu_setctx_sec takes <pgsz|cnum> as a parameter;
1901 * pagesize bits don't matter in this case since we are passing
1902 * INVALID_CONTEXT to it.
1903 * Compatibility Note: hw takes care of MMU_SCONTEXT1
1904 */
1905 sfmmu_setctx_sec(INVALID_CONTEXT);
1906 sfmmu_clear_utsbinfo();
1907
1908 kpreempt_enable();
1909 sfmmu_hat_exit(hatlockp);
1910 }
1911 }
1912
1913 /*
1914 * Free all the translation resources for the specified address space.
1915 * Called from as_free when an address space is being destroyed.
1916 */
1917 void
1918 hat_free_start(struct hat *sfmmup)
1919 {
1920 ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
1921 ASSERT(sfmmup != ksfmmup);
1922 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
1923
1924 sfmmup->sfmmu_free = 1;
1925 if (sfmmup->sfmmu_scdp != NULL) {
1926 sfmmu_leave_scd(sfmmup, 0);
1927 }
1928
1929 ASSERT(sfmmup->sfmmu_scdp == NULL);
1930 }
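/*
 * Editor's note (illustrative sketch, not part of the original source):
 * hat_free_start() and hat_free_end() bracket address-space teardown.
 * Roughly the caller sequence as_free() is expected to follow:
 *
 *	hat_free_start(as->a_hat);	// mark the hat as being freed
 *	...unmap all remaining segments...	// drops the ttecnt counts
 *	hat_free_end(as->a_hat);	// release TSBs and the sfmmu itself
 *
 * The ASSERTs in hat_free_end() below depend on the middle step having
 * already removed every mapping.
 */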
1931
1932 void
1933 hat_free_end(struct hat *sfmmup)
1934 {
1935 int i;
1936
1937 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
1938 ASSERT(sfmmup->sfmmu_free == 1);
1939 ASSERT(sfmmup->sfmmu_ttecnt[TTE8K] == 0);
1940 ASSERT(sfmmup->sfmmu_ttecnt[TTE64K] == 0);
2230
2231 ASSERT(hat != NULL);
2232 ASSERT(PAGE_LOCKED(pp));
2233 ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
2234 ASSERT(!(flags & ~SFMMU_LOAD_ALLFLAG));
2235 ASSERT(!(attr & ~SFMMU_LOAD_ALLATTR));
2236 SFMMU_VALIDATE_HMERID(hat, rid, addr, MMU_PAGESIZE);
2237
2238 if (PP_ISFREE(pp)) {
2239 panic("hat_memload: loading a mapping to free page %p",
2240 (void *)pp);
2241 }
2242
2243 if (hat->sfmmu_xhat_provider) {
2244 /* no regions for xhats */
2245 ASSERT(!SFMMU_IS_SHMERID_VALID(rid));
2246 XHAT_MEMLOAD(hat, addr, pp, attr, flags);
2247 return;
2248 }
2249
2250 ASSERT((hat == ksfmmup) ||
2251 AS_LOCK_HELD(hat->sfmmu_as, &hat->sfmmu_as->a_lock));
2252
2253 if (flags & ~SFMMU_LOAD_ALLFLAG)
2254 cmn_err(CE_NOTE, "hat_memload: unsupported flags %d",
2255 flags & ~SFMMU_LOAD_ALLFLAG);
2256
2257 if (hat->sfmmu_rmstat)
2258 hat_resvstat(MMU_PAGESIZE, hat->sfmmu_as, addr);
2259
2260 #if defined(SF_ERRATA_57)
2261 if ((hat != ksfmmup) && AS_TYPE_64BIT(hat->sfmmu_as) &&
2262 (addr < errata57_limit) && (attr & PROT_EXEC) &&
2263 !(flags & HAT_LOAD_SHARE)) {
2264 cmn_err(CE_WARN, "hat_memload: illegal attempt to make user "
2265 "page executable");
2266 attr &= ~PROT_EXEC;
2267 }
2268 #endif
2269
2270 sfmmu_memtte(&tte, pp->p_pagenum, attr, TTE8K);
2271 (void) sfmmu_tteload_array(hat, &tte, addr, &pp, flags, rid);
2286 * pass dp = NULL. If tteload doesn't get a non-NULL
2287 * page pointer it can't cache memory.
2288 */
2289 void
2290 hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn,
2291 uint_t attr, int flags)
2292 {
2293 tte_t tte;
2294 struct page *pp = NULL;
2295 int use_lgpg = 0;
2296
2297 ASSERT(hat != NULL);
2298
2299 if (hat->sfmmu_xhat_provider) {
2300 XHAT_DEVLOAD(hat, addr, len, pfn, attr, flags);
2301 return;
2302 }
2303
2304 ASSERT(!(flags & ~SFMMU_LOAD_ALLFLAG));
2305 ASSERT(!(attr & ~SFMMU_LOAD_ALLATTR));
2306 ASSERT((hat == ksfmmup) ||
2307 AS_LOCK_HELD(hat->sfmmu_as, &hat->sfmmu_as->a_lock));
2308 if (len == 0)
2309 panic("hat_devload: zero len");
2310 if (flags & ~SFMMU_LOAD_ALLFLAG)
2311 cmn_err(CE_NOTE, "hat_devload: unsupported flags %d",
2312 flags & ~SFMMU_LOAD_ALLFLAG);
2313
2314 #if defined(SF_ERRATA_57)
2315 if ((hat != ksfmmup) && AS_TYPE_64BIT(hat->sfmmu_as) &&
2316 (addr < errata57_limit) && (attr & PROT_EXEC) &&
2317 !(flags & HAT_LOAD_SHARE)) {
2318 cmn_err(CE_WARN, "hat_devload: illegal attempt to make user "
2319 "page executable");
2320 attr &= ~PROT_EXEC;
2321 }
2322 #endif
2323
2324 /*
2325 * If it's a memory page, find its pp
2326 */
2327 if (!(flags & HAT_LOAD_NOCONSIST) && pf_is_memory(pfn)) {
3956 }
3957 ttesz--;
3958 }
3959 }
3960
3961 /*
3962 * Release one hardware address translation lock on the given address range.
3963 */
3964 void
3965 hat_unlock(struct hat *sfmmup, caddr_t addr, size_t len)
3966 {
3967 struct hmehash_bucket *hmebp;
3968 hmeblk_tag hblktag;
3969 int hmeshift, hashno = 1;
3970 struct hme_blk *hmeblkp, *list = NULL;
3971 caddr_t endaddr;
3972
3973 ASSERT(sfmmup != NULL);
3974 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
3975
3976 ASSERT((sfmmup == ksfmmup) ||
3977 AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
3978 ASSERT((len & MMU_PAGEOFFSET) == 0);
3979 endaddr = addr + len;
3980 hblktag.htag_id = sfmmup;
3981 hblktag.htag_rid = SFMMU_INVALID_SHMERID;
3982
3983 /*
3984 * Spitfire supports 4 page sizes.
3985 * Most pages are expected to be of the smallest page size (8K) and
3986 * these will not need to be rehashed. 64K pages also don't need to be
3987 * rehashed because an hmeblk spans 64K of address space. 512K pages
3988 * might need 1 rehash and 4M pages might need 2 rehashes.
3989 */
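/*
 * Editor's note (illustrative): the loop below starts at hashno == 1, the
 * hash in which 8K/64K hmeblks live.  If the locked mapping was a 512K or
 * 4M page the first lookup misses and the code rehashes with hashno == 2,
 * and for 4M pages with hashno == 3, matching the rehash counts described
 * above.
 */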
3990 while (addr < endaddr) {
3991 hmeshift = HME_HASH_SHIFT(hashno);
3992 hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift);
3993 hblktag.htag_rehash = hashno;
3994 hmebp = HME_HASH_FUNCTION(sfmmup, addr, hmeshift);
3995
3996 SFMMU_HASH_LOCK(hmebp);
3997
4753
4754 /*
4755 * hat_probe returns 1 if the translation for the address 'addr' is
4756 * loaded, zero otherwise.
4757 *
4758 * hat_probe should be used only for advisory purposes because it may
4759 * occasionally return the wrong value. The implementation must guarantee that
4760 * returning the wrong value is a very rare event. hat_probe is used
4761 * to implement optimizations in the segment drivers.
4762 *
4763 */
4764 int
4765 hat_probe(struct hat *sfmmup, caddr_t addr)
4766 {
4767 pfn_t pfn;
4768 tte_t tte;
4769
4770 ASSERT(sfmmup != NULL);
4771 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
4772
4773 ASSERT((sfmmup == ksfmmup) ||
4774 AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
4775
4776 if (sfmmup == ksfmmup) {
4777 while ((pfn = sfmmu_vatopfn(addr, sfmmup, &tte))
4778 == PFN_SUSPENDED) {
4779 sfmmu_vatopfn_suspended(addr, sfmmup, &tte);
4780 }
4781 } else {
4782 pfn = sfmmu_uvatopfn(addr, sfmmup, NULL);
4783 }
4784
4785 if (pfn != PFN_INVALID)
4786 return (1);
4787 else
4788 return (0);
4789 }
4790
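/*
 * Editor's note (illustrative, hypothetical caller; not part of the
 * original source): because hat_probe() is only advisory, a caller such as
 * a segment driver should use it only to skip work that a stale answer can
 * make slower, never incorrect.  A minimal sketch, with 'as' and 'addr' as
 * placeholder names:
 *
 *	if (hat_probe(as->a_hat, addr) == 0) {
 *		// No translation appears to be loaded; take the full
 *		// (always safe) fault path instead.
 *		(void) as_fault(as->a_hat, as, addr, PAGESIZE,
 *		    F_INVAL, S_READ);
 *	}
 */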
4791 ssize_t
4792 hat_getpagesize(struct hat *sfmmup, caddr_t addr)
4793 {
4794 tte_t tte;
4903 sfmmu_chgattr(hat, addr, len, attr, SFMMU_CLRATTR);
4904 }
4905
4906 /*
4907 * Change attributes on an address range to that specified by attr and mode.
4908 */
4909 static void
4910 sfmmu_chgattr(struct hat *sfmmup, caddr_t addr, size_t len, uint_t attr,
4911 int mode)
4912 {
4913 struct hmehash_bucket *hmebp;
4914 hmeblk_tag hblktag;
4915 int hmeshift, hashno = 1;
4916 struct hme_blk *hmeblkp, *list = NULL;
4917 caddr_t endaddr;
4918 cpuset_t cpuset;
4919 demap_range_t dmr;
4920
4921 CPUSET_ZERO(cpuset);
4922
4923 ASSERT((sfmmup == ksfmmup) ||
4924 AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
4925 ASSERT((len & MMU_PAGEOFFSET) == 0);
4926 ASSERT(((uintptr_t)addr & MMU_PAGEOFFSET) == 0);
4927
4928 if ((attr & PROT_USER) && (mode != SFMMU_CLRATTR) &&
4929 ((addr + len) > (caddr_t)USERLIMIT)) {
4930 panic("user addr %p in kernel space",
4931 (void *)addr);
4932 }
4933
4934 endaddr = addr + len;
4935 hblktag.htag_id = sfmmup;
4936 hblktag.htag_rid = SFMMU_INVALID_SHMERID;
4937 DEMAP_RANGE_INIT(sfmmup, &dmr);
4938
4939 while (addr < endaddr) {
4940 hmeshift = HME_HASH_SHIFT(hashno);
4941 hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift);
4942 hblktag.htag_rehash = hashno;
4943 hmebp = HME_HASH_FUNCTION(sfmmup, addr, hmeshift);
4944
5697 caddr_t cb_end_addr[MAX_CB_ADDR];
5698 int issegkmap = ISSEGKMAP(sfmmup, addr);
5699 demap_range_t dmr, *dmrp;
5700
5701 if (sfmmup->sfmmu_xhat_provider) {
5702 XHAT_UNLOAD_CALLBACK(sfmmup, addr, len, flags, callback);
5703 return;
5704 } else {
5705 /*
5706 * This must be a CPU HAT. If the address space has
5707 * XHATs attached, unload the mappings for all of them,
5708 * just in case
5709 */
5710 ASSERT(sfmmup->sfmmu_as != NULL);
5711 if (sfmmup->sfmmu_as->a_xhat != NULL)
5712 xhat_unload_callback_all(sfmmup->sfmmu_as, addr,
5713 len, flags, callback);
5714 }
5715
5716 ASSERT((sfmmup == ksfmmup) || (flags & HAT_UNLOAD_OTHER) || \
5717 AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
5718
5719 ASSERT(sfmmup != NULL);
5720 ASSERT((len & MMU_PAGEOFFSET) == 0);
5721 ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
5722
5723 /*
5724 * Probing through a large VA range (say 63 bits) will be slow, even
5725 * at 4 Meg steps between the probes. So, when the virtual address range
5726 * is very large, search the HME entries for what to unload.
5727 *
5728 * len >> TTE_PAGE_SHIFT(TTE4M) is the # of 4Meg probes we'd need
5729 *
5730 * UHMEHASH_SZ is the number of hash buckets to examine
5731 *
5732 */
5733 if (sfmmup != KHATID && (len >> TTE_PAGE_SHIFT(TTE4M)) > UHMEHASH_SZ) {
5734 hat_unload_large_virtual(sfmmup, addr, len, flags, callback);
5735 return;
5736 }
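/*
 * Editor's note (illustrative arithmetic): for a 1 TB range the check
 * above computes len >> TTE_PAGE_SHIFT(TTE4M) = 2^40 >> 22 = 262144
 * candidate 4M probes.  On typical configurations that exceeds the number
 * of user hme hash buckets, so such a request is handed to
 * hat_unload_large_virtual() and the hash buckets are walked instead of
 * probing the whole VA range.
 */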
5737
6315 }
6316 }
6317
6318 /*
6319 * Synchronize all the mappings in the range [addr..addr+len).
6320 * Can be called with clearflag having two states:
6321 * HAT_SYNC_DONTZERO means just return the rm stats
6322 * HAT_SYNC_ZERORM means zero rm bits in the tte and return the stats
6323 */
6324 void
6325 hat_sync(struct hat *sfmmup, caddr_t addr, size_t len, uint_t clearflag)
6326 {
6327 struct hmehash_bucket *hmebp;
6328 hmeblk_tag hblktag;
6329 int hmeshift, hashno = 1;
6330 struct hme_blk *hmeblkp, *list = NULL;
6331 caddr_t endaddr;
6332 cpuset_t cpuset;
6333
6334 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
6335 ASSERT((sfmmup == ksfmmup) ||
6336 AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
6337 ASSERT((len & MMU_PAGEOFFSET) == 0);
6338 ASSERT((clearflag == HAT_SYNC_DONTZERO) ||
6339 (clearflag == HAT_SYNC_ZERORM));
6340
6341 CPUSET_ZERO(cpuset);
6342
6343 endaddr = addr + len;
6344 hblktag.htag_id = sfmmup;
6345 hblktag.htag_rid = SFMMU_INVALID_SHMERID;
6346
6347 /*
6348 * Spitfire supports 4 page sizes.
6349 * Most pages are expected to be of the smallest page
6350 * size (8K) and these will not need to be rehashed. 64K
6351 * pages also don't need to be rehashed because an hmeblk
6352 * spans 64K of address space. 512K pages might need 1 rehash
6353 * and 4M pages 2 rehashes.
6354 */
6355 while (addr < endaddr) {
6356 hmeshift = HME_HASH_SHIFT(hashno);
7959 panic("Illegal VA->PA translation, pp 0x%p not permanent",
7960 (void *)pp);
7961 else
7962 panic("Illegal VA->PA translation, pp 0x%p not locked",
7963 (void *)pp);
7964 }
7965 #endif /* DEBUG */
7966
7967 /*
7968 * Returns a page frame number for a given virtual address.
7969 * Returns PFN_INVALID to indicate an invalid mapping
7970 */
7971 pfn_t
7972 hat_getpfnum(struct hat *hat, caddr_t addr)
7973 {
7974 pfn_t pfn;
7975 tte_t tte;
7976
7977 /*
7978 * We would like to
7979 * ASSERT(AS_LOCK_HELD(as, &as->a_lock));
7980 * but we can't because the iommu driver will call this
7981 * routine at interrupt time and it can't grab the as lock
7982 * or it will deadlock: A thread could have the as lock
7983 * and be waiting for io. The io can't complete
7984 * because the interrupt thread is blocked trying to grab
7985 * the as lock.
7986 */
7987
7988 ASSERT(hat->sfmmu_xhat_provider == NULL);
7989
7990 if (hat == ksfmmup) {
7991 if (IS_KMEM_VA_LARGEPAGE(addr)) {
7992 ASSERT(segkmem_lpszc > 0);
7993 pfn = sfmmu_kvaszc2pfn(addr, segkmem_lpszc);
7994 if (pfn != PFN_INVALID) {
7995 sfmmu_check_kpfn(pfn);
7996 return (pfn);
7997 }
7998 } else if (segkpm && IS_KPM_ADDR(addr)) {
7999 return (sfmmu_kpm_vatopfn(addr));
14014 hatlock_t *hatlockp;
14015 sf_region_t *rgnp;
14016 sf_region_t *new_rgnp = NULL;
14017 int i;
14018 uint16_t *nextidp;
14019 sf_region_t **freelistp;
14020 int maxids;
14021 sf_region_t **rarrp;
14022 uint16_t *busyrgnsp;
14023 ulong_t rttecnt;
14024 uchar_t tteflag;
14025 uchar_t r_type = flags & HAT_REGION_TYPE_MASK;
14026 int text = (r_type == HAT_REGION_TEXT);
14027
14028 if (srdp == NULL || r_size == 0) {
14029 return (HAT_INVALID_REGION_COOKIE);
14030 }
14031
14032 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
14033 ASSERT(sfmmup != ksfmmup);
14034 ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
14035 ASSERT(srdp->srd_refcnt > 0);
14036 ASSERT(!(flags & ~HAT_REGION_TYPE_MASK));
14037 ASSERT(flags == HAT_REGION_TEXT || flags == HAT_REGION_ISM);
14038 ASSERT(r_pgszc < mmu_page_sizes);
14039 if (!IS_P2ALIGNED(r_saddr, TTEBYTES(r_pgszc)) ||
14040 !IS_P2ALIGNED(r_size, TTEBYTES(r_pgszc))) {
14041 panic("hat_join_region: region addr or size is not aligned\n");
14042 }
14043
14044
14045 r_type = (r_type == HAT_REGION_ISM) ? SFMMU_REGION_ISM :
14046 SFMMU_REGION_HME;
14047 /*
14048 * Currently we only support shared hmes for the read-only main
14049 * text region.
14050 */
14051 if (r_type == SFMMU_REGION_HME && ((r_obj != srdp->srd_evp) ||
14052 (r_perm & PROT_WRITE))) {
14053 return (HAT_INVALID_REGION_COOKIE);
14054 }
14317 ASSERT(!(flags & ~HAT_REGION_TYPE_MASK));
14318 ASSERT(flags == HAT_REGION_TEXT || flags == HAT_REGION_ISM);
14319 ASSERT(!sfmmup->sfmmu_free || sfmmup->sfmmu_scdp == NULL);
14320
14321 r_type = (r_type == HAT_REGION_ISM) ? SFMMU_REGION_ISM :
14322 SFMMU_REGION_HME;
14323
14324 if (r_type == SFMMU_REGION_ISM) {
14325 ASSERT(SFMMU_IS_ISMRID_VALID(rid));
14326 ASSERT(rid < SFMMU_MAX_ISM_REGIONS);
14327 rgnp = srdp->srd_ismrgnp[rid];
14328 } else {
14329 ASSERT(SFMMU_IS_SHMERID_VALID(rid));
14330 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
14331 rgnp = srdp->srd_hmergnp[rid];
14332 }
14333 ASSERT(rgnp != NULL);
14334 ASSERT(rgnp->rgn_id == rid);
14335 ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type);
14336 ASSERT(!(rgnp->rgn_flags & SFMMU_REGION_FREE));
14337 ASSERT(AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
14338
14339 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
14340 if (r_type == SFMMU_REGION_HME && sfmmup->sfmmu_as->a_xhat != NULL) {
14341 xhat_unload_callback_all(sfmmup->sfmmu_as, rgnp->rgn_saddr,
14342 rgnp->rgn_size, 0, NULL);
14343 }
14344
14345 if (sfmmup->sfmmu_free) {
14346 ulong_t rttecnt;
14347 r_pgszc = rgnp->rgn_pgszc;
14348 r_size = rgnp->rgn_size;
14349
14350 ASSERT(sfmmup->sfmmu_scdp == NULL);
14351 if (r_type == SFMMU_REGION_ISM) {
14352 SF_RGNMAP_DEL(sfmmup->sfmmu_ismregion_map, rid);
14353 } else {
14354 /* update shme rgns ttecnt in sfmmu_ttecnt */
14355 rttecnt = r_size >> TTE_PAGE_SHIFT(r_pgszc);
14356 ASSERT(sfmmup->sfmmu_ttecnt[r_pgszc] >= rttecnt);
14357
15113 }
15114
15115 /*
15116 * The first phase of a process joining an SCD. The hat structure is
15117 * linked to the SCD queue and then the HAT_JOIN_SCD sfmmu flag is set
15118 * and a cross-call with context invalidation is used to cause the
15119 * remaining work to be carried out in the sfmmu_tsbmiss_exception()
15120 * routine.
15121 */
15122 static void
15123 sfmmu_join_scd(sf_scd_t *scdp, sfmmu_t *sfmmup)
15124 {
15125 hatlock_t *hatlockp;
15126 sf_srd_t *srdp = sfmmup->sfmmu_srdp;
15127 int i;
15128 sf_scd_t *old_scdp;
15129
15130 ASSERT(srdp != NULL);
15131 ASSERT(scdp != NULL);
15132 ASSERT(scdp->scd_refcnt > 0);
15133 ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
15134
15135 if ((old_scdp = sfmmup->sfmmu_scdp) != NULL) {
15136 ASSERT(old_scdp != scdp);
15137
15138 mutex_enter(&old_scdp->scd_mutex);
15139 sfmmu_from_scd_list(&old_scdp->scd_sf_list, sfmmup);
15140 mutex_exit(&old_scdp->scd_mutex);
15141 /*
15142 * sfmmup leaves the old scd. Update sfmmu_ttecnt to
15143 * include the shme rgn ttecnt for rgns that
15144 * were in the old SCD
15145 */
15146 for (i = 0; i < mmu_page_sizes; i++) {
15147 ASSERT(sfmmup->sfmmu_scdrttecnt[i] ==
15148 old_scdp->scd_rttecnt[i]);
15149 atomic_add_long(&sfmmup->sfmmu_ttecnt[i],
15150 sfmmup->sfmmu_scdrttecnt[i]);
15151 }
15152 }
15153
15225 }
15226
15227 /* Set HAT_CTX1_FLAG for all SCD ISMs */
15228 sfmmu_ism_hatflags(sfmmup, 1);
15229
15230 SFMMU_STAT(sf_join_scd);
15231 }
15232
15233 /*
15234 * This routine is called to check if there is an SCD which matches
15235 * the process's region map; if not, a new SCD may be created.
15236 */
15237 static void
15238 sfmmu_find_scd(sfmmu_t *sfmmup)
15239 {
15240 sf_srd_t *srdp = sfmmup->sfmmu_srdp;
15241 sf_scd_t *scdp, *new_scdp;
15242 int ret;
15243
15244 ASSERT(srdp != NULL);
15245 ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
15246
15247 mutex_enter(&srdp->srd_scd_mutex);
15248 for (scdp = srdp->srd_scdp; scdp != NULL;
15249 scdp = scdp->scd_next) {
15250 SF_RGNMAP_EQUAL(&scdp->scd_region_map,
15251 &sfmmup->sfmmu_region_map, ret);
15252 if (ret == 1) {
15253 SF_SCD_INCR_REF(scdp);
15254 mutex_exit(&srdp->srd_scd_mutex);
15255 sfmmu_join_scd(scdp, sfmmup);
15256 ASSERT(scdp->scd_refcnt >= 2);
15257 atomic_dec_32((volatile uint32_t *)&scdp->scd_refcnt);
15258 return;
15259 } else {
15260 /*
15261 * If the sfmmu region map is a subset of the scd
15262 * region map, then the assumption is that this process
15263 * will continue attaching to ISM segments until the
15264 * region maps are equal.
15265 */
15331 * are about to leave the SCD
15332 */
15333 for (i = 0; i < mmu_page_sizes; i++) {
15334 ASSERT(sfmmup->sfmmu_scdrttecnt[i] ==
15335 scdp->scd_rttecnt[i]);
15336 atomic_add_long(&sfmmup->sfmmu_ttecnt[i],
15337 sfmmup->sfmmu_scdrttecnt[i]);
15338 sfmmup->sfmmu_scdrttecnt[i] = 0;
15339 }
15340 sfmmup->sfmmu_scdp = NULL;
15341
15342 SF_SCD_DECR_REF(srdp, scdp);
15343 return;
15344 }
15345
15346 ASSERT(r_type != SFMMU_REGION_ISM ||
15347 SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY));
15348 ASSERT(scdp->scd_refcnt);
15349 ASSERT(!sfmmup->sfmmu_free);
15350 ASSERT(sfmmu_hat_lock_held(sfmmup));
15351 ASSERT(AS_LOCK_HELD(sfmmup->sfmmu_as, &sfmmup->sfmmu_as->a_lock));
15352
15353 /*
15354 * Wait for ISM maps to be updated.
15355 */
15356 if (r_type != SFMMU_REGION_ISM) {
15357 while (SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY) &&
15358 sfmmup->sfmmu_scdp != NULL) {
15359 cv_wait(&sfmmup->sfmmu_tsb_cv,
15360 HATLOCK_MUTEXP(hatlockp));
15361 }
15362
15363 if (sfmmup->sfmmu_scdp == NULL) {
15364 sfmmu_hat_exit(hatlockp);
15365 return;
15366 }
15367 SFMMU_FLAGS_SET(sfmmup, HAT_ISMBUSY);
15368 }
15369
15370 if (SFMMU_FLAGS_ISSET(sfmmup, HAT_JOIN_SCD)) {
15371 SFMMU_FLAGS_CLEAR(sfmmup, HAT_JOIN_SCD);
|
1301 sfmmu1_cache = kmem_cache_create("sfmmu1_cache", HME1BLK_SZ,
1302 HMEBLK_ALIGN, sfmmu_hblkcache_constructor,
1303 sfmmu_hblkcache_destructor,
1304 NULL, (void *)HME1BLK_SZ,
1305 hat_memload1_arena, KMC_NOHASH);
1306
1307 pa_hment_cache = kmem_cache_create("pa_hment_cache", PAHME_SZ,
1308 0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
1309
1310 ism_blk_cache = kmem_cache_create("ism_blk_cache",
1311 sizeof (ism_blk_t), ecache_alignsize, NULL, NULL,
1312 NULL, NULL, static_arena, KMC_NOHASH);
1313
1314 ism_ment_cache = kmem_cache_create("ism_ment_cache",
1315 sizeof (ism_ment_t), 0, NULL, NULL,
1316 NULL, NULL, NULL, 0);
1317
1318 /*
1319 * We grab the first hat for the kernel.
1320 */
1321 AS_LOCK_ENTER(&kas, RW_WRITER);
1322 kas.a_hat = hat_alloc(&kas);
1323 AS_LOCK_EXIT(&kas);
1324
1325 /*
1326 * Initialize hblk_reserve.
1327 */
1328 ((struct hme_blk *)hblk_reserve)->hblk_nextpa =
1329 va_to_pa((caddr_t)hblk_reserve);
1330
1331 #ifndef UTSB_PHYS
1332 /*
1333 * Reserve some kernel virtual address space for the locked TTEs
1334 * that allow us to probe the TSB from TL>0.
1335 */
1336 utsb_vabase = vmem_xalloc(heap_arena, tsb_slab_size, tsb_slab_size,
1337 0, 0, NULL, NULL, VM_SLEEP);
1338 utsb4m_vabase = vmem_xalloc(heap_arena, tsb_slab_size, tsb_slab_size,
1339 0, 0, NULL, NULL, VM_SLEEP);
1340 #endif
1341
1342 #ifdef VAC
1343 /*
1446 for (i = 0; i < SFMMU_NUM_LOCK; i++)
1447 mutex_init(HATLOCK_MUTEXP(&hat_lock[i]), NULL, MUTEX_DEFAULT,
1448 NULL);
1449 }
1450
1451 #define SFMMU_KERNEL_MAXVA \
1452 (kmem64_base ? (uintptr_t)kmem64_end : (SYSLIMIT))
1453
1454 /*
1455 * Allocate a hat structure.
1456 * Called when an address space first uses a hat.
1457 */
1458 struct hat *
1459 hat_alloc(struct as *as)
1460 {
1461 sfmmu_t *sfmmup;
1462 int i;
1463 uint64_t cnum;
1464 extern uint_t get_color_start(struct as *);
1465
1466 ASSERT(AS_WRITE_HELD(as));
1467 sfmmup = kmem_cache_alloc(sfmmuid_cache, KM_SLEEP);
1468 sfmmup->sfmmu_as = as;
1469 sfmmup->sfmmu_flags = 0;
1470 sfmmup->sfmmu_tteflags = 0;
1471 sfmmup->sfmmu_rtteflags = 0;
1472 LOCK_INIT_CLEAR(&sfmmup->sfmmu_ctx_lock);
1473
1474 if (as == &kas) {
1475 ksfmmup = sfmmup;
1476 sfmmup->sfmmu_cext = 0;
1477 cnum = KCONTEXT;
1478
1479 sfmmup->sfmmu_clrstart = 0;
1480 sfmmup->sfmmu_tsb = NULL;
1481 /*
1482 * hat_kern_setup() will call sfmmu_init_ktsbinfo()
1483 * to set up tsb_info for ksfmmup.
1484 */
1485 } else {
1486
1900 * sfmmu_setctx_sec takes <pgsz|cnum> as a parameter;
1901 * pagesize bits don't matter in this case since we are passing
1902 * INVALID_CONTEXT to it.
1903 * Compatibility Note: hw takes care of MMU_SCONTEXT1
1904 */
1905 sfmmu_setctx_sec(INVALID_CONTEXT);
1906 sfmmu_clear_utsbinfo();
1907
1908 kpreempt_enable();
1909 sfmmu_hat_exit(hatlockp);
1910 }
1911 }
1912
1913 /*
1914 * Free all the translation resources for the specified address space.
1915 * Called from as_free when an address space is being destroyed.
1916 */
1917 void
1918 hat_free_start(struct hat *sfmmup)
1919 {
1920 ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as));
1921 ASSERT(sfmmup != ksfmmup);
1922 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
1923
1924 sfmmup->sfmmu_free = 1;
1925 if (sfmmup->sfmmu_scdp != NULL) {
1926 sfmmu_leave_scd(sfmmup, 0);
1927 }
1928
1929 ASSERT(sfmmup->sfmmu_scdp == NULL);
1930 }
1931
1932 void
1933 hat_free_end(struct hat *sfmmup)
1934 {
1935 int i;
1936
1937 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
1938 ASSERT(sfmmup->sfmmu_free == 1);
1939 ASSERT(sfmmup->sfmmu_ttecnt[TTE8K] == 0);
1940 ASSERT(sfmmup->sfmmu_ttecnt[TTE64K] == 0);
2230
2231 ASSERT(hat != NULL);
2232 ASSERT(PAGE_LOCKED(pp));
2233 ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
2234 ASSERT(!(flags & ~SFMMU_LOAD_ALLFLAG));
2235 ASSERT(!(attr & ~SFMMU_LOAD_ALLATTR));
2236 SFMMU_VALIDATE_HMERID(hat, rid, addr, MMU_PAGESIZE);
2237
2238 if (PP_ISFREE(pp)) {
2239 panic("hat_memload: loading a mapping to free page %p",
2240 (void *)pp);
2241 }
2242
2243 if (hat->sfmmu_xhat_provider) {
2244 /* no regions for xhats */
2245 ASSERT(!SFMMU_IS_SHMERID_VALID(rid));
2246 XHAT_MEMLOAD(hat, addr, pp, attr, flags);
2247 return;
2248 }
2249
2250 ASSERT((hat == ksfmmup) || AS_LOCK_HELD(hat->sfmmu_as));
2251
2252 if (flags & ~SFMMU_LOAD_ALLFLAG)
2253 cmn_err(CE_NOTE, "hat_memload: unsupported flags %d",
2254 flags & ~SFMMU_LOAD_ALLFLAG);
2255
2256 if (hat->sfmmu_rmstat)
2257 hat_resvstat(MMU_PAGESIZE, hat->sfmmu_as, addr);
2258
2259 #if defined(SF_ERRATA_57)
2260 if ((hat != ksfmmup) && AS_TYPE_64BIT(hat->sfmmu_as) &&
2261 (addr < errata57_limit) && (attr & PROT_EXEC) &&
2262 !(flags & HAT_LOAD_SHARE)) {
2263 cmn_err(CE_WARN, "hat_memload: illegal attempt to make user "
2264 "page executable");
2265 attr &= ~PROT_EXEC;
2266 }
2267 #endif
2268
2269 sfmmu_memtte(&tte, pp->p_pagenum, attr, TTE8K);
2270 (void) sfmmu_tteload_array(hat, &tte, addr, &pp, flags, rid);
2285 * pass dp = NULL. If tteload doesn't get a non-NULL
2286 * page pointer it can't cache memory.
2287 */
2288 void
2289 hat_devload(struct hat *hat, caddr_t addr, size_t len, pfn_t pfn,
2290 uint_t attr, int flags)
2291 {
2292 tte_t tte;
2293 struct page *pp = NULL;
2294 int use_lgpg = 0;
2295
2296 ASSERT(hat != NULL);
2297
2298 if (hat->sfmmu_xhat_provider) {
2299 XHAT_DEVLOAD(hat, addr, len, pfn, attr, flags);
2300 return;
2301 }
2302
2303 ASSERT(!(flags & ~SFMMU_LOAD_ALLFLAG));
2304 ASSERT(!(attr & ~SFMMU_LOAD_ALLATTR));
2305 ASSERT((hat == ksfmmup) || AS_LOCK_HELD(hat->sfmmu_as));
2306 if (len == 0)
2307 panic("hat_devload: zero len");
2308 if (flags & ~SFMMU_LOAD_ALLFLAG)
2309 cmn_err(CE_NOTE, "hat_devload: unsupported flags %d",
2310 flags & ~SFMMU_LOAD_ALLFLAG);
2311
2312 #if defined(SF_ERRATA_57)
2313 if ((hat != ksfmmup) && AS_TYPE_64BIT(hat->sfmmu_as) &&
2314 (addr < errata57_limit) && (attr & PROT_EXEC) &&
2315 !(flags & HAT_LOAD_SHARE)) {
2316 cmn_err(CE_WARN, "hat_devload: illegal attempt to make user "
2317 "page executable");
2318 attr &= ~PROT_EXEC;
2319 }
2320 #endif
2321
2322 /*
2323 * If it's a memory page, find its pp
2324 */
2325 if (!(flags & HAT_LOAD_NOCONSIST) && pf_is_memory(pfn)) {
3954 }
3955 ttesz--;
3956 }
3957 }
3958
3959 /*
3960 * Release one hardware address translation lock on the given address range.
3961 */
3962 void
3963 hat_unlock(struct hat *sfmmup, caddr_t addr, size_t len)
3964 {
3965 struct hmehash_bucket *hmebp;
3966 hmeblk_tag hblktag;
3967 int hmeshift, hashno = 1;
3968 struct hme_blk *hmeblkp, *list = NULL;
3969 caddr_t endaddr;
3970
3971 ASSERT(sfmmup != NULL);
3972 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
3973
3974 ASSERT((sfmmup == ksfmmup) || AS_LOCK_HELD(sfmmup->sfmmu_as));
3975 ASSERT((len & MMU_PAGEOFFSET) == 0);
3976 endaddr = addr + len;
3977 hblktag.htag_id = sfmmup;
3978 hblktag.htag_rid = SFMMU_INVALID_SHMERID;
3979
3980 /*
3981 * Spitfire supports 4 page sizes.
3982 * Most pages are expected to be of the smallest page size (8K) and
3983 * these will not need to be rehashed. 64K pages also don't need to be
3984 * rehashed because an hmeblk spans 64K of address space. 512K pages
3985 * might need 1 rehash and 4M pages might need 2 rehashes.
3986 */
3987 while (addr < endaddr) {
3988 hmeshift = HME_HASH_SHIFT(hashno);
3989 hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift);
3990 hblktag.htag_rehash = hashno;
3991 hmebp = HME_HASH_FUNCTION(sfmmup, addr, hmeshift);
3992
3993 SFMMU_HASH_LOCK(hmebp);
3994
4750
4751 /*
4752 * hat_probe returns 1 if the translation for the address 'addr' is
4753 * loaded, zero otherwise.
4754 *
4755 * hat_probe should be used only for advisory purposes because it may
4756 * occasionally return the wrong value. The implementation must guarantee that
4757 * returning the wrong value is a very rare event. hat_probe is used
4758 * to implement optimizations in the segment drivers.
4759 *
4760 */
4761 int
4762 hat_probe(struct hat *sfmmup, caddr_t addr)
4763 {
4764 pfn_t pfn;
4765 tte_t tte;
4766
4767 ASSERT(sfmmup != NULL);
4768 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
4769
4770 ASSERT((sfmmup == ksfmmup) || AS_LOCK_HELD(sfmmup->sfmmu_as));
4771
4772 if (sfmmup == ksfmmup) {
4773 while ((pfn = sfmmu_vatopfn(addr, sfmmup, &tte))
4774 == PFN_SUSPENDED) {
4775 sfmmu_vatopfn_suspended(addr, sfmmup, &tte);
4776 }
4777 } else {
4778 pfn = sfmmu_uvatopfn(addr, sfmmup, NULL);
4779 }
4780
4781 if (pfn != PFN_INVALID)
4782 return (1);
4783 else
4784 return (0);
4785 }
4786
4787 ssize_t
4788 hat_getpagesize(struct hat *sfmmup, caddr_t addr)
4789 {
4790 tte_t tte;
4899 sfmmu_chgattr(hat, addr, len, attr, SFMMU_CLRATTR);
4900 }
4901
4902 /*
4903 * Change attributes on an address range to that specified by attr and mode.
4904 */
4905 static void
4906 sfmmu_chgattr(struct hat *sfmmup, caddr_t addr, size_t len, uint_t attr,
4907 int mode)
4908 {
4909 struct hmehash_bucket *hmebp;
4910 hmeblk_tag hblktag;
4911 int hmeshift, hashno = 1;
4912 struct hme_blk *hmeblkp, *list = NULL;
4913 caddr_t endaddr;
4914 cpuset_t cpuset;
4915 demap_range_t dmr;
4916
4917 CPUSET_ZERO(cpuset);
4918
4919 ASSERT((sfmmup == ksfmmup) || AS_LOCK_HELD(sfmmup->sfmmu_as));
4920 ASSERT((len & MMU_PAGEOFFSET) == 0);
4921 ASSERT(((uintptr_t)addr & MMU_PAGEOFFSET) == 0);
4922
4923 if ((attr & PROT_USER) && (mode != SFMMU_CLRATTR) &&
4924 ((addr + len) > (caddr_t)USERLIMIT)) {
4925 panic("user addr %p in kernel space",
4926 (void *)addr);
4927 }
4928
4929 endaddr = addr + len;
4930 hblktag.htag_id = sfmmup;
4931 hblktag.htag_rid = SFMMU_INVALID_SHMERID;
4932 DEMAP_RANGE_INIT(sfmmup, &dmr);
4933
4934 while (addr < endaddr) {
4935 hmeshift = HME_HASH_SHIFT(hashno);
4936 hblktag.htag_bspage = HME_HASH_BSPAGE(addr, hmeshift);
4937 hblktag.htag_rehash = hashno;
4938 hmebp = HME_HASH_FUNCTION(sfmmup, addr, hmeshift);
4939
5692 caddr_t cb_end_addr[MAX_CB_ADDR];
5693 int issegkmap = ISSEGKMAP(sfmmup, addr);
5694 demap_range_t dmr, *dmrp;
5695
5696 if (sfmmup->sfmmu_xhat_provider) {
5697 XHAT_UNLOAD_CALLBACK(sfmmup, addr, len, flags, callback);
5698 return;
5699 } else {
5700 /*
5701 * This must be a CPU HAT. If the address space has
5702 * XHATs attached, unload the mappings for all of them,
5703 * just in case
5704 */
5705 ASSERT(sfmmup->sfmmu_as != NULL);
5706 if (sfmmup->sfmmu_as->a_xhat != NULL)
5707 xhat_unload_callback_all(sfmmup->sfmmu_as, addr,
5708 len, flags, callback);
5709 }
5710
5711 ASSERT((sfmmup == ksfmmup) || (flags & HAT_UNLOAD_OTHER) || \
5712 AS_LOCK_HELD(sfmmup->sfmmu_as));
5713
5714 ASSERT(sfmmup != NULL);
5715 ASSERT((len & MMU_PAGEOFFSET) == 0);
5716 ASSERT(!((uintptr_t)addr & MMU_PAGEOFFSET));
5717
5718 /*
5719 * Probing through a large VA range (say 63 bits) will be slow, even
5720 * at 4 Meg steps between the probes. So, when the virtual address range
5721 * is very large, search the HME entries for what to unload.
5722 *
5723 * len >> TTE_PAGE_SHIFT(TTE4M) is the # of 4Meg probes we'd need
5724 *
5725 * UHMEHASH_SZ is the number of hash buckets to examine
5726 *
5727 */
5728 if (sfmmup != KHATID && (len >> TTE_PAGE_SHIFT(TTE4M)) > UHMEHASH_SZ) {
5729 hat_unload_large_virtual(sfmmup, addr, len, flags, callback);
5730 return;
5731 }
5732
6310 }
6311 }
6312
6313 /*
6314 * Synchronize all the mappings in the range [addr..addr+len).
6315 * Can be called with clearflag having two states:
6316 * HAT_SYNC_DONTZERO means just return the rm stats
6317 * HAT_SYNC_ZERORM means zero rm bits in the tte and return the stats
6318 */
6319 void
6320 hat_sync(struct hat *sfmmup, caddr_t addr, size_t len, uint_t clearflag)
6321 {
6322 struct hmehash_bucket *hmebp;
6323 hmeblk_tag hblktag;
6324 int hmeshift, hashno = 1;
6325 struct hme_blk *hmeblkp, *list = NULL;
6326 caddr_t endaddr;
6327 cpuset_t cpuset;
6328
6329 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
6330 ASSERT((sfmmup == ksfmmup) || AS_LOCK_HELD(sfmmup->sfmmu_as));
6331 ASSERT((len & MMU_PAGEOFFSET) == 0);
6332 ASSERT((clearflag == HAT_SYNC_DONTZERO) ||
6333 (clearflag == HAT_SYNC_ZERORM));
6334
6335 CPUSET_ZERO(cpuset);
6336
6337 endaddr = addr + len;
6338 hblktag.htag_id = sfmmup;
6339 hblktag.htag_rid = SFMMU_INVALID_SHMERID;
6340
6341 /*
6342 * Spitfire supports 4 page sizes.
6343 * Most pages are expected to be of the smallest page
6344 * size (8K) and these will not need to be rehashed. 64K
6345 * pages also don't need to be rehashed because an hmeblk
6346 * spans 64K of address space. 512K pages might need 1 rehash
6347 * and 4M pages 2 rehashes.
6348 */
6349 while (addr < endaddr) {
6350 hmeshift = HME_HASH_SHIFT(hashno);
7953 panic("Illegal VA->PA translation, pp 0x%p not permanent",
7954 (void *)pp);
7955 else
7956 panic("Illegal VA->PA translation, pp 0x%p not locked",
7957 (void *)pp);
7958 }
7959 #endif /* DEBUG */
7960
7961 /*
7962 * Returns a page frame number for a given virtual address.
7963 * Returns PFN_INVALID to indicate an invalid mapping
7964 */
7965 pfn_t
7966 hat_getpfnum(struct hat *hat, caddr_t addr)
7967 {
7968 pfn_t pfn;
7969 tte_t tte;
7970
7971 /*
7972 * We would like to
7973 * ASSERT(AS_LOCK_HELD(as));
7974 * but we can't because the iommu driver will call this
7975 * routine at interrupt time and it can't grab the as lock
7976 * or it will deadlock: A thread could have the as lock
7977 * and be waiting for io. The io can't complete
7978 * because the interrupt thread is blocked trying to grab
7979 * the as lock.
7980 */
7981
7982 ASSERT(hat->sfmmu_xhat_provider == NULL);
7983
7984 if (hat == ksfmmup) {
7985 if (IS_KMEM_VA_LARGEPAGE(addr)) {
7986 ASSERT(segkmem_lpszc > 0);
7987 pfn = sfmmu_kvaszc2pfn(addr, segkmem_lpszc);
7988 if (pfn != PFN_INVALID) {
7989 sfmmu_check_kpfn(pfn);
7990 return (pfn);
7991 }
7992 } else if (segkpm && IS_KPM_ADDR(addr)) {
7993 return (sfmmu_kpm_vatopfn(addr));
14008 hatlock_t *hatlockp;
14009 sf_region_t *rgnp;
14010 sf_region_t *new_rgnp = NULL;
14011 int i;
14012 uint16_t *nextidp;
14013 sf_region_t **freelistp;
14014 int maxids;
14015 sf_region_t **rarrp;
14016 uint16_t *busyrgnsp;
14017 ulong_t rttecnt;
14018 uchar_t tteflag;
14019 uchar_t r_type = flags & HAT_REGION_TYPE_MASK;
14020 int text = (r_type == HAT_REGION_TEXT);
14021
14022 if (srdp == NULL || r_size == 0) {
14023 return (HAT_INVALID_REGION_COOKIE);
14024 }
14025
14026 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
14027 ASSERT(sfmmup != ksfmmup);
14028 ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as));
14029 ASSERT(srdp->srd_refcnt > 0);
14030 ASSERT(!(flags & ~HAT_REGION_TYPE_MASK));
14031 ASSERT(flags == HAT_REGION_TEXT || flags == HAT_REGION_ISM);
14032 ASSERT(r_pgszc < mmu_page_sizes);
14033 if (!IS_P2ALIGNED(r_saddr, TTEBYTES(r_pgszc)) ||
14034 !IS_P2ALIGNED(r_size, TTEBYTES(r_pgszc))) {
14035 panic("hat_join_region: region addr or size is not aligned\n");
14036 }
14037
14038
14039 r_type = (r_type == HAT_REGION_ISM) ? SFMMU_REGION_ISM :
14040 SFMMU_REGION_HME;
14041 /*
14043 * Currently we only support shared hmes for the read-only main
14044 * text region.
14044 */
14045 if (r_type == SFMMU_REGION_HME && ((r_obj != srdp->srd_evp) ||
14046 (r_perm & PROT_WRITE))) {
14047 return (HAT_INVALID_REGION_COOKIE);
14048 }
14311 ASSERT(!(flags & ~HAT_REGION_TYPE_MASK));
14312 ASSERT(flags == HAT_REGION_TEXT || flags == HAT_REGION_ISM);
14313 ASSERT(!sfmmup->sfmmu_free || sfmmup->sfmmu_scdp == NULL);
14314
14315 r_type = (r_type == HAT_REGION_ISM) ? SFMMU_REGION_ISM :
14316 SFMMU_REGION_HME;
14317
14318 if (r_type == SFMMU_REGION_ISM) {
14319 ASSERT(SFMMU_IS_ISMRID_VALID(rid));
14320 ASSERT(rid < SFMMU_MAX_ISM_REGIONS);
14321 rgnp = srdp->srd_ismrgnp[rid];
14322 } else {
14323 ASSERT(SFMMU_IS_SHMERID_VALID(rid));
14324 ASSERT(rid < SFMMU_MAX_HME_REGIONS);
14325 rgnp = srdp->srd_hmergnp[rid];
14326 }
14327 ASSERT(rgnp != NULL);
14328 ASSERT(rgnp->rgn_id == rid);
14329 ASSERT((rgnp->rgn_flags & SFMMU_REGION_TYPE_MASK) == r_type);
14330 ASSERT(!(rgnp->rgn_flags & SFMMU_REGION_FREE));
14331 ASSERT(AS_LOCK_HELD(sfmmup->sfmmu_as));
14332
14333 ASSERT(sfmmup->sfmmu_xhat_provider == NULL);
14334 if (r_type == SFMMU_REGION_HME && sfmmup->sfmmu_as->a_xhat != NULL) {
14335 xhat_unload_callback_all(sfmmup->sfmmu_as, rgnp->rgn_saddr,
14336 rgnp->rgn_size, 0, NULL);
14337 }
14338
14339 if (sfmmup->sfmmu_free) {
14340 ulong_t rttecnt;
14341 r_pgszc = rgnp->rgn_pgszc;
14342 r_size = rgnp->rgn_size;
14343
14344 ASSERT(sfmmup->sfmmu_scdp == NULL);
14345 if (r_type == SFMMU_REGION_ISM) {
14346 SF_RGNMAP_DEL(sfmmup->sfmmu_ismregion_map, rid);
14347 } else {
14348 /* update shme rgns ttecnt in sfmmu_ttecnt */
14349 rttecnt = r_size >> TTE_PAGE_SHIFT(r_pgszc);
14350 ASSERT(sfmmup->sfmmu_ttecnt[r_pgszc] >= rttecnt);
14351
15107 }
15108
15109 /*
15110 * The first phase of a process joining an SCD. The hat structure is
15111 * linked to the SCD queue and then the HAT_JOIN_SCD sfmmu flag is set
15112 * and a cross-call with context invalidation is used to cause the
15113 * remaining work to be carried out in the sfmmu_tsbmiss_exception()
15114 * routine.
15115 */
15116 static void
15117 sfmmu_join_scd(sf_scd_t *scdp, sfmmu_t *sfmmup)
15118 {
15119 hatlock_t *hatlockp;
15120 sf_srd_t *srdp = sfmmup->sfmmu_srdp;
15121 int i;
15122 sf_scd_t *old_scdp;
15123
15124 ASSERT(srdp != NULL);
15125 ASSERT(scdp != NULL);
15126 ASSERT(scdp->scd_refcnt > 0);
15127 ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as));
15128
15129 if ((old_scdp = sfmmup->sfmmu_scdp) != NULL) {
15130 ASSERT(old_scdp != scdp);
15131
15132 mutex_enter(&old_scdp->scd_mutex);
15133 sfmmu_from_scd_list(&old_scdp->scd_sf_list, sfmmup);
15134 mutex_exit(&old_scdp->scd_mutex);
15135 /*
15136 * sfmmup leaves the old scd. Update sfmmu_ttecnt to
15137 * include the shme rgn ttecnt for rgns that
15138 * were in the old SCD
15139 */
15140 for (i = 0; i < mmu_page_sizes; i++) {
15141 ASSERT(sfmmup->sfmmu_scdrttecnt[i] ==
15142 old_scdp->scd_rttecnt[i]);
15143 atomic_add_long(&sfmmup->sfmmu_ttecnt[i],
15144 sfmmup->sfmmu_scdrttecnt[i]);
15145 }
15146 }
15147
15219 }
15220
15221 /* Set HAT_CTX1_FLAG for all SCD ISMs */
15222 sfmmu_ism_hatflags(sfmmup, 1);
15223
15224 SFMMU_STAT(sf_join_scd);
15225 }
15226
15227 /*
15228 * This routine is called to check if there is an SCD which matches
15229 * the process's region map; if not, a new SCD may be created.
15230 */
15231 static void
15232 sfmmu_find_scd(sfmmu_t *sfmmup)
15233 {
15234 sf_srd_t *srdp = sfmmup->sfmmu_srdp;
15235 sf_scd_t *scdp, *new_scdp;
15236 int ret;
15237
15238 ASSERT(srdp != NULL);
15239 ASSERT(AS_WRITE_HELD(sfmmup->sfmmu_as));
15240
15241 mutex_enter(&srdp->srd_scd_mutex);
15242 for (scdp = srdp->srd_scdp; scdp != NULL;
15243 scdp = scdp->scd_next) {
15244 SF_RGNMAP_EQUAL(&scdp->scd_region_map,
15245 &sfmmup->sfmmu_region_map, ret);
15246 if (ret == 1) {
15247 SF_SCD_INCR_REF(scdp);
15248 mutex_exit(&srdp->srd_scd_mutex);
15249 sfmmu_join_scd(scdp, sfmmup);
15250 ASSERT(scdp->scd_refcnt >= 2);
15251 atomic_dec_32((volatile uint32_t *)&scdp->scd_refcnt);
15252 return;
15253 } else {
15254 /*
15255 * If the sfmmu region map is a subset of the scd
15256 * region map, then the assumption is that this process
15257 * will continue attaching to ISM segments until the
15258 * region maps are equal.
15259 */
15325 * are about to leave the SCD
15326 */
15327 for (i = 0; i < mmu_page_sizes; i++) {
15328 ASSERT(sfmmup->sfmmu_scdrttecnt[i] ==
15329 scdp->scd_rttecnt[i]);
15330 atomic_add_long(&sfmmup->sfmmu_ttecnt[i],
15331 sfmmup->sfmmu_scdrttecnt[i]);
15332 sfmmup->sfmmu_scdrttecnt[i] = 0;
15333 }
15334 sfmmup->sfmmu_scdp = NULL;
15335
15336 SF_SCD_DECR_REF(srdp, scdp);
15337 return;
15338 }
15339
15340 ASSERT(r_type != SFMMU_REGION_ISM ||
15341 SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY));
15342 ASSERT(scdp->scd_refcnt);
15343 ASSERT(!sfmmup->sfmmu_free);
15344 ASSERT(sfmmu_hat_lock_held(sfmmup));
15345 ASSERT(AS_LOCK_HELD(sfmmup->sfmmu_as));
15346
15347 /*
15348 * Wait for ISM maps to be updated.
15349 */
15350 if (r_type != SFMMU_REGION_ISM) {
15351 while (SFMMU_FLAGS_ISSET(sfmmup, HAT_ISMBUSY) &&
15352 sfmmup->sfmmu_scdp != NULL) {
15353 cv_wait(&sfmmup->sfmmu_tsb_cv,
15354 HATLOCK_MUTEXP(hatlockp));
15355 }
15356
15357 if (sfmmup->sfmmu_scdp == NULL) {
15358 sfmmu_hat_exit(hatlockp);
15359 return;
15360 }
15361 SFMMU_FLAGS_SET(sfmmup, HAT_ISMBUSY);
15362 }
15363
15364 if (SFMMU_FLAGS_ISSET(sfmmup, HAT_JOIN_SCD)) {
15365 SFMMU_FLAGS_CLEAR(sfmmup, HAT_JOIN_SCD);
|