5042 stop using deprecated atomic functions
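The substance of the change: the two casptr() calls in this file become atomic_cas_ptr() calls, following the deprecation of the old atomic interfaces; <sys/atomic.h> is already included, and the semantics are unchanged. As a minimal illustration (a hypothetical userland sketch against the illumos <atomic.h> header, not kernel code), both routines return the value previously stored in the target, so the swap took effect exactly when the return value equals the expected old value:

	#include <atomic.h>
	#include <stdio.h>

	static char page[8192];		/* stands in for one mapped page */
	static void *slot = page;	/* stands in for a ppmap_vaddrs[] entry */

	int
	main(void)
	{
		void *va = slot;

		/*
		 * Claim the slot by swapping NULL into it, as the ppmapin()
		 * hunk below does.  atomic_cas_ptr() returns the prior
		 * contents of the target, so getting va back means the swap
		 * happened and the slot is ours.
		 * Old form: if (casptr(&slot, va, NULL) == va)
		 */
		if (atomic_cas_ptr(&slot, va, NULL) == va)
			(void) printf("claimed %p\n", va);
		else
			(void) printf("lost the race to another thread\n");
		return (0);
	}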
--- old/usr/src/uts/sun4u/os/ppage.c
+++ new/usr/src/uts/sun4u/os/ppage.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 -#pragma ident "%Z%%M% %I% %E% SMI"
27 -
28 26 #include <sys/types.h>
29 27 #include <sys/systm.h>
30 28 #include <sys/archsystm.h>
31 29 #include <sys/machsystm.h>
32 30 #include <sys/t_lock.h>
33 31 #include <sys/vmem.h>
34 32 #include <sys/mman.h>
35 33 #include <sys/vm.h>
36 34 #include <sys/cpu.h>
37 35 #include <sys/cmn_err.h>
38 36 #include <sys/cpuvar.h>
39 37 #include <sys/atomic.h>
40 38 #include <vm/as.h>
41 39 #include <vm/hat.h>
42 40 #include <vm/as.h>
43 41 #include <vm/page.h>
44 42 #include <vm/seg.h>
45 43 #include <vm/seg_kmem.h>
46 44 #include <vm/seg_kpm.h>
47 45 #include <vm/hat_sfmmu.h>
48 46 #include <sys/debug.h>
49 47 #include <sys/cpu_module.h>
50 48 #include <sys/mem_cage.h>
51 49
52 50 /*
53 51 * A quick way to generate a cache consistent address to map in a page.
54 52 * users: ppcopy, pagezero, /proc, dev/mem
55 53 *
56 54 * The ppmapin/ppmapout routines provide a quick way of generating a cache
57 55 * consistent address by reserving a given amount of kernel address space.
58 56 * The base is PPMAPBASE and its size is PPMAPSIZE. This memory is divided
59 57 * into x number of sets, where x is the number of colors for the virtual
60 58 * cache. The number of colors is how many times a page can be mapped
61 59 * simultaneously in the cache. For direct map caches this translates to
62 60 * the number of pages in the cache.
63 61 * Each set will be assigned a group of virtual pages from the reserved memory
64 62 * depending on its virtual color.
65 63 * When trying to assign a virtual address we will find out the color for the
66 64 * physical page in question (if applicable). Then we will try to find an
67 65 * available virtual page from the set of the appropriate color.
68 66 */
69 67
70 68 #define clsettoarray(color, set) ((color * nsets) + set)
71 69
72 70 int pp_slots = 4; /* small default, tuned by cpu module */
73 71
74 72 /* tuned by cpu module, default is "safe" */
75 73 int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;
76 74
77 75 static caddr_t ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
78 76 static int nsets; /* number of sets */
79 77 static int ppmap_pages; /* generate align mask */
80 78 static int ppmap_shift; /* set selector */
81 79
82 80 #ifdef PPDEBUG
83 81 #define MAXCOLORS 16 /* for debug only */
84 82 static int ppalloc_noslot = 0; /* # of allocations from kernelmap */
85 83 static int align_hits[MAXCOLORS];
86 84 static int pp_allocs; /* # of ppmapin requests */
87 85 #endif /* PPDEBUG */
88 86
89 87 /*
90 88 * There are only 64 TLB entries on spitfire, 16 on cheetah
91 89 * (fully-associative TLB) so we allow the cpu module to tune the
92 90 * number to use here via pp_slots.
93 91 */
94 92 static struct ppmap_va {
95 93 caddr_t ppmap_slots[MAXPP_SLOTS];
96 94 } ppmap_va[NCPU];
97 95
98 96 void
99 97 ppmapinit(void)
100 98 {
101 99 int color, nset, setsize;
102 100 caddr_t va;
103 101
104 102 ASSERT(pp_slots <= MAXPP_SLOTS);
105 103
106 104 va = (caddr_t)PPMAPBASE;
107 105 if (cache & CACHE_VAC) {
108 106 int a;
109 107
110 108 ppmap_pages = mmu_btop(shm_alignment);
111 109 nsets = PPMAPSIZE / shm_alignment;
112 110 setsize = shm_alignment;
113 111 ppmap_shift = MMU_PAGESHIFT;
114 112 a = ppmap_pages;
115 113 while (a >>= 1)
116 114 ppmap_shift++;
117 115 } else {
118 116 /*
119 117 * If we do not have a virtual indexed cache we simply
120 118 * have only one set containing all pages.
121 119 */
122 120 ppmap_pages = 1;
123 121 nsets = mmu_btop(PPMAPSIZE);
124 122 setsize = MMU_PAGESIZE;
125 123 ppmap_shift = MMU_PAGESHIFT;
126 124 }
127 125 for (color = 0; color < ppmap_pages; color++) {
128 126 for (nset = 0; nset < nsets; nset++) {
129 127 ppmap_vaddrs[clsettoarray(color, nset)] =
130 128 (caddr_t)((uintptr_t)va + (nset * setsize));
131 129 }
132 130 va += MMU_PAGESIZE;
133 131 }
134 132 }
135 133
136 134 /*
137 135 * Allocate a cache consistent virtual address to map a page, pp,
138 136 * with protection, vprot; and map it in the MMU, using the most
139 137 * efficient means possible. The argument avoid is a virtual address
140 138 * hint which when masked yields an offset into a virtual cache
141 139 * that should be avoided when allocating an address to map in a
142 140 * page. An avoid arg of -1 means you don't care, for instance pagezero.
143 141 *
144 142 * machine dependent, depends on virtual address space layout,
145 143 * understands that all kernel addresses have bit 31 set.
146 144 *
147 145 * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
148 146 * that found in other architectures. In other architectures the hint
149 147 * (called avoid) was used to ask ppmapin to NOT use the specified cache color.
150 148 * This was used to avoid virtual cache thrashing in the bcopy. Unfortunately
151 149 * in the case of a COW, this later on caused a cache aliasing conflict. In
152 150 * sun4, the bcopy routine uses the block ld/st instructions so we don't have
153 151 * to worry about virtual cache thrashing. Actually, by using the hint to choose
154 152 * the right color we can almost guarantee a cache conflict will not occur.
155 153 */
156 154
157 155 caddr_t
158 156 ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
159 157 {
160 158 int color, nset, index, start;
161 159 caddr_t va;
162 160
163 161 #ifdef PPDEBUG
164 162 pp_allocs++;
165 163 #endif /* PPDEBUG */
166 164 if (cache & CACHE_VAC) {
167 165 color = sfmmu_get_ppvcolor(pp);
168 166 if (color == -1) {
169 167 if ((intptr_t)hint != -1L) {
170 168 color = addr_to_vcolor(hint);
171 169 } else {
172 170 color = addr_to_vcolor(mmu_ptob(pp->p_pagenum));
173 171 }
174 172 }
175 173
176 174 } else {
177 175 /*
178 176 * For physical caches, we can pick any address we want.
179 177 */
180 178 color = 0;
181 179 }
182 180
183 181 start = color;
184 182 do {
185 183 for (nset = 0; nset < nsets; nset++) {
186 184 index = clsettoarray(color, nset);
187 185 va = ppmap_vaddrs[index];
188 186 if (va != NULL) {
189 187 #ifdef PPDEBUG
190 188 align_hits[color]++;
191 189 #endif /* PPDEBUG */
192 - if (casptr(&ppmap_vaddrs[index],
190 + if (atomic_cas_ptr(&ppmap_vaddrs[index],
193 191 va, NULL) == va) {
194 192 hat_memload(kas.a_hat, va, pp,
195 193 vprot | HAT_NOSYNC,
196 194 HAT_LOAD_LOCK);
197 195 return (va);
198 196 }
199 197 }
200 198 }
201 199 /*
202 200 * first pick didn't succeed, try another
203 201 */
204 202 if (++color == ppmap_pages)
205 203 color = 0;
206 204 } while (color != start);
207 205
208 206 #ifdef PPDEBUG
209 207 ppalloc_noslot++;
210 208 #endif /* PPDEBUG */
211 209
212 210 /*
213 211 * No free slots; get a random one from the kernel heap area.
214 212 */
215 213 va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
216 214
217 215 hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);
218 216
219 217 return (va);
220 218
221 219 }
222 220
223 221 void
224 222 ppmapout(caddr_t va)
225 223 {
226 224 int color, nset, index;
227 225
228 226 if (va >= kernelheap && va < ekernelheap) {
229 227 /*
230 228 * Space came from kernelmap, flush the page and
231 229 * return the space.
232 230 */
233 231 hat_unload(kas.a_hat, va, PAGESIZE,
234 232 (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
235 233 vmem_free(heap_arena, va, PAGESIZE);
236 234 } else {
237 235 /*
238 236 * Space came from ppmap_vaddrs[], give it back.
239 237 */
240 238 color = addr_to_vcolor(va);
241 239 ASSERT((cache & CACHE_VAC)? (color < ppmap_pages) : 1);
242 240
243 241 nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
244 242 index = clsettoarray(color, nset);
245 243 hat_unload(kas.a_hat, va, PAGESIZE,
246 244 (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
247 245
248 246 ASSERT(ppmap_vaddrs[index] == NULL);
249 247 ppmap_vaddrs[index] = va;
250 248 }
251 249 }
252 250
253 251 #ifdef DEBUG
254 252 #define PP_STAT_ADD(stat) (stat)++
255 253 uint_t pload, ploadfail;
256 254 uint_t ppzero, ppzero_short;
257 255 #else
258 256 #define PP_STAT_ADD(stat)
259 257 #endif /* DEBUG */
260 258
261 259 /*
262 260 * Find a slot in per CPU page copy area. Load up a locked TLB in the
263 261 * running cpu. We don't call hat layer to load up the tte since the
264 262 * mapping is only temporary. If the thread migrates it'll get a TLB
265 263 * miss trap and TLB/TSB miss handler will panic since there is no
266 264 * official hat record of this mapping.
267 265 */
268 266 static caddr_t
269 267 pp_load_tlb(processorid_t cpu, caddr_t **pslot, page_t *pp, uint_t prot)
270 268 {
271 269 struct ppmap_va *ppmap;
272 270 tte_t tte;
273 271 caddr_t *myslot;
274 272 caddr_t va;
275 273 long i, start, stride;
276 274 int vcolor;
277 275 uint_t flags, strict_flag;
278 276
279 277 PP_STAT_ADD(pload);
280 278
281 279 ppmap = &ppmap_va[cpu];
282 280 va = (caddr_t)(PPMAP_FAST_BASE + (MMU_PAGESIZE * MAXPP_SLOTS) * cpu);
283 281 myslot = ppmap->ppmap_slots;
284 282 ASSERT(addr_to_vcolor(va) == 0);
285 283
286 284 if (prot & TTE_HWWR_INT) {
287 285 flags = PPAGE_STORE_VCOLORING | PPAGE_STORES_POLLUTE;
288 286 strict_flag = PPAGE_STORES_POLLUTE;
289 287 } else {
290 288 flags = PPAGE_LOAD_VCOLORING | PPAGE_LOADS_POLLUTE;
291 289 strict_flag = PPAGE_LOADS_POLLUTE;
292 290 }
293 291
294 292 /*
295 293 * If consistent handling is required then keep the current
296 294 * vcolor of the page. Furthermore, if loads or stores can
297 295 * pollute the VAC then using a "new" page (unassigned vcolor)
298 296 * won't work and we have to return a failure.
299 297 */
300 298 if (pp_consistent_coloring & flags) {
301 299 vcolor = sfmmu_get_ppvcolor(pp);
302 300 if ((vcolor == -1) &&
303 301 (pp_consistent_coloring & strict_flag))
304 302 return (NULL);
305 303 /* else keep the current vcolor of the page */
306 304 } else {
307 305 vcolor = -1;
308 306 }
309 307
310 308 if (vcolor != -1) {
311 309 va += MMU_PAGESIZE * vcolor;
312 310 start = vcolor;
313 311 stride = ppmap_pages; /* number of colors */
314 312 myslot += vcolor;
315 313 } else {
316 314 start = 0;
317 315 stride = 1;
318 316 }
319 317
320 318 for (i = start; i < pp_slots; i += stride) {
321 319 if (*myslot == NULL) {
322 - if (casptr(myslot, NULL, va) == NULL)
320 + if (atomic_cas_ptr(myslot, NULL, va) == NULL)
323 321 break;
324 322 }
325 323 myslot += stride;
326 324 va += MMU_PAGESIZE * stride;
327 325 }
328 326
329 327 if (i >= pp_slots) {
330 328 PP_STAT_ADD(ploadfail);
331 329 return (NULL);
332 330 }
333 331
334 332 ASSERT(vcolor == -1 || addr_to_vcolor(va) == vcolor);
335 333
336 334 /*
337 335 * Now we have a slot we can use, make the tte.
338 336 */
339 337 tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(pp->p_pagenum);
340 338 tte.tte_intlo = TTE_PFN_INTLO(pp->p_pagenum) | TTE_CP_INT |
341 339 TTE_CV_INT | TTE_PRIV_INT | TTE_LCK_INT | prot;
342 340
343 341 ASSERT(CPU->cpu_id == cpu);
344 342 sfmmu_dtlb_ld_kva(va, &tte);
345 343
346 344 *pslot = myslot; /* Return ptr to the slot we used. */
347 345
348 346 return (va);
349 347 }
350 348
351 349 static void
352 350 pp_unload_tlb(caddr_t *pslot, caddr_t va)
353 351 {
354 352 ASSERT(*pslot == va);
355 353
356 354 vtag_flushpage(va, (uint64_t)ksfmmup);
357 355 *pslot = NULL; /* release the slot */
358 356 }
359 357
360 358 /*
361 359 * Common copy routine which attempts to use hwblkpagecopy. If this routine
362 360 * can't be used, failure (0) will be returned. Otherwise, a PAGESIZE page
363 361 * will be copied and success (1) will be returned.
364 362 */
365 363 int
366 364 ppcopy_common(page_t *fm_pp, page_t *to_pp)
367 365 {
368 366 caddr_t fm_va, to_va;
369 367 caddr_t *fm_slot, *to_slot;
370 368 processorid_t cpu;
371 369 label_t ljb;
372 370 int ret = 1;
373 371
374 372 ASSERT(fm_pp != NULL && PAGE_LOCKED(fm_pp));
375 373 ASSERT(to_pp != NULL && PAGE_LOCKED(to_pp));
376 374
377 375 /*
378 376 * If we can't use VIS block loads and stores we can't use
379 377 * pp_load_tlb/pp_unload_tlb due to the possibility of
380 378 * d$ aliasing.
381 379 */
382 380 if (!use_hw_bcopy && (cache & CACHE_VAC))
383 381 return (0);
384 382
385 383 kpreempt_disable();
386 384 cpu = CPU->cpu_id;
387 385 fm_va = pp_load_tlb(cpu, &fm_slot, fm_pp, 0);
388 386 if (fm_va == NULL) {
389 387 kpreempt_enable();
390 388 return (0);
391 389 }
392 390 to_va = pp_load_tlb(cpu, &to_slot, to_pp, TTE_HWWR_INT);
393 391 if (to_va == NULL) {
394 392 pp_unload_tlb(fm_slot, fm_va);
395 393 kpreempt_enable();
396 394 return (0);
397 395 }
398 396 if (on_fault(&ljb)) {
399 397 ret = 0;
400 398 goto faulted;
401 399 }
402 400 hwblkpagecopy(fm_va, to_va);
403 401 no_fault();
404 402 faulted:
405 403 ASSERT(CPU->cpu_id == cpu);
406 404 pp_unload_tlb(fm_slot, fm_va);
407 405 pp_unload_tlb(to_slot, to_va);
408 406 kpreempt_enable();
409 407 return (ret);
410 408 }
411 409
412 410 /*
413 411 * Routine to copy kernel pages during relocation. It will copy one
414 412 * PAGESIZE page to another PAGESIZE page. This function may be called
415 413 * above LOCK_LEVEL so it should not grab any locks.
416 414 */
417 415 void
418 416 ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
419 417 {
420 418 uint64_t fm_pa, to_pa;
421 419 size_t nbytes;
422 420
423 421 fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
424 422 to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;
425 423
426 424 nbytes = MMU_PAGESIZE;
427 425
428 426 for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
429 427 hw_pa_bcopy32(fm_pa, to_pa);
430 428 }
431 429
432 430 /*
433 431 * Copy the data from the physical page represented by "frompp" to
434 432 * that represented by "topp".
435 433 *
436 434 * Try to use per cpu mapping first, if that fails then call pp_mapin
437 435 * to load it.
438 436 *
439 437 * Returns one on success or zero on some sort of fault while doing the copy.
440 438 */
441 439 int
442 440 ppcopy(page_t *fm_pp, page_t *to_pp)
443 441 {
444 442 caddr_t fm_va, to_va;
445 443 label_t ljb;
446 444 int ret = 1;
447 445 boolean_t use_kpm = B_FALSE;
448 446
449 447 /* Try the fast path first */
450 448 if (ppcopy_common(fm_pp, to_pp))
451 449 return (1);
452 450
453 451 /*
454 452 * Try to map using KPM if enabled and we are the cageout thread.
456 454 * If it fails, fall back to ppmapin/ppmapout.
456 454 */
457 455
458 456 if (kpm_enable) {
459 457 if (curthread == kcage_cageout_thread)
460 458 use_kpm = B_TRUE;
461 459 }
462 460
463 461 if (use_kpm) {
464 462 if ((fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL ||
465 463 (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) {
466 464 if (fm_va != NULL)
467 465 hat_kpm_mapout(fm_pp, NULL, fm_va);
468 466 use_kpm = B_FALSE;
469 467 }
470 468 }
471 469
472 470 if (use_kpm == B_FALSE) {
473 471 /* do the slow path */
474 472 fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
475 473 to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
476 474 if (on_fault(&ljb)) {
477 475 ret = 0;
478 476 goto faulted;
479 477 }
480 478 }
481 479 bcopy(fm_va, to_va, PAGESIZE);
482 480 no_fault();
483 481 faulted:
484 482 /* unmap */
485 483 if (use_kpm == B_TRUE) {
486 484 hat_kpm_mapout(fm_pp, NULL, fm_va);
487 485 hat_kpm_mapout(to_pp, NULL, to_va);
488 486 } else {
489 487 ppmapout(fm_va);
490 488 ppmapout(to_va);
491 489 }
492 490 return (ret);
493 491 }
494 492
495 493 /*
496 494 * Zero the physical page from off to off + len given by `pp'
497 495 * without changing the reference and modified bits of page.
498 496 *
499 497 * Again, we'll try per cpu mapping first.
500 498 */
501 499 void
502 500 pagezero(page_t *pp, uint_t off, uint_t len)
503 501 {
504 502 caddr_t va;
505 503 caddr_t *slot;
506 504 int fast = 1;
507 505 processorid_t cpu;
508 506 extern int hwblkclr(void *, size_t);
509 507 extern int use_hw_bzero;
510 508
511 509 ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
512 510 ASSERT(PAGE_LOCKED(pp));
513 511
514 512 PP_STAT_ADD(ppzero);
515 513
516 514 if (len != MMU_PAGESIZE || !use_hw_bzero) {
517 515 /*
518 516 * Since the fast path doesn't do anything about
519 517 * VAC coloring, we make sure bcopy h/w will be used.
520 518 */
521 519 fast = 0;
522 520 va = NULL;
523 521 PP_STAT_ADD(ppzero_short);
524 522 }
525 523
526 524 kpreempt_disable();
527 525
528 526 if (fast) {
529 527 cpu = CPU->cpu_id;
530 528 va = pp_load_tlb(cpu, &slot, pp, TTE_HWWR_INT);
531 529 }
532 530
533 531 if (va == NULL) {
534 532 /*
535 533 * We are here because either len != MMU_PAGESIZE, pp_load_tlb()
536 534 * returned NULL, or use_hw_bzero is disabled.
537 535 */
538 536 va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
539 537 fast = 0;
540 538 }
541 539
542 540 if (hwblkclr(va + off, len)) {
543 541 /*
544 542 * We may not have used block commit asi.
545 543 * So flush the I-$ manually
546 544 */
547 545
548 546 ASSERT(fast == 0);
549 547
550 548 sync_icache(va + off, len);
551 549 } else {
552 550 /*
553 551 * We have used blk commit, and flushed the I-$. However we
554 552 * still may have an instruction in the pipeline. Only a flush
555 553 * instruction will invalidate that.
556 554 */
557 555 doflush(va);
558 556 }
559 557
560 558 if (fast) {
561 559 ASSERT(CPU->cpu_id == cpu);
562 560 pp_unload_tlb(slot, va);
563 561 } else {
564 562 ppmapout(va);
565 563 }
566 564
567 565 kpreempt_enable();
568 566 }
(236 lines elided)
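For reference, here is a compact userland sketch of the lock-free slot allocation that ppmapin() implements above. The names (NCOLORS, NSETS, claim_slot(), the flat vabase layout) are illustrative assumptions, not the kernel's: the reserved VA space is treated as a colors-by-sets array indexed by clsettoarray(color, set) == color * nsets + set, and a slot is claimed by CAS-ing its entry to NULL, starting from the preferred color and wrapping.

	#include <atomic.h>
	#include <stddef.h>

	#define	NCOLORS	4	/* assumed stand-in for ppmap_pages */
	#define	NSETS	8	/* assumed stand-in for nsets */
	#define	PAGESZ	8192
	#define	CLSETTOARRAY(color, set)	((color) * NSETS + (set))

	static char vabase[NCOLORS * NSETS * PAGESZ];	/* "PPMAPBASE" space */
	static void *slots[NCOLORS * NSETS];	/* ppmap_vaddrs[] stand-in */

	void
	init_slots(void)
	{
		int i;

		for (i = 0; i < NCOLORS * NSETS; i++)
			slots[i] = &vabase[i * PAGESZ];
	}

	void *
	claim_slot(int start_color)
	{
		int color = start_color;
		int set, i;
		void *va;

		do {
			for (set = 0; set < NSETS; set++) {
				i = CLSETTOARRAY(color, set);
				va = slots[i];
				if (va != NULL &&
				    atomic_cas_ptr(&slots[i], va, NULL) == va)
					return (va);	/* slot is now ours */
			}
			if (++color == NCOLORS)	/* try the next color */
				color = 0;
		} while (color != start_color);

		return (NULL);	/* all busy; ppmapin() falls back to the heap */
	}

Release needs no CAS: as in ppmapout(), only the owner of a claimed slot stores its address back, so a plain assignment suffices.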