6149 use NULL capable segop as a shorthand for no-capabilities
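The segkp driver advertises no segment capabilities: the segkp_capable() routine removed below unconditionally returns 0. This change drops both that routine and the .capable entry from segkp_ops, relying on the change described in the synopsis, i.e. the generic segment-op dispatch treating a NULL capable hook as "no capabilities". A minimal sketch of such a dispatch wrapper is shown here for context only; the wrapper name and its exact form are assumptions for illustration, not the dispatch code touched by this change.

/*
 * Illustrative sketch only: a seg-op dispatcher that treats a NULL
 * capable entry in seg_ops as "capability not supported".
 */
static int
segop_capable(struct seg *seg, segcapability_t cap)
{
	if (seg->s_ops->capable == NULL)
		return (0);		/* no capabilities advertised */

	return (seg->s_ops->capable(seg, cap));
}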
--- old/usr/src/uts/common/vm/seg_kp.c
+++ new/usr/src/uts/common/vm/seg_kp.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
26 26 /* All Rights Reserved */
27 27
28 28 /*
29 29 * Portions of this source code were derived from Berkeley 4.3 BSD
30 30 * under license from the Regents of the University of California.
31 31 */
32 32
33 33 /*
34 34 * segkp is a segment driver that administers the allocation and deallocation
35 35 * of pageable variable size chunks of kernel virtual address space. Each
36 36 * allocated resource is page-aligned.
37 37 *
38 38 * The user may specify whether the resource should be initialized to 0,
39 39 * include a redzone, or locked in memory.
40 40 */
41 41
42 42 #include <sys/types.h>
43 43 #include <sys/t_lock.h>
44 44 #include <sys/thread.h>
45 45 #include <sys/param.h>
46 46 #include <sys/errno.h>
47 47 #include <sys/sysmacros.h>
48 48 #include <sys/systm.h>
49 49 #include <sys/buf.h>
50 50 #include <sys/mman.h>
51 51 #include <sys/vnode.h>
52 52 #include <sys/cmn_err.h>
53 53 #include <sys/swap.h>
54 54 #include <sys/tuneable.h>
55 55 #include <sys/kmem.h>
56 56 #include <sys/vmem.h>
57 57 #include <sys/cred.h>
58 58 #include <sys/dumphdr.h>
59 59 #include <sys/debug.h>
60 60 #include <sys/vtrace.h>
61 61 #include <sys/stack.h>
62 62 #include <sys/atomic.h>
63 63 #include <sys/archsystm.h>
64 64 #include <sys/lgrp.h>
65 65
66 66 #include <vm/as.h>
67 67 #include <vm/seg.h>
68 68 #include <vm/seg_kp.h>
69 69 #include <vm/seg_kmem.h>
70 70 #include <vm/anon.h>
71 71 #include <vm/page.h>
72 72 #include <vm/hat.h>
73 73 #include <sys/bitmap.h>
74 74
75 75 /*
76 76 * Private seg op routines
77 77 */
78 78 static void segkp_badop(void);
79 79 static void segkp_dump(struct seg *seg);
80 80 static int segkp_checkprot(struct seg *seg, caddr_t addr, size_t len,
81 81 uint_t prot);
82 82 static int segkp_kluster(struct seg *seg, caddr_t addr, ssize_t delta);
83 83 static int segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
84 84 struct page ***page, enum lock_type type,
85 85 enum seg_rw rw);
86 86 static void segkp_insert(struct seg *seg, struct segkp_data *kpd);
87 87 static void segkp_delete(struct seg *seg, struct segkp_data *kpd);
88 88 static caddr_t segkp_get_internal(struct seg *seg, size_t len, uint_t flags,
89 89 struct segkp_data **tkpd, struct anon_map *amp);
90 90 static void segkp_release_internal(struct seg *seg,
91 91 struct segkp_data *kpd, size_t len);
92 92 static int segkp_unlock(struct hat *hat, struct seg *seg, caddr_t vaddr,
93 93 size_t len, struct segkp_data *kpd, uint_t flags);
94 94 static int segkp_load(struct hat *hat, struct seg *seg, caddr_t vaddr,
95 95 size_t len, struct segkp_data *kpd, uint_t flags);
96 96 static struct segkp_data *segkp_find(struct seg *seg, caddr_t vaddr);
97 97 static int segkp_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
98 -static int segkp_capable(struct seg *seg, segcapability_t capability);
99 98
100 99 /*
101 100 * Lock used to protect the hash table(s) and caches.
102 101 */
103 102 static kmutex_t segkp_lock;
104 103
105 104 /*
106 105 * The segkp caches
107 106 */
108 107 static struct segkp_cache segkp_cache[SEGKP_MAX_CACHE];
109 108
110 109 #define SEGKP_BADOP(t) (t(*)())segkp_badop
111 110
112 111 /*
113 112 * When there are fewer than red_minavail bytes left on the stack,
114 113 * segkp_map_red() will map in the redzone (if called). 5000 seems
115 114 * to work reasonably well...
116 115 */
117 116 long red_minavail = 5000;
118 117
119 118 /*
120 119 * will be set to 1 for 32 bit x86 systems only, in startup.c
121 120 */
122 121 int segkp_fromheap = 0;
123 122 ulong_t *segkp_bitmap;
124 123
125 124 /*
126 125 * If segkp_map_red() is called with the redzone already mapped and
127 126 * with less than RED_DEEP_THRESHOLD bytes available on the stack,
128 127 * then the stack situation has become quite serious; if much more stack
129 128 * is consumed, we have the potential of scrogging the next thread/LWP
130 129 * structure. To help debug the "can't happen" panics which may
131 130 * result from this condition, we record hrestime and the calling thread
132 131 * in red_deep_hires and red_deep_thread respectively.
133 132 */
134 133 #define RED_DEEP_THRESHOLD 2000
135 134
136 135 hrtime_t red_deep_hires;
137 136 kthread_t *red_deep_thread;
138 137
139 138 uint32_t red_nmapped;
140 139 uint32_t red_closest = UINT_MAX;
141 140 uint32_t red_ndoubles;
142 141
143 142 pgcnt_t anon_segkp_pages_locked; /* See vm/anon.h */
144 143 pgcnt_t anon_segkp_pages_resv; /* anon reserved by seg_kp */
145 144
146 145 static struct seg_ops segkp_ops = {
147 146 .dup = SEGKP_BADOP(int),
148 147 .unmap = SEGKP_BADOP(int),
149 148 .free = SEGKP_BADOP(void),
150 149 .fault = segkp_fault,
151 150 .faulta = SEGKP_BADOP(faultcode_t),
152 151 .setprot = SEGKP_BADOP(int),
153 152 .checkprot = segkp_checkprot,
154 153 .kluster = segkp_kluster,
155 154 .swapout = SEGKP_BADOP(size_t),
156 155 .sync = SEGKP_BADOP(int),
157 156 .incore = SEGKP_BADOP(size_t),
158 157 .lockop = SEGKP_BADOP(int),
159 158 .getprot = SEGKP_BADOP(int),
160 159 .getoffset = SEGKP_BADOP(u_offset_t),
161 160 .gettype = SEGKP_BADOP(int),
162 161 .getvp = SEGKP_BADOP(int),
163 162 .advise = SEGKP_BADOP(int),
164 163 .dump = segkp_dump,
165 164 .pagelock = segkp_pagelock,
166 165 .setpagesize = SEGKP_BADOP(int),
167 166 .getmemid = segkp_getmemid,
168 - .capable = segkp_capable,
169 167 };
170 168
171 169
172 170 static void
173 171 segkp_badop(void)
174 172 {
175 173 panic("segkp_badop");
176 174 /*NOTREACHED*/
177 175 }
178 176
179 177 static void segkpinit_mem_config(struct seg *);
180 178
181 179 static uint32_t segkp_indel;
182 180
183 181 /*
184 182 * Allocate the segment specific private data struct and fill it in
185 183 * with the per kp segment mutex, anon ptr. array and hash table.
186 184 */
187 185 int
188 186 segkp_create(struct seg *seg)
189 187 {
190 188 struct segkp_segdata *kpsd;
191 189 size_t np;
192 190
193 191 ASSERT(seg != NULL && seg->s_as == &kas);
194 192 ASSERT(RW_WRITE_HELD(&seg->s_as->a_lock));
195 193
196 194 if (seg->s_size & PAGEOFFSET) {
197 195 panic("Bad segkp size");
198 196 /*NOTREACHED*/
199 197 }
200 198
201 199 kpsd = kmem_zalloc(sizeof (struct segkp_segdata), KM_SLEEP);
202 200
203 201 /*
204 202 * Allocate the virtual memory for segkp and initialize it
205 203 */
206 204 if (segkp_fromheap) {
207 205 np = btop(kvseg.s_size);
208 206 segkp_bitmap = kmem_zalloc(BT_SIZEOFMAP(np), KM_SLEEP);
209 207 kpsd->kpsd_arena = vmem_create("segkp", NULL, 0, PAGESIZE,
210 208 vmem_alloc, vmem_free, heap_arena, 5 * PAGESIZE, VM_SLEEP);
211 209 } else {
212 210 segkp_bitmap = NULL;
213 211 np = btop(seg->s_size);
214 212 kpsd->kpsd_arena = vmem_create("segkp", seg->s_base,
215 213 seg->s_size, PAGESIZE, NULL, NULL, NULL, 5 * PAGESIZE,
216 214 VM_SLEEP);
217 215 }
218 216
219 217 kpsd->kpsd_anon = anon_create(np, ANON_SLEEP | ANON_ALLOC_FORCE);
220 218
221 219 kpsd->kpsd_hash = kmem_zalloc(SEGKP_HASHSZ * sizeof (struct segkp *),
222 220 KM_SLEEP);
223 221 seg->s_data = (void *)kpsd;
224 222 seg->s_ops = &segkp_ops;
225 223 segkpinit_mem_config(seg);
226 224 return (0);
227 225 }
228 226
229 227
230 228 /*
231 229 * Find a free 'freelist' and initialize it with the appropriate attributes
232 230 */
233 231 void *
234 232 segkp_cache_init(struct seg *seg, int maxsize, size_t len, uint_t flags)
235 233 {
236 234 int i;
237 235
238 236 if ((flags & KPD_NO_ANON) && !(flags & KPD_LOCKED))
239 237 return ((void *)-1);
240 238
241 239 mutex_enter(&segkp_lock);
242 240 for (i = 0; i < SEGKP_MAX_CACHE; i++) {
243 241 if (segkp_cache[i].kpf_inuse)
244 242 continue;
245 243 segkp_cache[i].kpf_inuse = 1;
246 244 segkp_cache[i].kpf_max = maxsize;
247 245 segkp_cache[i].kpf_flags = flags;
248 246 segkp_cache[i].kpf_seg = seg;
249 247 segkp_cache[i].kpf_len = len;
250 248 mutex_exit(&segkp_lock);
251 249 return ((void *)(uintptr_t)i);
252 250 }
253 251 mutex_exit(&segkp_lock);
254 252 return ((void *)-1);
255 253 }
256 254
257 255 /*
258 256 * Free all the cache resources.
259 257 */
260 258 void
261 259 segkp_cache_free(void)
262 260 {
263 261 struct segkp_data *kpd;
264 262 struct seg *seg;
265 263 int i;
266 264
267 265 mutex_enter(&segkp_lock);
268 266 for (i = 0; i < SEGKP_MAX_CACHE; i++) {
269 267 if (!segkp_cache[i].kpf_inuse)
270 268 continue;
271 269 /*
272 270 * Disconnect the freelist and process each element
273 271 */
274 272 kpd = segkp_cache[i].kpf_list;
275 273 seg = segkp_cache[i].kpf_seg;
276 274 segkp_cache[i].kpf_list = NULL;
277 275 segkp_cache[i].kpf_count = 0;
278 276 mutex_exit(&segkp_lock);
279 277
280 278 while (kpd != NULL) {
281 279 struct segkp_data *next;
282 280
283 281 next = kpd->kp_next;
284 282 segkp_release_internal(seg, kpd, kpd->kp_len);
285 283 kpd = next;
286 284 }
287 285 mutex_enter(&segkp_lock);
288 286 }
289 287 mutex_exit(&segkp_lock);
290 288 }
291 289
292 290 /*
293 291 * There are 2 entries into segkp_get_internal. The first includes a cookie
294 292 * used to access a pool of cached segkp resources. The second does not
295 293 * use the cache.
296 294 */
297 295 caddr_t
298 296 segkp_get(struct seg *seg, size_t len, uint_t flags)
299 297 {
300 298 struct segkp_data *kpd = NULL;
301 299
302 300 if (segkp_get_internal(seg, len, flags, &kpd, NULL) != NULL) {
303 301 kpd->kp_cookie = -1;
304 302 return (stom(kpd->kp_base, flags));
305 303 }
306 304 return (NULL);
307 305 }
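As the comment above segkp_get() notes, this is the entry point that bypasses the cache: it returns the mapped base address of a page-aligned, pageable resource, and segkp_release() later looks that address up in the segment's hash and tears the resource down. A minimal sketch of a hypothetical caller follows; the function name, buffer size, and flag combination are assumptions for illustration only.

/*
 * Hypothetical caller, for illustration only: allocate a zeroed,
 * pageable buffer with a redzone from the global segkp segment,
 * then release it.
 */
static int
segkp_usage_example(size_t len)
{
	caddr_t va;

	/* len must be a multiple of PAGESIZE; segkp_get_internal panics if not */
	va = segkp_get(segkp, len, KPD_ZERO | KPD_HASREDZONE);
	if (va == NULL)
		return (ENOMEM);	/* out of VA, anon, or page resources */

	/* ... use the pageable buffer at va ... */

	segkp_release(segkp, va);
	return (0);
}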
308 306
309 307 /*
310 308 * Return a 'cached' segkp address
311 309 */
312 310 caddr_t
313 311 segkp_cache_get(void *cookie)
314 312 {
315 313 struct segkp_cache *freelist = NULL;
316 314 struct segkp_data *kpd = NULL;
317 315 int index = (int)(uintptr_t)cookie;
318 316 struct seg *seg;
319 317 size_t len;
320 318 uint_t flags;
321 319
322 320 if (index < 0 || index >= SEGKP_MAX_CACHE)
323 321 return (NULL);
324 322 freelist = &segkp_cache[index];
325 323
326 324 mutex_enter(&segkp_lock);
327 325 seg = freelist->kpf_seg;
328 326 flags = freelist->kpf_flags;
329 327 if (freelist->kpf_list != NULL) {
330 328 kpd = freelist->kpf_list;
331 329 freelist->kpf_list = kpd->kp_next;
332 330 freelist->kpf_count--;
333 331 mutex_exit(&segkp_lock);
334 332 kpd->kp_next = NULL;
335 333 segkp_insert(seg, kpd);
336 334 return (stom(kpd->kp_base, flags));
337 335 }
338 336 len = freelist->kpf_len;
339 337 mutex_exit(&segkp_lock);
340 338 if (segkp_get_internal(seg, len, flags, &kpd, NULL) != NULL) {
341 339 kpd->kp_cookie = index;
342 340 return (stom(kpd->kp_base, flags));
343 341 }
344 342 return (NULL);
345 343 }
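The cached entry point pairs with segkp_cache_init(): a caller reserves a freelist once, keeps the returned cookie, and then draws fixed-size resources from it with segkp_cache_get(); segkp_release() on such a resource puts it back on the freelist while the pool is below kpf_max. A sketch of that pattern is below, with the pool size, length, and flags chosen purely for illustration (they are not taken from any real caller).

/*
 * Hypothetical cached-pool usage, for illustration only.
 */
static void	*example_cookie;

static void
segkp_cache_example_init(void)
{
	example_cookie = segkp_cache_init(segkp, 32, ptob(4),
	    KPD_HASREDZONE | KPD_LOCKED | KPD_NO_ANON);
	ASSERT(example_cookie != (void *)-1);
}

static caddr_t
segkp_cache_example_alloc(void)
{
	/* Reuses a freelist entry if one is available, else allocates anew */
	return (segkp_cache_get(example_cookie));
}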
346 344
347 345 caddr_t
348 346 segkp_get_withanonmap(
349 347 struct seg *seg,
350 348 size_t len,
351 349 uint_t flags,
352 350 struct anon_map *amp)
353 351 {
354 352 struct segkp_data *kpd = NULL;
355 353
356 354 ASSERT(amp != NULL);
357 355 flags |= KPD_HASAMP;
358 356 if (segkp_get_internal(seg, len, flags, &kpd, amp) != NULL) {
359 357 kpd->kp_cookie = -1;
360 358 return (stom(kpd->kp_base, flags));
361 359 }
362 360 return (NULL);
363 361 }
364 362
365 363 /*
366 364 * This does the real work of segkp allocation.
367 365 * Return to client base addr. len must be page-aligned. A null value is
368 366 * returned if there are no more vm resources (e.g. pages, swap). The len
369 367 * and base recorded in the private data structure include the redzone
370 368 * and the redzone length (if applicable). If the user requests a redzone
 371 369 * either the first or last page is left unmapped, depending on whether stacks
372 370 * grow to low or high memory.
373 371 *
374 372 * The client may also specify a no-wait flag. If that is set then the
375 373 * request will choose a non-blocking path when requesting resources.
 376 374 * The default is to make the client wait.
377 375 */
378 376 static caddr_t
379 377 segkp_get_internal(
380 378 struct seg *seg,
381 379 size_t len,
382 380 uint_t flags,
383 381 struct segkp_data **tkpd,
384 382 struct anon_map *amp)
385 383 {
386 384 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
387 385 struct segkp_data *kpd;
388 386 caddr_t vbase = NULL; /* always first virtual, may not be mapped */
389 387 pgcnt_t np = 0; /* number of pages in the resource */
390 388 pgcnt_t segkpindex;
391 389 long i;
392 390 caddr_t va;
393 391 pgcnt_t pages = 0;
394 392 ulong_t anon_idx = 0;
395 393 int kmflag = (flags & KPD_NOWAIT) ? KM_NOSLEEP : KM_SLEEP;
396 394 caddr_t s_base = (segkp_fromheap) ? kvseg.s_base : seg->s_base;
397 395
398 396 if (len & PAGEOFFSET) {
399 397 panic("segkp_get: len is not page-aligned");
400 398 /*NOTREACHED*/
401 399 }
402 400
403 401 ASSERT(((flags & KPD_HASAMP) == 0) == (amp == NULL));
404 402
405 403 /* Only allow KPD_NO_ANON if we are going to lock it down */
406 404 if ((flags & (KPD_LOCKED|KPD_NO_ANON)) == KPD_NO_ANON)
407 405 return (NULL);
408 406
409 407 if ((kpd = kmem_zalloc(sizeof (struct segkp_data), kmflag)) == NULL)
410 408 return (NULL);
411 409 /*
412 410 * Fix up the len to reflect the REDZONE if applicable
413 411 */
414 412 if (flags & KPD_HASREDZONE)
415 413 len += PAGESIZE;
416 414 np = btop(len);
417 415
418 416 vbase = vmem_alloc(SEGKP_VMEM(seg), len, kmflag | VM_BESTFIT);
419 417 if (vbase == NULL) {
420 418 kmem_free(kpd, sizeof (struct segkp_data));
421 419 return (NULL);
422 420 }
423 421
424 422 /* If locking, reserve physical memory */
425 423 if (flags & KPD_LOCKED) {
426 424 pages = btop(SEGKP_MAPLEN(len, flags));
427 425 if (page_resv(pages, kmflag) == 0) {
428 426 vmem_free(SEGKP_VMEM(seg), vbase, len);
429 427 kmem_free(kpd, sizeof (struct segkp_data));
430 428 return (NULL);
431 429 }
432 430 if ((flags & KPD_NO_ANON) == 0)
433 431 atomic_add_long(&anon_segkp_pages_locked, pages);
434 432 }
435 433
436 434 /*
437 435 * Reserve sufficient swap space for this vm resource. We'll
438 436 * actually allocate it in the loop below, but reserving it
439 437 * here allows us to back out more gracefully than if we
440 438 * had an allocation failure in the body of the loop.
441 439 *
442 440 * Note that we don't need swap space for the red zone page.
443 441 */
444 442 if (amp != NULL) {
445 443 /*
446 444 * The swap reservation has been done, if required, and the
447 445 * anon_hdr is separate.
448 446 */
449 447 anon_idx = 0;
450 448 kpd->kp_anon_idx = anon_idx;
451 449 kpd->kp_anon = amp->ahp;
452 450
453 451 TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
454 452 kpd, vbase, len, flags, 1);
455 453
456 454 } else if ((flags & KPD_NO_ANON) == 0) {
457 455 if (anon_resv_zone(SEGKP_MAPLEN(len, flags), NULL) == 0) {
458 456 if (flags & KPD_LOCKED) {
459 457 atomic_add_long(&anon_segkp_pages_locked,
460 458 -pages);
461 459 page_unresv(pages);
462 460 }
463 461 vmem_free(SEGKP_VMEM(seg), vbase, len);
464 462 kmem_free(kpd, sizeof (struct segkp_data));
465 463 return (NULL);
466 464 }
467 465 atomic_add_long(&anon_segkp_pages_resv,
468 466 btop(SEGKP_MAPLEN(len, flags)));
469 467 anon_idx = ((uintptr_t)(vbase - s_base)) >> PAGESHIFT;
470 468 kpd->kp_anon_idx = anon_idx;
471 469 kpd->kp_anon = kpsd->kpsd_anon;
472 470
473 471 TRACE_5(TR_FAC_VM, TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
474 472 kpd, vbase, len, flags, 1);
475 473 } else {
476 474 kpd->kp_anon = NULL;
477 475 kpd->kp_anon_idx = 0;
478 476 }
479 477
480 478 /*
481 479 * Allocate page and anon resources for the virtual address range
482 480 * except the redzone
483 481 */
484 482 if (segkp_fromheap)
485 483 segkpindex = btop((uintptr_t)(vbase - kvseg.s_base));
486 484 for (i = 0, va = vbase; i < np; i++, va += PAGESIZE) {
487 485 page_t *pl[2];
488 486 struct vnode *vp;
489 487 anoff_t off;
490 488 int err;
491 489 page_t *pp = NULL;
492 490
493 491 /*
494 492 * Mark this page to be a segkp page in the bitmap.
495 493 */
496 494 if (segkp_fromheap) {
497 495 BT_ATOMIC_SET(segkp_bitmap, segkpindex);
498 496 segkpindex++;
499 497 }
500 498
501 499 /*
502 500 * If this page is the red zone page, we don't need swap
503 501 * space for it. Note that we skip over the code that
504 502 * establishes MMU mappings, so that the page remains
505 503 * invalid.
506 504 */
507 505 if ((flags & KPD_HASREDZONE) && KPD_REDZONE(kpd) == i)
508 506 continue;
509 507
510 508 if (kpd->kp_anon != NULL) {
511 509 struct anon *ap;
512 510
513 511 ASSERT(anon_get_ptr(kpd->kp_anon, anon_idx + i)
514 512 == NULL);
515 513 /*
516 514 * Determine the "vp" and "off" of the anon slot.
517 515 */
518 516 ap = anon_alloc(NULL, 0);
519 517 if (amp != NULL)
 520 518 ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
521 519 (void) anon_set_ptr(kpd->kp_anon, anon_idx + i,
522 520 ap, ANON_SLEEP);
523 521 if (amp != NULL)
 524 522 ANON_LOCK_EXIT(&amp->a_rwlock);
525 523 swap_xlate(ap, &vp, &off);
526 524
527 525 /*
528 526 * Create a page with the specified identity. The
529 527 * page is returned with the "shared" lock held.
530 528 */
531 529 err = VOP_GETPAGE(vp, (offset_t)off, PAGESIZE,
532 530 NULL, pl, PAGESIZE, seg, va, S_CREATE,
533 531 kcred, NULL);
534 532 if (err) {
535 533 /*
536 534 * XXX - This should not fail.
537 535 */
538 536 panic("segkp_get: no pages");
539 537 /*NOTREACHED*/
540 538 }
541 539 pp = pl[0];
542 540 } else {
543 541 ASSERT(page_exists(&kvp,
544 542 (u_offset_t)(uintptr_t)va) == NULL);
545 543
546 544 if ((pp = page_create_va(&kvp,
547 545 (u_offset_t)(uintptr_t)va, PAGESIZE,
548 546 (flags & KPD_NOWAIT ? 0 : PG_WAIT) | PG_EXCL |
549 547 PG_NORELOC, seg, va)) == NULL) {
550 548 /*
551 549 * Legitimize resource; then destroy it.
552 550 * Easier than trying to unwind here.
553 551 */
554 552 kpd->kp_flags = flags;
555 553 kpd->kp_base = vbase;
556 554 kpd->kp_len = len;
557 555 segkp_release_internal(seg, kpd, va - vbase);
558 556 return (NULL);
559 557 }
560 558 page_io_unlock(pp);
561 559 }
562 560
563 561 if (flags & KPD_ZERO)
564 562 pagezero(pp, 0, PAGESIZE);
565 563
566 564 /*
567 565 * Load and lock an MMU translation for the page.
568 566 */
569 567 hat_memload(seg->s_as->a_hat, va, pp, (PROT_READ|PROT_WRITE),
570 568 ((flags & KPD_LOCKED) ? HAT_LOAD_LOCK : HAT_LOAD));
571 569
572 570 /*
573 571 * Now, release lock on the page.
574 572 */
575 573 if (flags & KPD_LOCKED) {
576 574 /*
577 575 * Indicate to page_retire framework that this
578 576 * page can only be retired when it is freed.
579 577 */
580 578 PP_SETRAF(pp);
581 579 page_downgrade(pp);
582 580 } else
583 581 page_unlock(pp);
584 582 }
585 583
586 584 kpd->kp_flags = flags;
587 585 kpd->kp_base = vbase;
588 586 kpd->kp_len = len;
589 587 segkp_insert(seg, kpd);
590 588 *tkpd = kpd;
591 589 return (stom(kpd->kp_base, flags));
592 590 }
593 591
594 592 /*
 595 593 * Release the resource to the cache if the pool (designated by the cookie)
 596 594 * has fewer than the maximum allowable. If inserted in the cache,
 597 595 * segkp_delete ensures the element is taken off the active list.
598 596 */
599 597 void
600 598 segkp_release(struct seg *seg, caddr_t vaddr)
601 599 {
602 600 struct segkp_cache *freelist;
603 601 struct segkp_data *kpd = NULL;
604 602
605 603 if ((kpd = segkp_find(seg, vaddr)) == NULL) {
606 604 panic("segkp_release: null kpd");
607 605 /*NOTREACHED*/
608 606 }
609 607
610 608 if (kpd->kp_cookie != -1) {
611 609 freelist = &segkp_cache[kpd->kp_cookie];
612 610 mutex_enter(&segkp_lock);
613 611 if (!segkp_indel && freelist->kpf_count < freelist->kpf_max) {
614 612 segkp_delete(seg, kpd);
615 613 kpd->kp_next = freelist->kpf_list;
616 614 freelist->kpf_list = kpd;
617 615 freelist->kpf_count++;
618 616 mutex_exit(&segkp_lock);
619 617 return;
620 618 } else {
621 619 mutex_exit(&segkp_lock);
622 620 kpd->kp_cookie = -1;
623 621 }
624 622 }
625 623 segkp_release_internal(seg, kpd, kpd->kp_len);
626 624 }
627 625
628 626 /*
629 627 * Free the entire resource. segkp_unlock gets called with the start of the
630 628 * mapped portion of the resource. The length is the size of the mapped
631 629 * portion
632 630 */
633 631 static void
634 632 segkp_release_internal(struct seg *seg, struct segkp_data *kpd, size_t len)
635 633 {
636 634 caddr_t va;
637 635 long i;
638 636 long redzone;
639 637 size_t np;
640 638 page_t *pp;
641 639 struct vnode *vp;
642 640 anoff_t off;
643 641 struct anon *ap;
644 642 pgcnt_t segkpindex;
645 643
646 644 ASSERT(kpd != NULL);
647 645 ASSERT((kpd->kp_flags & KPD_HASAMP) == 0 || kpd->kp_cookie == -1);
648 646 np = btop(len);
649 647
650 648 /* Remove from active hash list */
651 649 if (kpd->kp_cookie == -1) {
652 650 mutex_enter(&segkp_lock);
653 651 segkp_delete(seg, kpd);
654 652 mutex_exit(&segkp_lock);
655 653 }
656 654
657 655 /*
658 656 * Precompute redzone page index.
659 657 */
660 658 redzone = -1;
661 659 if (kpd->kp_flags & KPD_HASREDZONE)
662 660 redzone = KPD_REDZONE(kpd);
663 661
664 662
665 663 va = kpd->kp_base;
666 664
667 665 hat_unload(seg->s_as->a_hat, va, (np << PAGESHIFT),
668 666 ((kpd->kp_flags & KPD_LOCKED) ? HAT_UNLOAD_UNLOCK : HAT_UNLOAD));
669 667 /*
670 668 * Free up those anon resources that are quiescent.
671 669 */
672 670 if (segkp_fromheap)
673 671 segkpindex = btop((uintptr_t)(va - kvseg.s_base));
674 672 for (i = 0; i < np; i++, va += PAGESIZE) {
675 673
676 674 /*
677 675 * Clear the bit for this page from the bitmap.
678 676 */
679 677 if (segkp_fromheap) {
680 678 BT_ATOMIC_CLEAR(segkp_bitmap, segkpindex);
681 679 segkpindex++;
682 680 }
683 681
684 682 if (i == redzone)
685 683 continue;
686 684 if (kpd->kp_anon) {
687 685 /*
688 686 * Free up anon resources and destroy the
689 687 * associated pages.
690 688 *
691 689 * Release the lock if there is one. Have to get the
692 690 * page to do this, unfortunately.
693 691 */
694 692 if (kpd->kp_flags & KPD_LOCKED) {
695 693 ap = anon_get_ptr(kpd->kp_anon,
696 694 kpd->kp_anon_idx + i);
697 695 swap_xlate(ap, &vp, &off);
698 696 /* Find the shared-locked page. */
699 697 pp = page_find(vp, (u_offset_t)off);
700 698 if (pp == NULL) {
701 699 panic("segkp_release: "
702 700 "kp_anon: no page to unlock ");
703 701 /*NOTREACHED*/
704 702 }
705 703 if (PP_ISRAF(pp))
706 704 PP_CLRRAF(pp);
707 705
708 706 page_unlock(pp);
709 707 }
710 708 if ((kpd->kp_flags & KPD_HASAMP) == 0) {
711 709 anon_free(kpd->kp_anon, kpd->kp_anon_idx + i,
712 710 PAGESIZE);
713 711 anon_unresv_zone(PAGESIZE, NULL);
714 712 atomic_dec_ulong(&anon_segkp_pages_resv);
715 713 }
716 714 TRACE_5(TR_FAC_VM,
717 715 TR_ANON_SEGKP, "anon segkp:%p %p %lu %u %u",
718 716 kpd, va, PAGESIZE, 0, 0);
719 717 } else {
720 718 if (kpd->kp_flags & KPD_LOCKED) {
721 719 pp = page_find(&kvp, (u_offset_t)(uintptr_t)va);
722 720 if (pp == NULL) {
723 721 panic("segkp_release: "
724 722 "no page to unlock");
725 723 /*NOTREACHED*/
726 724 }
727 725 if (PP_ISRAF(pp))
728 726 PP_CLRRAF(pp);
729 727 /*
730 728 * We should just upgrade the lock here
731 729 * but there is no upgrade that waits.
732 730 */
733 731 page_unlock(pp);
734 732 }
735 733 pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)va,
736 734 SE_EXCL);
737 735 if (pp != NULL)
738 736 page_destroy(pp, 0);
739 737 }
740 738 }
741 739
742 740 /* If locked, release physical memory reservation */
743 741 if (kpd->kp_flags & KPD_LOCKED) {
744 742 pgcnt_t pages = btop(SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags));
745 743 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
746 744 atomic_add_long(&anon_segkp_pages_locked, -pages);
747 745 page_unresv(pages);
748 746 }
749 747
750 748 vmem_free(SEGKP_VMEM(seg), kpd->kp_base, kpd->kp_len);
751 749 kmem_free(kpd, sizeof (struct segkp_data));
752 750 }
753 751
754 752 /*
755 753 * segkp_map_red() will check the current frame pointer against the
756 754 * stack base. If the amount of stack remaining is questionable
757 755 * (less than red_minavail), then segkp_map_red() will map in the redzone
758 756 * and return 1. Otherwise, it will return 0. segkp_map_red() can
759 757 * _only_ be called when:
760 758 *
761 759 * - it is safe to sleep on page_create_va().
762 760 * - the caller is non-swappable.
763 761 *
764 762 * It is up to the caller to remember whether segkp_map_red() successfully
765 763 * mapped the redzone, and, if so, to call segkp_unmap_red() at a later
766 764 * time. Note that the caller must _remain_ non-swappable until after
767 765 * calling segkp_unmap_red().
768 766 *
769 767 * Currently, this routine is only called from pagefault() (which necessarily
770 768 * satisfies the above conditions).
771 769 */
772 770 #if defined(STACK_GROWTH_DOWN)
773 771 int
774 772 segkp_map_red(void)
775 773 {
776 774 uintptr_t fp = STACK_BIAS + (uintptr_t)getfp();
777 775 #ifndef _LP64
778 776 caddr_t stkbase;
779 777 #endif
780 778
781 779 ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
782 780
783 781 /*
784 782 * Optimize for the common case where we simply return.
785 783 */
786 784 if ((curthread->t_red_pp == NULL) &&
787 785 (fp - (uintptr_t)curthread->t_stkbase >= red_minavail))
788 786 return (0);
789 787
790 788 #if defined(_LP64)
791 789 /*
792 790 * XXX We probably need something better than this.
793 791 */
794 792 panic("kernel stack overflow");
795 793 /*NOTREACHED*/
796 794 #else /* _LP64 */
797 795 if (curthread->t_red_pp == NULL) {
798 796 page_t *red_pp;
799 797 struct seg kseg;
800 798
801 799 caddr_t red_va = (caddr_t)
802 800 (((uintptr_t)curthread->t_stkbase & (uintptr_t)PAGEMASK) -
803 801 PAGESIZE);
804 802
805 803 ASSERT(page_exists(&kvp, (u_offset_t)(uintptr_t)red_va) ==
806 804 NULL);
807 805
808 806 /*
 809 807 * Allocate the physical page for the red page.
810 808 */
811 809 /*
812 810 * No PG_NORELOC here to avoid waits. Unlikely to get
813 811 * a relocate happening in the short time the page exists
814 812 * and it will be OK anyway.
815 813 */
816 814
817 815 kseg.s_as = &kas;
818 816 red_pp = page_create_va(&kvp, (u_offset_t)(uintptr_t)red_va,
819 817 PAGESIZE, PG_WAIT | PG_EXCL, &kseg, red_va);
820 818 ASSERT(red_pp != NULL);
821 819
822 820 /*
823 821 * So we now have a page to jam into the redzone...
824 822 */
825 823 page_io_unlock(red_pp);
826 824
827 825 hat_memload(kas.a_hat, red_va, red_pp,
828 826 (PROT_READ|PROT_WRITE), HAT_LOAD_LOCK);
829 827 page_downgrade(red_pp);
830 828
831 829 /*
832 830 * The page is left SE_SHARED locked so we can hold on to
833 831 * the page_t pointer.
834 832 */
835 833 curthread->t_red_pp = red_pp;
836 834
837 835 atomic_inc_32(&red_nmapped);
838 836 while (fp - (uintptr_t)curthread->t_stkbase < red_closest) {
839 837 (void) atomic_cas_32(&red_closest, red_closest,
840 838 (uint32_t)(fp - (uintptr_t)curthread->t_stkbase));
841 839 }
842 840 return (1);
843 841 }
844 842
845 843 stkbase = (caddr_t)(((uintptr_t)curthread->t_stkbase &
846 844 (uintptr_t)PAGEMASK) - PAGESIZE);
847 845
848 846 atomic_inc_32(&red_ndoubles);
849 847
850 848 if (fp - (uintptr_t)stkbase < RED_DEEP_THRESHOLD) {
851 849 /*
852 850 * Oh boy. We're already deep within the mapped-in
853 851 * redzone page, and the caller is trying to prepare
854 852 * for a deep stack run. We're running without a
855 853 * redzone right now: if the caller plows off the
856 854 * end of the stack, it'll plow another thread or
857 855 * LWP structure. That situation could result in
858 856 * a very hard-to-debug panic, so, in the spirit of
859 857 * recording the name of one's killer in one's own
860 858 * blood, we're going to record hrestime and the calling
861 859 * thread.
862 860 */
863 861 red_deep_hires = hrestime.tv_nsec;
864 862 red_deep_thread = curthread;
865 863 }
866 864
867 865 /*
868 866 * If this is a DEBUG kernel, and we've run too deep for comfort, toss.
869 867 */
870 868 ASSERT(fp - (uintptr_t)stkbase >= RED_DEEP_THRESHOLD);
871 869 return (0);
872 870 #endif /* _LP64 */
873 871 }
874 872
875 873 void
876 874 segkp_unmap_red(void)
877 875 {
878 876 page_t *pp;
879 877 caddr_t red_va = (caddr_t)(((uintptr_t)curthread->t_stkbase &
880 878 (uintptr_t)PAGEMASK) - PAGESIZE);
881 879
882 880 ASSERT(curthread->t_red_pp != NULL);
883 881 ASSERT(curthread->t_schedflag & TS_DONT_SWAP);
884 882
885 883 /*
886 884 * Because we locked the mapping down, we can't simply rely
887 885 * on page_destroy() to clean everything up; we need to call
888 886 * hat_unload() to explicitly unlock the mapping resources.
889 887 */
890 888 hat_unload(kas.a_hat, red_va, PAGESIZE, HAT_UNLOAD_UNLOCK);
891 889
892 890 pp = curthread->t_red_pp;
893 891
894 892 ASSERT(pp == page_find(&kvp, (u_offset_t)(uintptr_t)red_va));
895 893
896 894 /*
897 895 * Need to upgrade the SE_SHARED lock to SE_EXCL.
898 896 */
899 897 if (!page_tryupgrade(pp)) {
900 898 /*
 901 899 * As there is no wait for upgrade, release the
902 900 * SE_SHARED lock and wait for SE_EXCL.
903 901 */
904 902 page_unlock(pp);
905 903 pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)red_va, SE_EXCL);
906 904 /* pp may be NULL here, hence the test below */
907 905 }
908 906
909 907 /*
910 908 * Destroy the page, with dontfree set to zero (i.e. free it).
911 909 */
912 910 if (pp != NULL)
913 911 page_destroy(pp, 0);
914 912 curthread->t_red_pp = NULL;
915 913 }
916 914 #else
917 915 #error Red stacks only supported with downwards stack growth.
918 916 #endif
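The contract spelled out in the block comment above segkp_map_red() falls on the caller: it must be non-swappable, remember whether the redzone was actually mapped, and call segkp_unmap_red() later if it was. A schematic of that pattern follows; the real caller is pagefault(), and the surrounding control flow here is an assumption for illustration only.

/*
 * Schematic caller pattern, for illustration only.  The caller must
 * already be non-swappable (TS_DONT_SWAP) and able to sleep in
 * page_create_va().
 */
static void
example_fault_path(void)
{
	int mapped_red;

	mapped_red = segkp_map_red();

	/* ... resolve the fault, possibly consuming more stack ... */

	if (mapped_red)
		segkp_unmap_red();
}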
919 917
920 918 /*
921 919 * Handle a fault on an address corresponding to one of the
922 920 * resources in the segkp segment.
923 921 */
924 922 faultcode_t
925 923 segkp_fault(
926 924 struct hat *hat,
927 925 struct seg *seg,
928 926 caddr_t vaddr,
929 927 size_t len,
930 928 enum fault_type type,
931 929 enum seg_rw rw)
932 930 {
933 931 struct segkp_data *kpd = NULL;
934 932 int err;
935 933
936 934 ASSERT(seg->s_as == &kas && RW_READ_HELD(&seg->s_as->a_lock));
937 935
938 936 /*
939 937 * Sanity checks.
940 938 */
941 939 if (type == F_PROT) {
942 940 panic("segkp_fault: unexpected F_PROT fault");
943 941 /*NOTREACHED*/
944 942 }
945 943
946 944 if ((kpd = segkp_find(seg, vaddr)) == NULL)
947 945 return (FC_NOMAP);
948 946
949 947 mutex_enter(&kpd->kp_lock);
950 948
951 949 if (type == F_SOFTLOCK) {
952 950 ASSERT(!(kpd->kp_flags & KPD_LOCKED));
953 951 /*
954 952 * The F_SOFTLOCK case has more stringent
955 953 * range requirements: the given range must exactly coincide
 956 954 * with the resource's mapped portion. Note that a reference to the
 957 955 * redzone is handled, since vaddr would not equal base.
958 956 */
959 957 if (vaddr != stom(kpd->kp_base, kpd->kp_flags) ||
960 958 len != SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags)) {
961 959 mutex_exit(&kpd->kp_lock);
962 960 return (FC_MAKE_ERR(EFAULT));
963 961 }
964 962
965 963 if ((err = segkp_load(hat, seg, vaddr, len, kpd, KPD_LOCKED))) {
966 964 mutex_exit(&kpd->kp_lock);
967 965 return (FC_MAKE_ERR(err));
968 966 }
969 967 kpd->kp_flags |= KPD_LOCKED;
970 968 mutex_exit(&kpd->kp_lock);
971 969 return (0);
972 970 }
973 971
974 972 if (type == F_INVAL) {
975 973 ASSERT(!(kpd->kp_flags & KPD_NO_ANON));
976 974
977 975 /*
978 976 * Check if we touched the redzone. Somewhat optimistic
979 977 * here if we are touching the redzone of our own stack
980 978 * since we wouldn't have a stack to get this far...
981 979 */
982 980 if ((kpd->kp_flags & KPD_HASREDZONE) &&
983 981 btop((uintptr_t)(vaddr - kpd->kp_base)) == KPD_REDZONE(kpd))
984 982 panic("segkp_fault: accessing redzone");
985 983
986 984 /*
987 985 * This fault may occur while the page is being F_SOFTLOCK'ed.
988 986 * Return since a 2nd segkp_load is unnecessary and also would
989 987 * result in the page being locked twice and eventually
990 988 * hang the thread_reaper thread.
991 989 */
992 990 if (kpd->kp_flags & KPD_LOCKED) {
993 991 mutex_exit(&kpd->kp_lock);
994 992 return (0);
995 993 }
996 994
997 995 err = segkp_load(hat, seg, vaddr, len, kpd, kpd->kp_flags);
998 996 mutex_exit(&kpd->kp_lock);
999 997 return (err ? FC_MAKE_ERR(err) : 0);
1000 998 }
1001 999
1002 1000 if (type == F_SOFTUNLOCK) {
1003 1001 uint_t flags;
1004 1002
1005 1003 /*
1006 1004 * Make sure the addr is LOCKED and it has anon backing
1007 1005 * before unlocking
1008 1006 */
1009 1007 if ((kpd->kp_flags & (KPD_LOCKED|KPD_NO_ANON)) != KPD_LOCKED) {
1010 1008 panic("segkp_fault: bad unlock");
1011 1009 /*NOTREACHED*/
1012 1010 }
1013 1011
1014 1012 if (vaddr != stom(kpd->kp_base, kpd->kp_flags) ||
1015 1013 len != SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags)) {
1016 1014 panic("segkp_fault: bad range");
1017 1015 /*NOTREACHED*/
1018 1016 }
1019 1017
1020 1018 if (rw == S_WRITE)
1021 1019 flags = kpd->kp_flags | KPD_WRITEDIRTY;
1022 1020 else
1023 1021 flags = kpd->kp_flags;
1024 1022 err = segkp_unlock(hat, seg, vaddr, len, kpd, flags);
1025 1023 kpd->kp_flags &= ~KPD_LOCKED;
1026 1024 mutex_exit(&kpd->kp_lock);
1027 1025 return (err ? FC_MAKE_ERR(err) : 0);
1028 1026 }
1029 1027 mutex_exit(&kpd->kp_lock);
1030 1028 panic("segkp_fault: bogus fault type: %d\n", type);
1031 1029 /*NOTREACHED*/
1032 1030 }
1033 1031
1034 1032 /*
1035 1033 * Check that the given protections suffice over the range specified by
1036 1034 * vaddr and len. For this segment type, the only issue is whether or
1037 1035 * not the range lies completely within the mapped part of an allocated
1038 1036 * resource.
1039 1037 */
1040 1038 /* ARGSUSED */
1041 1039 static int
1042 1040 segkp_checkprot(struct seg *seg, caddr_t vaddr, size_t len, uint_t prot)
1043 1041 {
1044 1042 struct segkp_data *kpd = NULL;
1045 1043 caddr_t mbase;
1046 1044 size_t mlen;
1047 1045
1048 1046 if ((kpd = segkp_find(seg, vaddr)) == NULL)
1049 1047 return (EACCES);
1050 1048
1051 1049 mutex_enter(&kpd->kp_lock);
1052 1050 mbase = stom(kpd->kp_base, kpd->kp_flags);
1053 1051 mlen = SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags);
1054 1052 if (len > mlen || vaddr < mbase ||
1055 1053 ((vaddr + len) > (mbase + mlen))) {
1056 1054 mutex_exit(&kpd->kp_lock);
1057 1055 return (EACCES);
1058 1056 }
1059 1057 mutex_exit(&kpd->kp_lock);
1060 1058 return (0);
1061 1059 }
1062 1060
1063 1061
1064 1062 /*
1065 1063 * Check to see if it makes sense to do kluster/read ahead to
1066 1064 * addr + delta relative to the mapping at addr. We assume here
1067 1065 * that delta is a signed PAGESIZE'd multiple (which can be negative).
1068 1066 *
1069 1067 * For seg_u we always "approve" of this action from our standpoint.
1070 1068 */
1071 1069 /*ARGSUSED*/
1072 1070 static int
1073 1071 segkp_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
1074 1072 {
1075 1073 return (0);
1076 1074 }
1077 1075
1078 1076 /*
1079 1077 * Load and possibly lock intra-slot resources in the range given by
1080 1078 * vaddr and len.
1081 1079 */
1082 1080 static int
1083 1081 segkp_load(
1084 1082 struct hat *hat,
1085 1083 struct seg *seg,
1086 1084 caddr_t vaddr,
1087 1085 size_t len,
1088 1086 struct segkp_data *kpd,
1089 1087 uint_t flags)
1090 1088 {
1091 1089 caddr_t va;
1092 1090 caddr_t vlim;
1093 1091 ulong_t i;
1094 1092 uint_t lock;
1095 1093
1096 1094 ASSERT(MUTEX_HELD(&kpd->kp_lock));
1097 1095
1098 1096 len = P2ROUNDUP(len, PAGESIZE);
1099 1097
1100 1098 /* If locking, reserve physical memory */
1101 1099 if (flags & KPD_LOCKED) {
1102 1100 pgcnt_t pages = btop(len);
1103 1101 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
1104 1102 atomic_add_long(&anon_segkp_pages_locked, pages);
1105 1103 (void) page_resv(pages, KM_SLEEP);
1106 1104 }
1107 1105
1108 1106 /*
1109 1107 * Loop through the pages in the given range.
1110 1108 */
1111 1109 va = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
1112 1110 vaddr = va;
1113 1111 vlim = va + len;
1114 1112 lock = flags & KPD_LOCKED;
1115 1113 i = ((uintptr_t)(va - kpd->kp_base)) >> PAGESHIFT;
1116 1114 for (; va < vlim; va += PAGESIZE, i++) {
1117 1115 page_t *pl[2]; /* second element NULL terminator */
1118 1116 struct vnode *vp;
1119 1117 anoff_t off;
1120 1118 int err;
1121 1119 struct anon *ap;
1122 1120
1123 1121 /*
1124 1122 * Summon the page. If it's not resident, arrange
1125 1123 * for synchronous i/o to pull it in.
1126 1124 */
1127 1125 ap = anon_get_ptr(kpd->kp_anon, kpd->kp_anon_idx + i);
1128 1126 swap_xlate(ap, &vp, &off);
1129 1127
1130 1128 /*
1131 1129 * The returned page list will have exactly one entry,
1132 1130 * which is returned to us already kept.
1133 1131 */
1134 1132 err = VOP_GETPAGE(vp, (offset_t)off, PAGESIZE, NULL,
1135 1133 pl, PAGESIZE, seg, va, S_READ, kcred, NULL);
1136 1134
1137 1135 if (err) {
1138 1136 /*
1139 1137 * Back out of what we've done so far.
1140 1138 */
1141 1139 (void) segkp_unlock(hat, seg, vaddr,
1142 1140 (va - vaddr), kpd, flags);
1143 1141 return (err);
1144 1142 }
1145 1143
1146 1144 /*
1147 1145 * Load an MMU translation for the page.
1148 1146 */
1149 1147 hat_memload(hat, va, pl[0], (PROT_READ|PROT_WRITE),
1150 1148 lock ? HAT_LOAD_LOCK : HAT_LOAD);
1151 1149
1152 1150 if (!lock) {
1153 1151 /*
1154 1152 * Now, release "shared" lock on the page.
1155 1153 */
1156 1154 page_unlock(pl[0]);
1157 1155 }
1158 1156 }
1159 1157 return (0);
1160 1158 }
1161 1159
1162 1160 /*
1163 1161 * At the very least unload the mmu-translations and unlock the range if locked
1164 1162 * Can be called with the following flag value KPD_WRITEDIRTY which specifies
1165 1163 * any dirty pages should be written to disk.
1166 1164 */
1167 1165 static int
1168 1166 segkp_unlock(
1169 1167 struct hat *hat,
1170 1168 struct seg *seg,
1171 1169 caddr_t vaddr,
1172 1170 size_t len,
1173 1171 struct segkp_data *kpd,
1174 1172 uint_t flags)
1175 1173 {
1176 1174 caddr_t va;
1177 1175 caddr_t vlim;
1178 1176 ulong_t i;
1179 1177 struct page *pp;
1180 1178 struct vnode *vp;
1181 1179 anoff_t off;
1182 1180 struct anon *ap;
1183 1181
1184 1182 #ifdef lint
1185 1183 seg = seg;
1186 1184 #endif /* lint */
1187 1185
1188 1186 ASSERT(MUTEX_HELD(&kpd->kp_lock));
1189 1187
1190 1188 /*
1191 1189 * Loop through the pages in the given range. It is assumed
1192 1190 * segkp_unlock is called with page aligned base
1193 1191 */
1194 1192 va = vaddr;
1195 1193 vlim = va + len;
1196 1194 i = ((uintptr_t)(va - kpd->kp_base)) >> PAGESHIFT;
1197 1195 hat_unload(hat, va, len,
1198 1196 ((flags & KPD_LOCKED) ? HAT_UNLOAD_UNLOCK : HAT_UNLOAD));
1199 1197 for (; va < vlim; va += PAGESIZE, i++) {
1200 1198 /*
1201 1199 * Find the page associated with this part of the
1202 1200 * slot, tracking it down through its associated swap
1203 1201 * space.
1204 1202 */
1205 1203 ap = anon_get_ptr(kpd->kp_anon, kpd->kp_anon_idx + i);
1206 1204 swap_xlate(ap, &vp, &off);
1207 1205
1208 1206 if (flags & KPD_LOCKED) {
1209 1207 if ((pp = page_find(vp, off)) == NULL) {
1210 1208 if (flags & KPD_LOCKED) {
1211 1209 panic("segkp_softunlock: missing page");
1212 1210 /*NOTREACHED*/
1213 1211 }
1214 1212 }
1215 1213 } else {
1216 1214 /*
1217 1215 * Nothing to do if the slot is not locked and the
1218 1216 * page doesn't exist.
1219 1217 */
1220 1218 if ((pp = page_lookup(vp, off, SE_SHARED)) == NULL)
1221 1219 continue;
1222 1220 }
1223 1221
1224 1222 /*
1225 1223 * If the page doesn't have any translations, is
1226 1224 * dirty and not being shared, then push it out
1227 1225 * asynchronously and avoid waiting for the
1228 1226 * pageout daemon to do it for us.
1229 1227 *
1230 1228 * XXX - Do we really need to get the "exclusive"
1231 1229 * lock via an upgrade?
1232 1230 */
1233 1231 if ((flags & KPD_WRITEDIRTY) && !hat_page_is_mapped(pp) &&
1234 1232 hat_ismod(pp) && page_tryupgrade(pp)) {
1235 1233 /*
1236 1234 * Hold the vnode before releasing the page lock to
1237 1235 * prevent it from being freed and re-used by some
1238 1236 * other thread.
1239 1237 */
1240 1238 VN_HOLD(vp);
1241 1239 page_unlock(pp);
1242 1240
1243 1241 /*
1244 1242 * Want most powerful credentials we can get so
1245 1243 * use kcred.
1246 1244 */
1247 1245 (void) VOP_PUTPAGE(vp, (offset_t)off, PAGESIZE,
1248 1246 B_ASYNC | B_FREE, kcred, NULL);
1249 1247 VN_RELE(vp);
1250 1248 } else {
1251 1249 page_unlock(pp);
1252 1250 }
1253 1251 }
1254 1252
1255 1253 /* If unlocking, release physical memory */
1256 1254 if (flags & KPD_LOCKED) {
1257 1255 pgcnt_t pages = btopr(len);
1258 1256 if ((kpd->kp_flags & KPD_NO_ANON) == 0)
1259 1257 atomic_add_long(&anon_segkp_pages_locked, -pages);
1260 1258 page_unresv(pages);
1261 1259 }
1262 1260 return (0);
1263 1261 }
1264 1262
1265 1263 /*
1266 1264 * Insert the kpd in the hash table.
1267 1265 */
1268 1266 static void
1269 1267 segkp_insert(struct seg *seg, struct segkp_data *kpd)
1270 1268 {
1271 1269 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1272 1270 int index;
1273 1271
1274 1272 /*
1275 1273 * Insert the kpd based on the address that will be returned
1276 1274 * via segkp_release.
1277 1275 */
1278 1276 index = SEGKP_HASH(stom(kpd->kp_base, kpd->kp_flags));
1279 1277 mutex_enter(&segkp_lock);
1280 1278 kpd->kp_next = kpsd->kpsd_hash[index];
1281 1279 kpsd->kpsd_hash[index] = kpd;
1282 1280 mutex_exit(&segkp_lock);
1283 1281 }
1284 1282
1285 1283 /*
1286 1284 * Remove kpd from the hash table.
1287 1285 */
1288 1286 static void
1289 1287 segkp_delete(struct seg *seg, struct segkp_data *kpd)
1290 1288 {
1291 1289 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1292 1290 struct segkp_data **kpp;
1293 1291 int index;
1294 1292
1295 1293 ASSERT(MUTEX_HELD(&segkp_lock));
1296 1294
1297 1295 index = SEGKP_HASH(stom(kpd->kp_base, kpd->kp_flags));
1298 1296 for (kpp = &kpsd->kpsd_hash[index];
1299 1297 *kpp != NULL; kpp = &((*kpp)->kp_next)) {
1300 1298 if (*kpp == kpd) {
1301 1299 *kpp = kpd->kp_next;
1302 1300 return;
1303 1301 }
1304 1302 }
1305 1303 panic("segkp_delete: unable to find element to delete");
1306 1304 /*NOTREACHED*/
1307 1305 }
1308 1306
1309 1307 /*
1310 1308 * Find the kpd associated with a vaddr.
1311 1309 *
1312 1310 * Most of the callers of segkp_find will pass the vaddr that
1313 1311 * hashes to the desired index, but there are cases where
1314 1312 * this is not true in which case we have to (potentially) scan
1315 1313 * the whole table looking for it. This should be very rare
1316 1314 * (e.g. a segkp_fault(F_INVAL) on an address somewhere in the
1317 1315 * middle of the segkp_data region).
1318 1316 */
1319 1317 static struct segkp_data *
1320 1318 segkp_find(struct seg *seg, caddr_t vaddr)
1321 1319 {
1322 1320 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1323 1321 struct segkp_data *kpd;
1324 1322 int i;
1325 1323 int stop;
1326 1324
1327 1325 i = stop = SEGKP_HASH(vaddr);
1328 1326 mutex_enter(&segkp_lock);
1329 1327 do {
1330 1328 for (kpd = kpsd->kpsd_hash[i]; kpd != NULL;
1331 1329 kpd = kpd->kp_next) {
1332 1330 if (vaddr >= kpd->kp_base &&
1333 1331 vaddr < kpd->kp_base + kpd->kp_len) {
1334 1332 mutex_exit(&segkp_lock);
1335 1333 return (kpd);
1336 1334 }
1337 1335 }
1338 1336 if (--i < 0)
1339 1337 i = SEGKP_HASHSZ - 1; /* Wrap */
1340 1338 } while (i != stop);
1341 1339 mutex_exit(&segkp_lock);
1342 1340 return (NULL); /* Not found */
1343 1341 }
1344 1342
1345 1343 /*
1346 1344 * returns size of swappable area.
1347 1345 */
1348 1346 size_t
1349 1347 swapsize(caddr_t v)
1350 1348 {
1351 1349 struct segkp_data *kpd;
1352 1350
1353 1351 if ((kpd = segkp_find(segkp, v)) != NULL)
1354 1352 return (SEGKP_MAPLEN(kpd->kp_len, kpd->kp_flags));
1355 1353 else
1356 1354 return (NULL);
1357 1355 }
1358 1356
1359 1357 /*
1360 1358 * Dump out all the active segkp pages
1361 1359 */
1362 1360 static void
1363 1361 segkp_dump(struct seg *seg)
1364 1362 {
1365 1363 int i;
1366 1364 struct segkp_data *kpd;
1367 1365 struct segkp_segdata *kpsd = (struct segkp_segdata *)seg->s_data;
1368 1366
1369 1367 for (i = 0; i < SEGKP_HASHSZ; i++) {
1370 1368 for (kpd = kpsd->kpsd_hash[i];
1371 1369 kpd != NULL; kpd = kpd->kp_next) {
1372 1370 pfn_t pfn;
1373 1371 caddr_t addr;
1374 1372 caddr_t eaddr;
1375 1373
1376 1374 addr = kpd->kp_base;
1377 1375 eaddr = addr + kpd->kp_len;
1378 1376 while (addr < eaddr) {
1379 1377 ASSERT(seg->s_as == &kas);
1380 1378 pfn = hat_getpfnum(seg->s_as->a_hat, addr);
1381 1379 if (pfn != PFN_INVALID)
1382 1380 dump_addpage(seg->s_as, addr, pfn);
1383 1381 addr += PAGESIZE;
1384 1382 dump_timeleft = dump_timeout;
1385 1383 }
1386 1384 }
1387 1385 }
1388 1386 }
1389 1387
1390 1388 /*ARGSUSED*/
1391 1389 static int
1392 1390 segkp_pagelock(struct seg *seg, caddr_t addr, size_t len,
1393 1391 struct page ***ppp, enum lock_type type, enum seg_rw rw)
1394 1392 {
1395 1393 return (ENOTSUP);
1396 1394 }
1397 1395
1398 1396 /*ARGSUSED*/
1399 1397 static int
1400 1398 segkp_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
1401 1399 {
1402 1400 return (ENODEV);
1403 -}
1404 -
1405 -/*ARGSUSED*/
1406 -static int
1407 -segkp_capable(struct seg *seg, segcapability_t capability)
1408 -{
1409 - return (0);
1410 1401 }
1411 1402
1412 1403 #include <sys/mem_config.h>
1413 1404
1414 1405 /*ARGSUSED*/
1415 1406 static void
1416 1407 segkp_mem_config_post_add(void *arg, pgcnt_t delta_pages)
1417 1408 {}
1418 1409
1419 1410 /*
1420 1411 * During memory delete, turn off caches so that pages are not held.
1421 1412 * A better solution may be to unlock the pages while they are
1422 1413 * in the cache so that they may be collected naturally.
1423 1414 */
1424 1415
1425 1416 /*ARGSUSED*/
1426 1417 static int
1427 1418 segkp_mem_config_pre_del(void *arg, pgcnt_t delta_pages)
1428 1419 {
1429 1420 atomic_inc_32(&segkp_indel);
1430 1421 segkp_cache_free();
1431 1422 return (0);
1432 1423 }
1433 1424
1434 1425 /*ARGSUSED*/
1435 1426 static void
1436 1427 segkp_mem_config_post_del(void *arg, pgcnt_t delta_pages, int cancelled)
1437 1428 {
1438 1429 atomic_dec_32(&segkp_indel);
1439 1430 }
1440 1431
1441 1432 static kphysm_setup_vector_t segkp_mem_config_vec = {
1442 1433 KPHYSM_SETUP_VECTOR_VERSION,
1443 1434 segkp_mem_config_post_add,
1444 1435 segkp_mem_config_pre_del,
1445 1436 segkp_mem_config_post_del,
1446 1437 };
1447 1438
1448 1439 static void
1449 1440 segkpinit_mem_config(struct seg *seg)
1450 1441 {
1451 1442 int ret;
1452 1443
1453 1444 ret = kphysm_setup_func_register(&segkp_mem_config_vec, (void *)seg);
1454 1445 ASSERT(ret == 0);
1455 1446 }