Print this page
5382 pvn_getpages handles lengths <= PAGESIZE just fine
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/swapfs/swap_vnops.c
+++ new/usr/src/uts/common/fs/swapfs/swap_vnops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 + * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
23 24 */
24 25
25 26 #include <sys/types.h>
26 27 #include <sys/param.h>
27 28 #include <sys/systm.h>
28 29 #include <sys/buf.h>
29 30 #include <sys/cred.h>
30 31 #include <sys/errno.h>
31 32 #include <sys/vnode.h>
32 33 #include <sys/vfs_opreg.h>
33 34 #include <sys/cmn_err.h>
34 35 #include <sys/swap.h>
35 36 #include <sys/mman.h>
36 37 #include <sys/vmsystm.h>
37 38 #include <sys/vtrace.h>
38 39 #include <sys/debug.h>
39 40 #include <sys/sysmacros.h>
40 41 #include <sys/vm.h>
41 42
42 43 #include <sys/fs/swapnode.h>
43 44
44 45 #include <vm/seg.h>
45 46 #include <vm/page.h>
46 47 #include <vm/pvn.h>
47 48 #include <fs/fs_subr.h>
48 49
49 50 #include <vm/seg_kp.h>
50 51
51 52 /*
52 53 * Define the routines within this file.
53 54 */
54 55 static int swap_getpage(struct vnode *vp, offset_t off, size_t len,
55 56 uint_t *protp, struct page **plarr, size_t plsz, struct seg *seg,
56 57 caddr_t addr, enum seg_rw rw, struct cred *cr, caller_context_t *ct);
57 58 static int swap_putpage(struct vnode *vp, offset_t off, size_t len,
58 59 int flags, struct cred *cr, caller_context_t *ct);
59 60 static void swap_inactive(struct vnode *vp, struct cred *cr,
60 61 caller_context_t *ct);
61 62 static void swap_dispose(vnode_t *vp, page_t *pp, int fl, int dn,
62 63 cred_t *cr, caller_context_t *ct);
63 64
64 65 static int swap_getapage(struct vnode *vp, u_offset_t off, size_t len,
65 66 uint_t *protp, page_t **plarr, size_t plsz,
66 67 struct seg *seg, caddr_t addr, enum seg_rw rw, struct cred *cr);
67 68
68 69 int swap_getconpage(struct vnode *vp, u_offset_t off, size_t len,
69 70 uint_t *protp, page_t **plarr, size_t plsz, page_t *conpp,
70 71 uint_t *pszc, spgcnt_t *nreloc, struct seg *seg, caddr_t addr,
71 72 enum seg_rw rw, struct cred *cr);
72 73
73 74 static int swap_putapage(struct vnode *vp, page_t *pp, u_offset_t *off,
74 75 size_t *lenp, int flags, struct cred *cr);
75 76
76 77 const fs_operation_def_t swap_vnodeops_template[] = {
77 78 VOPNAME_INACTIVE, { .vop_inactive = swap_inactive },
78 79 VOPNAME_GETPAGE, { .vop_getpage = swap_getpage },
79 80 VOPNAME_PUTPAGE, { .vop_putpage = swap_putpage },
80 81 VOPNAME_DISPOSE, { .vop_dispose = swap_dispose },
81 82 VOPNAME_SETFL, { .error = fs_error },
82 83 VOPNAME_POLL, { .error = fs_error },
83 84 VOPNAME_PATHCONF, { .error = fs_error },
84 85 VOPNAME_GETSECATTR, { .error = fs_error },
85 86 VOPNAME_SHRLOCK, { .error = fs_error },
86 87 NULL, NULL
87 88 };
88 89
89 90 vnodeops_t *swap_vnodeops;
90 91
91 92 /* ARGSUSED */
92 93 static void
93 94 swap_inactive(
94 95 struct vnode *vp,
95 96 struct cred *cr,
96 97 caller_context_t *ct)
97 98 {
98 99 SWAPFS_PRINT(SWAP_VOPS, "swap_inactive: vp %x\n", vp, 0, 0, 0, 0);
99 100 }
100 101
101 102 /*
102 103 * Return all the pages from [off..off+len] in given file
103 104 */
104 105 /*ARGSUSED*/
105 106 static int
106 107 swap_getpage(
107 108 struct vnode *vp,
108 109 offset_t off,
↓ open down ↓ |
76 lines elided |
↑ open up ↑ |
109 110 size_t len,
110 111 uint_t *protp,
111 112 page_t *pl[],
112 113 size_t plsz,
113 114 struct seg *seg,
114 115 caddr_t addr,
115 116 enum seg_rw rw,
116 117 struct cred *cr,
117 118 caller_context_t *ct)
118 119 {
119 - int err;
120 -
121 120 SWAPFS_PRINT(SWAP_VOPS, "swap_getpage: vp %p, off %llx, len %lx\n",
122 121 (void *)vp, off, len, 0, 0);
123 122
124 123 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETPAGE,
125 124 "swapfs getpage:vp %p off %llx len %ld",
126 125 (void *)vp, off, len);
127 126
128 - if (len <= PAGESIZE) {
129 - err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
130 - seg, addr, rw, cr);
131 - } else {
132 - err = pvn_getpages(swap_getapage, vp, (u_offset_t)off, len,
133 - protp, pl, plsz, seg, addr, rw, cr);
134 - }
135 -
136 - return (err);
127 + return (pvn_getpages(swap_getapage, vp, (u_offset_t)off, len, protp,
128 + pl, plsz, seg, addr, rw, cr));
137 129 }
138 130
139 131 /*
140 - * Called from pvn_getpages or swap_getpage to get a particular page.
132 + * Called from pvn_getpages to get a particular page.
141 133 */
142 134 /*ARGSUSED*/
143 135 static int
144 136 swap_getapage(
145 137 struct vnode *vp,
146 138 u_offset_t off,
147 139 size_t len,
148 140 uint_t *protp,
149 141 page_t *pl[],
150 142 size_t plsz,
151 143 struct seg *seg,
152 144 caddr_t addr,
153 145 enum seg_rw rw,
154 146 struct cred *cr)
155 147 {
156 148 struct page *pp, *rpp;
157 149 int flags;
158 150 int err = 0;
159 151 struct vnode *pvp = NULL;
160 152 u_offset_t poff;
161 153 int flag_noreloc;
162 154 se_t lock;
163 155 extern int kcage_on;
164 156 int upgrade = 0;
165 157
166 158 SWAPFS_PRINT(SWAP_VOPS, "swap_getapage: vp %p, off %llx, len %lx\n",
167 159 vp, off, len, 0, 0);
168 160
169 161 /*
170 162 * Until there is a call-back mechanism to cause SEGKP
171 163 * pages to be unlocked, make them non-relocatable.
172 164 */
173 165 if (SEG_IS_SEGKP(seg))
174 166 flag_noreloc = PG_NORELOC;
175 167 else
176 168 flag_noreloc = 0;
177 169
178 170 if (protp != NULL)
179 171 *protp = PROT_ALL;
180 172
181 173 lock = (rw == S_CREATE ? SE_EXCL : SE_SHARED);
182 174
183 175 again:
184 176 if (pp = page_lookup(vp, off, lock)) {
185 177 /*
186 178 * In very rare instances, a segkp page may have been
187 179 * relocated outside of the kernel by the kernel cage
188 180 * due to the window between page_unlock() and
189 181 * VOP_PUTPAGE() in segkp_unlock(). Due to the
 190 182 	 * rareness of these occurrences, the solution is to
191 183 * relocate the page to a P_NORELOC page.
192 184 */
193 185 if (flag_noreloc != 0) {
194 186 if (!PP_ISNORELOC(pp) && kcage_on) {
195 187 if (lock != SE_EXCL) {
196 188 upgrade = 1;
197 189 if (!page_tryupgrade(pp)) {
198 190 page_unlock(pp);
199 191 lock = SE_EXCL;
200 192 goto again;
201 193 }
202 194 }
203 195
204 196 if (page_relocate_cage(&pp, &rpp) != 0)
205 197 panic("swap_getapage: "
206 198 "page_relocate_cage failed");
207 199
208 200 pp = rpp;
209 201 }
210 202 }
211 203
212 204 if (pl) {
213 205 if (upgrade)
214 206 page_downgrade(pp);
215 207
216 208 pl[0] = pp;
217 209 pl[1] = NULL;
218 210 } else {
219 211 page_unlock(pp);
220 212 }
221 213 } else {
222 214 pp = page_create_va(vp, off, PAGESIZE,
223 215 PG_WAIT | PG_EXCL | flag_noreloc,
224 216 seg, addr);
225 217 /*
226 218 * Someone raced in and created the page after we did the
227 219 * lookup but before we did the create, so go back and
228 220 * try to look it up again.
229 221 */
230 222 if (pp == NULL)
231 223 goto again;
232 224 if (rw != S_CREATE) {
233 225 err = swap_getphysname(vp, off, &pvp, &poff);
234 226 if (pvp) {
235 227 struct anon *ap;
236 228 kmutex_t *ahm;
237 229
238 230 flags = (pl == NULL ? B_ASYNC|B_READ : B_READ);
239 231 err = VOP_PAGEIO(pvp, pp, poff,
240 232 PAGESIZE, flags, cr, NULL);
241 233
242 234 if (!err) {
243 235 ahm = AH_MUTEX(vp, off);
244 236 mutex_enter(ahm);
245 237
246 238 ap = swap_anon(vp, off);
247 239 if (ap == NULL) {
248 240 panic("swap_getapage:"
249 241 " null anon");
250 242 }
251 243
252 244 if (ap->an_pvp == pvp &&
253 245 ap->an_poff == poff) {
254 246 swap_phys_free(pvp, poff,
255 247 PAGESIZE);
256 248 ap->an_pvp = NULL;
257 249 ap->an_poff = NULL;
258 250 hat_setmod(pp);
259 251 }
260 252
261 253 mutex_exit(ahm);
262 254 }
263 255 } else {
264 256 if (!err)
265 257 pagezero(pp, 0, PAGESIZE);
266 258
267 259 /*
268 260 * If it's a fault ahead, release page_io_lock
269 261 * and SE_EXCL we grabbed in page_create_va
270 262 *
271 263 * If we are here, we haven't called VOP_PAGEIO
272 264 * and thus calling pvn_read_done(pp, B_READ)
273 265 * below may mislead that we tried i/o. Besides,
274 266 * in case of async, pvn_read_done() should
275 267 * not be called by *getpage()
276 268 */
277 269 if (pl == NULL) {
278 270 /*
279 271 * swap_getphysname can return error
280 272 * only when we are getting called from
281 273 * swapslot_free which passes non-NULL
282 274 * pl to VOP_GETPAGE.
283 275 */
284 276 ASSERT(err == 0);
285 277 page_io_unlock(pp);
286 278 page_unlock(pp);
287 279 }
288 280 }
289 281 }
290 282
291 283 ASSERT(pp != NULL);
292 284
293 285 if (err && pl)
294 286 pvn_read_done(pp, B_ERROR);
295 287
296 288 if (!err && pl)
297 289 pvn_plist_init(pp, pl, plsz, off, PAGESIZE, rw);
298 290 }
299 291 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
300 292 "swapfs getapage:pp %p vp %p off %llx", pp, vp, off);
301 293 return (err);
302 294 }
303 295
304 296 /*
305 297 * Called from large page anon routines only! This is an ugly hack where
306 298 * the anon layer directly calls into swapfs with a preallocated large page.
307 299 * Another method would have been to change to VOP and add an extra arg for
308 300 * the preallocated large page. This all could be cleaned up later when we
 309 301 	 * solve the anonymous naming problem and no longer need to loop across
310 302 * the VOP in PAGESIZE increments to fill in or initialize a large page as
 311 303 	 * is done today. I think the latter is better since it avoids a change to
312 304 * the VOP interface that could later be avoided.
313 305 */
314 306 int
315 307 swap_getconpage(
316 308 struct vnode *vp,
317 309 u_offset_t off,
318 310 size_t len,
319 311 uint_t *protp,
320 312 page_t *pl[],
321 313 size_t plsz,
322 314 page_t *conpp,
323 315 uint_t *pszc,
324 316 spgcnt_t *nreloc,
325 317 struct seg *seg,
326 318 caddr_t addr,
327 319 enum seg_rw rw,
328 320 struct cred *cr)
329 321 {
330 322 struct page *pp;
331 323 int err = 0;
332 324 struct vnode *pvp = NULL;
333 325 u_offset_t poff;
334 326
335 327 ASSERT(len == PAGESIZE);
336 328 ASSERT(pl != NULL);
337 329 ASSERT(plsz == PAGESIZE);
338 330 ASSERT(protp == NULL);
339 331 ASSERT(nreloc != NULL);
340 332 ASSERT(!SEG_IS_SEGKP(seg)); /* XXX for now not supported */
341 333 SWAPFS_PRINT(SWAP_VOPS, "swap_getconpage: vp %p, off %llx, len %lx\n",
342 334 vp, off, len, 0, 0);
343 335
344 336 /*
345 337 * If we are not using a preallocated page then we know one already
346 338 * exists. So just let the old code handle it.
347 339 */
348 340 if (conpp == NULL) {
349 341 err = swap_getapage(vp, (u_offset_t)off, len, protp, pl, plsz,
350 342 seg, addr, rw, cr);
351 343 return (err);
352 344 }
353 345 ASSERT(conpp->p_szc != 0);
354 346 ASSERT(PAGE_EXCL(conpp));
355 347
356 348
357 349 ASSERT(conpp->p_next == conpp);
358 350 ASSERT(conpp->p_prev == conpp);
359 351 ASSERT(!PP_ISAGED(conpp));
360 352 ASSERT(!PP_ISFREE(conpp));
361 353
362 354 *nreloc = 0;
363 355 pp = page_lookup_create(vp, off, SE_SHARED, conpp, nreloc, 0);
364 356
365 357 /*
366 358 * If existing page is found we may need to relocate.
367 359 */
368 360 if (pp != conpp) {
369 361 ASSERT(rw != S_CREATE);
370 362 ASSERT(pszc != NULL);
371 363 ASSERT(PAGE_SHARED(pp));
372 364 if (pp->p_szc < conpp->p_szc) {
373 365 *pszc = pp->p_szc;
374 366 page_unlock(pp);
375 367 err = -1;
376 368 } else if (pp->p_szc > conpp->p_szc &&
377 369 seg->s_szc > conpp->p_szc) {
378 370 *pszc = MIN(pp->p_szc, seg->s_szc);
379 371 page_unlock(pp);
380 372 err = -2;
381 373 } else {
382 374 pl[0] = pp;
383 375 pl[1] = NULL;
384 376 if (page_pptonum(pp) &
385 377 (page_get_pagecnt(conpp->p_szc) - 1))
386 378 cmn_err(CE_PANIC, "swap_getconpage: no root");
387 379 }
388 380 return (err);
389 381 }
390 382
391 383 ASSERT(PAGE_EXCL(pp));
392 384
393 385 if (*nreloc != 0) {
394 386 ASSERT(rw != S_CREATE);
395 387 pl[0] = pp;
396 388 pl[1] = NULL;
397 389 return (0);
398 390 }
399 391
400 392 *nreloc = 1;
401 393
402 394 /*
403 395 * If necessary do the page io.
404 396 */
405 397 if (rw != S_CREATE) {
406 398 /*
407 399 * Since we are only called now on behalf of an
408 400 * address space operation it's impossible for
 409 401 		 * us to fail unlike swap_getapage() which
410 402 * also gets called from swapslot_free().
411 403 */
412 404 if (swap_getphysname(vp, off, &pvp, &poff)) {
413 405 cmn_err(CE_PANIC,
414 406 "swap_getconpage: swap_getphysname failed!");
415 407 }
416 408
417 409 if (pvp != NULL) {
418 410 err = VOP_PAGEIO(pvp, pp, poff, PAGESIZE, B_READ,
419 411 cr, NULL);
420 412 if (err == 0) {
421 413 struct anon *ap;
422 414 kmutex_t *ahm;
423 415
424 416 ahm = AH_MUTEX(vp, off);
425 417 mutex_enter(ahm);
426 418 ap = swap_anon(vp, off);
427 419 if (ap == NULL)
428 420 panic("swap_getconpage: null anon");
429 421 if (ap->an_pvp != pvp || ap->an_poff != poff)
430 422 panic("swap_getconpage: bad anon");
431 423
432 424 swap_phys_free(pvp, poff, PAGESIZE);
433 425 ap->an_pvp = NULL;
434 426 ap->an_poff = NULL;
435 427 hat_setmod(pp);
436 428 mutex_exit(ahm);
437 429 }
438 430 } else {
439 431 pagezero(pp, 0, PAGESIZE);
440 432 }
441 433 }
442 434
443 435 /*
444 436 * Normally we would let pvn_read_done() destroy
445 437 * the page on IO error. But since this is a preallocated
446 438 * page we'll let the anon layer handle it.
447 439 */
448 440 page_io_unlock(pp);
449 441 if (err != 0)
450 442 page_hashout(pp, NULL);
451 443 ASSERT(pp->p_next == pp);
452 444 ASSERT(pp->p_prev == pp);
453 445
454 446 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_GETAPAGE,
455 447 "swapfs getconpage:pp %p vp %p off %llx", pp, vp, off);
456 448
457 449 pl[0] = pp;
458 450 pl[1] = NULL;
459 451 return (err);
460 452 }
461 453
462 454 /* Async putpage klustering stuff */
463 455 int sw_pending_size;
464 456 extern int klustsize;
465 457 extern struct async_reqs *sw_getreq();
466 458 extern void sw_putreq(struct async_reqs *);
467 459 extern void sw_putbackreq(struct async_reqs *);
468 460 extern struct async_reqs *sw_getfree();
469 461 extern void sw_putfree(struct async_reqs *);
470 462
471 463 static size_t swap_putpagecnt, swap_pagespushed;
472 464 static size_t swap_otherfail, swap_otherpages;
473 465 static size_t swap_klustfail, swap_klustpages;
474 466 static size_t swap_getiofail, swap_getiopages;
475 467
476 468 /*
477 469 * Flags are composed of {B_INVAL, B_DIRTY B_FREE, B_DONTNEED}.
478 470 * If len == 0, do from off to EOF.
479 471 */
480 472 static int swap_nopage = 0; /* Don't do swap_putpage's if set */
481 473
482 474 /* ARGSUSED */
483 475 static int
484 476 swap_putpage(
485 477 struct vnode *vp,
486 478 offset_t off,
487 479 size_t len,
488 480 int flags,
489 481 struct cred *cr,
490 482 caller_context_t *ct)
491 483 {
492 484 page_t *pp;
493 485 u_offset_t io_off;
494 486 size_t io_len = 0;
495 487 int err = 0;
496 488 int nowait;
497 489 struct async_reqs *arg;
498 490
499 491 if (swap_nopage)
500 492 return (0);
501 493
502 494 ASSERT(vp->v_count != 0);
503 495
504 496 nowait = flags & B_PAGE_NOWAIT;
505 497
506 498 /*
507 499 * Clear force flag so that p_lckcnt pages are not invalidated.
508 500 */
509 501 flags &= ~(B_FORCE | B_PAGE_NOWAIT);
510 502
511 503 SWAPFS_PRINT(SWAP_VOPS,
512 504 "swap_putpage: vp %p, off %llx len %lx, flags %x\n",
513 505 (void *)vp, off, len, flags, 0);
514 506 TRACE_3(TR_FAC_SWAPFS, TR_SWAPFS_PUTPAGE,
515 507 "swapfs putpage:vp %p off %llx len %ld", (void *)vp, off, len);
516 508
517 509 if (vp->v_flag & VNOMAP)
518 510 return (ENOSYS);
519 511
520 512 if (!vn_has_cached_data(vp))
521 513 return (0);
522 514
523 515 if (len == 0) {
524 516 if (curproc == proc_pageout)
525 517 cmn_err(CE_PANIC, "swapfs: pageout can't block");
526 518
527 519 /* Search the entire vp list for pages >= off. */
528 520 err = pvn_vplist_dirty(vp, (u_offset_t)off, swap_putapage,
529 521 flags, cr);
530 522 } else {
531 523 u_offset_t eoff;
532 524
533 525 /*
534 526 * Loop over all offsets in the range [off...off + len]
535 527 * looking for pages to deal with.
536 528 */
537 529 eoff = off + len;
538 530 for (io_off = (u_offset_t)off; io_off < eoff;
539 531 io_off += io_len) {
540 532 /*
541 533 * If we run out of the async req slot, put the page
542 534 * now instead of queuing.
543 535 */
544 536 if (flags == (B_ASYNC | B_FREE) &&
545 537 sw_pending_size < klustsize &&
546 538 (arg = sw_getfree())) {
547 539 /*
548 540 * If we are clustering, we should allow
549 541 * pageout to feed us more pages because # of
550 542 * pushes is limited by # of I/Os, and one
551 543 * cluster is considered to be one I/O.
552 544 */
553 545 if (pushes)
554 546 pushes--;
555 547
556 548 arg->a_vp = vp;
557 549 arg->a_off = io_off;
558 550 arg->a_len = PAGESIZE;
559 551 arg->a_flags = B_ASYNC | B_FREE;
560 552 arg->a_cred = kcred;
561 553 sw_putreq(arg);
562 554 io_len = PAGESIZE;
563 555 continue;
564 556 }
565 557 /*
566 558 * If we are not invalidating pages, use the
567 559 * routine page_lookup_nowait() to prevent
568 560 * reclaiming them from the free list.
569 561 */
570 562 if (!nowait && ((flags & B_INVAL) ||
571 563 (flags & (B_ASYNC | B_FREE)) == B_FREE))
572 564 pp = page_lookup(vp, io_off, SE_EXCL);
573 565 else
574 566 pp = page_lookup_nowait(vp, io_off,
575 567 (flags & (B_FREE | B_INVAL)) ?
576 568 SE_EXCL : SE_SHARED);
577 569
578 570 if (pp == NULL || pvn_getdirty(pp, flags) == 0)
579 571 io_len = PAGESIZE;
580 572 else {
581 573 err = swap_putapage(vp, pp, &io_off, &io_len,
582 574 flags, cr);
583 575 if (err != 0)
584 576 break;
585 577 }
586 578 }
587 579 }
588 580 /* If invalidating, verify all pages on vnode list are gone. */
589 581 if (err == 0 && off == 0 && len == 0 &&
590 582 (flags & B_INVAL) && vn_has_cached_data(vp)) {
591 583 cmn_err(CE_WARN,
592 584 "swap_putpage: B_INVAL, pages not gone");
593 585 }
594 586 return (err);
595 587 }
596 588
597 589 /*
598 590 * Write out a single page.
599 591 * For swapfs this means choose a physical swap slot and write the page
600 592 * out using VOP_PAGEIO.
601 593 * In the (B_ASYNC | B_FREE) case we try to find a bunch of other dirty
602 594 * swapfs pages, a bunch of contiguous swap slots and then write them
603 595 * all out in one clustered i/o.
604 596 */
605 597 /*ARGSUSED*/
606 598 static int
607 599 swap_putapage(
608 600 struct vnode *vp,
609 601 page_t *pp,
610 602 u_offset_t *offp,
611 603 size_t *lenp,
612 604 int flags,
613 605 struct cred *cr)
614 606 {
615 607 int err;
616 608 struct vnode *pvp;
617 609 u_offset_t poff, off;
618 610 u_offset_t doff;
619 611 size_t dlen;
620 612 size_t klsz = 0;
621 613 u_offset_t klstart = 0;
622 614 struct vnode *klvp = NULL;
623 615 page_t *pplist;
624 616 se_t se;
625 617 struct async_reqs *arg;
626 618 size_t swap_klustsize;
627 619
628 620 /*
629 621 * This check is added for callers who access swap_putpage with len = 0.
630 622 * swap_putpage calls swap_putapage page-by-page via pvn_vplist_dirty.
631 623 * And it's necessary to do the same queuing if users have the same
632 624 * B_ASYNC|B_FREE flags on.
633 625 */
634 626 if (flags == (B_ASYNC | B_FREE) &&
635 627 sw_pending_size < klustsize && (arg = sw_getfree())) {
636 628
637 629 hat_setmod(pp);
638 630 page_io_unlock(pp);
639 631 page_unlock(pp);
640 632
641 633 arg->a_vp = vp;
642 634 arg->a_off = pp->p_offset;
643 635 arg->a_len = PAGESIZE;
644 636 arg->a_flags = B_ASYNC | B_FREE;
645 637 arg->a_cred = kcred;
646 638 sw_putreq(arg);
647 639
648 640 return (0);
649 641 }
650 642
651 643 SWAPFS_PRINT(SWAP_PUTP,
652 644 "swap_putapage: pp %p, vp %p, off %llx, flags %x\n",
653 645 pp, vp, pp->p_offset, flags, 0);
654 646
655 647 ASSERT(PAGE_LOCKED(pp));
656 648
657 649 off = pp->p_offset;
658 650
659 651 doff = off;
660 652 dlen = PAGESIZE;
661 653
662 654 if (err = swap_newphysname(vp, off, &doff, &dlen, &pvp, &poff)) {
663 655 err = (flags == (B_ASYNC | B_FREE) ? ENOMEM : 0);
664 656 hat_setmod(pp);
665 657 page_io_unlock(pp);
666 658 page_unlock(pp);
667 659 goto out;
668 660 }
669 661
670 662 klvp = pvp;
671 663 klstart = poff;
672 664 pplist = pp;
673 665 /*
674 666 * If this is ASYNC | FREE and we've accumulated a bunch of such
675 667 * pending requests, kluster.
676 668 */
677 669 if (flags == (B_ASYNC | B_FREE))
678 670 swap_klustsize = klustsize;
679 671 else
680 672 swap_klustsize = PAGESIZE;
681 673 se = (flags & B_FREE ? SE_EXCL : SE_SHARED);
682 674 klsz = PAGESIZE;
683 675 while (klsz < swap_klustsize) {
684 676 if ((arg = sw_getreq()) == NULL) {
685 677 swap_getiofail++;
686 678 swap_getiopages += btop(klsz);
687 679 break;
688 680 }
689 681 ASSERT(vn_matchops(arg->a_vp, swap_vnodeops));
690 682 vp = arg->a_vp;
691 683 off = arg->a_off;
692 684
693 685 if ((pp = page_lookup_nowait(vp, off, se)) == NULL) {
694 686 swap_otherfail++;
695 687 swap_otherpages += btop(klsz);
696 688 sw_putfree(arg);
697 689 break;
698 690 }
699 691 if (pvn_getdirty(pp, flags | B_DELWRI) == 0) {
700 692 sw_putfree(arg);
701 693 continue;
702 694 }
703 695 /* Get new physical backing store for the page */
704 696 doff = off;
705 697 dlen = PAGESIZE;
706 698 if (err = swap_newphysname(vp, off, &doff, &dlen,
707 699 &pvp, &poff)) {
708 700 swap_otherfail++;
709 701 swap_otherpages += btop(klsz);
710 702 hat_setmod(pp);
711 703 page_io_unlock(pp);
712 704 page_unlock(pp);
713 705 sw_putbackreq(arg);
714 706 break;
715 707 }
716 708 /* Try to cluster new physical name with previous ones */
717 709 if (klvp == pvp && poff == klstart + klsz) {
718 710 klsz += PAGESIZE;
719 711 page_add(&pplist, pp);
720 712 pplist = pplist->p_next;
721 713 sw_putfree(arg);
722 714 } else if (klvp == pvp && poff == klstart - PAGESIZE) {
723 715 klsz += PAGESIZE;
724 716 klstart -= PAGESIZE;
725 717 page_add(&pplist, pp);
726 718 sw_putfree(arg);
727 719 } else {
728 720 swap_klustfail++;
729 721 swap_klustpages += btop(klsz);
730 722 hat_setmod(pp);
731 723 page_io_unlock(pp);
732 724 page_unlock(pp);
733 725 sw_putbackreq(arg);
734 726 break;
735 727 }
736 728 }
737 729
738 730 err = VOP_PAGEIO(klvp, pplist, klstart, klsz,
739 731 B_WRITE | flags, cr, NULL);
740 732
741 733 if ((flags & B_ASYNC) == 0)
742 734 pvn_write_done(pp, ((err) ? B_ERROR : 0) | B_WRITE | flags);
743 735
744 736 /* Statistics */
745 737 if (!err) {
746 738 swap_putpagecnt++;
747 739 swap_pagespushed += btop(klsz);
748 740 }
749 741 out:
750 742 TRACE_4(TR_FAC_SWAPFS, TR_SWAPFS_PUTAPAGE,
751 743 "swapfs putapage:vp %p klvp %p, klstart %lx, klsz %lx",
752 744 vp, klvp, klstart, klsz);
753 745 if (err && err != ENOMEM)
754 746 cmn_err(CE_WARN, "swapfs_putapage: err %d\n", err);
755 747 if (lenp)
756 748 *lenp = PAGESIZE;
757 749 return (err);
758 750 }
759 751
760 752 static void
761 753 swap_dispose(
762 754 vnode_t *vp,
763 755 page_t *pp,
764 756 int fl,
765 757 int dn,
766 758 cred_t *cr,
767 759 caller_context_t *ct)
768 760 {
769 761 int err;
770 762 u_offset_t off = pp->p_offset;
771 763 vnode_t *pvp;
772 764 u_offset_t poff;
773 765
774 766 ASSERT(PAGE_EXCL(pp));
775 767
776 768 /*
777 769 * The caller will free/invalidate large page in one shot instead of
778 770 * one small page at a time.
779 771 */
780 772 if (pp->p_szc != 0) {
781 773 page_unlock(pp);
782 774 return;
783 775 }
784 776
785 777 err = swap_getphysname(vp, off, &pvp, &poff);
786 778 if (!err && pvp != NULL)
787 779 VOP_DISPOSE(pvp, pp, fl, dn, cr, ct);
788 780 else
789 781 fs_dispose(vp, pp, fl, dn, cr, ct);
790 782 }
↓ open down ↓ |
640 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX