Print this page
5045 use atomic_{inc,dec}_* instead of atomic_add_*
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/sun4u/os/memscrub.c
+++ new/usr/src/uts/sun4u/os/memscrub.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * sun4u Memory Scrubbing
28 28 *
29 29 * On detection of a correctable memory ECC error, the sun4u kernel
30 30 * returns the corrected data to the requester and re-writes it
31 31 * to memory (DRAM). So if the correctable error was transient,
32 32 * the read has effectively been cleaned (scrubbed) from memory.
33 33 *
34 34 * Scrubbing thus reduces the likelihood that multiple transient errors
35 35 * will occur in the same memory word, making uncorrectable errors due
36 36 * to transients less likely.
37 37 *
38 38 * Thus is born the desire that every memory location be periodically
39 39 * accessed.
40 40 *
41 41 * This file implements a memory scrubbing thread. This scrubber
42 42 * guarantees that all of physical memory is accessed periodically
43 43 * (memscrub_period_sec -- 12 hours).
44 44 *
45 45 * It attempts to do this as unobtrusively as possible. The thread
46 46 * schedules itself to wake up at an interval such that if it reads
47 47 * memscrub_span_pages (32MB) on each wakeup, it will read all of physical
48 48 * memory in memscrub_period_sec (12 hours).
49 49 *
50 50 * The scrubber uses the block load and prefetch hardware to read memory
51 51 * @ 1300MB/s, so it reads spans of 32MB in 0.025 seconds. Unlike the
52 52 * original sun4d scrubber the sun4u scrubber does not read ahead if the
53 53 * system is idle because we can read memory very efficiently.
54 54 *
55 55 * The scrubber maintains a private copy of the phys_install memory list
56 56 * to keep track of what memory should be scrubbed.
57 57 *
58 58 * The global routines memscrub_add_span() and memscrub_delete_span() are
59 59 * used to add and delete from this list. If hotplug memory is later
60 60 * supported these two routines can be used to notify the scrubber of
61 61 * memory configuration changes.
62 62 *
63 63 * The following parameters can be set via /etc/system
64 64 *
65 65 * memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES (32MB)
66 66 * memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC (12 hours)
67 67 * memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI)
68 68 * memscrub_delay_start_sec = (5 minutes)
69 69 * memscrub_verbose = (0)
70 70 * memscrub_override_ticks = (1 tick)
71 71 * disable_memscrub = (0)
72 72 * pause_memscrub = (0)
73 73 * read_all_memscrub = (0)
74 74 *
75 75 * The scrubber will print NOTICE messages of what it is doing if
76 76 * "memscrub_verbose" is set.
77 77 *
78 78 * If the scrubber's sleep time calculation drops to zero ticks,
79 79 * memscrub_override_ticks will be used as the sleep time instead. The
80 80 * sleep time should only drop to zero on a system with over 131.84
81 81 * terabytes of memory, or where the default scrubber parameters have
82 82 * been adjusted. For example, reducing memscrub_span_pages or
83 83 * memscrub_period_sec causes the sleep time to drop to zero with less
84 84 * memory. Note that since the sleep time is calculated in clock ticks,
85 85 * using hires clock ticks allows for more memory before the sleep time
86 86 * becomes zero.
87 87 *
88 88 * The scrubber will exit (or never be started) if it finds the variable
89 89 * "disable_memscrub" set.
90 90 *
91 91 * The scrubber will pause (not read memory) when "pause_memscrub"
92 92 * is set. It will check the state of pause_memscrub at each wakeup
93 93 * period. The scrubber will not make up for lost time. If you
94 94 * pause the scrubber for a prolonged period of time you can use
95 95 * the "read_all_memscrub" switch (see below) to catch up. In addition,
96 96 * pause_memscrub is used internally by the post memory DR callbacks.
97 97 * It is set for the small period of time during which the callbacks
98 98 * are executing. This ensures "memscrub_lock" will be released,
99 99 * allowing the callbacks to finish.
100 100 *
101 101 * The scrubber will read all memory if "read_all_memscrub" is set.
102 102 * The normal span read will also occur during the wakeup.
103 103 *
104 104 * MEMSCRUB_MIN_PAGES (32MB) is the minimum amount of memory a system
105 105 * must have before we'll start the scrubber.
106 106 *
107 107 * MEMSCRUB_DFL_SPAN_PAGES (32MB) is based on the guess that 0.025 sec
108 108 * is a "good" amount of minimum time for the thread to run at a time.
109 109 *
110 110 * MEMSCRUB_DFL_PERIOD_SEC (12 hours) is nearly a total guess --
111 111 * twice the frequency the hardware folk estimated would be necessary.
112 112 *
113 113 * MEMSCRUB_DFL_THREAD_PRI (MINCLSYSPRI) is based on the assumption
114 114 * that the scrubber should get its fair share of time (since it
115 115 * is short). At a priority of 0 the scrubber will be starved.
116 116 */
117 117
118 118 #include <sys/systm.h> /* timeout, types, t_lock */
119 119 #include <sys/cmn_err.h>
120 120 #include <sys/sysmacros.h> /* MIN */
121 121 #include <sys/memlist.h> /* memlist */
122 122 #include <sys/mem_config.h> /* memory add/delete */
123 123 #include <sys/kmem.h> /* KMEM_NOSLEEP */
124 124 #include <sys/cpuvar.h> /* ncpus_online */
125 125 #include <sys/debug.h> /* ASSERTs */
126 126 #include <sys/machsystm.h> /* lddphys */
127 127 #include <sys/cpu_module.h> /* vtag_flushpage */
128 128 #include <sys/kstat.h>
129 129 #include <sys/atomic.h> /* atomic_add_32 */
130 130
131 131 #include <vm/hat.h>
132 132 #include <vm/seg_kmem.h>
133 133 #include <vm/hat_sfmmu.h> /* XXX FIXME - delete */
134 134
135 135 #include <sys/time.h>
136 136 #include <sys/callb.h> /* CPR callback */
137 137 #include <sys/ontrap.h>
138 138
139 139 /*
140 140 * Should really have paddr_t defined, but it is broken. Use
141 141 * ms_paddr_t in the meantime to make the code cleaner
142 142 */
143 143 typedef uint64_t ms_paddr_t;
144 144
145 145 /*
146 146 * Global Routines:
147 147 */
148 148 int memscrub_add_span(pfn_t pfn, pgcnt_t pages);
149 149 int memscrub_delete_span(pfn_t pfn, pgcnt_t pages);
150 150 int memscrub_init(void);
151 151 void memscrub_induced_error(void);
152 152
153 153 /*
154 154 * Global Data:
155 155 */
156 156
157 157 /*
158 158 * scrub if we have at least this many pages
159 159 */
160 160 #define MEMSCRUB_MIN_PAGES (32 * 1024 * 1024 / PAGESIZE)
161 161
162 162 /*
163 163 * scan all of physical memory at least once every MEMSCRUB_PERIOD_SEC
164 164 */
165 165 #define MEMSCRUB_DFL_PERIOD_SEC (12 * 60 * 60) /* 12 hours */
166 166
167 167 /*
168 168 * scan at least MEMSCRUB_DFL_SPAN_PAGES each iteration
169 169 */
170 170 #define MEMSCRUB_DFL_SPAN_PAGES ((32 * 1024 * 1024) / PAGESIZE)
171 171
172 172 /*
173 173 * almost anything is higher priority than scrubbing
174 174 */
175 175 #define MEMSCRUB_DFL_THREAD_PRI MINCLSYSPRI
176 176
177 177 /*
178 178 * size used when scanning memory
179 179 */
180 180 #define MEMSCRUB_BLOCK_SIZE 256
181 181 #define MEMSCRUB_BLOCK_SIZE_SHIFT 8 /* log2(MEMSCRUB_BLOCK_SIZE) */
182 182 #define MEMSCRUB_BLOCKS_PER_PAGE (PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT)
183 183
184 184 #define MEMSCRUB_BPP4M MMU_PAGESIZE4M >> MEMSCRUB_BLOCK_SIZE_SHIFT
185 185 #define MEMSCRUB_BPP512K MMU_PAGESIZE512K >> MEMSCRUB_BLOCK_SIZE_SHIFT
186 186 #define MEMSCRUB_BPP64K MMU_PAGESIZE64K >> MEMSCRUB_BLOCK_SIZE_SHIFT
187 187 #define MEMSCRUB_BPP MMU_PAGESIZE >> MEMSCRUB_BLOCK_SIZE_SHIFT
188 188
189 189 /*
190 190 * This message indicates that we have exceeded the limitations of
191 191 * the memscrubber. See the comments above regarding what would
192 192 * cause the sleep time to become zero. In DEBUG mode, this message
193 193 * is logged on the console and in the messages file. In non-DEBUG
194 194 * mode, it is only logged in the messages file.
195 195 */
196 196 #ifdef DEBUG
197 197 #define MEMSCRUB_OVERRIDE_MSG "Memory scrubber sleep time is zero " \
198 198 "seconds, consuming entire CPU."
199 199 #else
200 200 #define MEMSCRUB_OVERRIDE_MSG "!Memory scrubber sleep time is zero " \
201 201 "seconds, consuming entire CPU."
202 202 #endif /* DEBUG */
203 203
204 204 /*
205 205 * we can patch these defaults in /etc/system if necessary
206 206 */
207 207 uint_t disable_memscrub = 0;
208 208 uint_t pause_memscrub = 0;
209 209 uint_t read_all_memscrub = 0;
210 210 uint_t memscrub_verbose = 0;
211 211 uint_t memscrub_all_idle = 0;
212 212 uint_t memscrub_span_pages = MEMSCRUB_DFL_SPAN_PAGES;
213 213 uint_t memscrub_period_sec = MEMSCRUB_DFL_PERIOD_SEC;
214 214 uint_t memscrub_thread_pri = MEMSCRUB_DFL_THREAD_PRI;
215 215 uint_t memscrub_delay_start_sec = 5 * 60;
216 216 uint_t memscrub_override_ticks = 1;
217 217
218 218 /*
219 219 * Static Routines
220 220 */
221 221 static void memscrubber(void);
222 222 static void memscrub_cleanup(void);
223 223 static int memscrub_add_span_gen(pfn_t, pgcnt_t, struct memlist **, uint_t *);
224 224 static int memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp);
225 225 static void memscrub_scan(uint_t blks, ms_paddr_t src);
226 226
227 227 /*
228 228 * Static Data
229 229 */
230 230
231 231 static struct memlist *memscrub_memlist;
232 232 static uint_t memscrub_phys_pages;
233 233
234 234 static kcondvar_t memscrub_cv;
235 235 static kmutex_t memscrub_lock;
236 236 /*
237 237 * memscrub_lock protects memscrub_memlist, interval_ticks, cprinfo, ...
238 238 */
239 239 static void memscrub_init_mem_config(void);
240 240 static void memscrub_uninit_mem_config(void);
241 241
242 242 /*
243 243 * Linked list of memscrub aware spans having retired pages.
244 244 * Currently enabled only on sun4u USIII-based platforms.
245 245 */
246 246 typedef struct memscrub_page_retire_span {
247 247 ms_paddr_t address; /* physical address of the span */
248 248 struct memscrub_page_retire_span *next; /* next span in the list */
249 249 } memscrub_page_retire_span_t;
250 250
251 251 static memscrub_page_retire_span_t *memscrub_page_retire_span_list = NULL;
252 252
253 253 static void memscrub_page_retire_span_add(ms_paddr_t);
254 254 static void memscrub_page_retire_span_delete(ms_paddr_t);
255 255 static int memscrub_page_retire_span_search(ms_paddr_t);
256 256 static void memscrub_page_retire_span_list_update(void);
257 257
258 258 /*
259 259 * add_to_page_retire_list: Set by cpu_async_log_err() routine
260 260 * by calling memscrub_induced_error() when CE/UE occurs on a retired
261 261 * page due to memscrub reading. Cleared by memscrub after updating
262 262 * global page retire span list. Piggybacking on protection of
263 263 * memscrub_lock, which is held during set and clear.
264 264 * Note: When cpu_async_log_err() calls memscrub_induced_error(), it is running
265 265 * on softint context, which gets fired on a cpu memscrub thread currently
266 266 * running. Memscrub thread has affinity set during memscrub_read(), hence
267 267 * migration to new cpu not expected.
268 268 */
269 269 static int add_to_page_retire_list = 0;
270 270
271 271 /*
272 272 * Keep track of some interesting statistics
273 273 */
274 274 static struct memscrub_kstats {
275 275 kstat_named_t done_early; /* ahead of schedule */
276 276 kstat_named_t early_sec; /* by cumulative num secs */
277 277 kstat_named_t done_late; /* behind schedule */
278 278 kstat_named_t late_sec; /* by cumulative num secs */
279 279 kstat_named_t interval_ticks; /* num ticks between intervals */
280 280 kstat_named_t force_run; /* forced to run, non-timeout */
281 281 kstat_named_t errors_found; /* num errors found by memscrub */
282 282 } memscrub_counts = {
283 283 { "done_early", KSTAT_DATA_UINT32 },
284 284 { "early_sec", KSTAT_DATA_UINT32 },
285 285 { "done_late", KSTAT_DATA_UINT32 },
286 286 { "late_sec", KSTAT_DATA_UINT32 },
287 287 { "interval_ticks", KSTAT_DATA_UINT32 },
288 288 { "force_run", KSTAT_DATA_UINT32 },
289 289 { "errors_found", KSTAT_DATA_UINT32 },
290 290 };
291 291
292 292 #define MEMSCRUB_STAT_INC(stat) memscrub_counts.stat.value.ui32++ /* NOTE(review): plain non-atomic update -- assumes a single updater; confirm */
293 293 #define MEMSCRUB_STAT_SET(stat, val) memscrub_counts.stat.value.ui32 = (val)
294 294 #define MEMSCRUB_STAT_NINC(stat, val) memscrub_counts.stat.value.ui32 += (val)
295 295
296 296 static struct kstat *memscrub_ksp = (struct kstat *)NULL;
297 297
298 298 static timeout_id_t memscrub_tid = 0; /* keep track of timeout id */
299 299
300 300 /*
301 301 * create memscrub_memlist from phys_install list
302 302 * initialize locks, set memscrub_phys_pages.
303 303 * Returns 0 on success, -1 if a span could not be added.
304 304 int
305 305 memscrub_init(void)
306 306 {
307 307 struct memlist *src;
308 308
309 309 /*
310 310 * only start up the scrubber if we have at least
311 311 * MEMSCRUB_MIN_PAGES of physical memory
312 312 */
313 313 if (physinstalled >= MEMSCRUB_MIN_PAGES) {
314 314
315 315 /*
316 316 * initialize locks
317 317 */
318 318 mutex_init(&memscrub_lock, NULL, MUTEX_DRIVER, NULL);
319 319 cv_init(&memscrub_cv, NULL, CV_DRIVER, NULL);
320 320
321 321 /*
322 322 * copy phys_install to memscrub_memlist
323 323 */
324 324 for (src = phys_install; src; src = src->ml_next) {
325 325 if (memscrub_add_span(
326 326 (pfn_t)(src->ml_address >> PAGESHIFT),
327 327 (pgcnt_t)(src->ml_size >> PAGESHIFT))) {
328 328 memscrub_cleanup();
329 329 return (-1);
330 330 }
331 331 }
332 332
333 333 /*
334 334 * initialize kstats
335 335 */
336 336 memscrub_ksp = kstat_create("unix", 0, "memscrub_kstat",
337 337 "misc", KSTAT_TYPE_NAMED,
338 338 sizeof (memscrub_counts) / sizeof (kstat_named_t),
339 339 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
340 340
341 341 if (memscrub_ksp) {
342 342 memscrub_ksp->ks_data = (void *)&memscrub_counts;
343 343 kstat_install(memscrub_ksp);
344 344 } else {
345 345 cmn_err(CE_NOTE, "Memscrubber cannot create kstats\n");
346 346 }
347 347
348 348 /*
349 349 * create memscrubber thread
350 350 */
351 351 (void) thread_create(NULL, 0, (void (*)())memscrubber,
352 352 NULL, 0, &p0, TS_RUN, memscrub_thread_pri);
353 353
354 354 /*
355 355 * We don't want call backs changing the list
356 356 * if there is no thread running. We do not
357 357 * attempt to deal with stopping/starting scrubbing
358 358 * on memory size changes.
359 359 */
360 360 memscrub_init_mem_config();
361 361 }
362 362
363 363 return (0);
364 364 }
365 365
366 366 static void /* tear down all scrubber state (list, kstats, locks) */
367 367 memscrub_cleanup(void)
368 368 {
369 369 memscrub_uninit_mem_config(); /* stop mem-config callbacks first */
370 370 while (memscrub_memlist) { /* free every remaining span */
371 371 (void) memscrub_delete_span(
372 372 (pfn_t)(memscrub_memlist->ml_address >> PAGESHIFT),
373 373 (pgcnt_t)(memscrub_memlist->ml_size >> PAGESHIFT));
374 374 }
375 375 if (memscrub_ksp)
376 376 kstat_delete(memscrub_ksp);
377 377 cv_destroy(&memscrub_cv);
378 378 mutex_destroy(&memscrub_lock);
379 379 }
380 380
381 381 #ifdef MEMSCRUB_DEBUG
382 382 static void /* debug helper: dump each span of a memlist to the console */
383 383 memscrub_printmemlist(char *title, struct memlist *listp)
384 384 {
385 385 struct memlist *list;
386 386
387 387 cmn_err(CE_CONT, "%s:\n", title);
388 388
389 389 for (list = listp; list; list = list->ml_next) {
390 390 cmn_err(CE_CONT, "addr = 0x%llx, size = 0x%llx\n",
391 391 list->ml_address, list->ml_size);
392 392 }
393 393 }
394 394 #endif /* MEMSCRUB_DEBUG */
395 395
396 396 /* ARGSUSED */
397 397 static void
398 398 memscrub_wakeup(void *c) /* timeout() callback: wake the scrub thread */
399 399 {
400 400 /*
401 401 * grab mutex to guarantee that our wakeup call
402 402 * arrives after the scrubber goes to sleep -- so it can't sleep forever.
403 403 */
404 404 mutex_enter(&memscrub_lock);
405 405 cv_signal(&memscrub_cv);
406 406 mutex_exit(&memscrub_lock);
407 407 }
408 408
409 409 /*
410 410 * provide an interface external to the memscrubber
411 411 * which will force the memscrub thread to run vs.
412 412 * waiting for the timeout, if one is set
413 413 */
414 414 void
415 415 memscrub_run(void)
416 416 {
417 417 MEMSCRUB_STAT_INC(force_run);
418 418 if (memscrub_tid) { /* only if a timeout is currently pending */
419 419 (void) untimeout(memscrub_tid);
420 420 memscrub_wakeup((void *)NULL);
421 421 }
422 422 }
423 423
424 424 /*
425 425 * this calculation doesn't account for the time
426 426 * that the actual scan consumes -- so we'd fall
427 427 * slightly behind schedule with this interval.
428 428 * It's very small.
429 429 */
430 430
431 431 static uint_t
432 432 compute_interval_ticks(void)
433 433 {
434 434 /*
435 435 * We use local copies msp_safe and mpp_safe below to ensure somebody
436 436 * doesn't set memscrub_span_pages or memscrub_phys_pages
437 437 * to 0 on us.
438 438 */
439 439 static uint_t msp_safe, mpp_safe; /* statics are safe: caller holds memscrub_lock (ASSERT below) */
440 440 static uint_t interval_ticks, period_ticks;
441 441 msp_safe = memscrub_span_pages;
442 442 mpp_safe = memscrub_phys_pages;
443 443
444 444 period_ticks = memscrub_period_sec * hz;
445 445 interval_ticks = period_ticks;
446 446
447 447 ASSERT(mutex_owned(&memscrub_lock));
448 448
449 449 if ((msp_safe != 0) && (mpp_safe != 0)) {
450 450 if (memscrub_phys_pages <= msp_safe) {
451 451 interval_ticks = period_ticks;
452 452 } else {
453 453 interval_ticks = (period_ticks /
454 454 (mpp_safe / msp_safe));
455 455 }
456 456 }
457 457 return (interval_ticks);
458 458 }
459 459
460 460 void
461 461 memscrubber(void) /* main scrub thread: wake periodically, read spans of memory */
462 462 {
463 463 ms_paddr_t address, addr;
464 464 time_t deadline;
465 465 pgcnt_t pages;
466 466 uint_t reached_end = 1; /* 1 => recompute the schedule on the first pass */
467 467 uint_t paused_message = 0;
468 468 uint_t interval_ticks = 0;
469 469 uint_t sleep_warn_printed = 0;
470 470 callb_cpr_t cprinfo;
471 471
472 472 /*
473 473 * notify CPR of our existence
474 474 */
475 475 CALLB_CPR_INIT(&cprinfo, &memscrub_lock, callb_generic_cpr, "memscrub");
476 476
477 477 mutex_enter(&memscrub_lock);
478 478
479 479 if (memscrub_memlist == NULL) {
480 480 cmn_err(CE_WARN, "memscrub_memlist not initialized.");
481 481 goto memscrub_exit;
482 482 }
483 483
484 484 address = memscrub_memlist->ml_address;
485 485
486 486 deadline = gethrestime_sec() + memscrub_delay_start_sec;
487 487
488 488 for (;;) {
489 489 if (disable_memscrub)
490 490 break;
491 491
492 492 /*
493 493 * compute interval_ticks
494 494 */
495 495 interval_ticks = compute_interval_ticks();
496 496
497 497 /*
498 498 * If the calculated sleep time is zero, and pause_memscrub
499 499 * has been set, make sure we sleep so that another thread
500 500 * can acquire memscrub_lock.
501 501 */
502 502 if (interval_ticks == 0 && pause_memscrub) {
503 503 interval_ticks = hz;
504 504 }
505 505
506 506 /*
507 507 * And as a fail safe, under normal non-paused operation, do
508 508 * not allow the sleep time to be zero.
509 509 */
510 510 if (interval_ticks == 0) {
511 511 interval_ticks = memscrub_override_ticks;
512 512 if (!sleep_warn_printed) {
513 513 cmn_err(CE_NOTE, MEMSCRUB_OVERRIDE_MSG);
514 514 sleep_warn_printed = 1;
515 515 }
516 516 }
517 517
518 518 MEMSCRUB_STAT_SET(interval_ticks, interval_ticks);
519 519
520 520 /*
521 521 * Did we just reach the end of memory? If we are at the
522 522 * end of memory, delay end of memory processing until
523 523 * pause_memscrub is not set.
524 524 */
525 525 if (reached_end && !pause_memscrub) {
526 526 time_t now = gethrestime_sec();
527 527
528 528 if (now >= deadline) {
529 529 MEMSCRUB_STAT_INC(done_late);
530 530 MEMSCRUB_STAT_NINC(late_sec, now - deadline);
531 531 /*
532 532 * past deadline, start right away
533 533 */
534 534 interval_ticks = 0;
535 535
536 536 deadline = now + memscrub_period_sec;
537 537 } else {
538 538 /*
539 539 * we finished ahead of schedule.
540 540 * wait till previous deadline before re-start.
541 541 */
542 542 interval_ticks = (deadline - now) * hz;
543 543 MEMSCRUB_STAT_INC(done_early);
544 544 MEMSCRUB_STAT_NINC(early_sec, deadline - now);
545 545 deadline += memscrub_period_sec;
546 546 }
547 547 reached_end = 0;
548 548 sleep_warn_printed = 0;
549 549 }
550 550
551 551 if (interval_ticks != 0) {
552 552 /*
553 553 * it is safe from our standpoint for CPR to
554 554 * suspend the system
555 555 */
556 556 CALLB_CPR_SAFE_BEGIN(&cprinfo);
557 557
558 558 /*
559 559 * hit the snooze bar
560 560 */
561 561 memscrub_tid = timeout(memscrub_wakeup, NULL,
562 562 interval_ticks);
563 563
564 564 /*
565 565 * go to sleep
566 566 */
567 567 cv_wait(&memscrub_cv, &memscrub_lock);
568 568
569 569 /*
570 570 * at this point, no timeout should be set
571 571 */
572 572 memscrub_tid = 0;
573 573
574 574 /*
575 575 * we need to goto work and will be modifying
576 576 * our internal state and mapping/unmapping
577 577 * TTEs
578 578 */
579 579 CALLB_CPR_SAFE_END(&cprinfo, &memscrub_lock);
580 580 }
581 581
582 582
583 583 if (memscrub_phys_pages == 0) {
584 584 cmn_err(CE_WARN, "Memory scrubber has 0 pages to read");
585 585 goto memscrub_exit;
586 586 }
587 587
588 588 if (!pause_memscrub) {
589 589 if (paused_message) {
590 590 paused_message = 0;
591 591 if (memscrub_verbose)
592 592 cmn_err(CE_NOTE, "Memory scrubber "
593 593 "resuming");
594 594 }
595 595
596 596 if (read_all_memscrub) {
597 597 if (memscrub_verbose)
598 598 cmn_err(CE_NOTE, "Memory scrubber "
599 599 "reading all memory per request");
600 600
601 601 addr = memscrub_memlist->ml_address;
602 602 reached_end = 0;
603 603 while (!reached_end) {
604 604 if (disable_memscrub)
605 605 break;
606 606 pages = memscrub_phys_pages;
607 607 reached_end = memscrub_verify_span(
608 608 &addr, &pages);
609 609 memscrub_scan(pages *
610 610 MEMSCRUB_BLOCKS_PER_PAGE, addr);
611 611 addr += ((uint64_t)pages * PAGESIZE);
612 612 }
613 613 read_all_memscrub = 0;
614 614 }
615 615
616 616 /*
617 617 * read 1 span
618 618 */
619 619 pages = memscrub_span_pages;
620 620
621 621 if (disable_memscrub)
622 622 break;
623 623
624 624 /*
625 625 * determine physical address range
626 626 */
627 627 reached_end = memscrub_verify_span(&address,
628 628 &pages);
629 629
630 630 memscrub_scan(pages * MEMSCRUB_BLOCKS_PER_PAGE,
631 631 address);
632 632
633 633 address += ((uint64_t)pages * PAGESIZE);
634 634 }
635 635
636 636 if (pause_memscrub && !paused_message) {
637 637 paused_message = 1;
638 638 if (memscrub_verbose)
639 639 cmn_err(CE_NOTE, "Memory scrubber paused");
640 640 }
641 641 }
642 642
643 643 memscrub_exit:
644 644 cmn_err(CE_NOTE, "Memory scrubber exiting");
645 645 CALLB_CPR_EXIT(&cprinfo);
646 646 memscrub_cleanup();
647 647 thread_exit();
648 648 /* NOTREACHED */
649 649 }
650 650
651 651 /*
652 652 * condition address and size
653 653 * such that they span legal physical addresses.
654 654 *
655 655 * when appropriate, address will be rounded up to start of next
656 656 * struct memlist, and pages will be rounded down to the end of the
657 657 * memlist size.
658 658 *
659 659 * returns 1 if reached end of list, else returns 0.
660 660 */
661 661 static int
662 662 memscrub_verify_span(ms_paddr_t *addrp, pgcnt_t *pagesp)
663 663 {
664 664 struct memlist *mlp;
665 665 ms_paddr_t address = *addrp;
666 666 uint64_t bytes = (uint64_t)*pagesp * PAGESIZE;
667 667 uint64_t bytes_remaining;
668 668 int reached_end = 0;
669 669
670 670 ASSERT(mutex_owned(&memscrub_lock));
671 671
672 672 /*
673 673 * find memlist struct that contains addrp
674 674 * assumes memlist is sorted by ascending address.
675 675 */
676 676 for (mlp = memscrub_memlist; mlp != NULL; mlp = mlp->ml_next) {
677 677 /*
678 678 * if before this chunk, round up to beginning
679 679 */
680 680 if (address < mlp->ml_address) {
681 681 address = mlp->ml_address;
682 682 break;
683 683 }
684 684 /*
685 685 * if before end of chunk, then we found it
686 686 */
687 687 if (address < (mlp->ml_address + mlp->ml_size))
688 688 break;
689 689
690 690 /* else go to next struct memlist */
691 691 }
692 692 /*
693 693 * if we hit end of list, wrap around to the beginning
694 694 */
695 695 if (mlp == NULL) {
696 696 mlp = memscrub_memlist;
697 697 address = mlp->ml_address;
698 698 }
699 699
700 700 /*
701 701 * now we have legal address, and its mlp, condition bytes
702 702 */
703 703 bytes_remaining = (mlp->ml_address + mlp->ml_size) - address;
704 704
705 705 if (bytes > bytes_remaining)
706 706 bytes = bytes_remaining;
707 707
708 708 /*
709 709 * will this span take us to end of list?
710 710 */
711 711 if ((mlp->ml_next == NULL) &&
712 712 ((mlp->ml_address + mlp->ml_size) == (address + bytes)))
713 713 reached_end = 1;
714 714
715 715 /* return values */
716 716 *addrp = address;
717 717 *pagesp = bytes / PAGESIZE;
718 718
719 719 return (reached_end);
720 720 }
721 721
722 722 /*
723 723 * add a span to the memscrub list
724 724 * add to memscrub_phys_pages; returns 0 on success, -1 on failure
725 725 */
726 726 int
727 727 memscrub_add_span(pfn_t pfn, pgcnt_t pages)
728 728 {
729 729 #ifdef MEMSCRUB_DEBUG
730 730 ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
731 731 uint64_t bytes = (uint64_t)pages << PAGESHIFT;
732 732 #endif /* MEMSCRUB_DEBUG */
733 733
734 734 int retval;
735 735
736 736 mutex_enter(&memscrub_lock);
737 737
738 738 #ifdef MEMSCRUB_DEBUG
739 739 memscrub_printmemlist("memscrub_memlist before", memscrub_memlist);
740 740 cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
741 741 cmn_err(CE_CONT, "memscrub_add_span: address: 0x%llx"
742 742 " size: 0x%llx\n", address, bytes);
743 743 #endif /* MEMSCRUB_DEBUG */
744 744
745 745 retval = memscrub_add_span_gen(pfn, pages, &memscrub_memlist,
746 746 &memscrub_phys_pages);
747 747
748 748 #ifdef MEMSCRUB_DEBUG
749 749 memscrub_printmemlist("memscrub_memlist after", memscrub_memlist);
750 750 cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
751 751 #endif /* MEMSCRUB_DEBUG */
752 752
753 753 mutex_exit(&memscrub_lock);
754 754
755 755 return (retval);
756 756 }
757 757
758 758 static int /* worker for memscrub_add_span(): insert/merge a span into *list */
759 759 memscrub_add_span_gen(
760 760 pfn_t pfn,
761 761 pgcnt_t pages,
762 762 struct memlist **list,
763 763 uint_t *npgs)
764 764 {
765 765 ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
766 766 uint64_t bytes = (uint64_t)pages << PAGESHIFT;
767 767 struct memlist *dst;
768 768 struct memlist *prev, *next;
769 769 int retval = 0;
770 770
771 771 /*
772 772 * allocate a new struct memlist
773 773 */
774 774
775 775 dst = (struct memlist *)
776 776 kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
777 777
778 778 if (dst == NULL) {
779 779 retval = -1;
780 780 goto add_done;
781 781 }
782 782
783 783 dst->ml_address = address;
784 784 dst->ml_size = bytes;
785 785
786 786 /*
787 787 * first insert
788 788 */
789 789 if (*list == NULL) {
790 790 dst->ml_prev = NULL;
791 791 dst->ml_next = NULL;
792 792 *list = dst;
793 793
794 794 goto add_done;
795 795 }
796 796
797 797 /*
798 798 * insert into sorted list
799 799 */
800 800 for (prev = NULL, next = *list;
801 801 next != NULL;
802 802 prev = next, next = next->ml_next) {
803 803 if (address > (next->ml_address + next->ml_size))
804 804 continue;
805 805
806 806 /*
807 807 * else insert here
808 808 */
809 809
810 810 /*
811 811 * prepend to next
812 812 */
813 813 if ((address + bytes) == next->ml_address) {
814 814 kmem_free(dst, sizeof (struct memlist));
815 815
816 816 next->ml_address = address;
817 817 next->ml_size += bytes;
818 818
819 819 goto add_done;
820 820 }
821 821
822 822 /*
823 823 * append to next
824 824 */
825 825 if (address == (next->ml_address + next->ml_size)) {
826 826 kmem_free(dst, sizeof (struct memlist));
827 827
828 828 if (next->ml_next) {
829 829 /*
830 830 * don't overlap with next->ml_next
831 831 */
832 832 if ((address + bytes) >
833 833 next->ml_next->ml_address) {
834 834 retval = -1;
835 835 goto add_done;
836 836 }
837 837 /*
838 838 * concatenate next and next->ml_next
839 839 */
840 840 if ((address + bytes) ==
841 841 next->ml_next->ml_address) {
842 842 struct memlist *mlp = next->ml_next;
843 843
844 844 if (next == *list) /* merged node replaces the head */
845 845 *list = next->ml_next;
846 846
847 847 mlp->ml_address = next->ml_address;
848 848 mlp->ml_size += next->ml_size;
849 849 mlp->ml_size += bytes;
850 850
851 851 if (next->ml_prev)
852 852 next->ml_prev->ml_next = mlp;
853 853 mlp->ml_prev = next->ml_prev;
854 854
855 855 kmem_free(next,
856 856 sizeof (struct memlist));
857 857 goto add_done;
858 858 }
859 859 }
860 860
861 861 next->ml_size += bytes;
862 862
863 863 goto add_done;
864 864 }
865 865
866 866 /* don't overlap with next */
867 867 if ((address + bytes) > next->ml_address) {
868 868 retval = -1;
869 869 kmem_free(dst, sizeof (struct memlist));
870 870 goto add_done;
871 871 }
872 872
873 873 /*
874 874 * insert before next
875 875 */
876 876 dst->ml_prev = prev;
877 877 dst->ml_next = next;
878 878 next->ml_prev = dst;
879 879 if (prev == NULL) {
880 880 *list = dst;
881 881 } else {
882 882 prev->ml_next = dst;
883 883 }
884 884 goto add_done;
885 885 } /* end for */
886 886
887 887 /*
888 888 * end of list, prev is valid and next is NULL
889 889 */
890 890 prev->ml_next = dst;
891 891 dst->ml_prev = prev;
892 892 dst->ml_next = NULL;
893 893
894 894 add_done:
895 895
896 896 if (retval != -1)
897 897 *npgs += pages;
898 898
899 899 return (retval);
900 900 }
901 901
902 902 /*
903 903 * delete a span from the memscrub list
904 904 * subtract from memscrub_phys_pages; returns 0 on success, -1 on error
905 905 */
906 906 int
907 907 memscrub_delete_span(pfn_t pfn, pgcnt_t pages)
908 908 {
909 909 ms_paddr_t address = (ms_paddr_t)pfn << PAGESHIFT;
910 910 uint64_t bytes = (uint64_t)pages << PAGESHIFT;
911 911 struct memlist *dst, *next;
912 912 int retval = 0;
913 913
914 914 mutex_enter(&memscrub_lock);
915 915
916 916 #ifdef MEMSCRUB_DEBUG
917 917 memscrub_printmemlist("memscrub_memlist Before", memscrub_memlist);
918 918 cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
919 919 cmn_err(CE_CONT, "memscrub_delete_span: 0x%llx 0x%llx\n",
920 920 address, bytes);
921 921 #endif /* MEMSCRUB_DEBUG */
922 922
923 923 /*
924 924 * find struct memlist containing page
925 925 */
926 926 for (next = memscrub_memlist; next != NULL; next = next->ml_next) {
927 927 if ((address >= next->ml_address) &&
928 928 (address < next->ml_address + next->ml_size))
929 929 break;
930 930 }
931 931
932 932 /*
933 933 * if start address not in list
934 934 */
935 935 if (next == NULL) {
936 936 retval = -1;
937 937 goto delete_done;
938 938 }
939 939
940 940 /*
941 941 * error if size goes off end of this struct memlist
942 942 */
943 943 if (address + bytes > next->ml_address + next->ml_size) {
944 944 retval = -1;
945 945 goto delete_done;
946 946 }
947 947
948 948 /*
949 949 * pages at beginning of struct memlist
950 950 */
951 951 if (address == next->ml_address) {
952 952 /*
953 953 * if start & size match, delete from list
954 954 */
955 955 if (bytes == next->ml_size) {
956 956 if (next == memscrub_memlist)
957 957 memscrub_memlist = next->ml_next;
958 958 if (next->ml_prev != NULL)
959 959 next->ml_prev->ml_next = next->ml_next;
960 960 if (next->ml_next != NULL)
961 961 next->ml_next->ml_prev = next->ml_prev;
962 962
963 963 kmem_free(next, sizeof (struct memlist));
964 964 } else {
965 965 /*
966 966 * increment start address by bytes
967 967 */
968 968 next->ml_address += bytes;
969 969 next->ml_size -= bytes;
970 970 }
971 971 goto delete_done;
972 972 }
973 973
974 974 /*
975 975 * pages at end of struct memlist
976 976 */
977 977 if (address + bytes == next->ml_address + next->ml_size) {
978 978 /*
979 979 * decrement size by bytes
980 980 */
981 981 next->ml_size -= bytes;
982 982 goto delete_done;
983 983 }
984 984
985 985 /*
986 986 * delete a span in the middle of the struct memlist
987 987 */
988 988 {
989 989 /*
990 990 * create a new struct memlist
991 991 */
992 992 dst = (struct memlist *)
993 993 kmem_alloc(sizeof (struct memlist), KM_NOSLEEP);
994 994
995 995 if (dst == NULL) {
996 996 retval = -1;
997 997 goto delete_done;
998 998 }
999 999
1000 1000 /*
1001 1001 * split: the new entry (dst) covers the region after
1002 1002 * the deleted span; the existing entry is truncated
1003 1003 */
1004 1004 dst->ml_address = address + bytes;
1005 1005 dst->ml_size =
1006 1006 (next->ml_address + next->ml_size) - dst->ml_address;
1007 1007 next->ml_size = address - next->ml_address;
1008 1008
1009 1009 /*
1010 1010 * next now ends at the start of the deleted span;
1011 1011 * dst runs from just past it to the original end
1012 1012 */
1013 1013
1014 1014 /*
1015 1015 * link in new memlist after old
1016 1016 */
1017 1017 dst->ml_next = next->ml_next;
1018 1018 dst->ml_prev = next;
1019 1019
1020 1020 if (next->ml_next != NULL)
1021 1021 next->ml_next->ml_prev = dst;
1022 1022 next->ml_next = dst;
1023 1023 }
1024 1024
1025 1025 delete_done:
1026 1026 if (retval != -1) {
1027 1027 memscrub_phys_pages -= pages;
1028 1028 if (memscrub_phys_pages == 0) /* nothing left to scrub */
1029 1029 disable_memscrub = 1;
1030 1030 }
1031 1031
1032 1032 #ifdef MEMSCRUB_DEBUG
1033 1033 memscrub_printmemlist("memscrub_memlist After", memscrub_memlist);
1034 1034 cmn_err(CE_CONT, "memscrub_phys_pages: 0x%x\n", memscrub_phys_pages);
1035 1035 #endif /* MEMSCRUB_DEBUG */
1036 1036
1037 1037 mutex_exit(&memscrub_lock);
1038 1038 return (retval);
1039 1039 }
1040 1040
1041 1041 static void
1042 1042 memscrub_scan(uint_t blks, ms_paddr_t src)
1043 1043 {
1044 1044 uint_t psz, bpp, pgsread;
1045 1045 pfn_t pfn;
1046 1046 ms_paddr_t pa;
1047 1047 caddr_t va;
1048 1048 on_trap_data_t otd;
1049 1049 int scan_mmu_pagesize = 0;
1050 1050 int retired_pages = 0;
1051 1051
1052 1052 extern void memscrub_read(caddr_t src, uint_t blks);
1053 1053
1054 1054 ASSERT(mutex_owned(&memscrub_lock));
1055 1055
1056 1056 pgsread = 0;
1057 1057 pa = src;
1058 1058
1059 1059 if (memscrub_page_retire_span_list != NULL) {
1060 1060 if (memscrub_page_retire_span_search(src)) {
1061 1061 /* retired pages in current span */
1062 1062 scan_mmu_pagesize = 1;
1063 1063 }
1064 1064 }
1065 1065
1066 1066 #ifdef MEMSCRUB_DEBUG
1067 1067 cmn_err(CE_NOTE, "scan_mmu_pagesize = %d\n" scan_mmu_pagesize);
1068 1068 #endif /* MEMSCRUB_DEBUG */
1069 1069
1070 1070 while (blks != 0) {
1071 1071 /* Ensure the PA is properly aligned */
1072 1072 if (((pa & MMU_PAGEMASK4M) == pa) &&
1073 1073 (blks >= MEMSCRUB_BPP4M)) {
1074 1074 psz = MMU_PAGESIZE4M;
1075 1075 bpp = MEMSCRUB_BPP4M;
1076 1076 } else if (((pa & MMU_PAGEMASK512K) == pa) &&
1077 1077 (blks >= MEMSCRUB_BPP512K)) {
1078 1078 psz = MMU_PAGESIZE512K;
1079 1079 bpp = MEMSCRUB_BPP512K;
1080 1080 } else if (((pa & MMU_PAGEMASK64K) == pa) &&
1081 1081 (blks >= MEMSCRUB_BPP64K)) {
1082 1082 psz = MMU_PAGESIZE64K;
1083 1083 bpp = MEMSCRUB_BPP64K;
1084 1084 } else if ((pa & MMU_PAGEMASK) == pa) {
1085 1085 psz = MMU_PAGESIZE;
1086 1086 bpp = MEMSCRUB_BPP;
1087 1087 } else {
1088 1088 if (memscrub_verbose) {
1089 1089 cmn_err(CE_NOTE, "Memory scrubber ignoring "
1090 1090 "non-page aligned block starting at 0x%"
1091 1091 PRIx64, src);
1092 1092 }
1093 1093 return;
1094 1094 }
1095 1095 if (blks < bpp) bpp = blks;
1096 1096
1097 1097 #ifdef MEMSCRUB_DEBUG
1098 1098 cmn_err(CE_NOTE, "Going to run psz=%x, "
1099 1099 "bpp=%x pa=%llx\n", psz, bpp, pa);
1100 1100 #endif /* MEMSCRUB_DEBUG */
1101 1101
1102 1102 /*
1103 1103 * MEMSCRUBBASE is a 4MB aligned page in the
1104 1104 * kernel so that we can quickly map the PA
1105 1105 * to a VA for the block loads performed in
1106 1106 * memscrub_read.
1107 1107 */
1108 1108 pfn = mmu_btop(pa);
1109 1109 va = (caddr_t)MEMSCRUBBASE;
1110 1110 hat_devload(kas.a_hat, va, psz, pfn, PROT_READ,
1111 1111 HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
1112 1112
1113 1113 /*
1114 1114 * Can't allow the memscrubber to migrate across CPUs as
1115 1115 * we need to know whether CEEN is enabled for the current
1116 1116 * CPU to enable us to scrub the memory. Don't use
1117 1117 * kpreempt_disable as the time we take to scan a span (even
1118 1118 * without cpu_check_ce having to manually cpu_check_block)
1119 1119 * is too long to hold a higher priority thread (eg, RT)
1120 1120 * off cpu.
1121 1121 */
1122 1122 thread_affinity_set(curthread, CPU_CURRENT);
1123 1123
1124 1124 /*
1125 1125 * Protect read scrub from async faults. For now, we simply
1126 1126 * maintain a count of such faults caught.
1127 1127 */
1128 1128
1129 1129 if (!on_trap(&otd, OT_DATA_EC) && !scan_mmu_pagesize) {
1130 1130 memscrub_read(va, bpp);
1131 1131 /*
1132 1132 * Check if CEs require logging
1133 1133 */
1134 1134 cpu_check_ce(SCRUBBER_CEEN_CHECK,
1135 1135 (uint64_t)pa, va, psz);
1136 1136 no_trap();
1137 1137 thread_affinity_clear(curthread);
1138 1138 } else {
1139 1139 no_trap();
1140 1140 thread_affinity_clear(curthread);
1141 1141
1142 1142 /*
1143 1143 * Got an async error..
1144 1144 * Try rescanning it at MMU_PAGESIZE
1145 1145 * granularity if we were trying to
1146 1146 * read at a larger page size.
1147 1147 * This is to ensure we continue to
1148 1148 * scan the rest of the span.
1149 1149 * OR scanning MMU_PAGESIZE granularity to avoid
1150 1150 * reading retired pages memory when scan_mmu_pagesize
1151 1151 * is set.
1152 1152 */
1153 1153 if (psz > MMU_PAGESIZE || scan_mmu_pagesize) {
1154 1154 caddr_t vaddr = va;
1155 1155 ms_paddr_t paddr = pa;
1156 1156 int tmp = 0;
1157 1157 for (; tmp < bpp; tmp += MEMSCRUB_BPP) {
1158 1158 /* Don't scrub retired pages */
1159 1159 if (page_retire_check(paddr, NULL)
1160 1160 == 0) {
1161 1161 vaddr += MMU_PAGESIZE;
1162 1162 paddr += MMU_PAGESIZE;
1163 1163 retired_pages++;
1164 1164 continue;
1165 1165 }
1166 1166 thread_affinity_set(curthread,
1167 1167 CPU_CURRENT);
1168 1168 if (!on_trap(&otd, OT_DATA_EC)) {
1169 1169 memscrub_read(vaddr,
1170 1170 MEMSCRUB_BPP);
1171 1171 cpu_check_ce(
1172 1172 SCRUBBER_CEEN_CHECK,
1173 1173 (uint64_t)paddr, vaddr,
1174 1174 MMU_PAGESIZE);
1175 1175 no_trap();
1176 1176 } else {
1177 1177 no_trap();
1178 1178 MEMSCRUB_STAT_INC(errors_found);
1179 1179 }
1180 1180 thread_affinity_clear(curthread);
1181 1181 vaddr += MMU_PAGESIZE;
1182 1182 paddr += MMU_PAGESIZE;
1183 1183 }
1184 1184 }
1185 1185 }
1186 1186 hat_unload(kas.a_hat, va, psz, HAT_UNLOAD_UNLOCK);
1187 1187
1188 1188 blks -= bpp;
1189 1189 pa += psz;
1190 1190 pgsread++;
1191 1191 }
1192 1192
1193 1193 /*
1194 1194 * If just finished scrubbing MMU_PAGESIZE at a time, but no retired
1195 1195 * pages found so delete span from global list.
1196 1196 */
1197 1197 if (scan_mmu_pagesize && retired_pages == 0)
1198 1198 memscrub_page_retire_span_delete(src);
1199 1199
1200 1200 /*
1201 1201 * Encountered CE/UE on a retired page during memscrub read of current
1202 1202 * span. Adding span to global list to enable avoid reading further.
1203 1203 */
1204 1204 if (add_to_page_retire_list) {
1205 1205 if (!memscrub_page_retire_span_search(src))
1206 1206 memscrub_page_retire_span_add(src);
1207 1207 add_to_page_retire_list = 0;
1208 1208 }
1209 1209
1210 1210 if (memscrub_verbose) {
1211 1211 cmn_err(CE_NOTE, "Memory scrubber read 0x%x pages starting "
1212 1212 "at 0x%" PRIx64, pgsread, src);
1213 1213 }
1214 1214 }
1215 1215
/*
 * Called by cpu_async_log_err() when a memscrub read causes a CE/UE on
 * a retired page.  Sets a flag that memscrub_scan() checks after
 * finishing the current span, causing the span to be added to the page
 * retire span list so it is subsequently scanned page-by-page.
 */
void
memscrub_induced_error(void)
{
	add_to_page_retire_list = 1;
}
1225 1225
1226 1226 /*
1227 1227 * Called by page_retire() when toxic pages cannot be retired
1228 1228 * immediately and are scheduled for retire. Memscrubber stops
1229 1229 * scrubbing them to avoid further CE/UEs.
1230 1230 */
1231 1231 void
1232 1232 memscrub_notify(ms_paddr_t pa)
1233 1233 {
1234 1234 mutex_enter(&memscrub_lock);
1235 1235 if (!memscrub_page_retire_span_search(pa))
1236 1236 memscrub_page_retire_span_add(pa);
1237 1237 mutex_exit(&memscrub_lock);
1238 1238 }
1239 1239
1240 1240 /*
1241 1241 * Called by memscrub_scan() and memscrub_notify().
1242 1242 * pa: physical address of span with CE/UE, add to global list.
1243 1243 */
1244 1244 static void
1245 1245 memscrub_page_retire_span_add(ms_paddr_t pa)
1246 1246 {
1247 1247 memscrub_page_retire_span_t *new_span;
1248 1248
1249 1249 new_span = (memscrub_page_retire_span_t *)
1250 1250 kmem_zalloc(sizeof (memscrub_page_retire_span_t), KM_NOSLEEP);
1251 1251
1252 1252 if (new_span == NULL) {
1253 1253 #ifdef MEMSCRUB_DEBUG
1254 1254 cmn_err(CE_NOTE, "failed to allocate new span - span with"
1255 1255 " retired page/s not tracked.\n");
1256 1256 #endif /* MEMSCRUB_DEBUG */
1257 1257 return;
1258 1258 }
1259 1259
1260 1260 new_span->address = pa;
1261 1261 new_span->next = memscrub_page_retire_span_list;
1262 1262 memscrub_page_retire_span_list = new_span;
1263 1263 }
1264 1264
1265 1265 /*
1266 1266 * Called by memscrub_scan().
1267 1267 * pa: physical address of span to be removed from global list.
1268 1268 */
1269 1269 static void
1270 1270 memscrub_page_retire_span_delete(ms_paddr_t pa)
1271 1271 {
1272 1272 memscrub_page_retire_span_t *prev_span, *next_span;
1273 1273
1274 1274 prev_span = memscrub_page_retire_span_list;
1275 1275 next_span = memscrub_page_retire_span_list->next;
1276 1276
1277 1277 if (pa == prev_span->address) {
1278 1278 memscrub_page_retire_span_list = next_span;
1279 1279 kmem_free(prev_span, sizeof (memscrub_page_retire_span_t));
1280 1280 return;
1281 1281 }
1282 1282
1283 1283 while (next_span) {
1284 1284 if (pa == next_span->address) {
1285 1285 prev_span->next = next_span->next;
1286 1286 kmem_free(next_span,
1287 1287 sizeof (memscrub_page_retire_span_t));
1288 1288 return;
1289 1289 }
1290 1290 prev_span = next_span;
1291 1291 next_span = next_span->next;
1292 1292 }
1293 1293 }
1294 1294
1295 1295 /*
1296 1296 * Called by memscrub_scan() and memscrub_notify().
1297 1297 * pa: physical address of span to be searched in global list.
1298 1298 */
1299 1299 static int
1300 1300 memscrub_page_retire_span_search(ms_paddr_t pa)
1301 1301 {
1302 1302 memscrub_page_retire_span_t *next_span = memscrub_page_retire_span_list;
1303 1303
1304 1304 while (next_span) {
1305 1305 if (pa == next_span->address)
1306 1306 return (1);
1307 1307 next_span = next_span->next;
1308 1308 }
1309 1309 return (0);
1310 1310 }
1311 1311
/*
 * Called from new_memscrub() as a result of memory delete.
 * Using page_numtopp_nolock() to determine if we have valid PA.
 *
 * Walks the retire-span list and removes any entry whose physical
 * address no longer maps to a page (i.e. the memory was deleted).
 * Caller holds memscrub_lock (called under it from new_memscrub()).
 */
static void
memscrub_page_retire_span_list_update(void)
{
	memscrub_page_retire_span_t *prev, *cur, *next;

	if (memscrub_page_retire_span_list == NULL)
		return;

	prev = cur = memscrub_page_retire_span_list;
	next = cur->next;

	while (cur) {
		/* PA no longer backed by a page_t => stale entry, drop it */
		if (page_numtopp_nolock(mmu_btop(cur->address)) == NULL) {
			if (cur == memscrub_page_retire_span_list) {
				/* deleting the list head; restart there */
				memscrub_page_retire_span_list = next;
				kmem_free(cur,
				    sizeof (memscrub_page_retire_span_t));
				prev = cur = memscrub_page_retire_span_list;
			} else {
				/* unlink interior node; prev stays put */
				prev->next = cur->next;
				kmem_free(cur,
				    sizeof (memscrub_page_retire_span_t));
				cur = next;
			}
		} else {
			prev = cur;
			cur = next;
		}
		/* refresh lookahead for the next iteration */
		if (cur != NULL)
			next = cur->next;
	}
}
1348 1348
1349 1349 /*
1350 1350 * The memory add/delete callback mechanism does not pass in the
1351 1351 * page ranges. The phys_install list has been updated though, so
1352 1352 * create a new scrub list from it.
1353 1353 */
1354 1354
1355 1355 static int
1356 1356 new_memscrub(int update_page_retire_list)
1357 1357 {
1358 1358 struct memlist *src, *list, *old_list;
1359 1359 uint_t npgs;
1360 1360
1361 1361 /*
1362 1362 * copy phys_install to memscrub_memlist
1363 1363 */
1364 1364 list = NULL;
1365 1365 npgs = 0;
1366 1366 memlist_read_lock();
1367 1367 for (src = phys_install; src; src = src->ml_next) {
1368 1368 if (memscrub_add_span_gen((pfn_t)(src->ml_address >> PAGESHIFT),
1369 1369 (pgcnt_t)(src->ml_size >> PAGESHIFT), &list, &npgs)) {
1370 1370 memlist_read_unlock();
1371 1371 while (list) {
1372 1372 struct memlist *el;
1373 1373
1374 1374 el = list;
1375 1375 list = list->ml_next;
1376 1376 kmem_free(el, sizeof (struct memlist));
1377 1377 }
1378 1378 return (-1);
1379 1379 }
1380 1380 }
1381 1381 memlist_read_unlock();
1382 1382
1383 1383 mutex_enter(&memscrub_lock);
1384 1384 memscrub_phys_pages = npgs;
1385 1385 old_list = memscrub_memlist;
1386 1386 memscrub_memlist = list;
1387 1387
1388 1388 if (update_page_retire_list)
1389 1389 memscrub_page_retire_span_list_update();
1390 1390
1391 1391 mutex_exit(&memscrub_lock);
1392 1392
1393 1393 while (old_list) {
1394 1394 struct memlist *el;
1395 1395
1396 1396 el = old_list;
1397 1397 old_list = old_list->ml_next;
1398 1398 kmem_free(el, sizeof (struct memlist));
1399 1399 }
1400 1400
1401 1401 return (0);
1402 1402 }
1403 1403
1404 1404 /*ARGSUSED*/
1405 1405 static void
1406 1406 memscrub_mem_config_post_add(
↓ open down ↓ |
1406 lines elided |
↑ open up ↑ |
1407 1407 void *arg,
1408 1408 pgcnt_t delta_pages)
1409 1409 {
1410 1410 /*
1411 1411 * We increment pause_memscrub before entering new_memscrub(). This
1412 1412 * will force the memscrubber to sleep, allowing the DR callback
1413 1413 * thread to acquire memscrub_lock in new_memscrub(). The use of
1414 1414 * atomic_add_32() allows concurrent memory DR operations to use the
1415 1415 * callbacks safely.
1416 1416 */
1417 - atomic_add_32(&pause_memscrub, 1);
1417 + atomic_inc_32(&pause_memscrub);
1418 1418 ASSERT(pause_memscrub != 0);
1419 1419
1420 1420 /*
1421 1421 * "Don't care" if we are not scrubbing new memory.
1422 1422 */
1423 1423 (void) new_memscrub(0); /* retain page retire list */
1424 1424
1425 1425 /* Restore the pause setting. */
1426 - atomic_add_32(&pause_memscrub, -1);
1426 + atomic_dec_32(&pause_memscrub);
1427 1427 }
1428 1428
/*
 * Memory DR pre-delete callback.  Nothing to prepare: the scrub list
 * is rebuilt in the post-delete callback.  Returning 0 lets the
 * delete proceed.
 */
/*ARGSUSED*/
static int
memscrub_mem_config_pre_del(
	void *arg,
	pgcnt_t delta_pages)
{
	/* Nothing to do. */
	return (0);
}
1438 1438
1439 1439 /*ARGSUSED*/
1440 1440 static void
1441 1441 memscrub_mem_config_post_del(
1442 1442 void *arg,
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
1443 1443 pgcnt_t delta_pages,
1444 1444 int cancelled)
1445 1445 {
1446 1446 /*
1447 1447 * We increment pause_memscrub before entering new_memscrub(). This
1448 1448 * will force the memscrubber to sleep, allowing the DR callback
1449 1449 * thread to acquire memscrub_lock in new_memscrub(). The use of
1450 1450 * atomic_add_32() allows concurrent memory DR operations to use the
1451 1451 * callbacks safely.
1452 1452 */
1453 - atomic_add_32(&pause_memscrub, 1);
1453 + atomic_inc_32(&pause_memscrub);
1454 1454 ASSERT(pause_memscrub != 0);
1455 1455
1456 1456 /*
1457 1457 * Must stop scrubbing deleted memory as it may be disconnected.
1458 1458 */
1459 1459 if (new_memscrub(1)) { /* update page retire list */
1460 1460 disable_memscrub = 1;
1461 1461 }
1462 1462
1463 1463 /* Restore the pause setting. */
1464 - atomic_add_32(&pause_memscrub, -1);
1464 + atomic_dec_32(&pause_memscrub);
1465 1465 }
1466 1466
/*
 * Memory DR callbacks registered with the kphysm framework: version,
 * then the post-add, pre-delete and post-delete handlers above.
 */
static kphysm_setup_vector_t memscrub_mem_config_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	memscrub_mem_config_post_add,
	memscrub_mem_config_pre_del,
	memscrub_mem_config_post_del,
};
1473 1473
1474 1474 static void
1475 1475 memscrub_init_mem_config()
1476 1476 {
1477 1477 int ret;
1478 1478
1479 1479 ret = kphysm_setup_func_register(&memscrub_mem_config_vec,
1480 1480 (void *)NULL);
1481 1481 ASSERT(ret == 0);
1482 1482 }
1483 1483
1484 1484 static void
1485 1485 memscrub_uninit_mem_config()
1486 1486 {
1487 1487 /* This call is OK if the register call was not done. */
1488 1488 kphysm_setup_func_unregister(&memscrub_mem_config_vec, (void *)NULL);
1489 1489 }
↓ open down ↓ |
15 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX