Print this page
patch tsoome-feedback
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_dimm.c
+++ new/usr/src/cmd/fm/modules/sun4/cpumem-diagnosis/cmd_dimm.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 */
24 24
25 25 /*
26 26 * Support routines for DIMMs.
27 27 */
28 28
29 29 #include <cmd_mem.h>
30 30 #include <limits.h>
31 31 #include <cmd_dimm.h>
32 32 #include <cmd_bank.h>
33 33 #include <cmd.h>
34 34
35 35 #include <errno.h>
36 36 #include <string.h>
37 37 #include <strings.h>
38 38 #include <fcntl.h>
39 39 #include <unistd.h>
40 40 #include <fm/fmd_api.h>
41 41 #include <sys/fm/protocol.h>
42 42 #include <sys/mem.h>
43 43 #include <sys/nvpair.h>
44 44 #ifdef sun4v
45 45 #include <cmd_hc_sun4v.h>
46 46 #include <cmd_branch.h>
47 47 #endif /* sun4v */
48 48
49 49 /*
50 50 * Some errors (RxE/FRx pairs) don't have accurate DIMM (resource) FMRIs,
51 51 * because sufficient information was unavailable prior to correlation.
52 52 * When the DE completes the pair, it uses this routine to retrieve the
53 53 * correct FMRI.
54 54 */
55 55 nvlist_t *
56 56 cmd_dimm_fmri_derive(fmd_hdl_t *hdl, uint64_t afar, uint16_t synd,
57 57 uint64_t afsr)
58 58 {
59 59 nvlist_t *fmri;
60 60
61 61 if ((fmri = cmd_mem_fmri_derive(hdl, afar, afsr, synd)) == NULL)
62 62 return (NULL);
63 63
64 64 if (fmd_nvl_fmri_expand(hdl, fmri) < 0) {
65 65 nvlist_free(fmri);
66 66 return (NULL);
67 67 }
68 68
69 69 return (fmri);
70 70 }
71 71
72 72 nvlist_t *
73 73 cmd_dimm_fru(cmd_dimm_t *dimm)
74 74 {
75 75 return (dimm->dimm_asru_nvl);
76 76 }
77 77
78 78 nvlist_t *
79 79 cmd_dimm_create_fault(fmd_hdl_t *hdl, cmd_dimm_t *dimm, const char *fltnm,
80 80 uint_t cert)
81 81 {
82 82 #ifdef sun4v
83 83 nvlist_t *flt, *nvlfru;
84 84 /*
85 85 * Do NOT issue hc scheme FRU FMRIs for ultraSPARC-T1 platforms.
86 86 * The SP will misinterpret the FRU. Instead, reuse the ASRU FMRI
87 87 *
↓ open down ↓ |
87 lines elided |
↑ open up ↑ |
88 88 * Use the BR string as a distinguisher. BR (branch) is only
89 89 * present in ultraSPARC-T2/T2plus DIMM unums
90 90 */
91 91 if (strstr(dimm->dimm_unum, "BR") == NULL) {
92 92 flt = cmd_nvl_create_fault(hdl, fltnm, cert,
93 93 dimm->dimm_asru_nvl, dimm->dimm_asru_nvl, NULL);
94 94 } else {
95 95 nvlfru = cmd_mem2hc(hdl, dimm->dimm_asru_nvl);
96 96 flt = cmd_nvl_create_fault(hdl, fltnm, cert,
97 97 dimm->dimm_asru_nvl, nvlfru, NULL);
98 - if (nvlfru != NULL)
99 - nvlist_free(nvlfru);
98 + nvlist_free(nvlfru);
100 99 }
101 100 return (cmd_fault_add_location(hdl, flt, dimm->dimm_unum));
102 101 #else
103 102 return (cmd_nvl_create_fault(hdl, fltnm, cert, dimm->dimm_asru_nvl,
104 103 dimm->dimm_asru_nvl, NULL));
105 104 #endif /* sun4v */
106 105 }
107 106
108 107 static void
109 108 cmd_dimm_free(fmd_hdl_t *hdl, cmd_dimm_t *dimm, int destroy)
110 109 {
111 110 cmd_case_t *cc = &dimm->dimm_case;
112 111 int i;
113 112 cmd_mq_t *q;
114 113 tstamp_t *tsp, *next;
115 114
116 115 #ifdef sun4v
117 116 cmd_branch_t *branch;
118 117 #endif
119 118 if (cc->cc_cp != NULL) {
120 119 cmd_case_fini(hdl, cc->cc_cp, destroy);
121 120 if (cc->cc_serdnm != NULL) {
122 121 if (fmd_serd_exists(hdl, cc->cc_serdnm) &&
123 122 destroy)
124 123 fmd_serd_destroy(hdl, cc->cc_serdnm);
125 124 fmd_hdl_strfree(hdl, cc->cc_serdnm);
126 125 }
127 126 }
128 127
129 128 for (i = 0; i < CMD_MAX_CKWDS; i++) {
130 129 while ((q = cmd_list_next(&dimm->mq_root[i])) != NULL) {
131 130 if (q->mq_serdnm != NULL) {
132 131 if (fmd_serd_exists(hdl, q->mq_serdnm)) {
133 132 fmd_serd_destroy(hdl, q->mq_serdnm);
134 133 }
135 134 fmd_hdl_strfree(hdl, q->mq_serdnm);
136 135 q->mq_serdnm = NULL;
137 136 }
138 137
139 138 for (tsp = cmd_list_next(&q->mq_dupce_tstamp);
140 139 tsp != NULL; tsp = next) {
141 140 next = cmd_list_next(tsp);
142 141 cmd_list_delete(&q->mq_dupce_tstamp,
143 142 &tsp->ts_l);
144 143 fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
145 144 }
146 145
147 146 cmd_list_delete(&dimm->mq_root[i], q);
148 147 fmd_hdl_free(hdl, q, sizeof (cmd_mq_t));
149 148 }
150 149 }
151 150
152 151 if (dimm->dimm_bank != NULL)
153 152 cmd_bank_remove_dimm(hdl, dimm->dimm_bank, dimm);
154 153
155 154 #ifdef sun4v
156 155 branch = cmd_branch_lookup_by_unum(hdl, dimm->dimm_unum);
157 156 if (branch != NULL)
158 157 cmd_branch_remove_dimm(hdl, branch, dimm);
159 158 #endif
160 159
161 160 cmd_fmri_fini(hdl, &dimm->dimm_asru, destroy);
162 161
163 162 if (destroy)
164 163 fmd_buf_destroy(hdl, NULL, dimm->dimm_bufname);
165 164
166 165 cmd_list_delete(&cmd.cmd_dimms, dimm);
167 166 fmd_hdl_free(hdl, dimm, sizeof (cmd_dimm_t));
168 167 }
169 168
170 169 void
171 170 cmd_dimm_destroy(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
172 171 {
173 172
174 173 fmd_stat_destroy(hdl, 1, &(dimm->dimm_retstat));
175 174 cmd_dimm_free(hdl, dimm, FMD_B_TRUE);
176 175 }
177 176
178 177 static cmd_dimm_t *
179 178 dimm_lookup_by_unum(const char *unum)
180 179 {
181 180 cmd_dimm_t *dimm;
182 181
183 182 for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL;
184 183 dimm = cmd_list_next(dimm)) {
185 184 if (strcmp(dimm->dimm_unum, unum) == 0)
186 185 return (dimm);
187 186 }
188 187
189 188 return (NULL);
190 189 }
191 190
192 191 static void
193 192 dimm_attach_to_bank(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
194 193 {
195 194 cmd_bank_t *bank;
196 195
197 196 for (bank = cmd_list_next(&cmd.cmd_banks); bank != NULL;
198 197 bank = cmd_list_next(bank)) {
199 198 if (fmd_nvl_fmri_contains(hdl, bank->bank_asru_nvl,
200 199 dimm->dimm_asru_nvl)) {
201 200 cmd_bank_add_dimm(hdl, bank, dimm);
202 201 return;
203 202 }
204 203 }
205 204 }
206 205
207 206 cmd_dimm_t *
208 207 cmd_dimm_create(fmd_hdl_t *hdl, nvlist_t *asru)
209 208 {
210 209 cmd_dimm_t *dimm;
211 210 const char *unum;
212 211 nvlist_t *fmri;
213 212 size_t nserids = 0;
214 213 char **serids = NULL;
215 214
216 215 if (!fmd_nvl_fmri_present(hdl, asru)) {
217 216 fmd_hdl_debug(hdl, "dimm_lookup: discarding old ereport\n");
218 217 return (NULL);
219 218 }
220 219
221 220 if ((unum = cmd_fmri_get_unum(asru)) == NULL) {
222 221 CMD_STAT_BUMP(bad_mem_asru);
223 222 return (NULL);
224 223 }
225 224
226 225 #ifdef sun4v
227 226 if (nvlist_lookup_string_array(asru, FM_FMRI_HC_SERIAL_ID, &serids,
228 227 &nserids) != 0) {
229 228 fmd_hdl_debug(hdl, "sun4v mem: FMRI does not"
230 229 " have serial_ids\n");
231 230 CMD_STAT_BUMP(bad_mem_asru);
232 231 return (NULL);
233 232 }
234 233 #endif
235 234 fmri = cmd_mem_fmri_create(unum, serids, nserids);
236 235 if (fmd_nvl_fmri_expand(hdl, fmri) < 0) {
237 236 CMD_STAT_BUMP(bad_mem_asru);
238 237 nvlist_free(fmri);
239 238 return (NULL);
240 239 }
241 240
242 241 fmd_hdl_debug(hdl, "dimm_create: creating new DIMM %s\n", unum);
243 242 CMD_STAT_BUMP(dimm_creat);
244 243
245 244 dimm = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
246 245 dimm->dimm_nodetype = CMD_NT_DIMM;
247 246 dimm->dimm_version = CMD_DIMM_VERSION;
248 247 dimm->dimm_phys_addr_low = ULLONG_MAX;
249 248 dimm->dimm_phys_addr_hi = 0;
250 249 dimm->dimm_syl_error = USHRT_MAX;
251 250
252 251 cmd_bufname(dimm->dimm_bufname, sizeof (dimm->dimm_bufname), "dimm_%s",
253 252 unum);
254 253 cmd_fmri_init(hdl, &dimm->dimm_asru, fmri, "dimm_asru_%s", unum);
255 254
256 255 nvlist_free(fmri);
257 256
258 257 (void) nvlist_lookup_string(dimm->dimm_asru_nvl, FM_FMRI_MEM_UNUM,
259 258 (char **)&dimm->dimm_unum);
260 259
261 260 dimm_attach_to_bank(hdl, dimm);
262 261
263 262 cmd_mem_retirestat_create(hdl, &dimm->dimm_retstat, dimm->dimm_unum, 0,
264 263 CMD_DIMM_STAT_PREFIX);
265 264
266 265 cmd_list_append(&cmd.cmd_dimms, dimm);
267 266 cmd_dimm_dirty(hdl, dimm);
268 267
269 268 return (dimm);
270 269 }
271 270
272 271 cmd_dimm_t *
273 272 cmd_dimm_lookup(fmd_hdl_t *hdl, nvlist_t *asru)
274 273 {
275 274 cmd_dimm_t *dimm;
276 275 const char *unum;
277 276
278 277 if ((unum = cmd_fmri_get_unum(asru)) == NULL) {
279 278 CMD_STAT_BUMP(bad_mem_asru);
280 279 return (NULL);
281 280 }
282 281
283 282 dimm = dimm_lookup_by_unum(unum);
284 283
285 284 if (dimm != NULL && !fmd_nvl_fmri_present(hdl, dimm->dimm_asru_nvl)) {
286 285 /*
287 286 * The DIMM doesn't exist anymore, so we need to delete the
288 287 * state structure, which is now out of date. The containing
289 288 * bank (if any) is also out of date, so blow it away too.
290 289 */
291 290 fmd_hdl_debug(hdl, "dimm_lookup: discarding old dimm\n");
292 291
293 292 if (dimm->dimm_bank != NULL)
294 293 cmd_bank_destroy(hdl, dimm->dimm_bank);
295 294 cmd_dimm_destroy(hdl, dimm);
296 295
297 296 return (NULL);
298 297 }
299 298
300 299 return (dimm);
301 300 }
302 301
303 302 static cmd_dimm_t *
304 303 dimm_v0tov2(fmd_hdl_t *hdl, cmd_dimm_0_t *old, size_t oldsz)
305 304 {
306 305 cmd_dimm_t *new;
307 306
308 307 if (oldsz != sizeof (cmd_dimm_0_t)) {
309 308 fmd_hdl_abort(hdl, "size of state doesn't match size of "
310 309 "version 0 state (%u bytes).\n", sizeof (cmd_dimm_0_t));
311 310 }
312 311
313 312 new = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
314 313 new->dimm_header = old->dimm0_header;
315 314 new->dimm_version = CMD_DIMM_VERSION;
316 315 new->dimm_asru = old->dimm0_asru;
317 316 new->dimm_nretired = old->dimm0_nretired;
318 317 new->dimm_phys_addr_hi = 0;
319 318 new->dimm_phys_addr_low = ULLONG_MAX;
320 319
321 320 fmd_hdl_free(hdl, old, oldsz);
322 321 return (new);
323 322 }
324 323
325 324 static cmd_dimm_t *
326 325 dimm_v1tov2(fmd_hdl_t *hdl, cmd_dimm_1_t *old, size_t oldsz)
327 326 {
328 327
329 328 cmd_dimm_t *new;
330 329
331 330 if (oldsz != sizeof (cmd_dimm_1_t)) {
332 331 fmd_hdl_abort(hdl, "size of state doesn't match size of "
333 332 "version 1 state (%u bytes).\n", sizeof (cmd_dimm_1_t));
334 333 }
335 334
336 335 new = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
337 336
338 337 new->dimm_header = old->dimm1_header;
339 338 new->dimm_version = CMD_DIMM_VERSION;
340 339 new->dimm_asru = old->dimm1_asru;
341 340 new->dimm_nretired = old->dimm1_nretired;
342 341 new->dimm_flags = old->dimm1_flags;
343 342 new->dimm_phys_addr_hi = 0;
344 343 new->dimm_phys_addr_low = ULLONG_MAX;
345 344
346 345 fmd_hdl_free(hdl, old, oldsz);
347 346 return (new);
348 347 }
349 348
350 349 static cmd_dimm_t *
351 350 dimm_wrapv2(fmd_hdl_t *hdl, cmd_dimm_pers_t *pers, size_t psz)
352 351 {
353 352 cmd_dimm_t *dimm;
354 353
355 354 if (psz != sizeof (cmd_dimm_pers_t)) {
356 355 fmd_hdl_abort(hdl, "size of state doesn't match size of "
357 356 "version 1 state (%u bytes).\n", sizeof (cmd_dimm_pers_t));
358 357 }
359 358
360 359 dimm = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
361 360 bcopy(pers, dimm, sizeof (cmd_dimm_pers_t));
362 361 fmd_hdl_free(hdl, pers, psz);
363 362 return (dimm);
364 363 }
365 364
366 365 void *
367 366 cmd_dimm_restore(fmd_hdl_t *hdl, fmd_case_t *cp, cmd_case_ptr_t *ptr)
368 367 {
369 368 cmd_dimm_t *dimm;
370 369
371 370 for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL;
372 371 dimm = cmd_list_next(dimm)) {
373 372 if (strcmp(dimm->dimm_bufname, ptr->ptr_name) == 0)
374 373 break;
375 374 }
376 375
377 376 if (dimm == NULL) {
378 377 int migrated = 0;
379 378 size_t dimmsz;
380 379
381 380 fmd_hdl_debug(hdl, "restoring dimm from %s\n", ptr->ptr_name);
382 381
383 382 if ((dimmsz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) {
384 383 fmd_hdl_abort(hdl, "dimm referenced by case %s does "
385 384 "not exist in saved state\n",
386 385 fmd_case_uuid(hdl, cp));
387 386 } else if (dimmsz > CMD_DIMM_MAXSIZE ||
388 387 dimmsz < CMD_DIMM_MINSIZE) {
389 388 fmd_hdl_abort(hdl,
390 389 "dimm buffer referenced by case %s "
391 390 "is out of bounds (is %u bytes, max %u, min %u)\n",
392 391 fmd_case_uuid(hdl, cp), dimmsz,
393 392 CMD_DIMM_MAXSIZE, CMD_DIMM_MINSIZE);
394 393 }
395 394
396 395 if ((dimm = cmd_buf_read(hdl, NULL, ptr->ptr_name,
397 396 dimmsz)) == NULL) {
398 397 fmd_hdl_abort(hdl, "failed to read dimm buf %s",
399 398 ptr->ptr_name);
400 399 }
401 400
402 401 fmd_hdl_debug(hdl, "found %d in version field\n",
403 402 dimm->dimm_version);
404 403
405 404 if (CMD_DIMM_VERSIONED(dimm)) {
406 405 switch (dimm->dimm_version) {
407 406 case CMD_DIMM_VERSION_1:
408 407 dimm = dimm_v1tov2(hdl, (cmd_dimm_1_t *)dimm,
409 408 dimmsz);
410 409 break;
411 410 case CMD_DIMM_VERSION_2:
412 411 dimm = dimm_wrapv2(hdl, (cmd_dimm_pers_t *)dimm,
413 412 dimmsz);
414 413 break;
415 414 default:
416 415 fmd_hdl_abort(hdl, "unknown version (found %d) "
417 416 "for dimm state referenced by case %s.\n",
418 417 dimm->dimm_version, fmd_case_uuid(hdl, cp));
419 418 break;
420 419 }
421 420 } else {
422 421 dimm = dimm_v0tov2(hdl, (cmd_dimm_0_t *)dimm, dimmsz);
423 422 migrated = 1;
424 423 }
425 424
426 425 if (migrated) {
427 426 CMD_STAT_BUMP(dimm_migrat);
428 427 cmd_dimm_dirty(hdl, dimm);
429 428 }
430 429
431 430 cmd_fmri_restore(hdl, &dimm->dimm_asru);
432 431
433 432 if ((errno = nvlist_lookup_string(dimm->dimm_asru_nvl,
434 433 FM_FMRI_MEM_UNUM, (char **)&dimm->dimm_unum)) != 0)
435 434 fmd_hdl_abort(hdl, "failed to retrieve unum from asru");
436 435
437 436 dimm_attach_to_bank(hdl, dimm);
438 437
439 438 cmd_mem_retirestat_create(hdl, &dimm->dimm_retstat,
440 439 dimm->dimm_unum, dimm->dimm_nretired, CMD_DIMM_STAT_PREFIX);
441 440
442 441 cmd_list_append(&cmd.cmd_dimms, dimm);
443 442 }
444 443
445 444 switch (ptr->ptr_subtype) {
446 445 case BUG_PTR_DIMM_CASE:
447 446 fmd_hdl_debug(hdl, "recovering from out of order dimm ptr\n");
448 447 cmd_case_redirect(hdl, cp, CMD_PTR_DIMM_CASE);
449 448 /*FALLTHROUGH*/
450 449 case CMD_PTR_DIMM_CASE:
451 450 cmd_mem_case_restore(hdl, &dimm->dimm_case, cp, "dimm",
452 451 dimm->dimm_unum);
453 452 break;
454 453 default:
455 454 fmd_hdl_abort(hdl, "invalid %s subtype %d\n",
456 455 ptr->ptr_name, ptr->ptr_subtype);
457 456 }
458 457
459 458 return (dimm);
460 459 }
461 460
462 461 void
463 462 cmd_dimm_validate(fmd_hdl_t *hdl)
464 463 {
465 464 cmd_dimm_t *dimm, *next;
466 465
467 466 for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL; dimm = next) {
468 467 next = cmd_list_next(dimm);
469 468
470 469 if (!fmd_nvl_fmri_present(hdl, dimm->dimm_asru_nvl))
471 470 cmd_dimm_destroy(hdl, dimm);
472 471 }
473 472 }
474 473
475 474 void
476 475 cmd_dimm_dirty(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
477 476 {
478 477 if (fmd_buf_size(hdl, NULL, dimm->dimm_bufname) !=
479 478 sizeof (cmd_dimm_pers_t))
480 479 fmd_buf_destroy(hdl, NULL, dimm->dimm_bufname);
481 480
482 481 /* No need to rewrite the FMRIs in the dimm - they don't change */
483 482 fmd_buf_write(hdl, NULL, dimm->dimm_bufname, &dimm->dimm_pers,
484 483 sizeof (cmd_dimm_pers_t));
485 484 }
486 485
487 486 void
488 487 cmd_dimm_gc(fmd_hdl_t *hdl)
489 488 {
490 489 cmd_dimm_validate(hdl);
491 490 }
492 491
493 492 void
494 493 cmd_dimm_fini(fmd_hdl_t *hdl)
495 494 {
496 495 cmd_dimm_t *dimm;
497 496
498 497 while ((dimm = cmd_list_next(&cmd.cmd_dimms)) != NULL)
499 498 cmd_dimm_free(hdl, dimm, FMD_B_FALSE);
500 499 }
501 500
502 501
503 502 void
504 503 cmd_dimm_save_symbol_error(cmd_dimm_t *dimm, uint16_t upos)
505 504 {
506 505 cmd_dimm_t *d = NULL, *next = NULL;
507 506
508 507 for (d = cmd_list_next(&cmd.cmd_dimms); d != NULL; d = next) {
509 508 next = cmd_list_next(d);
510 509 if (cmd_same_datapath_dimms(dimm, d))
511 510 d->dimm_syl_error = upos;
512 511 }
513 512 }
514 513
515 514 int
516 515 cmd_dimm_check_symbol_error(cmd_dimm_t *dimm, uint16_t synd)
517 516 {
518 517 int upos;
519 518 cmd_dimm_t *d, *next;
520 519
521 520 if ((upos = cmd_synd2upos(synd)) < 0)
522 521 return (0);
523 522
524 523 for (d = cmd_list_next(&cmd.cmd_dimms); d != NULL; d = next) {
525 524 next = cmd_list_next(d);
526 525 if (cmd_same_datapath_dimms(dimm, d) &&
527 526 (d->dimm_syl_error == upos))
528 527 return (1);
529 528 }
530 529
531 530 return (0);
532 531 }
↓ open down ↓ |
423 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX