1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /*
  26  * Support routines for DIMMs.
  27  */
  28 
  29 #include <cmd_mem.h>
  30 #include <limits.h>
  31 #include <cmd_dimm.h>
  32 #include <cmd_bank.h>
  33 #include <cmd.h>
  34 
  35 #include <errno.h>
  36 #include <string.h>
  37 #include <strings.h>
  38 #include <fcntl.h>
  39 #include <unistd.h>
  40 #include <fm/fmd_api.h>
  41 #include <sys/fm/protocol.h>
  42 #include <sys/mem.h>
  43 #include <sys/nvpair.h>
  44 #ifdef sun4v
  45 #include <cmd_hc_sun4v.h>
  46 #include <cmd_branch.h>
  47 #endif /* sun4v */
  48 
  49 /*
  50  * Some errors (RxE/FRx pairs) don't have accurate DIMM (resource) FMRIs,
  51  * because sufficient information was unavailable prior to correlation.
  52  * When the DE completes the pair, it uses this routine to retrieve the
  53  * correct FMRI.
  54  */
  55 nvlist_t *
  56 cmd_dimm_fmri_derive(fmd_hdl_t *hdl, uint64_t afar, uint16_t synd,
  57     uint64_t afsr)
  58 {
  59         nvlist_t *fmri;
  60 
  61         if ((fmri = cmd_mem_fmri_derive(hdl, afar, afsr, synd)) == NULL)
  62                 return (NULL);
  63 
  64         if (fmd_nvl_fmri_expand(hdl, fmri) < 0) {
  65                 nvlist_free(fmri);
  66                 return (NULL);
  67         }
  68 
  69         return (fmri);
  70 }
  71 
  72 nvlist_t *
  73 cmd_dimm_fru(cmd_dimm_t *dimm)
  74 {
  75         return (dimm->dimm_asru_nvl);
  76 }
  77 
  78 nvlist_t *
  79 cmd_dimm_create_fault(fmd_hdl_t *hdl, cmd_dimm_t *dimm, const char *fltnm,
  80     uint_t cert)
  81 {
  82 #ifdef sun4v
  83         nvlist_t *flt, *nvlfru;
  84         /*
  85          * Do NOT issue hc scheme FRU FMRIs for ultraSPARC-T1 platforms.
  86          * The SP will misinterpret the FRU. Instead, reuse the ASRU FMRI
  87          *
  88          * Use the BR string as a distinguisher. BR (branch) is only
  89          * present in ultraSPARC-T2/T2plus DIMM unums
  90          */
  91         if (strstr(dimm->dimm_unum, "BR") == NULL) {
  92                 flt = cmd_nvl_create_fault(hdl, fltnm, cert,
  93                     dimm->dimm_asru_nvl, dimm->dimm_asru_nvl, NULL);
  94         } else {
  95                 nvlfru = cmd_mem2hc(hdl, dimm->dimm_asru_nvl);
  96                 flt = cmd_nvl_create_fault(hdl, fltnm, cert,
  97                     dimm->dimm_asru_nvl, nvlfru, NULL);
  98                 nvlist_free(nvlfru);
  99         }
 100         return (cmd_fault_add_location(hdl, flt, dimm->dimm_unum));
 101 #else
 102         return (cmd_nvl_create_fault(hdl, fltnm, cert, dimm->dimm_asru_nvl,
 103             dimm->dimm_asru_nvl, NULL));
 104 #endif /* sun4v */
 105 }
 106 
 107 static void
 108 cmd_dimm_free(fmd_hdl_t *hdl, cmd_dimm_t *dimm, int destroy)
 109 {
 110         cmd_case_t *cc = &dimm->dimm_case;
 111         int i;
 112         cmd_mq_t *q;
 113         tstamp_t  *tsp, *next;
 114 
 115 #ifdef sun4v
 116         cmd_branch_t *branch;
 117 #endif
 118         if (cc->cc_cp != NULL) {
 119                 cmd_case_fini(hdl, cc->cc_cp, destroy);
 120                 if (cc->cc_serdnm != NULL) {
 121                         if (fmd_serd_exists(hdl, cc->cc_serdnm) &&
 122                             destroy)
 123                                 fmd_serd_destroy(hdl, cc->cc_serdnm);
 124                         fmd_hdl_strfree(hdl, cc->cc_serdnm);
 125                 }
 126         }
 127 
 128         for (i = 0; i < CMD_MAX_CKWDS; i++) {
 129                 while ((q = cmd_list_next(&dimm->mq_root[i])) != NULL) {
 130                         if (q->mq_serdnm != NULL) {
 131                                 if (fmd_serd_exists(hdl, q->mq_serdnm)) {
 132                                         fmd_serd_destroy(hdl, q->mq_serdnm);
 133                                 }
 134                                 fmd_hdl_strfree(hdl, q->mq_serdnm);
 135                                 q->mq_serdnm = NULL;
 136                         }
 137 
 138                         for (tsp = cmd_list_next(&q->mq_dupce_tstamp);
 139                             tsp != NULL; tsp = next) {
 140                                 next = cmd_list_next(tsp);
 141                                 cmd_list_delete(&q->mq_dupce_tstamp,
 142                                     &tsp->ts_l);
 143                                 fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
 144                         }
 145 
 146                         cmd_list_delete(&dimm->mq_root[i], q);
 147                         fmd_hdl_free(hdl, q, sizeof (cmd_mq_t));
 148                 }
 149         }
 150 
 151         if (dimm->dimm_bank != NULL)
 152                 cmd_bank_remove_dimm(hdl, dimm->dimm_bank, dimm);
 153 
 154 #ifdef sun4v
 155         branch = cmd_branch_lookup_by_unum(hdl, dimm->dimm_unum);
 156         if (branch != NULL)
 157                 cmd_branch_remove_dimm(hdl, branch, dimm);
 158 #endif
 159 
 160         cmd_fmri_fini(hdl, &dimm->dimm_asru, destroy);
 161 
 162         if (destroy)
 163                 fmd_buf_destroy(hdl, NULL, dimm->dimm_bufname);
 164 
 165         cmd_list_delete(&cmd.cmd_dimms, dimm);
 166         fmd_hdl_free(hdl, dimm, sizeof (cmd_dimm_t));
 167 }
 168 
 169 void
 170 cmd_dimm_destroy(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
 171 {
 172 
 173         fmd_stat_destroy(hdl, 1, &(dimm->dimm_retstat));
 174         cmd_dimm_free(hdl, dimm, FMD_B_TRUE);
 175 }
 176 
 177 static cmd_dimm_t *
 178 dimm_lookup_by_unum(const char *unum)
 179 {
 180         cmd_dimm_t *dimm;
 181 
 182         for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL;
 183             dimm = cmd_list_next(dimm)) {
 184                 if (strcmp(dimm->dimm_unum, unum) == 0)
 185                         return (dimm);
 186         }
 187 
 188         return (NULL);
 189 }
 190 
 191 static void
 192 dimm_attach_to_bank(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
 193 {
 194         cmd_bank_t *bank;
 195 
 196         for (bank = cmd_list_next(&cmd.cmd_banks); bank != NULL;
 197             bank = cmd_list_next(bank)) {
 198                 if (fmd_nvl_fmri_contains(hdl, bank->bank_asru_nvl,
 199                     dimm->dimm_asru_nvl)) {
 200                         cmd_bank_add_dimm(hdl, bank, dimm);
 201                         return;
 202                 }
 203         }
 204 }
 205 
 206 cmd_dimm_t *
 207 cmd_dimm_create(fmd_hdl_t *hdl, nvlist_t *asru)
 208 {
 209         cmd_dimm_t *dimm;
 210         const char *unum;
 211         nvlist_t *fmri;
 212         size_t nserids = 0;
 213         char **serids = NULL;
 214 
 215         if (!fmd_nvl_fmri_present(hdl, asru)) {
 216                 fmd_hdl_debug(hdl, "dimm_lookup: discarding old ereport\n");
 217                 return (NULL);
 218         }
 219 
 220         if ((unum = cmd_fmri_get_unum(asru)) == NULL) {
 221                 CMD_STAT_BUMP(bad_mem_asru);
 222                 return (NULL);
 223         }
 224 
 225 #ifdef sun4v
 226         if (nvlist_lookup_string_array(asru, FM_FMRI_HC_SERIAL_ID, &serids,
 227             &nserids) != 0) {
 228                 fmd_hdl_debug(hdl, "sun4v mem: FMRI does not"
 229                     " have serial_ids\n");
 230                 CMD_STAT_BUMP(bad_mem_asru);
 231                 return (NULL);
 232         }
 233 #endif
 234         fmri = cmd_mem_fmri_create(unum, serids, nserids);
 235         if (fmd_nvl_fmri_expand(hdl, fmri) < 0) {
 236                 CMD_STAT_BUMP(bad_mem_asru);
 237                 nvlist_free(fmri);
 238                 return (NULL);
 239         }
 240 
 241         fmd_hdl_debug(hdl, "dimm_create: creating new DIMM %s\n", unum);
 242         CMD_STAT_BUMP(dimm_creat);
 243 
 244         dimm = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
 245         dimm->dimm_nodetype = CMD_NT_DIMM;
 246         dimm->dimm_version = CMD_DIMM_VERSION;
 247         dimm->dimm_phys_addr_low = ULLONG_MAX;
 248         dimm->dimm_phys_addr_hi = 0;
 249         dimm->dimm_syl_error = USHRT_MAX;
 250 
 251         cmd_bufname(dimm->dimm_bufname, sizeof (dimm->dimm_bufname), "dimm_%s",
 252             unum);
 253         cmd_fmri_init(hdl, &dimm->dimm_asru, fmri, "dimm_asru_%s", unum);
 254 
 255         nvlist_free(fmri);
 256 
 257         (void) nvlist_lookup_string(dimm->dimm_asru_nvl, FM_FMRI_MEM_UNUM,
 258             (char **)&dimm->dimm_unum);
 259 
 260         dimm_attach_to_bank(hdl, dimm);
 261 
 262         cmd_mem_retirestat_create(hdl, &dimm->dimm_retstat, dimm->dimm_unum, 0,
 263             CMD_DIMM_STAT_PREFIX);
 264 
 265         cmd_list_append(&cmd.cmd_dimms, dimm);
 266         cmd_dimm_dirty(hdl, dimm);
 267 
 268         return (dimm);
 269 }
 270 
 271 cmd_dimm_t *
 272 cmd_dimm_lookup(fmd_hdl_t *hdl, nvlist_t *asru)
 273 {
 274         cmd_dimm_t *dimm;
 275         const char *unum;
 276 
 277         if ((unum = cmd_fmri_get_unum(asru)) == NULL) {
 278                 CMD_STAT_BUMP(bad_mem_asru);
 279                 return (NULL);
 280         }
 281 
 282         dimm = dimm_lookup_by_unum(unum);
 283 
 284         if (dimm != NULL && !fmd_nvl_fmri_present(hdl, dimm->dimm_asru_nvl)) {
 285                 /*
 286                  * The DIMM doesn't exist anymore, so we need to delete the
 287                  * state structure, which is now out of date.  The containing
 288                  * bank (if any) is also out of date, so blow it away too.
 289                  */
 290                 fmd_hdl_debug(hdl, "dimm_lookup: discarding old dimm\n");
 291 
 292                 if (dimm->dimm_bank != NULL)
 293                         cmd_bank_destroy(hdl, dimm->dimm_bank);
 294                 cmd_dimm_destroy(hdl, dimm);
 295 
 296                 return (NULL);
 297         }
 298 
 299         return (dimm);
 300 }
 301 
 302 static cmd_dimm_t *
 303 dimm_v0tov2(fmd_hdl_t *hdl, cmd_dimm_0_t *old, size_t oldsz)
 304 {
 305         cmd_dimm_t *new;
 306 
 307         if (oldsz != sizeof (cmd_dimm_0_t)) {
 308                 fmd_hdl_abort(hdl, "size of state doesn't match size of "
 309                     "version 0 state (%u bytes).\n", sizeof (cmd_dimm_0_t));
 310         }
 311 
 312         new = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
 313         new->dimm_header = old->dimm0_header;
 314         new->dimm_version = CMD_DIMM_VERSION;
 315         new->dimm_asru = old->dimm0_asru;
 316         new->dimm_nretired = old->dimm0_nretired;
 317         new->dimm_phys_addr_hi = 0;
 318         new->dimm_phys_addr_low = ULLONG_MAX;
 319 
 320         fmd_hdl_free(hdl, old, oldsz);
 321         return (new);
 322 }
 323 
 324 static cmd_dimm_t *
 325 dimm_v1tov2(fmd_hdl_t *hdl, cmd_dimm_1_t *old, size_t oldsz)
 326 {
 327 
 328         cmd_dimm_t *new;
 329 
 330         if (oldsz != sizeof (cmd_dimm_1_t)) {
 331                 fmd_hdl_abort(hdl, "size of state doesn't match size of "
 332                     "version 1 state (%u bytes).\n", sizeof (cmd_dimm_1_t));
 333         }
 334 
 335         new = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
 336 
 337         new->dimm_header = old->dimm1_header;
 338         new->dimm_version = CMD_DIMM_VERSION;
 339         new->dimm_asru = old->dimm1_asru;
 340         new->dimm_nretired = old->dimm1_nretired;
 341         new->dimm_flags = old->dimm1_flags;
 342         new->dimm_phys_addr_hi = 0;
 343         new->dimm_phys_addr_low = ULLONG_MAX;
 344 
 345         fmd_hdl_free(hdl, old, oldsz);
 346         return (new);
 347 }
 348 
 349 static cmd_dimm_t *
 350 dimm_wrapv2(fmd_hdl_t *hdl, cmd_dimm_pers_t *pers, size_t psz)
 351 {
 352         cmd_dimm_t *dimm;
 353 
 354         if (psz != sizeof (cmd_dimm_pers_t)) {
 355                 fmd_hdl_abort(hdl, "size of state doesn't match size of "
 356                     "version 1 state (%u bytes).\n", sizeof (cmd_dimm_pers_t));
 357         }
 358 
 359         dimm = fmd_hdl_zalloc(hdl, sizeof (cmd_dimm_t), FMD_SLEEP);
 360         bcopy(pers, dimm, sizeof (cmd_dimm_pers_t));
 361         fmd_hdl_free(hdl, pers, psz);
 362         return (dimm);
 363 }
 364 
 365 void *
 366 cmd_dimm_restore(fmd_hdl_t *hdl, fmd_case_t *cp, cmd_case_ptr_t *ptr)
 367 {
 368         cmd_dimm_t *dimm;
 369 
 370         for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL;
 371             dimm = cmd_list_next(dimm)) {
 372                 if (strcmp(dimm->dimm_bufname, ptr->ptr_name) == 0)
 373                         break;
 374         }
 375 
 376         if (dimm == NULL) {
 377                 int migrated = 0;
 378                 size_t dimmsz;
 379 
 380                 fmd_hdl_debug(hdl, "restoring dimm from %s\n", ptr->ptr_name);
 381 
 382                 if ((dimmsz = fmd_buf_size(hdl, NULL, ptr->ptr_name)) == 0) {
 383                         fmd_hdl_abort(hdl, "dimm referenced by case %s does "
 384                             "not exist in saved state\n",
 385                             fmd_case_uuid(hdl, cp));
 386                 } else if (dimmsz > CMD_DIMM_MAXSIZE ||
 387                     dimmsz < CMD_DIMM_MINSIZE) {
 388                         fmd_hdl_abort(hdl,
 389                             "dimm buffer referenced by case %s "
 390                             "is out of bounds (is %u bytes, max %u, min %u)\n",
 391                             fmd_case_uuid(hdl, cp), dimmsz,
 392                             CMD_DIMM_MAXSIZE, CMD_DIMM_MINSIZE);
 393                 }
 394 
 395                 if ((dimm = cmd_buf_read(hdl, NULL, ptr->ptr_name,
 396                     dimmsz)) == NULL) {
 397                         fmd_hdl_abort(hdl, "failed to read dimm buf %s",
 398                             ptr->ptr_name);
 399                 }
 400 
 401                 fmd_hdl_debug(hdl, "found %d in version field\n",
 402                     dimm->dimm_version);
 403 
 404                 if (CMD_DIMM_VERSIONED(dimm)) {
 405                         switch (dimm->dimm_version) {
 406                         case CMD_DIMM_VERSION_1:
 407                                 dimm = dimm_v1tov2(hdl, (cmd_dimm_1_t *)dimm,
 408                                     dimmsz);
 409                                 break;
 410                         case CMD_DIMM_VERSION_2:
 411                                 dimm = dimm_wrapv2(hdl, (cmd_dimm_pers_t *)dimm,
 412                                     dimmsz);
 413                                 break;
 414                         default:
 415                                 fmd_hdl_abort(hdl, "unknown version (found %d) "
 416                                     "for dimm state referenced by case %s.\n",
 417                                     dimm->dimm_version, fmd_case_uuid(hdl, cp));
 418                                 break;
 419                         }
 420                 } else {
 421                         dimm = dimm_v0tov2(hdl, (cmd_dimm_0_t *)dimm, dimmsz);
 422                         migrated = 1;
 423                 }
 424 
 425                 if (migrated) {
 426                         CMD_STAT_BUMP(dimm_migrat);
 427                         cmd_dimm_dirty(hdl, dimm);
 428                 }
 429 
 430                 cmd_fmri_restore(hdl, &dimm->dimm_asru);
 431 
 432                 if ((errno = nvlist_lookup_string(dimm->dimm_asru_nvl,
 433                     FM_FMRI_MEM_UNUM, (char **)&dimm->dimm_unum)) != 0)
 434                         fmd_hdl_abort(hdl, "failed to retrieve unum from asru");
 435 
 436                 dimm_attach_to_bank(hdl, dimm);
 437 
 438                 cmd_mem_retirestat_create(hdl, &dimm->dimm_retstat,
 439                     dimm->dimm_unum, dimm->dimm_nretired, CMD_DIMM_STAT_PREFIX);
 440 
 441                 cmd_list_append(&cmd.cmd_dimms, dimm);
 442         }
 443 
 444         switch (ptr->ptr_subtype) {
 445         case BUG_PTR_DIMM_CASE:
 446                 fmd_hdl_debug(hdl, "recovering from out of order dimm ptr\n");
 447                 cmd_case_redirect(hdl, cp, CMD_PTR_DIMM_CASE);
 448                 /*FALLTHROUGH*/
 449         case CMD_PTR_DIMM_CASE:
 450                 cmd_mem_case_restore(hdl, &dimm->dimm_case, cp, "dimm",
 451                     dimm->dimm_unum);
 452                 break;
 453         default:
 454                 fmd_hdl_abort(hdl, "invalid %s subtype %d\n",
 455                     ptr->ptr_name, ptr->ptr_subtype);
 456         }
 457 
 458         return (dimm);
 459 }
 460 
 461 void
 462 cmd_dimm_validate(fmd_hdl_t *hdl)
 463 {
 464         cmd_dimm_t *dimm, *next;
 465 
 466         for (dimm = cmd_list_next(&cmd.cmd_dimms); dimm != NULL; dimm = next) {
 467                 next = cmd_list_next(dimm);
 468 
 469                 if (!fmd_nvl_fmri_present(hdl, dimm->dimm_asru_nvl))
 470                         cmd_dimm_destroy(hdl, dimm);
 471         }
 472 }
 473 
 474 void
 475 cmd_dimm_dirty(fmd_hdl_t *hdl, cmd_dimm_t *dimm)
 476 {
 477         if (fmd_buf_size(hdl, NULL, dimm->dimm_bufname) !=
 478             sizeof (cmd_dimm_pers_t))
 479                 fmd_buf_destroy(hdl, NULL, dimm->dimm_bufname);
 480 
 481         /* No need to rewrite the FMRIs in the dimm - they don't change */
 482         fmd_buf_write(hdl, NULL, dimm->dimm_bufname, &dimm->dimm_pers,
 483             sizeof (cmd_dimm_pers_t));
 484 }
 485 
 486 void
 487 cmd_dimm_gc(fmd_hdl_t *hdl)
 488 {
 489         cmd_dimm_validate(hdl);
 490 }
 491 
 492 void
 493 cmd_dimm_fini(fmd_hdl_t *hdl)
 494 {
 495         cmd_dimm_t *dimm;
 496 
 497         while ((dimm = cmd_list_next(&cmd.cmd_dimms)) != NULL)
 498                 cmd_dimm_free(hdl, dimm, FMD_B_FALSE);
 499 }
 500 
 501 
 502 void
 503 cmd_dimm_save_symbol_error(cmd_dimm_t *dimm, uint16_t upos)
 504 {
 505         cmd_dimm_t *d = NULL, *next = NULL;
 506 
 507         for (d = cmd_list_next(&cmd.cmd_dimms); d != NULL; d = next) {
 508                 next = cmd_list_next(d);
 509                 if (cmd_same_datapath_dimms(dimm, d))
 510                         d->dimm_syl_error = upos;
 511         }
 512 }
 513 
 514 int
 515 cmd_dimm_check_symbol_error(cmd_dimm_t *dimm, uint16_t synd)
 516 {
 517         int upos;
 518         cmd_dimm_t *d, *next;
 519 
 520         if ((upos = cmd_synd2upos(synd)) < 0)
 521                 return (0);
 522 
 523         for (d = cmd_list_next(&cmd.cmd_dimms); d != NULL; d = next) {
 524                 next = cmd_list_next(d);
 525                 if (cmd_same_datapath_dimms(dimm, d) &&
 526                     (d->dimm_syl_error == upos))
 527                         return (1);
 528         }
 529 
 530         return (0);
 531 }