/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/errno.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/cpu.h>
#include <sys/cpuvar.h>
#include <sys/clock.h>
#include <sys/promif.h>
#include <sys/promimpl.h>
#include <sys/systm.h>
#include <sys/machsystm.h>
#include <sys/debug.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/cpu_module.h>
#include <sys/kobj.h>
#include <sys/cmp.h>
#include <sys/async.h>
#include <vm/page.h>
#include <vm/hat_sfmmu.h>
#include <sys/sysmacros.h>
#include <sys/mach_descrip.h>
#include <sys/mdesc.h>
#include <sys/archsystm.h>
#include <sys/error.h>
#include <sys/mmu.h>
#include <sys/bitmap.h>
#include <sys/intreg.h>
#include <sys/instance.h>

struct cpu_node cpunodes[NCPU];

uint64_t cpu_q_entries;
uint64_t dev_q_entries;
uint64_t cpu_rq_entries;
uint64_t cpu_nrq_entries;
uint64_t ncpu_guest_max;

void fill_cpu(md_t *, mde_cookie_t);

static uint64_t get_mmu_ctx_bits(md_t *, mde_cookie_t);
static uint64_t get_mmu_tsbs(md_t *, mde_cookie_t);
static uint64_t get_mmu_shcontexts(md_t *, mde_cookie_t);
static uint64_t get_cpu_pagesizes(md_t *, mde_cookie_t);
static char *construct_isalist(md_t *, mde_cookie_t, char **);
static void init_md_broken(md_t *, mde_cookie_t *);
static int get_l2_cache_info(md_t *, mde_cookie_t, uint64_t *, uint64_t *,
    uint64_t *);
static void get_hwcaps(md_t *, mde_cookie_t);
static void get_weakest_mem_model(md_t *, mde_cookie_t);
static void get_q_sizes(md_t *, mde_cookie_t);
static void get_va_bits(md_t *, mde_cookie_t);
static uint64_t get_ra_limit(md_t *, mde_cookie_t);
static int get_l2_cache_node_count(md_t *);
static unsigned long names2bits(char *tokens, size_t tokenslen,
    char *bit_formatter, char *warning);

uint64_t        system_clock_freq;
uint_t          niommu_tsbs = 0;

static int n_l2_caches = 0;

/* prevent compilation with VAC defined */
#ifdef VAC
#error "The sun4v architecture does not support VAC"
#endif

#define S_VAC_SIZE      MMU_PAGESIZE
#define S_VAC_SHIFT     MMU_PAGESHIFT

int             vac_size = S_VAC_SIZE;
uint_t          vac_mask = MMU_PAGEMASK & (S_VAC_SIZE - 1);
int             vac_shift = S_VAC_SHIFT;
uintptr_t       shm_alignment = S_VAC_SIZE;

void
map_wellknown_devices()
{
}

void
fill_cpu(md_t *mdp, mde_cookie_t cpuc)
{
        struct cpu_node *cpunode;
        uint64_t cpuid;
        uint64_t clk_freq;
        char *namebuf;
        char *namebufp;
        int namelen;
        uint64_t associativity = 0, linesize = 0, size = 0;

        if (md_get_prop_val(mdp, cpuc, "id", &cpuid)) {
                return;
        }

        /* All out-of-range cpus will be stopped later. */
        if (cpuid >= NCPU) {
                cmn_err(CE_CONT, "fill_cpu: out of range cpuid %ld - "
                    "cpu excluded from configuration\n", cpuid);

                return;
        }

        cpunode = &cpunodes[cpuid];
        cpunode->cpuid = (int)cpuid;
        cpunode->device_id = cpuid;

        if (sizeof (cpunode->fru_fmri) > strlen(CPU_FRU_FMRI))
                (void) strcpy(cpunode->fru_fmri, CPU_FRU_FMRI);

        if (md_get_prop_data(mdp, cpuc,
            "compatible", (uint8_t **)&namebuf, &namelen)) {
                cmn_err(CE_PANIC, "fill_cpu: Cannot read compatible "
                    "property");
        }
        namebufp = namebuf;
        if (strncmp(namebufp, "SUNW,", 5) == 0)
                namebufp += 5;
        if (strlen(namebufp) >= sizeof (cpunode->name))
                cmn_err(CE_PANIC, "Compatible property too big to "
                    "fit into the cpunode name buffer");
        (void) strcpy(cpunode->name, namebufp);

        if (md_get_prop_val(mdp, cpuc,
            "clock-frequency", &clk_freq)) {
                clk_freq = 0;
        }
        cpunode->clock_freq = clk_freq;

        ASSERT(cpunode->clock_freq != 0);
        /*
         * Compute scaling factor based on rate of %tick. This is used
         * to convert from ticks derived from %tick to nanoseconds. See
         * comment in sun4u/sys/clock.h for details.
         */
        cpunode->tick_nsec_scale = (uint_t)(((uint64_t)NANOSEC <<
            (32 - TICK_NSEC_SHIFT)) / cpunode->clock_freq);
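        /*
         * Illustrative arithmetic (numbers are examples, not from the MD):
         * with a 1 GHz clock, tick_nsec_scale = (10^9 << (32 -
         * TICK_NSEC_SHIFT)) / 10^9, so that
         * nsec ~= (ticks * tick_nsec_scale) >> (32 - TICK_NSEC_SHIFT),
         * i.e. ticks scaled by NANOSEC / clock_freq in fixed point.
         */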

        /*
         * The nodeid is not used in sun4v at all. Set it
         * to a positive value to keep the slave-CPU startup
         * code happy.
         */
        cpunode->nodeid = cpuid + 1;

        /*
         * Obtain the L2 cache information from the MD.
         * If a "cache" node exists, set the L2 cache properties
         * as read from the MD.
         * If the node does not exist, set the L2 cache properties
         * in the individual CPU module.
         */
        if ((!get_l2_cache_info(mdp, cpuc,
            &associativity, &size, &linesize)) ||
            associativity == 0 || size == 0 || linesize == 0) {
                cpu_fiximp(cpunode);
        } else {
                /*
                 * The L2 cache properties are not expected to be
                 * larger than 32-bit quantities.
                 */
                cpunode->ecache_associativity = (int)associativity;
                cpunode->ecache_size = (int)size;
                cpunode->ecache_linesize = (int)linesize;
        }

        cpunode->ecache_setsize =
            cpunode->ecache_size / cpunode->ecache_associativity;

        /*
         * Initialize the mapping for exec unit, chip and core.
         */
        cpunode->exec_unit_mapping = NO_EU_MAPPING_FOUND;
        cpunode->l2_cache_mapping = NO_MAPPING_FOUND;
        cpunode->core_mapping = NO_CORE_MAPPING_FOUND;

        if (ecache_setsize == 0)
                ecache_setsize = cpunode->ecache_setsize;
        if (ecache_alignsize == 0)
                ecache_alignsize = cpunode->ecache_linesize;
}

void
empty_cpu(int cpuid)
{
        bzero(&cpunodes[cpuid], sizeof (struct cpu_node));
}

/*
 * Use the L2 cache node to derive the chip mapping.
 */
void
setup_chip_mappings(md_t *mdp)
{
        int ncache, ncpu;
        mde_cookie_t *node, *cachelist;
        int i, j;
        processorid_t cpuid;
        int idx = 0;

        ncache = md_alloc_scan_dag(mdp, md_root_node(mdp), "cache",
            "fwd", &cachelist);

        /*
         * The "cache" node is optional in the MD, therefore ncache can be 0.
         */
        if (ncache < 1) {
                return;
        }

        for (i = 0; i < ncache; i++) {
                uint64_t cache_level;
                uint64_t lcpuid;

                if (md_get_prop_val(mdp, cachelist[i], "level", &cache_level))
                        continue;

                if (cache_level != 2)
                        continue;

                /*
                 * Found an L2 cache node. Find out the cpu nodes it
                 * points to.
                 */
                ncpu = md_alloc_scan_dag(mdp, cachelist[i], "cpu",
                    "back", &node);

                if (ncpu < 1)
                        continue;

                for (j = 0; j < ncpu; j++) {
                        if (md_get_prop_val(mdp, node[j], "id", &lcpuid))
                                continue;
                        if (lcpuid >= NCPU)
                                continue;
                        cpuid = (processorid_t)lcpuid;
                        cpunodes[cpuid].l2_cache_mapping = idx;
                }
                md_free_scan_dag(mdp, &node);

                idx++;
        }

        md_free_scan_dag(mdp, &cachelist);
}

void
setup_exec_unit_mappings(md_t *mdp)
{
        int num, num_eunits;
        mde_cookie_t cpus_node;
        mde_cookie_t *node, *eunit;
        int idx, i, j;
        processorid_t cpuid;
        char *eunit_name = broken_md_flag ? "exec_unit" : "exec-unit";
        enum eu_type { INTEGER, FPU } etype;

        /*
         * Find the cpu integer and floating-point exec units, and
         * set up the mappings appropriately.
         */
        num = md_alloc_scan_dag(mdp, md_root_node(mdp), "cpus", "fwd", &node);
        if (num < 1)
                cmn_err(CE_PANIC, "No cpus node in machine description");
        if (num > 1)
                cmn_err(CE_PANIC, "More than 1 cpus node in machine"
                    " description");

        cpus_node = node[0];
        md_free_scan_dag(mdp, &node);

        num_eunits = md_alloc_scan_dag(mdp, cpus_node, eunit_name,
            "fwd", &eunit);
        if (num_eunits > 0) {
                char *int_str = broken_md_flag ? "int" : "integer";
                char *fpu_str = "fp";

                /* Spin through and find all the integer and fp exec units */
                for (i = 0; i < num_eunits; i++) {
                        char *p;
                        char *val;
                        int vallen;
                        uint64_t lcpuid;

                        /* ignore nodes with no type */
                        if (md_get_prop_data(mdp, eunit[i], "type",
                            (uint8_t **)&val, &vallen))
                                continue;

                        for (p = val; *p != '\0'; p += strlen(p) + 1) {
                                if (strcmp(p, int_str) == 0) {
                                        etype = INTEGER;
                                        goto found;
                                }
                                if (strcmp(p, fpu_str) == 0) {
                                        etype = FPU;
                                        goto found;
                                }
                        }

                        continue;
found:
                        idx = NCPU + i;
                        /*
                         * Find the cpus attached to this EU and
                         * update their mapping indices.
                         */
                        num = md_alloc_scan_dag(mdp, eunit[i], "cpu",
                            "back", &node);

                        if (num < 1)
                                cmn_err(CE_PANIC, "exec-unit node in MD"
                                    " not attached to a cpu node");

                        for (j = 0; j < num; j++) {
                                if (md_get_prop_val(mdp, node[j], "id",
                                    &lcpuid))
                                        continue;
                                if (lcpuid >= NCPU)
                                        continue;
                                cpuid = (processorid_t)lcpuid;
                                switch (etype) {
                                case INTEGER:
                                        cpunodes[cpuid].exec_unit_mapping = idx;
                                        break;
                                case FPU:
                                        cpunodes[cpuid].fpu_mapping = idx;
                                        break;
                                }
                        }
                        md_free_scan_dag(mdp, &node);
                }

                md_free_scan_dag(mdp, &eunit);
        }
}

/*
 * All the common setup of sun4v CPU modules is done by this routine.
 */
void
cpu_setup_common(char **cpu_module_isa_set)
{
        extern int mmu_exported_pagesize_mask;
        int nocpus, i;
        uint64_t ra_limit;
        mde_cookie_t *cpulist;
        md_t *mdp;

        if ((mdp = md_get_handle()) == NULL)
                cmn_err(CE_PANIC, "Unable to initialize machine description");

        boot_ncpus = nocpus = md_alloc_scan_dag(mdp,
            md_root_node(mdp), "cpu", "fwd", &cpulist);
        if (nocpus < 1) {
                cmn_err(CE_PANIC, "cpu_setup_common: cpulist allocation "
                    "failed or incorrect number of CPUs in MD");
        }

        init_md_broken(mdp, cpulist);

        if (use_page_coloring) {
                do_pg_coloring = 1;
        }

        /*
         * Get the valid mmu page sizes mask, Q sizes and isalist
         * from the MD for the first available CPU in cpulist.
         *
         * The MMU page sizes mask is not expected to exceed 32 bits.
         */
        mmu_exported_pagesize_mask = (int)get_cpu_pagesizes(mdp, cpulist[0]);

        /*
         * Get the number of contexts and tsbs supported.
         */
        if (get_mmu_shcontexts(mdp, cpulist[0]) >= MIN_NSHCONTEXTS &&
            get_mmu_tsbs(mdp, cpulist[0]) >= MIN_NTSBS) {
                shctx_on = 1;
        }

        for (i = 0; i < nocpus; i++)
                fill_cpu(mdp, cpulist[i]);

        /* setup l2 cache count. */
        n_l2_caches = get_l2_cache_node_count(mdp);

        setup_chip_mappings(mdp);
        setup_exec_unit_mappings(mdp);

        /*
         * If the MD is broken then append the passed ISA set,
         * otherwise trust the MD.
         */
        if (broken_md_flag)
                isa_list = construct_isalist(mdp, cpulist[0],
                    cpu_module_isa_set);
        else
                isa_list = construct_isalist(mdp, cpulist[0], NULL);

        get_hwcaps(mdp, cpulist[0]);
        get_weakest_mem_model(mdp, cpulist[0]);
        get_q_sizes(mdp, cpulist[0]);
        get_va_bits(mdp, cpulist[0]);

        /*
         * ra_limit is the highest real address in the machine.
         */
        ra_limit = get_ra_limit(mdp, cpulist[0]);

        md_free_scan_dag(mdp, &cpulist);

        (void) md_fini_handle(mdp);

        /*
         * Block stores invalidate all pages of the d$ so pagecopy
         * et al. do not need virtual translations with virtual
         * coloring taken into consideration.
         */
        pp_consistent_coloring = 0;

        /*
         * The kpm mapping window.
         * kpm_size:
         *      The size of a single kpm range.
         *      The overall size will be: kpm_size * vac_colors.
         * kpm_vbase:
         *      The virtual start address of the kpm range within the kernel
         *      virtual address space. kpm_vbase has to be kpm_size aligned.
         */

        /*
         * Make kpm_vbase, kpm_size aligned to kpm_size_shift.
         * To do this, find the nearest power of 2 size that the
         * actual ra_limit fits within.
         * If it is an even power of two, use that; otherwise use the
         * next power of two larger than ra_limit.
         */

        ASSERT(ra_limit != 0);

        kpm_size_shift = !ISP2(ra_limit) ?
            highbit(ra_limit) : highbit(ra_limit) - 1;

        /*
         * No virtual caches on sun4v, so the size matches the size shift.
         */
        kpm_size = 1ul << kpm_size_shift;
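        /*
         * Worked example (illustrative values): if ra_limit is 6 GB (not a
         * power of two), highbit() returns 33 and kpm_size becomes
         * 2^33 = 8 GB; if ra_limit is exactly 8 GB, highbit() returns 34,
         * kpm_size_shift is 33, and kpm_size is again 8 GB. Either way
         * kpm_size covers ra_limit.
         */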

        if (va_bits < VA_ADDRESS_SPACE_BITS) {
                /*
                 * In case of a VA hole:
                 * kpm_base = hole_end + 1TB
                 * Start 1TB beyond where the VA hole ends because on Niagara
                 * processors software must not use pages within 4GB of the
                 * VA hole as instruction pages to avoid problems with
                 * prefetching into the VA hole.
                 */
                kpm_vbase = (caddr_t)((0ull - (1ull << (va_bits - 1))) +
                    (1ull << 40));
        } else {                /* Number of VA bits 64 ... no VA hole */
                kpm_vbase = (caddr_t)0x8000000000000000ull;     /* 8 EB */
        }

        /*
         * The traptrace code uses either %tick or %stick for
         * timestamping.  sun4v requires the use of %stick.
         */
        traptrace_use_stick = 1;
}

/*
 * Get the nctxs from the MD. If absent, panic.
 */
static uint64_t
get_mmu_ctx_bits(md_t *mdp, mde_cookie_t cpu_node_cookie)
{
        uint64_t ctx_bits;

        if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-#context-bits",
            &ctx_bits))
                ctx_bits = 0;

        if (ctx_bits < MIN_NCTXS_BITS || ctx_bits > MAX_NCTXS_BITS)
                cmn_err(CE_PANIC, "Incorrect number of context bits (%ld) "
                    "returned by MD", ctx_bits);

        return (ctx_bits);
}

/*
 * Get the number of tsbs from the MD. If absent, the default value is 0.
 */
static uint64_t
get_mmu_tsbs(md_t *mdp, mde_cookie_t cpu_node_cookie)
{
        uint64_t number_tsbs;

        if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-max-#tsbs",
            &number_tsbs))
                number_tsbs = 0;

        return (number_tsbs);
}

/*
 * Get the number of shared contexts from the MD. If absent, the default
 * value is 0.
 */
static uint64_t
get_mmu_shcontexts(md_t *mdp, mde_cookie_t cpu_node_cookie)
{
        uint64_t number_contexts;

        if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-#shared-contexts",
            &number_contexts))
                number_contexts = 0;

        return (number_contexts);
}

/*
 * Initialize supported page sizes information.
 * Set to 0 if the page sizes mask information is absent in the MD.
 */
static uint64_t
get_cpu_pagesizes(md_t *mdp, mde_cookie_t cpu_node_cookie)
{
        uint64_t mmu_page_size_list;

        if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-page-size-list",
            &mmu_page_size_list))
                mmu_page_size_list = 0;

        if (mmu_page_size_list == 0 || mmu_page_size_list > MAX_PAGESIZE_MASK)
                cmn_err(CE_PANIC, "Incorrect 0x%lx pagesize mask returned "
                    "by MD", mmu_page_size_list);

        return (mmu_page_size_list);
}

/*
 * This routine gets the isalist information from the MD and appends
 * the CPU module ISA set if required.
 */
static char *
construct_isalist(md_t *mdp, mde_cookie_t cpu_node_cookie,
    char **cpu_module_isa_set)
{
        extern int at_flags;
        char *md_isalist;
        int md_isalen;
        char *isabuf;
        int isalen;
        char **isa_set;
        char *p, *q;
        int cpu_module_isalen = 0, found = 0;

        (void) md_get_prop_data(mdp, cpu_node_cookie,
            "isalist", (uint8_t **)&isabuf, &isalen);

        /*
         * We support binaries for all the cpus that have shipped so far.
         * The kernel emulates instructions that are not supported by hardware.
         */
        at_flags = EF_SPARC_SUN_US3 | EF_SPARC_32PLUS | EF_SPARC_SUN_US1;

        /*
         * Construct the space-separated isa_list.
         */
        if (cpu_module_isa_set != NULL) {
                for (isa_set = cpu_module_isa_set; *isa_set != NULL;
                    isa_set++) {
                        cpu_module_isalen += strlen(*isa_set);
                        cpu_module_isalen++;    /* for space character */
                }
        }

        /*
         * Allocate a buffer sized for the MD isalist plus the CPU
         * module ISA set.
         */
        md_isalen = isalen + cpu_module_isalen + 2;
        md_isalist = (char *)prom_alloc((caddr_t)0, md_isalen, 0);
        if (md_isalist == NULL)
                cmn_err(CE_PANIC, "construct_isalist: Allocation failed for "
                    "md_isalist");

        md_isalist[0] = '\0'; /* create an empty string to start */
        for (p = isabuf, q = p + isalen; p < q; p += strlen(p) + 1) {
                (void) strlcat(md_isalist, p, md_isalen);
                (void) strcat(md_isalist, " ");
        }

        /*
         * Check if each isa_set entry is present in the isalist returned
         * by the MD. If yes, there is no need to append it; if no, append
         * it to the isalist returned by the MD.
         */
        if (cpu_module_isa_set != NULL) {
                for (isa_set = cpu_module_isa_set; *isa_set != NULL;
                    isa_set++) {
                        found = 0;
                        for (p = isabuf, q = p + isalen; p < q;
                            p += strlen(p) + 1) {
                                if (strcmp(p, *isa_set) == 0) {
                                        found = 1;
                                        break;
                                }
                        }
                        if (!found) {
                                (void) strlcat(md_isalist, *isa_set, md_isalen);
                                (void) strcat(md_isalist, " ");
                        }
                }
        }

        /* Get rid of the trailing space */
        md_isalist[strlen(md_isalist) - 1] = '\0';

        return (md_isalist);
}

static void
get_hwcaps(md_t *mdp, mde_cookie_t cpu_node_cookie)
{
        char *hwcapbuf;
        int hwcaplen;

        if (md_get_prop_data(mdp, cpu_node_cookie,
            "hwcap-list", (uint8_t **)&hwcapbuf, &hwcaplen)) {
                /* Property not found */
                return;
        }

        cpu_hwcap_flags |= names2bits(hwcapbuf, hwcaplen, FMT_AV_SPARC,
            "unrecognized token: %s");
}

static void
get_weakest_mem_model(md_t *mdp, mde_cookie_t cpu_node_cookie)
{
        char *mmbuf;
        int mmlen;
        uint_t wmm;
        char *p, *q;

        if (md_get_prop_data(mdp, cpu_node_cookie,
            "memory-model-list", (uint8_t **)&mmbuf, &mmlen)) {
                /* Property not found */
                return;
        }

        wmm = TSTATE_MM_TSO;
        for (p = mmbuf, q = p + mmlen; p < q; p += strlen(p) + 1) {
                if (strcmp(p, "wc") == 0)
                        wmm = TSTATE_MM_WC;
        }
        weakest_mem_model = wmm;
}

/*
 * Does the opposite of cmn_err(9f) "%b" conversion specification:
 * Given a list of strings, converts them to a bit-vector.
 *
 *  tokens - is a buffer of [NUL-terminated] strings.
 *  tokenslen - length of tokens in bytes.
 *  bit_formatter - is a %b format string, such as FMT_AV_SPARC
 *    from /usr/include/sys/auxv_SPARC.h, of the form:
 *    <base-char>[<bit-char><token-string>]...
 *        <base-char> is ignored.
 *        <bit-char>  is [1-32], as per cmn_err(9f).
 *  warning - is a printf-style format string containing "%s",
 *    which is used to print a warning message when an unrecognized
 *    token is found.  If warning is NULL, no warning is printed.
 * Returns a bit-vector corresponding to the specified tokens.
 */
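/*
 * For example, with the hypothetical formatter "\020\002vis\001mul32"
 * (base-char '\020', "vis" on bit 2, "mul32" on bit 1), the token buffer
 * "mul32\0vis\0" maps to the bit-vector 0x3.
 */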

static unsigned long
names2bits(char *tokens, size_t tokenslen, char *bit_formatter, char *warning)
{
        char *cur;
        size_t  curlen;
        unsigned long ul = 0;
        char *hit;
        char *bs;

        bit_formatter++;        /* skip base; not needed for input */
        cur = tokens;
        while (tokenslen) {
                curlen = strlen(cur);
                bs = bit_formatter;
                /*
                 * We need a complicated while loop and the >=32 check,
                 * instead of a simple "if (strstr())" so that when the
                 * token is "vis", we don't match on "vis2" (for example).
                 */
                /* LINTED E_EQUALITY_NOT_ASSIGNMENT */
                while ((hit = strstr(bs, cur)) &&
                    *(hit + curlen) >= 32) {
                        /*
                         * We're still in the middle of a word, i.e., not
                         * pointing at a <bit-char>.  So advance ptr
                         * to ensure forward progress.
                         */
                        bs = hit + curlen + 1;
                }

                if (hit != NULL) {
                        ul |= (1 << (*(hit - 1) - 1));
                } else {
                        /* The token wasn't found in bit_formatter */
                        if (warning != NULL)
                                cmn_err(CE_WARN, warning, cur);
                }
                tokenslen -= curlen + 1;
                cur += curlen + 1;
        }
        return (ul);
}
static uint64_t
get_ra_limit(md_t *mdp, mde_cookie_t cpu_node_cookie)
{
        extern int ppvm_enable;
        extern int meta_alloc_enable;
        mde_cookie_t *mem_list;
        mde_cookie_t *mblock_list;
        int i;
        int memnodes;
        int nmblock;
        uint64_t r;
        uint64_t base;
        uint64_t size;
        uint64_t ra_limit = 0, new_limit = 0;

        if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-#ra-bits", &r) == 0) {
                if (r == 0 || r > RA_ADDRESS_SPACE_BITS)
                        cmn_err(CE_PANIC, "Incorrect number of ra bits in MD");
                else {
                        /*
                         * Enable memory DR and metadata (page_t)
                         * allocation from existing memory.
                         */
                        ppvm_enable = 1;
                        meta_alloc_enable = 1;
                        return (1ULL << r);
                }
        }

        memnodes = md_alloc_scan_dag(mdp,
            md_root_node(mdp), "memory", "fwd", &mem_list);

        ASSERT(memnodes == 1);

        nmblock = md_alloc_scan_dag(mdp,
            mem_list[0], "mblock", "fwd", &mblock_list);
        if (nmblock < 1)
                cmn_err(CE_PANIC, "cannot find mblock nodes in MD");

        for (i = 0; i < nmblock; i++) {
                if (md_get_prop_val(mdp, mblock_list[i], "base", &base))
                        cmn_err(CE_PANIC, "base property missing from MD"
                            " mblock node");
                if (md_get_prop_val(mdp, mblock_list[i], "size", &size))
                        cmn_err(CE_PANIC, "size property missing from MD"
                            " mblock node");

                ASSERT(size != 0);

                new_limit = base + size;

                if (base > new_limit)
                        cmn_err(CE_PANIC, "mblock in MD wrapped around");

                if (new_limit > ra_limit)
                        ra_limit = new_limit;
        }

        ASSERT(ra_limit != 0);

        if (ra_limit > MAX_REAL_ADDRESS) {
                cmn_err(CE_WARN, "Highest real address in MD too large;"
                    " clipping to %llx\n", MAX_REAL_ADDRESS);
                ra_limit = MAX_REAL_ADDRESS;
        }

        md_free_scan_dag(mdp, &mblock_list);

        md_free_scan_dag(mdp, &mem_list);

        return (ra_limit);
}

/*
 * This routine sets the globals for CPU and DEV mondo queue entries and
 * resumable and non-resumable error queue entries.
 *
 * First, look up the number of bits available to pass an entry number.
 * This can vary by platform and may result in allocating an unreasonably
 * (or impossibly) large amount of memory for the corresponding table,
 * so we clamp it by 'max_entries'.  Finally, since the q size is used when
 * calling contig_mem_alloc(), which expects a power of 2, clamp the q size
 * down to a power of 2.  If the prop is missing, use 'default_entries'.
 */
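/*
 * Worked example (illustrative values): a "q-cpu-mondo-#bits" property of
 * 11 yields 1 << 11 = 2048 entries; with max_entries = 512 (say, a 64-CPU
 * guest times CPU_MONDO_Q_MULTIPLIER), the result is clamped to 512, which
 * is already a power of 2 and is used as-is.
 */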
static uint64_t
get_single_q_size(md_t *mdp, mde_cookie_t cpu_node_cookie,
    char *qnamep, uint64_t default_entries, uint64_t max_entries)
{
        uint64_t entries;

        if (default_entries > max_entries)
                cmn_err(CE_CONT, "!get_single_q_size: dflt %ld > "
                    "max %ld for %s\n", default_entries, max_entries, qnamep);

        if (md_get_prop_val(mdp, cpu_node_cookie, qnamep, &entries)) {
                if (!broken_md_flag)
                        cmn_err(CE_PANIC, "Missing %s property in MD cpu node",
                            qnamep);
                entries = default_entries;
        } else {
                entries = 1ULL << entries;
        }

        entries = MIN(entries, max_entries);
        /* If not a power of 2, truncate to a power of 2. */
        if (!ISP2(entries)) {
                entries = 1ULL << (highbit(entries) - 1);
        }

        return (entries);
}

/* Scaling constant used to compute size of cpu mondo queue */
#define CPU_MONDO_Q_MULTIPLIER  8

static void
get_q_sizes(md_t *mdp, mde_cookie_t cpu_node_cookie)
{
        uint64_t max_qsize;
        mde_cookie_t *platlist;
        int nrnode;

        /*
         * Compute the maximum number of entries for the cpu mondo queue.
         * Use the appropriate property in the platform node, if it is
         * available.  Else, base it on NCPU.
         */
        nrnode = md_alloc_scan_dag(mdp,
            md_root_node(mdp), "platform", "fwd", &platlist);

        ASSERT(nrnode == 1);

        ncpu_guest_max = NCPU;
        (void) md_get_prop_val(mdp, platlist[0], "max-cpus", &ncpu_guest_max);
        max_qsize = ncpu_guest_max * CPU_MONDO_Q_MULTIPLIER;

        md_free_scan_dag(mdp, &platlist);

        cpu_q_entries = get_single_q_size(mdp, cpu_node_cookie,
            "q-cpu-mondo-#bits", DEFAULT_CPU_Q_ENTRIES, max_qsize);

        dev_q_entries = get_single_q_size(mdp, cpu_node_cookie,
            "q-dev-mondo-#bits", DEFAULT_DEV_Q_ENTRIES, MAXIVNUM);

        cpu_rq_entries = get_single_q_size(mdp, cpu_node_cookie,
            "q-resumable-#bits", CPU_RQ_ENTRIES, MAX_CPU_RQ_ENTRIES);

        cpu_nrq_entries = get_single_q_size(mdp, cpu_node_cookie,
            "q-nonresumable-#bits", CPU_NRQ_ENTRIES, MAX_CPU_NRQ_ENTRIES);
}

static void
get_va_bits(md_t *mdp, mde_cookie_t cpu_node_cookie)
{
        uint64_t value = VA_ADDRESS_SPACE_BITS;

        if (md_get_prop_val(mdp, cpu_node_cookie, "mmu-#va-bits", &value))
                cmn_err(CE_PANIC, "mmu-#va-bits property not found in MD");

        if (value == 0 || value > VA_ADDRESS_SPACE_BITS)
                cmn_err(CE_PANIC, "Incorrect number of va bits in MD");

        /* The number of VA bits is not expected to exceed 32 bits. */
        va_bits = (int)value;

        /*
         * Correct the value for VA bits on UltraSPARC-T1 based systems
         * in case of a broken MD.
         */
        if (broken_md_flag)
                va_bits = DEFAULT_VA_ADDRESS_SPACE_BITS;
}

int
l2_cache_node_count(void)
{
        return (n_l2_caches);
}

/*
 * Count the number of L2 caches.
 */
static int
get_l2_cache_node_count(md_t *mdp)
{
        int i;
        mde_cookie_t *cachenodes;
        uint64_t level;
        int n_cachenodes = md_alloc_scan_dag(mdp, md_root_node(mdp),
            "cache", "fwd", &cachenodes);
        int l2_caches = 0;

        for (i = 0; i < n_cachenodes; i++) {
                if (md_get_prop_val(mdp, cachenodes[i], "level", &level) != 0) {
                        level = 0;
                }
                if (level == 2) {
                        l2_caches++;
                }
        }
        md_free_scan_dag(mdp, &cachenodes);
        return (l2_caches);
}

/*
 * This routine returns the L2 cache information, such as associativity,
 * size, and linesize.
 */
static int
get_l2_cache_info(md_t *mdp, mde_cookie_t cpu_node_cookie,
    uint64_t *associativity, uint64_t *size, uint64_t *linesize)
{
        mde_cookie_t *cachelist;
        int ncaches, i;
        uint64_t cache_level = 0;

        ncaches = md_alloc_scan_dag(mdp, cpu_node_cookie, "cache",
            "fwd", &cachelist);
        /*
         * The "cache" node is optional in the MD, therefore ncaches can be 0.
         */
        if (ncaches < 1) {
                return (0);
        }

        for (i = 0; i < ncaches; i++) {
                uint64_t local_assoc;
                uint64_t local_size;
                uint64_t local_lsize;

                if (md_get_prop_val(mdp, cachelist[i], "level", &cache_level))
                        continue;

                if (cache_level != 2)
                        continue;

                /* If properties are missing from this cache, ignore it */
                if ((md_get_prop_val(mdp, cachelist[i],
                    "associativity", &local_assoc))) {
                        continue;
                }

                if ((md_get_prop_val(mdp, cachelist[i],
                    "size", &local_size))) {
                        continue;
                }

                if ((md_get_prop_val(mdp, cachelist[i],
                    "line-size", &local_lsize))) {
                        continue;
                }

                *associativity = local_assoc;
                *size = local_size;
                *linesize = local_lsize;
                break;
        }

        md_free_scan_dag(mdp, &cachelist);

        return ((cache_level == 2) ? 1 : 0);
}
/*
 * Set the broken_md_flag to 1 if the MD doesn't have
 * the domaining-enabled property in the platform node and the
 * platform uses the UltraSPARC-T1 cpu. This flag is used to
 * work around some of the incorrect MD properties.
 */
static void
init_md_broken(md_t *mdp, mde_cookie_t *cpulist)
{
        int nrnode;
        mde_cookie_t *platlist, rootnode;
        uint64_t val = 0;
        char *namebuf;
        int namelen;

        rootnode = md_root_node(mdp);
        ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
        ASSERT(cpulist);

        nrnode = md_alloc_scan_dag(mdp, rootnode, "platform", "fwd",
            &platlist);

        if (nrnode < 1)
                cmn_err(CE_PANIC, "init_md_broken: platform node missing");

        if (md_get_prop_data(mdp, cpulist[0],
            "compatible", (uint8_t **)&namebuf, &namelen)) {
                cmn_err(CE_PANIC, "init_md_broken: "
                    "Cannot read 'compatible' property of 'cpu' node");
        }

        if (md_get_prop_val(mdp, platlist[0],
            "domaining-enabled", &val) == -1 &&
            strcmp(namebuf, "SUNW,UltraSPARC-T1") == 0)
                broken_md_flag = 1;

        md_free_scan_dag(mdp, &platlist);
}

#define PLAT_MAX_IOALIASES      8

static plat_alias_t *plat_ioaliases;
static uint64_t plat_num_ioaliases;

/*
 * Split the aliases property into its
 * component strings for easy searching.
 */
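/*
 * For example, an aliases string of "  /pci@300  /pci@300,1 " is split
 * into pali_naliases = 2 entries: "/pci@300" and "/pci@300,1".
 * (Paths are illustrative only.)
 */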
static void
split_alias(plat_alias_t *pali, char *str)
{
        char *aliasv[PLAT_MAX_IOALIASES], *p;
        int i, duplen;
        char *dup;

        /* operate on a copy of the string; skip any leading spaces */
        str = dup = strdup(str);
        duplen = strlen(dup) + 1;
        str += strspn(str, " ");
        for (i = 0; *str != '\0'; str = p) {
                p = strpbrk(str, " ");
                if (p != NULL) {
                        *p++ = '\0';
                }

                VERIFY(i < PLAT_MAX_IOALIASES);
                aliasv[i++] = strdup(str);
                if (p == NULL)
                        break;
                p += strspn(p, " ");
        }

        kmem_free(dup, duplen);

        if (i == 0) {
                pali->pali_naliases = 0;
                pali->pali_aliases = NULL;
                return;
        }

        pali->pali_naliases = i;
        pali->pali_aliases = kmem_alloc(i * sizeof (char *), KM_SLEEP);
        for (i = 0; i < pali->pali_naliases; i++) {
                pali->pali_aliases[i] = aliasv[i];
        }
}

/*
 * Retrieve the ioalias info from the MD and
 * initialize the ioalias struct.
 *
 * NOTE: Assumes that the ioalias info does not change at runtime.
 * This routine is invoked only once at boot time.
 *
 * No lock needed as this is called at boot with a DDI lock held.
 */
void
plat_ioaliases_init(void)
{
        md_t *mdp;
        mde_cookie_t *ionodes, alinode;
        plat_alias_t *pali;
        int nio;
        int i;
        int err;

        mdp = md_get_handle();
        if (mdp == NULL) {
                cmn_err(CE_PANIC, "no machine description (MD)");
                /*NOTREACHED*/
        }

        nio = md_alloc_scan_dag(mdp, md_root_node(mdp),
            "ioaliases", "fwd", &ionodes);

        /* not all platforms support aliases */
        if (nio < 1) {
                (void) md_fini_handle(mdp);
                return;
        }
        if (nio > 1) {
                cmn_err(CE_PANIC, "multiple ioalias nodes in MD");
                /*NOTREACHED*/
        }

        alinode = ionodes[0];
        md_free_scan_dag(mdp, &ionodes);

        nio = md_alloc_scan_dag(mdp, alinode, "ioalias", "fwd", &ionodes);
        if (nio <= 0) {
                cmn_err(CE_PANIC, "MD alias node has no aliases");
                /*NOTREACHED*/
        }

        plat_num_ioaliases = nio;
        plat_ioaliases = pali = kmem_zalloc(nio * sizeof (plat_alias_t),
            KM_SLEEP);

        /*
         * Each ioalias map will have a composite property of
         * aliases and the current valid path.
         */
        for (i = 0; i < nio; i++) {
                char *str;

                err = md_get_prop_str(mdp, ionodes[i], "current", &str);
                if (err != 0) {
                        cmn_err(CE_PANIC, "malformed ioalias node");
                        /*NOTREACHED*/
                }
                pali->pali_current = strdup(str);

                err = md_get_prop_str(mdp, ionodes[i], "aliases", &str);
                if (err != 0) {
                        cmn_err(CE_PANIC, "malformed aliases");
                        /*NOTREACHED*/
                }
                DDI_MP_DBG((CE_NOTE, "path: %s aliases %s",
                    pali->pali_current, str));

                split_alias(pali, str);
                pali++;
        }

        md_free_scan_dag(mdp, &ionodes);

        /*
         * Register the io-aliases array with the DDI framework.
         * The DDI framework assumes that this array and its contents
         * will not change post-register. The DDI framework will
         * cache this array and is free to access this array at
         * any time without any locks.
         */
        ddi_register_aliases(plat_ioaliases, plat_num_ioaliases);

        (void) md_fini_handle(mdp);
}

/*
 * Number of bits forming a valid context for use in a sun4v TTE and the MMU
 * context registers. Sun4v defines the minimum default value to be 13 if this
 * property is not specified in a cpu node in the machine description graph.
 */
#define MMU_INFO_CTXBITS_MIN            13

/* Convert context bits to number of contexts */
#define MMU_INFO_BNCTXS(nbits)          ((uint_t)(1u<<(nbits)))
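/* e.g., MMU_INFO_BNCTXS(MMU_INFO_CTXBITS_MIN) == 2^13 == 8192 contexts */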

/*
 * Read the machine description and load the TLB-to-CPU mappings.
 * Returned values: cpuid2pset[NCPU], nctxs[NCPU], md_gen
 * - cpuid2pset is initialized so it can convert cpuids to the processor set
 *   of CPUs that share a TLB.
 * - nctxs is initialized to the number of contexts for each CPU.
 * - md_gen is set to the generation number of the machine description from
 *   which this data was read.
 * Return: zero on success.
 */
static int
load_tlb_cpu_mappings(cpuset_t **cpuid2pset, uint_t *nctxs, uint64_t *md_gen)
{
        mde_str_cookie_t cpu_sc, bck_sc;
        int             tlbs_idx, cp_idx;
        mde_cookie_t    root;
        md_t            *mdp = NULL;
        mde_cookie_t    *tlbs = NULL;
        mde_cookie_t    *cp = NULL;
        uint64_t        *cpids = NULL;
        uint64_t        nbit;
        int             ntlbs;
        int             ncp;
        int             retval = 1;
        cpuset_t        *ppset;

        /* get MD handle, and string cookies for cpu and back nodes */
        if ((mdp = md_get_handle()) == NULL ||
            (cpu_sc = md_find_name(mdp, "cpu")) == MDE_INVAL_STR_COOKIE ||
            (bck_sc = md_find_name(mdp, "back")) == MDE_INVAL_STR_COOKIE)
                goto cleanup;

        /* set generation number of current MD handle */
        *md_gen = md_get_gen(mdp);

        /* Find root element, and search for all TLBs in MD */
        if ((root = md_root_node(mdp)) == MDE_INVAL_ELEM_COOKIE ||
            (ntlbs = md_alloc_scan_dag(mdp, root, "tlb", "fwd", &tlbs)) <= 0)
                goto cleanup;

        cp = kmem_alloc(sizeof (mde_cookie_t) * NCPU, KM_SLEEP);
        cpids = kmem_alloc(sizeof (uint64_t) * NCPU, KM_SLEEP);

        /*
         * Build processor sets, one per possible context domain.  For each tlb,
         * search for connected CPUs.  If any CPU is already in a set, then add
         * all the TLB's CPUs to that set.  Otherwise, create and populate a new
         * pset.  Thus, a single pset is built to represent multiple TLBs if
         * they have CPUs in common.
         */
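        /*
         * For example (illustrative topology): if TLB0 connects to CPUs
         * {0, 1} and TLB1 connects to CPUs {1, 2}, the first pass creates
         * pset {0, 1}; the second pass finds CPU 1 already in that pset
         * and adds CPU 2, yielding a single pset {0, 1, 2}.
         */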
        for (tlbs_idx = 0; tlbs_idx < ntlbs; tlbs_idx++) {
                ncp = md_scan_dag(mdp, tlbs[tlbs_idx], cpu_sc, bck_sc, cp);
                if (ncp < 0)
                        goto cleanup;
                else if (ncp == 0)
                        continue;

                /* Get the id and number of contexts for each cpu */
                for (cp_idx = 0; cp_idx < ncp; cp_idx++) {
                        mde_cookie_t c = cp[cp_idx];

                        if (md_get_prop_val(mdp, c, "id", &cpids[cp_idx]))
                                goto cleanup;
                        if (md_get_prop_val(mdp, c, "mmu-#context-bits", &nbit))
                                nbit = MMU_INFO_CTXBITS_MIN;
                        nctxs[cpids[cp_idx]] = MMU_INFO_BNCTXS(nbit);
                }

                /*
                 * If a CPU is already in a set as shown by cpuid2pset[], then
                 * use that set.
                 */
                for (cp_idx = 0; cp_idx < ncp; cp_idx++) {
                        ASSERT(cpids[cp_idx] < NCPU);
                        ppset = cpuid2pset[cpids[cp_idx]];
                        if (ppset != NULL)
                                break;
                }

                /* No CPU has a set. Create a new one. */
                if (ppset == NULL) {
                        ppset = kmem_alloc(sizeof (cpuset_t), KM_SLEEP);
                        CPUSET_ZERO(*ppset);
                }

                /* Add every CPU to the set, and record the set assignment. */
                for (cp_idx = 0; cp_idx < ncp; cp_idx++) {
                        cpuid2pset[cpids[cp_idx]] = ppset;
                        CPUSET_ADD(*ppset, cpids[cp_idx]);
                }
        }

        retval = 0;

cleanup:
        if (tlbs != NULL)
                md_free_scan_dag(mdp, &tlbs);
        if (cp != NULL)
                kmem_free(cp, sizeof (mde_cookie_t) * NCPU);
        if (cpids != NULL)
                kmem_free(cpids, sizeof (uint64_t) * NCPU);
        if (mdp != NULL)
                (void) md_fini_handle(mdp);

        return (retval);
}

/*
 * Return MMU info based on cpuid.
 *
 * Algorithm:
 * Read the machine description and find all CPUs that share a TLB with the
 * CPU specified by cpuid. Go through the CPUs found, and if any one of them
 * already has an MMU index, set the index based on that value. If the CPU
 * does not share a TLB with any other CPU, or if none of those CPUs has an
 * mmu_ctx pointer, find the smallest available MMU index and give it to the
 * current CPU. If no domain is available, assign one round-robin, starting
 * again from the beginning.
 *
 * For optimization reasons, this function uses a cache to store all TLB-to-CPU
 * mappings, and updates them only when the machine description graph changes.
 * Because of this, and because we search the MMU table for the smallest index
 * id, this function needs to be serialized, which is protected by cpu_lock.
 */
void
plat_cpuid_to_mmu_ctx_info(processorid_t cpuid, mmu_ctx_info_t *info)
{
        static cpuset_t **cpuid2pset = NULL;
        static uint_t   *nctxs;
        static uint_t   next_domain = 0;
        static uint64_t md_gen = MDESC_INVAL_GEN;
        uint64_t        current_gen;
        int             idx;
        cpuset_t        cpuid_pset;
        processorid_t   id;
        cpu_t           *cp;

        ASSERT(MUTEX_HELD(&cpu_lock));

        current_gen = md_get_current_gen();

        /*
         * Load the TLB-to-CPU mappings only if the MD generation has changed.
         * Firmware that does not provide a generation number always returns
         * MDESC_INVAL_GEN; as a result, the MD is read here only once on such
         * machines, when cpuid2pset is NULL.
         */
        if (current_gen != md_gen || cpuid2pset == NULL) {
                if (cpuid2pset == NULL) {
                        cpuid2pset = kmem_zalloc(sizeof (cpuset_t *) * NCPU,
                            KM_SLEEP);
                        nctxs = kmem_alloc(sizeof (uint_t) * NCPU, KM_SLEEP);
                } else {
                        /* clean cpuid2pset[NCPU] before loading new values */
                        for (idx = 0; idx < NCPU; idx++) {
                                cpuset_t *pset = cpuid2pset[idx];

                                if (pset != NULL) {
                                        for (;;) {
                                                CPUSET_FIND(*pset, id);
                                                if (id == CPUSET_NOTINSET)
                                                        break;
                                                CPUSET_DEL(*pset, id);
                                                ASSERT(id < NCPU);
                                                cpuid2pset[id] = NULL;
                                        }
                                        ASSERT(cpuid2pset[idx] == NULL);
                                        kmem_free(pset, sizeof (cpuset_t));
                                }
                        }
                }

                if (load_tlb_cpu_mappings(cpuid2pset, nctxs, &md_gen))
                        goto error_panic;
        }

        info->mmu_nctxs = nctxs[cpuid];

        if (cpuid2pset[cpuid] == NULL)
                goto error_panic;

        cpuid_pset = *cpuid2pset[cpuid];
        CPUSET_DEL(cpuid_pset, cpuid);

        /* Search for a processor in the same TLB pset with MMU context */
        for (;;) {
                CPUSET_FIND(cpuid_pset, id);

                if (id == CPUSET_NOTINSET)
                        break;

                ASSERT(id < NCPU);
                cp = cpu[id];
                if (cp != NULL && CPU_MMU_CTXP(cp) != NULL) {
                        info->mmu_idx = CPU_MMU_IDX(cp);

                        return;
                }
                CPUSET_DEL(cpuid_pset, id);
        }

        /*
         * No CPU in the TLB pset has a context domain yet.
         * Use next_domain if available, or search for an unused domain, or
         * overload next_domain, in that order.  Overloading is necessary when
         * the number of TLB psets is greater than max_mmu_ctxdoms.
         */
        idx = next_domain;

        if (mmu_ctxs_tbl[idx] != NULL) {
                for (idx = 0; idx < max_mmu_ctxdoms; idx++)
                        if (mmu_ctxs_tbl[idx] == NULL)
                                break;
                if (idx == max_mmu_ctxdoms) {
                        /* overload next_domain */
                        idx = next_domain;

                        if (info->mmu_nctxs < sfmmu_ctxdom_nctxs(idx))
                                cmn_err(CE_PANIC, "max_mmu_ctxdoms is too small"
                                    " to support CPUs with different nctxs");
                }
        }

        info->mmu_idx = idx;
        next_domain = (idx + 1) % max_mmu_ctxdoms;

        return;

error_panic:
        cmn_err(CE_PANIC, "!cpu%d: failed to get MMU CTX domain index", cpuid);
}