1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/systm.h>
  27 #include <sys/platform_module.h>
  28 #include <sys/sysmacros.h>
  29 #include <sys/atomic.h>
  30 #include <sys/memlist.h>
  31 #include <sys/memnode.h>
  32 #include <vm/vm_dep.h>
  33 
/*
 * Upper bound used by the loops in this file when scanning
 * mem_node_config[]; only indices below max_mem_nodes are examined.
 */
int max_mem_nodes = 1;          /* max memory nodes on this system */

/*
 * Per-memnode configuration, indexed by memnode number.  This file uses
 * the exists flag and the physbase/physmax PFN bounds of each entry.
 */
struct mem_node_conf mem_node_config[MAX_MEM_NODES];
/*
 * Not referenced directly in this file.
 * NOTE(review): presumably consumed by PFN_2_MEM_NODE() -- confirm.
 */
int mem_node_pfn_shift;
/*
 * num_memnodes must be updated atomically and must always be >= the
 * number of bits set in memnodes_mask, or the algorithm may fail.
 * To maintain that, the code in this file increments num_memnodes before
 * setting a bit in memnodes_mask, and decrements it only after clearing
 * the bit.
 */
uint16_t num_memnodes;
/* One bit per memnode, set for each memnode that has been created. */
mnodeset_t memnodes_mask; /* assumes 8*(sizeof(mnodeset_t)) >= MAX_MEM_NODES */

/*
 * If set, mem_node_physalign should be a power of two, and
 * should reflect the minimum address alignment of each node.
 * When non-zero it is converted with btop() and used to align the PFN
 * range passed to mem_node_add_slice() and mem_node_del_slice().
 */
uint64_t mem_node_physalign;

/*
 * Platform hooks we will need.
 *
 * These are declared weak so that their addresses can be compared with
 * NULL to find out whether the platform supplies an implementation.
 */

#pragma weak plat_build_mem_nodes
#pragma weak plat_slice_add
#pragma weak plat_slice_del
  58 
  59 /*
  60  * Adjust the memnode config after a DR operation.
  61  *
  62  * It is rather tricky to do these updates since we can't
  63  * protect the memnode structures with locks, so we must
  64  * be mindful of the order in which updates and reads to
  65  * these values can occur.
  66  */
  67 void
  68 mem_node_add_slice(pfn_t start, pfn_t end)
  69 {
  70         int mnode;
  71         mnodeset_t newmask, oldmask;
  72 
  73         /*
  74          * DR will pass us the first pfn that is allocatable.
  75          * We need to round down to get the real start of
  76          * the slice.
  77          */
  78         if (mem_node_physalign) {
  79                 start &= ~(btop(mem_node_physalign) - 1);
  80                 end = roundup(end, btop(mem_node_physalign)) - 1;
  81         }
  82 
  83         mnode = PFN_2_MEM_NODE(start);
  84         ASSERT(mnode < max_mem_nodes);
  85 
  86         if (atomic_cas_32((uint32_t *)&mem_node_config[mnode].exists, 0, 1)) {
  87                 /*
  88                  * Add slice to existing node.
  89                  */
  90                 if (start < mem_node_config[mnode].physbase)
  91                         mem_node_config[mnode].physbase = start;
  92                 if (end > mem_node_config[mnode].physmax)
  93                         mem_node_config[mnode].physmax = end;
  94         } else {
  95                 mem_node_config[mnode].physbase = start;
  96                 mem_node_config[mnode].physmax = end;
  97                 atomic_inc_16(&num_memnodes);
  98                 do {
  99                         oldmask = memnodes_mask;
 100                         newmask = memnodes_mask | (1ull << mnode);
 101                 } while (atomic_cas_64(&memnodes_mask, oldmask, newmask) !=
 102                          oldmask);
 103         }
 104         /*
 105          * Let the common lgrp framework know about the new memory
 106          */
 107         lgrp_config(LGRP_CONFIG_MEM_ADD, mnode, MEM_NODE_2_LGRPHAND(mnode));
 108 }
 109 
 110 /*
 111  * Remove a PFN range from a memnode.  On some platforms,
 112  * the memnode will be created with physbase at the first
 113  * allocatable PFN, but later deleted with the MC slice
 114  * base address converted to a PFN, in which case we need
 115  * to assume physbase and up.
 116  */
 117 void
 118 mem_node_del_slice(pfn_t start, pfn_t end)
 119 {
 120         int mnode;
 121         pgcnt_t delta_pgcnt, node_size;
 122         mnodeset_t omask, nmask;
 123 
 124         if (mem_node_physalign) {
 125                 start &= ~(btop(mem_node_physalign) - 1);
 126                 end = roundup(end, btop(mem_node_physalign)) - 1;
 127         }
 128         mnode = PFN_2_MEM_NODE(start);
 129 
 130         ASSERT(mnode < max_mem_nodes);
 131         ASSERT(mem_node_config[mnode].exists == 1);
 132 
 133         delta_pgcnt = end - start;
 134         node_size = mem_node_config[mnode].physmax -
 135             mem_node_config[mnode].physbase;
 136 
 137         if (node_size > delta_pgcnt) {
 138                 /*
 139                  * Subtract the slice from the memnode.
 140                  */
 141                 if (start <= mem_node_config[mnode].physbase)
 142                         mem_node_config[mnode].physbase = end + 1;
 143                 ASSERT(end <= mem_node_config[mnode].physmax);
 144                 if (end == mem_node_config[mnode].physmax)
 145                         mem_node_config[mnode].physmax = start - 1;
 146         } else {
 147 
 148                 /*
 149                  * Let the common lgrp framework know the mnode is
 150                  * leaving
 151                  */
 152                 lgrp_config(LGRP_CONFIG_MEM_DEL, mnode,
 153                     MEM_NODE_2_LGRPHAND(mnode));
 154 
 155                 /*
 156                  * Delete the whole node.
 157                  */
 158                 ASSERT(MNODE_PGCNT(mnode) == 0);
 159                 do {
 160                         omask = memnodes_mask;
 161                         nmask = omask & ~(1ull << mnode);
 162                 } while (atomic_cas_64(&memnodes_mask, omask, nmask) != omask);
 163                 atomic_dec_16(&num_memnodes);
 164                 mem_node_config[mnode].exists = 0;
 165         }
 166 }
 167 
 168 void
 169 mem_node_add_range(pfn_t start, pfn_t end)
 170 {
 171         if (&plat_slice_add != NULL)
 172                 plat_slice_add(start, end);
 173         else
 174                 mem_node_add_slice(start, end);
 175 }
 176 
 177 void
 178 mem_node_del_range(pfn_t start, pfn_t end)
 179 {
 180         if (&plat_slice_del != NULL)
 181                 plat_slice_del(start, end);
 182         else
 183                 mem_node_del_slice(start, end);
 184 }
 185 
 186 void
 187 startup_build_mem_nodes(prom_memlist_t *list, size_t nelems)
 188 {
 189         size_t  elem;
 190         pfn_t   basepfn;
 191         pgcnt_t npgs;
 192 
 193         /* LINTED: ASSERT will always true or false */
 194         ASSERT(NBBY * sizeof (mnodeset_t) >= max_mem_nodes);
 195 
 196         if (&plat_build_mem_nodes != NULL) {
 197                 plat_build_mem_nodes(list, nelems);
 198         } else {
 199                 /*
 200                  * Boot install lists are arranged <addr, len>, ...
 201                  */
 202                 for (elem = 0; elem < nelems; list++, elem++) {
 203                         basepfn = btop(list->addr);
 204                         npgs = btop(list->size);
 205                         mem_node_add_range(basepfn, basepfn + npgs - 1);
 206                 }
 207         }
 208 }
 209 
 210 /*
 211  * Allocate an unassigned memnode.
 212  */
 213 int
 214 mem_node_alloc()
 215 {
 216         int mnode;
 217         mnodeset_t newmask, oldmask;
 218 
 219         /*
 220          * Find an unused memnode.  Update it atomically to prevent
 221          * a first time memnode creation race.
 222          */
 223         for (mnode = 0; mnode < max_mem_nodes; mnode++)
 224                 if (atomic_cas_32((uint32_t *)&mem_node_config[mnode].exists,
 225                     0, 1) == 0)
 226                         break;
 227 
 228         if (mnode >= max_mem_nodes)
 229                         panic("Out of free memnodes\n");
 230 
 231         mem_node_config[mnode].physbase = (uint64_t)-1;
 232         mem_node_config[mnode].physmax = 0;
 233         atomic_inc_16(&num_memnodes);
 234         do {
 235                 oldmask = memnodes_mask;
 236                 newmask = memnodes_mask | (1ull << mnode);
 237         } while (atomic_cas_64(&memnodes_mask, oldmask, newmask) != oldmask);
 238 
 239         return (mnode);
 240 }
 241 
 242 /*
 243  * Find the intersection between a memnode and a memlist
 244  * and returns the number of pages that overlap.
 245  *
 246  * Grab the memlist lock to protect the list from DR operations.
 247  */
 248 pgcnt_t
 249 mem_node_memlist_pages(int mnode, struct memlist *mlist)
 250 {
 251         pfn_t           base, end;
 252         pfn_t           cur_base, cur_end;
 253         pgcnt_t         npgs = 0;
 254         pgcnt_t         pages;
 255         struct memlist  *pmem;
 256 
 257         if (&plat_mem_node_intersect_range != NULL) {
 258                 memlist_read_lock();
 259 
 260                 for (pmem = mlist; pmem; pmem = pmem->ml_next) {
 261                         plat_mem_node_intersect_range(btop(pmem->ml_address),
 262                             btop(pmem->ml_size), mnode, &pages);
 263                         npgs += pages;
 264                 }
 265 
 266                 memlist_read_unlock();
 267                 return (npgs);
 268         }
 269 
 270         base = mem_node_config[mnode].physbase;
 271         end = mem_node_config[mnode].physmax;
 272 
 273         memlist_read_lock();
 274 
 275         for (pmem = mlist; pmem; pmem = pmem->ml_next) {
 276                 cur_base = btop(pmem->ml_address);
 277                 cur_end = cur_base + btop(pmem->ml_size) - 1;
 278                 if (end < cur_base || base > cur_end)
 279                         continue;
 280                 npgs = npgs + (MIN(cur_end, end) -
 281                     MAX(cur_base, base)) + 1;
 282         }
 283 
 284         memlist_read_unlock();
 285 
 286         return (npgs);
 287 }
 288 
 289 /*
 290  * Find MIN(physbase) and MAX(physmax) over all mnodes
 291  *
 292  * Called during startup and DR to find hpm_counters limits when
 293  * interleaved_mnodes is set.
 294  * NOTE: there is a race condition with DR if it tries to change more than
 295  * one mnode in parallel. Sizing shared hpm_counters depends on finding the
 296  * min(physbase) and max(physmax) across all mnodes. Therefore, the caller of
 297  * page_ctrs_adjust must ensure that mem_node_config does not change while it
 298  * is running.
 299  */
 300 void
 301 mem_node_max_range(pfn_t *basep, pfn_t *maxp)
 302 {
 303         int mnode;
 304         pfn_t max = 0;
 305         pfn_t base = (pfn_t)-1;
 306 
 307         for (mnode = 0; mnode < max_mem_nodes; mnode++) {
 308                 if (mem_node_config[mnode].exists == 0)
 309                         continue;
 310                 if (max < mem_node_config[mnode].physmax)
 311                         max = mem_node_config[mnode].physmax;
 312                 if (base > mem_node_config[mnode].physbase)
 313                         base = mem_node_config[mnode].physbase;
 314         }
 315         ASSERT(base != (pfn_t)-1 && max != 0);
 316         *basep = base;
 317         *maxp = max;
 318 }