1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 #include <sys/systm.h>
  27 #include <sys/platform_module.h>
  28 #include <sys/sysmacros.h>
  29 #include <sys/atomic.h>
  30 #include <sys/memlist.h>
  31 #include <sys/memnode.h>
  32 #include <vm/vm_dep.h>
  33 
/*
 * Upper bound (exclusive) on the mnode indices this file will use when
 * walking or indexing mem_node_config[].
 */
int max_mem_nodes = 1;          /* max memory nodes on this system */

/*
 * Per-memnode configuration, indexed by mnode.  The code in this file
 * reads and writes the "exists", "physbase" and "physmax" members.
 */
struct mem_node_conf mem_node_config[MAX_MEM_NODES];
/*
 * Not referenced directly in this file; presumably consumed by the
 * PFN_2_MEM_NODE() mapping -- TODO confirm against the macro definition.
 */
int mem_node_pfn_shift;
/*
 * num_memnodes should be updated atomically and must always be >=
 * the number of bits set in memnodes_mask or the algorithm may fail.
 * The writers in this file keep that ordering by incrementing
 * num_memnodes before setting a node's bit in the mask, and by
 * decrementing it only after the bit has been cleared.
 */
uint16_t num_memnodes;
mnodeset_t memnodes_mask; /* assumes 8*(sizeof(mnodeset_t)) >= MAX_MEM_NODES */

/*
 * If set, mem_node_physalign should be a power of two, and
 * should reflect the minimum address alignment of each node.
 * It is a byte quantity: the slice add/delete routines convert it
 * to pages with btop() before aligning pfns, and skip alignment
 * entirely when it is zero.
 */
uint64_t mem_node_physalign;

/*
 * Platform hooks we will need.
 *
 * These are weak symbols: a platform may or may not supply them.
 * Callers in this file compare the symbol's address against NULL
 * and fall back to the generic implementation when it is absent.
 */

#pragma weak plat_build_mem_nodes
#pragma weak plat_slice_add
#pragma weak plat_slice_del
  58 
  59 /*
  60  * Adjust the memnode config after a DR operation.
  61  *
  62  * It is rather tricky to do these updates since we can't
  63  * protect the memnode structures with locks, so we must
  64  * be mindful of the order in which updates and reads to
  65  * these values can occur.
  66  */
  67 void
  68 mem_node_add_slice(pfn_t start, pfn_t end)
  69 {
  70         int mnode;
  71         mnodeset_t newmask, oldmask;
  72 
  73         /*
  74          * DR will pass us the first pfn that is allocatable.
  75          * We need to round down to get the real start of
  76          * the slice.
  77          */
  78         if (mem_node_physalign) {
  79                 start &= ~(btop(mem_node_physalign) - 1);
  80                 end = roundup(end, btop(mem_node_physalign)) - 1;
  81         }
  82 
  83         mnode = PFN_2_MEM_NODE(start);
  84         ASSERT(mnode < max_mem_nodes);
  85 
  86         if (cas32((uint32_t *)&mem_node_config[mnode].exists, 0, 1)) {
  87                 /*
  88                  * Add slice to existing node.
  89                  */
  90                 if (start < mem_node_config[mnode].physbase)
  91                         mem_node_config[mnode].physbase = start;
  92                 if (end > mem_node_config[mnode].physmax)
  93                         mem_node_config[mnode].physmax = end;
  94         } else {
  95                 mem_node_config[mnode].physbase = start;
  96                 mem_node_config[mnode].physmax = end;
  97                 atomic_add_16(&num_memnodes, 1);
  98                 do {
  99                         oldmask = memnodes_mask;
 100                         newmask = memnodes_mask | (1ull << mnode);
 101                 } while (cas64(&memnodes_mask, oldmask, newmask) != oldmask);
 102         }
 103         /*
 104          * Let the common lgrp framework know about the new memory
 105          */
 106         lgrp_config(LGRP_CONFIG_MEM_ADD, mnode, MEM_NODE_2_LGRPHAND(mnode));
 107 }
 108 
 109 /*
 110  * Remove a PFN range from a memnode.  On some platforms,
 111  * the memnode will be created with physbase at the first
 112  * allocatable PFN, but later deleted with the MC slice
 113  * base address converted to a PFN, in which case we need
 114  * to assume physbase and up.
 115  */
 116 void
 117 mem_node_del_slice(pfn_t start, pfn_t end)
 118 {
 119         int mnode;
 120         pgcnt_t delta_pgcnt, node_size;
 121         mnodeset_t omask, nmask;
 122 
 123         if (mem_node_physalign) {
 124                 start &= ~(btop(mem_node_physalign) - 1);
 125                 end = roundup(end, btop(mem_node_physalign)) - 1;
 126         }
 127         mnode = PFN_2_MEM_NODE(start);
 128 
 129         ASSERT(mnode < max_mem_nodes);
 130         ASSERT(mem_node_config[mnode].exists == 1);
 131 
 132         delta_pgcnt = end - start;
 133         node_size = mem_node_config[mnode].physmax -
 134             mem_node_config[mnode].physbase;
 135 
 136         if (node_size > delta_pgcnt) {
 137                 /*
 138                  * Subtract the slice from the memnode.
 139                  */
 140                 if (start <= mem_node_config[mnode].physbase)
 141                         mem_node_config[mnode].physbase = end + 1;
 142                 ASSERT(end <= mem_node_config[mnode].physmax);
 143                 if (end == mem_node_config[mnode].physmax)
 144                         mem_node_config[mnode].physmax = start - 1;
 145         } else {
 146 
 147                 /*
 148                  * Let the common lgrp framework know the mnode is
 149                  * leaving
 150                  */
 151                 lgrp_config(LGRP_CONFIG_MEM_DEL, mnode,
 152                     MEM_NODE_2_LGRPHAND(mnode));
 153 
 154                 /*
 155                  * Delete the whole node.
 156                  */
 157                 ASSERT(MNODE_PGCNT(mnode) == 0);
 158                 do {
 159                         omask = memnodes_mask;
 160                         nmask = omask & ~(1ull << mnode);
 161                 } while (cas64(&memnodes_mask, omask, nmask) != omask);
 162                 atomic_add_16(&num_memnodes, -1);
 163                 mem_node_config[mnode].exists = 0;
 164         }
 165 }
 166 
 167 void
 168 mem_node_add_range(pfn_t start, pfn_t end)
 169 {
 170         if (&plat_slice_add != NULL)
 171                 plat_slice_add(start, end);
 172         else
 173                 mem_node_add_slice(start, end);
 174 }
 175 
 176 void
 177 mem_node_del_range(pfn_t start, pfn_t end)
 178 {
 179         if (&plat_slice_del != NULL)
 180                 plat_slice_del(start, end);
 181         else
 182                 mem_node_del_slice(start, end);
 183 }
 184 
 185 void
 186 startup_build_mem_nodes(prom_memlist_t *list, size_t nelems)
 187 {
 188         size_t  elem;
 189         pfn_t   basepfn;
 190         pgcnt_t npgs;
 191 
 192         /* LINTED: ASSERT will always true or false */
 193         ASSERT(NBBY * sizeof (mnodeset_t) >= max_mem_nodes);
 194 
 195         if (&plat_build_mem_nodes != NULL) {
 196                 plat_build_mem_nodes(list, nelems);
 197         } else {
 198                 /*
 199                  * Boot install lists are arranged <addr, len>, ...
 200                  */
 201                 for (elem = 0; elem < nelems; list++, elem++) {
 202                         basepfn = btop(list->addr);
 203                         npgs = btop(list->size);
 204                         mem_node_add_range(basepfn, basepfn + npgs - 1);
 205                 }
 206         }
 207 }
 208 
 209 /*
 210  * Allocate an unassigned memnode.
 211  */
 212 int
 213 mem_node_alloc()
 214 {
 215         int mnode;
 216         mnodeset_t newmask, oldmask;
 217 
 218         /*
 219          * Find an unused memnode.  Update it atomically to prevent
 220          * a first time memnode creation race.
 221          */
 222         for (mnode = 0; mnode < max_mem_nodes; mnode++)
 223                 if (cas32((uint32_t *)&mem_node_config[mnode].exists,
 224                     0, 1) == 0)
 225                         break;
 226 
 227         if (mnode >= max_mem_nodes)
 228                         panic("Out of free memnodes\n");
 229 
 230         mem_node_config[mnode].physbase = (uint64_t)-1;
 231         mem_node_config[mnode].physmax = 0;
 232         atomic_add_16(&num_memnodes, 1);
 233         do {
 234                 oldmask = memnodes_mask;
 235                 newmask = memnodes_mask | (1ull << mnode);
 236         } while (cas64(&memnodes_mask, oldmask, newmask) != oldmask);
 237 
 238         return (mnode);
 239 }
 240 
 241 /*
 242  * Find the intersection between a memnode and a memlist
 243  * and returns the number of pages that overlap.
 244  *
 245  * Grab the memlist lock to protect the list from DR operations.
 246  */
 247 pgcnt_t
 248 mem_node_memlist_pages(int mnode, struct memlist *mlist)
 249 {
 250         pfn_t           base, end;
 251         pfn_t           cur_base, cur_end;
 252         pgcnt_t         npgs = 0;
 253         pgcnt_t         pages;
 254         struct memlist  *pmem;
 255 
 256         if (&plat_mem_node_intersect_range != NULL) {
 257                 memlist_read_lock();
 258 
 259                 for (pmem = mlist; pmem; pmem = pmem->ml_next) {
 260                         plat_mem_node_intersect_range(btop(pmem->ml_address),
 261                             btop(pmem->ml_size), mnode, &pages);
 262                         npgs += pages;
 263                 }
 264 
 265                 memlist_read_unlock();
 266                 return (npgs);
 267         }
 268 
 269         base = mem_node_config[mnode].physbase;
 270         end = mem_node_config[mnode].physmax;
 271 
 272         memlist_read_lock();
 273 
 274         for (pmem = mlist; pmem; pmem = pmem->ml_next) {
 275                 cur_base = btop(pmem->ml_address);
 276                 cur_end = cur_base + btop(pmem->ml_size) - 1;
 277                 if (end < cur_base || base > cur_end)
 278                         continue;
 279                 npgs = npgs + (MIN(cur_end, end) -
 280                     MAX(cur_base, base)) + 1;
 281         }
 282 
 283         memlist_read_unlock();
 284 
 285         return (npgs);
 286 }
 287 
 288 /*
 289  * Find MIN(physbase) and MAX(physmax) over all mnodes
 290  *
 291  * Called during startup and DR to find hpm_counters limits when
 292  * interleaved_mnodes is set.
 293  * NOTE: there is a race condition with DR if it tries to change more than
 294  * one mnode in parallel. Sizing shared hpm_counters depends on finding the
 295  * min(physbase) and max(physmax) across all mnodes. Therefore, the caller of
 296  * page_ctrs_adjust must ensure that mem_node_config does not change while it
 297  * is running.
 298  */
 299 void
 300 mem_node_max_range(pfn_t *basep, pfn_t *maxp)
 301 {
 302         int mnode;
 303         pfn_t max = 0;
 304         pfn_t base = (pfn_t)-1;
 305 
 306         for (mnode = 0; mnode < max_mem_nodes; mnode++) {
 307                 if (mem_node_config[mnode].exists == 0)
 308                         continue;
 309                 if (max < mem_node_config[mnode].physmax)
 310                         max = mem_node_config[mnode].physmax;
 311                 if (base > mem_node_config[mnode].physbase)
 312                         base = mem_node_config[mnode].physbase;
 313         }
 314         ASSERT(base != (pfn_t)-1 && max != 0);
 315         *basep = base;
 316         *maxp = max;
 317 }