/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/kstat.h>
#include <sys/param.h>
#include <sys/stack.h>
#include <sys/regset.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/procfs_isa.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>
#include <sys/systm.h>
#include <sys/machpcb.h>
#include <sys/machasi.h>
#include <sys/vis.h>
#include <sys/fpu/fpusystm.h>
#include <sys/cpu_module.h>
#include <sys/privregs.h>
#include <sys/archsystm.h>
#include <sys/atomic.h>
#include <sys/cmn_err.h>
#include <sys/time.h>
#include <sys/clock.h>
#include <sys/cmp.h>
#include <sys/platform_module.h>
#include <sys/bl.h>
#include <sys/nvpair.h>
#include <sys/kdi_impl.h>
#include <sys/machsystm.h>
#include <sys/sysmacros.h>
#include <sys/promif.h>
#include <sys/pool_pset.h>
#include <sys/mem.h>
#include <sys/dumphdr.h>
#include <vm/seg_kmem.h>
#include <sys/hold_page.h>
#include <sys/cpu.h>
#include <sys/ivintr.h>
#include <sys/clock_impl.h>
#include <sys/machclock.h>

int maxphys = MMU_PAGESIZE * 16;        /* 128k */
int klustsize = MMU_PAGESIZE * 16;      /* 128k */

/*
 * Initialize kernel thread's stack.
 */
caddr_t
thread_stk_init(caddr_t stk)
{
        kfpu_t *fp;
        ulong_t align;

        /* allocate extra space for floating point state */
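        /*
         * Note: SA() (see <sys/stack.h>) rounds its argument up to
         * a multiple of STACK_ALIGN so %sp stays properly aligned.
         */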
        stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
        align = (uintptr_t)stk & 0x3f;
        stk -= align;           /* force v9_fpu to be 64 byte aligned */
        fp = (kfpu_t *)stk;
        fp->fpu_fprs = 0;

        stk -= SA(MINFRAME);
        return (stk);
}

#define WIN32_SIZE      (MAXWIN * sizeof (struct rwindow32))
#define WIN64_SIZE      (MAXWIN * sizeof (struct rwindow64))

kmem_cache_t    *wbuf32_cache;
kmem_cache_t    *wbuf64_cache;

void
lwp_stk_cache_init(void)
{
        /*
         * Window buffers are allocated from the static arena
         * because they are accessed at TL>0. We also must use
         * KMC_NOHASH to prevent them from straddling page
         * boundaries as they are accessed by physical address.
         */
        wbuf32_cache = kmem_cache_create("wbuf32_cache", WIN32_SIZE,
            0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
        wbuf64_cache = kmem_cache_create("wbuf64_cache", WIN64_SIZE,
            0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
}

/*
 * Initialize the lwp's kernel stack.
 * Now that the floating point register save area (kfpu_t) has been
 * broken out from machpcb and aligned on a 64 byte boundary so that
 * we can do block load/stores to/from it, there are a couple of
 * potential optimizations to save stack space:
 * 1. The floating point register save area could be aligned on a
 *    16 byte boundary, and the floating point code changed to
 *    (a) check the alignment and (b) use different save/restore
 *    macros depending upon the alignment.
 * 2. The lwp_stk_init code below could be changed to calculate
 *    whether less space would be wasted if machpcb came first
 *    instead of second. However, there is a REGOFF macro used in
 *    locore, syscall_trap, machdep and mlsetup that assumes the
 *    saved register area is a fixed distance from %sp, and it
 *    would have to be changed to a pointer or something...JJ said later.
 */
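/*
 * For reference, the layout produced below, growing downward from
 * the incoming stk:
 *
 *	high:	incoming stk
 *		kfpu_t + GSR save area (64 byte aligned)
 *		struct machpcb
 *	low:	returned stk (== mpcb)
 */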
caddr_t
lwp_stk_init(klwp_t *lwp, caddr_t stk)
{
        struct machpcb *mpcb;
        kfpu_t *fp;
        uintptr_t aln;

        stk -= SA(sizeof (kfpu_t) + GSR_SIZE);
        aln = (uintptr_t)stk & 0x3F;
        stk -= aln;
        fp = (kfpu_t *)stk;
        stk -= SA(sizeof (struct machpcb));
        mpcb = (struct machpcb *)stk;
        bzero(mpcb, sizeof (struct machpcb));
        bzero(fp, sizeof (kfpu_t) + GSR_SIZE);
        lwp->lwp_regs = (void *)&mpcb->mpcb_regs;
        lwp->lwp_fpu = (void *)fp;
        mpcb->mpcb_fpu = fp;
        mpcb->mpcb_fpu->fpu_q = mpcb->mpcb_fpu_q;
        mpcb->mpcb_thread = lwp->lwp_thread;
        mpcb->mpcb_wbcnt = 0;
        if (lwp->lwp_procp->p_model == DATAMODEL_ILP32) {
                mpcb->mpcb_wstate = WSTATE_USER32;
                mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
        } else {
                mpcb->mpcb_wstate = WSTATE_USER64;
                mpcb->mpcb_wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
        }
        ASSERT(((uintptr_t)mpcb->mpcb_wbuf & 7) == 0);
        mpcb->mpcb_wbuf_pa = va_to_pa(mpcb->mpcb_wbuf);
        mpcb->mpcb_pa = va_to_pa(mpcb);
        return (stk);
}

void
lwp_stk_fini(klwp_t *lwp)
{
        struct machpcb *mpcb = lwptompcb(lwp);

        /*
         * There might still be windows in the wbuf due to an unmapped
         * stack, a misaligned stack pointer, etc.; we just discard them.
         */
        mpcb->mpcb_wbcnt = 0;
        if (mpcb->mpcb_wstate == WSTATE_USER32)
                kmem_cache_free(wbuf32_cache, mpcb->mpcb_wbuf);
        else
                kmem_cache_free(wbuf64_cache, mpcb->mpcb_wbuf);
        mpcb->mpcb_wbuf = NULL;
        mpcb->mpcb_wbuf_pa = -1;
}


/*
 * Copy regs from parent to child.
 */
void
lwp_forkregs(klwp_t *lwp, klwp_t *clwp)
{
        kthread_t *t, *pt = lwptot(lwp);
        struct machpcb *mpcb = lwptompcb(clwp);
        struct machpcb *pmpcb = lwptompcb(lwp);
        kfpu_t *fp, *pfp = lwptofpu(lwp);
        caddr_t wbuf;
        uint_t wstate;

        t = mpcb->mpcb_thread;
        /*
         * remember child's fp and wbuf since they will get erased during
         * the bcopy.
         */
        fp = mpcb->mpcb_fpu;
        wbuf = mpcb->mpcb_wbuf;
        wstate = mpcb->mpcb_wstate;
        /*
         * Don't copy mpcb_frame since we hand-crafted it
         * in thread_load().
         */
        bcopy(lwp->lwp_regs, clwp->lwp_regs, sizeof (struct machpcb) - REGOFF);
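        /*
         * The bcopy copied the parent's machpcb wholesale, clobbering
         * the child's private pointers; restore them so they point
         * back into the child's own structures.
         */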
        mpcb->mpcb_thread = t;
        mpcb->mpcb_fpu = fp;
        fp->fpu_q = mpcb->mpcb_fpu_q;

        /*
         * It is theoretically possible for the lwp's wstate to
         * differ from the value assigned in lwp_stk_init, since
         * lwp_stk_init assumed the data model of the process.
         * Here we take on the data model of the cloned lwp.
         */
        if (mpcb->mpcb_wstate != wstate) {
                if (wstate == WSTATE_USER32) {
                        kmem_cache_free(wbuf32_cache, wbuf);
                        wbuf = kmem_cache_alloc(wbuf64_cache, KM_SLEEP);
                        wstate = WSTATE_USER64;
                } else {
                        kmem_cache_free(wbuf64_cache, wbuf);
                        wbuf = kmem_cache_alloc(wbuf32_cache, KM_SLEEP);
                        wstate = WSTATE_USER32;
                }
        }

        mpcb->mpcb_pa = va_to_pa(mpcb);
        mpcb->mpcb_wbuf = wbuf;
        mpcb->mpcb_wbuf_pa = va_to_pa(wbuf);

        ASSERT(mpcb->mpcb_wstate == wstate);

        if (mpcb->mpcb_wbcnt != 0) {
                bcopy(pmpcb->mpcb_wbuf, mpcb->mpcb_wbuf,
                    mpcb->mpcb_wbcnt * ((mpcb->mpcb_wstate == WSTATE_USER32) ?
                    sizeof (struct rwindow32) : sizeof (struct rwindow64)));
        }

        if (pt == curthread)
                pfp->fpu_fprs = _fp_read_fprs();
        if ((pfp->fpu_en) || (pfp->fpu_fprs & FPRS_FEF)) {
                if (pt == curthread && fpu_exists) {
                        save_gsr(clwp->lwp_fpu);
                } else {
                        uint64_t gsr;
                        gsr = get_gsr(lwp->lwp_fpu);
                        set_gsr(gsr, clwp->lwp_fpu);
                }
                fp_fork(lwp, clwp);
        }
}

/*
 * Free lwp fpu regs.
 */
void
lwp_freeregs(klwp_t *lwp, int isexec)
{
        kfpu_t *fp = lwptofpu(lwp);

        if (lwptot(lwp) == curthread)
                fp->fpu_fprs = _fp_read_fprs();
        if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))
                fp_free(fp, isexec);
}

/*
 * These functions are currently unused on sparc.
 */
/*ARGSUSED*/
void
lwp_attach_brand_hdlrs(klwp_t *lwp)
{}

/*ARGSUSED*/
void
lwp_detach_brand_hdlrs(klwp_t *lwp)
{}

/*
 * fill in the specified extra register state area with the lwp's
 * platform-dependent non-floating-point extra register state
 * information
 */
/* ARGSUSED */
void
xregs_getgfiller(klwp_id_t lwp, caddr_t xrp)
{
        /* for sun4u nothing to do here, added for symmetry */
}

/*
 * fill in the specified extra register state area with the lwp's
 * platform-dependent floating-point extra register state information.
 * NOTE:  'lwp' might not correspond to 'curthread' since this is
 * called from code in /proc to get the registers of another lwp.
 */
void
xregs_getfpfiller(klwp_id_t lwp, caddr_t xrp)
{
        prxregset_t *xregs = (prxregset_t *)xrp;
        kfpu_t *fp = lwptofpu(lwp);
        uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
        uint64_t gsr;

        /*
         * fp_fksave() does not flush the GSR register into
         * the lwp area, so do it now
         */
        kpreempt_disable();
        if (ttolwp(curthread) == lwp && fpu_exists) {
                fp->fpu_fprs = _fp_read_fprs();
                if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
                        _fp_write_fprs(fprs);
                        fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
                }
                save_gsr(fp);
        }
        gsr = get_gsr(fp);
        kpreempt_enable();
        PRXREG_GSR(xregs) = gsr;
}

/*
 * set the specified lwp's platform-dependent non-floating-point
 * extra register state based on the specified input
 */
/* ARGSUSED */
void
xregs_setgfiller(klwp_id_t lwp, caddr_t xrp)
{
        /* for sun4u nothing to do here, added for symmetry */
}

/*
 * set the specified lwp's platform-dependent floating-point
 * extra register state based on the specified input
 */
void
xregs_setfpfiller(klwp_id_t lwp, caddr_t xrp)
{
        prxregset_t *xregs = (prxregset_t *)xrp;
        kfpu_t *fp = lwptofpu(lwp);
        uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
        uint64_t gsr = PRXREG_GSR(xregs);

        kpreempt_disable();
        set_gsr(gsr, lwptofpu(lwp));

        if ((lwp == ttolwp(curthread)) && fpu_exists) {
                fp->fpu_fprs = _fp_read_fprs();
                if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
                        _fp_write_fprs(fprs);
                        fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
                }
                restore_gsr(lwptofpu(lwp));
        }
        kpreempt_enable();
}

/*
 * fill in the sun4u asrs, i.e., the lwp's platform-dependent
 * non-floating-point extra register state information
 */
/* ARGSUSED */
void
getasrs(klwp_t *lwp, asrset_t asr)
{
        /* for sun4u nothing to do here, added for symmetry */
}

/*
 * fill in the sun4u asrs, i.e., the lwp's platform-dependent
 * floating-point extra register state information
 */
void
getfpasrs(klwp_t *lwp, asrset_t asr)
{
        kfpu_t *fp = lwptofpu(lwp);
        uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);

        kpreempt_disable();
        if (ttolwp(curthread) == lwp)
                fp->fpu_fprs = _fp_read_fprs();
        if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
                if (fpu_exists && ttolwp(curthread) == lwp) {
                        if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
                                _fp_write_fprs(fprs);
                                fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
                        }
                        save_gsr(fp);
                }
                asr[ASR_GSR] = (int64_t)get_gsr(fp);
        }
        kpreempt_enable();
}

/*
 * set the sun4u asrs, i.e., the lwp's platform-dependent
 * non-floating-point extra register state information
 */
/* ARGSUSED */
void
setasrs(klwp_t *lwp, asrset_t asr)
{
        /* for sun4u nothing to do here, added for symmetry */
}

void
setfpasrs(klwp_t *lwp, asrset_t asr)
{
        kfpu_t *fp = lwptofpu(lwp);
        uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);

        kpreempt_disable();
        if (ttolwp(curthread) == lwp)
                fp->fpu_fprs = _fp_read_fprs();
        if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF)) {
                set_gsr(asr[ASR_GSR], fp);
                if (fpu_exists && ttolwp(curthread) == lwp) {
                        if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
                                _fp_write_fprs(fprs);
                                fp->fpu_fprs = (V9_FPU_FPRS_TYPE)fprs;
                        }
                        restore_gsr(fp);
                }
        }
        kpreempt_enable();
}

/*
 * Create interrupt kstats for this CPU.
 */
void
cpu_create_intrstat(cpu_t *cp)
{
        int             i;
        kstat_t         *intr_ksp;
        kstat_named_t   *knp;
        char            name[KSTAT_STRLEN];
        zoneid_t        zoneid;

        ASSERT(MUTEX_HELD(&cpu_lock));

        if (pool_pset_enabled())
                zoneid = GLOBAL_ZONEID;
        else
                zoneid = ALL_ZONES;

        intr_ksp = kstat_create_zone("cpu", cp->cpu_id, "intrstat", "misc",
            KSTAT_TYPE_NAMED, PIL_MAX * 2, NULL, zoneid);

        /*
         * Initialize each PIL's named kstat
         */
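        /*
         * The ks_data array holds one pair of entries per PIL:
         * even slots are "level-N-time", odd slots "level-N-count".
         */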
        if (intr_ksp != NULL) {
                intr_ksp->ks_update = cpu_kstat_intrstat_update;
                knp = (kstat_named_t *)intr_ksp->ks_data;
                intr_ksp->ks_private = cp;
                for (i = 0; i < PIL_MAX; i++) {
                        (void) snprintf(name, KSTAT_STRLEN, "level-%d-time",
                            i + 1);
                        kstat_named_init(&knp[i * 2], name, KSTAT_DATA_UINT64);
                        (void) snprintf(name, KSTAT_STRLEN, "level-%d-count",
                            i + 1);
                        kstat_named_init(&knp[(i * 2) + 1], name,
                            KSTAT_DATA_UINT64);
                }
                kstat_install(intr_ksp);
        }
}

/*
 * Delete interrupt kstats for this CPU.
 */
void
cpu_delete_intrstat(cpu_t *cp)
{
        kstat_delete_byname_zone("cpu", cp->cpu_id, "intrstat", ALL_ZONES);
}

/*
 * Convert interrupt statistics from CPU ticks to nanoseconds and
 * update kstat.
 */
int
cpu_kstat_intrstat_update(kstat_t *ksp, int rw)
{
        kstat_named_t   *knp = ksp->ks_data;
        cpu_t           *cpup = (cpu_t *)ksp->ks_private;
        int             i;

        if (rw == KSTAT_WRITE)
                return (EACCES);

        /*
         * We use separate passes to copy and convert the statistics to
         * nanoseconds. This ensures that the snapshot of the data is as
         * self-consistent as possible.
         */

        for (i = 0; i < PIL_MAX; i++) {
                knp[i * 2].value.ui64 = cpup->cpu_m.intrstat[i + 1][0];
                knp[(i * 2) + 1].value.ui64 = cpup->cpu_stats.sys.intr[i];
        }

        for (i = 0; i < PIL_MAX; i++) {
                knp[i * 2].value.ui64 =
                    (uint64_t)tick2ns((hrtime_t)knp[i * 2].value.ui64,
                    cpup->cpu_id);
        }

        return (0);
}

/*
 * Called by common/os/cpu.c for psrinfo(1M) kstats
 */
char *
cpu_fru_fmri(cpu_t *cp)
{
        return (cpunodes[cp->cpu_id].fru_fmri);
}

/*
 * An interrupt thread is ending a time slice, so compute the interval it
 * ran for and update the statistic for its PIL.
 */
void
cpu_intr_swtch_enter(kthread_id_t t)
{
        uint64_t        interval;
        uint64_t        start;
        cpu_t           *cpu;

        ASSERT((t->t_flag & T_INTR_THREAD) != 0);
        ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);

        /*
         * We could be here with a zero timestamp. This can happen if
         * an interrupt thread which no longer has a pinned thread
         * underneath it (i.e., it blocked at some point in its past)
         * has finished running its handler: intr_thread() updated the
         * interrupt statistic for its PIL and zeroed its timestamp.
         * Since there was no pinned thread to return to, swtch() gets
         * called and we end up here.
         *
         * It can also happen if an interrupt thread in intr_thread()
         * calls preempt. It will have already taken care of updating
         * stats. In this event, the interrupt thread will be runnable.
         */
        if (t->t_intr_start) {
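                /*
                 * Atomically claim the elapsed interval: retry until
                 * we zero t_intr_start against the same value we
                 * computed the interval from.
                 */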
                do {
                        start = t->t_intr_start;
                        interval = CLOCK_TICK_COUNTER() - start;
                } while (cas64(&t->t_intr_start, start, 0) != start);
                cpu = CPU;
                if (cpu->cpu_m.divisor > 1)
                        interval *= cpu->cpu_m.divisor;
                cpu->cpu_m.intrstat[t->t_pil][0] += interval;

                atomic_add_64((uint64_t *)&cpu->cpu_intracct[cpu->cpu_mstate],
                    interval);
        } else
                ASSERT(t->t_intr == NULL || t->t_state == TS_RUN);
}


/*
 * An interrupt thread is returning from swtch(). Place a starting timestamp
 * in its thread structure.
 */
void
cpu_intr_swtch_exit(kthread_id_t t)
{
        uint64_t ts;

        ASSERT((t->t_flag & T_INTR_THREAD) != 0);
        ASSERT(t->t_pil > 0 && t->t_pil <= LOCK_LEVEL);

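        /*
         * Atomically install a fresh start timestamp; retry if an
         * update to t_intr_start races with us.
         */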
        do {
                ts = t->t_intr_start;
        } while (cas64(&t->t_intr_start, ts, CLOCK_TICK_COUNTER()) != ts);
}


int
blacklist(int cmd, const char *scheme, nvlist_t *fmri, const char *class)
{
        if (&plat_blacklist)
                return (plat_blacklist(cmd, scheme, fmri, class));

        return (ENOTSUP);
}

int
kdi_pread(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
{
        extern void kdi_flush_caches(void);
        size_t nread = 0;
        uint32_t word;
        int slop, i;

        kdi_flush_caches();
        membar_enter();

        /* We might not begin on a word boundary. */
        if ((slop = addr & 3) != 0) {
                word = ldphys(addr & ~3);
                for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nread++)
                        *buf++ = ((uchar_t *)&word)[i];
                addr = roundup(addr, 4);
        }

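        /* Copy the remaining data a 32-bit word at a time. */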
        while (nbytes > 0) {
                word = ldphys(addr);
                for (i = 0; i < 4 && nbytes > 0; i++, nbytes--, nread++, addr++)
                        *buf++ = ((uchar_t *)&word)[i];
        }

        kdi_flush_caches();

        *ncopiedp = nread;
        return (0);
}

int
kdi_pwrite(caddr_t buf, size_t nbytes, uint64_t addr, size_t *ncopiedp)
{
        extern void kdi_flush_caches(void);
        size_t nwritten = 0;
        uint32_t word;
        int slop, i;

        kdi_flush_caches();

        /* We might not begin on a word boundary. */
        if ((slop = addr & 3) != 0) {
                word = ldphys(addr & ~3);
                for (i = slop; i < 4 && nbytes > 0; i++, nbytes--, nwritten++)
                        ((uchar_t *)&word)[i] = *buf++;
                stphys(addr & ~3, word);
                addr = roundup(addr, 4);
        }

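        /* Write out any whole words directly. */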
        while (nbytes > 3) {
                for (word = 0, i = 0; i < 4; i++, nbytes--, nwritten++)
                        ((uchar_t *)&word)[i] = *buf++;
                stphys(addr, word);
                addr += 4;
        }

        /* We might not end with a whole word. */
        if (nbytes > 0) {
                word = ldphys(addr);
                for (i = 0; nbytes > 0; i++, nbytes--, nwritten++)
                        ((uchar_t *)&word)[i] = *buf++;
                stphys(addr, word);
        }

        membar_enter();
        kdi_flush_caches();

        *ncopiedp = nwritten;
        return (0);
}

static void
kdi_kernpanic(struct regs *regs, uint_t tt)
{
        sync_reg_buf = *regs;
        sync_tt = tt;

        sync_handler();
}

static void
kdi_plat_call(void (*platfn)(void))
{
        if (platfn != NULL) {
                prom_suspend_prepost();
                platfn();
                prom_resume_prepost();
        }
}

/*
 * kdi_system_claim and release are defined here for all sun4 platforms and
 * pointed to by mach_kdi_init() to provide default callbacks for such systems.
 * Specific sun4u or sun4v platforms may implement their own claim and release
 * routines, at which point their respective callbacks will be updated.
 */
static void
kdi_system_claim(void)
{
        lbolt_debug_entry();
}

static void
kdi_system_release(void)
{
        lbolt_debug_return();
}

void
mach_kdi_init(kdi_t *kdi)
{
        kdi->kdi_plat_call = kdi_plat_call;
        kdi->kdi_kmdb_enter = kmdb_enter;
        kdi->pkdi_system_claim = kdi_system_claim;
        kdi->pkdi_system_release = kdi_system_release;
        kdi->mkdi_cpu_index = kdi_cpu_index;
        kdi->mkdi_trap_vatotte = kdi_trap_vatotte;
        kdi->mkdi_kernpanic = kdi_kernpanic;
}


/*
 * get_cpu_mstate() is passed an array of timestamps, NCMSTATES
 * long, and it fills in the array with the time spent on cpu in
 * each of the mstates, where time is returned in nsec.
 *
 * No guarantee is made that the returned values in times[] will
 * monotonically increase on sequential calls, although this will
 * be true in the long run. Any such guarantee must be handled by
 * the caller, if needed. This can happen if we fail to account
 * for elapsed time due to a generation counter conflict, yet we
 * did account for it on a prior call (see below).
 *
 * The complication is that the cpu in question may be updating
 * its microstate at the same time that we are reading it.
 * Because the microstate is only updated when the CPU's state
 * changes, the values in cpu_intracct[] can be indefinitely out
 * of date. To determine true current values, it is necessary to
 * compare the current time with cpu_mstate_start, and add the
 * difference to times[cpu_mstate].
 *
 * This can be a problem if those values are changing out from
 * under us. Because the code path in new_cpu_mstate() is
 * performance critical, we have not added a lock to it. Instead,
 * we have added a generation counter. Before beginning
 * modifications, the counter is set to 0. After modifications,
 * it is set to the old value plus one.
 *
 * get_cpu_mstate() will not consider the values of cpu_mstate
 * and cpu_mstate_start to be usable unless the value of
 * cpu_mstate_gen is both non-zero and unchanged, both before and
 * after reading the mstate information. Note that we must
 * protect against out-of-order loads around accesses to the
 * generation counter. Also, this is a best effort approach in
 * that we do not retry should the counter be found to have
 * changed.
 *
 * cpu_intracct[] is used to identify time spent in each CPU
 * mstate while handling interrupts. Such time should be reported
 * against system time, and so is subtracted out from its
 * corresponding cpu_acct[] time and added to
 * cpu_acct[CMS_SYSTEM]. Additionally, intracct time is stored in
 * %ticks, but acct time may be stored as %sticks, thus requiring
 * different conversions before they can be compared.
 */

void
get_cpu_mstate(cpu_t *cpu, hrtime_t *times)
{
        int i;
        hrtime_t now, start;
        uint16_t gen;
        uint16_t state;
        hrtime_t intracct[NCMSTATES];

        /*
         * Load all volatile state under the protection of membar.
         * cpu_acct[cpu_mstate] must be loaded to avoid double counting
         * of (now - cpu_mstate_start) by a change in CPU mstate that
         * arrives after we make our last check of cpu_mstate_gen.
         */

        now = gethrtime_unscaled();
        gen = cpu->cpu_mstate_gen;

        membar_consumer();      /* guarantee load ordering */
        start = cpu->cpu_mstate_start;
        state = cpu->cpu_mstate;
        for (i = 0; i < NCMSTATES; i++) {
                intracct[i] = cpu->cpu_intracct[i];
                times[i] = cpu->cpu_acct[i];
        }
        membar_consumer();      /* guarantee load ordering */

        if (gen != 0 && gen == cpu->cpu_mstate_gen && now > start)
                times[state] += now - start;

        for (i = 0; i < NCMSTATES; i++) {
                scalehrtime(&times[i]);
                intracct[i] = tick2ns((hrtime_t)intracct[i], cpu->cpu_id);
        }

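        /*
         * Interrupt time was accounted against each mstate in
         * cpu_intracct[]; subtract it out (clamping at zero) and
         * credit it to CMS_SYSTEM instead.
         */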
        for (i = 0; i < NCMSTATES; i++) {
                if (i == CMS_SYSTEM)
                        continue;
                times[i] -= intracct[i];
                if (times[i] < 0) {
                        intracct[i] += times[i];
                        times[i] = 0;
                }
                times[CMS_SYSTEM] += intracct[i];
        }
}

void
mach_cpu_pause(volatile char *safe)
{
        /*
         * This cpu is now safe.
         */
        *safe = PAUSE_WAIT;
        membar_enter(); /* make sure stores are flushed */

        /*
         * Now we wait.  When we are allowed to continue, safe
         * will be set to PAUSE_IDLE.
         */
        while (*safe != PAUSE_IDLE)
                SMT_PAUSE();
}

/*ARGSUSED*/
int
plat_mem_do_mmio(struct uio *uio, enum uio_rw rw)
{
        return (ENOTSUP);
}

/* cpu threshold for compressed dumps */
#ifdef sun4v
uint_t dump_plat_mincpu_default = DUMP_PLAT_SUN4V_MINCPU;
#else
uint_t dump_plat_mincpu_default = DUMP_PLAT_SUN4U_MINCPU;
#endif

int
dump_plat_addr()
{
        return (0);
}

void
dump_plat_pfn()
{
}

/* ARGSUSED */
int
dump_plat_data(void *dump_cdata)
{
        return (0);
}

/* ARGSUSED */
int
plat_hold_page(pfn_t pfn, int lock, page_t **pp_ret)
{
        return (PLAT_HOLD_OK);
}

/* ARGSUSED */
void
plat_release_page(page_t *pp)
{
}

/* ARGSUSED */
void
progressbar_key_abort(ldi_ident_t li)
{
}

/*
 * We need to post a soft interrupt to reprogram the lbolt cyclic when
 * switching from event-driven to cyclic-driven lbolt. The following code
 * adds and posts the softint for sun4 platforms.
 */
static uint64_t lbolt_softint_inum;

void
lbolt_softint_add(void)
{
        lbolt_softint_inum = add_softintr(LOCK_LEVEL,
            (softintrfunc)lbolt_ev_to_cyclic, NULL, SOFTINT_MT);
}

void
lbolt_softint_post(void)
{
        setsoftint(lbolt_softint_inum);
}