1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright (c) 2009,  Intel Corporation.
  27  * All Rights Reserved.
  28  */
  29 
  30 /*
  31  * CPU Device driver. The driver is not DDI-compliant.
  32  *
  33  * The driver supports following features:
  34  *      - Power management.
  35  */
  36 
  37 #include <sys/types.h>
  38 #include <sys/param.h>
  39 #include <sys/errno.h>
  40 #include <sys/modctl.h>
  41 #include <sys/kmem.h>
  42 #include <sys/conf.h>
  43 #include <sys/cmn_err.h>
  44 #include <sys/stat.h>
  45 #include <sys/debug.h>
  46 #include <sys/systm.h>
  47 #include <sys/ddi.h>
  48 #include <sys/sunddi.h>
  49 #include <sys/sdt.h>
  50 #include <sys/epm.h>
  51 #include <sys/machsystm.h>
  52 #include <sys/x_call.h>
  53 #include <sys/cpudrv_mach.h>
  54 #include <sys/msacct.h>
  55 
  56 /*
  57  * CPU power management
  58  *
  59  * The supported power saving model is to slow down the CPU (on SPARC by
  60  * dividing the CPU clock and on x86 by dropping down a P-state).
  61  * Periodically we determine the amount of time the CPU is running
  62  * idle thread and threads in user mode during the last quantum.  If the idle
  63  * thread was running less than its low water mark for current speed for
  64  * number of consecutive sampling periods, or number of running threads in
  65  * user mode are above its high water mark, we arrange to go to the higher
  66  * speed.  If the idle thread was running more than its high water mark without
  67  * dropping a number of consecutive times below the mark, and number of threads
  68  * running in user mode are below its low water mark, we arrange to go to the
  69  * next lower speed.  While going down, we go through all the speeds.  While
  70  * going up we go to the maximum speed to minimize impact on the user, but have
  71  * provisions in the driver to go to other speeds.
  72  *
  73  * The driver does not have knowledge of a particular implementation of this
  74  * scheme and will work with all CPUs supporting this model. On SPARC, the
  75  * driver determines supported speeds by looking at 'clock-divisors' property
  76  * created by OBP. On x86, the driver retrieves the supported speeds from
  77  * ACPI.
  78  */
  79 
  80 /*
  81  * Configuration function prototypes and data structures
  82  */
  83 static int cpudrv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
  84 static int cpudrv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);
  85 static int cpudrv_power(dev_info_t *dip, int comp, int level);
  86 
/*
 * Device operations vector.  Only attach/detach/power are implemented;
 * the driver exposes no cb_ops (no user-visible device nodes) and needs
 * no quiesce handling for fast reboot.
 */
struct dev_ops cpudrv_ops = {
	DEVO_REV,		/* rev */
	0,			/* refcnt */
	nodev,			/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	cpudrv_attach,		/* attach */
	cpudrv_detach,		/* detach */
	nodev,			/* reset */
	(struct cb_ops *)NULL,	/* cb_ops */
	(struct bus_ops *)NULL,	/* bus_ops */
	cpudrv_power,		/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};
 101 
/* Module linkage: a single driver-type module. */
static struct modldrv modldrv = {
	&mod_driverops,			/* modops */
	"CPU Driver",			/* linkinfo */
	&cpudrv_ops,			/* dev_ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,		/* rev */
	&modldrv,		/* linkage */
	NULL
};
 113 
 114 /*
 115  * Function prototypes
 116  */
 117 static int cpudrv_init(cpudrv_devstate_t *cpudsp);
 118 static void cpudrv_free(cpudrv_devstate_t *cpudsp);
 119 static int cpudrv_comp_create(cpudrv_devstate_t *cpudsp);
 120 static void cpudrv_monitor_disp(void *arg);
 121 static void cpudrv_monitor(void *arg);
 122 
 123 /*
 124  * Driver global variables
 125  */
 126 uint_t cpudrv_debug = 0;
 127 void *cpudrv_state;
 128 static uint_t cpudrv_idle_hwm = CPUDRV_IDLE_HWM;
 129 static uint_t cpudrv_idle_lwm = CPUDRV_IDLE_LWM;
 130 static uint_t cpudrv_idle_buf_zone = CPUDRV_IDLE_BUF_ZONE;
 131 static uint_t cpudrv_idle_bhwm_cnt_max = CPUDRV_IDLE_BHWM_CNT_MAX;
 132 static uint_t cpudrv_idle_blwm_cnt_max = CPUDRV_IDLE_BLWM_CNT_MAX;
 133 static uint_t cpudrv_user_hwm = CPUDRV_USER_HWM;
 134 
 135 boolean_t cpudrv_enabled = B_TRUE;
 136 
 137 /*
 138  * cpudrv_direct_pm allows user applications to directly control the
 139  * power state transitions (direct pm) without following the normal
 * direct pm protocol. This is needed because the normal protocol
 * requires that a device only be lowered when it is idle, and be
 * brought up when it requests to do so by calling pm_raise_power().
 * Ignoring this protocol is harmless for the CPU (other than speed).
 * Moreover, it might be the case that the CPU is never idle or wants
 * to be at a higher speed because of the additional CPU cycles required
 * to run the user application.
 147  *
 * The driver will still report idle/busy status to the framework. Although
 * the framework will ignore this information for direct pm devices and not
 * try to bring them down when idle, user applications can still use this
 * information if they want.
 152  *
 153  * In the future, provide an ioctl to control setting of this mode. In
 154  * that case, this variable should move to the state structure and
 155  * be protected by the lock in the state structure.
 156  */
 157 int cpudrv_direct_pm = 0;
 158 
 159 /*
 160  * Arranges for the handler function to be called at the interval suitable
 161  * for current speed.
 162  */
 163 #define CPUDRV_MONITOR_INIT(cpudsp) { \
 164     if (cpudrv_is_enabled(cpudsp)) {          \
 165                 ASSERT(mutex_owned(&(cpudsp)->lock)); \
 166                 (cpudsp)->cpudrv_pm.timeout_id = \
 167                     timeout(cpudrv_monitor_disp, \
 168                     (cpudsp), (((cpudsp)->cpudrv_pm.cur_spd == NULL) ? \
 169                     CPUDRV_QUANT_CNT_OTHR : \
 170                     (cpudsp)->cpudrv_pm.cur_spd->quant_cnt)); \
 171         } \
 172 }
 173 
 174 /*
 175  * Arranges for the handler function not to be called back.
 176  */
 177 #define CPUDRV_MONITOR_FINI(cpudsp) { \
 178         timeout_id_t tmp_tid; \
 179         ASSERT(mutex_owned(&(cpudsp)->lock)); \
 180         tmp_tid = (cpudsp)->cpudrv_pm.timeout_id; \
 181         (cpudsp)->cpudrv_pm.timeout_id = 0; \
 182         mutex_exit(&(cpudsp)->lock); \
 183         if (tmp_tid != 0) { \
 184                 (void) untimeout(tmp_tid); \
 185                 mutex_enter(&(cpudsp)->cpudrv_pm.timeout_lock); \
 186                 while ((cpudsp)->cpudrv_pm.timeout_count != 0) \
 187                         cv_wait(&(cpudsp)->cpudrv_pm.timeout_cv, \
 188                             &(cpudsp)->cpudrv_pm.timeout_lock); \
 189                 mutex_exit(&(cpudsp)->cpudrv_pm.timeout_lock); \
 190         } \
 191         mutex_enter(&(cpudsp)->lock); \
 192 }
 193 
 194 int
 195 _init(void)
 196 {
 197         int     error;
 198 
 199         DPRINTF(D_INIT, (" _init: function called\n"));
 200         if ((error = ddi_soft_state_init(&cpudrv_state,
 201             sizeof (cpudrv_devstate_t), 0)) != 0) {
 202                 return (error);
 203         }
 204 
 205         if ((error = mod_install(&modlinkage)) != 0)  {
 206                 ddi_soft_state_fini(&cpudrv_state);
 207         }
 208 
 209         /*
 210          * Callbacks used by the PPM driver.
 211          */
 212         CPUDRV_SET_PPM_CALLBACKS();
 213         return (error);
 214 }
 215 
 216 int
 217 _fini(void)
 218 {
 219         int     error;
 220 
 221         DPRINTF(D_FINI, (" _fini: function called\n"));
 222         if ((error = mod_remove(&modlinkage)) == 0) {
 223                 ddi_soft_state_fini(&cpudrv_state);
 224         }
 225 
 226         return (error);
 227 }
 228 
 229 int
 230 _info(struct modinfo *modinfop)
 231 {
 232         return (mod_info(&modlinkage, modinfop));
 233 }
 234 
 235 /*
 236  * Driver attach(9e) entry point.
 237  */
 238 static int
 239 cpudrv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 240 {
 241         int                     instance;
 242         cpudrv_devstate_t       *cpudsp;
 243 
 244         instance = ddi_get_instance(dip);
 245 
 246         switch (cmd) {
 247         case DDI_ATTACH:
 248                 DPRINTF(D_ATTACH, ("cpudrv_attach: instance %d: "
 249                     "DDI_ATTACH called\n", instance));
 250                 if (!cpudrv_is_enabled(NULL))
 251                         return (DDI_FAILURE);
 252                 if (ddi_soft_state_zalloc(cpudrv_state, instance) !=
 253                     DDI_SUCCESS) {
 254                         cmn_err(CE_WARN, "cpudrv_attach: instance %d: "
 255                             "can't allocate state", instance);
 256                         cpudrv_enabled = B_FALSE;
 257                         return (DDI_FAILURE);
 258                 }
 259                 if ((cpudsp = ddi_get_soft_state(cpudrv_state, instance)) ==
 260                     NULL) {
 261                         cmn_err(CE_WARN, "cpudrv_attach: instance %d: "
 262                             "can't get state", instance);
 263                         ddi_soft_state_free(cpudrv_state, instance);
 264                         cpudrv_enabled = B_FALSE;
 265                         return (DDI_FAILURE);
 266                 }
 267                 cpudsp->dip = dip;
 268 
 269                 /*
 270                  * Find CPU number for this dev_info node.
 271                  */
 272                 if (!cpudrv_get_cpu_id(dip, &(cpudsp->cpu_id))) {
 273                         cmn_err(CE_WARN, "cpudrv_attach: instance %d: "
 274                             "can't convert dip to cpu_id", instance);
 275                         ddi_soft_state_free(cpudrv_state, instance);
 276                         cpudrv_enabled = B_FALSE;
 277                         return (DDI_FAILURE);
 278                 }
 279 
 280                 if (!cpudrv_is_enabled(cpudsp)) {
 281                         cmn_err(CE_WARN, "cpudrv_attach: instance %d: "
 282                             "not supported or it got disabled on us",
 283                             instance);
 284                         cpudrv_enabled = B_FALSE;
 285                         ddi_soft_state_free(cpudrv_state, instance);
 286                         return (DDI_FAILURE);
 287                 }
 288 
 289                 mutex_init(&cpudsp->lock, NULL, MUTEX_DRIVER, NULL);
 290                 if (cpudrv_init(cpudsp) != DDI_SUCCESS) {
 291                         cpudrv_enabled = B_FALSE;
 292                         cpudrv_free(cpudsp);
 293                         ddi_soft_state_free(cpudrv_state, instance);
 294                         return (DDI_FAILURE);
 295                 }
 296                 if (cpudrv_comp_create(cpudsp) != DDI_SUCCESS) {
 297                         cpudrv_enabled = B_FALSE;
 298                         cpudrv_free(cpudsp);
 299                         ddi_soft_state_free(cpudrv_state, instance);
 300                         return (DDI_FAILURE);
 301                 }
 302                 if (ddi_prop_update_string(DDI_DEV_T_NONE,
 303                     dip, "pm-class", "CPU") != DDI_PROP_SUCCESS) {
 304                         cpudrv_enabled = B_FALSE;
 305                         cpudrv_free(cpudsp);
 306                         ddi_soft_state_free(cpudrv_state, instance);
 307                         return (DDI_FAILURE);
 308                 }
 309 
 310                 /*
 311                  * Taskq is used to dispatch routine to monitor CPU
 312                  * activities.
 313                  */
 314                 cpudsp->cpudrv_pm.tq = ddi_taskq_create(dip,
 315                     "cpudrv_monitor", CPUDRV_TASKQ_THREADS,
 316                     TASKQ_DEFAULTPRI, 0);
 317 
 318                 mutex_init(&cpudsp->cpudrv_pm.timeout_lock, NULL,
 319                     MUTEX_DRIVER, NULL);
 320                 cv_init(&cpudsp->cpudrv_pm.timeout_cv, NULL,
 321                     CV_DEFAULT, NULL);
 322 
 323                 /*
 324                  * Driver needs to assume that CPU is running at
 325                  * unknown speed at DDI_ATTACH and switch it to the
 326                  * needed speed. We assume that initial needed speed
 327                  * is full speed for us.
 328                  */
 329                 /*
 330                  * We need to take the lock because cpudrv_monitor()
 331                  * will start running in parallel with attach().
 332                  */
 333                 mutex_enter(&cpudsp->lock);
 334                 cpudsp->cpudrv_pm.cur_spd = NULL;
 335                 cpudsp->cpudrv_pm.pm_started = B_FALSE;
 336                 /*
 337                  * We don't call pm_raise_power() directly from attach
 338                  * because driver attach for a slave CPU node can
 339                  * happen before the CPU is even initialized. We just
 340                  * start the monitoring system which understands
 341                  * unknown speed and moves CPU to top speed when it
 342                  * has been initialized.
 343                  */
 344                 CPUDRV_MONITOR_INIT(cpudsp);
 345                 mutex_exit(&cpudsp->lock);
 346 
 347                 if (!cpudrv_mach_init(cpudsp)) {
 348                         cmn_err(CE_WARN, "cpudrv_attach: instance %d: "
 349                             "cpudrv_mach_init failed", instance);
 350                         cpudrv_enabled = B_FALSE;
 351                         cpudrv_free(cpudsp);
 352                         ddi_soft_state_free(cpudrv_state, instance);
 353                         return (DDI_FAILURE);
 354                 }
 355 
 356                 CPUDRV_INSTALL_MAX_CHANGE_HANDLER(cpudsp);
 357 
 358                 (void) ddi_prop_update_int(DDI_DEV_T_NONE, dip,
 359                     DDI_NO_AUTODETACH, 1);
 360                 ddi_report_dev(dip);
 361                 return (DDI_SUCCESS);
 362 
 363         case DDI_RESUME:
 364                 DPRINTF(D_ATTACH, ("cpudrv_attach: instance %d: "
 365                     "DDI_RESUME called\n", instance));
 366 
 367                 cpudsp = ddi_get_soft_state(cpudrv_state, instance);
 368                 ASSERT(cpudsp != NULL);
 369 
 370                 /*
 371                  * Nothing to do for resume, if not doing active PM.
 372                  */
 373                 if (!cpudrv_is_enabled(cpudsp))
 374                         return (DDI_SUCCESS);
 375 
 376                 mutex_enter(&cpudsp->lock);
 377                 /*
 378                  * Driver needs to assume that CPU is running at unknown speed
 379                  * at DDI_RESUME and switch it to the needed speed. We assume
 380                  * that the needed speed is full speed for us.
 381                  */
 382                 cpudsp->cpudrv_pm.cur_spd = NULL;
 383                 CPUDRV_MONITOR_INIT(cpudsp);
 384                 mutex_exit(&cpudsp->lock);
 385                 CPUDRV_REDEFINE_TOPSPEED(dip);
 386                 return (DDI_SUCCESS);
 387 
 388         default:
 389                 return (DDI_FAILURE);
 390         }
 391 }
 392 
 393 /*
 394  * Driver detach(9e) entry point.
 395  */
 396 static int
 397 cpudrv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 398 {
 399         int                     instance;
 400         cpudrv_devstate_t       *cpudsp;
 401         cpudrv_pm_t             *cpupm;
 402 
 403         instance = ddi_get_instance(dip);
 404 
 405         switch (cmd) {
 406         case DDI_DETACH:
 407                 DPRINTF(D_DETACH, ("cpudrv_detach: instance %d: "
 408                     "DDI_DETACH called\n", instance));
 409 
 410 #if defined(__x86)
 411                 cpudsp = ddi_get_soft_state(cpudrv_state, instance);
 412                 ASSERT(cpudsp != NULL);
 413 
 414                 /*
 415                  * Nothing to do for detach, if no doing active PM.
 416                  */
 417                 if (!cpudrv_is_enabled(cpudsp))
 418                         return (DDI_SUCCESS);
 419 
 420                 /*
 421                  * uninstall PPC/_TPC change notification handler
 422                  */
 423                 CPUDRV_UNINSTALL_MAX_CHANGE_HANDLER(cpudsp);
 424 
 425                 /*
 426                  * destruct platform specific resource
 427                  */
 428                 if (!cpudrv_mach_fini(cpudsp))
 429                         return (DDI_FAILURE);
 430 
 431                 mutex_enter(&cpudsp->lock);
 432                 CPUDRV_MONITOR_FINI(cpudsp);
 433                 cv_destroy(&cpudsp->cpudrv_pm.timeout_cv);
 434                 mutex_destroy(&cpudsp->cpudrv_pm.timeout_lock);
 435                 ddi_taskq_destroy(cpudsp->cpudrv_pm.tq);
 436                 cpudrv_free(cpudsp);
 437                 mutex_exit(&cpudsp->lock);
 438                 mutex_destroy(&cpudsp->lock);
 439                 ddi_soft_state_free(cpudrv_state, instance);
 440                 (void) ddi_prop_update_int(DDI_DEV_T_NONE, dip,
 441                     DDI_NO_AUTODETACH, 0);
 442                 return (DDI_SUCCESS);
 443 
 444 #else
 445                 /*
 446                  * If the only thing supported by the driver is power
 447                  * management, we can in future enhance the driver and
 448                  * framework that loads it to unload the driver when
 449                  * user has disabled CPU power management.
 450                  */
 451                 return (DDI_FAILURE);
 452 #endif
 453 
 454         case DDI_SUSPEND:
 455                 DPRINTF(D_DETACH, ("cpudrv_detach: instance %d: "
 456                     "DDI_SUSPEND called\n", instance));
 457 
 458                 cpudsp = ddi_get_soft_state(cpudrv_state, instance);
 459                 ASSERT(cpudsp != NULL);
 460 
 461                 /*
 462                  * Nothing to do for suspend, if not doing active PM.
 463                  */
 464                 if (!cpudrv_is_enabled(cpudsp))
 465                         return (DDI_SUCCESS);
 466 
 467                 /*
 468                  * During a checkpoint-resume sequence, framework will
 469                  * stop interrupts to quiesce kernel activity. This will
 470                  * leave our monitoring system ineffective. Handle this
 471                  * by stopping our monitoring system and bringing CPU
 472                  * to full speed. In case we are in special direct pm
 473                  * mode, we leave the CPU at whatever speed it is. This
 474                  * is harmless other than speed.
 475                  */
 476                 mutex_enter(&cpudsp->lock);
 477                 cpupm = &(cpudsp->cpudrv_pm);
 478 
 479                 DPRINTF(D_DETACH, ("cpudrv_detach: instance %d: DDI_SUSPEND - "
 480                     "cur_spd %d, topspeed %d\n", instance,
 481                     cpupm->cur_spd->pm_level,
 482                     CPUDRV_TOPSPEED(cpupm)->pm_level));
 483 
 484                 CPUDRV_MONITOR_FINI(cpudsp);
 485 
 486                 if (!cpudrv_direct_pm && (cpupm->cur_spd !=
 487                     CPUDRV_TOPSPEED(cpupm))) {
 488                         if (cpupm->pm_busycnt < 1) {
 489                                 if ((pm_busy_component(dip, CPUDRV_COMP_NUM)
 490                                     == DDI_SUCCESS)) {
 491                                         cpupm->pm_busycnt++;
 492                                 } else {
 493                                         CPUDRV_MONITOR_INIT(cpudsp);
 494                                         mutex_exit(&cpudsp->lock);
 495                                         cmn_err(CE_WARN, "cpudrv_detach: "
 496                                             "instance %d: can't busy CPU "
 497                                             "component", instance);
 498                                         return (DDI_FAILURE);
 499                                 }
 500                         }
 501                         mutex_exit(&cpudsp->lock);
 502                         if (pm_raise_power(dip, CPUDRV_COMP_NUM,
 503                             CPUDRV_TOPSPEED(cpupm)->pm_level) !=
 504                             DDI_SUCCESS) {
 505                                 mutex_enter(&cpudsp->lock);
 506                                 CPUDRV_MONITOR_INIT(cpudsp);
 507                                 mutex_exit(&cpudsp->lock);
 508                                 cmn_err(CE_WARN, "cpudrv_detach: instance %d: "
 509                                     "can't raise CPU power level to %d",
 510                                     instance,
 511                                     CPUDRV_TOPSPEED(cpupm)->pm_level);
 512                                 return (DDI_FAILURE);
 513                         } else {
 514                                 return (DDI_SUCCESS);
 515                         }
 516                 } else {
 517                         mutex_exit(&cpudsp->lock);
 518                         return (DDI_SUCCESS);
 519                 }
 520 
 521         default:
 522                 return (DDI_FAILURE);
 523         }
 524 }
 525 
 526 /*
 527  * Driver power(9e) entry point.
 528  *
 529  * Driver's notion of current power is set *only* in power(9e) entry point
 530  * after actual power change operation has been successfully completed.
 531  */
 532 /* ARGSUSED */
 533 static int
 534 cpudrv_power(dev_info_t *dip, int comp, int level)
 535 {
 536         int                     instance;
 537         cpudrv_devstate_t       *cpudsp;
 538         cpudrv_pm_t             *cpudrvpm;
 539         cpudrv_pm_spd_t         *new_spd;
 540         boolean_t               is_ready;
 541         int                     ret;
 542 
 543         instance = ddi_get_instance(dip);
 544 
 545         DPRINTF(D_POWER, ("cpudrv_power: instance %d: level %d\n",
 546             instance, level));
 547 
 548         if ((cpudsp = ddi_get_soft_state(cpudrv_state, instance)) == NULL) {
 549                 cmn_err(CE_WARN, "cpudrv_power: instance %d: can't "
 550                     "get state", instance);
 551                 return (DDI_FAILURE);
 552         }
 553 
 554         /*
 555          * We're not ready until we can  get a cpu_t
 556          */
 557         is_ready = (cpudrv_get_cpu(cpudsp) == DDI_SUCCESS);
 558 
 559         mutex_enter(&cpudsp->lock);
 560         cpudrvpm = &(cpudsp->cpudrv_pm);
 561 
 562         /*
 563          * In normal operation, we fail if we are busy and request is
 564          * to lower the power level. We let this go through if the driver
 565          * is in special direct pm mode. On x86, we also let this through
 566          * if the change is due to a request to govern the max speed.
 567          */
 568         if (!cpudrv_direct_pm && (cpudrvpm->pm_busycnt >= 1) &&
 569             !cpudrv_is_governor_thread(cpudrvpm)) {
 570                 if ((cpudrvpm->cur_spd != NULL) &&
 571                     (level < cpudrvpm->cur_spd->pm_level)) {
 572                         mutex_exit(&cpudsp->lock);
 573                         return (DDI_FAILURE);
 574                 }
 575         }
 576 
 577         for (new_spd = cpudrvpm->head_spd; new_spd; new_spd =
 578             new_spd->down_spd) {
 579                 if (new_spd->pm_level == level)
 580                         break;
 581         }
 582         if (!new_spd) {
 583                 CPUDRV_RESET_GOVERNOR_THREAD(cpudrvpm);
 584                 mutex_exit(&cpudsp->lock);
 585                 cmn_err(CE_WARN, "cpudrv_power: instance %d: "
 586                     "can't locate new CPU speed", instance);
 587                 return (DDI_FAILURE);
 588         }
 589 
 590         /*
 591          * We currently refuse to power manage if the CPU is not ready to
 592          * take cross calls (cross calls fail silently if CPU is not ready
 593          * for it).
 594          *
 595          * Additionally, for x86 platforms we cannot power manage an instance,
 596          * until it has been initialized.
 597          */
 598         if (is_ready) {
 599                 is_ready = CPUDRV_XCALL_IS_READY(cpudsp->cpu_id);
 600                 if (!is_ready) {
 601                         DPRINTF(D_POWER, ("cpudrv_power: instance %d: "
 602                             "CPU not ready for x-calls\n", instance));
 603                 } else if (!(is_ready = cpudrv_power_ready(cpudsp->cp))) {
 604                         DPRINTF(D_POWER, ("cpudrv_power: instance %d: "
 605                             "waiting for all CPUs to be power manageable\n",
 606                             instance));
 607                 }
 608         }
 609         if (!is_ready) {
 610                 CPUDRV_RESET_GOVERNOR_THREAD(cpudrvpm);
 611                 mutex_exit(&cpudsp->lock);
 612                 return (DDI_FAILURE);
 613         }
 614 
 615         /*
 616          * Execute CPU specific routine on the requested CPU to
 617          * change its speed to normal-speed/divisor.
 618          */
 619         if ((ret = cpudrv_change_speed(cpudsp, new_spd)) != DDI_SUCCESS) {
 620                 cmn_err(CE_WARN, "cpudrv_power: "
 621                     "cpudrv_change_speed() return = %d", ret);
 622                 mutex_exit(&cpudsp->lock);
 623                 return (DDI_FAILURE);
 624         }
 625 
 626         /*
 627          * Reset idle threshold time for the new power level.
 628          */
 629         if ((cpudrvpm->cur_spd != NULL) && (level <
 630             cpudrvpm->cur_spd->pm_level)) {
 631                 if (pm_idle_component(dip, CPUDRV_COMP_NUM) ==
 632                     DDI_SUCCESS) {
 633                         if (cpudrvpm->pm_busycnt >= 1)
 634                                 cpudrvpm->pm_busycnt--;
 635                 } else {
 636                         cmn_err(CE_WARN, "cpudrv_power: instance %d: "
 637                             "can't idle CPU component",
 638                             ddi_get_instance(dip));
 639                 }
 640         }
 641         /*
 642          * Reset various parameters because we are now running at new speed.
 643          */
 644         cpudrvpm->lastquan_mstate[CMS_IDLE] = 0;
 645         cpudrvpm->lastquan_mstate[CMS_SYSTEM] = 0;
 646         cpudrvpm->lastquan_mstate[CMS_USER] = 0;
 647         cpudrvpm->lastquan_ticks = 0;
 648         cpudrvpm->cur_spd = new_spd;
 649         CPUDRV_RESET_GOVERNOR_THREAD(cpudrvpm);
 650         mutex_exit(&cpudsp->lock);
 651 
 652         return (DDI_SUCCESS);
 653 }
 654 
 655 /*
 656  * Initialize power management data.
 657  */
 658 static int
 659 cpudrv_init(cpudrv_devstate_t *cpudsp)
 660 {
 661         cpudrv_pm_t     *cpupm = &(cpudsp->cpudrv_pm);
 662         cpudrv_pm_spd_t *cur_spd;
 663         cpudrv_pm_spd_t *prev_spd = NULL;
 664         int             *speeds;
 665         uint_t          nspeeds;
 666         int             idle_cnt_percent;
 667         int             user_cnt_percent;
 668         int             i;
 669 
 670         CPUDRV_GET_SPEEDS(cpudsp, speeds, nspeeds);
 671         if (nspeeds < 2) {
 672                 /* Need at least two speeds to power manage */
 673                 CPUDRV_FREE_SPEEDS(speeds, nspeeds);
 674                 return (DDI_FAILURE);
 675         }
 676         cpupm->num_spd = nspeeds;
 677 
 678         /*
 679          * Calculate the watermarks and other parameters based on the
 680          * supplied speeds.
 681          *
 682          * One of the basic assumption is that for X amount of CPU work,
 683          * if CPU is slowed down by a factor of N, the time it takes to
 684          * do the same work will be N * X.
 685          *
 686          * The driver declares that a CPU is idle and ready for slowed down,
 687          * if amount of idle thread is more than the current speed idle_hwm
 688          * without dropping below idle_hwm a number of consecutive sampling
 689          * intervals and number of running threads in user mode are below
 690          * user_lwm.  We want to set the current user_lwm such that if we
 691          * just switched to the next slower speed with no change in real work
 692          * load, the amount of user threads at the slower speed will be such
 693          * that it falls below the slower speed's user_hwm.  If we didn't do
 694          * that then we will just come back to the higher speed as soon as we
 695          * go down even with no change in work load.
 696          * The user_hwm is a fixed precentage and not calculated dynamically.
 697          *
 698          * We bring the CPU up if idle thread at current speed is less than
 699          * the current speed idle_lwm for a number of consecutive sampling
 700          * intervals or user threads are above the user_hwm for the current
 701          * speed.
 702          */
 703         for (i = 0; i < nspeeds; i++) {
 704                 cur_spd = kmem_zalloc(sizeof (cpudrv_pm_spd_t), KM_SLEEP);
 705                 cur_spd->speed = speeds[i];
 706                 if (i == 0) {   /* normal speed */
 707                         cpupm->head_spd = cur_spd;
 708                         CPUDRV_TOPSPEED(cpupm) = cur_spd;
 709                         cur_spd->quant_cnt = CPUDRV_QUANT_CNT_NORMAL;
 710                         cur_spd->idle_hwm =
 711                             (cpudrv_idle_hwm * cur_spd->quant_cnt) / 100;
 712                         /* can't speed anymore */
 713                         cur_spd->idle_lwm = 0;
 714                         cur_spd->user_hwm = UINT_MAX;
 715                 } else {
 716                         cur_spd->quant_cnt = CPUDRV_QUANT_CNT_OTHR;
 717                         ASSERT(prev_spd != NULL);
 718                         prev_spd->down_spd = cur_spd;
 719                         cur_spd->up_spd = cpupm->head_spd;
 720 
 721                         /*
 722                          * Let's assume CPU is considered idle at full speed
 723                          * when it is spending I% of time in running the idle
 724                          * thread.  At full speed, CPU will be busy (100 - I) %
 725                          * of times.  This % of busyness increases by factor of
 726                          * N as CPU slows down.  CPU that is idle I% of times
 727                          * in full speed, it is idle (100 - ((100 - I) * N)) %
 728                          * of times in N speed.  The idle_lwm is a fixed
 729                          * percentage.  A large value of N may result in
 730                          * idle_hwm to go below idle_lwm.  We need to make sure
			 * that there is at least a buffer zone separation
 732                          * between the idle_lwm and idle_hwm values.
 733                          */
 734                         idle_cnt_percent = CPUDRV_IDLE_CNT_PERCENT(
 735                             cpudrv_idle_hwm, speeds, i);
 736                         idle_cnt_percent = max(idle_cnt_percent,
 737                             (cpudrv_idle_lwm + cpudrv_idle_buf_zone));
 738                         cur_spd->idle_hwm =
 739                             (idle_cnt_percent * cur_spd->quant_cnt) / 100;
 740                         cur_spd->idle_lwm =
 741                             (cpudrv_idle_lwm * cur_spd->quant_cnt) / 100;
 742 
 743                         /*
 744                          * The lwm for user threads are determined such that
 745                          * if CPU slows down, the load of work in the
 746                          * new speed would still keep the CPU at or below the
 747                          * user_hwm in the new speed.  This is to prevent
 748                          * the quick jump back up to higher speed.
 749                          */
 750                         cur_spd->user_hwm = (cpudrv_user_hwm *
 751                             cur_spd->quant_cnt) / 100;
 752                         user_cnt_percent = CPUDRV_USER_CNT_PERCENT(
 753                             cpudrv_user_hwm, speeds, i);
 754                         prev_spd->user_lwm =
 755                             (user_cnt_percent * prev_spd->quant_cnt) / 100;
 756                 }
 757                 prev_spd = cur_spd;
 758         }
 759         /* Slowest speed. Can't slow down anymore */
 760         cur_spd->idle_hwm = UINT_MAX;
 761         cur_spd->user_lwm = -1;
 762 #ifdef  DEBUG
 763         DPRINTF(D_PM_INIT, ("cpudrv_init: instance %d: head_spd spd %d, "
 764             "num_spd %d\n", ddi_get_instance(cpudsp->dip),
 765             cpupm->head_spd->speed, cpupm->num_spd));
 766         for (cur_spd = cpupm->head_spd; cur_spd; cur_spd = cur_spd->down_spd) {
 767                 DPRINTF(D_PM_INIT, ("cpudrv_init: instance %d: speed %d, "
 768                     "down_spd spd %d, idle_hwm %d, user_lwm %d, "
 769                     "up_spd spd %d, idle_lwm %d, user_hwm %d, "
 770                     "quant_cnt %d\n", ddi_get_instance(cpudsp->dip),
 771                     cur_spd->speed,
 772                     (cur_spd->down_spd ? cur_spd->down_spd->speed : 0),
 773                     cur_spd->idle_hwm, cur_spd->user_lwm,
 774                     (cur_spd->up_spd ? cur_spd->up_spd->speed : 0),
 775                     cur_spd->idle_lwm, cur_spd->user_hwm,
 776                     cur_spd->quant_cnt));
 777         }
 778 #endif  /* DEBUG */
 779         CPUDRV_FREE_SPEEDS(speeds, nspeeds);
 780         return (DDI_SUCCESS);
 781 }
 782 
 783 /*
 784  * Free CPU power management data.
 785  */
 786 static void
 787 cpudrv_free(cpudrv_devstate_t *cpudsp)
 788 {
 789         cpudrv_pm_t     *cpupm = &(cpudsp->cpudrv_pm);
 790         cpudrv_pm_spd_t *cur_spd, *next_spd;
 791 
 792         cur_spd = cpupm->head_spd;
 793         while (cur_spd) {
 794                 next_spd = cur_spd->down_spd;
 795                 kmem_free(cur_spd, sizeof (cpudrv_pm_spd_t));
 796                 cur_spd = next_spd;
 797         }
 798         bzero(cpupm, sizeof (cpudrv_pm_t));
 799 }
 800 
 801 /*
 802  * Create pm-components property.
 803  */
 804 static int
 805 cpudrv_comp_create(cpudrv_devstate_t *cpudsp)
 806 {
 807         cpudrv_pm_t     *cpupm = &(cpudsp->cpudrv_pm);
 808         cpudrv_pm_spd_t *cur_spd;
 809         char            **pmc;
 810         int             size;
 811         char            name[] = "NAME=CPU Speed";
 812         int             i, j;
 813         uint_t          comp_spd;
 814         int             result = DDI_FAILURE;
 815 
 816         pmc = kmem_zalloc((cpupm->num_spd + 1) * sizeof (char *), KM_SLEEP);
 817         size = CPUDRV_COMP_SIZE();
 818         if (cpupm->num_spd > CPUDRV_COMP_MAX_VAL) {
 819                 cmn_err(CE_WARN, "cpudrv_comp_create: instance %d: "
 820                     "number of speeds exceeded limits",
 821                     ddi_get_instance(cpudsp->dip));
 822                 kmem_free(pmc, (cpupm->num_spd + 1) * sizeof (char *));
 823                 return (result);
 824         }
 825 
 826         for (i = cpupm->num_spd, cur_spd = cpupm->head_spd; i > 0;
 827             i--, cur_spd = cur_spd->down_spd) {
 828                 cur_spd->pm_level = i;
 829                 pmc[i] = kmem_zalloc((size * sizeof (char)), KM_SLEEP);
 830                 comp_spd = CPUDRV_COMP_SPEED(cpupm, cur_spd);
 831                 if (comp_spd > CPUDRV_COMP_MAX_VAL) {
 832                         cmn_err(CE_WARN, "cpudrv_comp_create: "
 833                             "instance %d: speed exceeded limits",
 834                             ddi_get_instance(cpudsp->dip));
 835                         for (j = cpupm->num_spd; j >= i; j--) {
 836                                 kmem_free(pmc[j], size * sizeof (char));
 837                         }
 838                         kmem_free(pmc, (cpupm->num_spd + 1) *
 839                             sizeof (char *));
 840                         return (result);
 841                 }
 842                 CPUDRV_COMP_SPRINT(pmc[i], cpupm, cur_spd, comp_spd)
 843                 DPRINTF(D_PM_COMP_CREATE, ("cpudrv_comp_create: "
 844                     "instance %d: pm-components power level %d string '%s'\n",
 845                     ddi_get_instance(cpudsp->dip), i, pmc[i]));
 846         }
 847         pmc[0] = kmem_zalloc(sizeof (name), KM_SLEEP);
 848         (void) strcat(pmc[0], name);
 849         DPRINTF(D_PM_COMP_CREATE, ("cpudrv_comp_create: instance %d: "
 850             "pm-components component name '%s'\n",
 851             ddi_get_instance(cpudsp->dip), pmc[0]));
 852 
 853         if (ddi_prop_update_string_array(DDI_DEV_T_NONE, cpudsp->dip,
 854             "pm-components", pmc, cpupm->num_spd + 1) == DDI_PROP_SUCCESS) {
 855                 result = DDI_SUCCESS;
 856         } else {
 857                 cmn_err(CE_WARN, "cpudrv_comp_create: instance %d: "
 858                     "can't create pm-components property",
 859                     ddi_get_instance(cpudsp->dip));
 860         }
 861 
 862         for (i = cpupm->num_spd; i > 0; i--) {
 863                 kmem_free(pmc[i], size * sizeof (char));
 864         }
 865         kmem_free(pmc[0], sizeof (name));
 866         kmem_free(pmc, (cpupm->num_spd + 1) * sizeof (char *));
 867         return (result);
 868 }
 869 
 870 /*
 871  * Mark a component idle.
 872  */
 873 #define CPUDRV_MONITOR_PM_IDLE_COMP(dip, cpupm) { \
 874         if ((cpupm)->pm_busycnt >= 1) { \
 875                 if (pm_idle_component((dip), CPUDRV_COMP_NUM) == \
 876                     DDI_SUCCESS) { \
 877                         DPRINTF(D_PM_MONITOR, ("cpudrv_monitor: " \
 878                             "instance %d: pm_idle_component called\n", \
 879                             ddi_get_instance((dip)))); \
 880                         (cpupm)->pm_busycnt--; \
 881                 } else { \
 882                         cmn_err(CE_WARN, "cpudrv_monitor: instance %d: " \
 883                             "can't idle CPU component", \
 884                             ddi_get_instance((dip))); \
 885                 } \
 886         } \
 887 }
 888 
 889 /*
 890  * Marks a component busy in both PM framework and driver state structure.
 891  */
 892 #define CPUDRV_MONITOR_PM_BUSY_COMP(dip, cpupm) { \
 893         if ((cpupm)->pm_busycnt < 1) { \
 894                 if (pm_busy_component((dip), CPUDRV_COMP_NUM) == \
 895                     DDI_SUCCESS) { \
 896                         DPRINTF(D_PM_MONITOR, ("cpudrv_monitor: " \
 897                             "instance %d: pm_busy_component called\n", \
 898                             ddi_get_instance((dip)))); \
 899                         (cpupm)->pm_busycnt++; \
 900                 } else { \
 901                         cmn_err(CE_WARN, "cpudrv_monitor: instance %d: " \
 902                             "can't busy CPU component", \
 903                             ddi_get_instance((dip))); \
 904                 } \
 905         } \
 906 }
 907 
 908 /*
 909  * Marks a component busy and calls pm_raise_power().
 910  */
 911 #define CPUDRV_MONITOR_PM_BUSY_AND_RAISE(dip, cpudsp, cpupm, new_spd) { \
 912         int ret; \
 913         /* \
 914          * Mark driver and PM framework busy first so framework doesn't try \
 915          * to bring CPU to lower speed when we need to be at higher speed. \
 916          */ \
 917         CPUDRV_MONITOR_PM_BUSY_COMP((dip), (cpupm)); \
 918         mutex_exit(&(cpudsp)->lock); \
 919         DPRINTF(D_PM_MONITOR, ("cpudrv_monitor: instance %d: " \
 920             "pm_raise_power called to %d\n", ddi_get_instance((dip)), \
 921                 (new_spd->pm_level))); \
 922         ret = pm_raise_power((dip), CPUDRV_COMP_NUM, (new_spd->pm_level)); \
 923         if (ret != DDI_SUCCESS) { \
 924                 cmn_err(CE_WARN, "cpudrv_monitor: instance %d: can't " \
 925                     "raise CPU power level", ddi_get_instance((dip))); \
 926         } \
 927         mutex_enter(&(cpudsp)->lock); \
 928         if (ret == DDI_SUCCESS && cpudsp->cpudrv_pm.cur_spd == NULL) { \
 929                 cpudsp->cpudrv_pm.cur_spd = new_spd; \
 930         } \
 931 }
 932 
 933 /*
 934  * In order to monitor a CPU, we need to hold cpu_lock to access CPU
 935  * statistics. Holding cpu_lock is not allowed from a callout routine.
 936  * We dispatch a taskq to do that job.
 937  */
 938 static void
 939 cpudrv_monitor_disp(void *arg)
 940 {
 941         cpudrv_devstate_t       *cpudsp = (cpudrv_devstate_t *)arg;
 942 
 943         /*
 944          * We are here because the last task has scheduled a timeout.
 945          * The queue should be empty at this time.
 946          */
 947         mutex_enter(&cpudsp->cpudrv_pm.timeout_lock);
 948         if ((ddi_taskq_dispatch(cpudsp->cpudrv_pm.tq, cpudrv_monitor, arg,
 949             DDI_NOSLEEP)) != DDI_SUCCESS) {
 950                 mutex_exit(&cpudsp->cpudrv_pm.timeout_lock);
 951                 DPRINTF(D_PM_MONITOR, ("cpudrv_monitor_disp: failed to "
 952                     "dispatch the cpudrv_monitor taskq\n"));
 953                 mutex_enter(&cpudsp->lock);
 954                 CPUDRV_MONITOR_INIT(cpudsp);
 955                 mutex_exit(&cpudsp->lock);
 956                 return;
 957         }
 958         cpudsp->cpudrv_pm.timeout_count++;
 959         mutex_exit(&cpudsp->cpudrv_pm.timeout_lock);
 960 }
 961 
 962 /*
 963  * Monitors each CPU for the amount of time idle thread was running in the
 964  * last quantum and arranges for the CPU to go to the lower or higher speed.
 965  * Called at the time interval appropriate for the current speed. The
 966  * time interval for normal speed is CPUDRV_QUANT_CNT_NORMAL. The time
 967  * interval for other speeds (including unknown speed) is
 968  * CPUDRV_QUANT_CNT_OTHR.
 969  */
static void
cpudrv_monitor(void *arg)
{
	cpudrv_devstate_t	*cpudsp = (cpudrv_devstate_t *)arg;
	cpudrv_pm_t		*cpupm;
	cpudrv_pm_spd_t		*cur_spd, *new_spd;
	dev_info_t		*dip;
	uint_t			idle_cnt, user_cnt, system_cnt;
	clock_t			ticks;
	uint_t			tick_cnt;
	hrtime_t		msnsecs[NCMSTATES];
	boolean_t		is_ready;

/*
 * Convert the accumulated microstate time for "state" into ticks, yield
 * the delta since the previous sample in "cnt", and remember the new
 * total.  If the new total is somehow smaller than the remembered one it
 * is clamped first, so the delta is never negative (cnt is unsigned).
 */
#define GET_CPU_MSTATE_CNT(state, cnt) \
	msnsecs[state] = NSEC_TO_TICK(msnsecs[state]); \
	if (cpupm->lastquan_mstate[state] > msnsecs[state]) \
		msnsecs[state] = cpupm->lastquan_mstate[state]; \
	cnt = msnsecs[state] - cpupm->lastquan_mstate[state]; \
	cpupm->lastquan_mstate[state] = msnsecs[state]

	/*
	 * We're not ready until we can  get a cpu_t
	 */
	is_ready = (cpudrv_get_cpu(cpudsp) == DDI_SUCCESS);

	mutex_enter(&cpudsp->lock);
	cpupm = &(cpudsp->cpudrv_pm);
	/*
	 * timeout_id == 0 means monitoring has been stopped; do not
	 * reschedule, just drop this task's reference and return.
	 */
	if (cpupm->timeout_id == 0) {
		mutex_exit(&cpudsp->lock);
		goto do_return;
	}
	cur_spd = cpupm->cur_spd;
	dip = cpudsp->dip;

	/*
	 * We assume that a CPU is initialized and has a valid cpu_t
	 * structure, if it is ready for cross calls. If this changes,
	 * additional checks might be needed.
	 *
	 * Additionally, for x86 platforms we cannot power manage an
	 * instance, until it has been initialized.
	 */
	if (is_ready) {
		is_ready = CPUDRV_XCALL_IS_READY(cpudsp->cpu_id);
		if (!is_ready) {
			DPRINTF(D_PM_MONITOR, ("cpudrv_monitor: instance %d: "
			    "CPU not ready for x-calls\n",
			    ddi_get_instance(dip)));
		} else if (!(is_ready = cpudrv_power_ready(cpudsp->cp))) {
			DPRINTF(D_PM_MONITOR, ("cpudrv_monitor: instance %d: "
			    "waiting for all CPUs to be power manageable\n",
			    ddi_get_instance(dip)));
		}
	}
	if (!is_ready) {
		/*
		 * Make sure that we are busy so that framework doesn't
		 * try to bring us down in this situation.
		 */
		CPUDRV_MONITOR_PM_BUSY_COMP(dip, cpupm);
		CPUDRV_MONITOR_INIT(cpudsp);
		mutex_exit(&cpudsp->lock);
		goto do_return;
	}

	/*
	 * Make sure that we are still not at unknown power level.
	 */
	if (cur_spd == NULL) {
		DPRINTF(D_PM_MONITOR, ("cpudrv_monitor: instance %d: "
		    "cur_spd is unknown\n", ddi_get_instance(dip)));
		/* Raise to top speed so we start from a known level. */
		CPUDRV_MONITOR_PM_BUSY_AND_RAISE(dip, cpudsp, cpupm,
		    CPUDRV_TOPSPEED(cpupm));
		/*
		 * We just changed the speed. Wait till at least next
		 * call to this routine before proceeding ahead.
		 */
		CPUDRV_MONITOR_INIT(cpudsp);
		mutex_exit(&cpudsp->lock);
		goto do_return;
	}

	/* One-time setup on the first pass with a known power level. */
	if (!cpupm->pm_started) {
		cpupm->pm_started = B_TRUE;
		cpudrv_set_supp_freqs(cpudsp);
	}

	/* Sample the per-microstate deltas since the last quantum. */
	get_cpu_mstate(cpudsp->cp, msnsecs);
	GET_CPU_MSTATE_CNT(CMS_IDLE, idle_cnt);
	GET_CPU_MSTATE_CNT(CMS_USER, user_cnt);
	/* system_cnt is collected for the verbose DPRINTF below only. */
	GET_CPU_MSTATE_CNT(CMS_SYSTEM, system_cnt);

	/*
	 * We can't do anything when we have just switched to a state
	 * because there is no valid timestamp.
	 */
	if (cpupm->lastquan_ticks == 0) {
		cpupm->lastquan_ticks = NSEC_TO_TICK(gethrtime());
		CPUDRV_MONITOR_INIT(cpudsp);
		mutex_exit(&cpudsp->lock);
		goto do_return;
	}

	/*
	 * Various watermarks are based on this routine being called back
	 * exactly at the requested period. This is not guaranteed
	 * because this routine is called from a taskq that is dispatched
	 * from a timeout routine.  Handle this by finding out how many
	 * ticks have elapsed since the last call and adjusting
	 * the idle_cnt based on the delay added to the requested period
	 * by timeout and taskq.
	 */
	ticks = NSEC_TO_TICK(gethrtime());
	tick_cnt = ticks - cpupm->lastquan_ticks;
	ASSERT(tick_cnt != 0);
	cpupm->lastquan_ticks = ticks;

	/*
	 * Time taken between recording the current counts and
	 * arranging the next call of this routine is an error in our
	 * calculation. We minimize the error by calling
	 * CPUDRV_MONITOR_INIT() here instead of end of this routine.
	 */
	CPUDRV_MONITOR_INIT(cpudsp);
	DPRINTF(D_PM_MONITOR_VERBOSE, ("cpudrv_monitor: instance %d: "
	    "idle count %d, user count %d, system count %d, pm_level %d, "
	    "pm_busycnt %d\n", ddi_get_instance(dip), idle_cnt, user_cnt,
	    system_cnt, cur_spd->pm_level, cpupm->pm_busycnt));

#ifdef	DEBUG
	/*
	 * Notify that timeout and taskq has caused delays and we need to
	 * scale our parameters accordingly.
	 *
	 * To get accurate result, don't turn on other DPRINTFs with
	 * the following DPRINTF. PROM calls generated by other
	 * DPRINTFs changes the timing.
	 */
	if (tick_cnt > cur_spd->quant_cnt) {
		DPRINTF(D_PM_MONITOR_DELAY, ("cpudrv_monitor: instance %d: "
		    "tick count %d > quantum_count %u\n",
		    ddi_get_instance(dip), tick_cnt, cur_spd->quant_cnt));
	}
#endif	/* DEBUG */

	/*
	 * Adjust counts based on the delay added by timeout and taskq.
	 */
	idle_cnt = (idle_cnt * cur_spd->quant_cnt) / tick_cnt;
	user_cnt = (user_cnt * cur_spd->quant_cnt) / tick_cnt;

	/*
	 * Speed-up test: user time above the high-water mark, or idle time
	 * below the low-water mark for enough consecutive samples.
	 */
	if ((user_cnt > cur_spd->user_hwm) || (idle_cnt < cur_spd->idle_lwm &&
	    cur_spd->idle_blwm_cnt >= cpudrv_idle_blwm_cnt_max)) {
		cur_spd->idle_blwm_cnt = 0;
		cur_spd->idle_bhwm_cnt = 0;
		/*
		 * In normal situation, arrange to go to next higher speed.
		 * If we are running in special direct pm mode, we just stay
		 * at the current speed.
		 */
		if (cur_spd == cur_spd->up_spd || cpudrv_direct_pm) {
			CPUDRV_MONITOR_PM_BUSY_COMP(dip, cpupm);
		} else {
			new_spd = cur_spd->up_spd;
			CPUDRV_MONITOR_PM_BUSY_AND_RAISE(dip, cpudsp, cpupm,
			    new_spd);
		}
	} else if ((user_cnt <= cur_spd->user_lwm) &&
	    (idle_cnt >= cur_spd->idle_hwm) || !CPU_ACTIVE(cpudsp->cp)) {
		/*
		 * Slow-down test.  NOTE(review): && binds tighter than ||,
		 * so an inactive CPU always takes this path regardless of
		 * the counts -- presumably intended; confirm before adding
		 * clarifying parentheses.
		 */
		cur_spd->idle_blwm_cnt = 0;
		cur_spd->idle_bhwm_cnt = 0;
		/*
		 * Arrange to go to next lower speed by informing our idle
		 * status to the power management framework.
		 */
		CPUDRV_MONITOR_PM_IDLE_COMP(dip, cpupm);
	} else {
		/*
		 * If we are between the idle water marks and have not
		 * been here enough consecutive times to be considered
		 * busy, just increment the count and return.
		 */
		if ((idle_cnt < cur_spd->idle_hwm) &&
		    (idle_cnt >= cur_spd->idle_lwm) &&
		    (cur_spd->idle_bhwm_cnt < cpudrv_idle_bhwm_cnt_max)) {
			cur_spd->idle_blwm_cnt = 0;
			cur_spd->idle_bhwm_cnt++;
			mutex_exit(&cpudsp->lock);
			goto do_return;
		}
		if (idle_cnt < cur_spd->idle_lwm) {
			cur_spd->idle_blwm_cnt++;
			cur_spd->idle_bhwm_cnt = 0;
		}
		/*
		 * Arranges to stay at the current speed.
		 */
		CPUDRV_MONITOR_PM_BUSY_COMP(dip, cpupm);
	}
	mutex_exit(&cpudsp->lock);
do_return:
	/*
	 * Drop this task's timeout_count reference and signal any thread
	 * waiting on timeout_cv for outstanding monitor tasks to drain.
	 */
	mutex_enter(&cpupm->timeout_lock);
	ASSERT(cpupm->timeout_count > 0);
	cpupm->timeout_count--;
	cv_signal(&cpupm->timeout_cv);
	mutex_exit(&cpupm->timeout_lock);
}
1177 
1178 /*
1179  * get cpu_t structure for cpudrv_devstate_t
1180  */
1181 int
1182 cpudrv_get_cpu(cpudrv_devstate_t *cpudsp)
1183 {
1184         ASSERT(cpudsp != NULL);
1185 
1186         /*
1187          * return DDI_SUCCESS if cpudrv_devstate_t
1188          * already contains cpu_t structure
1189          */
1190         if (cpudsp->cp != NULL)
1191                 return (DDI_SUCCESS);
1192 
1193         if (MUTEX_HELD(&cpu_lock)) {
1194                 cpudsp->cp = cpu_get(cpudsp->cpu_id);
1195         } else {
1196                 mutex_enter(&cpu_lock);
1197                 cpudsp->cp = cpu_get(cpudsp->cpu_id);
1198                 mutex_exit(&cpu_lock);
1199         }
1200 
1201         if (cpudsp->cp == NULL)
1202                 return (DDI_FAILURE);
1203 
1204         return (DDI_SUCCESS);
1205 }