1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/hpet_acpi.h>
  26 #include <sys/hpet.h>
  27 #include <sys/bitmap.h>
  28 #include <sys/inttypes.h>
  29 #include <sys/time.h>
  30 #include <sys/sunddi.h>
  31 #include <sys/ksynch.h>
  32 #include <sys/apic.h>
  33 #include <sys/callb.h>
  34 #include <sys/clock.h>
  35 #include <sys/archsystm.h>
  36 #include <sys/cpupart.h>
  37 
/*
 * Forward declarations of this file's static functions, so that definitions
 * below may reference one another regardless of their order in the file.
 */
static int hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags);
static boolean_t hpet_install_proxy(void);
static boolean_t hpet_callback(int code);
static boolean_t hpet_cpr(int code);
static boolean_t hpet_resume(void);
static void hpet_cst_callback(uint32_t code);
static boolean_t hpet_deep_idle_config(int code);
static int hpet_validate_table(ACPI_TABLE_HPET *hpet_table);
static boolean_t hpet_checksum_table(unsigned char *table, unsigned int len);
static void *hpet_memory_map(ACPI_TABLE_HPET *hpet_table);
static int hpet_start_main_counter(hpet_info_t *hip);
static int hpet_stop_main_counter(hpet_info_t *hip);
static uint64_t hpet_read_main_counter_value(hpet_info_t *hip);
static uint64_t hpet_set_leg_rt_cnf(hpet_info_t *hip, uint32_t new_value);
static uint64_t hpet_read_gen_cap(hpet_info_t *hip);
static uint64_t hpet_read_gen_config(hpet_info_t *hip);
static uint64_t hpet_read_gen_intrpt_stat(hpet_info_t *hip);
static uint64_t hpet_read_timer_N_config(hpet_info_t *hip, uint_t n);
static hpet_TN_conf_cap_t hpet_convert_timer_N_config(uint64_t conf);
static void hpet_write_gen_config(hpet_info_t *hip, uint64_t l);
static void hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l);
static void hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l);
static void hpet_write_timer_N_comp(hpet_info_t *hip, uint_t n, uint64_t l);
static void hpet_disable_timer(hpet_info_t *hip, uint32_t timer_n);
static void hpet_enable_timer(hpet_info_t *hip, uint32_t timer_n);
static int hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip);
static int hpet_timer_available(uint32_t allocated_timers, uint32_t n);
static void hpet_timer_alloc(uint32_t *allocated_timers, uint32_t n);
static void hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n,
    uint32_t interrupt);
static uint_t hpet_isr(char *arg);
static uint32_t hpet_install_interrupt_handler(uint_t (*func)(char *),
    int vector);
static void hpet_uninstall_interrupt_handler(void);
static void hpet_expire_all(void);
static boolean_t hpet_guaranteed_schedule(hrtime_t required_wakeup_time);
static boolean_t hpet_use_hpet_timer(hrtime_t *expire);
static void hpet_use_lapic_timer(hrtime_t expire);
static void hpet_init_proxy_data(void);
  77 
/*
 * hpet_state_lock is used to synchronize disabling/enabling deep c-states
 * and to synchronize suspend/resume.
 *
 * hpet_state starts out as: proxy not installed, not in CPR, cpu_deep_idle
 * set and uni_cstate set (the initializer below is positional).
 */
static kmutex_t         hpet_state_lock;
static struct hpet_state {
        boolean_t       proxy_installed;        /* CBE proxy interrupt setup */
        boolean_t       cpr;                    /* currently in CPR */
        boolean_t       cpu_deep_idle;          /* user enable/disable */
        boolean_t       uni_cstate;             /* disable if only one cstate */
} hpet_state = { B_FALSE, B_FALSE, B_TRUE, B_TRUE};

/*
 * Spin-loop tunables.  hpet_cpr() polls its deadline only after more than
 * hpet_spin_check consecutive failed lock attempts, and gives up once
 * gethrtime() passes its start time plus hpet_spin_timeout.
 * NOTE(review): hpet_idle_spin_timeout and hpet_isr_spin_timeout are
 * presumably the equivalent timeouts for the idle and ISR paths; their users
 * are not in this part of the file -- confirm.
 */
uint64_t hpet_spin_check = HPET_SPIN_CHECK;
uint64_t hpet_spin_timeout = HPET_SPIN_TIMEOUT;
uint64_t hpet_idle_spin_timeout = HPET_SPIN_TIMEOUT;
uint64_t hpet_isr_spin_timeout = HPET_SPIN_TIMEOUT;

static kmutex_t         hpet_proxy_lock;        /* lock for lAPIC proxy data */
/*
 * hpet_proxy_users is a per-cpu array.
 */
static hpet_proxy_t     *hpet_proxy_users;      /* one per CPU */


/*
 * hpet_table is filled in by AcpiGetTable() in hpet_acpi_init().
 * hpet_info caches decoded register contents along with the mapped register
 * address and the timer chosen as the Deep C-state proxy.
 */
ACPI_TABLE_HPET         *hpet_table;            /* ACPI HPET table */
hpet_info_t             hpet_info;              /* Human readable Information */
 104 
 105 /*
 106  * Provide HPET access from unix.so.
 107  * Set up pointers to access symbols in pcplusmp.
 108  */
 109 static void
 110 hpet_establish_hooks(void)
 111 {
 112         hpet.install_proxy = &hpet_install_proxy;
 113         hpet.callback = &hpet_callback;
 114         hpet.use_hpet_timer = &hpet_use_hpet_timer;
 115         hpet.use_lapic_timer = &hpet_use_lapic_timer;
 116 }
 117 
 118 /*
 119  * Get the ACPI "HPET" table.
 120  * acpi_probe() calls this function from mp_startup before drivers are loaded.
 121  * acpi_probe() verified the system is using ACPI before calling this.
 122  *
 123  * There may be more than one ACPI HPET table (Itanium only?).
 124  * Intel's HPET spec defines each timer block to have up to 32 counters and
 125  * be 1024 bytes long.  There can be more than one timer block of 32 counters.
 126  * Each timer block would have an additional ACPI HPET table.
 127  * Typical x86 systems today only have 1 HPET with 3 counters.
 128  * On x86 we only consume HPET table "1" for now.
 129  */
 130 int
 131 hpet_acpi_init(int *hpet_vect, iflag_t *hpet_flags)
 132 {
 133         extern hrtime_t tsc_read(void);
 134         extern int      idle_cpu_no_deep_c;
 135         extern int      cpuid_deep_cstates_supported(void);
 136         void            *la;
 137         uint64_t        ret;
 138         uint_t          num_timers;
 139         uint_t          ti;
 140 
 141         (void) memset(&hpet_info, 0, sizeof (hpet_info));
 142         hpet.supported = HPET_NO_SUPPORT;
 143 
 144         if (idle_cpu_no_deep_c)
 145                 return (DDI_FAILURE);
 146 
 147         if (!cpuid_deep_cstates_supported())
 148                 return (DDI_FAILURE);
 149 
 150         hpet_establish_hooks();
 151 
 152         /*
 153          * Get HPET ACPI table 1.
 154          */
 155         if (ACPI_FAILURE(AcpiGetTable(ACPI_SIG_HPET, HPET_TABLE_1,
 156             (ACPI_TABLE_HEADER **)&hpet_table))) {
 157                 cmn_err(CE_NOTE, "!hpet_acpi: unable to get ACPI HPET table");
 158                 return (DDI_FAILURE);
 159         }
 160 
 161         if (hpet_validate_table(hpet_table) != AE_OK) {
 162                 cmn_err(CE_NOTE, "!hpet_acpi: invalid HPET table");
 163                 return (DDI_FAILURE);
 164         }
 165 
 166         la = hpet_memory_map(hpet_table);
 167         if (la == NULL) {
 168                 cmn_err(CE_NOTE, "!hpet_acpi: memory map HPET failed");
 169                 return (DDI_FAILURE);
 170         }
 171         hpet_info.logical_address = la;
 172 
 173         ret = hpet_read_gen_cap(&hpet_info);
 174         hpet_info.gen_cap.counter_clk_period = HPET_GCAP_CNTR_CLK_PERIOD(ret);
 175         hpet_info.gen_cap.vendor_id = HPET_GCAP_VENDOR_ID(ret);
 176         hpet_info.gen_cap.leg_route_cap = HPET_GCAP_LEG_ROUTE_CAP(ret);
 177         hpet_info.gen_cap.count_size_cap = HPET_GCAP_CNT_SIZE_CAP(ret);
 178         /*
 179          * Hardware contains the last timer's number.
 180          * Add 1 to get the number of timers.
 181          */
 182         hpet_info.gen_cap.num_tim_cap = HPET_GCAP_NUM_TIM_CAP(ret) + 1;
 183         hpet_info.gen_cap.rev_id = HPET_GCAP_REV_ID(ret);
 184 
 185         if (hpet_info.gen_cap.counter_clk_period > HPET_MAX_CLK_PERIOD) {
 186                 cmn_err(CE_NOTE, "!hpet_acpi: COUNTER_CLK_PERIOD 0x%lx > 0x%lx",
 187                     (long)hpet_info.gen_cap.counter_clk_period,
 188                     (long)HPET_MAX_CLK_PERIOD);
 189                 return (DDI_FAILURE);
 190         }
 191 
 192         num_timers = (uint_t)hpet_info.gen_cap.num_tim_cap;
 193         if ((num_timers < 3) || (num_timers > 32)) {
 194                 cmn_err(CE_NOTE, "!hpet_acpi: invalid number of HPET timers "
 195                     "%lx", (long)num_timers);
 196                 return (DDI_FAILURE);
 197         }
 198         hpet_info.timer_n_config = (hpet_TN_conf_cap_t *)kmem_zalloc(
 199             num_timers * sizeof (uint64_t), KM_SLEEP);
 200 
 201         ret = hpet_read_gen_config(&hpet_info);
 202         hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
 203         hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
 204 
 205         /*
 206          * Solaris does not use the HPET Legacy Replacement Route capabilities.
 207          * This feature has been off by default on test systems.
 208          * The HPET spec does not specify if Legacy Replacement Route is
 209          * on or off by default, so we explicitely set it off here.
 210          * It should not matter which mode the HPET is in since we use
 211          * the first available non-legacy replacement timer: timer 2.
 212          */
 213         (void) hpet_set_leg_rt_cnf(&hpet_info, 0);
 214 
 215         ret = hpet_read_gen_config(&hpet_info);
 216         hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
 217         hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
 218 
 219         hpet_info.gen_intrpt_stat = hpet_read_gen_intrpt_stat(&hpet_info);
 220         hpet_info.main_counter_value = hpet_read_main_counter_value(&hpet_info);
 221 
 222         for (ti = 0; ti < num_timers; ++ti) {
 223                 ret = hpet_read_timer_N_config(&hpet_info, ti);
 224                 /*
 225                  * Make sure no timers are enabled (think fast reboot or
 226                  * virtual hardware).
 227                  */
 228                 if (ret & HPET_TIMER_N_INT_ENB_CNF_BIT) {
 229                         hpet_disable_timer(&hpet_info, ti);
 230                         ret &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
 231                 }
 232 
 233                 hpet_info.timer_n_config[ti] = hpet_convert_timer_N_config(ret);
 234         }
 235 
 236         /*
 237          * Be aware the Main Counter may need to be initialized in the future
 238          * if it is used for more than just Deep C-State support.
 239          * The HPET's Main Counter does not need to be initialize to a specific
 240          * value before starting it for use to wake up CPUs from Deep C-States.
 241          */
 242         if (hpet_start_main_counter(&hpet_info) != AE_OK) {
 243                 cmn_err(CE_NOTE, "!hpet_acpi: hpet_start_main_counter failed");
 244                 return (DDI_FAILURE);
 245         }
 246 
 247         hpet_info.period = hpet_info.gen_cap.counter_clk_period;
 248         /*
 249          * Read main counter twice to record HPET latency for debugging.
 250          */
 251         hpet_info.tsc[0] = tsc_read();
 252         hpet_info.hpet_main_counter_reads[0] =
 253             hpet_read_main_counter_value(&hpet_info);
 254         hpet_info.tsc[1] = tsc_read();
 255         hpet_info.hpet_main_counter_reads[1] =
 256             hpet_read_main_counter_value(&hpet_info);
 257         hpet_info.tsc[2] = tsc_read();
 258 
 259         ret = hpet_read_gen_config(&hpet_info);
 260         hpet_info.gen_config.leg_rt_cnf = HPET_GCFR_LEG_RT_CNF_BITX(ret);
 261         hpet_info.gen_config.enable_cnf = HPET_GCFR_ENABLE_CNF_BITX(ret);
 262 
 263         /*
 264          * HPET main counter reads are supported now.
 265          */
 266         hpet.supported = HPET_TIMER_SUPPORT;
 267 
 268         return (hpet_init_proxy(hpet_vect, hpet_flags));
 269 }
 270 
 271 void
 272 hpet_acpi_fini(void)
 273 {
 274         if (hpet.supported == HPET_NO_SUPPORT)
 275                 return;
 276         if (hpet.supported >= HPET_TIMER_SUPPORT)
 277                 (void) hpet_stop_main_counter(&hpet_info);
 278         if (hpet.supported > HPET_TIMER_SUPPORT)
 279                 hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
 280 }
 281 
 282 /*
 283  * Do initial setup to use a HPET timer as a proxy for Deep C-state stalled
 284  * LAPIC Timers.  Get a free HPET timer that supports I/O APIC routed interrupt.
 285  * Setup data to handle the timer's ISR, and add the timer's interrupt.
 286  *
 287  * The ddi cannot be use to allocate the HPET timer's interrupt.
 288  * ioapic_init_intr() in mp_platform_common() later sets up the I/O APIC
 289  * to handle the HPET timer's interrupt.
 290  *
 291  * Note: FSB (MSI) interrupts are not currently supported by Intel HPETs as of
 292  * ICH9.  The HPET spec allows for MSI.  In the future MSI may be prefered.
 293  */
 294 static int
 295 hpet_init_proxy(int *hpet_vect, iflag_t *hpet_flags)
 296 {
 297         if (hpet_get_IOAPIC_intr_capable_timer(&hpet_info) == -1) {
 298                 cmn_err(CE_WARN, "!hpet_acpi: get ioapic intr failed.");
 299                 return (DDI_FAILURE);
 300         }
 301 
 302         hpet_init_proxy_data();
 303 
 304         if (hpet_install_interrupt_handler(&hpet_isr,
 305             hpet_info.cstate_timer.intr) != AE_OK) {
 306                 cmn_err(CE_WARN, "!hpet_acpi: install interrupt failed.");
 307                 return (DDI_FAILURE);
 308         }
 309         *hpet_vect = hpet_info.cstate_timer.intr;
 310         hpet_flags->intr_el = INTR_EL_LEVEL;
 311         hpet_flags->intr_po = INTR_PO_ACTIVE_HIGH;
 312         hpet_flags->bustype = BUS_PCI;               /*  we *do* conform to PCI */
 313 
 314         /*
 315          * Avoid a possibly stuck interrupt by programing the HPET's timer here
 316          * before the I/O APIC is programmed to handle this interrupt.
 317          */
 318         hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
 319             hpet_info.cstate_timer.intr);
 320 
 321         /*
 322          * All HPET functionality is supported.
 323          */
 324         hpet.supported = HPET_FULL_SUPPORT;
 325         return (DDI_SUCCESS);
 326 }
 327 
 328 /*
 329  * Called by kernel if it can support Deep C-States.
 330  */
 331 static boolean_t
 332 hpet_install_proxy(void)
 333 {
 334         if (hpet_state.proxy_installed == B_TRUE)
 335                 return (B_TRUE);
 336 
 337         if (hpet.supported != HPET_FULL_SUPPORT)
 338                 return (B_FALSE);
 339 
 340         hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
 341         hpet_state.proxy_installed = B_TRUE;
 342 
 343         return (B_TRUE);
 344 }
 345 
 346 /*
 347  * Remove the interrupt that was added with add_avintr() in
 348  * hpet_install_interrupt_handler().
 349  */
 350 static void
 351 hpet_uninstall_interrupt_handler(void)
 352 {
 353         rem_avintr(NULL, CBE_HIGH_PIL, (avfunc)&hpet_isr,
 354             hpet_info.cstate_timer.intr);
 355 }
 356 
 357 static int
 358 hpet_validate_table(ACPI_TABLE_HPET *hpet_table)
 359 {
 360         ACPI_TABLE_HEADER       *table_header = (ACPI_TABLE_HEADER *)hpet_table;
 361 
 362         if (table_header->Length != sizeof (ACPI_TABLE_HPET)) {
 363                 cmn_err(CE_WARN, "!hpet_validate_table: Length %lx != sizeof ("
 364                     "ACPI_TABLE_HPET) %lx.",
 365                     (unsigned long)((ACPI_TABLE_HEADER *)hpet_table)->Length,
 366                     (unsigned long)sizeof (ACPI_TABLE_HPET));
 367                 return (AE_ERROR);
 368         }
 369 
 370         if (!ACPI_COMPARE_NAME(table_header->Signature, ACPI_SIG_HPET)) {
 371                 cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET table "
 372                     "signature");
 373                 return (AE_ERROR);
 374         }
 375 
 376         if (!hpet_checksum_table((unsigned char *)hpet_table,
 377             (unsigned int)table_header->Length)) {
 378                 cmn_err(CE_WARN, "!hpet_validate_table: Invalid HPET checksum");
 379                 return (AE_ERROR);
 380         }
 381 
 382         /*
 383          * Sequence should be table number - 1.  We are using table 1.
 384          */
 385         if (hpet_table->Sequence != HPET_TABLE_1 - 1) {
 386                 cmn_err(CE_WARN, "!hpet_validate_table: Invalid Sequence %lx",
 387                     (long)hpet_table->Sequence);
 388                 return (AE_ERROR);
 389         }
 390 
 391         return (AE_OK);
 392 }
 393 
 394 static boolean_t
 395 hpet_checksum_table(unsigned char *table, unsigned int length)
 396 {
 397         unsigned char   checksum = 0;
 398         int             i;
 399 
 400         for (i = 0; i < length; ++i, ++table)
 401                 checksum += *table;
 402 
 403         return (checksum == 0);
 404 }
 405 
 406 static void *
 407 hpet_memory_map(ACPI_TABLE_HPET *hpet_table)
 408 {
 409         return (AcpiOsMapMemory(hpet_table->Address.Address, HPET_SIZE));
 410 }
 411 
 412 static int
 413 hpet_start_main_counter(hpet_info_t *hip)
 414 {
 415         uint64_t        *gcr_ptr;
 416         uint64_t        gcr;
 417 
 418         gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
 419         gcr = *gcr_ptr;
 420 
 421         gcr |= HPET_GCFR_ENABLE_CNF;
 422         *gcr_ptr = gcr;
 423         gcr = *gcr_ptr;
 424 
 425         return (gcr & HPET_GCFR_ENABLE_CNF ? AE_OK : ~AE_OK);
 426 }
 427 
 428 static int
 429 hpet_stop_main_counter(hpet_info_t *hip)
 430 {
 431         uint64_t        *gcr_ptr;
 432         uint64_t        gcr;
 433 
 434         gcr_ptr = (uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address);
 435         gcr = *gcr_ptr;
 436 
 437         gcr &= ~HPET_GCFR_ENABLE_CNF;
 438         *gcr_ptr = gcr;
 439         gcr = *gcr_ptr;
 440 
 441         return (gcr & HPET_GCFR_ENABLE_CNF ? ~AE_OK : AE_OK);
 442 }
 443 
 444 /*
 445  * Set the Legacy Replacement Route bit.
 446  * This should be called before setting up timers.
 447  * The HPET specification is silent regarding setting this after timers are
 448  * programmed.
 449  */
 450 static uint64_t
 451 hpet_set_leg_rt_cnf(hpet_info_t *hip, uint32_t new_value)
 452 {
 453         uint64_t gen_conf = hpet_read_gen_config(hip);
 454 
 455         switch (new_value) {
 456         case 0:
 457                 gen_conf &= ~HPET_GCFR_LEG_RT_CNF;
 458                 break;
 459 
 460         case HPET_GCFR_LEG_RT_CNF:
 461                 gen_conf |= HPET_GCFR_LEG_RT_CNF;
 462                 break;
 463 
 464         default:
 465                 ASSERT(new_value == 0 || new_value == HPET_GCFR_LEG_RT_CNF);
 466                 break;
 467         }
 468         hpet_write_gen_config(hip, gen_conf);
 469         return (gen_conf);
 470 }
 471 
 472 static uint64_t
 473 hpet_read_gen_cap(hpet_info_t *hip)
 474 {
 475         return (*(uint64_t *)HPET_GEN_CAP_ADDRESS(hip->logical_address));
 476 }
 477 
 478 static uint64_t
 479 hpet_read_gen_config(hpet_info_t *hip)
 480 {
 481         return (*(uint64_t *)
 482             HPET_GEN_CONFIG_ADDRESS(hip->logical_address));
 483 }
 484 
 485 static uint64_t
 486 hpet_read_gen_intrpt_stat(hpet_info_t *hip)
 487 {
 488         hip->gen_intrpt_stat = *(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(
 489             hip->logical_address);
 490         return (hip->gen_intrpt_stat);
 491 }
 492 
 493 static uint64_t
 494 hpet_read_timer_N_config(hpet_info_t *hip, uint_t n)
 495 {
 496         uint64_t conf = *(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
 497             hip->logical_address, n);
 498         hip->timer_n_config[n] = hpet_convert_timer_N_config(conf);
 499         return (conf);
 500 }
 501 
 502 static hpet_TN_conf_cap_t
 503 hpet_convert_timer_N_config(uint64_t conf)
 504 {
 505         hpet_TN_conf_cap_t cc = { 0 };
 506 
 507         cc.int_route_cap = HPET_TIMER_N_INT_ROUTE_CAP(conf);
 508         cc.fsb_int_del_cap = HPET_TIMER_N_FSB_INT_DEL_CAP(conf);
 509         cc.fsb_int_en_cnf = HPET_TIMER_N_FSB_EN_CNF(conf);
 510         cc.int_route_cnf = HPET_TIMER_N_INT_ROUTE_CNF(conf);
 511         cc.mode32_cnf = HPET_TIMER_N_MODE32_CNF(conf);
 512         cc.val_set_cnf = HPET_TIMER_N_VAL_SET_CNF(conf);
 513         cc.size_cap = HPET_TIMER_N_SIZE_CAP(conf);
 514         cc.per_int_cap = HPET_TIMER_N_PER_INT_CAP(conf);
 515         cc.type_cnf = HPET_TIMER_N_TYPE_CNF(conf);
 516         cc.int_enb_cnf = HPET_TIMER_N_INT_ENB_CNF(conf);
 517         cc.int_type_cnf = HPET_TIMER_N_INT_TYPE_CNF(conf);
 518 
 519         return (cc);
 520 }
 521 
 522 static uint64_t
 523 hpet_read_main_counter_value(hpet_info_t *hip)
 524 {
 525         uint64_t        value;
 526         uint32_t        *counter;
 527         uint32_t        high1, high2, low;
 528 
 529         counter = (uint32_t *)HPET_MAIN_COUNTER_ADDRESS(hip->logical_address);
 530 
 531         /*
 532          * 32-bit main counters
 533          */
 534         if (hip->gen_cap.count_size_cap == 0) {
 535                 value = (uint64_t)*counter;
 536                 hip->main_counter_value = value;
 537                 return (value);
 538         }
 539 
 540         /*
 541          * HPET spec claims a 64-bit read can be split into two 32-bit reads
 542          * by the hardware connection to the HPET.
 543          */
 544         high2 = counter[1];
 545         do {
 546                 high1 = high2;
 547                 low = counter[0];
 548                 high2 = counter[1];
 549         } while (high2 != high1);
 550 
 551         value = ((uint64_t)high1 << 32) | low;
 552         hip->main_counter_value = value;
 553         return (value);
 554 }
 555 
 556 static void
 557 hpet_write_gen_config(hpet_info_t *hip, uint64_t l)
 558 {
 559         *(uint64_t *)HPET_GEN_CONFIG_ADDRESS(hip->logical_address) = l;
 560 }
 561 
 562 static void
 563 hpet_write_gen_intrpt_stat(hpet_info_t *hip, uint64_t l)
 564 {
 565         *(uint64_t *)HPET_GEN_INTR_STAT_ADDRESS(hip->logical_address) = l;
 566 }
 567 
 568 static void
 569 hpet_write_timer_N_config(hpet_info_t *hip, uint_t n, uint64_t l)
 570 {
 571         if (hip->timer_n_config[n].size_cap == 1)
 572                 *(uint64_t *)HPET_TIMER_N_CONF_ADDRESS(
 573                     hip->logical_address, n) = l;
 574         else
 575                 *(uint32_t *)HPET_TIMER_N_CONF_ADDRESS(
 576                     hip->logical_address, n) = (uint32_t)(0xFFFFFFFF & l);
 577 }
 578 
 579 static void
 580 hpet_write_timer_N_comp(hpet_info_t *hip, uint_t n, uint64_t l)
 581 {
 582         *(uint64_t *)HPET_TIMER_N_COMP_ADDRESS(hip->logical_address, n) = l;
 583 }
 584 
 585 static void
 586 hpet_disable_timer(hpet_info_t *hip, uint32_t timer_n)
 587 {
 588         uint64_t l;
 589 
 590         l = hpet_read_timer_N_config(hip, timer_n);
 591         l &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;
 592         hpet_write_timer_N_config(hip, timer_n, l);
 593 }
 594 
 595 static void
 596 hpet_enable_timer(hpet_info_t *hip, uint32_t timer_n)
 597 {
 598         uint64_t l;
 599 
 600         l = hpet_read_timer_N_config(hip, timer_n);
 601         l |= HPET_TIMER_N_INT_ENB_CNF_BIT;
 602         hpet_write_timer_N_config(hip, timer_n, l);
 603 }
 604 
 605 /*
 606  * Add the interrupt handler for I/O APIC interrupt number (interrupt line).
 607  *
 608  * The I/O APIC line (vector) is programmed in ioapic_init_intr() called
 609  * from apic_picinit() psm_ops apic_ops entry point after we return from
 610  * apic_init() psm_ops entry point.
 611  */
 612 static uint32_t
 613 hpet_install_interrupt_handler(uint_t (*func)(char *), int vector)
 614 {
 615         uint32_t retval;
 616 
 617         retval = add_avintr(NULL, CBE_HIGH_PIL, (avfunc)func, "HPET Timer",
 618             vector, NULL, NULL, NULL, NULL);
 619         if (retval == 0) {
 620                 cmn_err(CE_WARN, "!hpet_acpi: add_avintr() failed");
 621                 return (AE_BAD_PARAMETER);
 622         }
 623         return (AE_OK);
 624 }
 625 
 626 /*
 627  * The HPET timers specify which I/O APIC interrupts they can be routed to.
 628  * Find the first available non-legacy-replacement timer and its I/O APIC irq.
 629  * Supported I/O APIC IRQs are specified in the int_route_cap bitmap in each
 630  * timer's timer_n_config register.
 631  */
 632 static int
 633 hpet_get_IOAPIC_intr_capable_timer(hpet_info_t *hip)
 634 {
 635         int     timer;
 636         int     intr;
 637 
 638         for (timer = HPET_FIRST_NON_LEGACY_TIMER;
 639             timer < hip->gen_cap.num_tim_cap; ++timer) {
 640 
 641                 if (!hpet_timer_available(hip->allocated_timers, timer))
 642                         continue;
 643 
 644                 intr = lowbit(hip->timer_n_config[timer].int_route_cap) - 1;
 645                 if (intr >= 0) {
 646                         hpet_timer_alloc(&hip->allocated_timers, timer);
 647                         hip->cstate_timer.timer = timer;
 648                         hip->cstate_timer.intr = intr;
 649                         return (timer);
 650                 }
 651         }
 652 
 653         return (-1);
 654 }
 655 
 656 /*
 657  * Mark this timer as used.
 658  */
 659 static void
 660 hpet_timer_alloc(uint32_t *allocated_timers, uint32_t n)
 661 {
 662         *allocated_timers |= 1 << n;
 663 }
 664 
 665 /*
 666  * Check if this timer is available.
 667  * No mutual exclusion because only one thread uses this.
 668  */
 669 static int
 670 hpet_timer_available(uint32_t allocated_timers, uint32_t n)
 671 {
 672         return ((allocated_timers & (1 << n)) == 0);
 673 }
 674 
 675 /*
 676  * Setup timer N to route its interrupt to I/O APIC.
 677  */
 678 static void
 679 hpet_timer_set_up(hpet_info_t *hip, uint32_t timer_n, uint32_t interrupt)
 680 {
 681         uint64_t conf;
 682 
 683         conf = hpet_read_timer_N_config(hip, timer_n);
 684 
 685         /*
 686          * Caller is required to verify this interrupt route is supported.
 687          */
 688         ASSERT(HPET_TIMER_N_INT_ROUTE_CAP(conf) & (1 << interrupt));
 689 
 690         conf &= ~HPET_TIMER_N_FSB_EN_CNF_BIT;       /* use IOAPIC */
 691         conf |= HPET_TIMER_N_INT_ROUTE_SHIFT(interrupt);
 692         conf &= ~HPET_TIMER_N_TYPE_CNF_BIT; /* non periodic */
 693         conf &= ~HPET_TIMER_N_INT_ENB_CNF_BIT;      /* disabled */
 694         conf |= HPET_TIMER_N_INT_TYPE_CNF_BIT;  /* Level Triggered */
 695 
 696         hpet_write_timer_N_config(hip, timer_n, conf);
 697 }
 698 
 699 /*
 700  * The HPET's Main Counter is not stopped before programming an HPET timer.
 701  * This will allow the HPET to be used as a time source.
 702  * The programmed timer interrupt may occur before this function returns.
 703  * Callers must block interrupts before calling this function if they must
 704  * guarantee the interrupt is handled after this function returns.
 705  *
 706  * Return 0 if main counter is less than timer after enabling timer.
 707  * The interrupt was programmed, but it may fire before this returns.
 708  * Return !0 if main counter is greater than timer after enabling timer.
 709  * In other words: the timer will not fire, and we do not know if it did fire.
 710  *
 711  * delta is in HPET ticks.
 712  *
 713  * Writing a 64-bit value to a 32-bit register will "wrap around".
 714  * A 32-bit HPET timer will wrap around in a little over 5 minutes.
 715  */
 716 int
 717 hpet_timer_program(hpet_info_t *hip, uint32_t timer, uint64_t delta)
 718 {
 719         uint64_t time, program;
 720 
 721         program = hpet_read_main_counter_value(hip);
 722         program += delta;
 723         hpet_write_timer_N_comp(hip, timer, program);
 724 
 725         time = hpet_read_main_counter_value(hip);
 726         if (time < program)
 727                 return (AE_OK);
 728 
 729         return (AE_TIME);
 730 }
 731 
 732 /*
 733  * CPR and power policy-change callback entry point.
 734  */
 735 boolean_t
 736 hpet_callback(int code)
 737 {
 738         switch (code) {
 739         case PM_DEFAULT_CPU_DEEP_IDLE:
 740                 /*FALLTHROUGH*/
 741         case PM_ENABLE_CPU_DEEP_IDLE:
 742                 /*FALLTHROUGH*/
 743         case PM_DISABLE_CPU_DEEP_IDLE:
 744                 return (hpet_deep_idle_config(code));
 745 
 746         case CB_CODE_CPR_RESUME:
 747                 /*FALLTHROUGH*/
 748         case CB_CODE_CPR_CHKPT:
 749                 return (hpet_cpr(code));
 750 
 751         case CST_EVENT_MULTIPLE_CSTATES:
 752                 hpet_cst_callback(CST_EVENT_MULTIPLE_CSTATES);
 753                 return (B_TRUE);
 754 
 755         case CST_EVENT_ONE_CSTATE:
 756                 hpet_cst_callback(CST_EVENT_ONE_CSTATE);
 757                 return (B_TRUE);
 758 
 759         default:
 760                 cmn_err(CE_NOTE, "!hpet_callback: invalid code %d\n", code);
 761                 return (B_FALSE);
 762         }
 763 }
 764 
/*
 * Handle CPR (suspend/resume) checkpoint and resume notifications.
 *
 * According to the HPET spec 1.0a: the Operating System must save and restore
 * HPET event timer hardware context through ACPI sleep state transitions.
 * Timer registers (including the main counter) may not be preserved through
 * ACPI S3, S4, or S5 sleep states.  This code does not support S1 nor S2.
 *
 * Current HPET state is already in hpet.supported and
 * hpet_state.proxy_installed.  hpet_info contains the proxy interrupt HPET
 * Timer state.
 *
 * Future projects beware: the HPET Main Counter is undefined after ACPI S3 or
 * S4, and it is not saved/restored here.  Future projects cannot expect the
 * Main Counter to be monotonically (or accurately) increasing across CPR.
 *
 * Note: the CPR Checkpoint path later calls pause_cpus() which ensures all
 * CPUs are awake and in a spin loop before the system suspends.  The HPET is
 * not needed for Deep C-state wakeup when CPUs are in cpu_pause().
 * It is safe to leave the HPET running as the system suspends; we just
 * disable the timer from generating interrupts here.
 *
 * Returns B_TRUE when the code was handled (this includes a failed
 * hpet_resume(), which is only logged), and B_FALSE for an unknown code or
 * when the checkpoint path could not get hpet_proxy_lock before its deadline.
 */
static boolean_t
hpet_cpr(int code)
{
        ulong_t         intr, dead_count = 0;
        /* Deadline for the spin on hpet_proxy_lock, fixed at function entry. */
        hrtime_t        dead = gethrtime() + hpet_spin_timeout;
        boolean_t       ret = B_TRUE;

        mutex_enter(&hpet_state_lock);
        switch (code) {
        case CB_CODE_CPR_CHKPT:
                /* Nothing to quiesce if the proxy was never installed. */
                if (hpet_state.proxy_installed == B_FALSE)
                        break;

                hpet_state.cpr = B_TRUE;

                /*
                 * Take hpet_proxy_lock with mutex_tryenter() rather than
                 * blocking.  Each attempt is made after intr_clear(); on
                 * failure the saved state is put back with intr_restore()
                 * before the next attempt.
                 */
                intr = intr_clear();
                while (!mutex_tryenter(&hpet_proxy_lock)) {
                        /*
                         * spin
                         */
                        intr_restore(intr);
                        /*
                         * Only look at the clock after more than
                         * hpet_spin_check failed attempts in a row.
                         */
                        if (dead_count++ > hpet_spin_check) {
                                dead_count = 0;
                                if (gethrtime() > dead) {
                                        /*
                                         * Give up: undo the cpr flag and
                                         * drop the state lock.  The saved
                                         * interrupt state was already
                                         * restored above.
                                         */
                                        hpet_state.cpr = B_FALSE;
                                        mutex_exit(&hpet_state_lock);
                                        cmn_err(CE_NOTE, "!hpet_cpr: deadman");
                                        return (B_FALSE);
                                }
                        }
                        intr = intr_clear();
                }
                /* Holding hpet_proxy_lock: expire all proxy users. */
                hpet_expire_all();
                mutex_exit(&hpet_proxy_lock);
                intr_restore(intr);

                /* Keep the proxy timer from interrupting during suspend. */
                hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
                break;

        case CB_CODE_CPR_RESUME:
                /* The cpr flag stays set if the HPET could not be resumed. */
                if (hpet_resume() == B_TRUE)
                        hpet_state.cpr = B_FALSE;
                else
                        cmn_err(CE_NOTE, "!hpet_resume failed.");
                break;

        default:
                cmn_err(CE_NOTE, "!hpet_cpr: invalid code %d\n", code);
                ret = B_FALSE;
                break;
        }
        mutex_exit(&hpet_state_lock);
        return (ret);
}
 839 
 840 /*
 841  * Assume the HPET stopped in Suspend state and timer state was lost.
 842  */
 843 static boolean_t
 844 hpet_resume(void)
 845 {
 846         if (hpet.supported != HPET_TIMER_SUPPORT)
 847                 return (B_TRUE);
 848 
 849         /*
 850          * The HPET spec does not specify if Legacy Replacement Route is
 851          * on or off by default, so we set it off here.
 852          */
 853         (void) hpet_set_leg_rt_cnf(&hpet_info, 0);
 854 
 855         if (hpet_start_main_counter(&hpet_info) != AE_OK) {
 856                 cmn_err(CE_NOTE, "!hpet_resume: start main counter failed");
 857                 hpet.supported = HPET_NO_SUPPORT;
 858                 if (hpet_state.proxy_installed == B_TRUE) {
 859                         hpet_state.proxy_installed = B_FALSE;
 860                         hpet_uninstall_interrupt_handler();
 861                 }
 862                 return (B_FALSE);
 863         }
 864 
 865         if (hpet_state.proxy_installed == B_FALSE)
 866                 return (B_TRUE);
 867 
 868         hpet_timer_set_up(&hpet_info, hpet_info.cstate_timer.timer,
 869             hpet_info.cstate_timer.intr);
 870         if (hpet_state.cpu_deep_idle == B_TRUE)
 871                 hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
 872 
 873         return (B_TRUE);
 874 }
 875 
 876 /*
 877  * Callback to enable/disable Deep C-States based on power.conf setting.
 878  */
 879 static boolean_t
 880 hpet_deep_idle_config(int code)
 881 {
 882         ulong_t         intr, dead_count = 0;
 883         hrtime_t        dead = gethrtime() + hpet_spin_timeout;
 884         boolean_t       ret = B_TRUE;
 885 
 886         mutex_enter(&hpet_state_lock);
 887         switch (code) {
 888         case PM_DEFAULT_CPU_DEEP_IDLE:
 889                 /*FALLTHROUGH*/
 890         case PM_ENABLE_CPU_DEEP_IDLE:
 891 
 892                 if (hpet_state.cpu_deep_idle == B_TRUE)
 893                         break;
 894 
 895                 if (hpet_state.proxy_installed == B_FALSE) {
 896                         ret = B_FALSE;  /* Deep C-States not supported */
 897                         break;
 898                 }
 899 
 900                 hpet_enable_timer(&hpet_info, hpet_info.cstate_timer.timer);
 901                 hpet_state.cpu_deep_idle = B_TRUE;
 902                 break;
 903 
 904         case PM_DISABLE_CPU_DEEP_IDLE:
 905 
 906                 if ((hpet_state.cpu_deep_idle == B_FALSE) ||
 907                     (hpet_state.proxy_installed == B_FALSE))
 908                         break;
 909 
 910                 /*
 911                  * The order of these operations is important to avoid
 912                  * lost wakeups: Set a flag to refuse all future LAPIC Timer
 913                  * proxy requests, then wake up all CPUs from deep C-state,
 914                  * and finally disable the HPET interrupt-generating timer.
 915                  */
 916                 hpet_state.cpu_deep_idle = B_FALSE;
 917 
 918                 intr = intr_clear();
 919                 while (!mutex_tryenter(&hpet_proxy_lock)) {
 920                         /*
 921                          * spin
 922                          */
 923                         intr_restore(intr);
 924                         if (dead_count++ > hpet_spin_check) {
 925                                 dead_count = 0;
 926                                 if (gethrtime() > dead) {
 927                                         hpet_state.cpu_deep_idle = B_TRUE;
 928                                         mutex_exit(&hpet_state_lock);
 929                                         cmn_err(CE_NOTE,
 930                                             "!hpet_deep_idle_config: deadman");
 931                                         return (B_FALSE);
 932                                 }
 933                         }
 934                         intr = intr_clear();
 935                 }
 936                 hpet_expire_all();
 937                 mutex_exit(&hpet_proxy_lock);
 938                 intr_restore(intr);
 939 
 940                 hpet_disable_timer(&hpet_info, hpet_info.cstate_timer.timer);
 941                 break;
 942 
 943         default:
 944                 cmn_err(CE_NOTE, "!hpet_deep_idle_config: invalid code %d\n",
 945                     code);
 946                 ret = B_FALSE;
 947                 break;
 948         }
 949         mutex_exit(&hpet_state_lock);
 950 
 951         return (ret);
 952 }
 953 
 954 /*
 955  * Callback for _CST c-state change notifications.
 956  */
 957 static void
 958 hpet_cst_callback(uint32_t code)
 959 {
 960         ulong_t         intr, dead_count = 0;
 961         hrtime_t        dead = gethrtime() + hpet_spin_timeout;
 962 
 963         switch (code) {
 964         case CST_EVENT_ONE_CSTATE:
 965                 hpet_state.uni_cstate = B_TRUE;
 966                 intr = intr_clear();
 967                 while (!mutex_tryenter(&hpet_proxy_lock)) {
 968                         /*
 969                          * spin
 970                          */
 971                         intr_restore(intr);
 972                         if (dead_count++ > hpet_spin_check) {
 973                                 dead_count = 0;
 974                                 if (gethrtime() > dead) {
 975                                         hpet_expire_all();
 976                                         cmn_err(CE_NOTE,
 977                                             "!hpet_cst_callback: deadman");
 978                                         return;
 979                                 }
 980                         }
 981                         intr = intr_clear();
 982                 }
 983                 hpet_expire_all();
 984                 mutex_exit(&hpet_proxy_lock);
 985                 intr_restore(intr);
 986                 break;
 987 
 988         case CST_EVENT_MULTIPLE_CSTATES:
 989                 hpet_state.uni_cstate = B_FALSE;
 990                 break;
 991 
 992         default:
 993                 cmn_err(CE_NOTE, "!hpet_cst_callback: invalid code %d\n", code);
 994                 break;
 995         }
 996 }
 997 
/*
 * Interrupt Service Routine for HPET I/O-APIC-generated interrupts.
 * Used to wakeup CPUs from Deep C-state when their Local APIC Timer stops.
 * This ISR runs on one CPU which pokes other CPUs out of Deep C-state as
 * needed.
 *
 * arg: unused.
 *
 * Returns DDI_INTR_UNCLAIMED when the proxy timer's bit is not set in the
 * HPET General Interrupt Status register, DDI_INTR_CLAIMED otherwise.
 */
/* ARGSUSED */
static uint_t
hpet_isr(char *arg)
{
        uint64_t        timer_status;
        uint64_t        timer_mask;
        ulong_t         intr, dead_count = 0;
        hrtime_t        dead = gethrtime() + hpet_isr_spin_timeout;

        timer_mask = HPET_INTR_STATUS_MASK(hpet_info.cstate_timer.timer);

        /*
         * We are using a level-triggered interrupt.
         * HPET sets timer's General Interrupt Status Register bit N.
         * ISR checks this bit to see if it needs servicing.
         * ISR then clears this bit by writing 1 to that bit.
         */
        timer_status = hpet_read_gen_intrpt_stat(&hpet_info);
        if (!(timer_status & timer_mask))
                return (DDI_INTR_UNCLAIMED);
        hpet_write_gen_intrpt_stat(&hpet_info, timer_mask);

        /*
         * Do not touch ISR data structures before checking the HPET's General
         * Interrupt Status register.  The General Interrupt Status register
         * will not be set by hardware until after timer interrupt generation
         * is enabled by software.  Software allocates necessary data
         * structures before enabling timer interrupts.  ASSERT the software
         * data structures required to handle this interrupt are initialized.
         */
        ASSERT(hpet_proxy_users != NULL);

        /*
         * CPUs in deep c-states do not enable interrupts until after
         * performing idle cleanup which includes descheduling themselves from
         * the HPET.  The CPU running this ISR will NEVER find itself in the
         * proxy list.  A lost wakeup may occur if this is false.
         */
        ASSERT(hpet_proxy_users[CPU->cpu_id] == HPET_INFINITY);

        /*
         * Higher level interrupts may deadlock with CPUs going idle if this
         * ISR is preempted while holding hpet_proxy_lock.  Interrupts are
         * therefore blocked whenever the lock is attempted or held, and are
         * restored between failed attempts.
         */
        intr = intr_clear();
        while (!mutex_tryenter(&hpet_proxy_lock)) {
                /*
                 * spin
                 */
                intr_restore(intr);
                /* Only consult the clock every hpet_spin_check attempts. */
                if (dead_count++ > hpet_spin_check) {
                        dead_count = 0;
                        if (gethrtime() > dead) {
                                /*
                                 * Deadman: the lock was not obtained within
                                 * hpet_isr_spin_timeout.  Wake every proxy
                                 * user without the lock instead of
                                 * reprogramming the HPET (presumably so no
                                 * wakeup is lost -- confirm).
                                 */
                                hpet_expire_all();
                                return (DDI_INTR_CLAIMED);
                        }
                }
                intr = intr_clear();
        }
        /*
         * Wake expired CPUs and program the next HPET interrupt.  Passing
         * HPET_INFINITY places no deadline requirement on the result, which
         * is ignored.
         */
        (void) hpet_guaranteed_schedule(HPET_INFINITY);
        mutex_exit(&hpet_proxy_lock);
        intr_restore(intr);

        return (DDI_INTR_CLAIMED);
}
1069 
1070 /*
1071  * Used when disabling the HPET Timer interrupt.  CPUs in Deep C-state must be
1072  * woken up because they can no longer rely on the HPET's Timer to wake them.
1073  * We do not need to wait for CPUs to wakeup.
1074  */
1075 static void
1076 hpet_expire_all(void)
1077 {
1078         processorid_t   id;
1079 
1080         for (id = 0; id < max_ncpus; ++id) {
1081                 if (hpet_proxy_users[id] != HPET_INFINITY) {
1082                         hpet_proxy_users[id] = HPET_INFINITY;
1083                         if (id != CPU->cpu_id)
1084                                 poke_cpu(id);
1085                 }
1086         }
1087 }
1088 
1089 /*
1090  * To avoid missed wakeups this function must guarantee either the HPET timer
1091  * was successfully programmed to the next expire time or there are no waiting
1092  * CPUs.
1093  *
1094  * Callers cannot enter C2 or deeper if the HPET could not be programmed to
1095  * generate its next interrupt to happen at required_wakeup_time or sooner.
1096  * Returns B_TRUE if the HPET was programmed to interrupt by
1097  * required_wakeup_time, B_FALSE if not.
1098  */
1099 static boolean_t
1100 hpet_guaranteed_schedule(hrtime_t required_wakeup_time)
1101 {
1102         hrtime_t        now, next_proxy_time;
1103         processorid_t   id, next_proxy_id;
1104         int             proxy_timer = hpet_info.cstate_timer.timer;
1105         boolean_t       done = B_FALSE;
1106 
1107         ASSERT(mutex_owned(&hpet_proxy_lock));
1108 
1109         /*
1110          * Loop until we successfully program the HPET,
1111          * or no CPUs are scheduled to use the HPET as a proxy.
1112          */
1113         do {
1114                 /*
1115                  * Wake all CPUs that expired before now.
1116                  * Find the next CPU to wake up and next HPET program time.
1117                  */
1118                 now = gethrtime();
1119                 next_proxy_time = HPET_INFINITY;
1120                 next_proxy_id = CPU->cpu_id;
1121                 for (id = 0; id < max_ncpus; ++id) {
1122                         if (hpet_proxy_users[id] < now) {
1123                                 hpet_proxy_users[id] = HPET_INFINITY;
1124                                 if (id != CPU->cpu_id)
1125                                         poke_cpu(id);
1126                         } else if (hpet_proxy_users[id] < next_proxy_time) {
1127                                 next_proxy_time = hpet_proxy_users[id];
1128                                 next_proxy_id = id;
1129                         }
1130                 }
1131 
1132                 if (next_proxy_time == HPET_INFINITY) {
1133                         done = B_TRUE;
1134                         /*
1135                          * There are currently no CPUs using the HPET's Timer
1136                          * as a proxy for their LAPIC Timer.  The HPET's Timer
1137                          * does not need to be programmed.
1138                          *
1139                          * Letting the HPET timer wrap around to the current
1140                          * time is the longest possible timeout.
1141                          * A 64-bit timer will wrap around in ~ 2^44 seconds.
1142                          * A 32-bit timer will wrap around in ~ 2^12 seconds.
1143                          *
1144                          * Disabling the HPET's timer interrupt requires a
1145                          * (relatively expensive) write to the HPET.
1146                          * Instead we do nothing.
1147                          *
1148                          * We are gambling some CPU will attempt to enter a
1149                          * deep c-state before the timer wraps around.
1150                          * We assume one spurious interrupt in a little over an
1151                          * hour has less performance impact than writing to the
1152                          * HPET's timer disable bit every time all CPUs wakeup
1153                          * from deep c-state.
1154                          */
1155 
1156                 } else {
1157                         /*
1158                          * Idle CPUs disable interrupts before programming the
1159                          * HPET to prevent a lost wakeup if the HPET
1160                          * interrupts the idle cpu before it can enter a
1161                          * Deep C-State.
1162                          */
1163                         if (hpet_timer_program(&hpet_info, proxy_timer,
1164                             HRTIME_TO_HPET_TICKS(next_proxy_time - gethrtime()))
1165                             != AE_OK) {
1166                                 /*
1167                                  * We could not program the HPET to wakeup the
1168                                  * next CPU.  We must wake the CPU ourself to
1169                                  * avoid a lost wakeup.
1170                                  */
1171                                 hpet_proxy_users[next_proxy_id] = HPET_INFINITY;
1172                                 if (next_proxy_id != CPU->cpu_id)
1173                                         poke_cpu(next_proxy_id);
1174                         } else {
1175                                 done = B_TRUE;
1176                         }
1177                 }
1178 
1179         } while (!done);
1180 
1181         return (next_proxy_time <= required_wakeup_time);
1182 }
1183 
/*
 * Use an HPET timer to act as this CPU's proxy local APIC timer.
 * Used in deep c-states C2 and above while the CPU's local APIC timer stalls.
 * Called by the idle thread with interrupts disabled (asserted below).
 * Interrupts are briefly re-enabled on each pass of the spin loop while
 * waiting for hpet_proxy_lock.
 * Always returns with interrupts disabled.
 *
 * lapic_expire: output; see the per-outcome values below.
 *
 * There are 3 possible outcomes from this function:
 * 1. The Local APIC Timer was already disabled before this function was called.
 *      LAPIC TIMER     : disabled
 *      HPET            : not scheduled to wake this CPU
 *      *lapic_expire   : (hrtime_t)HPET_INFINITY
 *      Returns         : B_TRUE
 * 2. Successfully programmed the HPET to act as a LAPIC Timer proxy.
 *      LAPIC TIMER     : disabled
 *      HPET            : scheduled to wake this CPU
 *      *lapic_expire   : hrtime_t when LAPIC timer would have expired
 *      Returns         : B_TRUE
 * 3. Failed to program the HPET to act as a LAPIC Timer proxy.
 *      LAPIC TIMER     : enabled
 *      HPET            : not scheduled to wake this CPU
 *      *lapic_expire   : (hrtime_t)HPET_INFINITY
 *      Returns         : B_FALSE
 *
 * The idle thread cannot enter Deep C-State in case 3.
 * The idle thread must re-enable & re-program the LAPIC_TIMER in case 2.
 */
static boolean_t
hpet_use_hpet_timer(hrtime_t *lapic_expire)
{
        extern hrtime_t apic_timer_stop_count(void);
        extern void     apic_timer_restart(hrtime_t);
        hrtime_t        now, expire, dead;
        uint64_t        lapic_count, dead_count;
        cpupart_t       *cpu_part;
        processorid_t   cpu_sid;
        processorid_t   cpu_id = CPU->cpu_id;
        processorid_t   id;
        boolean_t       rslt;
        boolean_t       hset_update;

        cpu_part = CPU->cpu_part;
        cpu_sid = CPU->cpu_seqid;

        ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);

        /*
         * A critical section exists between when the HPET is programmed
         * to interrupt the CPU and when this CPU enters an idle state.
         * Interrupts must be blocked during that time to prevent lost
         * CBE wakeup interrupts from either LAPIC or HPET.
         *
         * Must block interrupts before acquiring hpet_proxy_lock to prevent
         * a deadlock with the ISR if the ISR runs on this CPU after the
         * idle thread acquires the mutex but before it clears interrupts.
         */
        ASSERT(!interrupts_enabled());
        /*
         * Stop the LAPIC timer and turn its remaining count into an
         * absolute expiry time.
         * NOTE(review): assumes apic_timer_stop_count() returns the time
         * remaining in hrtime units -- confirm.
         */
        lapic_count = apic_timer_stop_count();
        now = gethrtime();
        dead = now + hpet_idle_spin_timeout;
        *lapic_expire = expire = now + lapic_count;
        if (lapic_count == (hrtime_t)-1) {
                /*
                 * LAPIC timer is currently disabled.
                 * Will not use the HPET as a LAPIC Timer proxy.
                 */
                *lapic_expire = (hrtime_t)HPET_INFINITY;
                return (B_TRUE);
        }

        /*
         * Serialize hpet_proxy data structure manipulation.
         */
        dead_count = 0;
        while (!mutex_tryenter(&hpet_proxy_lock)) {
                /*
                 * spin
                 *
                 * Restart the LAPIC timer and open a brief interrupt window
                 * (sti/cli) so pending interrupts can be taken while we wait
                 * for the lock.
                 */
                apic_timer_restart(expire);
                sti();
                cli();

                if (dead_count++ > hpet_spin_check) {
                        dead_count = 0;
                        /*
                         * Every hpet_spin_check attempts: if this CPU is
                         * online, there is more than one CPU, and this CPU
                         * is no longer in its partition's haltset, give up.
                         * The LAPIC timer was restarted above, so this is
                         * outcome 3.
                         * NOTE(review): presumably leaving the haltset means
                         * another CPU wants this one awake -- confirm.
                         */
                        hset_update = (((CPU->cpu_flags & CPU_OFFLINE) == 0) &&
                            (ncpus > 1));
                        if (hset_update &&
                            !bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
                                *lapic_expire = (hrtime_t)HPET_INFINITY;
                                return (B_FALSE);
                        }
                }

                /* Stop the LAPIC timer again and recompute the expiry. */
                lapic_count = apic_timer_stop_count();
                now = gethrtime();
                *lapic_expire = expire = now + lapic_count;
                if (lapic_count == (hrtime_t)-1) {
                        /*
                         * LAPIC timer is currently disabled.
                         * Will not use the HPET as a LAPIC Timer proxy.
                         */
                        *lapic_expire = (hrtime_t)HPET_INFINITY;
                        return (B_TRUE);
                }
                if (now > dead) {
                        /*
                         * Deadman: hpet_idle_spin_timeout elapsed without
                         * getting the lock.  Hand the wakeup back to the
                         * LAPIC timer (outcome 3).
                         */
                        apic_timer_restart(expire);
                        *lapic_expire = (hrtime_t)HPET_INFINITY;
                        return (B_FALSE);
                }
        }

        /*
         * The proxy may not be used during CPR, when deep idle is disabled,
         * when no proxy is installed, or when only one C-state is available.
         * Hand the wakeup back to the LAPIC timer (outcome 3).
         */
        if ((hpet_state.cpr == B_TRUE) ||
            (hpet_state.cpu_deep_idle == B_FALSE) ||
            (hpet_state.proxy_installed == B_FALSE) ||
            (hpet_state.uni_cstate == B_TRUE)) {
                mutex_exit(&hpet_proxy_lock);
                apic_timer_restart(expire);
                *lapic_expire = (hrtime_t)HPET_INFINITY;
                return (B_FALSE);
        }

        /* Add this CPU to the proxy schedule. */
        hpet_proxy_users[cpu_id] = expire;

        /*
         * We are done if another cpu is scheduled on the HPET with an
         * expire time before us.  The next HPET interrupt has been programmed
         * to fire before our expire time.
         */
        for (id = 0; id < max_ncpus; ++id) {
                if ((hpet_proxy_users[id] <= expire) && (id != cpu_id)) {
                        mutex_exit(&hpet_proxy_lock);
                        return (B_TRUE);
                }
        }

        /*
         * We are the next LAPIC to expire.
         * Program the HPET with our expire time.
         */
        rslt = hpet_guaranteed_schedule(expire);
        mutex_exit(&hpet_proxy_lock);

        if (rslt == B_FALSE) {
                /*
                 * The HPET will not fire by our expire time: fall back to
                 * the LAPIC timer (outcome 3).
                 */
                apic_timer_restart(expire);
                *lapic_expire = (hrtime_t)HPET_INFINITY;
        }

        return (rslt);
}
1332 
1333 /*
1334  * Called by the idle thread when waking up from Deep C-state before enabling
1335  * interrupts.  With an array data structure it is faster to always remove
1336  * ourself from the array without checking if the HPET ISR already removed.
1337  *
1338  * We use a lazy algorithm for removing CPUs from the HPET's schedule.
1339  * We do not reprogram the HPET here because this CPU has real work to do.
1340  * On a idle system the CPU was probably woken up by the HPET's ISR.
1341  * On a heavily loaded system CPUs are not going into Deep C-state.
1342  * On a moderately loaded system another CPU will usually enter Deep C-state
1343  * and reprogram the HPET before the HPET fires with our wakeup.
1344  */
1345 static void
1346 hpet_use_lapic_timer(hrtime_t expire)
1347 {
1348         extern void     apic_timer_restart(hrtime_t);
1349         processorid_t   cpu_id = CPU->cpu_id;
1350 
1351         ASSERT(CPU->cpu_thread == CPU->cpu_idle_thread);
1352         ASSERT(!interrupts_enabled());
1353 
1354         hpet_proxy_users[cpu_id] = HPET_INFINITY;
1355 
1356         /*
1357          * Do not enable a LAPIC Timer that was initially disabled.
1358          */
1359         if (expire != HPET_INFINITY)
1360                 apic_timer_restart(expire);
1361 }
1362 
1363 /*
1364  * Initialize data structure to keep track of CPUs using HPET as a proxy for
1365  * their stalled local APIC timer.  For now this is just an array.
1366  */
1367 static void
1368 hpet_init_proxy_data(void)
1369 {
1370         processorid_t   id;
1371 
1372         /*
1373          * Use max_ncpus for hot plug compliance.
1374          */
1375         hpet_proxy_users = kmem_zalloc(max_ncpus * sizeof (*hpet_proxy_users),
1376             KM_SLEEP);
1377 
1378         /*
1379          * Unused entries always contain HPET_INFINITY.
1380          */
1381         for (id = 0; id < max_ncpus; ++id)
1382                 hpet_proxy_users[id] = HPET_INFINITY;
1383 }