1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 #include <sys/sysmacros.h>
  26 #include <sys/types.h>
  27 #include <sys/mkdev.h>
  28 #include <sys/stat.h>
  29 #include <sys/sunddi.h>
  30 #include <vm/seg_kmem.h>
  31 #include <sys/machparam.h>
  32 #include <sys/sunndi.h>
  33 #include <sys/ontrap.h>
  34 #include <sys/psm.h>
  35 #include <sys/pcie.h>
  36 #include <sys/pci_cfgspace.h>
  37 #include <sys/pci_tools.h>
  38 #include <io/pci/pci_tools_ext.h>
  39 #include <sys/apic.h>
  40 #include <sys/apix.h>
  41 #include <io/pci/pci_var.h>
  42 #include <sys/pci_impl.h>
  43 #include <sys/promif.h>
  44 #include <sys/x86_archext.h>
  45 #include <sys/cpuvar.h>
  46 #include <sys/pci_cfgacc.h>
  47 
  48 #ifdef __xpv
  49 #include <sys/hypervisor.h>
  50 #endif
  51 
  52 #define PCIEX_BDF_OFFSET_DELTA  4
  53 #define PCIEX_REG_FUNC_SHIFT    (PCI_REG_FUNC_SHIFT + PCIEX_BDF_OFFSET_DELTA)
  54 #define PCIEX_REG_DEV_SHIFT     (PCI_REG_DEV_SHIFT + PCIEX_BDF_OFFSET_DELTA)
  55 #define PCIEX_REG_BUS_SHIFT     (PCI_REG_BUS_SHIFT + PCIEX_BDF_OFFSET_DELTA)
  56 
  57 #define SUCCESS 0
  58 
  59 extern uint64_t mcfg_mem_base;
  60 int pcitool_debug = 0;
  61 
  62 /*
  63  * Offsets of BARS in config space.  First entry of 0 means config space.
  64  * Entries here correlate to pcitool_bars_t enumerated type.
  65  */
  66 static uint8_t pci_bars[] = {
  67         0x0,
  68         PCI_CONF_BASE0,
  69         PCI_CONF_BASE1,
  70         PCI_CONF_BASE2,
  71         PCI_CONF_BASE3,
  72         PCI_CONF_BASE4,
  73         PCI_CONF_BASE5,
  74         PCI_CONF_ROM
  75 };
  76 
  77 /* Max offset allowed into config space for a particular device. */
  78 static uint64_t max_cfg_size = PCI_CONF_HDR_SIZE;
  79 
  80 static uint64_t pcitool_swap_endian(uint64_t data, int size);
  81 static int pcitool_cfg_access(pcitool_reg_t *prg, boolean_t write_flag,
  82     boolean_t io_access);
  83 static int pcitool_io_access(pcitool_reg_t *prg, boolean_t write_flag);
  84 static int pcitool_mem_access(pcitool_reg_t *prg, uint64_t virt_addr,
  85     boolean_t write_flag);
  86 static uint64_t pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages);
  87 static void pcitool_unmap(uint64_t virt_addr, size_t num_pages);
  88 
  89 /* Extern declarations */
  90 extern int      (*psm_intr_ops)(dev_info_t *, ddi_intr_handle_impl_t *,
  91                     psm_intr_op_t, int *);
  92 
  93 int
  94 pcitool_init(dev_info_t *dip, boolean_t is_pciex)
  95 {
  96         int instance = ddi_get_instance(dip);
  97 
  98         /* Create pcitool nodes for register access and interrupt routing. */
  99 
 100         if (ddi_create_minor_node(dip, PCI_MINOR_REG, S_IFCHR,
 101             PCI_MINOR_NUM(instance, PCI_TOOL_REG_MINOR_NUM),
 102             DDI_NT_REGACC, 0) != DDI_SUCCESS) {
 103                 return (DDI_FAILURE);
 104         }
 105 
 106         if (ddi_create_minor_node(dip, PCI_MINOR_INTR, S_IFCHR,
 107             PCI_MINOR_NUM(instance, PCI_TOOL_INTR_MINOR_NUM),
 108             DDI_NT_INTRCTL, 0) != DDI_SUCCESS) {
 109                 ddi_remove_minor_node(dip, PCI_MINOR_REG);
 110                 return (DDI_FAILURE);
 111         }
 112 
 113         if (is_pciex)
 114                 max_cfg_size = PCIE_CONF_HDR_SIZE;
 115 
 116         return (DDI_SUCCESS);
 117 }
 118 
 119 void
 120 pcitool_uninit(dev_info_t *dip)
 121 {
 122         ddi_remove_minor_node(dip, PCI_MINOR_INTR);
 123         ddi_remove_minor_node(dip, PCI_MINOR_REG);
 124 }
 125 
 126 /*ARGSUSED*/
 127 static int
 128 pcitool_set_intr(dev_info_t *dip, void *arg, int mode)
 129 {
 130         ddi_intr_handle_impl_t info_hdl;
 131         pcitool_intr_set_t iset;
 132         uint32_t old_cpu;
 133         int ret, result;
 134         size_t copyinout_size;
 135         int rval = SUCCESS;
 136         apic_get_type_t type_info;
 137 
 138         /* Version 1 of pcitool_intr_set_t doesn't have flags. */
 139         copyinout_size = (size_t)&iset.flags - (size_t)&iset;
 140 
 141         if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
 142                 return (EFAULT);
 143 
 144         switch (iset.user_version) {
 145         case PCITOOL_V1:
 146                 break;
 147 
 148         case PCITOOL_V2:
 149                 copyinout_size = sizeof (pcitool_intr_set_t);
 150                 if (ddi_copyin(arg, &iset, copyinout_size, mode) != DDI_SUCCESS)
 151                         return (EFAULT);
 152                 break;
 153 
 154         default:
 155                 iset.status = PCITOOL_OUT_OF_RANGE;
 156                 rval = ENOTSUP;
 157                 goto done_set_intr;
 158         }
 159 
 160         if (iset.flags & PCITOOL_INTR_FLAG_SET_MSI) {
 161                 rval = ENOTSUP;
 162                 iset.status = PCITOOL_IO_ERROR;
 163                 goto done_set_intr;
 164         }
 165 
 166         info_hdl.ih_private = &type_info;
 167 
 168         if ((*psm_intr_ops)(NULL, &info_hdl,
 169             PSM_INTR_OP_APIC_TYPE, NULL) != PSM_SUCCESS) {
 170                 rval = ENOTSUP;
 171                 iset.status = PCITOOL_IO_ERROR;
 172                 goto done_set_intr;
 173         }
 174 
 175         if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
 176                 if (iset.old_cpu > type_info.avgi_num_cpu) {
 177                         rval = EINVAL;
 178                         iset.status = PCITOOL_INVALID_CPUID;
 179                         goto done_set_intr;
 180                 }
 181                 old_cpu = iset.old_cpu;
 182         } else {
 183                 if ((old_cpu =
 184                     pci_get_cpu_from_vecirq(iset.ino, IS_VEC)) == -1) {
 185                         iset.status = PCITOOL_IO_ERROR;
 186                         rval = EINVAL;
 187                         goto done_set_intr;
 188                 }
 189         }
 190 
 191         if (iset.ino > type_info.avgi_num_intr) {
 192                 rval = EINVAL;
 193                 iset.status = PCITOOL_INVALID_INO;
 194                 goto done_set_intr;
 195         }
 196 
 197         iset.status = PCITOOL_SUCCESS;
 198 
 199         old_cpu &= ~PSMGI_CPU_USER_BOUND;
 200 
 201         /*
 202          * For this locally-declared and used handle, ih_private will contain a
 203          * CPU value, not an ihdl_plat_t as used for global interrupt handling.
 204          */
 205         if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
 206                 info_hdl.ih_vector = APIX_VIRTVECTOR(old_cpu, iset.ino);
 207         } else {
 208                 info_hdl.ih_vector = iset.ino;
 209         }
 210         info_hdl.ih_private = (void *)(uintptr_t)iset.cpu_id;
 211         info_hdl.ih_flags = PSMGI_INTRBY_VEC;
 212         if (pcitool_debug)
 213                 prom_printf("user version:%d, flags:0x%x\n",
 214                     iset.user_version, iset.flags);
 215 
 216         result = ENOTSUP;
 217         if ((iset.user_version >= PCITOOL_V2) &&
 218             (iset.flags & PCITOOL_INTR_FLAG_SET_GROUP)) {
 219                 ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_GRP_SET_CPU,
 220                     &result);
 221         } else {
 222                 ret = (*psm_intr_ops)(NULL, &info_hdl, PSM_INTR_OP_SET_CPU,
 223                     &result);
 224         }
 225 
 226         if (ret != PSM_SUCCESS) {
 227                 switch (result) {
 228                 case EIO:               /* Error making the change */
 229                         rval = EIO;
 230                         iset.status = PCITOOL_IO_ERROR;
 231                         break;
 232                 case ENXIO:             /* Couldn't convert vector to irq */
 233                         rval = EINVAL;
 234                         iset.status = PCITOOL_INVALID_INO;
 235                         break;
 236                 case EINVAL:            /* CPU out of range */
 237                         rval = EINVAL;
 238                         iset.status = PCITOOL_INVALID_CPUID;
 239                         break;
 240                 case ENOTSUP:           /* Requested PSM intr ops missing */
 241                         rval = ENOTSUP;
 242                         iset.status = PCITOOL_IO_ERROR;
 243                         break;
 244                 }
 245         }
 246 
 247         /* Return original CPU. */
 248         iset.cpu_id = old_cpu;
 249 
 250         /* Return new vector */
 251         if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
 252                 iset.ino = APIX_VIRTVEC_VECTOR(info_hdl.ih_vector);
 253         }
 254 
 255 done_set_intr:
 256         iset.drvr_version = PCITOOL_VERSION;
 257         if (ddi_copyout(&iset, arg, copyinout_size, mode) != DDI_SUCCESS)
 258                 rval = EFAULT;
 259         return (rval);
 260 }
 261 
 262 
 263 /* It is assumed that dip != NULL */
 264 static void
 265 pcitool_get_intr_dev_info(dev_info_t *dip, pcitool_intr_dev_t *devs)
 266 {
 267         (void) strncpy(devs->driver_name,
 268             ddi_driver_name(dip), MAXMODCONFNAME-2);
 269         devs->driver_name[MAXMODCONFNAME-1] = '\0';
 270         (void) ddi_pathname(dip, devs->path);
 271         devs->dev_inst = ddi_get_instance(dip);
 272 }
 273 
 274 static int
 275 pcitool_get_intr(dev_info_t *dip, void *arg, int mode)
 276 {
 277         /* Array part isn't used here, but oh well... */
 278         pcitool_intr_get_t partial_iget;
 279         pcitool_intr_get_t *iget = &partial_iget;
 280         size_t  iget_kmem_alloc_size = 0;
 281         uint8_t num_devs_ret;
 282         int copyout_rval;
 283         int rval = SUCCESS;
 284         int circ;
 285         int i;
 286 
 287         ddi_intr_handle_impl_t info_hdl;
 288         apic_get_intr_t intr_info;
 289         apic_get_type_t type_info;
 290 
 291         /* Read in just the header part, no array section. */
 292         if (ddi_copyin(arg, &partial_iget, PCITOOL_IGET_SIZE(0), mode) !=
 293             DDI_SUCCESS)
 294                 return (EFAULT);
 295 
 296         if (partial_iget.flags & PCITOOL_INTR_FLAG_GET_MSI) {
 297                 partial_iget.status = PCITOOL_IO_ERROR;
 298                 partial_iget.num_devs_ret = 0;
 299                 rval = ENOTSUP;
 300                 goto done_get_intr;
 301         }
 302 
 303         info_hdl.ih_private = &type_info;
 304 
 305         if ((*psm_intr_ops)(NULL, &info_hdl,
 306             PSM_INTR_OP_APIC_TYPE, NULL) != PSM_SUCCESS) {
 307                 iget->status = PCITOOL_IO_ERROR;
 308                 iget->num_devs_ret = 0;
 309                 rval = EINVAL;
 310                 goto done_get_intr;
 311         }
 312 
 313         if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
 314                 if (partial_iget.cpu_id > type_info.avgi_num_cpu) {
 315                         partial_iget.status = PCITOOL_INVALID_CPUID;
 316                         partial_iget.num_devs_ret = 0;
 317                         rval = EINVAL;
 318                         goto done_get_intr;
 319                 }
 320         }
 321 
 322         /* Validate argument. */
 323         if ((partial_iget.ino & APIX_VIRTVEC_VECMASK) >
 324             type_info.avgi_num_intr) {
 325                 partial_iget.status = PCITOOL_INVALID_INO;
 326                 partial_iget.num_devs_ret = 0;
 327                 rval = EINVAL;
 328                 goto done_get_intr;
 329         }
 330 
 331         num_devs_ret = partial_iget.num_devs_ret;
 332         intr_info.avgi_dip_list = NULL;
 333         intr_info.avgi_req_flags =
 334             PSMGI_REQ_CPUID | PSMGI_REQ_NUM_DEVS | PSMGI_INTRBY_VEC;
 335         /*
 336          * For this locally-declared and used handle, ih_private will contain a
 337          * pointer to apic_get_intr_t, not an ihdl_plat_t as used for
 338          * global interrupt handling.
 339          */
 340         info_hdl.ih_private = &intr_info;
 341 
 342         if (strcmp(type_info.avgi_type, APIC_APIX_NAME) == 0) {
 343                 info_hdl.ih_vector =
 344                     APIX_VIRTVECTOR(partial_iget.cpu_id, partial_iget.ino);
 345         } else {
 346                 info_hdl.ih_vector = partial_iget.ino;
 347         }
 348 
 349         /* Caller wants device information returned. */
 350         if (num_devs_ret > 0) {
 351 
 352                 intr_info.avgi_req_flags |= PSMGI_REQ_GET_DEVS;
 353 
 354                 /*
 355                  * Allocate room.
 356                  * If num_devs_ret == 0 iget remains pointing to partial_iget.
 357                  */
 358                 iget_kmem_alloc_size = PCITOOL_IGET_SIZE(num_devs_ret);
 359                 iget = kmem_alloc(iget_kmem_alloc_size, KM_SLEEP);
 360 
 361                 /* Read in whole structure to verify there's room. */
 362                 if (ddi_copyin(arg, iget, iget_kmem_alloc_size, mode) !=
 363                     SUCCESS) {
 364 
 365                         /* Be consistent and just return EFAULT here. */
 366                         kmem_free(iget, iget_kmem_alloc_size);
 367 
 368                         return (EFAULT);
 369                 }
 370         }
 371 
 372         bzero(iget, PCITOOL_IGET_SIZE(num_devs_ret));
 373         iget->ino = info_hdl.ih_vector;
 374 
 375         /*
 376          * Lock device tree branch from the pci root nexus on down if info will
 377          * be extracted from dips returned from the tree.
 378          */
 379         if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
 380                 ndi_devi_enter(dip, &circ);
 381         }
 382 
 383         /* Call psm_intr_ops(PSM_INTR_OP_GET_INTR) to get information. */
 384         if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
 385             PSM_INTR_OP_GET_INTR, NULL)) != PSM_SUCCESS) {
 386                 iget->status = PCITOOL_IO_ERROR;
 387                 iget->num_devs_ret = 0;
 388                 rval = EINVAL;
 389                 goto done_get_intr;
 390         }
 391 
 392         /*
 393          * Fill in the pcitool_intr_get_t to be returned,
 394          * with the CPU, num_devs_ret and num_devs.
 395          */
 396         if (intr_info.avgi_cpu_id == IRQ_UNBOUND ||
 397             intr_info.avgi_cpu_id == IRQ_UNINIT)
 398                 iget->cpu_id = 0;
 399         else
 400                 iget->cpu_id = intr_info.avgi_cpu_id & ~PSMGI_CPU_USER_BOUND;
 401 
 402         /* Number of devices returned by apic. */
 403         iget->num_devs = intr_info.avgi_num_devs;
 404 
 405         /* Device info was returned. */
 406         if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
 407 
 408                 /*
 409                  * num devs returned is num devs ret by apic,
 410                  * space permitting.
 411                  */
 412                 iget->num_devs_ret = min(num_devs_ret, intr_info.avgi_num_devs);
 413 
 414                 /*
 415                  * Loop thru list of dips and extract driver, name and instance.
 416                  * Fill in the pcitool_intr_dev_t's with this info.
 417                  */
 418                 for (i = 0; i < iget->num_devs_ret; i++)
 419                         pcitool_get_intr_dev_info(intr_info.avgi_dip_list[i],
 420                             &iget->dev[i]);
 421 
 422                 /* Free kmem_alloc'ed memory of the apic_get_intr_t */
 423                 kmem_free(intr_info.avgi_dip_list,
 424                     intr_info.avgi_num_devs * sizeof (dev_info_t *));
 425         }
 426 
 427 done_get_intr:
 428 
 429         if (intr_info.avgi_req_flags & PSMGI_REQ_GET_DEVS) {
 430                 ndi_devi_exit(dip, circ);
 431         }
 432 
 433         iget->drvr_version = PCITOOL_VERSION;
 434         copyout_rval = ddi_copyout(iget, arg,
 435             PCITOOL_IGET_SIZE(num_devs_ret), mode);
 436 
 437         if (iget_kmem_alloc_size > 0)
 438                 kmem_free(iget, iget_kmem_alloc_size);
 439 
 440         if (copyout_rval != DDI_SUCCESS)
 441                 rval = EFAULT;
 442 
 443         return (rval);
 444 }
 445 
 446 /*ARGSUSED*/
 447 static int
 448 pcitool_intr_info(dev_info_t *dip, void *arg, int mode)
 449 {
 450         pcitool_intr_info_t intr_info;
 451         ddi_intr_handle_impl_t info_hdl;
 452         int rval = SUCCESS;
 453         apic_get_type_t type_info;
 454 
 455         /* If we need user_version, and to ret same user version as passed in */
 456         if (ddi_copyin(arg, &intr_info, sizeof (pcitool_intr_info_t), mode) !=
 457             DDI_SUCCESS) {
 458                 if (pcitool_debug)
 459                         prom_printf("Error reading arguments\n");
 460                 return (EFAULT);
 461         }
 462 
 463         if (intr_info.flags & PCITOOL_INTR_FLAG_GET_MSI)
 464                 return (ENOTSUP);
 465 
 466         info_hdl.ih_private = &type_info;
 467 
 468         /* For UPPC systems, psm_intr_ops has no entry for APIC_TYPE. */
 469         if ((rval = (*psm_intr_ops)(NULL, &info_hdl,
 470             PSM_INTR_OP_APIC_TYPE, NULL)) != PSM_SUCCESS) {
 471                 intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UPPC;
 472                 intr_info.ctlr_version = 0;
 473                 intr_info.num_intr = APIC_MAX_VECTOR;
 474         } else {
 475                 intr_info.ctlr_version = (uint32_t)info_hdl.ih_ver;
 476                 intr_info.num_cpu = type_info.avgi_num_cpu;
 477                 if (strcmp(type_info.avgi_type,
 478                     APIC_PCPLUSMP_NAME) == 0) {
 479                         intr_info.ctlr_type = PCITOOL_CTLR_TYPE_PCPLUSMP;
 480                         intr_info.num_intr = type_info.avgi_num_intr;
 481                 } else if (strcmp(type_info.avgi_type,
 482                     APIC_APIX_NAME) == 0) {
 483                         intr_info.ctlr_type = PCITOOL_CTLR_TYPE_APIX;
 484                         intr_info.num_intr = type_info.avgi_num_intr;
 485                 } else {
 486                         intr_info.ctlr_type = PCITOOL_CTLR_TYPE_UNKNOWN;
 487                         intr_info.num_intr = APIC_MAX_VECTOR;
 488                 }
 489         }
 490 
 491         intr_info.drvr_version = PCITOOL_VERSION;
 492         if (ddi_copyout(&intr_info, arg, sizeof (pcitool_intr_info_t), mode) !=
 493             DDI_SUCCESS) {
 494                 if (pcitool_debug)
 495                         prom_printf("Error returning arguments.\n");
 496                 rval = EFAULT;
 497         }
 498 
 499         return (rval);
 500 }
 501 
 502 
 503 
 504 /*
 505  * Main function for handling interrupt CPU binding requests and queries.
 506  * Need to implement later
 507  */
 508 int
 509 pcitool_intr_admn(dev_info_t *dip, void *arg, int cmd, int mode)
 510 {
 511         int rval;
 512 
 513         switch (cmd) {
 514 
 515         /* Associate a new CPU with a given vector */
 516         case PCITOOL_DEVICE_SET_INTR:
 517                 rval = pcitool_set_intr(dip, arg, mode);
 518                 break;
 519 
 520         case PCITOOL_DEVICE_GET_INTR:
 521                 rval = pcitool_get_intr(dip, arg, mode);
 522                 break;
 523 
 524         case PCITOOL_SYSTEM_INTR_INFO:
 525                 rval = pcitool_intr_info(dip, arg, mode);
 526                 break;
 527 
 528         default:
 529                 rval = ENOTSUP;
 530         }
 531 
 532         return (rval);
 533 }
 534 
 535 /*
 536  * Perform register accesses on the nexus device itself.
 537  * No explicit PCI nexus device for X86, so not applicable.
 538  */
 539 
 540 /*ARGSUSED*/
 541 int
 542 pcitool_bus_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
 543 {
 544         return (ENOTSUP);
 545 }
 546 
 547 /* Swap endianness. */
 548 static uint64_t
 549 pcitool_swap_endian(uint64_t data, int size)
 550 {
 551         typedef union {
 552                 uint64_t data64;
 553                 uint8_t data8[8];
 554         } data_split_t;
 555 
 556         data_split_t orig_data;
 557         data_split_t returned_data;
 558         int i;
 559 
 560         orig_data.data64 = data;
 561         returned_data.data64 = 0;
 562 
 563         for (i = 0; i < size; i++) {
 564                 returned_data.data8[i] = orig_data.data8[size - 1 - i];
 565         }
 566 
 567         return (returned_data.data64);
 568 }
 569 
/*
 * A note about ontrap handling:
 *
 * The X86 systems on which this module was tested return all FFs, rather
 * than raising a bus error, when a device is accessed at an invalid address.
 * Ontrap handling, which gracefully handles kernel bus errors, is installed
 * anyway for I/O and memory space accesses (but not for PCI config space
 * accesses), in case future X86 platforms require it.
 */
 579 
 580 /* Access device.  prg is modified. */
 581 static int
 582 pcitool_cfg_access(pcitool_reg_t *prg, boolean_t write_flag,
 583     boolean_t io_access)
 584 {
 585         int size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
 586         boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
 587         int rval = SUCCESS;
 588         uint64_t local_data;
 589         pci_cfgacc_req_t req;
 590         uint32_t max_offset;
 591 
 592         if ((size <= 0) || (size > 8) || !ISP2(size)) {
 593                 prg->status = PCITOOL_INVALID_SIZE;
 594                 return (ENOTSUP);
 595         }
 596 
 597         /*
 598          * NOTE: there is no way to verify whether or not the address is
 599          * valid other than that it is within the maximum offset.  The
 600          * put functions return void and the get functions return -1 on error.
 601          */
 602 
 603         if (io_access)
 604                 max_offset = 0xFF;
 605         else
 606                 max_offset = 0xFFF;
 607         if (prg->offset + size - 1 > max_offset) {
 608                 prg->status = PCITOOL_INVALID_ADDRESS;
 609                 return (ENOTSUP);
 610         }
 611 
 612         prg->status = PCITOOL_SUCCESS;
 613 
 614         req.rcdip = NULL;
 615         req.bdf = PCI_GETBDF(prg->bus_no, prg->dev_no, prg->func_no);
 616         req.offset = prg->offset;
 617         req.size = size;
 618         req.write = write_flag;
 619         req.ioacc = io_access;
 620         if (write_flag) {
 621                 if (big_endian) {
 622                         local_data = pcitool_swap_endian(prg->data, size);
 623                 } else {
 624                         local_data = prg->data;
 625                 }
 626                 VAL64(&req) = local_data;
 627                 pci_cfgacc_acc(&req);
 628         } else {
 629                 pci_cfgacc_acc(&req);
 630                 switch (size) {
 631                 case 1:
 632                         local_data = VAL8(&req);
 633                         break;
 634                 case 2:
 635                         local_data = VAL16(&req);
 636                         break;
 637                 case 4:
 638                         local_data = VAL32(&req);
 639                         break;
 640                 case 8:
 641                         local_data = VAL64(&req);
 642                         break;
 643                 }
 644                 if (big_endian) {
 645                         prg->data =
 646                             pcitool_swap_endian(local_data, size);
 647                 } else {
 648                         prg->data = local_data;
 649                 }
 650         }
 651         /*
 652          * Check if legacy IO config access is used, in which case
 653          * only first 256 bytes are valid.
 654          */
 655         if (req.ioacc && (prg->offset + size - 1 > 0xFF)) {
 656                 prg->status = PCITOOL_INVALID_ADDRESS;
 657                 return (ENOTSUP);
 658         }
 659 
 660         /* Set phys_addr only if MMIO is used */
 661         prg->phys_addr = 0;
 662         if (!req.ioacc && mcfg_mem_base != 0) {
 663                 prg->phys_addr = mcfg_mem_base + prg->offset +
 664                     ((prg->bus_no << PCIEX_REG_BUS_SHIFT) |
 665                     (prg->dev_no << PCIEX_REG_DEV_SHIFT) |
 666                     (prg->func_no << PCIEX_REG_FUNC_SHIFT));
 667         }
 668 
 669         return (rval);
 670 }
 671 
 672 static int
 673 pcitool_io_access(pcitool_reg_t *prg, boolean_t write_flag)
 674 {
 675         int port = (int)prg->phys_addr;
 676         size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
 677         boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
 678         int rval = SUCCESS;
 679         on_trap_data_t otd;
 680         uint64_t local_data;
 681 
 682 
 683         /*
 684          * on_trap works like setjmp.
 685          *
 686          * A non-zero return here means on_trap has returned from an error.
 687          *
 688          * A zero return here means that on_trap has just returned from setup.
 689          */
 690         if (on_trap(&otd, OT_DATA_ACCESS)) {
 691                 no_trap();
 692                 if (pcitool_debug)
 693                         prom_printf(
 694                             "pcitool_io_access: on_trap caught an error...\n");
 695                 prg->status = PCITOOL_INVALID_ADDRESS;
 696                 return (EFAULT);
 697         }
 698 
 699         if (write_flag) {
 700 
 701                 if (big_endian) {
 702                         local_data = pcitool_swap_endian(prg->data, size);
 703                 } else {
 704                         local_data = prg->data;
 705                 }
 706 
 707                 if (pcitool_debug)
 708                         prom_printf("Writing %ld byte(s) to port 0x%x\n",
 709                             size, port);
 710 
 711                 switch (size) {
 712                 case 1:
 713                         outb(port, (uint8_t)local_data);
 714                         break;
 715                 case 2:
 716                         outw(port, (uint16_t)local_data);
 717                         break;
 718                 case 4:
 719                         outl(port, (uint32_t)local_data);
 720                         break;
 721                 default:
 722                         rval = ENOTSUP;
 723                         prg->status = PCITOOL_INVALID_SIZE;
 724                         break;
 725                 }
 726         } else {
 727                 if (pcitool_debug)
 728                         prom_printf("Reading %ld byte(s) from port 0x%x\n",
 729                             size, port);
 730 
 731                 switch (size) {
 732                 case 1:
 733                         local_data = inb(port);
 734                         break;
 735                 case 2:
 736                         local_data = inw(port);
 737                         break;
 738                 case 4:
 739                         local_data = inl(port);
 740                         break;
 741                 default:
 742                         rval = ENOTSUP;
 743                         prg->status = PCITOOL_INVALID_SIZE;
 744                         break;
 745                 }
 746 
 747                 if (rval == SUCCESS) {
 748                         if (big_endian) {
 749                                 prg->data =
 750                                     pcitool_swap_endian(local_data, size);
 751                         } else {
 752                                 prg->data = local_data;
 753                         }
 754                 }
 755         }
 756 
 757         no_trap();
 758         return (rval);
 759 }
 760 
 761 static int
 762 pcitool_mem_access(pcitool_reg_t *prg, uint64_t virt_addr, boolean_t write_flag)
 763 {
 764         size_t size = PCITOOL_ACC_ATTR_SIZE(prg->acc_attr);
 765         boolean_t big_endian = PCITOOL_ACC_IS_BIG_ENDIAN(prg->acc_attr);
 766         int rval = DDI_SUCCESS;
 767         on_trap_data_t otd;
 768         uint64_t local_data;
 769 
 770         /*
 771          * on_trap works like setjmp.
 772          *
 773          * A non-zero return here means on_trap has returned from an error.
 774          *
 775          * A zero return here means that on_trap has just returned from setup.
 776          */
 777         if (on_trap(&otd, OT_DATA_ACCESS)) {
 778                 no_trap();
 779                 if (pcitool_debug)
 780                         prom_printf(
 781                             "pcitool_mem_access: on_trap caught an error...\n");
 782                 prg->status = PCITOOL_INVALID_ADDRESS;
 783                 return (EFAULT);
 784         }
 785 
 786         if (write_flag) {
 787 
 788                 if (big_endian) {
 789                         local_data = pcitool_swap_endian(prg->data, size);
 790                 } else {
 791                         local_data = prg->data;
 792                 }
 793 
 794                 switch (size) {
 795                 case 1:
 796                         *((uint8_t *)(uintptr_t)virt_addr) = local_data;
 797                         break;
 798                 case 2:
 799                         *((uint16_t *)(uintptr_t)virt_addr) = local_data;
 800                         break;
 801                 case 4:
 802                         *((uint32_t *)(uintptr_t)virt_addr) = local_data;
 803                         break;
 804                 case 8:
 805                         *((uint64_t *)(uintptr_t)virt_addr) = local_data;
 806                         break;
 807                 default:
 808                         rval = ENOTSUP;
 809                         prg->status = PCITOOL_INVALID_SIZE;
 810                         break;
 811                 }
 812         } else {
 813                 switch (size) {
 814                 case 1:
 815                         local_data = *((uint8_t *)(uintptr_t)virt_addr);
 816                         break;
 817                 case 2:
 818                         local_data = *((uint16_t *)(uintptr_t)virt_addr);
 819                         break;
 820                 case 4:
 821                         local_data = *((uint32_t *)(uintptr_t)virt_addr);
 822                         break;
 823                 case 8:
 824                         local_data = *((uint64_t *)(uintptr_t)virt_addr);
 825                         break;
 826                 default:
 827                         rval = ENOTSUP;
 828                         prg->status = PCITOOL_INVALID_SIZE;
 829                         break;
 830                 }
 831 
 832                 if (rval == SUCCESS) {
 833                         if (big_endian) {
 834                                 prg->data =
 835                                     pcitool_swap_endian(local_data, size);
 836                         } else {
 837                                 prg->data = local_data;
 838                         }
 839                 }
 840         }
 841 
 842         no_trap();
 843         return (rval);
 844 }
 845 
 846 /*
 847  * Map up to 2 pages which contain the address we want to access.
 848  *
 849  * Mapping should span no more than 8 bytes.  With X86 it is possible for an
 850  * 8 byte value to start on a 4 byte boundary, so it can cross a page boundary.
 851  * We'll never have to map more than two pages.
 852  */
 853 
 854 static uint64_t
 855 pcitool_map(uint64_t phys_addr, size_t size, size_t *num_pages)
 856 {
 857 
 858         uint64_t page_base = phys_addr & ~MMU_PAGEOFFSET;
 859         uint64_t offset = phys_addr & MMU_PAGEOFFSET;
 860         void *virt_base;
 861         uint64_t returned_addr;
 862         pfn_t pfn;
 863 
 864         if (pcitool_debug)
 865                 prom_printf("pcitool_map: Called with PA:0x%p\n",
 866                     (void *)(uintptr_t)phys_addr);
 867 
 868         *num_pages = 1;
 869 
 870         /* Desired mapping would span more than two pages. */
 871         if ((offset + size) > (MMU_PAGESIZE * 2)) {
 872                 if (pcitool_debug)
 873                         prom_printf("boundary violation: "
 874                             "offset:0x%" PRIx64 ", size:%ld, pagesize:0x%lx\n",
 875                             offset, (uintptr_t)size, (uintptr_t)MMU_PAGESIZE);
 876                 return (NULL);
 877 
 878         } else if ((offset + size) > MMU_PAGESIZE) {
 879                 (*num_pages)++;
 880         }
 881 
 882         /* Get page(s) of virtual space. */
 883         virt_base = vmem_alloc(heap_arena, ptob(*num_pages), VM_NOSLEEP);
 884         if (virt_base == NULL) {
 885                 if (pcitool_debug)
 886                         prom_printf("Couldn't get virtual base address.\n");
 887                 return (NULL);
 888         }
 889 
 890         if (pcitool_debug)
 891                 prom_printf("Got base virtual address:0x%p\n", virt_base);
 892 
 893 #ifdef __xpv
 894         /*
 895          * We should only get here if we are dom0.
 896          * We're using a real device so we need to translate the MA to a PFN.
 897          */
 898         ASSERT(DOMAIN_IS_INITDOMAIN(xen_info));
 899         pfn = xen_assign_pfn(mmu_btop(page_base));
 900 #else
 901         pfn = btop(page_base);
 902 #endif
 903 
 904         /* Now map the allocated virtual space to the physical address. */
 905         hat_devload(kas.a_hat, virt_base, mmu_ptob(*num_pages), pfn,
 906             PROT_READ | PROT_WRITE | HAT_STRICTORDER,
 907             HAT_LOAD_LOCK);
 908 
 909         returned_addr = ((uintptr_t)(virt_base)) + offset;
 910 
 911         if (pcitool_debug)
 912                 prom_printf("pcitool_map: returning VA:0x%p\n",
 913                     (void *)(uintptr_t)returned_addr);
 914 
 915         return (returned_addr);
 916 }
 917 
 918 /* Unmap the mapped page(s). */
 919 static void
 920 pcitool_unmap(uint64_t virt_addr, size_t num_pages)
 921 {
 922         void *base_virt_addr = (void *)(uintptr_t)(virt_addr & ~MMU_PAGEOFFSET);
 923 
 924         hat_unload(kas.a_hat, base_virt_addr, ptob(num_pages),
 925             HAT_UNLOAD_UNLOCK);
 926         vmem_free(heap_arena, base_virt_addr, ptob(num_pages));
 927 }
 928 
 929 
 930 /* Perform register accesses on PCI leaf devices. */
 931 /*ARGSUSED*/
 932 int
 933 pcitool_dev_reg_ops(dev_info_t *dip, void *arg, int cmd, int mode)
 934 {
 935         boolean_t       write_flag = B_FALSE;
 936         boolean_t       io_access = B_TRUE;
 937         int             rval = 0;
 938         pcitool_reg_t   prg;
 939         uint8_t         size;
 940 
 941         uint64_t        base_addr;
 942         uint64_t        virt_addr;
 943         size_t          num_virt_pages;
 944 
 945         switch (cmd) {
 946         case (PCITOOL_DEVICE_SET_REG):
 947                 write_flag = B_TRUE;
 948 
 949         /*FALLTHRU*/
 950         case (PCITOOL_DEVICE_GET_REG):
 951                 if (pcitool_debug)
 952                         prom_printf("pci_dev_reg_ops set/get reg\n");
 953                 if (ddi_copyin(arg, &prg, sizeof (pcitool_reg_t), mode) !=
 954                     DDI_SUCCESS) {
 955                         if (pcitool_debug)
 956                                 prom_printf("Error reading arguments\n");
 957                         return (EFAULT);
 958                 }
 959 
 960                 if (prg.barnum >= (sizeof (pci_bars) / sizeof (pci_bars[0]))) {
 961                         prg.status = PCITOOL_OUT_OF_RANGE;
 962                         rval = EINVAL;
 963                         goto done_reg;
 964                 }
 965 
 966                 if (pcitool_debug)
 967                         prom_printf("raw bus:0x%x, dev:0x%x, func:0x%x\n",
 968                             prg.bus_no, prg.dev_no, prg.func_no);
 969                 /* Validate address arguments of bus / dev / func */
 970                 if (((prg.bus_no &
 971                     (PCI_REG_BUS_M >> PCI_REG_BUS_SHIFT)) !=
 972                     prg.bus_no) ||
 973                     ((prg.dev_no &
 974                     (PCI_REG_DEV_M >> PCI_REG_DEV_SHIFT)) !=
 975                     prg.dev_no) ||
 976                     ((prg.func_no &
 977                     (PCI_REG_FUNC_M >> PCI_REG_FUNC_SHIFT)) !=
 978                     prg.func_no)) {
 979                         prg.status = PCITOOL_INVALID_ADDRESS;
 980                         rval = EINVAL;
 981                         goto done_reg;
 982                 }
 983 
 984                 size = PCITOOL_ACC_ATTR_SIZE(prg.acc_attr);
 985 
 986                 /* Proper config space desired. */
 987                 if (prg.barnum == 0) {
 988 
 989                         if (pcitool_debug)
 990                                 prom_printf(
 991                                     "config access: offset:0x%" PRIx64 ", "
 992                                     "phys_addr:0x%" PRIx64 "\n",
 993                                     prg.offset, prg.phys_addr);
 994 
 995                         if (prg.offset >= max_cfg_size) {
 996                                 prg.status = PCITOOL_OUT_OF_RANGE;
 997                                 rval = EINVAL;
 998                                 goto done_reg;
 999                         }
1000                         if (max_cfg_size == PCIE_CONF_HDR_SIZE)
1001                                 io_access = B_FALSE;
1002 
1003                         rval = pcitool_cfg_access(&prg, write_flag, io_access);
1004                         if (pcitool_debug)
1005                                 prom_printf(
1006                                     "config access: data:0x%" PRIx64 "\n",
1007                                     prg.data);
1008 
1009                 /* IO/ MEM/ MEM64 space. */
1010                 } else {
1011 
1012                         pcitool_reg_t   prg2;
1013                         bcopy(&prg, &prg2, sizeof (pcitool_reg_t));
1014 
1015                         /*
1016                          * Translate BAR number into offset of the BAR in
1017                          * the device's config space.
1018                          */
1019                         prg2.offset = pci_bars[prg2.barnum];
1020                         prg2.acc_attr =
1021                             PCITOOL_ACC_ATTR_SIZE_4 | PCITOOL_ACC_ATTR_ENDN_LTL;
1022 
1023                         if (pcitool_debug)
1024                                 prom_printf(
1025                                     "barnum:%d, bar_offset:0x%" PRIx64 "\n",
1026                                     prg2.barnum, prg2.offset);
1027                         /*
1028                          * Get Bus Address Register (BAR) from config space.
1029                          * prg2.offset is the offset into config space of the
1030                          * BAR desired.  prg.status is modified on error.
1031                          */
1032                         rval = pcitool_cfg_access(&prg2, B_FALSE, B_TRUE);
1033                         if (rval != SUCCESS) {
1034                                 if (pcitool_debug)
1035                                         prom_printf("BAR access failed\n");
1036                                 prg.status = prg2.status;
1037                                 goto done_reg;
1038                         }
1039                         /*
1040                          * Reference proper PCI space based on the BAR.
1041                          * If 64 bit MEM space, need to load other half of the
1042                          * BAR first.
1043                          */
1044 
1045                         if (pcitool_debug)
1046                                 prom_printf("bar returned is 0x%" PRIx64 "\n",
1047                                     prg2.data);
1048                         if (!prg2.data) {
1049                                 if (pcitool_debug)
1050                                         prom_printf("BAR data == 0\n");
1051                                 rval = EINVAL;
1052                                 prg.status = PCITOOL_INVALID_ADDRESS;
1053                                 goto done_reg;
1054                         }
1055                         if (prg2.data == 0xffffffff) {
1056                                 if (pcitool_debug)
1057                                         prom_printf("BAR data == -1\n");
1058                                 rval = EINVAL;
1059                                 prg.status = PCITOOL_INVALID_ADDRESS;
1060                                 goto done_reg;
1061                         }
1062 
1063                         /*
1064                          * BAR has bits saying this space is IO space, unless
1065                          * this is the ROM address register.
1066                          */
1067                         if (((PCI_BASE_SPACE_M & prg2.data) ==
1068                             PCI_BASE_SPACE_IO) &&
1069                             (prg2.offset != PCI_CONF_ROM)) {
1070                                 if (pcitool_debug)
1071                                         prom_printf("IO space\n");
1072 
1073                                 prg2.data &= PCI_BASE_IO_ADDR_M;
1074                                 prg.phys_addr = prg2.data + prg.offset;
1075 
1076                                 rval = pcitool_io_access(&prg, write_flag);
1077                                 if ((rval != SUCCESS) && (pcitool_debug))
1078                                         prom_printf("IO access failed\n");
1079 
1080                                 goto done_reg;
1081 
1082 
1083                         /*
1084                          * BAR has bits saying this space is 64 bit memory
1085                          * space, unless this is the ROM address register.
1086                          *
1087                          * The 64 bit address stored in two BAR cells is not
1088                          * necessarily aligned on an 8-byte boundary.
1089                          * Need to keep the first 4 bytes read,
1090                          * and do a separate read of the high 4 bytes.
1091                          */
1092 
1093                         } else if ((PCI_BASE_TYPE_ALL & prg2.data) &&
1094                             (prg2.offset != PCI_CONF_ROM)) {
1095 
1096                                 uint32_t low_bytes =
1097                                     (uint32_t)(prg2.data & ~PCI_BASE_TYPE_ALL);
1098 
1099                                 /*
1100                                  * Don't try to read the next 4 bytes
1101                                  * past the end of BARs.
1102                                  */
1103                                 if (prg2.offset >= PCI_CONF_BASE5) {
1104                                         prg.status = PCITOOL_OUT_OF_RANGE;
1105                                         rval = EIO;
1106                                         goto done_reg;
1107                                 }
1108 
1109                                 /*
1110                                  * Access device.
1111                                  * prg2.status is modified on error.
1112                                  */
1113                                 prg2.offset += 4;
1114                                 rval = pcitool_cfg_access(&prg2,
1115                                     B_FALSE, B_TRUE);
1116                                 if (rval != SUCCESS) {
1117                                         prg.status = prg2.status;
1118                                         goto done_reg;
1119                                 }
1120 
1121                                 if (prg2.data == 0xffffffff) {
1122                                         prg.status = PCITOOL_INVALID_ADDRESS;
1123                                         prg.status = EFAULT;
1124                                         goto done_reg;
1125                                 }
1126 
1127                                 prg2.data = (prg2.data << 32) + low_bytes;
1128                                 if (pcitool_debug)
1129                                         prom_printf(
1130                                             "64 bit mem space.  "
1131                                             "64-bit bar is 0x%" PRIx64 "\n",
1132                                             prg2.data);
1133 
1134                         /* Mem32 space, including ROM */
1135                         } else {
1136 
1137                                 if (prg2.offset == PCI_CONF_ROM) {
1138                                         if (pcitool_debug)
1139                                                 prom_printf(
1140                                                     "Additional ROM "
1141                                                     "checking\n");
1142                                         /* Can't write to ROM */
1143                                         if (write_flag) {
1144                                                 prg.status = PCITOOL_ROM_WRITE;
1145                                                 rval = EIO;
1146                                                 goto done_reg;
1147 
1148                                         /* ROM disabled for reading */
1149                                         } else if (!(prg2.data & 0x00000001)) {
1150                                                 prg.status =
1151                                                     PCITOOL_ROM_DISABLED;
1152                                                 rval = EIO;
1153                                                 goto done_reg;
1154                                         }
1155                                 }
1156 
1157                                 if (pcitool_debug)
1158                                         prom_printf("32 bit mem space\n");
1159                         }
1160 
1161                         /* Common code for all IO/MEM range spaces. */
1162 
1163                         base_addr = prg2.data;
1164                         if (pcitool_debug)
1165                                 prom_printf(
1166                                     "addr portion of bar is 0x%" PRIx64 ", "
1167                                     "base=0x%" PRIx64 ", "
1168                                     "offset:0x%" PRIx64 "\n",
1169                                     prg2.data, base_addr, prg.offset);
1170                         /*
1171                          * Use offset provided by caller to index into
1172                          * desired space, then access.
1173                          * Note that prg.status is modified on error.
1174                          */
1175                         prg.phys_addr = base_addr + prg.offset;
1176 
1177                         virt_addr = pcitool_map(prg.phys_addr, size,
1178                             &num_virt_pages);
1179                         if (virt_addr == NULL) {
1180                                 prg.status = PCITOOL_IO_ERROR;
1181                                 rval = EIO;
1182                                 goto done_reg;
1183                         }
1184 
1185                         rval = pcitool_mem_access(&prg, virt_addr, write_flag);
1186                         pcitool_unmap(virt_addr, num_virt_pages);
1187                 }
1188 done_reg:
1189                 prg.drvr_version = PCITOOL_VERSION;
1190                 if (ddi_copyout(&prg, arg, sizeof (pcitool_reg_t), mode) !=
1191                     DDI_SUCCESS) {
1192                         if (pcitool_debug)
1193                                 prom_printf("Error returning arguments.\n");
1194                         rval = EFAULT;
1195                 }
1196                 break;
1197         default:
1198                 rval = ENOTTY;
1199                 break;
1200         }
1201         return (rval);
1202 }