/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */


#include <sys/errno.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/ddi.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/file.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi_impldefs.h>
#include <sys/sysmacros.h>
#include <sys/ddidevmap.h>
#include <sys/policy.h>

#include <sys/vmsystm.h>
#include <vm/hat_i86.h>
#include <vm/hat_pte.h>
#include <vm/seg_kmem.h>
#include <vm/seg_mf.h>

#include <xen/io/blkif_impl.h>
#include <xen/io/blk_common.h>
#include <xen/io/xpvtap.h>


static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred);
static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred);
static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
    cred_t *cred, int *rval);
static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
    size_t len, size_t *maplen, uint_t model);
static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p);
static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp);

static struct cb_ops xpvtap_cb_ops = {
        xpvtap_open,            /* cb_open */
        xpvtap_close,           /* cb_close */
        nodev,                  /* cb_strategy */
        nodev,                  /* cb_print */
        nodev,                  /* cb_dump */
        nodev,                  /* cb_read */
        nodev,                  /* cb_write */
        xpvtap_ioctl,           /* cb_ioctl */
        xpvtap_devmap,          /* cb_devmap */
        nodev,                  /* cb_mmap */
        xpvtap_segmap,          /* cb_segmap */
        xpvtap_chpoll,          /* cb_chpoll */
        ddi_prop_op,            /* cb_prop_op */
        NULL,                   /* cb_stream */
        D_NEW | D_MP | D_64BIT | D_DEVMAP,      /* cb_flag */
        CB_REV
};

static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
    void **result);
static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);

static struct dev_ops xpvtap_dev_ops = {
        DEVO_REV,               /* devo_rev */
        0,                      /* devo_refcnt */
        xpvtap_getinfo,         /* devo_getinfo */
        nulldev,                /* devo_identify */
        nulldev,                /* devo_probe */
        xpvtap_attach,          /* devo_attach */
        xpvtap_detach,          /* devo_detach */
        nodev,                  /* devo_reset */
        &xpvtap_cb_ops,         /* devo_cb_ops */
        NULL,                   /* devo_bus_ops */
        NULL                    /* devo_power */
};


static struct modldrv xpvtap_modldrv = {
        &mod_driverops,         /* Type of module.  This one is a driver */
        "xpvtap driver",        /* Name of the module. */
        &xpvtap_dev_ops,        /* driver ops */
};

static struct modlinkage xpvtap_modlinkage = {
        MODREV_1,
        (void *)&xpvtap_modldrv,
        NULL
};


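/* soft state anchor; each instance's xpvtap_state_t hangs off this */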
void *xpvtap_statep;


static xpvtap_state_t *xpvtap_drv_init(int instance);
static void xpvtap_drv_fini(xpvtap_state_t *state);
static uint_t xpvtap_intr(caddr_t arg);

typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs);
static void xpvtap_rs_init(uint_t min_val, uint_t max_val,
    xpvtap_rs_hdl_t *handle);
static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle);
static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs);
static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs);
static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle,
    xpvtap_rs_cleanup_t callback, void *arg);

static int xpvtap_segmf_register(xpvtap_state_t *state);
static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event);

static int xpvtap_user_init(xpvtap_state_t *state);
static void xpvtap_user_fini(xpvtap_state_t *state);
static int xpvtap_user_ring_init(xpvtap_state_t *state);
static void xpvtap_user_ring_fini(xpvtap_state_t *state);
static int xpvtap_user_thread_init(xpvtap_state_t *state);
static void xpvtap_user_thread_fini(xpvtap_state_t *state);
static void xpvtap_user_thread_start(caddr_t arg);
static void xpvtap_user_thread_stop(xpvtap_state_t *state);
static void xpvtap_user_thread(void *arg);

static void xpvtap_user_app_stop(caddr_t arg);

static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid);
static int xpvtap_user_request_push(xpvtap_state_t *state,
    blkif_request_t *req, uint_t uid);
static int xpvtap_user_response_get(xpvtap_state_t *state,
    blkif_response_t *resp, uint_t *uid);
static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid);

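/*
 * Overview: xpvtap sits between the blkif guest ring and a userland tap
 * application. The worker thread maps each guest request's grefs into the
 * app's address space (via seg_mf), pushes the request onto a shared user
 * ring, and pushes the app's responses back onto the guest ring.
 */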

/*
 * _init()
 */
int
_init(void)
{
        int e;

        e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1);
        if (e != 0) {
                return (e);
        }

        e = mod_install(&xpvtap_modlinkage);
        if (e != 0) {
                ddi_soft_state_fini(&xpvtap_statep);
                return (e);
        }

        return (0);
}


/*
 * _info()
 */
int
_info(struct modinfo *modinfop)
{
        return (mod_info(&xpvtap_modlinkage, modinfop));
}


/*
 * _fini()
 */
int
_fini(void)
{
        int e;

        e = mod_remove(&xpvtap_modlinkage);
        if (e != 0) {
                return (e);
        }

        ddi_soft_state_fini(&xpvtap_statep);

        return (0);
}


/*
 * xpvtap_attach()
 */
static int
xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
        blk_ringinit_args_t args;
        xpvtap_state_t *state;
        int instance;
        int e;


        switch (cmd) {
        case DDI_ATTACH:
                break;

        case DDI_RESUME:
                return (DDI_SUCCESS);

        default:
                return (DDI_FAILURE);
        }

        /* initialize our state info */
        instance = ddi_get_instance(dip);
        state = xpvtap_drv_init(instance);
        if (state == NULL) {
                return (DDI_FAILURE);
        }
        state->bt_dip = dip;

        /* Initialize the guest ring */
        args.ar_dip = state->bt_dip;
        args.ar_intr = xpvtap_intr;
        args.ar_intr_arg = (caddr_t)state;
        args.ar_ringup = xpvtap_user_thread_start;
        args.ar_ringup_arg = (caddr_t)state;
        args.ar_ringdown = xpvtap_user_app_stop;
        args.ar_ringdown_arg = (caddr_t)state;
        e = blk_ring_init(&args, &state->bt_guest_ring);
        if (e != DDI_SUCCESS) {
                goto attachfail_ringinit;
        }

        /* create the minor node (for ioctl/mmap) */
        e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance,
            DDI_PSEUDO, 0);
        if (e != DDI_SUCCESS) {
                goto attachfail_minor_node;
        }

        /* Report that driver was loaded */
        ddi_report_dev(dip);

        return (DDI_SUCCESS);

attachfail_minor_node:
        blk_ring_fini(&state->bt_guest_ring);
attachfail_ringinit:
        xpvtap_drv_fini(state);
        return (DDI_FAILURE);
}


/*
 * xpvtap_detach()
 */
static int
xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
        xpvtap_state_t *state;
        int instance;


        instance = ddi_get_instance(dip);
        state = ddi_get_soft_state(xpvtap_statep, instance);
        if (state == NULL) {
                return (DDI_FAILURE);
        }

        switch (cmd) {
        case DDI_DETACH:
                break;

        case DDI_SUSPEND:
        default:
                return (DDI_FAILURE);
        }

        xpvtap_user_thread_stop(state);
        blk_ring_fini(&state->bt_guest_ring);
        xpvtap_drv_fini(state);
        ddi_remove_minor_node(dip, NULL);

        return (DDI_SUCCESS);
}


/*
 * xpvtap_getinfo()
 */
/*ARGSUSED*/
static int
xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
{
        xpvtap_state_t *state;
        int instance;
        dev_t dev;
        int e;


        dev = (dev_t)arg;
        instance = getminor(dev);

        switch (cmd) {
        case DDI_INFO_DEVT2DEVINFO:
                state = ddi_get_soft_state(xpvtap_statep, instance);
                if (state == NULL) {
                        return (DDI_FAILURE);
                }
                *result = (void *)state->bt_dip;
                e = DDI_SUCCESS;
                break;

        case DDI_INFO_DEVT2INSTANCE:
                *result = (void *)(uintptr_t)instance;
                e = DDI_SUCCESS;
                break;

        default:
                e = DDI_FAILURE;
                break;
        }

        return (e);
}


/*
 * xpvtap_open()
 */
/*ARGSUSED*/
static int
xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
{
        xpvtap_state_t *state;
        int instance;


        if (secpolicy_xvm_control(cred)) {
                return (EPERM);
        }

        instance = getminor(*devp);
        state = ddi_get_soft_state(xpvtap_statep, instance);
        if (state == NULL) {
                return (ENXIO);
        }

        /* we should only be opened once */
        mutex_enter(&state->bt_open.bo_mutex);
        if (state->bt_open.bo_opened) {
                mutex_exit(&state->bt_open.bo_mutex);
                return (EBUSY);
        }
        state->bt_open.bo_opened = B_TRUE;
        mutex_exit(&state->bt_open.bo_mutex);

        /*
         * save the app's address space. We need it for mapping/unmapping
         * grefs since we'll be doing that in a separate kernel thread.
         */
        state->bt_map.um_as = curproc->p_as;

        return (0);
}


/*
 * xpvtap_close()
 */
/*ARGSUSED*/
static int
xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred)
{
        xpvtap_state_t *state;
        int instance;


        instance = getminor(devp);
        state = ddi_get_soft_state(xpvtap_statep, instance);
        if (state == NULL) {
                return (ENXIO);
        }

        /*
         * wake the thread so it can clean up, and wait for it to exit so we
         * can be sure it's not in the middle of processing a
         * request/response.
         */
        mutex_enter(&state->bt_thread.ut_mutex);
        state->bt_thread.ut_wake = B_TRUE;
        state->bt_thread.ut_exit = B_TRUE;
        cv_signal(&state->bt_thread.ut_wake_cv);
        if (!state->bt_thread.ut_exit_done) {
                cv_wait(&state->bt_thread.ut_exit_done_cv,
                    &state->bt_thread.ut_mutex);
        }
        ASSERT(state->bt_thread.ut_exit_done);
        mutex_exit(&state->bt_thread.ut_mutex);

        state->bt_map.um_as = NULL;
        state->bt_map.um_guest_pages = NULL;

        /*
         * when the ring is brought down, a userland hotplug script is run
         * which tries to bring the userland app down. We'll wait for a bit
         * for the user app to exit. Notify the thread waiting that the app
         * has closed the driver.
         */
        mutex_enter(&state->bt_open.bo_mutex);
        ASSERT(state->bt_open.bo_opened);
        state->bt_open.bo_opened = B_FALSE;
        cv_signal(&state->bt_open.bo_exit_cv);
        mutex_exit(&state->bt_open.bo_mutex);

        return (0);
}


/*
 * xpvtap_ioctl()
 */
/*ARGSUSED*/
static int
xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
    int *rval)
{
        xpvtap_state_t *state;
        int instance;


        if (secpolicy_xvm_control(cred)) {
                return (EPERM);
        }

        instance = getminor(dev);
        if (instance == -1) {
                return (EBADF);
        }

        state = ddi_get_soft_state(xpvtap_statep, instance);
        if (state == NULL) {
                return (EBADF);
        }

        switch (cmd) {
        case XPVTAP_IOCTL_RESP_PUSH:
                /*
                 * wake thread, thread handles guest requests and user app
                 * responses.
                 */
                mutex_enter(&state->bt_thread.ut_mutex);
                state->bt_thread.ut_wake = B_TRUE;
                cv_signal(&state->bt_thread.ut_wake_cv);
                mutex_exit(&state->bt_thread.ut_mutex);
                break;

        default:
                cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd);
                return (ENXIO);
        }

        return (0);
}


/*
 * xpvtap_segmap()
 */
/*ARGSUSED*/
static int
xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
    off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
    cred_t *cred_p)
{
        struct segmf_crargs a;
        xpvtap_state_t *state;
        int instance;
        int e;


        if (secpolicy_xvm_control(cred_p)) {
                return (EPERM);
        }

        instance = getminor(dev);
        state = ddi_get_soft_state(xpvtap_statep, instance);
        if (state == NULL) {
                return (EBADF);
        }

        /* the user app should be doing a MAP_SHARED mapping */
        if ((flags & MAP_TYPE) != MAP_SHARED) {
                return (EINVAL);
        }

        /*
         * if this is the user ring (offset = 0), devmap it (which ends up in
         * xpvtap_devmap). devmap will alloc and map the ring into the
         * app's VA space.
         */
        if (off == 0) {
                e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len,
                    prot, maxprot, flags, cred_p);
                return (e);
        }

        /* this should be the mmap for the gref pages (offset = PAGESIZE) */
        if (off != PAGESIZE) {
                return (EINVAL);
        }

        /* make sure we get the size we're expecting */
        if (len != XPVTAP_GREF_BUFSIZE) {
                return (EINVAL);
        }

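        /*
         * The gref buffer is carved into fixed per-request windows: each
         * outstanding request ID (uid) owns a run of pages within it, found
         * via XPVTAP_GREF_REQADDR() when grefs are mapped in
         * xpvtap_user_request_map().
         */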
        /*
         * reserve user app VA space for the gref pages and use segmf to
         * manage the backing store for the physical memory. segmf will
         * map in/out the grefs and fault them in/out.
         */
        ASSERT(asp == state->bt_map.um_as);
        as_rangelock(asp);
        if ((flags & MAP_FIXED) == 0) {
                map_addr(addrp, len, 0, 0, flags);
                if (*addrp == NULL) {
                        as_rangeunlock(asp);
                        return (ENOMEM);
                }
        } else {
                /* User specified address */
                (void) as_unmap(asp, *addrp, len);
        }
        a.dev = dev;
        a.prot = (uchar_t)prot;
        a.maxprot = (uchar_t)maxprot;
        e = as_map(asp, *addrp, len, segmf_create, &a);
        if (e != 0) {
                as_rangeunlock(asp);
                return (e);
        }
        as_rangeunlock(asp);

        /* stash the user base address and size of the gref buffer */
        state->bt_map.um_guest_pages = (caddr_t)*addrp;
        state->bt_map.um_guest_size = (size_t)len;

        /* register an as callback so we can clean up when the app goes away */
        e = as_add_callback(asp, xpvtap_segmf_unregister, state,
            AS_UNMAP_EVENT, *addrp, len, KM_SLEEP);
        if (e != 0) {
                (void) as_unmap(asp, *addrp, len);
                return (EINVAL);
        }

        /* wake thread to see if there are requests already queued up */
        mutex_enter(&state->bt_thread.ut_mutex);
        state->bt_thread.ut_wake = B_TRUE;
        cv_signal(&state->bt_thread.ut_wake_cv);
        mutex_exit(&state->bt_thread.ut_mutex);

        return (0);
}


/*
 * xpvtap_devmap()
 */
/*ARGSUSED*/
static int
xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
    size_t *maplen, uint_t model)
{
        xpvtap_user_ring_t *usring;
        xpvtap_state_t *state;
        int instance;
        int e;


        instance = getminor(dev);
        state = ddi_get_soft_state(xpvtap_statep, instance);
        if (state == NULL) {
                return (EBADF);
        }

        /* we should only get here if the offset was == 0 */
        if (off != 0) {
                return (EINVAL);
        }

        /* we should only be mapping in one page */
        if (len != PAGESIZE) {
                return (EINVAL);
        }

        /*
         * we already allocated the user ring during driver attach, all we
         * need to do is map it into the user app's VA.
         */
        usring = &state->bt_user_ring;
        e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0,
            PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL);
        if (e < 0) {
                return (e);
        }

        /* return the size to complete the devmap */
        *maplen = PAGESIZE;

        return (0);
}


/*
 * xpvtap_chpoll()
 */
static int
xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
        xpvtap_user_ring_t *usring;
        xpvtap_state_t *state;
        int instance;


        instance = getminor(dev);
        if (instance == -1) {
                return (EBADF);
        }
        state = ddi_get_soft_state(xpvtap_statep, instance);
        if (state == NULL) {
                return (EBADF);
        }

        if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) {
                *reventsp = 0;
                return (EINVAL);
        }

        /*
         * if we pushed requests on the user ring since the last poll, wakeup
         * the user app
         */
        usring = &state->bt_user_ring;
        if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) {

                /*
                 * XXX - is this faster here or xpvtap_user_request_push??
                 * prelim data says here.  Because less membars or because
                 * user thread will spin in poll requests before getting to
                 * responses?
                 */
                RING_PUSH_REQUESTS(&usring->ur_ring);

                usring->ur_prod_polled = usring->ur_ring.sring->req_prod;
                *reventsp = POLLIN | POLLRDNORM;

        /* no new requests */
        } else {
                *reventsp = 0;
                if (!anyyet) {
                        *phpp = &state->bt_pollhead;
                }
        }

        return (0);
}


/*
 * xpvtap_drv_init()
 */
static xpvtap_state_t *
xpvtap_drv_init(int instance)
{
        xpvtap_state_t *state;
        int e;


        e = ddi_soft_state_zalloc(xpvtap_statep, instance);
        if (e != DDI_SUCCESS) {
                return (NULL);
        }
        state = ddi_get_soft_state(xpvtap_statep, instance);
        if (state == NULL) {
                goto drvinitfail_get_soft_state;
        }

        state->bt_instance = instance;
        mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL);
        cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL);
        state->bt_open.bo_opened = B_FALSE;
        state->bt_map.um_registered = B_FALSE;

        /* initialize user ring, thread, mapping state */
        e = xpvtap_user_init(state);
        if (e != DDI_SUCCESS) {
                goto drvinitfail_userinit;
        }

        return (state);

drvinitfail_userinit:
        cv_destroy(&state->bt_open.bo_exit_cv);
        mutex_destroy(&state->bt_open.bo_mutex);
drvinitfail_get_soft_state:
        (void) ddi_soft_state_free(xpvtap_statep, instance);
        return (NULL);
}


/*
 * xpvtap_drv_fini()
 */
static void
xpvtap_drv_fini(xpvtap_state_t *state)
{
        xpvtap_user_fini(state);
        cv_destroy(&state->bt_open.bo_exit_cv);
        mutex_destroy(&state->bt_open.bo_mutex);
        (void) ddi_soft_state_free(xpvtap_statep, state->bt_instance);
}


/*
 * xpvtap_intr()
 *    this routine will be called when we have a request on the guest ring.
 */
static uint_t
xpvtap_intr(caddr_t arg)
{
        xpvtap_state_t *state;


        state = (xpvtap_state_t *)arg;

        /* wake thread, thread handles guest requests and user app responses */
        mutex_enter(&state->bt_thread.ut_mutex);
        state->bt_thread.ut_wake = B_TRUE;
        cv_signal(&state->bt_thread.ut_wake_cv);
        mutex_exit(&state->bt_thread.ut_mutex);

        return (DDI_INTR_CLAIMED);
}


/*
 * xpvtap_segmf_register()
 */
static int
xpvtap_segmf_register(xpvtap_state_t *state)
{
        struct seg *seg;
        uint64_t pte_ma;
        struct as *as;
        caddr_t uaddr;
        uint_t pgcnt;
        int i;


        as = state->bt_map.um_as;
        pgcnt = btopr(state->bt_map.um_guest_size);
        uaddr = state->bt_map.um_guest_pages;

        if (pgcnt == 0) {
                return (DDI_FAILURE);
        }

        AS_LOCK_ENTER(as, RW_READER);

        seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
        if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) >
            (seg->s_base + seg->s_size))) {
                AS_LOCK_EXIT(as);
                return (DDI_FAILURE);
        }

        /*
         * lock down the htables so the HAT can't steal them. Register the
         * PTE MA's for each gref page with seg_mf so we can do user space
         * gref mappings.
         */
        for (i = 0; i < pgcnt; i++) {
                hat_prepare_mapping(as->a_hat, uaddr, &pte_ma);
                hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
                    PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
                    HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
                hat_release_mapping(as->a_hat, uaddr);
                segmf_add_gref_pte(seg, uaddr, pte_ma);
                uaddr += PAGESIZE;
        }

        state->bt_map.um_registered = B_TRUE;

        AS_LOCK_EXIT(as);

        return (DDI_SUCCESS);
}


/*
 * xpvtap_segmf_unregister()
 *    as_callback routine
 */
/*ARGSUSED*/
static void
xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event)
{
        xpvtap_state_t *state;
        caddr_t uaddr;
        uint_t pgcnt;
        int i;


        state = (xpvtap_state_t *)arg;
        if (!state->bt_map.um_registered) {
                /* remove the callback (which is this routine) */
                (void) as_delete_callback(as, arg);
                return;
        }

        pgcnt = btopr(state->bt_map.um_guest_size);
        uaddr = state->bt_map.um_guest_pages;

        /* unmap any outstanding req's grefs */
        xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state);

        /* Unlock the gref pages */
        for (i = 0; i < pgcnt; i++) {
                AS_LOCK_ENTER(as, RW_WRITER);
                hat_prepare_mapping(as->a_hat, uaddr, NULL);
                hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
                hat_release_mapping(as->a_hat, uaddr);
                AS_LOCK_EXIT(as);
                uaddr += PAGESIZE;
        }

        /* remove the callback (which is this routine) */
        (void) as_delete_callback(as, arg);

        state->bt_map.um_registered = B_FALSE;
}


/*
 * xpvtap_user_init()
 */
static int
xpvtap_user_init(xpvtap_state_t *state)
{
        xpvtap_user_map_t *map;
        int e;


        map = &state->bt_map;

        /* Setup the ring between the driver and user app */
        e = xpvtap_user_ring_init(state);
        if (e != DDI_SUCCESS) {
                return (DDI_FAILURE);
        }

        /*
         * the user ring can handle BLKIF_RING_SIZE outstanding requests. This
         * is the same number of requests as the guest ring. Initialize the
         * state we use to track request IDs to the user app. These IDs will
         * also identify which group of gref pages corresponds to the
         * request.
         */
        xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs);

        /*
         * allocate the space to store a copy of each outstanding request. We
         * will need to reference the ID and the number of segments when we
         * get the response from the user app.
         */
        map->um_outstanding_reqs = kmem_zalloc(
            sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE,
            KM_SLEEP);

        /*
         * initialize the thread we use to process guest requests and user
         * responses.
         */
        e = xpvtap_user_thread_init(state);
        if (e != DDI_SUCCESS) {
                goto userinitfail_user_thread_init;
        }

        return (DDI_SUCCESS);

userinitfail_user_thread_init:
        xpvtap_rs_fini(&map->um_rs);
        kmem_free(map->um_outstanding_reqs,
            sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
        xpvtap_user_ring_fini(state);
        return (DDI_FAILURE);
}


/*
 * xpvtap_user_ring_init()
 */
static int
xpvtap_user_ring_init(xpvtap_state_t *state)
{
        xpvtap_user_ring_t *usring;


        usring = &state->bt_user_ring;

        /* allocate and initialize the page for the shared user ring */
        usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE,
            DDI_UMEM_SLEEP, &usring->ur_cookie);
        SHARED_RING_INIT(usring->ur_sring);
        FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE);
        usring->ur_prod_polled = 0;

        return (DDI_SUCCESS);
}


/*
 * xpvtap_user_thread_init()
 */
static int
xpvtap_user_thread_init(xpvtap_state_t *state)
{
        xpvtap_user_thread_t *thread;
        char taskqname[32];


        thread = &state->bt_thread;

        mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL);
        cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL);
        cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL);
        thread->ut_wake = B_FALSE;
        thread->ut_exit = B_FALSE;
        thread->ut_exit_done = B_TRUE;

        /* create but don't start the user thread */
        (void) sprintf(taskqname, "xpvtap_%d", state->bt_instance);
        thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1,
            TASKQ_DEFAULTPRI, 0);
        if (thread->ut_taskq == NULL) {
                goto userinitthrfail_taskq_create;
        }

        return (DDI_SUCCESS);

userinitthrfail_taskq_create:
        cv_destroy(&thread->ut_exit_done_cv);
        cv_destroy(&thread->ut_wake_cv);
        mutex_destroy(&thread->ut_mutex);

        return (DDI_FAILURE);
}


/*
 * xpvtap_user_thread_start()
 */
static void
xpvtap_user_thread_start(caddr_t arg)
{
        xpvtap_user_thread_t *thread;
        xpvtap_state_t *state;
        int e;


        state = (xpvtap_state_t *)arg;
        thread = &state->bt_thread;

        /* start the user thread */
        thread->ut_exit_done = B_FALSE;
        e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state,
            DDI_SLEEP);
        if (e != DDI_SUCCESS) {
                thread->ut_exit_done = B_TRUE;
                cmn_err(CE_WARN, "Unable to start user thread\n");
        }
}


/*
 * xpvtap_user_thread_stop()
 */
static void
xpvtap_user_thread_stop(xpvtap_state_t *state)
{
        /* wake thread so it can exit */
        mutex_enter(&state->bt_thread.ut_mutex);
        state->bt_thread.ut_wake = B_TRUE;
        state->bt_thread.ut_exit = B_TRUE;
        cv_signal(&state->bt_thread.ut_wake_cv);
        if (!state->bt_thread.ut_exit_done) {
                cv_wait(&state->bt_thread.ut_exit_done_cv,
                    &state->bt_thread.ut_mutex);
        }
        mutex_exit(&state->bt_thread.ut_mutex);
        ASSERT(state->bt_thread.ut_exit_done);
}


/*
 * xpvtap_user_fini()
 */
static void
xpvtap_user_fini(xpvtap_state_t *state)
{
        xpvtap_user_map_t *map;


        map = &state->bt_map;

        xpvtap_user_thread_fini(state);
        xpvtap_rs_fini(&map->um_rs);
        kmem_free(map->um_outstanding_reqs,
            sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
        xpvtap_user_ring_fini(state);
}


/*
 * xpvtap_user_ring_fini()
 */
static void
xpvtap_user_ring_fini(xpvtap_state_t *state)
{
        ddi_umem_free(state->bt_user_ring.ur_cookie);
}


/*
 * xpvtap_user_thread_fini()
 */
static void
xpvtap_user_thread_fini(xpvtap_state_t *state)
{
        ddi_taskq_destroy(state->bt_thread.ut_taskq);
        cv_destroy(&state->bt_thread.ut_exit_done_cv);
        cv_destroy(&state->bt_thread.ut_wake_cv);
        mutex_destroy(&state->bt_thread.ut_mutex);
}


/*
 * xpvtap_user_thread()
 */
static void
xpvtap_user_thread(void *arg)
{
        xpvtap_user_thread_t *thread;
        blkif_response_t resp;
        xpvtap_state_t *state;
        blkif_request_t req;
        boolean_t b;
        uint_t uid;
        int e;


        state = (xpvtap_state_t *)arg;
        thread = &state->bt_thread;

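        /*
         * worker loop: sleep until woken (guest ring interrupt, RESP_PUSH
         * ioctl, or segmap), drain guest requests into the user ring, then
         * drain user app responses back into the guest ring. Exit when
         * ut_exit is set (close/detach).
         */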
xpvtap_thread_start:
        /* See if we are supposed to exit */
        mutex_enter(&thread->ut_mutex);
        if (thread->ut_exit) {
                thread->ut_exit_done = B_TRUE;
                cv_signal(&state->bt_thread.ut_exit_done_cv);
                mutex_exit(&thread->ut_mutex);
                return;
        }

        /*
         * if we aren't supposed to be awake, wait until someone wakes us.
         * when we wake up, check for a kill or someone telling us to exit.
         */
        if (!thread->ut_wake) {
                e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex);
                if ((e == 0) || (thread->ut_exit)) {
                        thread->ut_exit = B_TRUE;
                        mutex_exit(&thread->ut_mutex);
                        goto xpvtap_thread_start;
                }
        }

        /* if someone didn't wake us, go back to the start of the thread */
        if (!thread->ut_wake) {
                mutex_exit(&thread->ut_mutex);
                goto xpvtap_thread_start;
        }

        /* we are awake */
        thread->ut_wake = B_FALSE;
        mutex_exit(&thread->ut_mutex);

        /* process requests from the guest */
        do {
                /*
                 * check for requests from the guest. if we don't have any,
                 * break out of the loop.
                 */
                e = blk_ring_request_get(state->bt_guest_ring, &req);
                if (e == B_FALSE) {
                        break;
                }

                /* we got a request, map the grefs into the user app's VA */
                e = xpvtap_user_request_map(state, &req, &uid);
                if (e != DDI_SUCCESS) {
                        /*
                         * If we couldn't map the request (e.g. user app hasn't
                         * opened the device yet), requeue it and try again
                         * later
                         */
                        blk_ring_request_requeue(state->bt_guest_ring);
                        break;
                }

                /* push the request to the user app */
                e = xpvtap_user_request_push(state, &req, uid);
                if (e != DDI_SUCCESS) {
                        resp.id = req.id;
                        resp.operation = req.operation;
                        resp.status = BLKIF_RSP_ERROR;
                        blk_ring_response_put(state->bt_guest_ring, &resp);
                }
        } while (!thread->ut_exit);

        /* process responses from the user app */
        do {
                /*
                 * check for responses from the user app. if we don't have any,
                 * break out of the loop.
                 */
                b = xpvtap_user_response_get(state, &resp, &uid);
                if (b != B_TRUE) {
                        break;
                }

                /*
                 * if we got a response, unmap the grefs from the matching
                 * request.
                 */
                xpvtap_user_request_unmap(state, uid);

                /* push the response to the guest */
                blk_ring_response_put(state->bt_guest_ring, &resp);
        } while (!thread->ut_exit);

        goto xpvtap_thread_start;
}


/*
 * xpvtap_user_request_map()
 */
static int
xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
    uint_t *uid)
{
        grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
        struct seg *seg;
        struct as *as;
        domid_t domid;
        caddr_t uaddr;
        uint_t flags;
        int i;
        int e;


        domid = xvdi_get_oeid(state->bt_dip);

        as = state->bt_map.um_as;
        if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) {
                return (DDI_FAILURE);
        }

        /* has to happen after segmap returns */
        if (!state->bt_map.um_registered) {
                /* register the pte's with segmf */
                e = xpvtap_segmf_register(state);
                if (e != DDI_SUCCESS) {
                        return (DDI_FAILURE);
                }
        }

        /* alloc an ID for the user ring */
        e = xpvtap_rs_alloc(state->bt_map.um_rs, uid);
        if (e != DDI_SUCCESS) {
                return (DDI_FAILURE);
        }

        /* if we don't have any segments to map, we're done */
        if ((req->operation == BLKIF_OP_WRITE_BARRIER) ||
            (req->operation == BLKIF_OP_FLUSH_DISKCACHE) ||
            (req->nr_segments == 0)) {
                return (DDI_SUCCESS);
        }

        /* get the app's gref address */
        uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid);

        AS_LOCK_ENTER(as, RW_READER);
        seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
        if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
            (seg->s_base + seg->s_size))) {
                AS_LOCK_EXIT(as);
                return (DDI_FAILURE);
        }

        /* if we are reading from disk, we are writing into memory */
        flags = 0;
        if (req->operation == BLKIF_OP_READ) {
                flags |= SEGMF_GREF_WR;
        }

        /* Load the grefs into seg_mf */
        for (i = 0; i < req->nr_segments; i++) {
                gref[i] = req->seg[i].gref;
        }
        (void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments,
            domid);

        AS_LOCK_EXIT(as);

        return (DDI_SUCCESS);
}


/*
 * xpvtap_user_request_push()
 */
static int
xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req,
    uint_t uid)
{
        blkif_request_t *outstanding_req;
        blkif_front_ring_t *uring;
        blkif_request_t *target;
        xpvtap_user_map_t *map;


        uring = &state->bt_user_ring.ur_ring;
        map = &state->bt_map;

        target = RING_GET_REQUEST(uring, uring->req_prod_pvt);

        /*
         * Save the request from the frontend. It's used for ID mapping and
         * unmap on response/cleanup
         */
        outstanding_req = &map->um_outstanding_reqs[uid];
        bcopy(req, outstanding_req, sizeof (*outstanding_req));

        /* put the request on the user ring */
        bcopy(req, target, sizeof (*req));
        target->id = (uint64_t)uid;
        uring->req_prod_pvt++;

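        /*
         * note that the private producer index is not published to the
         * shared ring here; that happens in xpvtap_chpoll() via
         * RING_PUSH_REQUESTS() (see the comment there). We just wake any
         * poller.
         */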
        pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM);

        return (DDI_SUCCESS);
}


/*
 * xpvtap_user_request_unmap()
 */
static void
xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid)
{
        blkif_request_t *req;
        struct seg *seg;
        struct as *as;
        caddr_t uaddr;
        int e;


        as = state->bt_map.um_as;
        if (as == NULL) {
                return;
        }

        /* get a copy of the original request */
        req = &state->bt_map.um_outstanding_reqs[uid];

        /* unmap the grefs for this request */
        if ((req->operation != BLKIF_OP_WRITE_BARRIER) &&
            (req->operation != BLKIF_OP_FLUSH_DISKCACHE) &&
            (req->nr_segments != 0)) {
                uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid);
                AS_LOCK_ENTER(as, RW_READER);
                seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
                if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
                    (seg->s_base + seg->s_size))) {
                        AS_LOCK_EXIT(as);
                        xpvtap_rs_free(state->bt_map.um_rs, uid);
                        return;
                }

                e = segmf_release_grefs(seg, uaddr, req->nr_segments);
                if (e != 0) {
                        cmn_err(CE_WARN, "unable to release grefs");
                }

                AS_LOCK_EXIT(as);
        }

        /* free up the user ring id */
        xpvtap_rs_free(state->bt_map.um_rs, uid);
}


/*
 * xpvtap_user_response_get()
 */
static int
xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp,
    uint_t *uid)
{
        blkif_front_ring_t *uring;
        blkif_response_t *target;


        uring = &state->bt_user_ring.ur_ring;

        if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) {
                return (B_FALSE);
        }

        target = RING_GET_RESPONSE(uring, uring->rsp_cons);
        if (target == NULL) {
                return (B_FALSE);
        }

        /* copy out the user app response */
        bcopy(target, resp, sizeof (*resp));
        uring->rsp_cons++;

        /* restore the guest's id from the original request */
        *uid = (uint_t)resp->id;
        resp->id = state->bt_map.um_outstanding_reqs[*uid].id;

        return (B_TRUE);
}


/*
 * xpvtap_user_app_stop()
 */
static void
xpvtap_user_app_stop(caddr_t arg)
{
        xpvtap_state_t *state;
        clock_t rc;

        state = (xpvtap_state_t *)arg;

        /*
         * Give the app 10 secs to exit. If it doesn't exit, it's not a serious
         * problem, we just won't auto-detach the driver.
         */
        mutex_enter(&state->bt_open.bo_mutex);
        if (state->bt_open.bo_opened) {
                rc = cv_reltimedwait(&state->bt_open.bo_exit_cv,
                    &state->bt_open.bo_mutex, drv_usectohz(10000000),
                    TR_CLOCK_TICK);
                if (rc <= 0) {
                        cmn_err(CE_NOTE, "!user process still has driver open, "
                            "deferring detach\n");
                }
        }
        mutex_exit(&state->bt_open.bo_mutex);
}


/*
 * xpvtap_rs_init()
 *    Initialize the resource structure. init() returns a handle to be used
 *    for the rest of the resource functions. This code is written assuming
 *    that min_val will be close to 0. Therefore, we will allocate the free
 *    buffer only taking max_val into account.
 */
static void
xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle)
{
        xpvtap_rs_t *rstruct;
        uint_t array_size;
        uint_t index;


        ASSERT(handle != NULL);
        ASSERT(min_val < max_val);

        /* alloc space for resource structure */
        rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP);

        /*
         * alloc space for the free buffer (8 bytes per uint64_t word).
         * Resources 0..max_val inclusive need max_val + 1 bits, so round up
         * to the next whole 64-bit word.
         */
        rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;
        rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);

        /* Initialize resource structure */
        rstruct->rs_min = min_val;
        rstruct->rs_last = min_val;
        rstruct->rs_max = max_val;
        mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL);
        rstruct->rs_flushing = B_FALSE;

        /* Mark all resources as free */
        array_size = rstruct->rs_free_size >> 3;
        for (index = 0; index < array_size; index++) {
                rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
        }

        /* setup handle which is returned from this function */
        *handle = rstruct;
}


/*
 * xpvtap_rs_fini()
 *    Frees up the space allocated in init().  Notice that a pointer to the
 *    handle is used for the parameter.  fini() will set the handle to NULL
 *    before returning.
 */
static void
xpvtap_rs_fini(xpvtap_rs_hdl_t *handle)
{
        xpvtap_rs_t *rstruct;


        ASSERT(handle != NULL);

        rstruct = (xpvtap_rs_t *)*handle;

        mutex_destroy(&rstruct->rs_mutex);
        kmem_free(rstruct->rs_free, rstruct->rs_free_size);
        kmem_free(rstruct, sizeof (xpvtap_rs_t));

        /* set handle to null.  This helps catch bugs. */
        *handle = NULL;
}


/*
 * xpvtap_rs_alloc()
 *    alloc a resource. If alloc fails, we are out of resources.
 */
static int
xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource)
{
        xpvtap_rs_t *rstruct;
        uint_t array_idx;
        uint64_t free;
        uint_t index;
        uint_t last;
        uint_t min;
        uint_t max;


        ASSERT(handle != NULL);
        ASSERT(resource != NULL);

        rstruct = (xpvtap_rs_t *)handle;

        mutex_enter(&rstruct->rs_mutex);
        min = rstruct->rs_min;
        max = rstruct->rs_max;

        /*
         * Find a free resource. This will return out of the loop once it finds
         * a free resource. There are a total of 'max'-'min'+1 resources.
         * Performs a round robin allocation.
         */
        for (index = min; index <= max; index++) {

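                /*
                 * rs_free is a bitmap where a set bit means the resource is
                 * free. Each uint64_t word covers 64 resources: rs_last >> 6
                 * selects the word, rs_last & 0x3F the bit within it.
                 */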
                array_idx = rstruct->rs_last >> 6;
                free = rstruct->rs_free[array_idx];
                last = rstruct->rs_last & 0x3F;

                /* if the next resource to check is free */
                if ((free & ((uint64_t)1 << last)) != 0) {
                        /* we are using this resource */
                        *resource = rstruct->rs_last;

                        /* take it out of the free list */
                        rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);

                        /*
                         * increment the last count so we start checking the
                         * next resource on the next alloc().  Note the rollover
                         * at 'max'+1.
                         */
                        rstruct->rs_last++;
                        if (rstruct->rs_last > max) {
                                rstruct->rs_last = rstruct->rs_min;
                        }

                        /* unlock the resource structure */
                        mutex_exit(&rstruct->rs_mutex);

                        return (DDI_SUCCESS);
                }

                /*
                 * This resource is not free, let's go to the next one. Note
                 * the rollover at 'max'.
                 */
                rstruct->rs_last++;
                if (rstruct->rs_last > max) {
                        rstruct->rs_last = rstruct->rs_min;
                }
        }

        mutex_exit(&rstruct->rs_mutex);

        return (DDI_FAILURE);
}


/*
 * xpvtap_rs_free()
 *    Free the previously alloc'd resource.  Once a resource has been free'd,
 *    it can be used again when alloc is called.
 */
static void
xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource)
{
        xpvtap_rs_t *rstruct;
        uint_t array_idx;
        uint_t offset;


        ASSERT(handle != NULL);

        rstruct = (xpvtap_rs_t *)handle;
        ASSERT(resource >= rstruct->rs_min);
        ASSERT(resource <= rstruct->rs_max);

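        /*
         * xpvtap_rs_flush() already holds rs_mutex when it invokes the
         * cleanup callback (which can land back here), so only grab the
         * mutex if we're not in the middle of a flush.
         */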
        if (!rstruct->rs_flushing) {
                mutex_enter(&rstruct->rs_mutex);
        }

        /* Put the resource back in the free list */
        array_idx = resource >> 6;
        offset = resource & 0x3F;
        rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);

        if (!rstruct->rs_flushing) {
                mutex_exit(&rstruct->rs_mutex);
        }
}


/*
 * xpvtap_rs_flush()
 */
static void
xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback,
    void *arg)
{
        xpvtap_rs_t *rstruct;
        uint_t array_idx;
        uint64_t free;
        uint_t index;
        uint_t last;
        uint_t min;
        uint_t max;


        ASSERT(handle != NULL);

        rstruct = (xpvtap_rs_t *)handle;

        mutex_enter(&rstruct->rs_mutex);
        min = rstruct->rs_min;
        max = rstruct->rs_max;

        rstruct->rs_flushing = B_TRUE;

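        /*
         * with rs_flushing set, xpvtap_rs_free() (reached through the
         * cleanup callback below) knows rs_mutex is already held and won't
         * try to take it again.
         */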
        /*
         * for all resources not free, call the callback routine to clean them
         * up.
         */
        for (index = min; index <= max; index++) {

                array_idx = rstruct->rs_last >> 6;
                free = rstruct->rs_free[array_idx];
                last = rstruct->rs_last & 0x3F;

                /* if the next resource to check is not free */
                if ((free & ((uint64_t)1 << last)) == 0) {
                        /* call the callback to cleanup */
                        (*callback)(arg, rstruct->rs_last);

                        /* put it back in the free list */
                        rstruct->rs_free[array_idx] |= ((uint64_t)1 << last);
                }

                /* go to the next one. Note the rollover at 'max' */
                rstruct->rs_last++;
                if (rstruct->rs_last > max) {
                        rstruct->rs_last = rstruct->rs_min;
                }
        }

        mutex_exit(&rstruct->rs_mutex);
}