1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T     */
  28 /*        All Rights Reserved   */
  29 
  30 /*
  31  * University Copyright- Copyright (c) 1982, 1986, 1988
  32  * The Regents of the University of California
  33  * All Rights Reserved
  34  *
  35  * University Acknowledgment- Portions of this document are derived from
  36  * software developed by the University of California, Berkeley, and its
  37  * contributors.
  38  */
  39 
  40 /*
  41  * VM - segment of a mapped device.
  42  *
  43  * This segment driver is used when mapping character special devices.
  44  */
  45 
  46 #include <sys/types.h>
  47 #include <sys/t_lock.h>
  48 #include <sys/sysmacros.h>
  49 #include <sys/vtrace.h>
  50 #include <sys/systm.h>
  51 #include <sys/vmsystm.h>
  52 #include <sys/mman.h>
  53 #include <sys/errno.h>
  54 #include <sys/kmem.h>
  55 #include <sys/cmn_err.h>
  56 #include <sys/vnode.h>
  57 #include <sys/proc.h>
  58 #include <sys/conf.h>
  59 #include <sys/debug.h>
  60 #include <sys/ddidevmap.h>
  61 #include <sys/ddi_implfuncs.h>
  62 #include <sys/lgrp.h>
  63 
  64 #include <vm/page.h>
  65 #include <vm/hat.h>
  66 #include <vm/as.h>
  67 #include <vm/seg.h>
  68 #include <vm/seg_dev.h>
  69 #include <vm/seg_kp.h>
  70 #include <vm/seg_kmem.h>
  71 #include <vm/vpage.h>
  72 
  73 #include <sys/sunddi.h>
  74 #include <sys/esunddi.h>
  75 #include <sys/fs/snode.h>
  76 
  77 
  78 #if DEBUG
  79 int segdev_debug;
  80 #define DEBUGF(level, args) { if (segdev_debug >= (level)) cmn_err args; }
  81 #else
  82 #define DEBUGF(level, args)
  83 #endif
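/*
 * Example usage (as seen later in this file, e.g. in segdev_dup()):
 *
 *	DEBUGF(3, (CE_CONT, "segdev_dup: dhp %p seg %p\n",
 *	    (void *)dhp, (void *)seg));
 *
 * The extra parentheses let a single macro argument carry a complete
 * cmn_err() argument list.
 */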
  84 
  85 /* Default timeout for devmap context management */
  86 #define CTX_TIMEOUT_VALUE 0
  87 
  88 #define HOLD_DHP_LOCK(dhp)  if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
  89                         { mutex_enter(&dhp->dh_lock); }
  90 
  91 #define RELE_DHP_LOCK(dhp) if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
  92                         { mutex_exit(&dhp->dh_lock); }
  93 
  94 #define round_down_p2(a, s)     ((a) & ~((s) - 1))
  95 #define round_up_p2(a, s)       (((a) + (s) - 1) & ~((s) - 1))
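/*
 * Illustrative examples (added, not from the original source): with s a
 * power of two, round_down_p2(0x1234, 0x1000) yields 0x1000 and
 * round_up_p2(0x1234, 0x1000) yields 0x2000.  Both macros assume s is a
 * power of two; they are not valid for arbitrary sizes.
 */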
  96 
  97 /*
  98  * VA_PA_ALIGNED checks to see if both VA and PA are on a pgsize boundary
  99  * VA_PA_PGSIZE_ALIGNED checks to see if VA is aligned with PA w.r.t. pgsize
 100  */
 101 #define VA_PA_ALIGNED(uvaddr, paddr, pgsize)            \
 102         (((uvaddr | paddr) & (pgsize - 1)) == 0)
 103 #define VA_PA_PGSIZE_ALIGNED(uvaddr, paddr, pgsize)     \
 104         (((uvaddr ^ paddr) & (pgsize - 1)) == 0)
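/*
 * Worked example (added for clarity, not in the original source): with an
 * 8K pgsize (0x2000), uvaddr 0x12000 and paddr 0x34000 satisfy both macros.
 * uvaddr 0x13000 and paddr 0x35000 fail VA_PA_ALIGNED, since neither is on
 * an 8K boundary, but still satisfy VA_PA_PGSIZE_ALIGNED because both have
 * the same 0x1000 offset within their 8K page.
 */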
 105 
 106 #define vpgtob(n)       ((n) * sizeof (struct vpage))   /* For brevity */
 107 
 108 #define VTOCVP(vp)      (VTOS(vp)->s_commonvp)       /* we "know" it's an snode */
 109 
 110 static struct devmap_ctx *devmapctx_list = NULL;
 111 static struct devmap_softlock *devmap_slist = NULL;
 112 
 113 /*
 114  * mutex, vnode and page for the page of zeros we use for the trash mappings.
 115  * One trash page is allocated on the first ddi_umem_setup call that uses it.
 116  * XXX Eventually, we may want to combine this with what segnf does when all
 117  * hat layers implement HAT_NOFAULT.
 118  *
 119  * The trash page is used when the backing store for a userland mapping is
 120  * removed but the application semantics do not take kindly to a SIGBUS.
 121  * In that scenario, the application's pages are mapped to a dummy page
 122  * which returns garbage on reads, while writes go into a common place.
 123  * (Perfect for NO_FAULT semantics.)
 124  * The device driver is responsible for communicating to the app, by some
 125  * other mechanism, that such a remapping has happened, so that the app can
 126  * take corrective action.
 127  * We could also use an anonymous memory page, as there is no requirement to
 128  * keep the page locked; however, this complicates the fault code. RFE.
 129  */
 130 static struct vnode trashvp;
 131 static struct page *trashpp;
 132 
 133 /* Non-pageable kernel memory is allocated from the umem_np_arena. */
 134 static vmem_t *umem_np_arena;
 135 
 136 /* Set the cookie to a value we know will never be a valid umem_cookie */
 137 #define DEVMAP_DEVMEM_COOKIE    ((ddi_umem_cookie_t)0x1)
 138 
 139 /*
 140  * Macros to check the type of a devmap handle
 141  */
 142 #define cookie_is_devmem(c)     \
 143         ((c) == (struct ddi_umem_cookie *)DEVMAP_DEVMEM_COOKIE)
 144 
 145 #define cookie_is_pmem(c)       \
 146         ((c) == (struct ddi_umem_cookie *)DEVMAP_PMEM_COOKIE)
 147 
 148 #define cookie_is_kpmem(c)      (!cookie_is_devmem(c) && !cookie_is_pmem(c) &&\
 149         ((c)->type == KMEM_PAGEABLE))
 150 
 151 #define dhp_is_devmem(dhp)      \
 152         (cookie_is_devmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
 153 
 154 #define dhp_is_pmem(dhp)        \
 155         (cookie_is_pmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
 156 
 157 #define dhp_is_kpmem(dhp)       \
 158         (cookie_is_kpmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
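/*
 * Note (added for clarity): DEVMAP_DEVMEM_COOKIE above, and presumably
 * DEVMAP_PMEM_COOKIE defined elsewhere, are sentinel values rather than
 * pointers to real ddi_umem_cookie structures, which is why
 * cookie_is_kpmem() rules both out before it dereferences (c)->type.
 */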
 159 
 160 /*
 161  * Private seg op routines.
 162  */
 163 static int      segdev_dup(struct seg *, struct seg *);
 164 static int      segdev_unmap(struct seg *, caddr_t, size_t);
 165 static void     segdev_free(struct seg *);
 166 static faultcode_t segdev_fault(struct hat *, struct seg *, caddr_t, size_t,
 167                     enum fault_type, enum seg_rw);
 168 static faultcode_t segdev_faulta(struct seg *, caddr_t);
 169 static int      segdev_setprot(struct seg *, caddr_t, size_t, uint_t);
 170 static int      segdev_checkprot(struct seg *, caddr_t, size_t, uint_t);
 171 static void     segdev_badop(void);
 172 static int      segdev_sync(struct seg *, caddr_t, size_t, int, uint_t);
 173 static size_t   segdev_incore(struct seg *, caddr_t, size_t, char *);
 174 static int      segdev_lockop(struct seg *, caddr_t, size_t, int, int,
 175                     ulong_t *, size_t);
 176 static int      segdev_getprot(struct seg *, caddr_t, size_t, uint_t *);
 177 static u_offset_t       segdev_getoffset(struct seg *, caddr_t);
 178 static int      segdev_gettype(struct seg *, caddr_t);
 179 static int      segdev_getvp(struct seg *, caddr_t, struct vnode **);
 180 static int      segdev_advise(struct seg *, caddr_t, size_t, uint_t);
 181 static void     segdev_dump(struct seg *);
 182 static int      segdev_pagelock(struct seg *, caddr_t, size_t,
 183                     struct page ***, enum lock_type, enum seg_rw);
 184 static int      segdev_setpagesize(struct seg *, caddr_t, size_t, uint_t);
 185 static int      segdev_getmemid(struct seg *, caddr_t, memid_t *);
 186 
 187 /*
 188  * XXX  this struct is used by rootnex_map_fault to identify
 189  *      the segment it has been passed. So if you make it
 190  *      "static" you'll need to fix rootnex_map_fault.
 191  */
 192 struct seg_ops segdev_ops = {
 193         .dup            = segdev_dup,
 194         .unmap          = segdev_unmap,
 195         .free           = segdev_free,
 196         .fault          = segdev_fault,
 197         .faulta         = segdev_faulta,
 198         .setprot        = segdev_setprot,
 199         .checkprot      = segdev_checkprot,
 200         .kluster        = (int (*)())segdev_badop,
 201         .sync           = segdev_sync,
 202         .incore         = segdev_incore,
 203         .lockop         = segdev_lockop,
 204         .getprot        = segdev_getprot,
 205         .getoffset      = segdev_getoffset,
 206         .gettype        = segdev_gettype,
 207         .getvp          = segdev_getvp,
 208         .advise         = segdev_advise,
 209         .dump           = segdev_dump,
 210         .pagelock       = segdev_pagelock,
 211         .setpagesize    = segdev_setpagesize,
 212         .getmemid       = segdev_getmemid,
 213 };
 214 
 215 /*
 216  * Private segdev support routines
 217  */
 218 static struct segdev_data *sdp_alloc(void);
 219 
 220 static void segdev_softunlock(struct hat *, struct seg *, caddr_t,
 221     size_t, enum seg_rw);
 222 
 223 static faultcode_t segdev_faultpage(struct hat *, struct seg *, caddr_t,
 224     struct vpage *, enum fault_type, enum seg_rw, devmap_handle_t *);
 225 
 226 static faultcode_t segdev_faultpages(struct hat *, struct seg *, caddr_t,
 227     size_t, enum fault_type, enum seg_rw, devmap_handle_t *);
 228 
 229 static struct devmap_ctx *devmap_ctxinit(dev_t, ulong_t);
 230 static struct devmap_softlock *devmap_softlock_init(dev_t, ulong_t);
 231 static void devmap_softlock_rele(devmap_handle_t *);
 232 static void devmap_ctx_rele(devmap_handle_t *);
 233 
 234 static void devmap_ctxto(void *);
 235 
 236 static devmap_handle_t *devmap_find_handle(devmap_handle_t *dhp_head,
 237     caddr_t addr);
 238 
 239 static ulong_t devmap_roundup(devmap_handle_t *dhp, ulong_t offset, size_t len,
 240     ulong_t *opfn, ulong_t *pagesize);
 241 
 242 static void free_devmap_handle(devmap_handle_t *dhp);
 243 
 244 static int devmap_handle_dup(devmap_handle_t *dhp, devmap_handle_t **new_dhp,
 245     struct seg *newseg);
 246 
 247 static devmap_handle_t *devmap_handle_unmap(devmap_handle_t *dhp);
 248 
 249 static void devmap_handle_unmap_head(devmap_handle_t *dhp, size_t len);
 250 
 251 static void devmap_handle_unmap_tail(devmap_handle_t *dhp, caddr_t addr);
 252 
 253 static int devmap_device(devmap_handle_t *dhp, struct as *as, caddr_t *addr,
 254     offset_t off, size_t len, uint_t flags);
 255 
 256 static void devmap_get_large_pgsize(devmap_handle_t *dhp, size_t len,
 257     caddr_t addr, size_t *llen, caddr_t *laddr);
 258 
 259 static void devmap_handle_reduce_len(devmap_handle_t *dhp, size_t len);
 260 
 261 static void *devmap_alloc_pages(vmem_t *vmp, size_t size, int vmflag);
 262 static void devmap_free_pages(vmem_t *vmp, void *inaddr, size_t size);
 263 
 264 static void *devmap_umem_alloc_np(size_t size, size_t flags);
 265 static void devmap_umem_free_np(void *addr, size_t size);
 266 
 267 /*
 268  * routines to lock and unlock underlying segkp segment for
 269  * KMEM_PAGEABLE type cookies.
 270  */
 271 static faultcode_t  acquire_kpmem_lock(struct ddi_umem_cookie *, size_t);
 272 static void release_kpmem_lock(struct ddi_umem_cookie *, size_t);
 273 
 274 /*
 275  * Routines to synchronize F_SOFTLOCK and F_INVAL faults for
 276  * drivers with devmap_access callbacks
 277  */
 278 static int devmap_softlock_enter(struct devmap_softlock *, size_t,
 279         enum fault_type);
 280 static void devmap_softlock_exit(struct devmap_softlock *, size_t,
 281         enum fault_type);
 282 
 283 static kmutex_t devmapctx_lock;
 284 
 285 static kmutex_t devmap_slock;
 286 
 287 /*
 288  * Initialize the thread callbacks and thread private data.
 289  */
 290 static struct devmap_ctx *
 291 devmap_ctxinit(dev_t dev, ulong_t id)
 292 {
 293         struct devmap_ctx       *devctx;
 294         struct devmap_ctx       *tmp;
 295         dev_info_t              *dip;
 296 
 297         tmp =  kmem_zalloc(sizeof (struct devmap_ctx), KM_SLEEP);
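        /*
         * Note (added for clarity): the context is pre-allocated with
         * KM_SLEEP before devmapctx_lock is taken, so we never sleep for
         * memory while holding the list mutex; if a matching context is
         * found below, the pre-allocated one is simply freed.
         */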
 298 
 299         mutex_enter(&devmapctx_lock);
 300 
 301         dip = e_ddi_hold_devi_by_dev(dev, 0);
 302         ASSERT(dip != NULL);
 303         ddi_release_devi(dip);
 304 
 305         for (devctx = devmapctx_list; devctx != NULL; devctx = devctx->next)
 306                 if ((devctx->dip == dip) && (devctx->id == id))
 307                         break;
 308 
 309         if (devctx == NULL) {
 310                 devctx = tmp;
 311                 devctx->dip = dip;
 312                 devctx->id = id;
 313                 mutex_init(&devctx->lock, NULL, MUTEX_DEFAULT, NULL);
 314                 cv_init(&devctx->cv, NULL, CV_DEFAULT, NULL);
 315                 devctx->next = devmapctx_list;
 316                 devmapctx_list = devctx;
 317         } else
 318                 kmem_free(tmp, sizeof (struct devmap_ctx));
 319 
 320         mutex_enter(&devctx->lock);
 321         devctx->refcnt++;
 322         mutex_exit(&devctx->lock);
 323         mutex_exit(&devmapctx_lock);
 324 
 325         return (devctx);
 326 }
 327 
 328 /*
 329  * Timeout callback called if a CPU has not given up the device context
 330  * within dhp->dh_timeout_length ticks
 331  */
 332 static void
 333 devmap_ctxto(void *data)
 334 {
 335         struct devmap_ctx *devctx = data;
 336 
 337         TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_CTXTO,
 338             "devmap_ctxto:timeout expired, devctx=%p", (void *)devctx);
 339         mutex_enter(&devctx->lock);
 340         /*
 341          * Set oncpu = 0 so that the next mapping trying to get the device
 342          * context can acquire it.
 343          */
 344         devctx->oncpu = 0;
 345         devctx->timeout = 0;
 346         cv_signal(&devctx->cv);
 347         mutex_exit(&devctx->lock);
 348 }
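/*
 * For context (a sketch, the arming code is not shown in this excerpt):
 * callers that implement devmap context management arm this callback along
 * the lines of
 *
 *	devctx->timeout = timeout(devmap_ctxto, devctx,
 *	    dhp->dh_timeout_length);
 *
 * so that a CPU that never gives up the device context cannot hold it
 * forever.
 */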
 349 
 350 /*
 351  * Create a device segment.
 352  */
 353 int
 354 segdev_create(struct seg *seg, void *argsp)
 355 {
 356         struct segdev_data *sdp;
 357         struct segdev_crargs *a = (struct segdev_crargs *)argsp;
 358         devmap_handle_t *dhp = (devmap_handle_t *)a->devmap_data;
 359         int error;
 360 
 361         /*
 362          * Since the address space is "write" locked, we
 363          * don't need the segment lock to protect "segdev" data.
 364          */
 365         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 366 
 367         hat_map(seg->s_as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
 368 
 369         sdp = sdp_alloc();
 370 
 371         sdp->mapfunc = a->mapfunc;
 372         sdp->offset = a->offset;
 373         sdp->prot = a->prot;
 374         sdp->maxprot = a->maxprot;
 375         sdp->type = a->type;
 376         sdp->pageprot = 0;
 377         sdp->softlockcnt = 0;
 378         sdp->vpage = NULL;
 379 
 380         if (sdp->mapfunc == NULL)
 381                 sdp->devmap_data = dhp;
 382         else
 383                 sdp->devmap_data = dhp = NULL;
 384 
 385         sdp->hat_flags = a->hat_flags;
 386         sdp->hat_attr = a->hat_attr;
 387 
 388         /*
 389          * Currently, hat_flags supports only HAT_LOAD_NOCONSIST
 390          */
 391         ASSERT(!(sdp->hat_flags & ~HAT_LOAD_NOCONSIST));
 392 
 393         /*
 394          * Hold shadow vnode -- segdev only deals with
 395          * character (VCHR) devices. We use the common
 396          * vp to hang pages on.
 397          */
 398         sdp->vp = specfind(a->dev, VCHR);
 399         ASSERT(sdp->vp != NULL);
 400 
 401         seg->s_ops = &segdev_ops;
 402         seg->s_data = sdp;
 403 
 404         while (dhp != NULL) {
 405                 dhp->dh_seg = seg;
 406                 dhp = dhp->dh_next;
 407         }
 408 
 409         /*
 410          * Inform the vnode of the new mapping.
 411          */
 412         /*
 413          * It is ok to pass sdp->maxprot to ADDMAP rather than the
 414          * dhp-specific maxprot, because spec_addmap does not use maxprot.
 415          */
 416         error = VOP_ADDMAP(VTOCVP(sdp->vp), sdp->offset,
 417             seg->s_as, seg->s_base, seg->s_size,
 418             sdp->prot, sdp->maxprot, sdp->type, CRED(), NULL);
 419 
 420         if (error != 0) {
 421                 sdp->devmap_data = NULL;
 422                 hat_unload(seg->s_as->a_hat, seg->s_base, seg->s_size,
 423                     HAT_UNLOAD_UNMAP);
 424         } else {
 425                 /*
 426                  * Mappings of /dev/null don't count towards the VSZ of a
 427                  * process.  Mappings of /dev/null have no mapping type.
 428                  */
 429                 if ((segop_gettype(seg, seg->s_base) & (MAP_SHARED |
 430                     MAP_PRIVATE)) == 0) {
 431                         seg->s_as->a_resvsize -= seg->s_size;
 432                 }
 433         }
 434 
 435         return (error);
 436 }
 437 
 438 static struct segdev_data *
 439 sdp_alloc(void)
 440 {
 441         struct segdev_data *sdp;
 442 
 443         sdp = kmem_zalloc(sizeof (struct segdev_data), KM_SLEEP);
 444         rw_init(&sdp->lock, NULL, RW_DEFAULT, NULL);
 445 
 446         return (sdp);
 447 }
 448 
 449 /*
 450  * Duplicate seg and return new segment in newseg.
 451  */
 452 static int
 453 segdev_dup(struct seg *seg, struct seg *newseg)
 454 {
 455         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
 456         struct segdev_data *newsdp;
 457         devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
 458         size_t npages;
 459         int ret;
 460 
 461         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_DUP,
 462             "segdev_dup:start dhp=%p, seg=%p", (void *)dhp, (void *)seg);
 463 
 464         DEBUGF(3, (CE_CONT, "segdev_dup: dhp %p seg %p\n",
 465             (void *)dhp, (void *)seg));
 466 
 467         /*
 468          * Since the address space is "write" locked, we
 469          * don't need the segment lock to protect "segdev" data.
 470          */
 471         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 472 
 473         newsdp = sdp_alloc();
 474 
 475         newseg->s_ops = seg->s_ops;
 476         newseg->s_data = (void *)newsdp;
 477 
 478         VN_HOLD(sdp->vp);
 479         newsdp->vp   = sdp->vp;
 480         newsdp->mapfunc = sdp->mapfunc;
 481         newsdp->offset       = sdp->offset;
 482         newsdp->pageprot = sdp->pageprot;
 483         newsdp->prot = sdp->prot;
 484         newsdp->maxprot = sdp->maxprot;
 485         newsdp->type = sdp->type;
 486         newsdp->hat_attr = sdp->hat_attr;
 487         newsdp->hat_flags = sdp->hat_flags;
 488         newsdp->softlockcnt = 0;
 489 
 490         /*
 491          * Initialize per page data if the segment we are
 492          * dup'ing has per page information.
 493          */
 494         npages = seg_pages(newseg);
 495 
 496         if (sdp->vpage != NULL) {
 497                 size_t nbytes = vpgtob(npages);
 498 
 499                 newsdp->vpage = kmem_zalloc(nbytes, KM_SLEEP);
 500                 bcopy(sdp->vpage, newsdp->vpage, nbytes);
 501         } else
 502                 newsdp->vpage = NULL;
 503 
 504         /*
 505          * duplicate devmap handles
 506          */
 507         if (dhp != NULL) {
 508                 ret = devmap_handle_dup(dhp,
 509                     (devmap_handle_t **)&newsdp->devmap_data, newseg);
 510                 if (ret != 0) {
 511                         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_DUP_CK1,
 512                             "segdev_dup:ret1 ret=%x, dhp=%p seg=%p",
 513                             ret, (void *)dhp, (void *)seg);
 514                         DEBUGF(1, (CE_CONT,
 515                             "segdev_dup: ret %x dhp %p seg %p\n",
 516                             ret, (void *)dhp, (void *)seg));
 517                         return (ret);
 518                 }
 519         }
 520 
 521         /*
 522          * Inform the common vnode of the new mapping.
 523          */
 524         return (VOP_ADDMAP(VTOCVP(newsdp->vp),
 525             newsdp->offset, newseg->s_as,
 526             newseg->s_base, newseg->s_size, newsdp->prot,
 527             newsdp->maxprot, sdp->type, CRED(), NULL));
 528 }
 529 
 530 /*
 531  * duplicate devmap handles
 532  */
 533 static int
 534 devmap_handle_dup(devmap_handle_t *dhp, devmap_handle_t **new_dhp,
 535     struct seg *newseg)
 536 {
 537         devmap_handle_t *newdhp_save = NULL;
 538         devmap_handle_t *newdhp = NULL;
 539         struct devmap_callback_ctl *callbackops;
 540 
 541         while (dhp != NULL) {
 542                 newdhp = kmem_alloc(sizeof (devmap_handle_t), KM_SLEEP);
 543 
 544                 /* Need to lock the original dhp while copying if REMAP */
 545                 HOLD_DHP_LOCK(dhp);
 546                 bcopy(dhp, newdhp, sizeof (devmap_handle_t));
 547                 RELE_DHP_LOCK(dhp);
 548                 newdhp->dh_seg = newseg;
 549                 newdhp->dh_next = NULL;
 550                 if (newdhp_save != NULL)
 551                         newdhp_save->dh_next = newdhp;
 552                 else
 553                         *new_dhp = newdhp;
 554                 newdhp_save = newdhp;
 555 
 556                 callbackops = &newdhp->dh_callbackops;
 557 
 558                 if (dhp->dh_softlock != NULL)
 559                         newdhp->dh_softlock = devmap_softlock_init(
 560                             newdhp->dh_dev,
 561                             (ulong_t)callbackops->devmap_access);
 562                 if (dhp->dh_ctx != NULL)
 563                         newdhp->dh_ctx = devmap_ctxinit(newdhp->dh_dev,
 564                             (ulong_t)callbackops->devmap_access);
 565 
 566                 /*
 567                  * Initialize dh_lock if we want to do remap.
 568                  */
 569                 if (newdhp->dh_flags & DEVMAP_ALLOW_REMAP) {
 570                         mutex_init(&newdhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
 571                         newdhp->dh_flags |= DEVMAP_LOCK_INITED;
 572                 }
 573 
 574                 if (callbackops->devmap_dup != NULL) {
 575                         int ret;
 576 
 577                         /*
 578                          * Call the dup callback so that the driver can
 579                          * duplicate its private data.
 580                          */
 581                         ret = (*callbackops->devmap_dup)(dhp, dhp->dh_pvtp,
 582                             (devmap_cookie_t *)newdhp, &newdhp->dh_pvtp);
 583 
 584                         if (ret != 0) {
 585                                 /*
 586                                  * We want to free up this segment as the driver
 587                                  * has indicated that we can't dup it.  But we
 588                                  * don't want to call the driver's devmap_unmap
 589                                  * callback function, as the driver does not
 590                                  * think this segment exists. The caller of
 591                                  * devmap_dup will call seg_free on newseg
 592                                  * as it was the caller that allocated the
 593                                  * segment.
 594                                  */
 595                                 DEBUGF(1, (CE_CONT, "devmap_handle_dup ERROR: "
 596                                     "newdhp %p dhp %p\n", (void *)newdhp,
 597                                     (void *)dhp));
 598                                 callbackops->devmap_unmap = NULL;
 599                                 return (ret);
 600                         }
 601                 }
 602 
 603                 dhp = dhp->dh_next;
 604         }
 605 
 606         return (0);
 607 }
 608 
 609 /*
 610  * Split a segment at addr for length len.
 611  */
 612 /*ARGSUSED*/
 613 static int
 614 segdev_unmap(struct seg *seg, caddr_t addr, size_t len)
 615 {
 616         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
 617         register struct segdev_data *nsdp;
 618         register struct seg *nseg;
 619         register size_t opages;         /* old segment size in pages */
 620         register size_t npages;         /* new segment size in pages */
 621         register size_t dpages;         /* pages being deleted (unmapped) */
 622         register size_t nbytes;
 623         devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
 624         devmap_handle_t *dhpp;
 625         devmap_handle_t *newdhp;
 626         struct devmap_callback_ctl *callbackops;
 627         caddr_t nbase;
 628         offset_t off;
 629         ulong_t nsize;
 630         size_t mlen, sz;
 631 
 632         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP,
 633             "segdev_unmap:start dhp=%p, seg=%p addr=%p len=%lx",
 634             (void *)dhp, (void *)seg, (void *)addr, len);
 635 
 636         DEBUGF(3, (CE_CONT, "segdev_unmap: dhp %p seg %p addr %p len %lx\n",
 637             (void *)dhp, (void *)seg, (void *)addr, len));
 638 
 639         /*
 640          * Since the address space is "write" locked, we
 641          * don't need the segment lock to protect "segdev" data.
 642          */
 643         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
 644 
 645         if ((sz = sdp->softlockcnt) > 0) {
 646                 /*
 647                  * Fail the unmap if pages are SOFTLOCKed through this mapping.
 648                  * softlockcnt is protected from change by the as write lock.
 649                  */
 650                 TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK1,
 651                     "segdev_unmap:error softlockcnt = %ld", sz);
 652                 DEBUGF(1, (CE_CONT, "segdev_unmap: softlockcnt %ld\n", sz));
 653                 return (EAGAIN);
 654         }
 655 
 656         /*
 657          * Check for bad sizes
 658          */
 659         if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
 660             (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
 661                 panic("segdev_unmap");
 662 
 663         if (dhp != NULL) {
 664                 devmap_handle_t *tdhp;
 665                 /*
 666                  * If large page size was used in hat_devload(),
 667                  * the same page size must be used in hat_unload().
 668                  */
 669                 dhpp = tdhp = devmap_find_handle(dhp, addr);
 670                 while (tdhp != NULL) {
 671                         if (tdhp->dh_flags & DEVMAP_FLAG_LARGE) {
 672                                 break;
 673                         }
 674                         tdhp = tdhp->dh_next;
 675                 }
 676                 if (tdhp != NULL) {     /* found a dhp using large pages */
 677                         size_t slen = len;
 678                         size_t mlen;
 679                         size_t soff;
 680 
 681                         soff = (ulong_t)(addr - dhpp->dh_uvaddr);
 682                         while (slen != 0) {
 683                                 mlen = MIN(slen, (dhpp->dh_len - soff));
 684                                 hat_unload(seg->s_as->a_hat, dhpp->dh_uvaddr,
 685                                     dhpp->dh_len, HAT_UNLOAD_UNMAP);
 686                                 dhpp = dhpp->dh_next;
 687                                 ASSERT(slen >= mlen);
 688                                 slen -= mlen;
 689                                 soff = 0;
 690                         }
 691                 } else
 692                         hat_unload(seg->s_as->a_hat, addr, len,
 693                             HAT_UNLOAD_UNMAP);
 694         } else {
 695                 /*
 696                  * Unload any hardware translations in the range
 697                  * to be taken out.
 698                  */
 699                 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
 700         }
 701 
 702         /*
 703          * Get the user offset, which will be used in the driver callbacks.
 704          */
 705         off = sdp->offset + (offset_t)(addr - seg->s_base);
 706 
 707         /*
 708          * Inform the vnode of the unmapping.
 709          */
 710         ASSERT(sdp->vp != NULL);
 711         (void) VOP_DELMAP(VTOCVP(sdp->vp), off, seg->s_as, addr, len,
 712             sdp->prot, sdp->maxprot, sdp->type, CRED(), NULL);
 713 
 714         /*
 715          * Check for entire segment
 716          */
 717         if (addr == seg->s_base && len == seg->s_size) {
 718                 seg_free(seg);
 719                 return (0);
 720         }
 721 
 722         opages = seg_pages(seg);
 723         dpages = btop(len);
 724         npages = opages - dpages;
 725 
 726         /*
 727          * Check for beginning of segment
 728          */
 729         if (addr == seg->s_base) {
 730                 if (sdp->vpage != NULL) {
 731                         register struct vpage *ovpage;
 732 
 733                         ovpage = sdp->vpage; /* keep pointer to vpage */
 734 
 735                         nbytes = vpgtob(npages);
 736                         sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
 737                         bcopy(&ovpage[dpages], sdp->vpage, nbytes);
 738 
 739                         /* free up old vpage */
 740                         kmem_free(ovpage, vpgtob(opages));
 741                 }
 742 
 743                 /*
 744                  * free devmap handles from the beginning of the mapping.
 745                  */
 746                 if (dhp != NULL)
 747                         devmap_handle_unmap_head(dhp, len);
 748 
 749                 sdp->offset += (offset_t)len;
 750 
 751                 seg->s_base += len;
 752                 seg->s_size -= len;
 753 
 754                 return (0);
 755         }
 756 
 757         /*
 758          * Check for end of segment
 759          */
 760         if (addr + len == seg->s_base + seg->s_size) {
 761                 if (sdp->vpage != NULL) {
 762                         register struct vpage *ovpage;
 763 
 764                         ovpage = sdp->vpage; /* keep pointer to vpage */
 765 
 766                         nbytes = vpgtob(npages);
 767                         sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
 768                         bcopy(ovpage, sdp->vpage, nbytes);
 769 
 770                         /* free up old vpage */
 771                         kmem_free(ovpage, vpgtob(opages));
 772                 }
 773                 seg->s_size -= len;
 774 
 775                 /*
 776                  * free devmap handles from addr to the end of the mapping.
 777                  */
 778                 if (dhp != NULL)
 779                         devmap_handle_unmap_tail(dhp, addr);
 780 
 781                 return (0);
 782         }
 783 
 784         /*
 785          * The section to be unmapped is in the middle of the segment,
 786          * so we have to split it into two segments.  nseg is made for
 787          * the high end while seg is cut down at the low end.
 788          */
 789         nbase = addr + len;                             /* new seg base */
 790         nsize = (seg->s_base + seg->s_size) - nbase;      /* new seg size */
 791         seg->s_size = addr - seg->s_base;         /* shrink old seg */
 792         nseg = seg_alloc(seg->s_as, nbase, nsize);
 793         if (nseg == NULL)
 794                 panic("segdev_unmap seg_alloc");
 795 
 796         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK2,
 797             "segdev_unmap: seg=%p nseg=%p", (void *)seg, (void *)nseg);
 798         DEBUGF(3, (CE_CONT, "segdev_unmap: segdev_dup seg %p nseg %p\n",
 799             (void *)seg, (void *)nseg));
 800         nsdp = sdp_alloc();
 801 
 802         nseg->s_ops = seg->s_ops;
 803         nseg->s_data = (void *)nsdp;
 804 
 805         VN_HOLD(sdp->vp);
 806         nsdp->mapfunc = sdp->mapfunc;
 807         nsdp->offset = sdp->offset + (offset_t)(nseg->s_base - seg->s_base);
 808         nsdp->vp     = sdp->vp;
 809         nsdp->pageprot = sdp->pageprot;
 810         nsdp->prot   = sdp->prot;
 811         nsdp->maxprot = sdp->maxprot;
 812         nsdp->type = sdp->type;
 813         nsdp->hat_attr = sdp->hat_attr;
 814         nsdp->hat_flags = sdp->hat_flags;
 815         nsdp->softlockcnt = 0;
 816 
 817         /*
 818          * Initialize per page data if the segment we are
 819          * dup'ing has per page information.
 820          */
 821         if (sdp->vpage != NULL) {
 822                 /* need to split vpage into two arrays */
 823                 register size_t nnbytes;
 824                 register size_t nnpages;
 825                 register struct vpage *ovpage;
 826 
 827                 ovpage = sdp->vpage;         /* keep pointer to vpage */
 828 
 829                 npages = seg_pages(seg);        /* seg has shrunk */
 830                 nbytes = vpgtob(npages);
 831                 nnpages = seg_pages(nseg);
 832                 nnbytes = vpgtob(nnpages);
 833 
 834                 sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
 835                 bcopy(ovpage, sdp->vpage, nbytes);
 836 
 837                 nsdp->vpage = kmem_alloc(nnbytes, KM_SLEEP);
 838                 bcopy(&ovpage[npages + dpages], nsdp->vpage, nnbytes);
 839 
 840                 /* free up old vpage */
 841                 kmem_free(ovpage, vpgtob(opages));
 842         } else
 843                 nsdp->vpage = NULL;
 844 
 845         /*
 846          * unmap dhps.
 847          */
 848         if (dhp == NULL) {
 849                 nsdp->devmap_data = NULL;
 850                 return (0);
 851         }
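        /*
         * Summary (added for clarity): the loop below walks the dhp chain
         * and classifies how <addr, addr+len> overlaps each handle: a handle
         * may lie entirely on the low side (it stays with seg), entirely on
         * the high side (it moves to nseg), enclose the range (it is split
         * into dhp and a new newdhp), have only its tail or head unmapped,
         * or be completely unmapped.
         */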
 852         while (dhp != NULL) {
 853                 callbackops = &dhp->dh_callbackops;
 854                 TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK3,
 855                     "segdev_unmap: dhp=%p addr=%p", dhp, addr);
 856                 DEBUGF(3, (CE_CONT, "unmap: dhp %p addr %p uvaddr %p len %lx\n",
 857                     (void *)dhp, (void *)addr,
 858                     (void *)dhp->dh_uvaddr, dhp->dh_len));
 859 
 860                 if (addr == (dhp->dh_uvaddr + dhp->dh_len)) {
 861                         dhpp = dhp->dh_next;
 862                         dhp->dh_next = NULL;
 863                         dhp = dhpp;
 864                 } else if (addr > (dhp->dh_uvaddr + dhp->dh_len)) {
 865                         dhp = dhp->dh_next;
 866                 } else if (addr > dhp->dh_uvaddr &&
 867                     (addr + len) < (dhp->dh_uvaddr + dhp->dh_len)) {
 868                         /*
 869                          * <addr, addr+len> is enclosed by dhp.
 870                          * create a newdhp that begins at addr+len and
 871                          * ends at dhp->dh_uvaddr+dhp->dh_len.
 872                          */
 873                         newdhp = kmem_alloc(sizeof (devmap_handle_t), KM_SLEEP);
 874                         HOLD_DHP_LOCK(dhp);
 875                         bcopy(dhp, newdhp, sizeof (devmap_handle_t));
 876                         RELE_DHP_LOCK(dhp);
 877                         newdhp->dh_seg = nseg;
 878                         newdhp->dh_next = dhp->dh_next;
 879                         if (dhp->dh_softlock != NULL)
 880                                 newdhp->dh_softlock = devmap_softlock_init(
 881                                     newdhp->dh_dev,
 882                                     (ulong_t)callbackops->devmap_access);
 883                         if (dhp->dh_ctx != NULL)
 884                                 newdhp->dh_ctx = devmap_ctxinit(newdhp->dh_dev,
 885                                     (ulong_t)callbackops->devmap_access);
 886                         if (newdhp->dh_flags & DEVMAP_LOCK_INITED) {
 887                                 mutex_init(&newdhp->dh_lock,
 888                                     NULL, MUTEX_DEFAULT, NULL);
 889                         }
 890                         if (callbackops->devmap_unmap != NULL)
 891                                 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
 892                                     off, len, dhp, &dhp->dh_pvtp,
 893                                     newdhp, &newdhp->dh_pvtp);
 894                         mlen = len + (addr - dhp->dh_uvaddr);
 895                         devmap_handle_reduce_len(newdhp, mlen);
 896                         nsdp->devmap_data = newdhp;
 897                         /* XX Changing len should recalculate LARGE flag */
 898                         dhp->dh_len = addr - dhp->dh_uvaddr;
 899                         dhpp = dhp->dh_next;
 900                         dhp->dh_next = NULL;
 901                         dhp = dhpp;
 902                 } else if ((addr > dhp->dh_uvaddr) &&
 903                     ((addr + len) >= (dhp->dh_uvaddr + dhp->dh_len))) {
 904                         mlen = dhp->dh_len + dhp->dh_uvaddr - addr;
 905                         /*
 906                          * <addr, addr+len> spans over dhps.
 907                          */
 908                         if (callbackops->devmap_unmap != NULL)
 909                                 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
 910                                     off, mlen, (devmap_cookie_t *)dhp,
 911                                     &dhp->dh_pvtp, NULL, NULL);
 912                         /* XX Changing len should recalculate LARGE flag */
 913                         dhp->dh_len = addr - dhp->dh_uvaddr;
 914                         dhpp = dhp->dh_next;
 915                         dhp->dh_next = NULL;
 916                         dhp = dhpp;
 917                         nsdp->devmap_data = dhp;
 918                 } else if ((addr + len) >= (dhp->dh_uvaddr + dhp->dh_len)) {
 919                         /*
 920                          * dhp is enclosed by <addr, addr+len>.
 921                          */
 922                         dhp->dh_seg = nseg;
 923                         nsdp->devmap_data = dhp;
 924                         dhp = devmap_handle_unmap(dhp);
 925                         nsdp->devmap_data = dhp; /* XX redundant? */
 926                 } else if (((addr + len) > dhp->dh_uvaddr) &&
 927                     ((addr + len) < (dhp->dh_uvaddr + dhp->dh_len))) {
 928                         mlen = addr + len - dhp->dh_uvaddr;
 929                         if (callbackops->devmap_unmap != NULL)
 930                                 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
 931                                     dhp->dh_uoff, mlen, NULL,
 932                                     NULL, dhp, &dhp->dh_pvtp);
 933                         devmap_handle_reduce_len(dhp, mlen);
 934                         nsdp->devmap_data = dhp;
 935                         dhp->dh_seg = nseg;
 936                         dhp = dhp->dh_next;
 937                 } else {
 938                         dhp->dh_seg = nseg;
 939                         dhp = dhp->dh_next;
 940                 }
 941         }
 942         return (0);
 943 }
 944 
 945 /*
 946  * Utility function that reduces the length of a devmap handle during unmap.
 947  * Note that this is only used for unmapping the front portion of the handle,
 948  * i.e., we are bumping the offset/pfn etc. up by len.
 949  * Do not use if reducing length at the tail.
 950  */
 951 static void
 952 devmap_handle_reduce_len(devmap_handle_t *dhp, size_t len)
 953 {
 954         struct ddi_umem_cookie *cp;
 955         struct devmap_pmem_cookie *pcp;
 956         /*
 957          * adjust devmap handle fields
 958          */
 959         ASSERT(len < dhp->dh_len);
 960 
 961         /* Make sure only page-aligned changes are done */
 962         ASSERT((len & PAGEOFFSET) == 0);
 963 
 964         dhp->dh_len -= len;
 965         dhp->dh_uoff += (offset_t)len;
 966         dhp->dh_roff += (offset_t)len;
 967         dhp->dh_uvaddr += len;
 968         /* Need to grab dhp lock if REMAP */
 969         HOLD_DHP_LOCK(dhp);
 970         cp = dhp->dh_cookie;
 971         if (!(dhp->dh_flags & DEVMAP_MAPPING_INVALID)) {
 972                 if (cookie_is_devmem(cp)) {
 973                         dhp->dh_pfn += btop(len);
 974                 } else if (cookie_is_pmem(cp)) {
 975                         pcp = (struct devmap_pmem_cookie *)dhp->dh_pcookie;
 976                         ASSERT((dhp->dh_roff & PAGEOFFSET) == 0 &&
 977                             dhp->dh_roff < ptob(pcp->dp_npages));
 978                 } else {
 979                         ASSERT(dhp->dh_roff < cp->size);
 980                         ASSERT(dhp->dh_cvaddr >= cp->cvaddr &&
 981                             dhp->dh_cvaddr < (cp->cvaddr + cp->size));
 982                         ASSERT((dhp->dh_cvaddr + len) <=
 983                             (cp->cvaddr + cp->size));
 984 
 985                         dhp->dh_cvaddr += len;
 986                 }
 987         }
 988         /* XXX - Should recalculate the DEVMAP_FLAG_LARGE after changes */
 989         RELE_DHP_LOCK(dhp);
 990 }
 991 
 992 /*
 993  * Free devmap handle, dhp.
 994  * Return the next devmap handle on the linked list.
 995  */
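/*
 * Usage note (added): because the next handle is returned, callers can free
 * an entire chain with a simple loop, as segdev_free() does below:
 *
 *	while (dhp != NULL)
 *		dhp = devmap_handle_unmap(dhp);
 */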
 996 static devmap_handle_t *
 997 devmap_handle_unmap(devmap_handle_t *dhp)
 998 {
 999         struct devmap_callback_ctl *callbackops = &dhp->dh_callbackops;
1000         struct segdev_data *sdp = (struct segdev_data *)dhp->dh_seg->s_data;
1001         devmap_handle_t *dhpp = (devmap_handle_t *)sdp->devmap_data;
1002 
1003         ASSERT(dhp != NULL);
1004 
1005         /*
1006          * before we free up dhp, call the driver's devmap_unmap entry point
1007          * to free resources allocated for this dhp.
1008          */
1009         if (callbackops->devmap_unmap != NULL) {
1010                 (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp, dhp->dh_uoff,
1011                     dhp->dh_len, NULL, NULL, NULL, NULL);
1012         }
1013 
1014         if (dhpp == dhp) {      /* releasing first dhp, change sdp data */
1015                 sdp->devmap_data = dhp->dh_next;
1016         } else {
1017                 while (dhpp->dh_next != dhp) {
1018                         dhpp = dhpp->dh_next;
1019                 }
1020                 dhpp->dh_next = dhp->dh_next;
1021         }
1022         dhpp = dhp->dh_next; /* return value is next dhp in chain */
1023 
1024         if (dhp->dh_softlock != NULL)
1025                 devmap_softlock_rele(dhp);
1026 
1027         if (dhp->dh_ctx != NULL)
1028                 devmap_ctx_rele(dhp);
1029 
1030         if (dhp->dh_flags & DEVMAP_LOCK_INITED) {
1031                 mutex_destroy(&dhp->dh_lock);
1032         }
1033         kmem_free(dhp, sizeof (devmap_handle_t));
1034 
1035         return (dhpp);
1036 }
1037 
1038 /*
1039  * Free complete devmap handles from dhp for len bytes
1040  * dhp can be either the first handle or a subsequent handle
1041  */
1042 static void
1043 devmap_handle_unmap_head(devmap_handle_t *dhp, size_t len)
1044 {
1045         struct devmap_callback_ctl *callbackops;
1046 
1047         /*
1048          * free the devmap handles covered by len.
1049          */
1050         while (len >= dhp->dh_len) {
1051                 len -= dhp->dh_len;
1052                 dhp = devmap_handle_unmap(dhp);
1053         }
1054         if (len != 0) { /* partial unmap at head of first remaining dhp */
1055                 callbackops = &dhp->dh_callbackops;
1056 
1057                 /*
1058                  * Call the unmap callback so the driver can make
1059                  * adjustments to its private data.
1060                  */
1061                 if (callbackops->devmap_unmap != NULL)
1062                         (*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
1063                             dhp->dh_uoff, len, NULL, NULL, dhp, &dhp->dh_pvtp);
1064                 devmap_handle_reduce_len(dhp, len);
1065         }
1066 }
1067 
1068 /*
1069  * Free devmap handles to truncate the mapping after addr.
1070  * RFE: Simpler to pass in a dhp pointing at the correct handle (avoids the
1071  *      find again); could then use this routine in the middle-unmap case too.
1072  */
1073 static void
1074 devmap_handle_unmap_tail(devmap_handle_t *dhp, caddr_t addr)
1075 {
1076         register struct seg *seg = dhp->dh_seg;
1077         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1078         register devmap_handle_t *dhph = (devmap_handle_t *)sdp->devmap_data;
1079         struct devmap_callback_ctl *callbackops;
1080         register devmap_handle_t *dhpp;
1081         size_t maplen;
1082         ulong_t off;
1083         size_t len;
1084 
1085         maplen = (size_t)(addr - dhp->dh_uvaddr);
1086         dhph = devmap_find_handle(dhph, addr);
1087 
1088         while (dhph != NULL) {
1089                 if (maplen == 0) {
1090                         dhph =  devmap_handle_unmap(dhph);
1091                 } else {
1092                         callbackops = &dhph->dh_callbackops;
1093                         len = dhph->dh_len - maplen;
1094                         off = (ulong_t)sdp->offset + (addr - seg->s_base);
1095                         /*
1096                          * Call the unmap callback so the driver
1097                          * can make adjustments to its private data.
1098                          */
1099                         if (callbackops->devmap_unmap != NULL)
1100                                 (*callbackops->devmap_unmap)(dhph,
1101                                     dhph->dh_pvtp, off, len,
1102                                     (devmap_cookie_t *)dhph,
1103                                     &dhph->dh_pvtp, NULL, NULL);
1104                         /* XXX Reducing len needs to recalculate LARGE flag */
1105                         dhph->dh_len = maplen;
1106                         maplen = 0;
1107                         dhpp = dhph->dh_next;
1108                         dhph->dh_next = NULL;
1109                         dhph = dhpp;
1110                 }
1111         } /* end while */
1112 }
1113 
1114 /*
1115  * Free a segment.
1116  */
1117 static void
1118 segdev_free(struct seg *seg)
1119 {
1120         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1121         devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
1122 
1123         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_FREE,
1124             "segdev_free: dhp=%p seg=%p", (void *)dhp, (void *)seg);
1125         DEBUGF(3, (CE_CONT, "segdev_free: dhp %p seg %p\n",
1126             (void *)dhp, (void *)seg));
1127 
1128         /*
1129          * Since the address space is "write" locked, we
1130          * don't need the segment lock to protect "segdev" data.
1131          */
1132         ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
1133 
1134         while (dhp != NULL)
1135                 dhp = devmap_handle_unmap(dhp);
1136 
1137         VN_RELE(sdp->vp);
1138         if (sdp->vpage != NULL)
1139                 kmem_free(sdp->vpage, vpgtob(seg_pages(seg)));
1140 
1141         rw_destroy(&sdp->lock);
1142         kmem_free(sdp, sizeof (*sdp));
1143 }
1144 
1145 static void
1146 free_devmap_handle(devmap_handle_t *dhp)
1147 {
1148         register devmap_handle_t *dhpp;
1149 
1150         /*
1151          * free up devmap handle
1152          */
1153         while (dhp != NULL) {
1154                 dhpp = dhp->dh_next;
1155                 if (dhp->dh_flags & DEVMAP_LOCK_INITED) {
1156                         mutex_destroy(&dhp->dh_lock);
1157                 }
1158 
1159                 if (dhp->dh_softlock != NULL)
1160                         devmap_softlock_rele(dhp);
1161 
1162                 if (dhp->dh_ctx != NULL)
1163                         devmap_ctx_rele(dhp);
1164 
1165                 kmem_free(dhp, sizeof (devmap_handle_t));
1166                 dhp = dhpp;
1167         }
1168 }
1169 
1170 /*
1171  * routines to lock and unlock underlying segkp segment for
1172  * KMEM_PAGEABLE type cookies.
1173  * segkp only allows a single pending F_SOFTLOCK;
1174  * we keep track of the number of locks in the cookie so we can
1175  * have multiple pending faults and manage the calls to segkp.
1176  * RFE: if segkp supports either pagelock or can support multiple
1177  * calls to F_SOFTLOCK, then these routines can go away.
1178  *      If pagelock, segdev_faultpage could fault on a page by page basis,
1179  *              which simplifies the code quite a bit.
1180  *      If multiple calls were allowed but not partial ranges, the need for
1181  *      cookie->lock and the locked count goes away; code could call as_fault directly.
1182  */
1183 static faultcode_t
1184 acquire_kpmem_lock(struct ddi_umem_cookie *cookie, size_t npages)
1185 {
1186         int err = 0;
1187         ASSERT(cookie_is_kpmem(cookie));
1188         /*
1189          * Fault in pages in segkp with F_SOFTLOCK.
1190          * We want to hold the lock until all pages have been loaded.
1191          * segkp only allows a single caller to hold SOFTLOCK, so the cookie
1192          * holds a count so that we don't call into segkp multiple times.
1193          */
1194         mutex_enter(&cookie->lock);
1195 
1196         /*
1197          * Check for overflow in locked field
1198          */
1199         if ((UINT32_MAX - cookie->locked) < npages) {
1200                 err = FC_MAKE_ERR(ENOMEM);
1201         } else if (cookie->locked == 0) {
1202                 /* First time locking */
1203                 err = as_fault(kas.a_hat, &kas, cookie->cvaddr,
1204                     cookie->size, F_SOFTLOCK, PROT_READ|PROT_WRITE);
1205         }
1206         if (!err) {
1207                 cookie->locked += npages;
1208         }
1209         mutex_exit(&cookie->lock);
1210         return (err);
1211 }
1212 
1213 static void
1214 release_kpmem_lock(struct ddi_umem_cookie *cookie, size_t npages)
1215 {
1216         mutex_enter(&cookie->lock);
1217         ASSERT(cookie_is_kpmem(cookie));
1218         ASSERT(cookie->locked >= npages);
1219         cookie->locked -= (uint_t)npages;
1220         if (cookie->locked == 0) {
1221                 /* Last unlock */
1222                 if (as_fault(kas.a_hat, &kas, cookie->cvaddr,
1223                     cookie->size, F_SOFTUNLOCK, PROT_READ|PROT_WRITE))
1224                         panic("segdev releasing kpmem lock %p", (void *)cookie);
1225         }
1226         mutex_exit(&cookie->lock);
1227 }
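/*
 * Pairing note (added for clarity): the matching release_kpmem_lock() call
 * is made from segdev_softunlock() below, which passes btopr(mlen) as the
 * page count; acquire_kpmem_lock() is invoked from the F_SOFTLOCK fault
 * path (not shown in this excerpt).
 */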
1228 
1229 /*
1230  * Routines to synchronize F_SOFTLOCK and F_INVAL faults for
1231  * drivers with devmap_access callbacks
1232  * slock->softlocked basically works like a rw lock
1233  *      -ve counts => F_SOFTLOCK in progress
1234  *      +ve counts => F_INVAL/F_PROT in progress
1235  * We allow only one F_SOFTLOCK at a time
1236  * but can have multiple pending F_INVAL/F_PROT calls
1237  *
1238  * This routine waits using cv_wait_sig so that killing a process is more
1239  * graceful; it returns EINTR if it comes out due to a signal, 0 otherwise.
1240  */
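/*
 * Example of the counting convention (added for clarity): two concurrent
 * F_INVAL faults of 4 pages each drive slock->softlocked to +8, and an
 * F_SOFTLOCK fault must wait in devmap_softlock_enter() until both have
 * exited; conversely, while an F_SOFTLOCK holds the count at -npages, new
 * F_INVAL/F_PROT callers block until devmap_softlock_exit() brings the
 * count back to zero and signals the cv.
 */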
1241 static int devmap_softlock_enter(
1242         struct devmap_softlock *slock,
1243         size_t npages,
1244         enum fault_type type)
1245 {
1246         if (npages == 0)
1247                 return (0);
1248         mutex_enter(&(slock->lock));
1249         switch (type) {
1250         case F_SOFTLOCK :
1251                 while (slock->softlocked) {
1252                         if (cv_wait_sig(&(slock)->cv, &(slock)->lock) == 0) {
1253                                 /* signalled */
1254                                 mutex_exit(&(slock->lock));
1255                                 return (EINTR);
1256                         }
1257                 }
1258                 slock->softlocked -= npages; /* -ve count => locked */
1259                 break;
1260         case F_INVAL :
1261         case F_PROT :
1262                 while (slock->softlocked < 0)
1263                         if (cv_wait_sig(&(slock)->cv, &(slock)->lock) == 0) {
1264                                 /* signalled */
1265                                 mutex_exit(&(slock->lock));
1266                                 return (EINTR);
1267                         }
1268                 slock->softlocked += npages; /* +ve count => f_invals */
1269                 break;
1270         default:
1271                 ASSERT(0);
1272         }
1273         mutex_exit(&(slock->lock));
1274         return (0);
1275 }
1276 
1277 static void devmap_softlock_exit(
1278         struct devmap_softlock *slock,
1279         size_t npages,
1280         enum fault_type type)
1281 {
1282         if (slock == NULL)
1283                 return;
1284         mutex_enter(&(slock->lock));
1285         switch (type) {
1286         case F_SOFTLOCK :
1287                 ASSERT(-slock->softlocked >= npages);
1288                 slock->softlocked += npages; /* -ve count is softlocked */
1289                 if (slock->softlocked == 0)
1290                         cv_signal(&slock->cv);
1291                 break;
1292         case F_INVAL :
1293         case F_PROT:
1294                 ASSERT(slock->softlocked >= npages);
1295                 slock->softlocked -= npages;
1296                 if (slock->softlocked == 0)
1297                         cv_signal(&slock->cv);
1298                 break;
1299         default:
1300                 ASSERT(0);
1301         }
1302         mutex_exit(&(slock->lock));
1303 }
1304 
1305 /*
1306  * Do an F_SOFTUNLOCK call over the range requested.
1307  * The range must have already been F_SOFTLOCK'ed.
1308  * The segment lock should be held (but not the segment private lock?).
1309  * The softunlock code below does not adjust for large page sizes; it
1310  * assumes the caller already did any addr/len adjustments for
1311  * pagesize mappings before calling.
1312  */
1313 /*ARGSUSED*/
1314 static void
1315 segdev_softunlock(
1316         struct hat *hat,                /* the hat */
1317         struct seg *seg,                /* seg_dev of interest */
1318         caddr_t addr,                   /* base address of range */
1319         size_t len,                     /* number of bytes */
1320         enum seg_rw rw)                 /* type of access at fault */
1321 {
1322         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1323         devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
1324 
1325         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_SOFTUNLOCK,
1326             "segdev_softunlock:dhp_head=%p sdp=%p addr=%p len=%lx",
1327             dhp_head, sdp, addr, len);
1328         DEBUGF(3, (CE_CONT, "segdev_softunlock: dhp %p lockcnt %lx "
1329             "addr %p len %lx\n",
1330             (void *)dhp_head, sdp->softlockcnt, (void *)addr, len));
1331 
1332         hat_unlock(hat, addr, len);
1333 
1334         if (dhp_head != NULL) {
1335                 devmap_handle_t *dhp;
1336                 size_t mlen;
1337                 size_t tlen = len;
1338                 ulong_t off;
1339 
1340                 dhp = devmap_find_handle(dhp_head, addr);
1341                 ASSERT(dhp != NULL);
1342 
1343                 off = (ulong_t)(addr - dhp->dh_uvaddr);
1344                 while (tlen != 0) {
1345                         mlen = MIN(tlen, (dhp->dh_len - off));
1346 
1347                         /*
1348                          * unlock segkp memory, locked during F_SOFTLOCK
1349                          */
1350                         if (dhp_is_kpmem(dhp)) {
1351                                 release_kpmem_lock(
1352                                     (struct ddi_umem_cookie *)dhp->dh_cookie,
1353                                     btopr(mlen));
1354                         }
1355 
1356                         /*
1357                          * Do the softlock accounting for devmap_access
1358                          */
1359                         if (dhp->dh_callbackops.devmap_access != NULL) {
1360                                 devmap_softlock_exit(dhp->dh_softlock,
1361                                     btopr(mlen), F_SOFTLOCK);
1362                         }
1363 
1364                         tlen -= mlen;
1365                         dhp = dhp->dh_next;
1366                         off = 0;
1367                 }
1368         }
1369 
1370         mutex_enter(&freemem_lock);
1371         ASSERT(sdp->softlockcnt >= btopr(len));
1372         sdp->softlockcnt -= btopr(len);
1373         mutex_exit(&freemem_lock);
1374         if (sdp->softlockcnt == 0) {
1375                 /*
1376                  * All SOFTLOCKS are gone. Wakeup any waiting
1377                  * unmappers so they can try again to unmap.
1378                  * Check for waiters first without the mutex
1379                  * held so we don't always grab the mutex on
1380                  * softunlocks.
1381                  */
1382                 if (AS_ISUNMAPWAIT(seg->s_as)) {
1383                         mutex_enter(&seg->s_as->a_contents);
1384                         if (AS_ISUNMAPWAIT(seg->s_as)) {
1385                                 AS_CLRUNMAPWAIT(seg->s_as);
1386                                 cv_broadcast(&seg->s_as->a_cv);
1387                         }
1388                         mutex_exit(&seg->s_as->a_contents);
1389                 }
1390         }
1391 
1392 }
1393 
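     /*
      * An illustrative sketch (assumptions, not code from this flow): the
      * softlock accounting in this driver is symmetric.  The F_SOFTLOCK
      * paths in segdev_faultpage()/segdev_faultpages() below bump
      * sdp->softlockcnt under freemem_lock and load translations with
      * HAT_LOAD_LOCK; the F_SOFTUNLOCK path above undoes both, roughly:
      *
      *	F_SOFTLOCK (per page, in segdev_faultpage()):
      *		mutex_enter(&freemem_lock);
      *		sdp->softlockcnt++;
      *		mutex_exit(&freemem_lock);
      *		hat_devload(hat, addr, PAGESIZE, pfnum, prot,
      *		    HAT_LOAD_LOCK | sdp->hat_flags);
      *
      *	F_SOFTUNLOCK (whole range, above):
      *		hat_unlock(hat, addr, len);
      *		mutex_enter(&freemem_lock);
      *		sdp->softlockcnt -= btopr(len);
      *		mutex_exit(&freemem_lock);
      *
      * Only when softlockcnt drops to zero are waiters on a_cv (threads
      * waiting to unmap the segment) woken, as done at the end of
      * segdev_softunlock() above.
      */
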
1394 /*
1395  * Handle fault for a single page.
1396  * Done in a separate routine so we can handle errors more easily.
1397  * This routine is called only from segdev_faultpages()
1398  * when looping over the range of addresses requested. The segment lock is held.
1399  */
1400 static faultcode_t
1401 segdev_faultpage(
1402         struct hat *hat,                /* the hat */
1403         struct seg *seg,                /* seg_dev of interest */
1404         caddr_t addr,                   /* address in as */
1405         struct vpage *vpage,            /* pointer to vpage for seg, addr */
1406         enum fault_type type,           /* type of fault */
1407         enum seg_rw rw,                 /* type of access at fault */
1408         devmap_handle_t *dhp)           /* devmap handle if any for this page */
1409 {
1410         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1411         uint_t prot;
1412         pfn_t pfnum = PFN_INVALID;
1413         u_offset_t offset;
1414         uint_t hat_flags;
1415         dev_info_t *dip;
1416 
1417         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_FAULTPAGE,
1418             "segdev_faultpage: dhp=%p seg=%p addr=%p", dhp, seg, addr);
1419         DEBUGF(8, (CE_CONT, "segdev_faultpage: dhp %p seg %p addr %p \n",
1420             (void *)dhp, (void *)seg, (void *)addr));
1421 
1422         /*
1423          * Initialize protection value for this page.
1424          * If we have per page protection values check it now.
1425          */
1426         if (sdp->pageprot) {
1427                 uint_t protchk;
1428 
1429                 switch (rw) {
1430                 case S_READ:
1431                         protchk = PROT_READ;
1432                         break;
1433                 case S_WRITE:
1434                         protchk = PROT_WRITE;
1435                         break;
1436                 case S_EXEC:
1437                         protchk = PROT_EXEC;
1438                         break;
1439                 case S_OTHER:
1440                 default:
1441                         protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
1442                         break;
1443                 }
1444 
1445                 prot = VPP_PROT(vpage);
1446                 if ((prot & protchk) == 0)
1447                         return (FC_PROT);       /* illegal access type */
1448         } else {
1449                 prot = sdp->prot;
1450                 /* caller has already done segment level protection check */
1451         }
1452 
1453         if (type == F_SOFTLOCK) {
1454                 mutex_enter(&freemem_lock);
1455                 sdp->softlockcnt++;
1456                 mutex_exit(&freemem_lock);
1457         }
1458 
1459         hat_flags = ((type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD);
1460         offset = sdp->offset + (u_offset_t)(addr - seg->s_base);
1461         /*
1462          * In the devmap framework, sdp->mapfunc is set to NULL.  We can get
1463          * pfnum from dhp->dh_pfn (at beginning of segment) and offset from
1464          * seg->s_base.
1465          */
1466         if (dhp == NULL) {
1467                 /* If segment has devmap_data, then dhp should be non-NULL */
1468                 ASSERT(sdp->devmap_data == NULL);
1469                 pfnum = (pfn_t)cdev_mmap(sdp->mapfunc, sdp->vp->v_rdev,
1470                     (off_t)offset, prot);
1471                 prot |= sdp->hat_attr;
1472         } else {
1473                 ulong_t off;
1474                 struct ddi_umem_cookie *cp;
1475                 struct devmap_pmem_cookie *pcp;
1476 
1477                 /* ensure the dhp passed in contains addr. */
1478                 ASSERT(dhp == devmap_find_handle(
1479                     (devmap_handle_t *)sdp->devmap_data, addr));
1480 
1481                 off = addr - dhp->dh_uvaddr;
1482 
1483                 /*
1484                  * This routine assumes that the caller makes sure that the
1485                  * fields in dhp used below are unchanged due to remap during
1486                  * this call. Caller does HOLD_DHP_LOCK if needed.
1487                  */
1488                 cp = dhp->dh_cookie;
1489                 if (dhp->dh_flags & DEVMAP_MAPPING_INVALID) {
1490                         pfnum = PFN_INVALID;
1491                 } else if (cookie_is_devmem(cp)) {
1492                         pfnum = dhp->dh_pfn + btop(off);
1493                 } else if (cookie_is_pmem(cp)) {
1494                         pcp = (struct devmap_pmem_cookie *)dhp->dh_pcookie;
1495                         ASSERT((dhp->dh_roff & PAGEOFFSET) == 0 &&
1496                             dhp->dh_roff < ptob(pcp->dp_npages));
1497                         pfnum = page_pptonum(
1498                             pcp->dp_pparray[btop(off + dhp->dh_roff)]);
1499                 } else {
1500                         ASSERT(dhp->dh_roff < cp->size);
1501                         ASSERT(dhp->dh_cvaddr >= cp->cvaddr &&
1502                             dhp->dh_cvaddr < (cp->cvaddr + cp->size));
1503                         ASSERT((dhp->dh_cvaddr + off) <=
1504                             (cp->cvaddr + cp->size));
1505                         ASSERT((dhp->dh_cvaddr + off + PAGESIZE) <=
1506                             (cp->cvaddr + cp->size));
1507 
1508                         switch (cp->type) {
1509                         case UMEM_LOCKED :
1510                                 if (cp->pparray != NULL) {
1511                                         ASSERT((dhp->dh_roff &
1512                                             PAGEOFFSET) == 0);
1513                                         pfnum = page_pptonum(
1514                                             cp->pparray[btop(off +
1515                                             dhp->dh_roff)]);
1516                                 } else {
1517                                         pfnum = hat_getpfnum(
1518                                             ((proc_t *)cp->procp)->p_as->a_hat,
1519                                             cp->cvaddr + off);
1520                                 }
1521                                 break;
1522                         case UMEM_TRASH :
1523                                 pfnum = page_pptonum(trashpp);
1524                                 /*
1525                                  * We should set hat_flags to HAT_NOFAULT also.
1526                                  * However, not all hat layers implement this.
1527                                  */
1528                                 break;
1529                         case KMEM_PAGEABLE:
1530                         case KMEM_NON_PAGEABLE:
1531                                 pfnum = hat_getpfnum(kas.a_hat,
1532                                     dhp->dh_cvaddr + off);
1533                                 break;
1534                         default :
1535                                 pfnum = PFN_INVALID;
1536                                 break;
1537                         }
1538                 }
1539                 prot |= dhp->dh_hat_attr;
1540         }
1541         if (pfnum == PFN_INVALID) {
1542                 return (FC_MAKE_ERR(EFAULT));
1543         }
1544         /* prot should already be OR'ed in with hat_attributes if needed */
1545 
1546         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_FAULTPAGE_CK1,
1547             "segdev_faultpage: pfnum=%lx memory=%x prot=%x flags=%x",
1548             pfnum, pf_is_memory(pfnum), prot, hat_flags);
1549         DEBUGF(9, (CE_CONT, "segdev_faultpage: pfnum %lx memory %x "
1550             "prot %x flags %x\n", pfnum, pf_is_memory(pfnum), prot, hat_flags));
1551 
1552         if (pf_is_memory(pfnum) || (dhp != NULL)) {
1553                 /*
1554                  * It's not _really_ required here to pass sdp->hat_flags
1555                  * to hat_devload even though we do it.
1556                  * This is because the hat figures out that DEVMEM mappings
1557                  * are non-consistent anyway.
1558                  */
1559                 hat_devload(hat, addr, PAGESIZE, pfnum,
1560                     prot, hat_flags | sdp->hat_flags);
1561                 return (0);
1562         }
1563 
1564         /*
1565          * Fall through to the case where devmap is not used and we need to call
1566          * up the device tree to set up the mapping.
1567          */
1568 
1569         dip = VTOS(VTOCVP(sdp->vp))->s_dip;
1570         ASSERT(dip);
1571 
1572         /*
1573          * When calling ddi_map_fault, we do not OR in sdp->hat_attr,
1574          * because this calls into drivers which may not expect
1575          * prot to have any values other than PROT_ALL.
1576          * The root nexus driver has a hack to peek into the segment
1577          * structure and then OR in sdp->hat_attr.
1578          * XXX: In case the bus_ops interfaces are ever revisited,
1579          * we need to fix this; prot should include other hat attributes.
1580          */
1581         if (ddi_map_fault(dip, hat, seg, addr, NULL, pfnum, prot & PROT_ALL,
1582             (uint_t)(type == F_SOFTLOCK)) != DDI_SUCCESS) {
1583                 return (FC_MAKE_ERR(EFAULT));
1584         }
1585         return (0);
1586 }
1587 
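     /*
      * For reference, a condensed sketch of how segdev_faultpage() above
      * resolves the pfn for a devmap mapping, keyed by cookie type
      * (illustrative only; the switch above is authoritative):
      *
      *	device memory:	pfnum = dhp->dh_pfn + btop(off);
      *	pmem cookie:	pfnum = page_pptonum(
      *			    pcp->dp_pparray[btop(off + dhp->dh_roff)]);
      *	UMEM_LOCKED:	pfnum from cp->pparray[] when present, otherwise
      *			hat_getpfnum() in the locking process's address space;
      *	UMEM_TRASH:	pfnum = page_pptonum(trashpp);
      *	KMEM_PAGEABLE/
      *	KMEM_NON_PAGEABLE:
      *			pfnum = hat_getpfnum(kas.a_hat, dhp->dh_cvaddr + off);
      *
      * A resulting PFN_INVALID (e.g. for DEVMAP_MAPPING_INVALID) becomes
      * FC_MAKE_ERR(EFAULT) above.
      */
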
1588 static faultcode_t
1589 segdev_fault(
1590         struct hat *hat,                /* the hat */
1591         struct seg *seg,                /* the seg_dev of interest */
1592         caddr_t addr,                   /* the address of the fault */
1593         size_t len,                     /* the length of the range */
1594         enum fault_type type,           /* type of fault */
1595         enum seg_rw rw)                 /* type of access at fault */
1596 {
1597         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1598         devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
1599         devmap_handle_t *dhp;
1600         struct devmap_softlock *slock = NULL;
1601         ulong_t slpage = 0;
1602         ulong_t off;
1603         caddr_t maddr = addr;
1604         int err;
1605         int err_is_faultcode = 0;
1606 
1607         TRACE_5(TR_FAC_DEVMAP, TR_DEVMAP_FAULT,
1608             "segdev_fault: dhp_head=%p seg=%p addr=%p len=%lx type=%x",
1609             (void *)dhp_head, (void *)seg, (void *)addr, len, type);
1610         DEBUGF(7, (CE_CONT, "segdev_fault: dhp_head %p seg %p "
1611             "addr %p len %lx type %x\n",
1612             (void *)dhp_head, (void *)seg, (void *)addr, len, type));
1613 
1614         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
1615 
1616         /* Handle non-devmap case */
1617         if (dhp_head == NULL)
1618                 return (segdev_faultpages(hat, seg, addr, len, type, rw, NULL));
1619 
1620         /* Find devmap handle */
1621         if ((dhp = devmap_find_handle(dhp_head, addr)) == NULL)
1622                 return (FC_NOMAP);
1623 
1624         /*
1625          * The seg_dev driver does not implement copy-on-write,
1626          * and always loads translations with the maximal allowed permissions,
1627          * but we got a fault trying to access the device.
1628          * Servicing the fault is not going to produce a better outcome.
1629          * RFE: If we want devmap_access callbacks to be involved in F_PROT
1630          *      faults, then the code below is written for that,
1631          *      pending resolution of the following:
1632          *      - determine whether the F_INVAL/F_SOFTLOCK syncing
1633          *      is needed for F_PROT as well. The code below assumes it is.
1634          *      - If a driver sees F_PROT and calls devmap_load with the same type,
1635          *      then segdev_faultpages will fail with FC_PROT anyway; we need to
1636          *      change that so calls from devmap_load to segdev_faultpages for
1637          *      F_PROT type are retagged to F_INVAL.
1638          * RFE: Today we don't have drivers that use devmap and want to handle
1639          *      F_PROT calls. The code in segdev_fault* is written to allow
1640          *      this case but is not tested. A driver that needs this capability
1641          *      should be able to remove the short-circuit case; resolve the
1642          *      above issues and "should" work.
1643          */
1644         if (type == F_PROT) {
1645                 return (FC_PROT);
1646         }
1647 
1648         /*
1649          * Loop through dhp list calling devmap_access or segdev_faultpages for
1650          * each devmap handle.
1651          * drivers which implement devmap_access can interpose on faults and do
1652          * device-appropriate special actions before calling devmap_load.
1653          */
1654 
1655         /*
1656          * Unfortunately, this simple loop has turned out to expose a variety
1657          * of complex problems, which result in the following convoluted code.
1658          *
1659          * First, a desire to handle a serialization of F_SOFTLOCK calls
1660          * to the driver within the framework.
1661          *      This results in a dh_softlock structure that is on a per device
1662          *      (or device instance) basis and serializes devmap_access calls.
1663          *      Ideally we would need to do this for underlying
1664          *      memory/device regions that are being faulted on
1665          *      but that is hard to identify and, with REMAP, harder.
1666          * Second, a desire to serialize F_INVAL (and F_PROT) calls w.r.t.
1667          *      F_SOFTLOCK calls to the driver.
1668          * These serializations are to simplify the driver programmer model.
1669          * To support these two features, the code first goes through the
1670          *      devmap handles and counts the pages (slpage) that are covered
1671          *      by devmap_access callbacks.
1672          * This part ends with a devmap_softlock_enter call
1673          *      which allows only one F_SOFTLOCK active on a device instance,
1674          *      but multiple F_INVAL/F_PROTs can be active except when a
1675          *      F_SOFTLOCK is active
1676          *
1677          * Next, we don't short-circuit the fault code upfront to call
1678          *      segdev_softunlock for F_SOFTUNLOCK, because we must use
1679          *      the same length when we softlock and softunlock.
1680          *
1681          *      -Hat layers may not support softunlocking lengths less than the
1682          *      original length when there is large page support.
1683          *      -kpmem locking is dependent on keeping the lengths the same.
1684          *      -if drivers handle F_SOFTLOCK, they probably also expect to
1685          *              see an F_SOFTUNLOCK of the same length.
1686          *      Hence, if extending lengths during softlock,
1687          *      softunlock has to make the same adjustments and go through
1688          *      the same loop calling segdev_faultpages/segdev_softunlock,
1689          *      but some of the synchronization and error handling is different.
1690          */
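             /*
              * A condensed sketch of the resulting enter/exit pairing
              * (illustrative only; the code below is authoritative):
              *
              *	F_INVAL/F_PROT:	devmap_softlock_enter(slock, slpage, type);
              *			... fault each dhp in the range ...
              *			devmap_softlock_exit(slock, slpage, type);
              *	F_SOFTLOCK:	devmap_softlock_enter(slock, slpage, type);
              *			... fault each dhp with HAT_LOAD_LOCK ...
              *			(the exit is done by the later F_SOFTUNLOCK)
              *	F_SOFTUNLOCK:	no enter; devmap_softlock_exit() is called
              *			per dhp from segdev_softunlock().
              */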
1691 
1692         if (type != F_SOFTUNLOCK) {
1693                 devmap_handle_t *dhpp = dhp;
1694                 size_t slen = len;
1695 
1696                 /*
1697                  * Calculate the count of pages that are:
1698                  * a) within the (potentially extended) fault region
1699                  * b) AND covered by devmap handle with devmap_access
1700                  */
1701                 off = (ulong_t)(addr - dhpp->dh_uvaddr);
1702                 while (slen != 0) {
1703                         size_t mlen;
1704 
1705                         /*
1706                          * Softlocking on a region that allows remap is
1707                          * unsupported due to unresolved locking issues
1708                          * XXX: unclear what these are?
1709                          *      One potential is that if there is a pending
1710                          *      softlock, then a remap should not be allowed
1711                          *      until the unlock is done. This is easily
1712                          *      fixed by returning an error in devmap*remap on
1713                          *      checking the dh->dh_softlock->softlocked value.
1714                          */
1715                         if ((type == F_SOFTLOCK) &&
1716                             (dhpp->dh_flags & DEVMAP_ALLOW_REMAP)) {
1717                                 return (FC_NOSUPPORT);
1718                         }
1719 
1720                         mlen = MIN(slen, (dhpp->dh_len - off));
1721                         if (dhpp->dh_callbackops.devmap_access) {
1722                                 size_t llen;
1723                                 caddr_t laddr;
1724                                 /*
1725                                  * use extended length for large page mappings
1726                                  */
1727                                 HOLD_DHP_LOCK(dhpp);
1728                                 if ((sdp->pageprot == 0) &&
1729                                     (dhpp->dh_flags & DEVMAP_FLAG_LARGE)) {
1730                                         devmap_get_large_pgsize(dhpp,
1731                                             mlen, maddr, &llen, &laddr);
1732                                 } else {
1733                                         llen = mlen;
1734                                 }
1735                                 RELE_DHP_LOCK(dhpp);
1736 
1737                                 slpage += btopr(llen);
1738                                 slock = dhpp->dh_softlock;
1739                         }
1740                         maddr += mlen;
1741                         ASSERT(slen >= mlen);
1742                         slen -= mlen;
1743                         dhpp = dhpp->dh_next;
1744                         off = 0;
1745                 }
1746                 /*
1747                  * synchronize with other faulting threads and wait till safe;
1748                  * devmap_softlock_enter might return due to a signal in cv_wait.
1749                  *
1750                  * devmap_softlock_enter has to be called outside of while loop
1751                  * to prevent a deadlock if len spans over multiple dhps.
1752                  * dh_softlock is based on device instance and if multiple dhps
1753                  * use the same device instance, the second dhp's LOCK call
1754                  * will hang waiting on the first to complete.
1755                  * devmap_setup verifies that slocks in a dhp_chain are same.
1756                  * RFE: this deadlock only holds true for F_SOFTLOCK. For
1757                  *      F_INVAL/F_PROT, since we now allow multiple in parallel,
1758                  *      we could have done the softlock_enter inside the loop
1759                  *      and supported multi-dhp mappings with dissimilar devices
1760                  */
1761                 if (err = devmap_softlock_enter(slock, slpage, type))
1762                         return (FC_MAKE_ERR(err));
1763         }
1764 
1765         /* reset 'maddr' to the start addr of the range of fault. */
1766         maddr = addr;
1767 
1768         /* calculate the offset corresponding to 'addr' in the first dhp. */
1769         off = (ulong_t)(addr - dhp->dh_uvaddr);
1770 
1771         /*
1772          * The fault length may span over multiple dhps.
1773          * Loop until the total length is satisfied.
1774          */
1775         while (len != 0) {
1776                 size_t llen;
1777                 size_t mlen;
1778                 caddr_t laddr;
1779 
1780                 /*
1781                  * mlen is the smaller of 'len' and the length
1782                  * from addr to the end of mapping defined by dhp.
1783                  */
1784                 mlen = MIN(len, (dhp->dh_len - off));
1785 
1786                 HOLD_DHP_LOCK(dhp);
1787                 /*
1788                  * Pass the extended length and address to devmap_access
1789                  * if large pagesize is used for loading address translations.
1790                  */
1791                 if ((sdp->pageprot == 0) &&
1792                     (dhp->dh_flags & DEVMAP_FLAG_LARGE)) {
1793                         devmap_get_large_pgsize(dhp, mlen, maddr,
1794                             &llen, &laddr);
1795                         ASSERT(maddr == addr || laddr == maddr);
1796                 } else {
1797                         llen = mlen;
1798                         laddr = maddr;
1799                 }
1800 
1801                 if (dhp->dh_callbackops.devmap_access != NULL) {
1802                         offset_t aoff;
1803 
1804                         aoff = sdp->offset + (offset_t)(laddr - seg->s_base);
1805 
1806                         /*
1807                          * call driver's devmap_access entry point which will
1808                          * call devmap_load/contextmgmt to load the translations
1809                          *
1810                          * We drop the dhp_lock before calling access so
1811                          * drivers can call devmap_*_remap within access
1812                          */
1813                         RELE_DHP_LOCK(dhp);
1814 
1815                         err = (*dhp->dh_callbackops.devmap_access)(
1816                             dhp, (void *)dhp->dh_pvtp, aoff, llen, type, rw);
1817                 } else {
1818                         /*
1819                          * If no devmap_access entry point, then load mappings
1820                          * hold dhp_lock across faultpages if REMAP
1821                          */
1822                         err = segdev_faultpages(hat, seg, laddr, llen,
1823                             type, rw, dhp);
1824                         err_is_faultcode = 1;
1825                         RELE_DHP_LOCK(dhp);
1826                 }
1827 
1828                 if (err) {
1829                         if ((type == F_SOFTLOCK) && (maddr > addr)) {
1830                                 /*
1831                                  * If not first dhp, use
1832                                  * segdev_fault(F_SOFTUNLOCK) for prior dhps
1833                                  * While this is recursion, it is incorrect to
1834                                  * call just segdev_softunlock
1835                                  * if we are using either large pages
1836                                  * or devmap_access. It is more correct
1837                                  * to go through the same loop as above
1838                                  * rather than call segdev_softunlock directly;
1839                                  * it will use the right lengths as well as
1840                                  * call into the driver devmap_access routines.
1841                                  */
1842                                 size_t done = (size_t)(maddr - addr);
1843                                 (void) segdev_fault(hat, seg, addr, done,
1844                                     F_SOFTUNLOCK, S_OTHER);
1845                                 /*
1846                                  * reduce slpage by number of pages
1847                                  * released by segdev_softunlock
1848                                  */
1849                                 ASSERT(slpage >= btopr(done));
1850                                 devmap_softlock_exit(slock,
1851                                     slpage - btopr(done), type);
1852                         } else {
1853                                 devmap_softlock_exit(slock, slpage, type);
1854                         }
1855 
1856 
1857                         /*
1858                          * Segdev_faultpages() already returns a faultcode,
1859                          * hence, the result from segdev_faultpages() should be
1860                          * returned directly.
1861                          */
1862                         if (err_is_faultcode)
1863                                 return (err);
1864                         return (FC_MAKE_ERR(err));
1865                 }
1866 
1867                 maddr += mlen;
1868                 ASSERT(len >= mlen);
1869                 len -= mlen;
1870                 dhp = dhp->dh_next;
1871                 off = 0;
1872 
1873                 ASSERT(!dhp || len == 0 || maddr == dhp->dh_uvaddr);
1874         }
1875         /*
1876          * release the softlock count at end of fault
1877          * For F_SOFTLOCK this is done in the later F_SOFTUNLOCK.
1878          */
1879         if ((type == F_INVAL) || (type == F_PROT))
1880                 devmap_softlock_exit(slock, slpage, type);
1881         return (0);
1882 }
1883 
1884 /*
1885  * segdev_faultpages
1886  *
1887  * Used to fault in seg_dev segment pages. Called by segdev_fault or devmap_load.
1888  * This routine assumes that the caller makes sure that the fields
1889  * in dhp used below are not changed due to remap during this call.
1890  * Caller does HOLD_DHP_LOCK if needed.
1891  * This routine returns a faultcode_t as a return value for segdev_fault.
1892  */
1893 static faultcode_t
1894 segdev_faultpages(
1895         struct hat *hat,                /* the hat */
1896         struct seg *seg,                /* the seg_dev of interest */
1897         caddr_t addr,                   /* the address of the fault */
1898         size_t len,                     /* the length of the range */
1899         enum fault_type type,           /* type of fault */
1900         enum seg_rw rw,                 /* type of access at fault */
1901         devmap_handle_t *dhp)           /* devmap handle */
1902 {
1903         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
1904         register caddr_t a;
1905         struct vpage *vpage;
1906         struct ddi_umem_cookie *kpmem_cookie = NULL;
1907         int err;
1908 
1909         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_FAULTPAGES,
1910             "segdev_faultpages: dhp=%p seg=%p addr=%p len=%lx",
1911             (void *)dhp, (void *)seg, (void *)addr, len);
1912         DEBUGF(5, (CE_CONT, "segdev_faultpages: "
1913             "dhp %p seg %p addr %p len %lx\n",
1914             (void *)dhp, (void *)seg, (void *)addr, len));
1915 
1916         /*
1917          * The seg_dev driver does not implement copy-on-write,
1918          * and always loads translations with the maximal allowed permissions,
1919          * but we got a fault trying to access the device.
1920          * Servicing the fault is not going to produce a better outcome.
1921          * XXX: If we want to allow devmap_access to handle F_PROT calls,
1922          * this code should be removed to let the normal fault handling
1923          * take care of reporting the error.
1924          */
1925         if (type == F_PROT) {
1926                 return (FC_PROT);
1927         }
1928 
1929         if (type == F_SOFTUNLOCK) {
1930                 segdev_softunlock(hat, seg, addr, len, rw);
1931                 return (0);
1932         }
1933 
1934         /*
1935          * For kernel pageable memory, fault/lock segkp pages
1936          * We hold this until the completion of this
1937          * fault (INVAL/PROT) or till unlock (SOFTLOCK).
1938          */
1939         if ((dhp != NULL) && dhp_is_kpmem(dhp)) {
1940                 kpmem_cookie = (struct ddi_umem_cookie *)dhp->dh_cookie;
1941                 if (err = acquire_kpmem_lock(kpmem_cookie, btopr(len)))
1942                         return (err);
1943         }
1944 
1945         /*
1946          * If we have the same protections for the entire segment,
1947          * ensure that the access being attempted is legitimate.
1948          */
1949         rw_enter(&sdp->lock, RW_READER);
1950         if (sdp->pageprot == 0) {
1951                 uint_t protchk;
1952 
1953                 switch (rw) {
1954                 case S_READ:
1955                         protchk = PROT_READ;
1956                         break;
1957                 case S_WRITE:
1958                         protchk = PROT_WRITE;
1959                         break;
1960                 case S_EXEC:
1961                         protchk = PROT_EXEC;
1962                         break;
1963                 case S_OTHER:
1964                 default:
1965                         protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
1966                         break;
1967                 }
1968 
1969                 if ((sdp->prot & protchk) == 0) {
1970                         rw_exit(&sdp->lock);
1971                         /* undo kpmem locking */
1972                         if (kpmem_cookie != NULL) {
1973                                 release_kpmem_lock(kpmem_cookie, btopr(len));
1974                         }
1975                         return (FC_PROT);       /* illegal access type */
1976                 }
1977         }
1978 
1979         /*
1980          * We do a single hat_devload for the range if
1981          *   - the devmap framework is used (dhp is not NULL),
1982          *   - pageprot == 0, i.e., no per-page protection is set, and
1983          *   - the pages are device pages, whether or not we use large pages.
1984          */
1985         if ((sdp->pageprot == 0) && (dhp != NULL) && dhp_is_devmem(dhp)) {
1986                 pfn_t pfnum;
1987                 uint_t hat_flags;
1988 
1989                 if (dhp->dh_flags & DEVMAP_MAPPING_INVALID) {
1990                         rw_exit(&sdp->lock);
1991                         return (FC_NOMAP);
1992                 }
1993 
1994                 if (type == F_SOFTLOCK) {
1995                         mutex_enter(&freemem_lock);
1996                         sdp->softlockcnt += btopr(len);
1997                         mutex_exit(&freemem_lock);
1998                 }
1999 
2000                 hat_flags = ((type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD);
2001                 pfnum = dhp->dh_pfn + btop((uintptr_t)(addr - dhp->dh_uvaddr));
2002                 ASSERT(!pf_is_memory(pfnum));
2003 
2004                 hat_devload(hat, addr, len, pfnum, sdp->prot | dhp->dh_hat_attr,
2005                     hat_flags | sdp->hat_flags);
2006                 rw_exit(&sdp->lock);
2007                 return (0);
2008         }
2009 
2010         /* Handle cases where we have to loop through fault handling per-page */
2011 
2012         if (sdp->vpage == NULL)
2013                 vpage = NULL;
2014         else
2015                 vpage = &sdp->vpage[seg_page(seg, addr)];
2016 
2017         /* loop over the address range handling each fault */
2018         for (a = addr; a < addr + len; a += PAGESIZE) {
2019                 if (err = segdev_faultpage(hat, seg, a, vpage, type, rw, dhp)) {
2020                         break;
2021                 }
2022                 if (vpage != NULL)
2023                         vpage++;
2024         }
2025         rw_exit(&sdp->lock);
2026         if (err && (type == F_SOFTLOCK)) { /* error handling for F_SOFTLOCK */
2027                 size_t done = (size_t)(a - addr); /* bytes faulted successfully */
2028                 if (done > 0) {
2029                         /* use softunlock for those pages */
2030                         segdev_softunlock(hat, seg, addr, done, S_OTHER);
2031                 }
2032                 if (kpmem_cookie != NULL) {
2033                         /* release kpmem lock for rest of pages */
2034                         ASSERT(len >= done);
2035                         release_kpmem_lock(kpmem_cookie, btopr(len - done));
2036                 }
2037         } else if ((kpmem_cookie != NULL) && (type != F_SOFTLOCK)) {
2038                 /* for non-SOFTLOCK cases, release kpmem */
2039                 release_kpmem_lock(kpmem_cookie, btopr(len));
2040         }
2041         return (err);
2042 }
2043 
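     /*
      * For reference, a minimal sketch of how segdev_fault() drives the
      * routine above when remapping is allowed (illustrative only):
      *
      *	HOLD_DHP_LOCK(dhp);
      *	err = segdev_faultpages(hat, seg, laddr, llen, type, rw, dhp);
      *	RELE_DHP_LOCK(dhp);
      *
      * Holding dh_lock across the call keeps dh_pfn/dh_cvaddr and the other
      * dhp fields used above stable against a concurrent devmap_*_remap(),
      * which is the assumption stated in the header comment above.
      */
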
2044 /*
2045  * Asynchronous page fault.  We simply do nothing since this
2046  * entry point is not supposed to load up the translation.
2047  */
2048 /*ARGSUSED*/
2049 static faultcode_t
2050 segdev_faulta(struct seg *seg, caddr_t addr)
2051 {
2052         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_FAULTA,
2053             "segdev_faulta: seg=%p addr=%p", (void *)seg, (void *)addr);
2054         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2055 
2056         return (0);
2057 }
2058 
2059 static int
2060 segdev_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
2061 {
2062         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2063         register devmap_handle_t *dhp;
2064         register struct vpage *vp, *evp;
2065         devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
2066         ulong_t off;
2067         size_t mlen, sz;
2068 
2069         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_SETPROT,
2070             "segdev_setprot:start seg=%p addr=%p len=%lx prot=%x",
2071             (void *)seg, (void *)addr, len, prot);
2072         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2073 
2074         if ((sz = sdp->softlockcnt) > 0 && dhp_head != NULL) {
2075                 /*
2076                  * Fail the setprot if pages are SOFTLOCKed through this
2077                  * mapping.
2078                  * Softlockcnt is protected from change by the as read lock.
2079                  */
2080                 TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_SETPROT_CK1,
2081                     "segdev_setprot:error softlockcnt=%lx", sz);
2082                 DEBUGF(1, (CE_CONT, "segdev_setprot: softlockcnt %ld\n", sz));
2083                 return (EAGAIN);
2084         }
2085 
2086         if (dhp_head != NULL) {
2087                 if ((dhp = devmap_find_handle(dhp_head, addr)) == NULL)
2088                         return (EINVAL);
2089 
2090                 /*
2091                  * Check whether the request violates maxprot.
2092                  */
2093                 off = (ulong_t)(addr - dhp->dh_uvaddr);
2094                 mlen  = len;
2095                 while (dhp) {
2096                         if ((dhp->dh_maxprot & prot) != prot)
2097                                 return (EACCES);        /* violated maxprot */
2098 
2099                         if (mlen > (dhp->dh_len - off)) {
2100                                 mlen -= dhp->dh_len - off;
2101                                 dhp = dhp->dh_next;
2102                                 off = 0;
2103                         } else
2104                                 break;
2105                 }
2106         } else {
2107                 if ((sdp->maxprot & prot) != prot)
2108                         return (EACCES);
2109         }
2110 
2111         rw_enter(&sdp->lock, RW_WRITER);
2112         if (addr == seg->s_base && len == seg->s_size && sdp->pageprot == 0) {
2113                 if (sdp->prot == prot) {
2114                         rw_exit(&sdp->lock);
2115                         return (0);                     /* all done */
2116                 }
2117                 sdp->prot = (uchar_t)prot;
2118         } else {
2119                 sdp->pageprot = 1;
2120                 if (sdp->vpage == NULL) {
2121                         /*
2122                          * First time through setting per page permissions,
2123                          * initialize all the vpage structures to prot
2124                          */
2125                         sdp->vpage = kmem_zalloc(vpgtob(seg_pages(seg)),
2126                             KM_SLEEP);
2127                         evp = &sdp->vpage[seg_pages(seg)];
2128                         for (vp = sdp->vpage; vp < evp; vp++)
2129                                 VPP_SETPROT(vp, sdp->prot);
2130                 }
2131                 /*
2132                  * Now go change the needed vpages protections.
2133                  */
2134                 evp = &sdp->vpage[seg_page(seg, addr + len)];
2135                 for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++)
2136                         VPP_SETPROT(vp, prot);
2137         }
2138         rw_exit(&sdp->lock);
2139 
2140         if (dhp_head != NULL) {
2141                 devmap_handle_t *tdhp;
2142                 /*
2143                  * If large page size was used in hat_devload(),
2144                  * the same page size must be used in hat_unload().
2145                  */
2146                 dhp = tdhp = devmap_find_handle(dhp_head, addr);
2147                 while (tdhp != NULL) {
2148                         if (tdhp->dh_flags & DEVMAP_FLAG_LARGE) {
2149                                 break;
2150                         }
2151                         tdhp = tdhp->dh_next;
2152                 }
2153                 if (tdhp) {
2154                         size_t slen = len;
2155                         size_t mlen;
2156                         size_t soff;
2157 
2158                         soff = (ulong_t)(addr - dhp->dh_uvaddr);
2159                         while (slen != 0) {
2160                                 mlen = MIN(slen, (dhp->dh_len - soff));
2161                                 hat_unload(seg->s_as->a_hat, dhp->dh_uvaddr,
2162                                     dhp->dh_len, HAT_UNLOAD);
2163                                 dhp = dhp->dh_next;
2164                                 ASSERT(slen >= mlen);
2165                                 slen -= mlen;
2166                                 soff = 0;
2167                         }
2168                         return (0);
2169                 }
2170         }
2171 
2172         if ((prot & ~PROT_USER) == PROT_NONE) {
2173                 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD);
2174         } else {
2175                 /*
2176                  * RFE: the segment should keep track of all attributes
2177                  * allowing us to remove the deprecated hat_chgprot
2178                  * and use hat_chgattr.
2179                  */
2180                 hat_chgprot(seg->s_as->a_hat, addr, len, prot);
2181         }
2182 
2183         return (0);
2184 }
2185 
2186 static int
2187 segdev_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
2188 {
2189         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2190         struct vpage *vp, *evp;
2191 
2192         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_CHECKPROT,
2193             "segdev_checkprot:start seg=%p addr=%p len=%lx prot=%x",
2194             (void *)seg, (void *)addr, len, prot);
2195         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2196 
2197         /*
2198          * If the segment protections can be used, simply check against them.
2199          */
2200         rw_enter(&sdp->lock, RW_READER);
2201         if (sdp->pageprot == 0) {
2202                 register int err;
2203 
2204                 err = ((sdp->prot & prot) != prot) ? EACCES : 0;
2205                 rw_exit(&sdp->lock);
2206                 return (err);
2207         }
2208 
2209         /*
2210          * Have to check down to the vpage level
2211          */
2212         evp = &sdp->vpage[seg_page(seg, addr + len)];
2213         for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++) {
2214                 if ((VPP_PROT(vp) & prot) != prot) {
2215                         rw_exit(&sdp->lock);
2216                         return (EACCES);
2217                 }
2218         }
2219         rw_exit(&sdp->lock);
2220         return (0);
2221 }
2222 
2223 static int
2224 segdev_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
2225 {
2226         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2227         size_t pgno;
2228 
2229         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_GETPROT,
2230             "segdev_getprot:start seg=%p addr=%p len=%lx protv=%p",
2231             (void *)seg, (void *)addr, len, (void *)protv);
2232         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2233 
2234         pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
2235         if (pgno != 0) {
2236                 rw_enter(&sdp->lock, RW_READER);
2237                 if (sdp->pageprot == 0) {
2238                         do {
2239                                 protv[--pgno] = sdp->prot;
2240                         } while (pgno != 0);
2241                 } else {
2242                         size_t pgoff = seg_page(seg, addr);
2243 
2244                         do {
2245                                 pgno--;
2246                                 protv[pgno] =
2247                                     VPP_PROT(&sdp->vpage[pgno + pgoff]);
2248                         } while (pgno != 0);
2249                 }
2250                 rw_exit(&sdp->lock);
2251         }
2252         return (0);
2253 }
2254 
2255 static u_offset_t
2256 segdev_getoffset(register struct seg *seg, caddr_t addr)
2257 {
2258         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2259 
2260         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_GETOFFSET,
2261             "segdev_getoffset:start seg=%p addr=%p", (void *)seg, (void *)addr);
2262 
2263         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2264 
2265         return ((u_offset_t)sdp->offset + (addr - seg->s_base));
2266 }
2267 
2268 /*ARGSUSED*/
2269 static int
2270 segdev_gettype(register struct seg *seg, caddr_t addr)
2271 {
2272         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2273 
2274         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_GETTYPE,
2275             "segdev_gettype:start seg=%p addr=%p", (void *)seg, (void *)addr);
2276 
2277         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2278 
2279         return (sdp->type);
2280 }
2281 
2282 
2283 /*ARGSUSED*/
2284 static int
2285 segdev_getvp(register struct seg *seg, caddr_t addr, struct vnode **vpp)
2286 {
2287         register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
2288 
2289         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_GETVP,
2290             "segdev_getvp:start seg=%p addr=%p", (void *)seg, (void *)addr);
2291 
2292         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2293 
2294         /*
2295          * Note that this vp is the common_vp of the device, where the
2296          * pages are hung.
2297          */
2298         *vpp = VTOCVP(sdp->vp);
2299 
2300         return (0);
2301 }
2302 
2303 static void
2304 segdev_badop(void)
2305 {
2306         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SEGDEV_BADOP,
2307             "segdev_badop:start");
2308         panic("segdev_badop");
2309         /*NOTREACHED*/
2310 }
2311 
2312 /*
2313  * segdev pages are not in the cache, and thus can't really be controlled.
2314  * Hence, syncs are simply always successful.
2315  */
2316 /*ARGSUSED*/
2317 static int
2318 segdev_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
2319 {
2320         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SYNC, "segdev_sync:start");
2321 
2322         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2323 
2324         return (0);
2325 }
2326 
2327 /*
2328  * segdev pages are always "in core".
2329  */
2330 /*ARGSUSED*/
2331 static size_t
2332 segdev_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
2333 {
2334         size_t v = 0;
2335 
2336         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_INCORE, "segdev_incore:start");
2337 
2338         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2339 
2340         for (len = (len + PAGEOFFSET) & PAGEMASK; len; len -= PAGESIZE,
2341             v += PAGESIZE)
2342                 *vec++ = 1;
2343         return (v);
2344 }
2345 
2346 /*
2347  * segdev pages are not in the cache, and thus can't really be controlled.
2348  * Hence, locks are simply always successful.
2349  */
2350 /*ARGSUSED*/
2351 static int
2352 segdev_lockop(struct seg *seg, caddr_t addr,
2353     size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
2354 {
2355         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_LOCKOP, "segdev_lockop:start");
2356 
2357         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2358 
2359         return (0);
2360 }
2361 
2362 /*
2363  * segdev pages are not in the cache, and thus can't really be controlled.
2364  * Hence, advise is simply always successful.
2365  */
2366 /*ARGSUSED*/
2367 static int
2368 segdev_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
2369 {
2370         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_ADVISE, "segdev_advise:start");
2371 
2372         ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
2373 
2374         return (0);
2375 }
2376 
2377 /*
2378  * segdev pages are not dumped, so we just return
2379  */
2380 /*ARGSUSED*/
2381 static void
2382 segdev_dump(struct seg *seg)
2383 {}
2384 
2385 /*
2386  * ddi_segmap_setup:    Used by drivers that wish to specify mapping attributes
2387  *                      for a segment.  Called from a driver's segmap(9E)
2388  *                      routine.
2389  */
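     /*
      * For illustration, a hypothetical driver's segmap(9E) entry point
      * might simply forward to this routine (the xx_ names and the register
      * number are made up for the example):
      *
      *	static ddi_device_acc_attr_t xx_acc_attr = {
      *		DDI_DEVICE_ATTR_V0,
      *		DDI_NEVERSWAP_ACC,
      *		DDI_STRICTORDER_ACC
      *	};
      *
      *	static int
      *	xx_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp,
      *	    off_t len, uint_t prot, uint_t maxprot, uint_t flags,
      *	    cred_t *credp)
      *	{
      *		return (ddi_segmap_setup(dev, off, as, addrp, len, prot,
      *		    maxprot, flags, credp, &xx_acc_attr, 1));
      *	}
      */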
2390 /*ARGSUSED*/
2391 int
2392 ddi_segmap_setup(dev_t dev, off_t offset, struct as *as, caddr_t *addrp,
2393     off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cred,
2394     ddi_device_acc_attr_t *accattrp, uint_t rnumber)
2395 {
2396         struct segdev_crargs dev_a;
2397         int (*mapfunc)(dev_t dev, off_t off, int prot);
2398         uint_t hat_attr;
2399         pfn_t pfn;
2400         int     error, i;
2401 
2402         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SEGMAP_SETUP,
2403             "ddi_segmap_setup:start");
2404 
2405         if ((mapfunc = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap) == nodev)
2406                 return (ENODEV);
2407 
2408         /*
2409          * Character devices that support the d_mmap
2410          * interface can only be mmap'ed shared.
2411          */
2412         if ((flags & MAP_TYPE) != MAP_SHARED)
2413                 return (EINVAL);
2414 
2415         /*
2416          * Check that this region is indeed mappable on this platform.
2417          * Use the mapping function.
2418          */
2419         if (ddi_device_mapping_check(dev, accattrp, rnumber, &hat_attr) == -1)
2420                 return (ENXIO);
2421 
2422         /*
2423          * Check to ensure that the entire range is
2424          * legal and we are not trying to map in
2425          * more than the device will let us.
2426          */
2427         for (i = 0; i < len; i += PAGESIZE) {
2428                 if (i == 0) {
2429                         /*
2430                          * Save the pfn at offset here. This pfn will be
2431                          * used later to get user address.
2432                          */
2433                         if ((pfn = (pfn_t)cdev_mmap(mapfunc, dev, offset,
2434                             maxprot)) == PFN_INVALID)
2435                                 return (ENXIO);
2436                 } else {
2437                         if (cdev_mmap(mapfunc, dev, offset + i, maxprot) ==
2438                             PFN_INVALID)
2439                                 return (ENXIO);
2440                 }
2441         }
2442 
2443         as_rangelock(as);
2444         /* Pick an address w/o worrying about any vac alignment constraints. */
2445         error = choose_addr(as, addrp, len, ptob(pfn), ADDR_NOVACALIGN, flags);
2446         if (error != 0) {
2447                 as_rangeunlock(as);
2448                 return (error);
2449         }
2450 
2451         dev_a.mapfunc = mapfunc;
2452         dev_a.dev = dev;
2453         dev_a.offset = (offset_t)offset;
2454         dev_a.type = flags & MAP_TYPE;
2455         dev_a.prot = (uchar_t)prot;
2456         dev_a.maxprot = (uchar_t)maxprot;
2457         dev_a.hat_attr = hat_attr;
2458         dev_a.hat_flags = 0;
2459         dev_a.devmap_data = NULL;
2460 
2461         error = as_map(as, *addrp, len, segdev_create, &dev_a);
2462         as_rangeunlock(as);
2463         return (error);
2464 
2465 }
2466 
2467 /*ARGSUSED*/
2468 static int
2469 segdev_pagelock(struct seg *seg, caddr_t addr, size_t len,
2470     struct page ***ppp, enum lock_type type, enum seg_rw rw)
2471 {
2472         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_PAGELOCK,
2473             "segdev_pagelock:start");
2474         return (ENOTSUP);
2475 }
2476 
2477 /*ARGSUSED*/
2478 static int
2479 segdev_setpagesize(struct seg *seg, caddr_t addr, size_t len,
2480     uint_t szc)
2481 {
2482         return (ENOTSUP);
2483 }
2484 
2485 /*
2486  * devmap_device: Used by the devmap framework to establish a mapping,
2487  *                called by devmap_setup(9F) during map setup time.
2488  */
2489 /*ARGSUSED*/
2490 static int
2491 devmap_device(devmap_handle_t *dhp, struct as *as, caddr_t *addr,
2492     offset_t off, size_t len, uint_t flags)
2493 {
2494         devmap_handle_t *rdhp, *maxdhp;
2495         struct segdev_crargs dev_a;
2496         int     err;
2497         uint_t maxprot = PROT_ALL;
2498         offset_t offset = 0;
2499         pfn_t pfn;
2500         struct devmap_pmem_cookie *pcp;
2501 
2502         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_DEVICE,
2503             "devmap_device:start dhp=%p addr=%p off=%llx, len=%lx",
2504             (void *)dhp, (void *)addr, off, len);
2505 
2506         DEBUGF(2, (CE_CONT, "devmap_device: dhp %p addr %p off %llx len %lx\n",
2507             (void *)dhp, (void *)addr, off, len));
2508 
2509         as_rangelock(as);
2510         if ((flags & MAP_FIXED) == 0) {
2511                 offset_t aligned_off;
2512 
2513                 rdhp = maxdhp = dhp;
2514                 while (rdhp != NULL) {
2515                         maxdhp = (maxdhp->dh_len > rdhp->dh_len) ?
2516                             maxdhp : rdhp;
2517                         rdhp = rdhp->dh_next;
2518                         maxprot |= dhp->dh_maxprot;
2519                 }
2520                 offset = maxdhp->dh_uoff - dhp->dh_uoff;
2521 
2522                 /*
2523                  * Use the dhp that has the
2524                  * largest len to get user address.
2525                  */
2526                 /*
2527                  * If MAPPING_INVALID, cannot use dh_pfn/dh_cvaddr,
2528                  * use 0 which is as good as any other.
2529                  */
2530                 if (maxdhp->dh_flags & DEVMAP_MAPPING_INVALID) {
2531                         aligned_off = (offset_t)0;
2532                 } else if (dhp_is_devmem(maxdhp)) {
2533                         aligned_off = (offset_t)ptob(maxdhp->dh_pfn) - offset;
2534                 } else if (dhp_is_pmem(maxdhp)) {
2535                         pcp = (struct devmap_pmem_cookie *)maxdhp->dh_pcookie;
2536                         pfn = page_pptonum(
2537                             pcp->dp_pparray[btop(maxdhp->dh_roff)]);
2538                         aligned_off = (offset_t)ptob(pfn) - offset;
2539                 } else {
2540                         aligned_off = (offset_t)(uintptr_t)maxdhp->dh_cvaddr -
2541                             offset;
2542                 }
2543 
2544                 /*
2545                  * Pick an address aligned to dh_cookie.
2546                  * For kernel memory/user memory, the cookie is cvaddr.
2547                  * For device memory, the cookie is the physical address.
2548                  */
2549                 map_addr(addr, len, aligned_off, 1, flags);
2550                 if (*addr == NULL) {
2551                         as_rangeunlock(as);
2552                         return (ENOMEM);
2553                 }
2554         } else {
2555                 /*
2556                  * User-specified address; blow away any previous mappings.
2557                  */
2558                 (void) as_unmap(as, *addr, len);
2559         }
2560 
2561         dev_a.mapfunc = NULL;
2562         dev_a.dev = dhp->dh_dev;
2563         dev_a.type = flags & MAP_TYPE;
2564         dev_a.offset = off;
2565         /*
2566          * sdp->maxprot has the least restrictive protection of all dhps.
2567          */
2568         dev_a.maxprot = maxprot;
2569         dev_a.prot = dhp->dh_prot;
2570         /*
2571          * devmap uses dhp->dh_hat_attr for hat.
2572          */
2573         dev_a.hat_flags = 0;
2574         dev_a.hat_attr = 0;
2575         dev_a.devmap_data = (void *)dhp;
2576 
2577         err = as_map(as, *addr, len, segdev_create, &dev_a);
2578         as_rangeunlock(as);
2579         return (err);
2580 }
2581 
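     /*
      * devmap_do_ctxmgt: typically called from a driver's devmap_access(9E)
      * callback to run its context-management callback under the devmap
      * context serialization (and optional hysteresis timeout) handled here.
      *
      * For illustration, a hypothetical driver might use it as follows
      * (xx_ctxmgt and xx_devmap_access are made-up names):
      *
      *	static int
      *	xx_devmap_access(devmap_cookie_t dhp, void *pvtp, offset_t off,
      *	    size_t len, uint_t type, uint_t rw)
      *	{
      *		return (devmap_do_ctxmgt(dhp, pvtp, off, len, type, rw,
      *		    xx_ctxmgt));
      *	}
      *
      * where xx_ctxmgt() would typically program the device context and then
      * call devmap_load()/devmap_unload() to validate or invalidate the
      * mappings for the faulting range.
      */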
2582 int
2583 devmap_do_ctxmgt(devmap_cookie_t dhc, void *pvtp, offset_t off, size_t len,
2584     uint_t type, uint_t rw, int (*ctxmgt)(devmap_cookie_t, void *, offset_t,
2585     size_t, uint_t, uint_t))
2586 {
2587         register devmap_handle_t *dhp = (devmap_handle_t *)dhc;
2588         struct devmap_ctx *devctx;
2589         int do_timeout = 0;
2590         int ret;
2591 
2592 #ifdef lint
2593         pvtp = pvtp;
2594 #endif
2595 
2596         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT,
2597             "devmap_do_ctxmgt:start dhp=%p off=%llx, len=%lx",
2598             (void *)dhp, off, len);
2599         DEBUGF(7, (CE_CONT, "devmap_do_ctxmgt: dhp %p off %llx len %lx\n",
2600             (void *)dhp, off, len));
2601 
2602         if (ctxmgt == NULL)
2603                 return (FC_HWERR);
2604 
2605         devctx = dhp->dh_ctx;
2606 
2607         /*
2608          * If we are on an MP system with more than one cpu running
2609          * and if a thread on some CPU already has the context, wait
2610          * for it to finish if there is a hysteresis timeout.
2611          *
2612          * We call cv_wait() instead of cv_wait_sig() because
2613          * it does not matter much whether it returns due to a signal
2614          * or due to a cv_signal() or cv_broadcast().  In either event
2615          * we need to complete the mapping, otherwise the processes
2616          * will die with a SEGV.
2617          */
2618         if ((dhp->dh_timeout_length > 0) && (ncpus > 1)) {
2619                 TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK1,
2620                     "devmap_do_ctxmgt:doing hysteresis, devctl %p dhp %p",
2621                     devctx, dhp);
2622                 do_timeout = 1;
2623                 mutex_enter(&devctx->lock);
2624                 while (devctx->oncpu)
2625                         cv_wait(&devctx->cv, &devctx->lock);
2626                 devctx->oncpu = 1;
2627                 mutex_exit(&devctx->lock);
2628         }
2629 
2630         /*
2631          * Call the contextmgt callback so that the driver can handle
2632          * the fault.
2633          */
2634         ret = (*ctxmgt)(dhp, dhp->dh_pvtp, off, len, type, rw);
2635 
2636         /*
2637          * If devmap_access() returned -1, then there was a hardware
2638          * error so we need to convert the return value to something
2639          * that trap() will understand.  Otherwise, the return value
2640          * is already a fault code generated by devmap_unload()
2641          * or devmap_load().
2642          */
2643         if (ret) {
2644                 TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK2,
2645                     "devmap_do_ctxmgt: ret=%x dhp=%p devctx=%p",
2646                     ret, dhp, devctx);
2647                 DEBUGF(1, (CE_CONT, "devmap_do_ctxmgt: ret %x dhp %p\n",
2648                     ret, (void *)dhp));
2649                 if (devctx->oncpu) {
2650                         mutex_enter(&devctx->lock);
2651                         devctx->oncpu = 0;
2652                         cv_signal(&devctx->cv);
2653                         mutex_exit(&devctx->lock);
2654                 }
2655                 return (FC_HWERR);
2656         }
2657 
2658         /*
2659          * Setup the timeout if we need to
2660          */
2661         if (do_timeout) {
2662                 mutex_enter(&devctx->lock);
2663                 if (dhp->dh_timeout_length > 0) {
2664                         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK3,
2665                             "devmap_do_ctxmgt:timeout set");
2666                         devctx->timeout = timeout(devmap_ctxto,
2667                             devctx, dhp->dh_timeout_length);
2668                 } else {
2669                         /*
2670                          * We don't want to wait so set oncpu to
2671                          * 0 and wake up anyone waiting.
2672                          */
2673                         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK4,
2674                             "devmap_do_ctxmgt:timeout not set");
2675                         devctx->oncpu = 0;
2676                         cv_signal(&devctx->cv);
2677                 }
2678                 mutex_exit(&devctx->lock);
2679         }
2680 
2681         return (DDI_SUCCESS);
2682 }
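
/*
 * Illustrative sketch (hypothetical driver code, not part of this file):
 * a driver that wants the framework to serialize access to its device
 * context typically calls devmap_do_ctxmgt() from its devmap_access(9E)
 * entry point and passes its own devmap_contextmgt(9E) callback.  The
 * xx_ names below are placeholders.
 *
 *      static int
 *      xx_devmap_access(devmap_cookie_t dhp, void *pvtp, offset_t off,
 *          size_t len, uint_t type, uint_t rw)
 *      {
 *              return (devmap_do_ctxmgt(dhp, pvtp, off, len, type, rw,
 *                  xx_devmap_contextmgt));
 *      }
 */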
2683 
2684 /*
2685  *                                       end of mapping
2686  *                    poff   fault_offset         |
2687  *            base     |        |                 |
2688  *              |      |        |                 |
2689  *              V      V        V                 V
2690  *  +-----------+---------------+-------+---------+-------+
2691  *              ^               ^       ^         ^
2692  *              |<--- offset--->|<-len->|         |
2693  *              |<--- dh_len(size of mapping) --->|
2694  *                     |<--  pg -->|
2695  *                              -->|rlen|<--
2696  */
2697 static ulong_t
2698 devmap_roundup(devmap_handle_t *dhp, ulong_t offset, size_t len,
2699     ulong_t *opfn, ulong_t *pagesize)
2700 {
2701         register int level;
2702         ulong_t pg;
2703         ulong_t poff;
2704         ulong_t base;
2705         caddr_t uvaddr;
2706         long rlen;
2707 
2708         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_ROUNDUP,
2709             "devmap_roundup:start dhp=%p off=%lx len=%lx",
2710             (void *)dhp, offset, len);
2711         DEBUGF(2, (CE_CONT, "devmap_roundup: dhp %p off %lx len %lx\n",
2712             (void *)dhp, offset, len));
2713 
2714         /*
2715          * Get the largest pagesize that is aligned within the range
2716          * <dh_pfn, dh_pfn+offset>.
2717          *
2718          * The calculations below use the physical address to determine
2719          * the page size to use.  The same calculations could equally
2720          * use the virtual address to determine the page size.
2721          */
2722         base = (ulong_t)ptob(dhp->dh_pfn);
2723         for (level = dhp->dh_mmulevel; level >= 0; level--) {
2724                 pg = page_get_pagesize(level);
2725                 poff = ((base + offset) & ~(pg - 1));
2726                 uvaddr = dhp->dh_uvaddr + (poff - base);
2727                 if ((poff >= base) &&
2728                     ((poff + pg) <= (base + dhp->dh_len)) &&
2729                     VA_PA_ALIGNED((uintptr_t)uvaddr, poff, pg))
2730                         break;
2731         }
2732 
2733         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_ROUNDUP_CK1,
2734             "devmap_roundup: base=%lx poff=%lx dhp=%p",
2735             base, poff, dhp);
2736         DEBUGF(2, (CE_CONT, "devmap_roundup: base %lx poff %lx pfn %lx\n",
2737             base, poff, dhp->dh_pfn));
2738 
2739         ASSERT(VA_PA_ALIGNED((uintptr_t)uvaddr, poff, pg));
2740         ASSERT(level >= 0);
2741 
2742         *pagesize = pg;
2743         *opfn = dhp->dh_pfn + btop(poff - base);
2744 
2745         rlen = len + offset - (poff - base + pg);
2746 
2747         ASSERT(rlen < (long)len);
2748 
2749         TRACE_5(TR_FAC_DEVMAP, TR_DEVMAP_ROUNDUP_CK2,
2750             "devmap_roundup:ret dhp=%p level=%x rlen=%lx psiz=%p opfn=%p",
2751             (void *)dhp, level, rlen, pagesize, opfn);
2752         DEBUGF(1, (CE_CONT, "devmap_roundup: dhp %p "
2753             "level %x rlen %lx psize %lx opfn %lx\n",
2754             (void *)dhp, level, rlen, *pagesize, *opfn));
2755 
2756         return ((ulong_t)((rlen > 0) ? rlen : 0));
2757 }
2758 
2759 /*
2760  * find the dhp that contains addr.
2761  */
2762 static devmap_handle_t *
2763 devmap_find_handle(devmap_handle_t *dhp_head, caddr_t addr)
2764 {
2765         devmap_handle_t *dhp;
2766 
2767         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_FIND_HANDLE,
2768             "devmap_find_handle:start");
2769 
2770         dhp = dhp_head;
2771         while (dhp) {
2772                 if (addr >= dhp->dh_uvaddr &&
2773                     addr < (dhp->dh_uvaddr + dhp->dh_len))
2774                         return (dhp);
2775                 dhp = dhp->dh_next;
2776         }
2777 
2778         return ((devmap_handle_t *)NULL);
2779 }
2780 
2781 /*
2782  * devmap_unload:
2783  *                      Marks a segdev segment, or just the pages in the
2784  *                      range offset -> offset+len if that is not the entire
2785  *                      segment, as intercept and unloads those pages.
2786  */
2787 int
2788 devmap_unload(devmap_cookie_t dhc, offset_t offset, size_t len)
2789 {
2790         register devmap_handle_t *dhp = (devmap_handle_t *)dhc;
2791         caddr_t addr;
2792         ulong_t size;
2793         ssize_t soff;
2794 
2795         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_UNLOAD,
2796             "devmap_unload:start dhp=%p offset=%llx len=%lx",
2797             (void *)dhp, offset, len);
2798         DEBUGF(7, (CE_CONT, "devmap_unload: dhp %p offset %llx len %lx\n",
2799             (void *)dhp, offset, len));
2800 
2801         soff = (ssize_t)(offset - dhp->dh_uoff);
2802         soff = round_down_p2(soff, PAGESIZE);
2803         if (soff < 0 || soff >= dhp->dh_len)
2804                 return (FC_MAKE_ERR(EINVAL));
2805 
2806         /*
2807          * Address and size must be page aligned.  Len is rounded up to
2808          * the number of bytes in the whole pages that are required to
2809          * cover len.  Offset is rounded down to the byte offset of the
2810          * first byte of the page that contains offset.
2811          */
2812         len = round_up_p2(len, PAGESIZE);
2813 
2814         /*
2815          * If len == 0, then calculate the size by getting
2816          * the number of bytes from offset to the end of the segment.
2817          */
2818         if (len == 0)
2819                 size = dhp->dh_len - soff;
2820         else {
2821                 size = len;
2822                 if ((soff + size) > dhp->dh_len)
2823                         return (FC_MAKE_ERR(EINVAL));
2824         }
2825 
2826         /*
2827          * The address is offset bytes from the base address of
2828          * the dhp.
2829          */
2830         addr = (caddr_t)(soff + dhp->dh_uvaddr);
2831 
2832         /*
2833          * If large page size was used in hat_devload(),
2834          * the same page size must be used in hat_unload().
2835          */
2836         if (dhp->dh_flags & DEVMAP_FLAG_LARGE) {
2837                 hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
2838                     dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
2839         } else {
2840                 hat_unload(dhp->dh_seg->s_as->a_hat,  addr, size,
2841                     HAT_UNLOAD|HAT_UNLOAD_OTHER);
2842         }
2843 
2844         return (0);
2845 }
2846 
2847 /*
2848  * calculates the optimal page size that will be used for hat_devload().
2849  */
2850 static void
2851 devmap_get_large_pgsize(devmap_handle_t *dhp, size_t len, caddr_t addr,
2852     size_t *llen, caddr_t *laddr)
2853 {
2854         ulong_t off;
2855         ulong_t pfn;
2856         ulong_t pgsize;
2857         uint_t first = 1;
2858 
2859         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_GET_LARGE_PGSIZE,
2860             "devmap_get_large_pgsize:start");
2861 
2862         /*
2863          * RFE - This code only supports large page mappings for devmem.
2864          * It could be changed in the future if we want to support
2865          * large page mappings for kernel exported memory.
2866          */
2867         ASSERT(dhp_is_devmem(dhp));
2868         ASSERT(!(dhp->dh_flags & DEVMAP_MAPPING_INVALID));
2869 
2870         *llen = 0;
2871         off = (ulong_t)(addr - dhp->dh_uvaddr);
2872         while ((long)len > 0) {
2873                 /*
2874                  * get the optimal pfn to minimize address translations.
2875                  * devmap_roundup() returns the residue in bytes for the
2876                  * next round of calculations.
2877                  */
2878                 len = devmap_roundup(dhp, off, len, &pfn, &pgsize);
2879 
2880                 if (first) {
2881                         *laddr = dhp->dh_uvaddr + ptob(pfn - dhp->dh_pfn);
2882                         first = 0;
2883                 }
2884 
2885                 *llen += pgsize;
2886                 off = ptob(pfn - dhp->dh_pfn) + pgsize;
2887         }
2888         /* Large page mapping len/addr cover more range than original fault */
2889         ASSERT(*llen >= len && *laddr <= addr);
2890         ASSERT((*laddr + *llen) >= (addr + len));
2891 }
2892 
2893 /*
2894  * Initialize the devmap_softlock structure.
2895  */
2896 static struct devmap_softlock *
2897 devmap_softlock_init(dev_t dev, ulong_t id)
2898 {
2899         struct devmap_softlock *slock;
2900         struct devmap_softlock *tmp;
2901 
2902         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SOFTLOCK_INIT,
2903             "devmap_softlock_init:start");
2904 
2905         tmp = kmem_zalloc(sizeof (struct devmap_softlock), KM_SLEEP);
2906         mutex_enter(&devmap_slock);
2907 
2908         for (slock = devmap_slist; slock != NULL; slock = slock->next)
2909                 if ((slock->dev == dev) && (slock->id == id))
2910                         break;
2911 
2912         if (slock == NULL) {
2913                 slock = tmp;
2914                 slock->dev = dev;
2915                 slock->id = id;
2916                 mutex_init(&slock->lock, NULL, MUTEX_DEFAULT, NULL);
2917                 cv_init(&slock->cv, NULL, CV_DEFAULT, NULL);
2918                 slock->next = devmap_slist;
2919                 devmap_slist = slock;
2920         } else
2921                 kmem_free(tmp, sizeof (struct devmap_softlock));
2922 
2923         mutex_enter(&slock->lock);
2924         slock->refcnt++;
2925         mutex_exit(&slock->lock);
2926         mutex_exit(&devmap_slock);
2927 
2928         return (slock);
2929 }
2930 
2931 /*
2932  * Wake up processes that sleep on the softlock.
2933  * Free dh_softlock if refcnt is 0.
2934  */
2935 static void
2936 devmap_softlock_rele(devmap_handle_t *dhp)
2937 {
2938         struct devmap_softlock *slock = dhp->dh_softlock;
2939         struct devmap_softlock *tmp;
2940         struct devmap_softlock *parent;
2941 
2942         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SOFTLOCK_RELE,
2943             "devmap_softlock_rele:start");
2944 
2945         mutex_enter(&devmap_slock);
2946         mutex_enter(&slock->lock);
2947 
2948         ASSERT(slock->refcnt > 0);
2949 
2950         slock->refcnt--;
2951 
2952         /*
2953          * If no one is using the device, free up the slock data.
2954          */
2955         if (slock->refcnt == 0) {
2956                 slock->softlocked = 0;
2957                 cv_signal(&slock->cv);
2958 
2959                 if (devmap_slist == slock)
2960                         devmap_slist = slock->next;
2961                 else {
2962                         parent = devmap_slist;
2963                         for (tmp = devmap_slist->next; tmp != NULL;
2964                             tmp = tmp->next) {
2965                                 if (tmp == slock) {
2966                                         parent->next = tmp->next;
2967                                         break;
2968                                 }
2969                                 parent = tmp;
2970                         }
2971                 }
2972                 mutex_exit(&slock->lock);
2973                 mutex_destroy(&slock->lock);
2974                 cv_destroy(&slock->cv);
2975                 kmem_free(slock, sizeof (struct devmap_softlock));
2976         } else
2977                 mutex_exit(&slock->lock);
2978 
2979         mutex_exit(&devmap_slock);
2980 }
2981 
2982 /*
2983  * Wake up processes that sleep on dh_ctx->locked.
2984  * Free dh_ctx if refcnt is 0.
2985  */
2986 static void
2987 devmap_ctx_rele(devmap_handle_t *dhp)
2988 {
2989         struct devmap_ctx *devctx = dhp->dh_ctx;
2990         struct devmap_ctx *tmp;
2991         struct devmap_ctx *parent;
2992         timeout_id_t tid;
2993 
2994         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_CTX_RELE,
2995             "devmap_ctx_rele:start");
2996 
2997         mutex_enter(&devmapctx_lock);
2998         mutex_enter(&devctx->lock);
2999 
3000         ASSERT(devctx->refcnt > 0);
3001 
3002         devctx->refcnt--;
3003 
3004         /*
3005          * If no one is using the device, free up the devctx data.
3006          */
3007         if (devctx->refcnt == 0) {
3008                 /*
3009                  * Untimeout any threads using this mapping as they are about
3010                  * to go away.
3011                  */
3012                 if (devctx->timeout != 0) {
3013                         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_CTX_RELE_CK1,
3014                             "devmap_ctx_rele:untimeout ctx->timeout");
3015 
3016                         tid = devctx->timeout;
3017                         mutex_exit(&devctx->lock);
3018                         (void) untimeout(tid);
3019                         mutex_enter(&devctx->lock);
3020                 }
3021 
3022                 devctx->oncpu = 0;
3023                 cv_signal(&devctx->cv);
3024 
3025                 if (devmapctx_list == devctx)
3026                         devmapctx_list = devctx->next;
3027                 else {
3028                         parent = devmapctx_list;
3029                         for (tmp = devmapctx_list->next; tmp != NULL;
3030                             tmp = tmp->next) {
3031                                 if (tmp == devctx) {
3032                                         parent->next = tmp->next;
3033                                         break;
3034                                 }
3035                                 parent = tmp;
3036                         }
3037                 }
3038                 mutex_exit(&devctx->lock);
3039                 mutex_destroy(&devctx->lock);
3040                 cv_destroy(&devctx->cv);
3041                 kmem_free(devctx, sizeof (struct devmap_ctx));
3042         } else
3043                 mutex_exit(&devctx->lock);
3044 
3045         mutex_exit(&devmapctx_lock);
3046 }
3047 
3048 /*
3049  * devmap_load:
3050  *                      Marks a segdev segment, or just the pages in the
3051  *                      range offset -> offset+len if that is not the entire
3052  *                      segment, as nointercept and faults in those pages.
3053  */
3054 int
3055 devmap_load(devmap_cookie_t dhc, offset_t offset, size_t len, uint_t type,
3056     uint_t rw)
3057 {
3058         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3059         struct as *asp = dhp->dh_seg->s_as;
3060         caddr_t addr;
3061         ulong_t size;
3062         ssize_t soff;   /* offset from the beginning of the segment */
3063         int rc;
3064 
3065         TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_LOAD,
3066             "devmap_load:start dhp=%p offset=%llx len=%lx",
3067             (void *)dhp, offset, len);
3068 
3069         DEBUGF(7, (CE_CONT, "devmap_load: dhp %p offset %llx len %lx\n",
3070             (void *)dhp, offset, len));
3071 
3072         /*
3073          *      The hat layer only supports devload to a process' context
3074          *      for which the as lock is held.  Verify here and return an
3075          *      error if a driver inadvertently uses the wrong devmap handle.
3076          */
3077         if ((asp != &kas) && !AS_LOCK_HELD(asp, &asp->a_lock))
3078                 return (FC_MAKE_ERR(EINVAL));
3079 
3080         soff = (ssize_t)(offset - dhp->dh_uoff);
3081         soff = round_down_p2(soff, PAGESIZE);
3082         if (soff < 0 || soff >= dhp->dh_len)
3083                 return (FC_MAKE_ERR(EINVAL));
3084 
3085         /*
3086          * Address and size must be page aligned.  Len is rounded up to
3087          * the number of bytes in the whole pages that are required to
3088          * cover len.  Offset is rounded down to the byte offset of the
3089          * first byte of the page that contains offset.
3090          */
3091         len = round_up_p2(len, PAGESIZE);
3092 
3093         /*
3094          * If len == 0, then calculate the size by getting
3095          * the number of bytes from offset to the end of the segment.
3096          */
3097         if (len == 0)
3098                 size = dhp->dh_len - soff;
3099         else {
3100                 size = len;
3101                 if ((soff + size) > dhp->dh_len)
3102                         return (FC_MAKE_ERR(EINVAL));
3103         }
3104 
3105         /*
3106          * The address is offset bytes from the base address of
3107          * the segment.
3108          */
3109         addr = (caddr_t)(soff + dhp->dh_uvaddr);
3110 
3111         HOLD_DHP_LOCK(dhp);
3112         rc = segdev_faultpages(asp->a_hat,
3113             dhp->dh_seg, addr, size, type, rw, dhp);
3114         RELE_DHP_LOCK(dhp);
3115         return (rc);
3116 }
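
/*
 * Illustrative sketch (hypothetical driver code, not part of this file):
 * a devmap_contextmgt(9E) callback commonly calls devmap_unload() on the
 * mapping that currently owns the device context and then devmap_load()
 * on the faulting range.  xx_state_t and the xx_ names are placeholders
 * for driver state that remembers which handle holds the context.
 *
 *      static int
 *      xx_devmap_contextmgt(devmap_cookie_t dhp, void *pvtp, offset_t off,
 *          size_t len, uint_t type, uint_t rw)
 *      {
 *              xx_state_t *sp = pvtp;
 *
 *              if (sp->xx_cur_dhp != NULL &&
 *                  devmap_unload(sp->xx_cur_dhp, off, len) != 0)
 *                      return (-1);
 *
 *              sp->xx_cur_dhp = dhp;
 *              return (devmap_load(dhp, off, len, type, rw));
 *      }
 */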
3117 
3118 int
3119 devmap_setup(dev_t dev, offset_t off, struct as *as, caddr_t *addrp,
3120     size_t len, uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
3121 {
3122         register devmap_handle_t *dhp;
3123         int (*devmap)(dev_t, devmap_cookie_t, offset_t, size_t,
3124             size_t *, uint_t);
3125         int (*mmap)(dev_t, off_t, int);
3126         struct devmap_callback_ctl *callbackops;
3127         devmap_handle_t *dhp_head = NULL;
3128         devmap_handle_t *dhp_prev = NULL;
3129         devmap_handle_t *dhp_curr;
3130         caddr_t addr;
3131         int map_flag;
3132         int ret;
3133         ulong_t total_len;
3134         size_t map_len;
3135         size_t resid_len = len;
3136         offset_t map_off = off;
3137         struct devmap_softlock *slock = NULL;
3138 
3139 #ifdef lint
3140         cred = cred;
3141 #endif
3142 
3143         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_SETUP,
3144             "devmap_setup:start off=%llx len=%lx", off, len);
3145         DEBUGF(3, (CE_CONT, "devmap_setup: off %llx len %lx\n",
3146             off, len));
3147 
3148         devmap = devopsp[getmajor(dev)]->devo_cb_ops->cb_devmap;
3149         mmap = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap;
3150 
3151         /*
3152          * The driver must provide a devmap(9E) entry point in cb_ops to use
3153          * the devmap framework.
3154          */
3155         if (devmap == NULL || devmap == nulldev || devmap == nodev)
3156                 return (EINVAL);
3157 
3158         /*
3159          * To protect against entering the devmap framework inadvertently
3160          * just because the devmap entry point is not NULL, return an error
3161          * if the D_DEVMAP bit is not set in cb_flag and mmap is NULL.
3162          */
3163         map_flag = devopsp[getmajor(dev)]->devo_cb_ops->cb_flag;
3164         if ((map_flag & D_DEVMAP) == 0 && (mmap == NULL || mmap == nulldev))
3165                 return (EINVAL);
3166 
3167         /*
3168          * devmap allows mmap(2) to map multiple registers.
3169          * One devmap_handle is created for each register mapped.
3170          */
3171         for (total_len = 0; total_len < len; total_len += map_len) {
3172                 dhp = kmem_zalloc(sizeof (devmap_handle_t), KM_SLEEP);
3173 
3174                 if (dhp_prev != NULL)
3175                         dhp_prev->dh_next = dhp;
3176                 else
3177                         dhp_head = dhp;
3178                 dhp_prev = dhp;
3179 
3180                 dhp->dh_prot = prot;
3181                 dhp->dh_orig_maxprot = dhp->dh_maxprot = maxprot;
3182                 dhp->dh_dev = dev;
3183                 dhp->dh_timeout_length = CTX_TIMEOUT_VALUE;
3184                 dhp->dh_uoff = map_off;
3185 
3186                 /*
3187                  * Get mapping specific info from
3188                  * the driver, such as rnumber, roff, len, callbackops,
3189                  * accattrp and, if the mapping is for kernel memory,
3190                  * ddi_umem_cookie.
3191                  */
3192                 if ((ret = cdev_devmap(dev, dhp, map_off,
3193                     resid_len, &map_len, get_udatamodel())) != 0) {
3194                         free_devmap_handle(dhp_head);
3195                         return (ENXIO);
3196                 }
3197 
3198                 if (map_len & PAGEOFFSET) {
3199                         free_devmap_handle(dhp_head);
3200                         return (EINVAL);
3201                 }
3202 
3203                 callbackops = &dhp->dh_callbackops;
3204 
3205                 if ((callbackops->devmap_access == NULL) ||
3206                     (callbackops->devmap_access == nulldev) ||
3207                     (callbackops->devmap_access == nodev)) {
3208                         /*
3209                          * Normally devmap does not support MAP_PRIVATE unless
3210                          * the drivers provide a valid devmap_access routine.
3211                          */
3212                         if ((flags & MAP_PRIVATE) != 0) {
3213                                 free_devmap_handle(dhp_head);
3214                                 return (EINVAL);
3215                         }
3216                 } else {
3217                         /*
3218                          * Initialize dhp_softlock and dh_ctx if the drivers
3219                          * provide devmap_access.
3220                          */
3221                         dhp->dh_softlock = devmap_softlock_init(dev,
3222                             (ulong_t)callbackops->devmap_access);
3223                         dhp->dh_ctx = devmap_ctxinit(dev,
3224                             (ulong_t)callbackops->devmap_access);
3225 
3226                         /*
3227                          * segdev_fault can only work when all
3228                          * dh_softlock in a multi-dhp mapping are
3229                          * the same; see the comments in segdev_fault.
3230                          * This code keeps track of the first
3231                          * dh_softlock allocated in slock, compares
3232                          * all later allocations against it, and
3233                          * returns an error if they differ.
3234                          */
3235                         if (slock == NULL)
3236                                 slock = dhp->dh_softlock;
3237                         if (slock != dhp->dh_softlock) {
3238                                 free_devmap_handle(dhp_head);
3239                                 return (ENOTSUP);
3240                         }
3241                 }
3242 
3243                 map_off += map_len;
3244                 resid_len -= map_len;
3245         }
3246 
3247         /*
3248          * get the user virtual address and establish the mapping between
3249          * uvaddr and device physical address.
3250          */
3251         if ((ret = devmap_device(dhp_head, as, addrp, off, len, flags))
3252             != 0) {
3253                 /*
3254                  * Free the devmap handles on a mapping error.
3255                  */
3256                 free_devmap_handle(dhp_head);
3257 
3258                 return (ret);
3259         }
3260 
3261         /*
3262          * Call the driver's devmap_map callback to do more after the mapping,
3263          * such as allocating driver private data for context management.
3264          */
3265         dhp = dhp_head;
3266         map_off = off;
3267         addr = *addrp;
3268         while (dhp != NULL) {
3269                 callbackops = &dhp->dh_callbackops;
3270                 dhp->dh_uvaddr = addr;
3271                 dhp_curr = dhp;
3272                 if (callbackops->devmap_map != NULL) {
3273                         ret = (*callbackops->devmap_map)((devmap_cookie_t)dhp,
3274                             dev, flags, map_off,
3275                             dhp->dh_len, &dhp->dh_pvtp);
3276                         if (ret != 0) {
3277                                 struct segdev_data *sdp;
3278 
3279                                 /*
3280                                  * call driver's devmap_unmap entry point
3281                                  * to free driver resources.
3282                                  */
3283                                 dhp = dhp_head;
3284                                 map_off = off;
3285                                 while (dhp != dhp_curr) {
3286                                         callbackops = &dhp->dh_callbackops;
3287                                         if (callbackops->devmap_unmap != NULL) {
3288                                                 (*callbackops->devmap_unmap)(
3289                                                     dhp, dhp->dh_pvtp,
3290                                                     map_off, dhp->dh_len,
3291                                                     NULL, NULL, NULL, NULL);
3292                                         }
3293                                         map_off += dhp->dh_len;
3294                                         dhp = dhp->dh_next;
3295                                 }
3296                                 sdp = dhp_head->dh_seg->s_data;
3297                                 sdp->devmap_data = NULL;
3298                                 free_devmap_handle(dhp_head);
3299                                 return (ENXIO);
3300                         }
3301                 }
3302                 map_off += dhp->dh_len;
3303                 addr += dhp->dh_len;
3304                 dhp = dhp->dh_next;
3305         }
3306 
3307         return (0);
3308 }
3309 
3310 int
3311 ddi_devmap_segmap(dev_t dev, off_t off, ddi_as_handle_t as, caddr_t *addrp,
3312     off_t len, uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
3313 {
3314         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SEGMAP,
3315             "devmap_segmap:start");
3316         return (devmap_setup(dev, (offset_t)off, (struct as *)as, addrp,
3317             (size_t)len, prot, maxprot, flags, cred));
3318 }
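
/*
 * Illustrative sketch (hypothetical driver code, not part of this file):
 * a driver that uses the devmap framework can simply forward its
 * segmap(9E) entry point to ddi_devmap_segmap(), e.g.
 *
 *      static int
 *      xx_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
 *          off_t len, uint_t prot, uint_t maxprot, uint_t flags,
 *          cred_t *credp)
 *      {
 *              return (ddi_devmap_segmap(dev, off, (ddi_as_handle_t)asp,
 *                  addrp, len, prot, maxprot, flags, credp));
 *      }
 */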
3319 
3320 /*
3321  * Called from devmap_devmem_setup/remap to see if large pages can be used
3322  * for this device mapping.
3323  * Also calculates the max. page size for this mapping.
3324  * This page size will be used in the fault routine for
3325  * optimal page size calculations.
3326  */
3327 static void
3328 devmap_devmem_large_page_setup(devmap_handle_t *dhp)
3329 {
3330         ASSERT(dhp_is_devmem(dhp));
3331         dhp->dh_mmulevel = 0;
3332 
3333         /*
3334          * Use large page size only if:
3335          *  1. device memory,
3336          *  2. mmu supports multiple page sizes,
3337          *  3. driver did not disallow it,
3338          *  4. dhp length is at least as big as the large pagesize, and
3339          *  5. the uvaddr and pfn are large pagesize aligned.
3340          */
3341         if (page_num_pagesizes() > 1 &&
3342             !(dhp->dh_flags & (DEVMAP_USE_PAGESIZE | DEVMAP_MAPPING_INVALID))) {
3343                 ulong_t base;
3344                 int level;
3345 
3346                 base = (ulong_t)ptob(dhp->dh_pfn);
3347                 for (level = 1; level < page_num_pagesizes(); level++) {
3348                         size_t pgsize = page_get_pagesize(level);
3349                         if ((dhp->dh_len < pgsize) ||
3350                             (!VA_PA_PGSIZE_ALIGNED((uintptr_t)dhp->dh_uvaddr,
3351                             base, pgsize))) {
3352                                 break;
3353                         }
3354                 }
3355                 dhp->dh_mmulevel = level - 1;
3356         }
3357         if (dhp->dh_mmulevel > 0) {
3358                 dhp->dh_flags |= DEVMAP_FLAG_LARGE;
3359         } else {
3360                 dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
3361         }
3362 }
3363 
3364 /*
3365  * Called by the driver's devmap routine to pass device-specific info to
3366  * the framework.  Used for device memory mappings only.
3367  */
3368 int
3369 devmap_devmem_setup(devmap_cookie_t dhc, dev_info_t *dip,
3370     struct devmap_callback_ctl *callbackops, uint_t rnumber, offset_t roff,
3371     size_t len, uint_t maxprot, uint_t flags, ddi_device_acc_attr_t *accattrp)
3372 {
3373         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3374         ddi_acc_handle_t handle;
3375         ddi_map_req_t mr;
3376         ddi_acc_hdl_t *hp;
3377         int err;
3378 
3379         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_DEVMEM_SETUP,
3380             "devmap_devmem_setup:start dhp=%p offset=%llx rnum=%d len=%lx",
3381             (void *)dhp, roff, rnumber, (uint_t)len);
3382         DEBUGF(2, (CE_CONT, "devmap_devmem_setup: dhp %p offset %llx "
3383             "rnum %d len %lx\n", (void *)dhp, roff, rnumber, len));
3384 
3385         /*
3386          * Check whether this function has already been called for this dhp.
3387          */
3388         if (dhp->dh_flags & DEVMAP_SETUP_DONE)
3389                 return (DDI_FAILURE);
3390 
3391         if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3392                 return (DDI_FAILURE);
3393 
3394         if (flags & DEVMAP_MAPPING_INVALID) {
3395                 /*
3396                  * Don't go up the tree to get pfn if the driver specifies
3397                  * DEVMAP_MAPPING_INVALID in flags.
3398                  *
3399                  * If DEVMAP_MAPPING_INVALID is specified, we have to grant
3400                  * remap permission.
3401                  */
3402                 if (!(flags & DEVMAP_ALLOW_REMAP)) {
3403                         return (DDI_FAILURE);
3404                 }
3405                 dhp->dh_pfn = PFN_INVALID;
3406         } else {
3407                 handle = impl_acc_hdl_alloc(KM_SLEEP, NULL);
3408                 if (handle == NULL)
3409                         return (DDI_FAILURE);
3410 
3411                 hp = impl_acc_hdl_get(handle);
3412                 hp->ah_vers = VERS_ACCHDL;
3413                 hp->ah_dip = dip;
3414                 hp->ah_rnumber = rnumber;
3415                 hp->ah_offset = roff;
3416                 hp->ah_len = len;
3417                 if (accattrp != NULL)
3418                         hp->ah_acc = *accattrp;
3419 
3420                 mr.map_op = DDI_MO_MAP_LOCKED;
3421                 mr.map_type = DDI_MT_RNUMBER;
3422                 mr.map_obj.rnumber = rnumber;
3423                 mr.map_prot = maxprot & dhp->dh_orig_maxprot;
3424                 mr.map_flags = DDI_MF_DEVICE_MAPPING;
3425                 mr.map_handlep = hp;
3426                 mr.map_vers = DDI_MAP_VERSION;
3427 
3428                 /*
3429                  * Go up the device tree to get the pfn.
3430                  * The rootnex_map_regspec() routine in nexus drivers has been
3431                  * modified to return pfn if map_flags is DDI_MF_DEVICE_MAPPING.
3432                  */
3433                 err = ddi_map(dip, &mr, roff, len, (caddr_t *)&dhp->dh_pfn);
3434                 dhp->dh_hat_attr = hp->ah_hat_flags;
3435                 impl_acc_hdl_free(handle);
3436 
3437                 if (err)
3438                         return (DDI_FAILURE);
3439         }
3440         /* Should not be using devmem setup for memory pages */
3441         ASSERT(!pf_is_memory(dhp->dh_pfn));
3442 
3443         /* Only some of the flags bits are settable by the driver */
3444         dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
3445         dhp->dh_len = ptob(btopr(len));
3446 
3447         dhp->dh_cookie = DEVMAP_DEVMEM_COOKIE;
3448         dhp->dh_roff = ptob(btop(roff));
3449 
3450         /* setup the dh_mmulevel and DEVMAP_FLAG_LARGE */
3451         devmap_devmem_large_page_setup(dhp);
3452         dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3453         ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3454 
3455 
3456         if (callbackops != NULL) {
3457                 bcopy(callbackops, &dhp->dh_callbackops,
3458                     sizeof (struct devmap_callback_ctl));
3459         }
3460 
3461         /*
3462          * Initialize dh_lock if we want to do remap.
3463          */
3464         if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
3465                 mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
3466                 dhp->dh_flags |= DEVMAP_LOCK_INITED;
3467         }
3468 
3469         dhp->dh_flags |= DEVMAP_SETUP_DONE;
3470 
3471         return (DDI_SUCCESS);
3472 }
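
/*
 * Illustrative sketch (hypothetical driver code, not part of this file):
 * a typical devmap(9E) entry point for device memory rounds the length up
 * to whole pages and hands the register number and offset to
 * devmap_devmem_setup().  xx_dip, xx_rnumber and xx_acc_attr are
 * placeholders for state the driver set up at attach time.
 *
 *      static int
 *      xx_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
 *          size_t *maplen, uint_t model)
 *      {
 *              size_t length = ptob(btopr(len));
 *
 *              if (devmap_devmem_setup(dhp, xx_dip, NULL, xx_rnumber,
 *                  off, length, PROT_ALL, DEVMAP_DEFAULTS,
 *                  &xx_acc_attr) != DDI_SUCCESS)
 *                      return (-1);
 *              *maplen = length;
 *              return (0);
 *      }
 */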
3473 
3474 int
3475 devmap_devmem_remap(devmap_cookie_t dhc, dev_info_t *dip,
3476     uint_t rnumber, offset_t roff, size_t len, uint_t maxprot,
3477     uint_t flags, ddi_device_acc_attr_t *accattrp)
3478 {
3479         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3480         ddi_acc_handle_t handle;
3481         ddi_map_req_t mr;
3482         ddi_acc_hdl_t *hp;
3483         pfn_t   pfn;
3484         uint_t  hat_flags;
3485         int     err;
3486 
3487         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_DEVMEM_REMAP,
3488             "devmap_devmem_remap:start dhp=%p offset=%llx rnum=%d len=%lx",
3489             (void *)dhp, roff, rnumber, (uint_t)len);
3490         DEBUGF(2, (CE_CONT, "devmap_devmem_remap: dhp %p offset %llx "
3491             "rnum %d len %lx\n", (void *)dhp, roff, rnumber, len));
3492 
3493         /*
3494          * Return failure if setup has not been done or no remap permission
3495          * has been granted during the setup.
3496          */
3497         if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
3498             (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
3499                 return (DDI_FAILURE);
3500 
3501         /* Only DEVMAP_MAPPING_INVALID flag supported for remap */
3502         if ((flags != 0) && (flags != DEVMAP_MAPPING_INVALID))
3503                 return (DDI_FAILURE);
3504 
3505         if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3506                 return (DDI_FAILURE);
3507 
3508         if (!(flags & DEVMAP_MAPPING_INVALID)) {
3509                 handle = impl_acc_hdl_alloc(KM_SLEEP, NULL);
3510                 if (handle == NULL)
3511                         return (DDI_FAILURE);
3512         }
3513 
3514         HOLD_DHP_LOCK(dhp);
3515 
3516         /*
3517          * Unload the old mapping, so the next fault will set up the new
3518          * mappings.  Do this while holding the dhp lock so other faults
3519          * don't reestablish the mappings.
3520          */
3521         hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
3522             dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
3523 
3524         if (flags & DEVMAP_MAPPING_INVALID) {
3525                 dhp->dh_flags |= DEVMAP_MAPPING_INVALID;
3526                 dhp->dh_pfn = PFN_INVALID;
3527         } else {
3528                 /* clear any prior DEVMAP_MAPPING_INVALID flag */
3529                 dhp->dh_flags &= ~DEVMAP_MAPPING_INVALID;
3530                 hp = impl_acc_hdl_get(handle);
3531                 hp->ah_vers = VERS_ACCHDL;
3532                 hp->ah_dip = dip;
3533                 hp->ah_rnumber = rnumber;
3534                 hp->ah_offset = roff;
3535                 hp->ah_len = len;
3536                 if (accattrp != NULL)
3537                         hp->ah_acc = *accattrp;
3538 
3539                 mr.map_op = DDI_MO_MAP_LOCKED;
3540                 mr.map_type = DDI_MT_RNUMBER;
3541                 mr.map_obj.rnumber = rnumber;
3542                 mr.map_prot = maxprot & dhp->dh_orig_maxprot;
3543                 mr.map_flags = DDI_MF_DEVICE_MAPPING;
3544                 mr.map_handlep = hp;
3545                 mr.map_vers = DDI_MAP_VERSION;
3546 
3547                 /*
3548                  * Go up the device tree to get the pfn.
3549                  * The rootnex_map_regspec() routine in nexus drivers has been
3550                  * modified to return pfn if map_flags is DDI_MF_DEVICE_MAPPING.
3551                  */
3552                 err = ddi_map(dip, &mr, roff, len, (caddr_t *)&pfn);
3553                 hat_flags = hp->ah_hat_flags;
3554                 impl_acc_hdl_free(handle);
3555                 if (err) {
3556                         RELE_DHP_LOCK(dhp);
3557                         return (DDI_FAILURE);
3558                 }
3559                 /*
3560                  * Store result of ddi_map first in local variables, as we do
3561                  * not want to overwrite the existing dhp with wrong data.
3562                  */
3563                 dhp->dh_pfn = pfn;
3564                 dhp->dh_hat_attr = hat_flags;
3565         }
3566 
3567         /* clear the large page size flag */
3568         dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
3569 
3570         dhp->dh_cookie = DEVMAP_DEVMEM_COOKIE;
3571         dhp->dh_roff = ptob(btop(roff));
3572 
3573         /* setup the dh_mmulevel and DEVMAP_FLAG_LARGE */
3574         devmap_devmem_large_page_setup(dhp);
3575         dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3576         ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3577 
3578         RELE_DHP_LOCK(dhp);
3579         return (DDI_SUCCESS);
3580 }
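
/*
 * Illustrative sketch (hypothetical driver code, not part of this file):
 * a driver that passed DEVMAP_ALLOW_REMAP to devmap_devmem_setup() can
 * later tear down the user's translations, e.g. while the device is being
 * reset or detached, by remapping with DEVMAP_MAPPING_INVALID; faults on
 * the range are not satisfied again until the driver remaps to a valid
 * register.  The xx_ names are placeholders.
 *
 *      (void) devmap_devmem_remap(dhp, xx_dip, xx_rnumber, off, len,
 *          PROT_ALL, DEVMAP_MAPPING_INVALID, &xx_acc_attr);
 */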
3581 
3582 /*
3583  * Called by the driver's devmap routine to pass kernel virtual address
3584  * mapping info to the framework.  Used only for kernel memory
3585  * allocated from ddi_umem_alloc().
3586  */
3587 int
3588 devmap_umem_setup(devmap_cookie_t dhc, dev_info_t *dip,
3589     struct devmap_callback_ctl *callbackops, ddi_umem_cookie_t cookie,
3590     offset_t off, size_t len, uint_t maxprot, uint_t flags,
3591     ddi_device_acc_attr_t *accattrp)
3592 {
3593         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3594         struct ddi_umem_cookie *cp = (struct ddi_umem_cookie *)cookie;
3595 
3596 #ifdef lint
3597         dip = dip;
3598 #endif
3599 
3600         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_SETUP,
3601             "devmap_umem_setup:start dhp=%p offset=%llx cookie=%p len=%lx",
3602             (void *)dhp, off, cookie, len);
3603         DEBUGF(2, (CE_CONT, "devmap_umem_setup: dhp %p offset %llx "
3604             "cookie %p len %lx\n", (void *)dhp, off, (void *)cookie, len));
3605 
3606         if (cookie == NULL)
3607                 return (DDI_FAILURE);
3608 
3609         /* For UMEM_TRASH, this restriction is not needed */
3610         if ((off + len) > cp->size)
3611                 return (DDI_FAILURE);
3612 
3613         /* check if the cache attributes are supported */
3614         if (i_ddi_check_cache_attr(flags) == B_FALSE)
3615                 return (DDI_FAILURE);
3616 
3617         /*
3618          * Check whether this function has already been called for this dhp.
3619          */
3620         if (dhp->dh_flags & DEVMAP_SETUP_DONE)
3621                 return (DDI_FAILURE);
3622 
3623         if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3624                 return (DDI_FAILURE);
3625 
3626         if (flags & DEVMAP_MAPPING_INVALID) {
3627                 /*
3628                  * If DEVMAP_MAPPING_INVALID is specified, we have to grant
3629                  * remap permission.
3630                  */
3631                 if (!(flags & DEVMAP_ALLOW_REMAP)) {
3632                         return (DDI_FAILURE);
3633                 }
3634         } else {
3635                 dhp->dh_cookie = cookie;
3636                 dhp->dh_roff = ptob(btop(off));
3637                 dhp->dh_cvaddr = cp->cvaddr + dhp->dh_roff;
3638                 /* set HAT cache attributes */
3639                 i_ddi_cacheattr_to_hatacc(flags, &dhp->dh_hat_attr);
3640                 /* set HAT endianness attributes */
3641                 i_ddi_devacc_to_hatacc(accattrp, &dhp->dh_hat_attr);
3642         }
3643 
3644         /*
3645          * The default is _not_ to pass HAT_LOAD_NOCONSIST to hat_devload();
3646          * we pass HAT_LOAD_NOCONSIST _only_ in cases where hat tries to
3647          * create consistent mappings but our intention was to create
3648          * non-consistent mappings.
3649          *
3650  * DEVMEM: hat figures out it's DEVMEM and creates non-consistent
3651  * mappings.
3652  *
3653  * kernel exported memory: hat figures out it's memory and always
3654          * creates consistent mappings.
3655          *
3656          * /dev/mem: non-consistent mappings. See comments in common/io/mem.c
3657          *
3658          * /dev/kmem: consistent mappings are created unless they are
3659          * MAP_FIXED. We _explicitly_ tell hat to create non-consistent
3660          * mappings by passing HAT_LOAD_NOCONSIST in case of MAP_FIXED
3661          * mappings of /dev/kmem. See common/io/mem.c
3662          */
3663 
3664         /* Only some of the flags bits are settable by the driver */
3665         dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
3666 
3667         dhp->dh_len = ptob(btopr(len));
3668         dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3669         ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3670 
3671         if (callbackops != NULL) {
3672                 bcopy(callbackops, &dhp->dh_callbackops,
3673                     sizeof (struct devmap_callback_ctl));
3674         }
3675         /*
3676          * Initialize dh_lock if we want to do remap.
3677          */
3678         if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
3679                 mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
3680                 dhp->dh_flags |= DEVMAP_LOCK_INITED;
3681         }
3682 
3683         dhp->dh_flags |= DEVMAP_SETUP_DONE;
3684 
3685         return (DDI_SUCCESS);
3686 }
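
/*
 * Illustrative sketch (hypothetical driver code, not part of this file):
 * exporting kernel memory that was allocated with ddi_umem_alloc() from a
 * devmap(9E) entry point.  xx_dip, xx_umem_cookie and xx_acc_attr are
 * placeholders for state the driver set up earlier.
 *
 *      static int
 *      xx_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
 *          size_t *maplen, uint_t model)
 *      {
 *              size_t length = ptob(btopr(len));
 *
 *              if (devmap_umem_setup(dhp, xx_dip, NULL, xx_umem_cookie,
 *                  off, length, PROT_ALL, DEVMAP_DEFAULTS,
 *                  &xx_acc_attr) != DDI_SUCCESS)
 *                      return (-1);
 *              *maplen = length;
 *              return (0);
 *      }
 */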
3687 
3688 int
3689 devmap_umem_remap(devmap_cookie_t dhc, dev_info_t *dip,
3690     ddi_umem_cookie_t cookie, offset_t off, size_t len, uint_t maxprot,
3691     uint_t flags, ddi_device_acc_attr_t *accattrp)
3692 {
3693         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3694         struct ddi_umem_cookie *cp = (struct ddi_umem_cookie *)cookie;
3695 
3696         TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_REMAP,
3697             "devmap_umem_remap:start dhp=%p offset=%llx cookie=%p len=%lx",
3698             (void *)dhp, off, cookie, len);
3699         DEBUGF(2, (CE_CONT, "devmap_umem_remap: dhp %p offset %llx "
3700             "cookie %p len %lx\n", (void *)dhp, off, (void *)cookie, len));
3701 
3702 #ifdef lint
3703         dip = dip;
3704         accattrp = accattrp;
3705 #endif
3706         /*
3707          * Return failure if setup has not been done or no remap permission
3708          * has been granted during the setup.
3709          */
3710         if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
3711             (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
3712                 return (DDI_FAILURE);
3713 
3714         /* No flags supported for remap yet */
3715         if (flags != 0)
3716                 return (DDI_FAILURE);
3717 
3718         /* check if the cache attributes are supported */
3719         if (i_ddi_check_cache_attr(flags) == B_FALSE)
3720                 return (DDI_FAILURE);
3721 
3722         if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
3723                 return (DDI_FAILURE);
3724 
3725         /* For UMEM_TRASH, this restriction is not needed */
3726         if ((off + len) > cp->size)
3727                 return (DDI_FAILURE);
3728 
3729         HOLD_DHP_LOCK(dhp);
3730         /*
3731          * Unload the old mapping, so the next fault will set up the new
3732          * mappings.  Do this while holding the dhp lock so other faults
3733          * don't reestablish the mappings.
3734          */
3735         hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
3736             dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
3737 
3738         dhp->dh_cookie = cookie;
3739         dhp->dh_roff = ptob(btop(off));
3740         dhp->dh_cvaddr = cp->cvaddr + dhp->dh_roff;
3741         /* set HAT cache attributes */
3742         i_ddi_cacheattr_to_hatacc(flags, &dhp->dh_hat_attr);
3743         /* set HAT endianness attributes */
3744         i_ddi_devacc_to_hatacc(accattrp, &dhp->dh_hat_attr);
3745 
3746         /* clear the large page size flag */
3747         dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
3748 
3749         dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
3750         ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
3751         RELE_DHP_LOCK(dhp);
3752         return (DDI_SUCCESS);
3753 }
3754 
3755 /*
3756  * Set the timeout value for the driver's context management callback,
3757  * e.g. devmap_access().
3758  */
3759 void
3760 devmap_set_ctx_timeout(devmap_cookie_t dhc, clock_t ticks)
3761 {
3762         devmap_handle_t *dhp = (devmap_handle_t *)dhc;
3763 
3764         TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_SET_CTX_TIMEOUT,
3765             "devmap_set_ctx_timeout:start dhp=%p ticks=%x",
3766             (void *)dhp, ticks);
3767         dhp->dh_timeout_length = ticks;
3768 }
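
/*
 * Illustrative sketch (hypothetical driver code, not part of this file):
 * the timeout is usually set from the driver's devmap_map(9E) callback,
 * once per mapping.  xx_softstate() and XX_CTX_TIMEOUT_USEC are
 * placeholders for the driver's private-data lookup and chosen timeout.
 *
 *      static int
 *      xx_devmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
 *          offset_t off, size_t len, void **pvtp)
 *      {
 *              devmap_set_ctx_timeout(dhp, drv_usectohz(XX_CTX_TIMEOUT_USEC));
 *              *pvtp = xx_softstate(dev);
 *              return (0);
 *      }
 */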
3769 
3770 int
3771 devmap_default_access(devmap_cookie_t dhp, void *pvtp, offset_t off,
3772     size_t len, uint_t type, uint_t rw)
3773 {
3774 #ifdef lint
3775         pvtp = pvtp;
3776 #endif
3777 
3778         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_DEFAULT_ACCESS,
3779             "devmap_default_access:start");
3780         return (devmap_load(dhp, off, len, type, rw));
3781 }
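
/*
 * Illustrative sketch (hypothetical driver code, not part of this file):
 * a driver that wants the devmap callbacks but needs no special context
 * management can point the devmap_access field of its devmap_callback_ctl
 * at devmap_default_access().  The fields below are, in order, devmap_rev,
 * devmap_map, devmap_access, devmap_dup and devmap_unmap; the xx_ names
 * are placeholders.
 *
 *      static struct devmap_callback_ctl xx_callbackops = {
 *              DEVMAP_OPS_REV,
 *              xx_devmap_map,
 *              devmap_default_access,
 *              xx_devmap_dup,
 *              xx_devmap_unmap,
 *      };
 */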
3782 
3783 /*
3784  * segkmem_alloc() wrapper to allocate memory which is both
3785  * non-relocatable (for DR) and sharelocked, since the rest
3786  * of this segment driver requires it.
3787  */
3788 static void *
3789 devmap_alloc_pages(vmem_t *vmp, size_t size, int vmflag)
3790 {
3791         ASSERT(vmp != NULL);
3792         ASSERT(kvseg.s_base != NULL);
3793         vmflag |= (VM_NORELOC | SEGKMEM_SHARELOCKED);
3794         return (segkmem_alloc(vmp, size, vmflag));
3795 }
3796 
3797 /*
3798  * This is where things are a bit incestuous with seg_kmem: unlike
3799  * seg_kp, seg_kmem does not keep its pages long-term sharelocked, so
3800  * we need to do a bit of a dance around that to prevent duplication of
3801  * code until we decide to bite the bullet and implement a new kernel
3802  * segment for driver-allocated memory that is exported to user space.
3803  */
3804 static void
3805 devmap_free_pages(vmem_t *vmp, void *inaddr, size_t size)
3806 {
3807         page_t *pp;
3808         caddr_t addr = inaddr;
3809         caddr_t eaddr;
3810         pgcnt_t npages = btopr(size);
3811 
3812         ASSERT(vmp != NULL);
3813         ASSERT(kvseg.s_base != NULL);
3814         ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
3815 
3816         hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
3817 
3818         for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
3819                 /*
3820                  * Use page_find() instead of page_lookup() to find the page
3821                  * since we know that it is hashed and has a shared lock.
3822                  */
3823                 pp = page_find(&kvp, (u_offset_t)(uintptr_t)addr);
3824 
3825                 if (pp == NULL)
3826                         panic("devmap_free_pages: page not found");
3827                 if (!page_tryupgrade(pp)) {
3828                         page_unlock(pp);
3829                         pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr,
3830                             SE_EXCL);
3831                         if (pp == NULL)
3832                                 panic("devmap_free_pages: page already freed");
3833                 }
3834                 /* Clear p_lckcnt so page_destroy() doesn't update availrmem */
3835                 pp->p_lckcnt = 0;
3836                 page_destroy(pp, 0);
3837         }
3838         page_unresv(npages);
3839 
3840         if (vmp != NULL)
3841                 vmem_free(vmp, inaddr, size);
3842 }
3843 
3844 /*
3845  * devmap_umem_alloc_np() replaces kmem_zalloc() as the method for
3846  * allocating non-pageable kmem in response to a ddi_umem_alloc()
3847  * default request. For now we allocate our own pages and we keep
3848  * them long-term sharelocked, since: A) the fault routines expect the
3849  * memory to already be locked; B) pageable umem is already long-term
3850  * locked; C) it's a lot of work to make it otherwise, particularly
3851  * since the nexus layer expects the pages to never fault. An RFE is to
3852  * not keep the pages long-term locked, but instead to be able to
3853  * take faults on them and simply look them up in kvp in case we
3854  * fault on them. Even then, we must take care not to let pageout
3855  * steal them from us since the data must remain resident; if we
3856  * do this we must come up with some way to pin the pages to prevent
3857  * faults while a driver is doing DMA to/from them.
3858  */
3859 static void *
3860 devmap_umem_alloc_np(size_t size, size_t flags)
3861 {
3862         void *buf;
3863         int vmflags = (flags & DDI_UMEM_NOSLEEP)? VM_NOSLEEP : VM_SLEEP;
3864 
3865         buf = vmem_alloc(umem_np_arena, size, vmflags);
3866         if (buf != NULL)
3867                 bzero(buf, size);
3868         return (buf);
3869 }
3870 
3871 static void
3872 devmap_umem_free_np(void *addr, size_t size)
3873 {
3874         vmem_free(umem_np_arena, addr, size);
3875 }
3876 
3877 /*
3878  * Allocate page-aligned kernel memory for exporting to user land.
3879  * The devmap framework will use the cookie allocated by ddi_umem_alloc()
3880  * to find a user virtual address that is in the same color as the
3881  * address allocated here.
3882  */
3883 void *
3884 ddi_umem_alloc(size_t size, int flags, ddi_umem_cookie_t *cookie)
3885 {
3886         register size_t len = ptob(btopr(size));
3887         void *buf = NULL;
3888         struct ddi_umem_cookie *cp;
3889         int iflags = 0;
3890 
3891         *cookie = NULL;
3892 
3893         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_ALLOC,
3894             "devmap_umem_alloc:start");
3895         if (len == 0)
3896                 return ((void *)NULL);
3897 
3898         /*
3899          * allocate cookie
3900          */
3901         if ((cp = kmem_zalloc(sizeof (struct ddi_umem_cookie),
3902             flags & DDI_UMEM_NOSLEEP ? KM_NOSLEEP : KM_SLEEP)) == NULL) {
3903                 ASSERT(flags & DDI_UMEM_NOSLEEP);
3904                 return ((void *)NULL);
3905         }
3906 
3907         if (flags & DDI_UMEM_PAGEABLE) {
3908                 /* Only one of the flags is allowed */
3909                 ASSERT(!(flags & DDI_UMEM_TRASH));
3910                 /* initialize resource with 0 */
3911                 iflags = KPD_ZERO;
3912 
3913                 /*
3914                  * To allocate unlocked pageable memory, use segkp_get() to
3915                  * create a segkp segment.  Since segkp can only service kas,
3916                  * other segment drivers such as segdev have to do
3917                  * as_fault(segkp, SOFTLOCK) in their fault routines.
3918                  */
3919                 if (flags & DDI_UMEM_NOSLEEP)
3920                         iflags |= KPD_NOWAIT;
3921 
3922                 if ((buf = segkp_get(segkp, len, iflags)) == NULL) {
3923                         kmem_free(cp, sizeof (struct ddi_umem_cookie));
3924                         return ((void *)NULL);
3925                 }
3926                 cp->type = KMEM_PAGEABLE;
3927                 mutex_init(&cp->lock, NULL, MUTEX_DEFAULT, NULL);
3928                 cp->locked = 0;
3929         } else if (flags & DDI_UMEM_TRASH) {
3930                 /* Only one of the flags is allowed */
3931                 ASSERT(!(flags & DDI_UMEM_PAGEABLE));
3932                 cp->type = UMEM_TRASH;
3933                 buf = NULL;
3934         } else {
3935                 if ((buf = devmap_umem_alloc_np(len, flags)) == NULL) {
3936                         kmem_free(cp, sizeof (struct ddi_umem_cookie));
3937                         return ((void *)NULL);
3938                 }
3939 
3940                 cp->type = KMEM_NON_PAGEABLE;
3941         }
3942 
3943         /*
3944          * need to save size here.  size will be used when
3945          * we do kmem_free.
3946          */
3947         cp->size = len;
3948         cp->cvaddr = (caddr_t)buf;
3949 
3950         *cookie =  (void *)cp;
3951         return (buf);
3952 }
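
/*
 * Illustrative sketch (hypothetical driver code, not part of this file):
 * ddi_umem_alloc() itself rounds the size up to whole pages, so a driver
 * typically allocates the buffer it intends to export at attach(9E) time,
 * keeps the cookie in its soft state, and releases the memory at
 * detach(9E) time.  xx_sp and XX_BUFSZ are placeholders.
 *
 *      xx_sp->xx_kva = ddi_umem_alloc(XX_BUFSZ, DDI_UMEM_SLEEP,
 *          &xx_sp->xx_umem_cookie);
 *
 * and later, typically from detach(9E):
 *
 *      ddi_umem_free(xx_sp->xx_umem_cookie);
 */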
3953 
3954 void
3955 ddi_umem_free(ddi_umem_cookie_t cookie)
3956 {
3957         struct ddi_umem_cookie *cp;
3958 
3959         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_FREE,
3960             "devmap_umem_free:start");
3961 
3962         /*
3963          * If cookie is NULL, this call has no effect on the system.
3964          */
3965         if (cookie == NULL)
3966                 return;
3967 
3968         cp = (struct ddi_umem_cookie *)cookie;
3969 
3970         switch (cp->type) {
3971         case KMEM_PAGEABLE :
3972                 ASSERT(cp->cvaddr != NULL && cp->size != 0);
3973                 /*
3974                  * Check if there are still any pending faults on the cookie
3975                  * while the driver is deleting it.
3976                  * XXX - could change to an ASSERT but won't catch errant drivers
3977                  */
3978                 mutex_enter(&cp->lock);
3979                 if (cp->locked) {
3980                         mutex_exit(&cp->lock);
3981                         panic("ddi_umem_free for cookie with pending faults %p",
3982                             (void *)cp);
3983                         return;
3984                 }
3985 
3986                 segkp_release(segkp, cp->cvaddr);
3987 
3988                 /*
3989                  * release mutex associated with this cookie.
3990                  */
3991                 mutex_destroy(&cp->lock);
3992                 break;
3993         case KMEM_NON_PAGEABLE :
3994                 ASSERT(cp->cvaddr != NULL && cp->size != 0);
3995                 devmap_umem_free_np(cp->cvaddr, cp->size);
3996                 break;
3997         case UMEM_TRASH :
3998                 break;
3999         case UMEM_LOCKED :
4000                 /* Callers should use ddi_umem_unlock for this type */
4001                 ddi_umem_unlock(cookie);
4002                 /* Frees the cookie too */
4003                 return;
4004         default:
4005                 /* panic so we can diagnose the underlying cause */
4006                 panic("ddi_umem_free: illegal cookie type 0x%x\n",
4007                     cp->type);
4008         }
4009 
4010         kmem_free(cookie, sizeof (struct ddi_umem_cookie));
4011 }
4012 
4013 
4014 static int
4015 segdev_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
4016 {
4017         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
4018 
4019         /*
4020          * It looks as if it is always mapped shared
4021          */
4022         TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_GETMEMID,
4023             "segdev_getmemid:start");
4024         memidp->val[0] = (uintptr_t)VTOCVP(sdp->vp);
4025         memidp->val[1] = sdp->offset + (uintptr_t)(addr - seg->s_base);
4026         return (0);
4027 }
4028 
4029 /*
4030  * ddi_umem_alloc() non-pageable quantum cache max size.
4031  * This is just a SWAG.
4032  */
4033 #define DEVMAP_UMEM_QUANTUM     (8*PAGESIZE)
4034 
4035 /*
4036  * Initialize seg_dev from boot. This routine sets up the trash page
4037  * and creates the umem_np_arena used to back non-pageable memory
4038  * requests.
4039  */
4040 void
4041 segdev_init(void)
4042 {
4043         struct seg kseg;
4044 
4045         umem_np_arena = vmem_create("umem_np", NULL, 0, PAGESIZE,
4046             devmap_alloc_pages, devmap_free_pages, heap_arena,
4047             DEVMAP_UMEM_QUANTUM, VM_SLEEP);
4048 
4049         kseg.s_as = &kas;
4050         trashpp = page_create_va(&trashvp, 0, PAGESIZE,
4051             PG_NORELOC | PG_EXCL | PG_WAIT, &kseg, NULL);
4052         if (trashpp == NULL)
4053                 panic("segdev_init: failed to create trash page");
4054         pagezero(trashpp, 0, PAGESIZE);
4055         page_downgrade(trashpp);
4056 }
4057 
4058 /*
4059  * Invoke platform-dependent support routines so that /proc can have
4060  * the platform code deal with curious hardware.
4061  */
4062 int
4063 segdev_copyfrom(struct seg *seg,
4064     caddr_t uaddr, const void *devaddr, void *kaddr, size_t len)
4065 {
4066         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
4067         struct snode *sp = VTOS(VTOCVP(sdp->vp));
4068 
4069         return (e_ddi_copyfromdev(sp->s_dip,
4070             (off_t)(uaddr - seg->s_base), devaddr, kaddr, len));
4071 }
4072 
4073 int
4074 segdev_copyto(struct seg *seg,
4075     caddr_t uaddr, const void *kaddr, void *devaddr, size_t len)
4076 {
4077         struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
4078         struct snode *sp = VTOS(VTOCVP(sdp->vp));
4079 
4080         return (e_ddi_copytodev(sp->s_dip,
4081             (off_t)(uaddr - seg->s_base), kaddr, devaddr, len));
4082 }