1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright 2012, Josef 'Jeff' Sipek <jeffpc@31bits.net>. All rights reserved.
  25  */
  26 
  27 #include <sys/types.h>
  28 #include <sys/param.h>
  29 #include <sys/systm.h>
  30 #include <sys/vm.h>
  31 #include <sys/proc.h>
  32 #include <sys/file.h>
  33 #include <sys/conf.h>
  34 #include <sys/kmem.h>
  35 #include <sys/mem.h>
  36 #include <sys/mman.h>
  37 #include <sys/vnode.h>
  38 #include <sys/errno.h>
  39 #include <sys/memlist.h>
  40 #include <sys/dumphdr.h>
  41 #include <sys/dumpadm.h>
  42 #include <sys/ksyms.h>
  43 #include <sys/compress.h>
  44 #include <sys/stream.h>
  45 #include <sys/strsun.h>
  46 #include <sys/cmn_err.h>
  47 #include <sys/bitmap.h>
  48 #include <sys/modctl.h>
  49 #include <sys/utsname.h>
  50 #include <sys/systeminfo.h>
  51 #include <sys/vmem.h>
  52 #include <sys/log.h>
  53 #include <sys/var.h>
  54 #include <sys/debug.h>
  55 #include <sys/sunddi.h>
  56 #include <fs/fs_subr.h>
  57 #include <sys/fs/snode.h>
  58 #include <sys/ontrap.h>
  59 #include <sys/panic.h>
  60 #include <sys/dkio.h>
  61 #include <sys/vtoc.h>
  62 #include <sys/errorq.h>
  63 #include <sys/fm/util.h>
  64 #include <sys/fs/zfs.h>
  65 
  66 #include <vm/hat.h>
  67 #include <vm/as.h>
  68 #include <vm/page.h>
  69 #include <vm/pvn.h>
  70 #include <vm/seg.h>
  71 #include <vm/seg_kmem.h>
  72 #include <sys/clock_impl.h>
  73 #include <sys/hold_page.h>
  74 
  75 /*
  76  * exported vars
  77  */
kmutex_t        dump_lock;              /* lock for dump configuration */
dumphdr_t       *dumphdr;               /* dump header */
int             dump_conflags = DUMP_KERNEL; /* dump configuration flags */
vnode_t         *dumpvp;                /* dump device vnode pointer */
u_offset_t      dumpvp_size;            /* size of dump device, in bytes */
char            *dumppath;              /* pathname of dump device */
int             dump_timeout = 120;     /* timeout for dumping pages */
int             dump_timeleft;          /* portion of dump_timeout remaining */
int             dump_ioerr;             /* dump i/o error */
char        *dump_stack_scratch; /* scratch area for saving stack summary */

/*
 * Tunables for dump.  These can be set via /etc/system.
 *
 * dump_metrics_on      if set, metrics are collected in the kernel, passed
 *      to savecore via the dump file, and recorded by savecore in
 *      METRICS.txt.
 */

/*
 * Tunables for the pre-reserved crash-time heap.  They are combined as
 * dump_kmem_permap + dump_kmem_pages * PAGESIZE and passed to
 * kmem_dump_init() from dump_update_clevel().
 */
uint_t dump_kmem_permap = 1024;
uint_t dump_kmem_pages = 8;
 100 
 101 /*
 102  * Compression metrics are accumulated nano-second subtotals. The
 103  * results are normalized by the number of pages dumped. A report is
 104  * generated when dumpsys() completes and is saved in the dump image
 105  * after the trailing dump header.
 106  *
 107  * Metrics are always collected. Set the variable dump_metrics_on to
 108  * cause metrics to be saved in the crash file, where savecore will
 109  * save it in the file METRICS.txt.
 110  */
/*
 * X-macro list of the timed per-page dump phases.  Each PERPAGE(x)
 * expands once per phase; perpage_t below instantiates one hrtime_t
 * accumulator per phase.
 */
#define PERPAGES \
        PERPAGE(bitmap) PERPAGE(map) PERPAGE(unmap) \
        PERPAGE(copy) PERPAGE(compress) \
        PERPAGE(write) \
        PERPAGE(inwait) PERPAGE(outwait)

typedef struct perpage {
#define PERPAGE(x) hrtime_t x;
        PERPAGES
#undef PERPAGE
} perpage_t;

/*
 * This macro controls the code generation for collecting dump
 * performance information. By default, the code is generated, but
 * automatic saving of the information is disabled. If dump_metrics_on
 * is set to 1, the timing information is passed to savecore via the
 * crash file, where it is appended to the file dump-dir/METRICS.txt.
 */
#define COLLECT_METRICS

#ifdef COLLECT_METRICS
uint_t dump_metrics_on = 0;     /* set to 1 to enable recording metrics */

/*
 * Timing helpers: v names a perpage_t accumulator with a companion
 * timestamp struct v##ts; m is the phase member being timed.
 * NOTE(review): HRBEGIN also updates v.size, which perpage_t does not
 * declare -- confirm its callers pass a struct that has a size field.
 */
#define HRSTART(v, m)           v##ts.m = gethrtime()
#define HRSTOP(v, m)            v.m += gethrtime() - v##ts.m
#define HRBEGIN(v, m, s)        v##ts.m = gethrtime(); v.size += s
#define HREND(v, m)             v.m += gethrtime() - v##ts.m
#define HRNORM(v, m, n)         v.m /= (n)

#else
#define HRSTART(v, m)
#define HRSTOP(v, m)
#define HRBEGIN(v, m, s)
#define HREND(v, m)
#define HRNORM(v, m, n)
#endif  /* COLLECT_METRICS */

static char dump_osimage_uuid[36 + 1];  /* 36-char uuid string + NUL */

/* local ctype-style character test macros */
#define isdigit(ch)     ((ch) >= '0' && (ch) <= '9')
#define isxdigit(ch)    (isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
                        ((ch) >= 'A' && (ch) <= 'F'))
 154 
 155 /*
 156  * Dynamic state when dumpsys() is running.
 157  */
 158 typedef struct dumpsync {
 159         pgcnt_t npages;                 /* subtotal of pages dumped */
 160         pgcnt_t pages_mapped;           /* subtotal of pages mapped */
 161         pgcnt_t pages_used;             /* subtotal of pages used per map */
 162         size_t nwrite;                  /* subtotal of bytes written */
 163         uint_t percent;                 /* dump progress */
 164         uint_t percent_done;            /* dump progress reported */
 165         hrtime_t start;                 /* start time */
 166         hrtime_t elapsed;               /* elapsed time when completed */
 167         hrtime_t iotime;                /* time spent writing nwrite bytes */
 168         hrtime_t iowait;                /* time spent waiting for output */
 169         hrtime_t iowaitts;              /* iowait timestamp */
 170         perpage_t perpage;              /* metrics */
 171         perpage_t perpagets;
 172 } dumpsync_t;
 173 
 174 static dumpsync_t dumpsync;             /* synchronization vars */
 175 
 176 /*
 177  * configuration vars for dumpsys
 178  */
 179 typedef struct dumpcfg {
 180         perpage_t perpage;      /* per page metrics */
 181         perpage_t perpagets;    /* per page metrics (timestamps) */
 182         char *page;             /* buffer for page copy */
 183         char *lzbuf;            /* lzjb output */
 184 
 185         char *cmap;             /* array of input (map) buffers */
 186         ulong_t *bitmap;        /* bitmap for marking pages to dump */
 187         pgcnt_t bitmapsize;     /* size of bitmap */
 188         pid_t *pids;            /* list of process IDs at dump time */
 189 } dumpcfg_t;
 190 
 191 static dumpcfg_t dumpcfg;       /* config vars */
 192 
 193 /*
 194  * The dump I/O buffer.
 195  *
 196  * There is one I/O buffer used by dumpvp_write and dumvp_flush. It is
 197  * sized according to the optimum device transfer speed.
 198  */
 199 typedef struct dumpbuf {
 200         vnode_t *cdev_vp;       /* VCHR open of the dump device */
 201         len_t   vp_limit;       /* maximum write offset */
 202         offset_t vp_off;        /* current dump device offset */
 203         char    *cur;           /* dump write pointer */
 204         char    *start;         /* dump buffer address */
 205         char    *end;           /* dump buffer end */
 206         size_t  size;           /* size of dumpbuf in bytes */
 207         size_t  iosize;         /* best transfer size for device */
 208 } dumpbuf_t;
 209 
 210 static dumpbuf_t dumpbuf;       /* I/O buffer */
 211 
 212 /*
 213  * The dump I/O buffer must be at least one page, at most xfer_size
 214  * bytes, and should scale with physmem in between.  The transfer size
 215  * passed in will either represent a global default (maxphys) or the
 216  * best size for the device.  The size of the dumpbuf I/O buffer is
 217  * limited by dumpbuf_limit (8MB by default) because the dump
 218  * performance saturates beyond a certain size.  The default is to
 219  * select 1/4096 of the memory.
 220  */
 221 static int      dumpbuf_fraction = 12;  /* memory size scale factor */
 222 static size_t   dumpbuf_limit = 8 << 20;  /* max I/O buf size */
 223 
 224 static size_t
 225 dumpbuf_iosize(size_t xfer_size)
 226 {
 227         size_t iosize = ptob(physmem >> dumpbuf_fraction);
 228 
 229         if (iosize < PAGESIZE)
 230                 iosize = PAGESIZE;
 231         else if (iosize > xfer_size)
 232                 iosize = xfer_size;
 233         if (iosize > dumpbuf_limit)
 234                 iosize = dumpbuf_limit;
 235         return (iosize & PAGEMASK);
 236 }
 237 
 238 /*
 239  * resize the I/O buffer
 240  */
 241 static void
 242 dumpbuf_resize(void)
 243 {
 244         char *old_buf = dumpbuf.start;
 245         size_t old_size = dumpbuf.size;
 246         char *new_buf;
 247         size_t new_size;
 248 
 249         ASSERT(MUTEX_HELD(&dump_lock));
 250 
 251         new_size = dumpbuf_iosize(MAX(dumpbuf.iosize, maxphys));
 252         if (new_size <= old_size)
 253                 return; /* no need to reallocate buffer */
 254 
 255         new_buf = kmem_alloc(new_size, KM_SLEEP);
 256         dumpbuf.size = new_size;
 257         dumpbuf.start = new_buf;
 258         dumpbuf.end = new_buf + new_size;
 259         kmem_free(old_buf, old_size);
 260 }
 261 
 262 /*
 263  * dump_update_clevel is called when dumpadm configures the dump device.
 264  *      Allocate the minimum configuration for now.
 265  *
 266  * When the dump file is configured we reserve a minimum amount of
 267  * memory for use at crash time. But we reserve VA for all the memory
 268  * we really want in order to do the fastest dump possible. The VA is
 269  * backed by pages not being dumped, according to the bitmap. If
 270  * there is insufficient spare memory, however, we fall back to the
 271  * minimum.
 272  *
 273  * Live dump (savecore -L) always uses the minimum config.
 274  *
 275  * For single-threaded dumps, the panic CPU does lzjb compression.
 276  *
 277  */
 278 static void
 279 dump_update_clevel()
 280 {
 281         dumpcfg_t *old = &dumpcfg;
 282         dumpcfg_t newcfg = *old;
 283         dumpcfg_t *new = &newcfg;
 284 
 285         ASSERT(MUTEX_HELD(&dump_lock));
 286 
 287         /*
 288          * Free the previously allocated bufs and VM.
 289          */
 290         if (old->lzbuf)
 291                 kmem_free(old->lzbuf, PAGESIZE);
 292         if (old->page)
 293                 kmem_free(old->page, PAGESIZE);
 294 
 295         if (old->cmap)
 296                 /* VM space for mapping pages */
 297                 vmem_xfree(heap_arena, old->cmap, PAGESIZE);
 298 
 299         /*
 300          * Allocate new data structures and buffers, and also figure the max
 301          * desired size.
 302          */
 303         new->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP);
 304         new->page = kmem_alloc(PAGESIZE, KM_SLEEP);
 305 
 306         new->cmap = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
 307                                 0, 0, NULL, NULL, VM_SLEEP);
 308 
 309         /*
 310          * Reserve memory for kmem allocation calls made during crash
 311          * dump.  The hat layer allocates memory for each mapping
 312          * created, and the I/O path allocates buffers and data structs.
 313          * Add a few pages for safety.
 314          */
 315         kmem_dump_init(dump_kmem_permap + (dump_kmem_pages * PAGESIZE));
 316 
 317         /* set new config pointers */
 318         *old = *new;
 319 }
 320 
 321 /*
 322  * Define a struct memlist walker to optimize bitnum to pfn
 323  * lookup. The walker maintains the state of the list traversal.
 324  */
 325 typedef struct dumpmlw {
 326         struct memlist  *mp;            /* current memlist */
 327         pgcnt_t         basenum;        /* bitnum base offset */
 328         pgcnt_t         mppages;        /* current memlist size */
 329         pgcnt_t         mpleft;         /* size to end of current memlist */
 330         pfn_t           mpaddr;         /* first pfn in memlist */
 331 } dumpmlw_t;
 332 
 333 /* initialize the walker */
 334 static inline void
 335 dump_init_memlist_walker(dumpmlw_t *pw)
 336 {
 337         pw->mp = phys_install;
 338         pw->basenum = 0;
 339         pw->mppages = pw->mp->ml_size >> PAGESHIFT;
 340         pw->mpleft = pw->mppages;
 341         pw->mpaddr = pw->mp->ml_address >> PAGESHIFT;
 342 }
 343 
 344 /*
 345  * Lookup pfn given bitnum. The memlist can be quite long on some
 346  * systems (e.g.: one per board). To optimize sequential lookups, the
 347  * caller initializes and presents a memlist walker.
 348  */
 349 static pfn_t
 350 dump_bitnum_to_pfn(pgcnt_t bitnum, dumpmlw_t *pw)
 351 {
 352         bitnum -= pw->basenum;
 353         while (pw->mp != NULL) {
 354                 if (bitnum < pw->mppages) {
 355                         pw->mpleft = pw->mppages - bitnum;
 356                         return (pw->mpaddr + bitnum);
 357                 }
 358                 bitnum -= pw->mppages;
 359                 pw->basenum += pw->mppages;
 360                 pw->mp = pw->mp->ml_next;
 361                 if (pw->mp != NULL) {
 362                         pw->mppages = pw->mp->ml_size >> PAGESHIFT;
 363                         pw->mpleft = pw->mppages;
 364                         pw->mpaddr = pw->mp->ml_address >> PAGESHIFT;
 365                 }
 366         }
 367         return (PFN_INVALID);
 368 }
 369 
 370 static pgcnt_t
 371 dump_pfn_to_bitnum(pfn_t pfn)
 372 {
 373         struct memlist *mp;
 374         pgcnt_t bitnum = 0;
 375 
 376         for (mp = phys_install; mp != NULL; mp = mp->ml_next) {
 377                 if (pfn >= (mp->ml_address >> PAGESHIFT) &&
 378                     pfn < ((mp->ml_address + mp->ml_size) >> PAGESHIFT))
 379                         return (bitnum + pfn - (mp->ml_address >> PAGESHIFT));
 380                 bitnum += mp->ml_size >> PAGESHIFT;
 381         }
 382         return ((pgcnt_t)-1);
 383 }
 384 
/*
 * Initialize the dump header and size-dependent buffers.  The header,
 * I/O buffer, pid list and stack scratch area are allocated only on
 * the first call; the page bitmap is (re)allocated whenever the
 * physical page count changes.  Caller holds dump_lock.
 */
static void
dumphdr_init(void)
{
        pgcnt_t npages;

        ASSERT(MUTEX_HELD(&dump_lock));

        if (dumphdr == NULL) {
                /* first call: fill in the invariant header fields */
                dumphdr = kmem_zalloc(sizeof (dumphdr_t), KM_SLEEP);
                dumphdr->dump_magic = DUMP_MAGIC;
                dumphdr->dump_version = DUMP_VERSION;
                dumphdr->dump_wordsize = DUMP_WORDSIZE;
                dumphdr->dump_pageshift = PAGESHIFT;
                dumphdr->dump_pagesize = PAGESIZE;
                dumphdr->dump_utsname = utsname;
                (void) strcpy(dumphdr->dump_platform, platform);
                dumpbuf.size = dumpbuf_iosize(maxphys);
                dumpbuf.start = kmem_alloc(dumpbuf.size, KM_SLEEP);
                dumpbuf.end = dumpbuf.start + dumpbuf.size;
                /* room for one pid per process-table slot */
                dumpcfg.pids = kmem_alloc(v.v_proc * sizeof (pid_t), KM_SLEEP);
                dump_stack_scratch = kmem_alloc(STACK_BUF_SIZE, KM_SLEEP);
                /*
                 * NOTE(review): strncpy does not NUL-terminate when the
                 * source fills the destination -- confirm dump_uuid is
                 * sized with room for the terminator or that consumers
                 * tolerate an unterminated field.
                 */
                (void) strncpy(dumphdr->dump_uuid, dump_get_uuid(),
                    sizeof (dumphdr->dump_uuid));
        }

        npages = num_phys_pages();

        /* (re)allocate the bitmap if physical memory size changed */
        if (dumpcfg.bitmapsize != npages) {
                void *map = kmem_alloc(BT_SIZEOFMAP(npages), KM_SLEEP);

                if (dumpcfg.bitmap != NULL)
                        kmem_free(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg.
                            bitmapsize));
                dumpcfg.bitmap = map;
                dumpcfg.bitmapsize = npages;
        }
}
 422 
 423 /*
 424  * Establish a new dump device.
 425  */
 426 int
 427 dumpinit(vnode_t *vp, char *name, int justchecking)
 428 {
 429         vnode_t *cvp;
 430         vattr_t vattr;
 431         vnode_t *cdev_vp;
 432         int error = 0;
 433 
 434         ASSERT(MUTEX_HELD(&dump_lock));
 435 
 436         dumphdr_init();
 437 
 438         cvp = common_specvp(vp);
 439         if (cvp == dumpvp)
 440                 return (0);
 441 
 442         /*
 443          * Determine whether this is a plausible dump device.  We want either:
 444          * (1) a real device that's not mounted and has a cb_dump routine, or
 445          * (2) a swapfile on some filesystem that has a vop_dump routine.
 446          */
 447         if ((error = VOP_OPEN(&cvp, FREAD | FWRITE, kcred, NULL)) != 0)
 448                 return (error);
 449 
 450         vattr.va_mask = AT_SIZE | AT_TYPE | AT_RDEV;
 451         if ((error = VOP_GETATTR(cvp, &vattr, 0, kcred, NULL)) == 0) {
 452                 if (vattr.va_type == VBLK || vattr.va_type == VCHR) {
 453                         if (devopsp[getmajor(vattr.va_rdev)]->
 454                             devo_cb_ops->cb_dump == nodev)
 455                                 error = ENOTSUP;
 456                         else if (vfs_devismounted(vattr.va_rdev))
 457                                 error = EBUSY;
 458                         if (strcmp(ddi_driver_name(VTOS(cvp)->s_dip),
 459                             ZFS_DRIVER) == 0 &&
 460                             IS_SWAPVP(common_specvp(cvp)))
 461                                         error = EBUSY;
 462                 } else {
 463                         if (vn_matchopval(cvp, VOPNAME_DUMP, fs_nosys) ||
 464                             !IS_SWAPVP(cvp))
 465                                 error = ENOTSUP;
 466                 }
 467         }
 468 
 469         if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE)
 470                 error = ENOSPC;
 471 
 472         if (error || justchecking) {
 473                 (void) VOP_CLOSE(cvp, FREAD | FWRITE, 1, (offset_t)0,
 474                     kcred, NULL);
 475                 return (error);
 476         }
 477 
 478         VN_HOLD(cvp);
 479 
 480         if (dumpvp != NULL)
 481                 dumpfini();     /* unconfigure the old dump device */
 482 
 483         dumpvp = cvp;
 484         dumpvp_size = vattr.va_size & -DUMP_OFFSET;
 485         dumppath = kmem_alloc(strlen(name) + 1, KM_SLEEP);
 486         (void) strcpy(dumppath, name);
 487         dumpbuf.iosize = 0;
 488 
 489         /*
 490          * If the dump device is a block device, attempt to open up the
 491          * corresponding character device and determine its maximum transfer
 492          * size.  We use this information to potentially resize dumpbuf to a
 493          * larger and more optimal size for performing i/o to the dump device.
 494          */
 495         if (cvp->v_type == VBLK &&
 496             (cdev_vp = makespecvp(VTOS(cvp)->s_dev, VCHR)) != NULL) {
 497                 if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) {
 498                         size_t blk_size;
 499                         struct dk_cinfo dki;
 500                         struct dk_minfo minf;
 501 
 502                         if (VOP_IOCTL(cdev_vp, DKIOCGMEDIAINFO,
 503                             (intptr_t)&minf, FKIOCTL, kcred, NULL, NULL)
 504                             == 0 && minf.dki_lbsize != 0)
 505                                 blk_size = minf.dki_lbsize;
 506                         else
 507                                 blk_size = DEV_BSIZE;
 508 
 509                         if (VOP_IOCTL(cdev_vp, DKIOCINFO, (intptr_t)&dki,
 510                             FKIOCTL, kcred, NULL, NULL) == 0) {
 511                                 dumpbuf.iosize = dki.dki_maxtransfer * blk_size;
 512                                 dumpbuf_resize();
 513                         }
 514                         /*
 515                          * If we are working with a zvol then dumpify it
 516                          * if it's not being used as swap.
 517                          */
 518                         if (strcmp(dki.dki_dname, ZVOL_DRIVER) == 0) {
 519                                 if (IS_SWAPVP(common_specvp(cvp)))
 520                                         error = EBUSY;
 521                                 else if ((error = VOP_IOCTL(cdev_vp,
 522                                     DKIOCDUMPINIT, NULL, FKIOCTL, kcred,
 523                                     NULL, NULL)) != 0)
 524                                         dumpfini();
 525                         }
 526 
 527                         (void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0,
 528                             kcred, NULL);
 529                 }
 530 
 531                 VN_RELE(cdev_vp);
 532         }
 533 
 534         cmn_err(CE_CONT, "?dump on %s size %llu MB\n", name, dumpvp_size >> 20);
 535 
 536         dump_update_clevel();
 537 
 538         return (error);
 539 }
 540 
 541 void
 542 dumpfini(void)
 543 {
 544         vattr_t vattr;
 545         boolean_t is_zfs = B_FALSE;
 546         vnode_t *cdev_vp;
 547         ASSERT(MUTEX_HELD(&dump_lock));
 548 
 549         kmem_free(dumppath, strlen(dumppath) + 1);
 550 
 551         /*
 552          * Determine if we are using zvols for our dump device
 553          */
 554         vattr.va_mask = AT_RDEV;
 555         if (VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL) == 0) {
 556                 is_zfs = (getmajor(vattr.va_rdev) ==
 557                     ddi_name_to_major(ZFS_DRIVER)) ? B_TRUE : B_FALSE;
 558         }
 559 
 560         /*
 561          * If we have a zvol dump device then we call into zfs so
 562          * that it may have a chance to cleanup.
 563          */
 564         if (is_zfs &&
 565             (cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR)) != NULL) {
 566                 if (VOP_OPEN(&cdev_vp, FREAD | FWRITE, kcred, NULL) == 0) {
 567                         (void) VOP_IOCTL(cdev_vp, DKIOCDUMPFINI, NULL, FKIOCTL,
 568                             kcred, NULL, NULL);
 569                         (void) VOP_CLOSE(cdev_vp, FREAD | FWRITE, 1, 0,
 570                             kcred, NULL);
 571                 }
 572                 VN_RELE(cdev_vp);
 573         }
 574 
 575         (void) VOP_CLOSE(dumpvp, FREAD | FWRITE, 1, (offset_t)0, kcred, NULL);
 576 
 577         VN_RELE(dumpvp);
 578 
 579         dumpvp = NULL;
 580         dumpvp_size = 0;
 581         dumppath = NULL;
 582 }
 583 
/*
 * Flush the dump I/O buffer to the dump device, rounded up to a whole
 * number of pages.  At panic time the driver's polled dump entry point
 * (VOP_DUMP) is used; for a live dump an ordinary vnode write is issued.
 * Accumulates I/O timing in dumpsync and returns the new device offset.
 */
static offset_t
dumpvp_flush(void)
{
        size_t size = P2ROUNDUP(dumpbuf.cur - dumpbuf.start, PAGESIZE);
        hrtime_t iotime;
        int err;

        /* never write past vp_limit; record ENOSPC and pin the offset */
        if (dumpbuf.vp_off + size > dumpbuf.vp_limit) {
                dump_ioerr = ENOSPC;
                dumpbuf.vp_off = dumpbuf.vp_limit;
        } else if (size != 0) {
                iotime = gethrtime();
                /* time since the previous flush finished is output wait */
                dumpsync.iowait += iotime - dumpsync.iowaitts;
                if (panicstr)
                        err = VOP_DUMP(dumpvp, dumpbuf.start,
                            lbtodb(dumpbuf.vp_off), btod(size), NULL);
                else
                        /* prefer the char device if one was opened */
                        err = vn_rdwr(UIO_WRITE, dumpbuf.cdev_vp != NULL ?
                            dumpbuf.cdev_vp : dumpvp, dumpbuf.start, size,
                            dumpbuf.vp_off, UIO_SYSSPACE, 0, dumpbuf.vp_limit,
                            kcred, 0);
                /* remember only the first I/O error */
                if (err && dump_ioerr == 0)
                        dump_ioerr = err;
                dumpsync.iowaitts = gethrtime();
                dumpsync.iotime += dumpsync.iowaitts - iotime;
                dumpsync.nwrite += size;
                dumpbuf.vp_off += size;
        }
        /* buffer is empty again; reset the watchdog */
        dumpbuf.cur = dumpbuf.start;
        dump_timeleft = dump_timeout;
        return (dumpbuf.vp_off);
}
 616 
/* maximize write speed by keeping seek offset aligned with size */
void
dumpvp_write(const void *va, size_t size)
{
        size_t len, off, sz;

        while (size != 0) {
                /* copy as much as fits in the remaining buffer space */
                len = MIN(size, dumpbuf.end - dumpbuf.cur);
                if (len == 0) {
                        /* buffer full: flush before copying more */
                        off = P2PHASE(dumpbuf.vp_off, dumpbuf.size);
                        if (off == 0 || !ISP2(dumpbuf.size)) {
                                (void) dumpvp_flush();
                        } else {
                                /*
                                 * Device offset is not buffer-aligned.
                                 * Flush only the first sz bytes (bringing
                                 * vp_off to a buffer-size boundary), then
                                 * slide the remaining off bytes to the
                                 * front so later flushes are aligned,
                                 * full-size writes.
                                 */
                                sz = dumpbuf.size - off;
                                dumpbuf.cur = dumpbuf.start + sz;
                                (void) dumpvp_flush();
                                ovbcopy(dumpbuf.start + sz, dumpbuf.start, off);
                                dumpbuf.cur += off;
                        }
                } else {
                        bcopy(va, dumpbuf.cur, len);
                        va = (char *)va + len;
                        dumpbuf.cur += len;
                        size -= len;
                }
        }
}
 644 
/*
 * Write callback that streams src/size straight to the dump device;
 * the dst argument is ignored.  Presumably passed to the ksyms
 * snapshot code as its copy routine -- confirm against the caller.
 */
/*ARGSUSED*/
static void
dumpvp_ksyms_write(const void *src, void *dst, size_t size)
{
        dumpvp_write(src, size);
}
 651 
 652 /*
 653  * Mark 'pfn' in the bitmap and dump its translation table entry.
 654  */
 655 void
 656 dump_addpage(struct as *as, void *va, pfn_t pfn)
 657 {
 658         mem_vtop_t mem_vtop;
 659         pgcnt_t bitnum;
 660 
 661         if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) {
 662                 if (!BT_TEST(dumpcfg.bitmap, bitnum)) {
 663                         dumphdr->dump_npages++;
 664                         BT_SET(dumpcfg.bitmap, bitnum);
 665                 }
 666                 dumphdr->dump_nvtop++;
 667                 mem_vtop.m_as = as;
 668                 mem_vtop.m_va = va;
 669                 mem_vtop.m_pfn = pfn;
 670                 dumpvp_write(&mem_vtop, sizeof (mem_vtop_t));
 671         }
 672         dump_timeleft = dump_timeout;
 673 }
 674 
 675 /*
 676  * Mark 'pfn' in the bitmap
 677  */
 678 void
 679 dump_page(pfn_t pfn)
 680 {
 681         pgcnt_t bitnum;
 682 
 683         if ((bitnum = dump_pfn_to_bitnum(pfn)) != (pgcnt_t)-1) {
 684                 if (!BT_TEST(dumpcfg.bitmap, bitnum)) {
 685                         dumphdr->dump_npages++;
 686                         BT_SET(dumpcfg.bitmap, bitnum);
 687                 }
 688         }
 689         dump_timeleft = dump_timeout;
 690 }
 691 
 692 /*
 693  * Dump the <as, va, pfn> information for a given address space.
 694  * SEGOP_DUMP() will call dump_addpage() for each page in the segment.
 695  */
 696 static void
 697 dump_as(struct as *as)
 698 {
 699         struct seg *seg;
 700 
 701         AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
 702         for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
 703                 if (seg->s_as != as)
 704                         break;
 705                 if (seg->s_ops == NULL)
 706                         continue;
 707                 SEGOP_DUMP(seg);
 708         }
 709         AS_LOCK_EXIT(as, &as->a_lock);
 710 
 711         if (seg != NULL)
 712                 cmn_err(CE_WARN, "invalid segment %p in address space %p",
 713                     (void *)seg, (void *)as);
 714 }
 715 
 716 static int
 717 dump_process(pid_t pid)
 718 {
 719         proc_t *p = sprlock(pid);
 720 
 721         if (p == NULL)
 722                 return (-1);
 723         if (p->p_as != &kas) {
 724                 mutex_exit(&p->p_lock);
 725                 dump_as(p->p_as);
 726                 mutex_enter(&p->p_lock);
 727         }
 728 
 729         sprunlock(p);
 730 
 731         return (0);
 732 }
 733 
 734 /*
 735  * The following functions (dump_summary(), dump_ereports(), and
 736  * dump_messages()), write data to an uncompressed area within the
 737  * crashdump. The layout of these is
 738  *
 739  * +------------------------------------------------------------+
 740  * |     compressed pages       | summary | ereports | messages |
 741  * +------------------------------------------------------------+
 742  *
 743  * With the advent of saving a compressed crash dump by default, we
 744  * need to save a little more data to describe the failure mode in
 745  * an uncompressed buffer available before savecore uncompresses
 746  * the dump. Initially this is a copy of the stack trace. Additional
 747  * summary information should be added here.
 748  */
 749 
/*
 * Write the uncompressed summary section (magic + checksummed copy of
 * the panic stack buffer) into its reserved slot just below the
 * ereport section, terminated by a zeroed magic.
 */
void
dump_summary(void)
{
        u_offset_t dumpvp_start;
        summary_dump_t sd;

        /* nothing to do if no dump device or header is configured */
        if (dumpvp == NULL || dumphdr == NULL)
                return;

        dumpbuf.cur = dumpbuf.start;

        /* summary slot: DUMP_SUMMARYSIZE bytes below the ereport area */
        dumpbuf.vp_limit = dumpvp_size - (DUMP_OFFSET + DUMP_LOGSIZE +
            DUMP_ERPTSIZE);
        dumpvp_start = dumpbuf.vp_limit - DUMP_SUMMARYSIZE;
        dumpbuf.vp_off = dumpvp_start;

        sd.sd_magic = SUMMARY_MAGIC;
        /* checksum lets savecore validate the stack buffer */
        sd.sd_ssum = checksum32(dump_stack_scratch, STACK_BUF_SIZE);
        dumpvp_write(&sd, sizeof (sd));
        dumpvp_write(dump_stack_scratch, STACK_BUF_SIZE);

        sd.sd_magic = 0; /* indicate end of summary */
        dumpvp_write(&sd, sizeof (sd));
        (void) dumpvp_flush();
}
 775 
/*
 * Write pending FMA ereports into their reserved uncompressed slot
 * just below the message log area, terminated by a zeroed record.
 */
void
dump_ereports(void)
{
        u_offset_t dumpvp_start;
        erpt_dump_t ed;

        if (dumpvp == NULL || dumphdr == NULL)
                return;

        dumpbuf.cur = dumpbuf.start;
        /* ereport slot: DUMP_ERPTSIZE bytes below the log area */
        dumpbuf.vp_limit = dumpvp_size - (DUMP_OFFSET + DUMP_LOGSIZE);
        dumpvp_start = dumpbuf.vp_limit - DUMP_ERPTSIZE;
        dumpbuf.vp_off = dumpvp_start;

        fm_ereport_dump();
        /* the errorq can only be drained safely at panic time */
        if (panicstr)
                errorq_dump();

        bzero(&ed, sizeof (ed)); /* indicate end of ereports */
        dumpvp_write(&ed, sizeof (ed));
        (void) dumpvp_flush();

        /* for a live dump, invalidate cached pages of what we just wrote */
        if (!panicstr) {
                (void) VOP_PUTPAGE(dumpvp, dumpvp_start,
                    (size_t)(dumpbuf.vp_off - dumpvp_start),
                    B_INVAL | B_FORCE, kcred, NULL);
        }
}
 804 
/*
 * Write the console message log into its reserved uncompressed slot at
 * the tail of the dump device.  Each message is emitted as a log_dump_t
 * header (with checksums of its ctl and data blocks) followed by the
 * blocks themselves; a zeroed magic terminates the section.
 */
void
dump_messages(void)
{
        log_dump_t ld;
        mblk_t *mctl, *mdata;
        queue_t *q, *qlast;
        u_offset_t dumpvp_start;

        if (dumpvp == NULL || dumphdr == NULL || log_consq == NULL)
                return;

        dumpbuf.cur = dumpbuf.start;
        /* log slot: the DUMP_LOGSIZE bytes just below DUMP_OFFSET */
        dumpbuf.vp_limit = dumpvp_size - DUMP_OFFSET;
        dumpvp_start = dumpbuf.vp_limit - DUMP_LOGSIZE;
        dumpbuf.vp_off = dumpvp_start;

        /*
         * Walk the q_next chain from its far end back toward log_consq:
         * each pass locates the queue whose q_next is the one dumped on
         * the previous pass (qlast), so queues are visited in reverse
         * link order.
         */
        qlast = NULL;
        do {
                for (q = log_consq; q->q_next != qlast; q = q->q_next)
                        continue;
                for (mctl = q->q_first; mctl != NULL; mctl = mctl->b_next) {
                        dump_timeleft = dump_timeout;
                        mdata = mctl->b_cont;
                        ld.ld_magic = LOG_MAGIC;
                        ld.ld_msgsize = MBLKL(mctl->b_cont);
                        ld.ld_csum = checksum32(mctl->b_rptr, MBLKL(mctl));
                        ld.ld_msum = checksum32(mdata->b_rptr, MBLKL(mdata));
                        dumpvp_write(&ld, sizeof (ld));
                        dumpvp_write(mctl->b_rptr, MBLKL(mctl));
                        dumpvp_write(mdata->b_rptr, MBLKL(mdata));
                }
        } while ((qlast = q) != log_consq);

        ld.ld_magic = 0;                /* indicate end of messages */
        dumpvp_write(&ld, sizeof (ld));
        (void) dumpvp_flush();
        /* for a live dump, invalidate cached pages of what we just wrote */
        if (!panicstr) {
                (void) VOP_PUTPAGE(dumpvp, dumpvp_start,
                    (size_t)(dumpbuf.vp_off - dumpvp_start),
                    B_INVAL | B_FORCE, kcred, NULL);
        }
}
 847 
 848 /*
 849  * Copy pages, trapping ECC errors. Also, for robustness, trap data
 850  * access in case something goes wrong in the hat layer and the
 851  * mapping is broken.
 852  */
static int
dump_pagecopy(void *src, void *dst)
{
	long *wsrc = (long *)src;
	long *wdst = (long *)dst;
	const ulong_t ncopies = PAGESIZE / sizeof (long);
	/* volatile: both live across a trap-induced re-entry of on_trap() */
	volatile int w = 0;
	volatile int ueoff = -1;
	on_trap_data_t otd;

	/*
	 * on_trap() returns 0 when first establishing the trap handler
	 * and nonzero when a protected data access faults; in that case
	 * control resumes here with `w' still indexing the word that
	 * faulted.  Record the byte offset of the first bad word, fill
	 * the destination word with a recognizable pattern, and fall
	 * through to continue copying the rest of the page.
	 */
	if (on_trap(&otd, OT_DATA_EC | OT_DATA_ACCESS)) {
		if (ueoff == -1)
			ueoff = w * sizeof (long);
		/* report "bad ECC" or "bad address" */
#ifdef _LP64
		if (otd.ot_trap & OT_DATA_EC)
			wdst[w++] = 0x00badecc00badecc;
		else
			wdst[w++] = 0x00badadd00badadd;
#else
		if (otd.ot_trap & OT_DATA_EC)
			wdst[w++] = 0x00badecc;
		else
			wdst[w++] = 0x00badadd;
#endif
	}
	/* Word-by-word copy; any fault re-enters the on_trap block above. */
	while (w < ncopies) {
		wdst[w] = wsrc[w];
		w++;
	}
	no_trap();
	/* Byte offset of the first bad word, or -1 if the page was clean. */
	return (ueoff);
}
 886 
 887 #ifdef  COLLECT_METRICS
 888 size_t
 889 dumpsys_metrics(dumpsync_t *ds, char *buf, size_t size)
 890 {
 891         dumpcfg_t *cfg = &dumpcfg;
 892         int myid = CPU->cpu_seqid;
 893         int i, compress_ratio;
 894         int sec, iorate;
 895         char *e = buf + size;
 896         char *p = buf;
 897 
 898         sec = ds->elapsed / (1000 * 1000 * 1000ULL);
 899         if (sec < 1)
 900                 sec = 1;
 901 
 902         if (ds->iotime < 1)
 903                 ds->iotime = 1;
 904         iorate = (ds->nwrite * 100000ULL) / ds->iotime;
 905 
 906         compress_ratio = 100LL * ds->npages / btopr(ds->nwrite + 1);
 907 
 908 #define P(...) (p += p < e ? snprintf(p, e - p, __VA_ARGS__) : 0)
 909 
 910         P("Master cpu_seqid,%d\n", CPU->cpu_seqid);
 911         P("Master cpu_id,%d\n", CPU->cpu_id);
 912         P("dump_flags,0x%x\n", dumphdr->dump_flags);
 913         P("dump_ioerr,%d\n", dump_ioerr);
 914 
 915         P("Compression type,serial lzjb\n");
 916         P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio %
 917             100);
 918 
 919         P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100);
 920         P("..total bytes,%lld\n", (u_longlong_t)ds->nwrite);
 921         P("..total nsec,%lld\n", (u_longlong_t)ds->iotime);
 922         P("dumpbuf.iosize,%ld\n", dumpbuf.iosize);
 923         P("dumpbuf.size,%ld\n", dumpbuf.size);
 924 
 925         P("Dump pages/sec,%llu\n", (u_longlong_t)ds->npages / sec);
 926         P("Dump pages,%llu\n", (u_longlong_t)ds->npages);
 927         P("Dump time,%d\n", sec);
 928 
 929         if (ds->pages_mapped > 0)
 930                 P("per-cent map utilization,%d\n", (int)((100 * ds->pages_used)
 931                     / ds->pages_mapped));
 932 
 933         P("\nPer-page metrics:\n");
 934         if (ds->npages > 0) {
 935 #define PERPAGE(x)      ds->perpage.x += cfg->perpage.x;
 936                 PERPAGES;
 937 #undef PERPAGE
 938 #define PERPAGE(x) \
 939                 P("%s nsec/page,%d\n", #x, (int)(ds->perpage.x / ds->npages));
 940                 PERPAGES;
 941 #undef PERPAGE
 942 
 943                 P("I/O wait nsec/page,%llu\n", (u_longlong_t)(ds->iowait /
 944                     ds->npages));
 945         }
 946 #undef P
 947         if (p < e)
 948                 bzero(p, e - p);
 949         return (p - buf);
 950 }
 951 #endif  /* COLLECT_METRICS */
 952 
 953 /*
 954  * Dump the system.
 955  */
void
dumpsys(void)
{
	dumpsync_t *ds = &dumpsync;
	pfn_t pfn;
	pgcnt_t bitnum;
	proc_t *p;
	pid_t npids, pidx;
	char *content;
	char *buf;
	size_t size;
	dumpmlw_t mlw;
	dumpcsize_t datatag;
	dumpdatahdr_t datahdr;

	if (dumpvp == NULL || dumphdr == NULL) {
		uprintf("skipping system dump - no dump device configured\n");
		return;
	}
	dumpbuf.cur = dumpbuf.start;

	/* clear the sync variables */
	bzero(ds, sizeof (*ds));

	/*
	 * Calculate the starting block for dump.  If we're dumping on a
	 * swap device, start 1/5 of the way in; otherwise, start at the
	 * beginning.  And never use the first page -- it may be a disk label.
	 */
	if (dumpvp->v_flag & VISSWAP)
		dumphdr->dump_start = P2ROUNDUP(dumpvp_size / 5, DUMP_OFFSET);
	else
		dumphdr->dump_start = DUMP_OFFSET;

	dumphdr->dump_flags = DF_VALID | DF_COMPLETE | DF_LIVE | DF_COMPRESSED;
	dumphdr->dump_crashtime = gethrestime_sec();
	dumphdr->dump_npages = 0;
	dumphdr->dump_nvtop = 0;
	bzero(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg.bitmapsize));
	dump_timeleft = dump_timeout;

	if (panicstr) {
		/*
		 * Panic dump: drop DF_LIVE, re-establish the dump device
		 * reservation, and record the formatted panic string in
		 * the header.
		 */
		dumphdr->dump_flags &= ~DF_LIVE;
		(void) VOP_DUMPCTL(dumpvp, DUMP_FREE, NULL, NULL);
		(void) VOP_DUMPCTL(dumpvp, DUMP_ALLOC, NULL, NULL);
		(void) vsnprintf(dumphdr->dump_panicstring, DUMP_PANICSIZE,
		    panicstr, panicargs);

	}

	/* describe the configured dump content for the console message */
	if (dump_conflags & DUMP_ALL)
		content = "all";
	else if (dump_conflags & DUMP_CURPROC)
		content = "kernel + curproc";
	else
		content = "kernel";
	uprintf("dumping to %s, offset %lld, content: %s\n", dumppath,
	    dumphdr->dump_start, content);

	/* Make sure nodename is current */
	bcopy(utsname.nodename, dumphdr->dump_utsname.nodename, SYS_NMLN);

	/*
	 * If this is a live dump, try to open a VCHR vnode for better
	 * performance. We must take care to flush the buffer cache
	 * first.
	 */
	if (!panicstr) {
		vnode_t *cdev_vp, *cmn_cdev_vp;

		ASSERT(dumpbuf.cdev_vp == NULL);
		cdev_vp = makespecvp(VTOS(dumpvp)->s_dev, VCHR);
		if (cdev_vp != NULL) {
			cmn_cdev_vp = common_specvp(cdev_vp);
			if (VOP_OPEN(&cmn_cdev_vp, FREAD | FWRITE, kcred, NULL)
			    == 0) {
				if (vn_has_cached_data(dumpvp))
					(void) pvn_vplist_dirty(dumpvp, 0, NULL,
					    B_INVAL | B_TRUNC, kcred);
				dumpbuf.cdev_vp = cmn_cdev_vp;
			} else {
				VN_RELE(cdev_vp);
			}
		}
	}

	/*
	 * Store a hires timestamp so we can look it up during debugging.
	 */
	lbolt_debug_entry();

	/*
	 * Leave room for the message and ereport save areas and terminal dump
	 * header.
	 */
	dumpbuf.vp_limit = dumpvp_size - DUMP_LOGSIZE - DUMP_OFFSET -
	    DUMP_ERPTSIZE;

	/*
	 * Write out the symbol table.  It's no longer compressed,
	 * so its 'size' and 'csize' are equal.
	 */
	dumpbuf.vp_off = dumphdr->dump_ksyms = dumphdr->dump_start + PAGESIZE;
	dumphdr->dump_ksyms_size = dumphdr->dump_ksyms_csize =
	    ksyms_snapshot(dumpvp_ksyms_write, NULL, LONG_MAX);

	/*
	 * Write out the translation map.
	 */
	dumphdr->dump_map = dumpvp_flush();
	dump_as(&kas);
	dumphdr->dump_nvtop += dump_plat_addr();

	/*
	 * call into hat, which may have unmapped pages that also need to
	 * be in the dump
	 */
	hat_dump();

	if (dump_conflags & DUMP_ALL) {
		/*
		 * Full dump: dump every process's translations, then set
		 * the bitmap bit for every accessible physical page.
		 */
		mutex_enter(&pidlock);

		for (npids = 0, p = practive; p != NULL; p = p->p_next)
			dumpcfg.pids[npids++] = p->p_pid;

		mutex_exit(&pidlock);

		for (pidx = 0; pidx < npids; pidx++)
			(void) dump_process(dumpcfg.pids[pidx]);

		dump_init_memlist_walker(&mlw);
		for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
			dump_timeleft = dump_timeout;
			pfn = dump_bitnum_to_pfn(bitnum, &mlw);
			/*
			 * Some hypervisors do not have all pages available to
			 * be accessed by the guest OS.  Check for page
			 * accessibility.
			 */
			if (plat_hold_page(pfn, PLAT_HOLD_NO_LOCK, NULL) !=
			    PLAT_HOLD_OK)
				continue;
			BT_SET(dumpcfg.bitmap, bitnum);
		}
		dumphdr->dump_npages = dumpcfg.bitmapsize;
		dumphdr->dump_flags |= DF_ALL;

	} else if (dump_conflags & DUMP_CURPROC) {
		/*
		 * Determine which pid is to be dumped.  If we're panicking, we
		 * dump the process associated with panic_thread (if any).  If
		 * this is a live dump, we dump the process associated with
		 * curthread.
		 */
		npids = 0;
		if (panicstr) {
			if (panic_thread != NULL &&
			    panic_thread->t_procp != NULL &&
			    panic_thread->t_procp != &p0) {
				dumpcfg.pids[npids++] =
				    panic_thread->t_procp->p_pid;
			}
		} else {
			dumpcfg.pids[npids++] = curthread->t_procp->p_pid;
		}

		if (npids && dump_process(dumpcfg.pids[0]) == 0)
			dumphdr->dump_flags |= DF_CURPROC;
		else
			dumphdr->dump_flags |= DF_KERNEL;

	} else {
		dumphdr->dump_flags |= DF_KERNEL;
	}

	/* hash mask: next power of two at or above dump_nvtop, minus one */
	dumphdr->dump_hashmask = (1 << highbit(dumphdr->dump_nvtop - 1)) - 1;

	/*
	 * Write out the pfn table.
	 */
	dumphdr->dump_pfn = dumpvp_flush();
	dump_init_memlist_walker(&mlw);
	for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
		dump_timeleft = dump_timeout;
		if (!BT_TEST(dumpcfg.bitmap, bitnum))
			continue;
		pfn = dump_bitnum_to_pfn(bitnum, &mlw);
		ASSERT(pfn != PFN_INVALID);
		dumpvp_write(&pfn, sizeof (pfn_t));
	}
	dump_plat_pfn();

	/*
	 * Write out all the pages.
	 * Map pages, copy them handling UEs, compress, and write them out.
	 */
	dumphdr->dump_data = dumpvp_flush();

	ASSERT(dumpcfg.page);
	bzero(&dumpcfg.perpage, sizeof (dumpcfg.perpage));

	ds->start = gethrtime();
	ds->iowaitts = ds->start;

	/* start the kmem dump intercepts (finished by kmem_dump_finish()) */
	if (panicstr)
		kmem_dump_begin();

	dump_init_memlist_walker(&mlw);
	for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
		size_t csize;

		dump_timeleft = dump_timeout;
		HRSTART(ds->perpage, bitmap);
		if (!BT_TEST(dumpcfg.bitmap, bitnum)) {
			HRSTOP(ds->perpage, bitmap);
			continue;
		}
		HRSTOP(ds->perpage, bitmap);

		pfn = dump_bitnum_to_pfn(bitnum, &mlw);
		ASSERT(pfn != PFN_INVALID);

		/* map the page read-only at the scratch address cmap */
		HRSTART(ds->perpage, map);
		hat_devload(kas.a_hat, dumpcfg.cmap, PAGESIZE, pfn, PROT_READ,
			    HAT_LOAD_NOCONSIST);
		HRSTOP(ds->perpage, map);

		/* copy with UE/bad-address recovery (see dump_pagecopy()) */
		dump_pagecopy(dumpcfg.cmap, dumpcfg.page);

		HRSTART(ds->perpage, unmap);
		hat_unload(kas.a_hat, dumpcfg.cmap, PAGESIZE, HAT_UNLOAD);
		HRSTOP(ds->perpage, unmap);

		HRSTART(dumpcfg.perpage, compress);
		csize = compress(dumpcfg.page, dumpcfg.lzbuf, PAGESIZE);
		HRSTOP(dumpcfg.perpage, compress);

		/* each page record is its compressed size, then the data */
		HRSTART(dumpcfg.perpage, write);
		dumpvp_write(&csize, sizeof (csize));
		dumpvp_write(dumpcfg.lzbuf, csize);
		HRSTOP(dumpcfg.perpage, write);

		/* an I/O error aborts the page loop; record progress so far */
		if (dump_ioerr) {
			dumphdr->dump_flags &= ~DF_COMPLETE;
			dumphdr->dump_npages = ds->npages;
			break;
		}
		/* update the console progress line at each whole percent */
		if (++ds->npages * 100LL / dumphdr->dump_npages > ds->percent_done) {
			int sec;

			sec = (gethrtime() - ds->start) / 1000 / 1000 / 1000;
			uprintf("^\r%2d:%02d %3d%% done", sec / 60, sec % 60,
				++ds->percent_done);
			if (!panicstr)
				delay(1);	/* let the output be sent */
		}
	}

	ds->elapsed = gethrtime() - ds->start;
	if (ds->elapsed < 1)
		ds->elapsed = 1;

	/* record actual pages dumped */
	dumphdr->dump_npages = ds->npages;

	/* platform-specific data */
	dumphdr->dump_npages += dump_plat_data(dumpcfg.page);

	/* note any errors by clearing DF_COMPLETE */
	if (dump_ioerr || ds->npages < dumphdr->dump_npages)
		dumphdr->dump_flags &= ~DF_COMPLETE;

	/* end of stream blocks */
	datatag = 0;
	dumpvp_write(&datatag, sizeof (datatag));

	bzero(&datahdr, sizeof (datahdr));

	/* buffer for metrics */
	buf = dumpcfg.page;
	size = MIN(PAGESIZE, DUMP_OFFSET - sizeof (dumphdr_t) -
	    sizeof (dumpdatahdr_t));

	/* finish the kmem intercepts, collect kmem verbose info */
	if (panicstr) {
		datahdr.dump_metrics = kmem_dump_finish(buf, size);
		buf += datahdr.dump_metrics;
		size -= datahdr.dump_metrics;
	}

	/* record in the header whether this is a fault-management panic */
	if (panicstr)
		dumphdr->dump_fm_panic = is_fm_panic();

	/* compression info in data header */
	datahdr.dump_datahdr_magic = DUMP_DATAHDR_MAGIC;
	datahdr.dump_datahdr_version = DUMP_DATAHDR_VERSION;
	datahdr.dump_maxcsize = PAGESIZE;
	datahdr.dump_maxrange = 1;
	datahdr.dump_nstreams = 1;
	datahdr.dump_clevel = 0;
#ifdef COLLECT_METRICS
	if (dump_metrics_on)
		datahdr.dump_metrics += dumpsys_metrics(ds, buf, size);
#endif
	datahdr.dump_data_csize = dumpvp_flush() - dumphdr->dump_data;

	/*
	 * Write out the initial and terminal dump headers.
	 */
	dumpbuf.vp_off = dumphdr->dump_start;
	dumpvp_write(dumphdr, sizeof (dumphdr_t));
	(void) dumpvp_flush();

	/* terminal header, data header, and metrics at the device tail */
	dumpbuf.vp_limit = dumpvp_size;
	dumpbuf.vp_off = dumpbuf.vp_limit - DUMP_OFFSET;
	dumpvp_write(dumphdr, sizeof (dumphdr_t));
	dumpvp_write(&datahdr, sizeof (dumpdatahdr_t));
	dumpvp_write(dumpcfg.page, datahdr.dump_metrics);

	(void) dumpvp_flush();

	uprintf("\r%3d%% done: %llu pages dumped, ",
	    ds->percent_done, (u_longlong_t)ds->npages);

	if (dump_ioerr == 0) {
		uprintf("dump succeeded\n");
	} else {
		uprintf("dump failed: error %d\n", dump_ioerr);
#ifdef DEBUG
		if (panicstr)
			debug_enter("dump failed");
#endif
	}

	/*
	 * Write out all undelivered messages.  This has to be the *last*
	 * thing we do because the dump process itself emits messages.
	 */
	if (panicstr) {
		dump_summary();
		dump_ereports();
		dump_messages();
	}

	delay(2 * hz);	/* let people see the 'done' message */
	dump_timeleft = 0;
	dump_ioerr = 0;

	/* restore settings after live dump completes */
	if (!panicstr) {
		/* release any VCHR open of the dump device */
		if (dumpbuf.cdev_vp != NULL) {
			(void) VOP_CLOSE(dumpbuf.cdev_vp, FREAD | FWRITE, 1, 0,
			    kcred, NULL);
			VN_RELE(dumpbuf.cdev_vp);
			dumpbuf.cdev_vp = NULL;
		}
	}
}
1316 
1317 /*
1318  * This function is called whenever the memory size, as represented
1319  * by the phys_install list, changes.
1320  */
void
dump_resize()
{
	mutex_enter(&dump_lock);
	/* re-derive header, I/O buffer, and compression level under the lock */
	dumphdr_init();
	dumpbuf_resize();
	dump_update_clevel();
	mutex_exit(&dump_lock);
}
1330 
1331 /*
1332  * This function allows for dynamic resizing of a dump area. It assumes that
1333  * the underlying device has update its appropriate size(9P).
1334  */
1335 int
1336 dumpvp_resize()
1337 {
1338         int error;
1339         vattr_t vattr;
1340 
1341         mutex_enter(&dump_lock);
1342         vattr.va_mask = AT_SIZE;
1343         if ((error = VOP_GETATTR(dumpvp, &vattr, 0, kcred, NULL)) != 0) {
1344                 mutex_exit(&dump_lock);
1345                 return (error);
1346         }
1347 
1348         if (error == 0 && vattr.va_size < 2 * DUMP_LOGSIZE + DUMP_ERPTSIZE) {
1349                 mutex_exit(&dump_lock);
1350                 return (ENOSPC);
1351         }
1352 
1353         dumpvp_size = vattr.va_size & -DUMP_OFFSET;
1354         mutex_exit(&dump_lock);
1355         return (0);
1356 }
1357 
1358 int
1359 dump_set_uuid(const char *uuidstr)
1360 {
1361         const char *ptr;
1362         int i;
1363 
1364         if (uuidstr == NULL || strnlen(uuidstr, 36 + 1) != 36)
1365                 return (EINVAL);
1366 
1367         /* uuid_parse is not common code so check manually */
1368         for (i = 0, ptr = uuidstr; i < 36; i++, ptr++) {
1369                 switch (i) {
1370                 case 8:
1371                 case 13:
1372                 case 18:
1373                 case 23:
1374                         if (*ptr != '-')
1375                                 return (EINVAL);
1376                         break;
1377 
1378                 default:
1379                         if (!isxdigit(*ptr))
1380                                 return (EINVAL);
1381                         break;
1382                 }
1383         }
1384 
1385         if (dump_osimage_uuid[0] != '\0')
1386                 return (EALREADY);
1387 
1388         (void) strncpy(dump_osimage_uuid, uuidstr, 36 + 1);
1389 
1390         cmn_err(CE_CONT, "?This Solaris instance has UUID %s\n",
1391             dump_osimage_uuid);
1392 
1393         return (0);
1394 }
1395 
1396 const char *
1397 dump_get_uuid(void)
1398 {
1399         return (dump_osimage_uuid[0] != '\0' ? dump_osimage_uuid : "");
1400 }