Print this page
[mq]: core-v2

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/os/dumpsubr.c
          +++ new/usr/src/uts/common/os/dumpsubr.c
↓ open down ↓ 13 lines elided ↑ open up ↑
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
       24 + * Copyright 2012, Josef 'Jeff' Sipek <jeffpc@31bits.net>. All rights reserved.
  24   25   */
  25   26  
  26   27  #include <sys/types.h>
  27   28  #include <sys/param.h>
  28   29  #include <sys/systm.h>
  29   30  #include <sys/vm.h>
  30   31  #include <sys/proc.h>
  31   32  #include <sys/file.h>
  32   33  #include <sys/conf.h>
  33   34  #include <sys/kmem.h>
↓ open down ↓ 30 lines elided ↑ open up ↑
  64   65  
  65   66  #include <vm/hat.h>
  66   67  #include <vm/as.h>
  67   68  #include <vm/page.h>
  68   69  #include <vm/pvn.h>
  69   70  #include <vm/seg.h>
  70   71  #include <vm/seg_kmem.h>
  71   72  #include <sys/clock_impl.h>
  72   73  #include <sys/hold_page.h>
  73   74  
  74      -#include <bzip2/bzlib.h>
  75      -
  76   75  /*
  77      - * Crash dump time is dominated by disk write time.  To reduce this,
  78      - * the stronger compression method bzip2 is applied to reduce the dump
  79      - * size and hence reduce I/O time.  However, bzip2 is much more
  80      - * computationally expensive than the existing lzjb algorithm, so to
  81      - * avoid increasing compression time, CPUs that are otherwise idle
  82      - * during panic are employed to parallelize the compression task.
  83      - * Many helper CPUs are needed to prevent bzip2 from being a
  84      - * bottleneck, and on systems with too few CPUs, the lzjb algorithm is
  85      - * parallelized instead. Lastly, I/O and compression are performed by
  86      - * different CPUs, and are hence overlapped in time, unlike the older
  87      - * serial code.
  88      - *
  89      - * Another important consideration is the speed of the dump
  90      - * device. Faster disks need less CPUs in order to benefit from
  91      - * parallel lzjb versus parallel bzip2. Therefore, the CPU count
  92      - * threshold for switching from parallel lzjb to paralled bzip2 is
  93      - * elevated for faster disks. The dump device speed is adduced from
  94      - * the setting for dumpbuf.iosize, see dump_update_clevel.
  95      - */
  96      -
  97      -/*
  98   76   * exported vars
  99   77   */
 100   78  kmutex_t        dump_lock;              /* lock for dump configuration */
 101   79  dumphdr_t       *dumphdr;               /* dump header */
 102   80  int             dump_conflags = DUMP_KERNEL; /* dump configuration flags */
 103   81  vnode_t         *dumpvp;                /* dump device vnode pointer */
 104   82  u_offset_t      dumpvp_size;            /* size of dump device, in bytes */
 105   83  char            *dumppath;              /* pathname of dump device */
 106   84  int             dump_timeout = 120;     /* timeout for dumping pages */
 107   85  int             dump_timeleft;          /* portion of dump_timeout remaining */
 108   86  int             dump_ioerr;             /* dump i/o error */
 109      -int             dump_check_used;        /* enable check for used pages */
 110   87  char        *dump_stack_scratch; /* scratch area for saving stack summary */
 111   88  
 112   89  /*
 113      - * Tunables for dump compression and parallelism. These can be set via
 114      - * /etc/system.
       90 + * Tunables for dump.  These can be set via /etc/system.
 115   91   *
 116      - * dump_ncpu_low        number of helpers for parallel lzjb
 117      - *      This is also the minimum configuration.
 118      - *
 119      - * dump_bzip2_level     bzip2 compression level: 1-9
 120      - *      Higher numbers give greater compression, but take more memory
 121      - *      and time. Memory used per helper is ~(dump_bzip2_level * 1MB).
 122      - *
 123      - * dump_plat_mincpu     the cross-over limit for using bzip2 (per platform):
 124      - *      if dump_plat_mincpu == 0, then always do single threaded dump
 125      - *      if ncpu >= dump_plat_mincpu then try to use bzip2
 126      - *
 127   92   * dump_metrics_on      if set, metrics are collected in the kernel, passed
 128   93   *      to savecore via the dump file, and recorded by savecore in
 129   94   *      METRICS.txt.
 130   95   */
 131      -uint_t dump_ncpu_low = 4;       /* minimum config for parallel lzjb */
 132      -uint_t dump_bzip2_level = 1;    /* bzip2 level (1-9) */
 133   96  
 134      -/* Use dump_plat_mincpu_default unless this variable is set by /etc/system */
 135      -#define MINCPU_NOT_SET  ((uint_t)-1)
 136      -uint_t dump_plat_mincpu = MINCPU_NOT_SET;
 137      -
 138   97  /* tunables for pre-reserved heap */
 139   98  uint_t dump_kmem_permap = 1024;
 140   99  uint_t dump_kmem_pages = 8;
 141  100  
 142      -/* Define multiple buffers per helper to avoid stalling */
 143      -#define NCBUF_PER_HELPER        2
 144      -#define NCMAP_PER_HELPER        4
 145      -
 146      -/* minimum number of helpers configured */
 147      -#define MINHELPERS      (dump_ncpu_low)
 148      -#define MINCBUFS        (MINHELPERS * NCBUF_PER_HELPER)
 149      -
 150  101  /*
 151      - * Define constant parameters.
 152      - *
 153      - * CBUF_SIZE            size of an output buffer
 154      - *
 155      - * CBUF_MAPSIZE         size of virtual range for mapping pages
 156      - *
 157      - * CBUF_MAPNP           size of virtual range in pages
 158      - *
 159      - */
 160      -#define DUMP_1KB        ((size_t)1 << 10)
 161      -#define DUMP_1MB        ((size_t)1 << 20)
 162      -#define CBUF_SIZE       ((size_t)1 << 17)
 163      -#define CBUF_MAPSHIFT   (22)
 164      -#define CBUF_MAPSIZE    ((size_t)1 << CBUF_MAPSHIFT)
 165      -#define CBUF_MAPNP      ((size_t)1 << (CBUF_MAPSHIFT - PAGESHIFT))
 166      -
 167      -/*
 168  102   * Compression metrics are accumulated nano-second subtotals. The
 169  103   * results are normalized by the number of pages dumped. A report is
 170  104   * generated when dumpsys() completes and is saved in the dump image
 171  105   * after the trailing dump header.
 172  106   *
 173  107   * Metrics are always collected. Set the variable dump_metrics_on to
 174  108   * cause metrics to be saved in the crash file, where savecore will
 175  109   * save them in the file METRICS.txt.
 176  110   */
 177  111  #define PERPAGES \
↓ open down ↓ 27 lines elided ↑ open up ↑
 205  139  #define HRNORM(v, m, n)         v.m /= (n)
 206  140  
 207  141  #else
 208  142  #define HRSTART(v, m)
 209  143  #define HRSTOP(v, m)
 210  144  #define HRBEGIN(v, m, s)
 211  145  #define HREND(v, m)
 212  146  #define HRNORM(v, m, n)
 213  147  #endif  /* COLLECT_METRICS */
 214  148  
 215      -/*
 216      - * Buffers for copying and compressing memory pages.
 217      - *
 218      - * cbuf_t buffer controllers: used for both input and output.
 219      - *
 220      - * The buffer state indicates how it is being used:
 221      - *
 222      - * CBUF_FREEMAP: CBUF_MAPSIZE virtual address range is available for
 223      - * mapping input pages.
 224      - *
 225      - * CBUF_INREADY: input pages are mapped and ready for compression by a
 226      - * helper.
 227      - *
 228      - * CBUF_USEDMAP: mapping has been consumed by a helper. Needs unmap.
 229      - *
 230      - * CBUF_FREEBUF: CBUF_SIZE output buffer, which is available.
 231      - *
 232      - * CBUF_WRITE: CBUF_SIZE block of compressed pages from a helper,
 233      - * ready to write out.
 234      - *
 235      - * CBUF_ERRMSG: CBUF_SIZE block of error messages from a helper
 236      - * (reports UE errors.)
 237      - */
 238      -
 239      -typedef enum cbufstate {
 240      -        CBUF_FREEMAP,
 241      -        CBUF_INREADY,
 242      -        CBUF_USEDMAP,
 243      -        CBUF_FREEBUF,
 244      -        CBUF_WRITE,
 245      -        CBUF_ERRMSG
 246      -} cbufstate_t;
 247      -
 248      -typedef struct cbuf cbuf_t;
 249      -
 250      -struct cbuf {
 251      -        cbuf_t *next;                   /* next in list */
 252      -        cbufstate_t state;              /* processing state */
 253      -        size_t used;                    /* amount used */
 254      -        size_t size;                    /* mem size */
 255      -        char *buf;                      /* kmem or vmem */
 256      -        pgcnt_t pagenum;                /* index to pfn map */
 257      -        pgcnt_t bitnum;                 /* first set bitnum */
 258      -        pfn_t pfn;                      /* first pfn in mapped range */
 259      -        int off;                        /* byte offset to first pfn */
 260      -};
 261      -
 262  149  static char dump_osimage_uuid[36 + 1];
 263  150  
 264  151  #define isdigit(ch)     ((ch) >= '0' && (ch) <= '9')
 265  152  #define isxdigit(ch)    (isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \
 266  153                          ((ch) >= 'A' && (ch) <= 'F'))
 267  154  
 268  155  /*
 269      - * cqueue_t queues: a uni-directional channel for communication
 270      - * from the master to helper tasks or vice-versa using put and
 271      - * get primitives. Both mappings and data buffers are passed via
 272      - * queues. Producers close a queue when done. The number of
 273      - * active producers is reference counted so the consumer can
 274      - * detect end of data. Concurrent access is mediated by atomic
 275      - * operations for panic dump, or mutex/cv for live dump.
 276      - *
 277      - * There a four queues, used as follows:
 278      - *
 279      - * Queue                Dataflow                NewState
 280      - * --------------------------------------------------
 281      - * mainq                master -> master        FREEMAP
 282      - * master has initialized or unmapped an input buffer
 283      - * --------------------------------------------------
 284      - * helperq              master -> helper        INREADY
 285      - * master has mapped input for use by helper
 286      - * --------------------------------------------------
 287      - * mainq                master <- helper        USEDMAP
 288      - * helper is done with input
 289      - * --------------------------------------------------
 290      - * freebufq             master -> helper        FREEBUF
 291      - * master has initialized or written an output buffer
 292      - * --------------------------------------------------
 293      - * mainq                master <- helper        WRITE
 294      - * block of compressed pages from a helper
 295      - * --------------------------------------------------
 296      - * mainq                master <- helper        ERRMSG
 297      - * error messages from a helper (memory error case)
 298      - * --------------------------------------------------
 299      - * writerq              master <- master        WRITE
 300      - * non-blocking queue of blocks to write
 301      - * --------------------------------------------------
 302      - */
 303      -typedef struct cqueue {
 304      -        cbuf_t *volatile first;         /* first in list */
 305      -        cbuf_t *last;                   /* last in list */
 306      -        hrtime_t ts;                    /* timestamp */
 307      -        hrtime_t empty;                 /* total time empty */
 308      -        kmutex_t mutex;                 /* live state lock */
 309      -        kcondvar_t cv;                  /* live wait var */
 310      -        lock_t spinlock;                /* panic mode spin lock */
 311      -        volatile uint_t open;           /* producer ref count */
 312      -} cqueue_t;
 313      -
 314      -/*
 315      - * Convenience macros for using the cqueue functions
 316      - * Note that the caller must have defined "dumpsync_t *ds"
 317      - */
 318      -#define CQ_IS_EMPTY(q)                                  \
 319      -        (ds->q.first == NULL)
 320      -
 321      -#define CQ_OPEN(q)                                      \
 322      -        atomic_inc_uint(&ds->q.open)
 323      -
 324      -#define CQ_CLOSE(q)                                     \
 325      -        dumpsys_close_cq(&ds->q, ds->live)
 326      -
 327      -#define CQ_PUT(q, cp, st)                               \
 328      -        dumpsys_put_cq(&ds->q, cp, st, ds->live)
 329      -
 330      -#define CQ_GET(q)                                       \
 331      -        dumpsys_get_cq(&ds->q, ds->live)
 332      -
 333      -/*
 334  156   * Dynamic state when dumpsys() is running.
 335  157   */
 336  158  typedef struct dumpsync {
 337  159          pgcnt_t npages;                 /* subtotal of pages dumped */
 338  160          pgcnt_t pages_mapped;           /* subtotal of pages mapped */
 339  161          pgcnt_t pages_used;             /* subtotal of pages used per map */
 340  162          size_t nwrite;                  /* subtotal of bytes written */
 341      -        uint_t live;                    /* running live dump */
 342      -        uint_t neednl;                  /* will need to print a newline */
 343  163          uint_t percent;                 /* dump progress */
 344  164          uint_t percent_done;            /* dump progress reported */
 345      -        cqueue_t freebufq;              /* free kmem bufs for writing */
 346      -        cqueue_t mainq;                 /* input for main task */
 347      -        cqueue_t helperq;               /* input for helpers */
 348      -        cqueue_t writerq;               /* input for writer */
 349  165          hrtime_t start;                 /* start time */
 350  166          hrtime_t elapsed;               /* elapsed time when completed */
 351  167          hrtime_t iotime;                /* time spent writing nwrite bytes */
 352  168          hrtime_t iowait;                /* time spent waiting for output */
 353  169          hrtime_t iowaitts;              /* iowait timestamp */
 354  170          perpage_t perpage;              /* metrics */
 355  171          perpage_t perpagets;
 356      -        int dumpcpu;                    /* master cpu */
 357  172  } dumpsync_t;
 358  173  
 359  174  static dumpsync_t dumpsync;             /* synchronization vars */
 360  175  
 361  176  /*
 362      - * helper_t helpers: contains the context for a stream. CPUs run in
 363      - * parallel at dump time; each CPU creates a single stream of
 364      - * compression data.  Stream data is divided into CBUF_SIZE blocks.
 365      - * The blocks are written in order within a stream. But, blocks from
 366      - * multiple streams can be interleaved. Each stream is identified by a
 367      - * unique tag.
 368      - */
 369      -typedef struct helper {
 370      -        int helper;                     /* bound helper id */
 371      -        int tag;                        /* compression stream tag */
 372      -        perpage_t perpage;              /* per page metrics */
 373      -        perpage_t perpagets;            /* per page metrics (timestamps) */
 374      -        taskqid_t taskqid;              /* live dump task ptr */
 375      -        int in, out;                    /* buffer offsets */
 376      -        cbuf_t *cpin, *cpout, *cperr;   /* cbuf objects in process */
 377      -        dumpsync_t *ds;                 /* pointer to sync vars */
 378      -        size_t used;                    /* counts input consumed */
 379      -        char *page;                     /* buffer for page copy */
 380      -        char *lzbuf;                    /* lzjb output */
 381      -        bz_stream bzstream;             /* bzip2 state */
 382      -} helper_t;
 383      -
 384      -#define MAINHELPER      (-1)            /* helper is also the main task */
 385      -#define FREEHELPER      (-2)            /* unbound helper */
 386      -#define DONEHELPER      (-3)            /* helper finished */
 387      -
 388      -/*
 389  177   * configuration vars for dumpsys
 390  178   */
 391  179  typedef struct dumpcfg {
 392      -        int     threshold;      /* ncpu threshold for bzip2 */
 393      -        int     nhelper;        /* number of helpers */
 394      -        int     nhelper_used;   /* actual number of helpers used */
 395      -        int     ncmap;          /* number VA pages for compression */
 396      -        int     ncbuf;          /* number of bufs for compression */
 397      -        int     ncbuf_used;     /* number of bufs in use */
 398      -        uint_t  clevel;         /* dump compression level */
 399      -        helper_t *helper;       /* array of helpers */
 400      -        cbuf_t  *cmap;          /* array of input (map) buffers */
 401      -        cbuf_t  *cbuf;          /* array of output  buffers */
 402      -        ulong_t *helpermap;     /* set of dumpsys helper CPU ids */
 403      -        ulong_t *bitmap;        /* bitmap for marking pages to dump */
 404      -        ulong_t *rbitmap;       /* bitmap for used CBUF_MAPSIZE ranges */
 405      -        pgcnt_t bitmapsize;     /* size of bitmap */
 406      -        pgcnt_t rbitmapsize;    /* size of bitmap for ranges */
 407      -        pgcnt_t found4m;        /* number ranges allocated by dump */
 408      -        pgcnt_t foundsm;        /* number small pages allocated by dump */
 409      -        pid_t   *pids;          /* list of process IDs at dump time */
 410      -        size_t  maxsize;        /* memory size needed at dump time */
 411      -        size_t  maxvmsize;      /* size of reserved VM */
 412      -        char    *maxvm;         /* reserved VM for spare pages */
 413      -        lock_t  helper_lock;    /* protect helper state */
 414      -        char    helpers_wanted; /* flag to enable parallelism */
      180 +        perpage_t perpage;      /* per page metrics */
      181 +        perpage_t perpagets;    /* per page metrics (timestamps) */
      182 +        char *page;             /* buffer for page copy */
      183 +        char *lzbuf;            /* lzjb output */
      184 +
      185 +        char *cmap;             /* array of input (map) buffers */
      186 +        ulong_t *bitmap;        /* bitmap for marking pages to dump */
      187 +        pgcnt_t bitmapsize;     /* size of bitmap */
      188 +        pid_t *pids;            /* list of process IDs at dump time */
 415  189  } dumpcfg_t;
 416  190  
 417  191  static dumpcfg_t dumpcfg;       /* config vars */
 418  192  
 419  193  /*
 420  194   * The dump I/O buffer.
 421  195   *
 422  196   * There is one I/O buffer used by dumpvp_write and dumpvp_flush. It is
 423  197   * sized according to the optimum device transfer speed.
 424  198   */
↓ open down ↓ 1 lines elided ↑ open up ↑
 426  200          vnode_t *cdev_vp;       /* VCHR open of the dump device */
 427  201          len_t   vp_limit;       /* maximum write offset */
 428  202          offset_t vp_off;        /* current dump device offset */
 429  203          char    *cur;           /* dump write pointer */
 430  204          char    *start;         /* dump buffer address */
 431  205          char    *end;           /* dump buffer end */
 432  206          size_t  size;           /* size of dumpbuf in bytes */
 433  207          size_t  iosize;         /* best transfer size for device */
 434  208  } dumpbuf_t;
 435  209  
 436      -dumpbuf_t dumpbuf;              /* I/O buffer */
      210 +static dumpbuf_t dumpbuf;       /* I/O buffer */
 437  211  
 438  212  /*
 439  213   * The dump I/O buffer must be at least one page, at most xfer_size
 440  214   * bytes, and should scale with physmem in between.  The transfer size
 441  215   * passed in will either represent a global default (maxphys) or the
 442  216   * best size for the device.  The size of the dumpbuf I/O buffer is
 443  217   * limited by dumpbuf_limit (8MB by default) because the dump
 444  218   * performance saturates beyond a certain size.  The default is to
 445  219   * select 1/4096 of the memory.
 446  220   */
 447  221  static int      dumpbuf_fraction = 12;  /* memory size scale factor */
 448      -static size_t   dumpbuf_limit = 8 * DUMP_1MB;   /* max I/O buf size */
      222 +static size_t   dumpbuf_limit = 8 << 20;        /* max I/O buf size */
 449  223  
 450  224  static size_t
 451  225  dumpbuf_iosize(size_t xfer_size)
 452  226  {
 453  227          size_t iosize = ptob(physmem >> dumpbuf_fraction);
 454  228  
 455  229          if (iosize < PAGESIZE)
 456  230                  iosize = PAGESIZE;
 457  231          else if (iosize > xfer_size)
 458  232                  iosize = xfer_size;
↓ open down ↓ 21 lines elided ↑ open up ↑
 480  254  
 481  255          new_buf = kmem_alloc(new_size, KM_SLEEP);
 482  256          dumpbuf.size = new_size;
 483  257          dumpbuf.start = new_buf;
 484  258          dumpbuf.end = new_buf + new_size;
 485  259          kmem_free(old_buf, old_size);
 486  260  }
 487  261  
 488  262  /*
 489  263   * dump_update_clevel is called when dumpadm configures the dump device.
 490      - *      Calculate number of helpers and buffers.
 491      - *      Allocate the minimum configuration for now.
      264 + *      Allocate the minimum configuration for now.
 492  265   *
 493  266   * When the dump file is configured we reserve a minimum amount of
 494  267   * memory for use at crash time. But we reserve VA for all the memory
 495  268   * we really want in order to do the fastest dump possible. The VA is
 496  269   * backed by pages not being dumped, according to the bitmap. If
 497  270   * there is insufficient spare memory, however, we fall back to the
 498  271   * minimum.
 499  272   *
 500  273   * Live dump (savecore -L) always uses the minimum config.
 501  274   *
 502      - * clevel 0 is single threaded lzjb
 503      - * clevel 1 is parallel lzjb
 504      - * clevel 2 is parallel bzip2
      275 + * For single-threaded dumps, the panic CPU does lzjb compression.
 505  276   *
 506      - * The ncpu threshold is selected with dump_plat_mincpu.
 507      - * On OPL, set_platform_defaults() overrides the sun4u setting.
 508      - * The actual values are defined via DUMP_PLAT_*_MINCPU macros.
 509      - *
 510      - * Architecture         Threshold       Algorithm
 511      - * sun4u                <  51           parallel lzjb
 512      - * sun4u                >= 51           parallel bzip2(*)
 513      - * sun4u OPL            <  8            parallel lzjb
 514      - * sun4u OPL            >= 8            parallel bzip2(*)
 515      - * sun4v                <  128          parallel lzjb
 516      - * sun4v                >= 128          parallel bzip2(*)
 517      - * x86                  < 11            parallel lzjb
 518      - * x86                  >= 11           parallel bzip2(*)
 519      - * 32-bit               N/A             single-threaded lzjb
 520      - *
 521      - * (*) bzip2 is only chosen if there is sufficient available
 522      - * memory for buffers at dump time. See dumpsys_get_maxmem().
 523      - *
 524      - * Faster dump devices have larger I/O buffers. The threshold value is
 525      - * increased according to the size of the dump I/O buffer, because
 526      - * parallel lzjb performs better with faster disks. For buffers >= 1MB
 527      - * the threshold is 3X; for buffers >= 256K threshold is 2X.
 528      - *
 529      - * For parallel dumps, the number of helpers is ncpu-1. The CPU
 530      - * running panic runs the main task. For single-threaded dumps, the
 531      - * panic CPU does lzjb compression (it is tagged as MAINHELPER.)
 532      - *
 533      - * Need multiple buffers per helper so that they do not block waiting
 534      - * for the main task.
 535      - *                              parallel        single-threaded
 536      - * Number of output buffers:    nhelper*2               1
 537      - * Number of mapping buffers:   nhelper*4               1
 538      - *
 539  277   */
 540  278  static void
 541  279  dump_update_clevel()
 542  280  {
 543      -        int tag;
 544      -        size_t bz2size;
 545      -        helper_t *hp, *hpend;
 546      -        cbuf_t *cp, *cpend;
 547  281          dumpcfg_t *old = &dumpcfg;
 548  282          dumpcfg_t newcfg = *old;
 549  283          dumpcfg_t *new = &newcfg;
 550  284  
 551  285          ASSERT(MUTEX_HELD(&dump_lock));
 552  286  
 553  287          /*
 554  288           * Free the previously allocated bufs and VM.
 555  289           */
 556      -        if (old->helper != NULL) {
      290 +        if (old->lzbuf)
      291 +                kmem_free(old->lzbuf, PAGESIZE);
      292 +        if (old->page)
      293 +                kmem_free(old->page, PAGESIZE);
 557  294  
 558      -                /* helpers */
 559      -                hpend = &old->helper[old->nhelper];
 560      -                for (hp = old->helper; hp != hpend; hp++) {
 561      -                        if (hp->lzbuf != NULL)
 562      -                                kmem_free(hp->lzbuf, PAGESIZE);
 563      -                        if (hp->page != NULL)
 564      -                                kmem_free(hp->page, PAGESIZE);
 565      -                }
 566      -                kmem_free(old->helper, old->nhelper * sizeof (helper_t));
 567      -
      295 +        if (old->cmap)
 568  296                  /* VM space for mapping pages */
 569      -                cpend = &old->cmap[old->ncmap];
 570      -                for (cp = old->cmap; cp != cpend; cp++)
 571      -                        vmem_xfree(heap_arena, cp->buf, CBUF_MAPSIZE);
 572      -                kmem_free(old->cmap, old->ncmap * sizeof (cbuf_t));
      297 +                vmem_xfree(heap_arena, old->cmap, PAGESIZE);
 573  298  
 574      -                /* output bufs */
 575      -                cpend = &old->cbuf[old->ncbuf];
 576      -                for (cp = old->cbuf; cp != cpend; cp++)
 577      -                        if (cp->buf != NULL)
 578      -                                kmem_free(cp->buf, cp->size);
 579      -                kmem_free(old->cbuf, old->ncbuf * sizeof (cbuf_t));
 580      -
 581      -                /* reserved VM for dumpsys_get_maxmem */
 582      -                if (old->maxvmsize > 0)
 583      -                        vmem_xfree(heap_arena, old->maxvm, old->maxvmsize);
 584      -        }
 585      -
 586  299          /*
 587      -         * Allocate memory and VM.
 588      -         * One CPU runs dumpsys, the rest are helpers.
       300 +         * Allocate the page-sized lzjb output and page copy buffers, and
       301 +         * one page of VA for mapping pages.
 589  302           */
 590      -        new->nhelper = ncpus - 1;
 591      -        if (new->nhelper < 1)
 592      -                new->nhelper = 1;
      303 +        new->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP);
      304 +        new->page = kmem_alloc(PAGESIZE, KM_SLEEP);
 593  305  
 594      -        if (new->nhelper > DUMP_MAX_NHELPER)
 595      -                new->nhelper = DUMP_MAX_NHELPER;
      306 +        new->cmap = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE,
      307 +                                0, 0, NULL, NULL, VM_SLEEP);
 596  308  
 597      -        /* use platform default, unless /etc/system overrides */
 598      -        if (dump_plat_mincpu == MINCPU_NOT_SET)
 599      -                dump_plat_mincpu = dump_plat_mincpu_default;
 600      -
 601      -        /* increase threshold for faster disks */
 602      -        new->threshold = dump_plat_mincpu;
 603      -        if (dumpbuf.iosize >= DUMP_1MB)
 604      -                new->threshold *= 3;
 605      -        else if (dumpbuf.iosize >= (256 * DUMP_1KB))
 606      -                new->threshold *= 2;
 607      -
 608      -        /* figure compression level based upon the computed threshold. */
 609      -        if (dump_plat_mincpu == 0 || new->nhelper < 2) {
 610      -                new->clevel = 0;
 611      -                new->nhelper = 1;
 612      -        } else if ((new->nhelper + 1) >= new->threshold) {
 613      -                new->clevel = DUMP_CLEVEL_BZIP2;
 614      -        } else {
 615      -                new->clevel = DUMP_CLEVEL_LZJB;
 616      -        }
 617      -
 618      -        if (new->clevel == 0) {
 619      -                new->ncbuf = 1;
 620      -                new->ncmap = 1;
 621      -        } else {
 622      -                new->ncbuf = NCBUF_PER_HELPER * new->nhelper;
 623      -                new->ncmap = NCMAP_PER_HELPER * new->nhelper;
 624      -        }
 625      -
 626  309          /*
 627      -         * Allocate new data structures and buffers for MINHELPERS,
 628      -         * and also figure the max desired size.
 629      -         */
 630      -        bz2size = BZ2_bzCompressInitSize(dump_bzip2_level);
 631      -        new->maxsize = 0;
 632      -        new->maxvmsize = 0;
 633      -        new->maxvm = NULL;
 634      -        tag = 1;
 635      -        new->helper = kmem_zalloc(new->nhelper * sizeof (helper_t), KM_SLEEP);
 636      -        hpend = &new->helper[new->nhelper];
 637      -        for (hp = new->helper; hp != hpend; hp++) {
 638      -                hp->tag = tag++;
 639      -                if (hp < &new->helper[MINHELPERS]) {
 640      -                        hp->lzbuf = kmem_alloc(PAGESIZE, KM_SLEEP);
 641      -                        hp->page = kmem_alloc(PAGESIZE, KM_SLEEP);
 642      -                } else if (new->clevel < DUMP_CLEVEL_BZIP2) {
 643      -                        new->maxsize += 2 * PAGESIZE;
 644      -                } else {
 645      -                        new->maxsize += PAGESIZE;
 646      -                }
 647      -                if (new->clevel >= DUMP_CLEVEL_BZIP2)
 648      -                        new->maxsize += bz2size;
 649      -        }
 650      -
 651      -        new->cbuf = kmem_zalloc(new->ncbuf * sizeof (cbuf_t), KM_SLEEP);
 652      -        cpend = &new->cbuf[new->ncbuf];
 653      -        for (cp = new->cbuf; cp != cpend; cp++) {
 654      -                cp->state = CBUF_FREEBUF;
 655      -                cp->size = CBUF_SIZE;
 656      -                if (cp < &new->cbuf[MINCBUFS])
 657      -                        cp->buf = kmem_alloc(cp->size, KM_SLEEP);
 658      -                else
 659      -                        new->maxsize += cp->size;
 660      -        }
 661      -
 662      -        new->cmap = kmem_zalloc(new->ncmap * sizeof (cbuf_t), KM_SLEEP);
 663      -        cpend = &new->cmap[new->ncmap];
 664      -        for (cp = new->cmap; cp != cpend; cp++) {
 665      -                cp->state = CBUF_FREEMAP;
 666      -                cp->size = CBUF_MAPSIZE;
 667      -                cp->buf = vmem_xalloc(heap_arena, CBUF_MAPSIZE, CBUF_MAPSIZE,
 668      -                    0, 0, NULL, NULL, VM_SLEEP);
 669      -        }
 670      -
 671      -        /* reserve VA to be backed with spare pages at crash time */
 672      -        if (new->maxsize > 0) {
 673      -                new->maxsize = P2ROUNDUP(new->maxsize, PAGESIZE);
 674      -                new->maxvmsize = P2ROUNDUP(new->maxsize, CBUF_MAPSIZE);
 675      -                new->maxvm = vmem_xalloc(heap_arena, new->maxvmsize,
 676      -                    CBUF_MAPSIZE, 0, 0, NULL, NULL, VM_SLEEP);
 677      -        }
 678      -
 679      -        /*
 680  310           * Reserve memory for kmem allocation calls made during crash
 681  311           * dump.  The hat layer allocates memory for each mapping
 682  312           * created, and the I/O path allocates buffers and data structs.
 683  313           * Add a few pages for safety.
 684  314           */
 685      -        kmem_dump_init((new->ncmap * dump_kmem_permap) +
 686      -            (dump_kmem_pages * PAGESIZE));
      315 +        kmem_dump_init(dump_kmem_permap + (dump_kmem_pages * PAGESIZE));
 687  316  
 688  317          /* set new config pointers */
 689  318          *old = *new;
 690  319  }
 691  320  
 692  321  /*
 693  322   * Define a struct memlist walker to optimize bitnum to pfn
 694  323   * lookup. The walker maintains the state of the list traversal.
 695  324   */
 696  325  typedef struct dumpmlw {
↓ open down ↓ 49 lines elided ↑ open up ↑
 746  375  
 747  376          for (mp = phys_install; mp != NULL; mp = mp->ml_next) {
 748  377                  if (pfn >= (mp->ml_address >> PAGESHIFT) &&
 749  378                      pfn < ((mp->ml_address + mp->ml_size) >> PAGESHIFT))
 750  379                          return (bitnum + pfn - (mp->ml_address >> PAGESHIFT));
 751  380                  bitnum += mp->ml_size >> PAGESHIFT;
 752  381          }
 753  382          return ((pgcnt_t)-1);
 754  383  }
 755  384  
 756      -/*
 757      - * Set/test bitmap for a CBUF_MAPSIZE range which includes pfn. The
 758      - * mapping of pfn to range index is imperfect because pfn and bitnum
 759      - * do not have the same phase. To make sure a CBUF_MAPSIZE range is
 760      - * covered, call this for both ends:
 761      - *      dump_set_used(base)
 762      - *      dump_set_used(base+CBUF_MAPNP-1)
 763      - *
 764      - * This is used during a panic dump to mark pages allocated by
 765      - * dumpsys_get_maxmem(). The macro IS_DUMP_PAGE(pp) is used by
 766      - * page_get_mnode_freelist() to make sure pages used by dump are never
 767      - * allocated.
 768      - */
 769      -#define CBUF_MAPP2R(pfn)        ((pfn) >> (CBUF_MAPSHIFT - PAGESHIFT))
 770      -
 771  385  static void
 772      -dump_set_used(pfn_t pfn)
 773      -{
 774      -
 775      -        pgcnt_t bitnum, rbitnum;
 776      -
 777      -        bitnum = dump_pfn_to_bitnum(pfn);
 778      -        ASSERT(bitnum != (pgcnt_t)-1);
 779      -
 780      -        rbitnum = CBUF_MAPP2R(bitnum);
 781      -        ASSERT(rbitnum < dumpcfg.rbitmapsize);
 782      -
 783      -        BT_SET(dumpcfg.rbitmap, rbitnum);
 784      -}
 785      -
 786      -int
 787      -dump_test_used(pfn_t pfn)
 788      -{
 789      -        pgcnt_t bitnum, rbitnum;
 790      -
 791      -        bitnum = dump_pfn_to_bitnum(pfn);
 792      -        ASSERT(bitnum != (pgcnt_t)-1);
 793      -
 794      -        rbitnum = CBUF_MAPP2R(bitnum);
 795      -        ASSERT(rbitnum < dumpcfg.rbitmapsize);
 796      -
 797      -        return (BT_TEST(dumpcfg.rbitmap, rbitnum));
 798      -}
 799      -
 800      -/*
 801      - * dumpbzalloc and dumpbzfree are callbacks from the bzip2 library.
 802      - * dumpsys_get_maxmem() uses them for BZ2_bzCompressInit().
 803      - */
 804      -static void *
 805      -dumpbzalloc(void *opaque, int items, int size)
 806      -{
 807      -        size_t *sz;
 808      -        char *ret;
 809      -
 810      -        ASSERT(opaque != NULL);
 811      -        sz = opaque;
 812      -        ret = dumpcfg.maxvm + *sz;
 813      -        *sz += items * size;
 814      -        *sz = P2ROUNDUP(*sz, BZ2_BZALLOC_ALIGN);
 815      -        ASSERT(*sz <= dumpcfg.maxvmsize);
 816      -        return (ret);
 817      -}
 818      -
 819      -/*ARGSUSED*/
 820      -static void
 821      -dumpbzfree(void *opaque, void *addr)
 822      -{
 823      -}
 824      -
 825      -/*
 826      - * Perform additional checks on the page to see if we can really use
 827      - * it. The kernel (kas) pages are always set in the bitmap. However,
 828      - * boot memory pages (prom_ppages or P_BOOTPAGES) are not in the
 829      - * bitmap. So we check for them.
 830      - */
 831      -static inline int
 832      -dump_pfn_check(pfn_t pfn)
 833      -{
 834      -        page_t *pp = page_numtopp_nolock(pfn);
 835      -        if (pp == NULL || pp->p_pagenum != pfn ||
 836      -#if defined(__sparc)
 837      -            pp->p_vnode == &promvp ||
 838      -#else
 839      -            PP_ISBOOTPAGES(pp) ||
 840      -#endif
 841      -            pp->p_toxic != 0)
 842      -                return (0);
 843      -        return (1);
 844      -}
 845      -
 846      -/*
 847      - * Check a range to see if all contained pages are available and
 848      - * return non-zero if the range can be used.
 849      - */
 850      -static inline int
 851      -dump_range_check(pgcnt_t start, pgcnt_t end, pfn_t pfn)
 852      -{
 853      -        for (; start < end; start++, pfn++) {
 854      -                if (BT_TEST(dumpcfg.bitmap, start))
 855      -                        return (0);
 856      -                if (!dump_pfn_check(pfn))
 857      -                        return (0);
 858      -        }
 859      -        return (1);
 860      -}
 861      -
 862      -/*
 863      - * dumpsys_get_maxmem() is called during panic. Find unused ranges
 864      - * and use them for buffers. If we find enough memory switch to
 865      - * parallel bzip2, otherwise use parallel lzjb.
 866      - *
 867      - * It searches the dump bitmap in 2 passes. The first time it looks
 868      - * for CBUF_MAPSIZE ranges. On the second pass it uses small pages.
 869      - */
 870      -static void
 871      -dumpsys_get_maxmem()
 872      -{
 873      -        dumpcfg_t *cfg = &dumpcfg;
 874      -        cbuf_t *endcp = &cfg->cbuf[cfg->ncbuf];
 875      -        helper_t *endhp = &cfg->helper[cfg->nhelper];
 876      -        pgcnt_t bitnum, end;
 877      -        size_t sz, endsz, bz2size;
 878      -        pfn_t pfn, off;
 879      -        cbuf_t *cp;
 880      -        helper_t *hp, *ohp;
 881      -        dumpmlw_t mlw;
 882      -        int k;
 883      -
 884      -        /*
 885      -         * Setting dump_plat_mincpu to 0 at any time forces a serial
 886      -         * dump.
 887      -         */
 888      -        if (dump_plat_mincpu == 0) {
 889      -                cfg->clevel = 0;
 890      -                return;
 891      -        }
 892      -
 893      -        /*
 894      -         * There may be no point in looking for spare memory. If
 895      -         * dumping all memory, then none is spare. If doing a serial
 896      -         * dump, then we already have buffers.
 897      -         */
 898      -        if (cfg->maxsize == 0 || cfg->clevel < DUMP_CLEVEL_LZJB ||
 899      -            (dump_conflags & DUMP_ALL) != 0) {
 900      -                if (cfg->clevel > DUMP_CLEVEL_LZJB)
 901      -                        cfg->clevel = DUMP_CLEVEL_LZJB;
 902      -                return;
 903      -        }
 904      -
 905      -        sz = 0;
 906      -        cfg->found4m = 0;
 907      -        cfg->foundsm = 0;
 908      -
 909      -        /* bitmap of ranges used to estimate which pfns are being used */
 910      -        bzero(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg.rbitmapsize));
 911      -
 912      -        /* find ranges that are not being dumped to use for buffers */
 913      -        dump_init_memlist_walker(&mlw);
 914      -        for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum = end) {
 915      -                dump_timeleft = dump_timeout;
 916      -                end = bitnum + CBUF_MAPNP;
 917      -                pfn = dump_bitnum_to_pfn(bitnum, &mlw);
 918      -                ASSERT(pfn != PFN_INVALID);
 919      -
 920      -                /* skip partial range at end of mem segment */
 921      -                if (mlw.mpleft < CBUF_MAPNP) {
 922      -                        end = bitnum + mlw.mpleft;
 923      -                        continue;
 924      -                }
 925      -
 926      -                /* skip non aligned pages */
 927      -                off = P2PHASE(pfn, CBUF_MAPNP);
 928      -                if (off != 0) {
 929      -                        end -= off;
 930      -                        continue;
 931      -                }
 932      -
 933      -                if (!dump_range_check(bitnum, end, pfn))
 934      -                        continue;
 935      -
 936      -                ASSERT((sz + CBUF_MAPSIZE) <= cfg->maxvmsize);
 937      -                hat_devload(kas.a_hat, cfg->maxvm + sz, CBUF_MAPSIZE, pfn,
 938      -                    PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST);
 939      -                sz += CBUF_MAPSIZE;
 940      -                cfg->found4m++;
 941      -
 942      -                /* set the bitmap for both ends to be sure to cover the range */
 943      -                dump_set_used(pfn);
 944      -                dump_set_used(pfn + CBUF_MAPNP - 1);
 945      -
 946      -                if (sz >= cfg->maxsize)
 947      -                        goto foundmax;
 948      -        }
 949      -
 950      -        /* Add small pages if we can't find enough large pages. */
 951      -        dump_init_memlist_walker(&mlw);
 952      -        for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum = end) {
 953      -                dump_timeleft = dump_timeout;
 954      -                end = bitnum + CBUF_MAPNP;
 955      -                pfn = dump_bitnum_to_pfn(bitnum, &mlw);
 956      -                ASSERT(pfn != PFN_INVALID);
 957      -
 958      -                /* Find any non-aligned pages at start and end of segment. */
 959      -                off = P2PHASE(pfn, CBUF_MAPNP);
 960      -                if (mlw.mpleft < CBUF_MAPNP) {
 961      -                        end = bitnum + mlw.mpleft;
 962      -                } else if (off != 0) {
 963      -                        end -= off;
 964      -                } else if (cfg->found4m && dump_test_used(pfn)) {
 965      -                        continue;
 966      -                }
 967      -
 968      -                for (; bitnum < end; bitnum++, pfn++) {
 969      -                        dump_timeleft = dump_timeout;
 970      -                        if (BT_TEST(dumpcfg.bitmap, bitnum))
 971      -                                continue;
 972      -                        if (!dump_pfn_check(pfn))
 973      -                                continue;
 974      -                        ASSERT((sz + PAGESIZE) <= cfg->maxvmsize);
 975      -                        hat_devload(kas.a_hat, cfg->maxvm + sz, PAGESIZE, pfn,
 976      -                            PROT_READ | PROT_WRITE, HAT_LOAD_NOCONSIST);
 977      -                        sz += PAGESIZE;
 978      -                        cfg->foundsm++;
 979      -                        dump_set_used(pfn);
 980      -                        if (sz >= cfg->maxsize)
 981      -                                goto foundmax;
 982      -                }
 983      -        }
 984      -
 985      -        /* Fall back to lzjb if we did not get enough memory for bzip2. */
 986      -        endsz = (cfg->maxsize * cfg->threshold) / cfg->nhelper;
 987      -        if (sz < endsz) {
 988      -                cfg->clevel = DUMP_CLEVEL_LZJB;
 989      -        }
 990      -
 991      -        /* Allocate memory for as many helpers as we can. */
 992      -foundmax:
 993      -
 994      -        /* Byte offsets into memory found and mapped above */
 995      -        endsz = sz;
 996      -        sz = 0;
 997      -
 998      -        /* Set the size for bzip2 state. Only bzip2 needs it. */
 999      -        bz2size = BZ2_bzCompressInitSize(dump_bzip2_level);
1000      -
1001      -        /* Skip the preallocated output buffers. */
1002      -        cp = &cfg->cbuf[MINCBUFS];
1003      -
1004      -        /* Use this to move memory up from the preallocated helpers. */
1005      -        ohp = cfg->helper;
1006      -
1007      -        /* Loop over all helpers and allocate memory. */
1008      -        for (hp = cfg->helper; hp < endhp; hp++) {
1009      -
1010      -                /* Skip preallocated helpers by checking hp->page. */
1011      -                if (hp->page == NULL) {
1012      -                        if (cfg->clevel <= DUMP_CLEVEL_LZJB) {
1013      -                                /* lzjb needs 2 1-page buffers */
1014      -                                if ((sz + (2 * PAGESIZE)) > endsz)
1015      -                                        break;
1016      -                                hp->page = cfg->maxvm + sz;
1017      -                                sz += PAGESIZE;
1018      -                                hp->lzbuf = cfg->maxvm + sz;
1019      -                                sz += PAGESIZE;
1020      -
1021      -                        } else if (ohp->lzbuf != NULL) {
1022      -                                /* re-use the preallocated lzjb page for bzip2 */
1023      -                                hp->page = ohp->lzbuf;
1024      -                                ohp->lzbuf = NULL;
1025      -                                ++ohp;
1026      -
1027      -                        } else {
1028      -                                /* bzip2 needs a 1-page buffer */
1029      -                                if ((sz + PAGESIZE) > endsz)
1030      -                                        break;
1031      -                                hp->page = cfg->maxvm + sz;
1032      -                                sz += PAGESIZE;
1033      -                        }
1034      -                }
1035      -
1036      -                /*
1037      -                 * Add output buffers per helper. The number of
1038      -                 * buffers per helper is determined by the ratio of
1039      -                 * ncbuf to nhelper.
1040      -                 */
1041      -                for (k = 0; cp < endcp && (sz + CBUF_SIZE) <= endsz &&
1042      -                    k < NCBUF_PER_HELPER; k++) {
1043      -                        cp->state = CBUF_FREEBUF;
1044      -                        cp->size = CBUF_SIZE;
1045      -                        cp->buf = cfg->maxvm + sz;
1046      -                        sz += CBUF_SIZE;
1047      -                        ++cp;
1048      -                }
1049      -
1050      -                /*
1051      -                 * bzip2 needs compression state. Use the dumpbzalloc
1052      -                 * and dumpbzfree callbacks to allocate the memory.
1053      -                 * bzip2 does allocation only at init time.
1054      -                 */
1055      -                if (cfg->clevel >= DUMP_CLEVEL_BZIP2) {
1056      -                        if ((sz + bz2size) > endsz) {
1057      -                                hp->page = NULL;
1058      -                                break;
1059      -                        } else {
1060      -                                hp->bzstream.opaque = &sz;
1061      -                                hp->bzstream.bzalloc = dumpbzalloc;
1062      -                                hp->bzstream.bzfree = dumpbzfree;
1063      -                                (void) BZ2_bzCompressInit(&hp->bzstream,
1064      -                                    dump_bzip2_level, 0, 0);
1065      -                                hp->bzstream.opaque = NULL;
1066      -                        }
1067      -                }
1068      -        }
1069      -
1070      -        /* Finish allocating output buffers */
1071      -        for (; cp < endcp && (sz + CBUF_SIZE) <= endsz; cp++) {
1072      -                cp->state = CBUF_FREEBUF;
1073      -                cp->size = CBUF_SIZE;
1074      -                cp->buf = cfg->maxvm + sz;
1075      -                sz += CBUF_SIZE;
1076      -        }
1077      -
1078      -        /* Enable IS_DUMP_PAGE macro, which checks for pages we took. */
1079      -        if (cfg->found4m || cfg->foundsm)
1080      -                dump_check_used = 1;
1081      -
1082      -        ASSERT(sz <= endsz);
1083      -}
1084      -
1085      -static void
1086  386  dumphdr_init(void)
1087  387  {
1088      -        pgcnt_t npages = 0;
      388 +        pgcnt_t npages;
1089  389  
1090  390          ASSERT(MUTEX_HELD(&dump_lock));
1091  391  
1092  392          if (dumphdr == NULL) {
1093  393                  dumphdr = kmem_zalloc(sizeof (dumphdr_t), KM_SLEEP);
1094  394                  dumphdr->dump_magic = DUMP_MAGIC;
1095  395                  dumphdr->dump_version = DUMP_VERSION;
1096  396                  dumphdr->dump_wordsize = DUMP_WORDSIZE;
1097  397                  dumphdr->dump_pageshift = PAGESHIFT;
1098  398                  dumphdr->dump_pagesize = PAGESIZE;
1099  399                  dumphdr->dump_utsname = utsname;
1100  400                  (void) strcpy(dumphdr->dump_platform, platform);
1101  401                  dumpbuf.size = dumpbuf_iosize(maxphys);
1102  402                  dumpbuf.start = kmem_alloc(dumpbuf.size, KM_SLEEP);
1103  403                  dumpbuf.end = dumpbuf.start + dumpbuf.size;
1104  404                  dumpcfg.pids = kmem_alloc(v.v_proc * sizeof (pid_t), KM_SLEEP);
1105      -                dumpcfg.helpermap = kmem_zalloc(BT_SIZEOFMAP(NCPU), KM_SLEEP);
1106      -                LOCK_INIT_HELD(&dumpcfg.helper_lock);
1107  405                  dump_stack_scratch = kmem_alloc(STACK_BUF_SIZE, KM_SLEEP);
1108  406                  (void) strncpy(dumphdr->dump_uuid, dump_get_uuid(),
1109  407                      sizeof (dumphdr->dump_uuid));
1110  408          }
1111  409  
1112  410          npages = num_phys_pages();
1113  411  
1114  412          if (dumpcfg.bitmapsize != npages) {
1115      -                size_t rlen = CBUF_MAPP2R(P2ROUNDUP(npages, CBUF_MAPNP));
1116  413                  void *map = kmem_alloc(BT_SIZEOFMAP(npages), KM_SLEEP);
1117      -                void *rmap = kmem_alloc(BT_SIZEOFMAP(rlen), KM_SLEEP);
1118  414  
1119  415                  if (dumpcfg.bitmap != NULL)
1120  416                          kmem_free(dumpcfg.bitmap, BT_SIZEOFMAP(dumpcfg.
1121  417                              bitmapsize));
1122      -                if (dumpcfg.rbitmap != NULL)
1123      -                        kmem_free(dumpcfg.rbitmap, BT_SIZEOFMAP(dumpcfg.
1124      -                            rbitmapsize));
1125  418                  dumpcfg.bitmap = map;
1126  419                  dumpcfg.bitmapsize = npages;
1127      -                dumpcfg.rbitmap = rmap;
1128      -                dumpcfg.rbitmapsize = rlen;
1129  420          }
1130  421  }
1131  422  
1132  423  /*
1133  424   * Establish a new dump device.
1134  425   */
1135  426  int
1136  427  dumpinit(vnode_t *vp, char *name, int justchecking)
1137  428  {
1138  429          vnode_t *cvp;
↓ open down ↓ 409 lines elided ↑ open up ↑
1548  839          dumpvp_write(&ld, sizeof (ld));
1549  840          (void) dumpvp_flush();
1550  841          if (!panicstr) {
1551  842                  (void) VOP_PUTPAGE(dumpvp, dumpvp_start,
1552  843                      (size_t)(dumpbuf.vp_off - dumpvp_start),
1553  844                      B_INVAL | B_FORCE, kcred, NULL);
1554  845          }
1555  846  }
1556  847  
1557  848  /*
1558      - * The following functions are called on multiple CPUs during dump.
1559      - * They must not use most kernel services, because all cross-calls are
1560      - * disabled during panic. Therefore, blocking locks and cache flushes
1561      - * will not work.
1562      - */
1563      -
1564      -/*
1565  849   * Copy pages, trapping ECC errors. Also, for robustness, trap data
1566  850   * access in case something goes wrong in the hat layer and the
1567  851   * mapping is broken.
1568  852   */
1569  853  static int
1570  854  dump_pagecopy(void *src, void *dst)
1571  855  {
1572  856          long *wsrc = (long *)src;
1573  857          long *wdst = (long *)dst;
1574  858          const ulong_t ncopies = PAGESIZE / sizeof (long);
↓ open down ↓ 18 lines elided ↑ open up ↑
1593  877  #endif
1594  878          }
1595  879          while (w < ncopies) {
1596  880                  wdst[w] = wsrc[w];
1597  881                  w++;
1598  882          }
1599  883          no_trap();
1600  884          return (ueoff);
1601  885  }
1602  886  
1603      -static void
1604      -dumpsys_close_cq(cqueue_t *cq, int live)
1605      -{
1606      -        if (live) {
1607      -                mutex_enter(&cq->mutex);
1608      -                atomic_dec_uint(&cq->open);
1609      -                cv_signal(&cq->cv);
1610      -                mutex_exit(&cq->mutex);
1611      -        } else {
1612      -                atomic_dec_uint(&cq->open);
1613      -        }
1614      -}
1615      -
1616      -static inline void
1617      -dumpsys_spinlock(lock_t *lp)
1618      -{
1619      -        uint_t backoff = 0;
1620      -        int loop_count = 0;
1621      -
1622      -        while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
1623      -                if (++loop_count >= ncpus) {
1624      -                        backoff = mutex_lock_backoff(0);
1625      -                        loop_count = 0;
1626      -                } else {
1627      -                        backoff = mutex_lock_backoff(backoff);
1628      -                }
1629      -                mutex_lock_delay(backoff);
1630      -        }
1631      -}
1632      -
1633      -static inline void
1634      -dumpsys_spinunlock(lock_t *lp)
1635      -{
1636      -        lock_clear(lp);
1637      -}
1638      -
1639      -static inline void
1640      -dumpsys_lock(cqueue_t *cq, int live)
1641      -{
1642      -        if (live)
1643      -                mutex_enter(&cq->mutex);
1644      -        else
1645      -                dumpsys_spinlock(&cq->spinlock);
1646      -}
1647      -
1648      -static inline void
1649      -dumpsys_unlock(cqueue_t *cq, int live, int signal)
1650      -{
1651      -        if (live) {
1652      -                if (signal)
1653      -                        cv_signal(&cq->cv);
1654      -                mutex_exit(&cq->mutex);
1655      -        } else {
1656      -                dumpsys_spinunlock(&cq->spinlock);
1657      -        }
1658      -}
1659      -
1660      -static void
1661      -dumpsys_wait_cq(cqueue_t *cq, int live)
1662      -{
1663      -        if (live) {
1664      -                cv_wait(&cq->cv, &cq->mutex);
1665      -        } else {
1666      -                dumpsys_spinunlock(&cq->spinlock);
1667      -                while (cq->open)
1668      -                        if (cq->first)
1669      -                                break;
1670      -                dumpsys_spinlock(&cq->spinlock);
1671      -        }
1672      -}
1673      -
1674      -static void
1675      -dumpsys_put_cq(cqueue_t *cq, cbuf_t *cp, int newstate, int live)
1676      -{
1677      -        if (cp == NULL)
1678      -                return;
1679      -
1680      -        dumpsys_lock(cq, live);
1681      -
1682      -        if (cq->ts != 0) {
1683      -                cq->empty += gethrtime() - cq->ts;
1684      -                cq->ts = 0;
1685      -        }
1686      -
1687      -        cp->state = newstate;
1688      -        cp->next = NULL;
1689      -        if (cq->last == NULL)
1690      -                cq->first = cp;
1691      -        else
1692      -                cq->last->next = cp;
1693      -        cq->last = cp;
1694      -
1695      -        dumpsys_unlock(cq, live, 1);
1696      -}
1697      -
1698      -static cbuf_t *
1699      -dumpsys_get_cq(cqueue_t *cq, int live)
1700      -{
1701      -        cbuf_t *cp;
1702      -        hrtime_t now = gethrtime();
1703      -
1704      -        dumpsys_lock(cq, live);
1705      -
1706      -        /* CONSTCOND */
1707      -        while (1) {
1708      -                cp = (cbuf_t *)cq->first;
1709      -                if (cp == NULL) {
1710      -                        if (cq->open == 0)
1711      -                                break;
1712      -                        dumpsys_wait_cq(cq, live);
1713      -                        continue;
1714      -                }
1715      -                cq->first = cp->next;
1716      -                if (cq->first == NULL) {
1717      -                        cq->last = NULL;
1718      -                        cq->ts = now;
1719      -                }
1720      -                break;
1721      -        }
1722      -
1723      -        dumpsys_unlock(cq, live, cq->first != NULL || cq->open == 0);
1724      -        return (cp);
1725      -}
1726      -
1727      -/*
1728      - * Send an error message to the console. If the main task is running
1729      - * just write the message via uprintf. If a helper is running the
1730      - * message has to be put on a queue for the main task. Setting fmt to
1731      - * NULL means flush the error message buffer. If fmt is not NULL, just
1732      - * add the text to the existing buffer.
1733      - */
1734      -static void
1735      -dumpsys_errmsg(helper_t *hp, const char *fmt, ...)
1736      -{
1737      -        dumpsync_t *ds = hp->ds;
1738      -        cbuf_t *cp = hp->cperr;
1739      -        va_list adx;
1740      -
1741      -        if (hp->helper == MAINHELPER) {
1742      -                if (fmt != NULL) {
1743      -                        if (ds->neednl) {
1744      -                                uprintf("\n");
1745      -                                ds->neednl = 0;
1746      -                        }
1747      -                        va_start(adx, fmt);
1748      -                        vuprintf(fmt, adx);
1749      -                        va_end(adx);
1750      -                }
1751      -        } else if (fmt == NULL) {
1752      -                if (cp != NULL) {
1753      -                        CQ_PUT(mainq, cp, CBUF_ERRMSG);
1754      -                        hp->cperr = NULL;
1755      -                }
1756      -        } else {
1757      -                if (hp->cperr == NULL) {
1758      -                        cp = CQ_GET(freebufq);
1759      -                        hp->cperr = cp;
1760      -                        cp->used = 0;
1761      -                }
1762      -                va_start(adx, fmt);
1763      -                cp->used += vsnprintf(cp->buf + cp->used, cp->size - cp->used,
1764      -                    fmt, adx);
1765      -                va_end(adx);
1766      -                if ((cp->used + LOG_MSGSIZE) > cp->size) {
1767      -                        CQ_PUT(mainq, cp, CBUF_ERRMSG);
1768      -                        hp->cperr = NULL;
1769      -                }
1770      -        }
1771      -}
1772      -
1773      -/*
1774      - * Write an output buffer to the dump file. If the main task is
1775      - * running just write the data. If a helper is running the output is
1776      - * placed on a queue for the main task.
1777      - */
1778      -static void
1779      -dumpsys_swrite(helper_t *hp, cbuf_t *cp, size_t used)
1780      -{
1781      -        dumpsync_t *ds = hp->ds;
1782      -
1783      -        if (hp->helper == MAINHELPER) {
1784      -                HRSTART(ds->perpage, write);
1785      -                dumpvp_write(cp->buf, used);
1786      -                HRSTOP(ds->perpage, write);
1787      -                CQ_PUT(freebufq, cp, CBUF_FREEBUF);
1788      -        } else {
1789      -                cp->used = used;
1790      -                CQ_PUT(mainq, cp, CBUF_WRITE);
1791      -        }
1792      -}
1793      -
1794      -/*
1795      - * Copy one page within the mapped range. The offset starts at 0 and
1796      - * is relative to the first pfn. cp->buf + cp->off is the address of
1797      - * the first pfn. If dump_pagecopy returns a UE offset, create an
1798      - * error message.  Returns the offset to the next pfn in the range
1799      - * selected by the bitmap.
1800      - */
1801      -static int
1802      -dumpsys_copy_page(helper_t *hp, int offset)
1803      -{
1804      -        cbuf_t *cp = hp->cpin;
1805      -        int ueoff;
1806      -
1807      -        ASSERT(cp->off + offset + PAGESIZE <= cp->size);
1808      -        ASSERT(BT_TEST(dumpcfg.bitmap, cp->bitnum));
1809      -
1810      -        ueoff = dump_pagecopy(cp->buf + cp->off + offset, hp->page);
1811      -
1812      -        /* ueoff is the offset in the page to a UE error */
1813      -        if (ueoff != -1) {
1814      -                uint64_t pa = ptob(cp->pfn) + offset + ueoff;
1815      -
1816      -                dumpsys_errmsg(hp, "cpu %d: memory error at PA 0x%08x.%08x\n",
1817      -                    CPU->cpu_id, (uint32_t)(pa >> 32), (uint32_t)pa);
1818      -        }
1819      -
1820      -        /*
1821      -         * Advance bitnum and offset to the next input page for the
1822      -         * next call to this function.
1823      -         */
1824      -        offset += PAGESIZE;
1825      -        cp->bitnum++;
1826      -        while (cp->off + offset < cp->size) {
1827      -                if (BT_TEST(dumpcfg.bitmap, cp->bitnum))
1828      -                        break;
1829      -                offset += PAGESIZE;
1830      -                cp->bitnum++;
1831      -        }
1832      -
1833      -        return (offset);
1834      -}
1835      -
1836      -/*
1837      - * Read the helper queue, and copy one mapped page. Return 0 when
1838      - * done. Return 1 when a page has been copied into hp->page.
1839      - */
1840      -static int
1841      -dumpsys_sread(helper_t *hp)
1842      -{
1843      -        dumpsync_t *ds = hp->ds;
1844      -
1845      -        /* CONSTCOND */
1846      -        while (1) {
1847      -
1848      -                /* Find the next input buffer. */
1849      -                if (hp->cpin == NULL) {
1850      -                        HRSTART(hp->perpage, inwait);
1851      -
1852      -                        /* CONSTCOND */
1853      -                        while (1) {
1854      -                                hp->cpin = CQ_GET(helperq);
1855      -                                dump_timeleft = dump_timeout;
1856      -
1857      -                                /*
1858      -                                 * NULL return means the helper queue
1859      -                                 * is closed and empty.
1860      -                                 */
1861      -                                if (hp->cpin == NULL)
1862      -                                        break;
1863      -
1864      -                                /* Have input, check for dump I/O error. */
1865      -                                if (!dump_ioerr)
1866      -                                        break;
1867      -
1868      -                                /*
1869      -                                 * If an I/O error occurs, stay in the
1870      -                                 * loop in order to empty the helper
1871      -                                 * queue. Return the buffers to the
1872      -                                 * main task to unmap and free them.
1873      -                                 */
1874      -                                hp->cpin->used = 0;
1875      -                                CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
1876      -                        }
1877      -                        HRSTOP(hp->perpage, inwait);
1878      -
1879      -                        /* Stop here when the helper queue is closed. */
1880      -                        if (hp->cpin == NULL)
1881      -                                break;
1882      -
1883      -                        /* Set the offset=0 to get the first pfn. */
1884      -                        hp->in = 0;
1885      -
1886      -                        /* Set the total processed to 0 */
1887      -                        hp->used = 0;
1888      -                }
1889      -
1890      -                /* Process the next page. */
1891      -                if (hp->used < hp->cpin->used) {
1892      -
1893      -                        /*
1894      -                         * Get the next page from the input buffer and
1895      -                         * return a copy.
1896      -                         */
1897      -                        ASSERT(hp->in != -1);
1898      -                        HRSTART(hp->perpage, copy);
1899      -                        hp->in = dumpsys_copy_page(hp, hp->in);
1900      -                        hp->used += PAGESIZE;
1901      -                        HRSTOP(hp->perpage, copy);
1902      -                        break;
1903      -
1904      -                } else {
1905      -
1906      -                        /*
1907      -                         * Done with the input. Flush the VM and
1908      -                         * return the buffer to the main task.
1909      -                         */
1910      -                        if (panicstr && hp->helper != MAINHELPER)
1911      -                                hat_flush_range(kas.a_hat,
1912      -                                    hp->cpin->buf, hp->cpin->size);
1913      -                        dumpsys_errmsg(hp, NULL);
1914      -                        CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
1915      -                        hp->cpin = NULL;
1916      -                }
1917      -        }
1918      -
1919      -        return (hp->cpin != NULL);
1920      -}
1921      -
1922      -/*
1923      - * Compress size bytes starting at buf with bzip2
1924      - * mode:
1925      - *      BZ_RUN          add one more compressed page
1926      - *      BZ_FINISH       no more input, flush the state
1927      - */
1928      -static void
1929      -dumpsys_bzrun(helper_t *hp, void *buf, size_t size, int mode)
1930      -{
1931      -        dumpsync_t *ds = hp->ds;
1932      -        const int CSIZE = sizeof (dumpcsize_t);
1933      -        bz_stream *ps = &hp->bzstream;
1934      -        int rc = 0;
1935      -        uint32_t csize;
1936      -        dumpcsize_t cs;
1937      -
1938      -        /* Set input pointers to new input page */
1939      -        if (size > 0) {
1940      -                ps->avail_in = size;
1941      -                ps->next_in = buf;
1942      -        }
1943      -
1944      -        /* CONSTCOND */
1945      -        while (1) {
1946      -
1947      -                /* Quit when all input has been consumed */
1948      -                if (ps->avail_in == 0 && mode == BZ_RUN)
1949      -                        break;
1950      -
1951      -                /* Get a new output buffer */
1952      -                if (hp->cpout == NULL) {
1953      -                        HRSTART(hp->perpage, outwait);
1954      -                        hp->cpout = CQ_GET(freebufq);
1955      -                        HRSTOP(hp->perpage, outwait);
1956      -                        ps->avail_out = hp->cpout->size - CSIZE;
1957      -                        ps->next_out = hp->cpout->buf + CSIZE;
1958      -                }
1959      -
1960      -                /* Compress input, or finalize */
1961      -                HRSTART(hp->perpage, compress);
1962      -                rc = BZ2_bzCompress(ps, mode);
1963      -                HRSTOP(hp->perpage, compress);
1964      -
1965      -                /* Check for error */
1966      -                if (mode == BZ_RUN && rc != BZ_RUN_OK) {
1967      -                        dumpsys_errmsg(hp, "%d: BZ_RUN error %s at page %lx\n",
1968      -                            hp->helper, BZ2_bzErrorString(rc),
1969      -                            hp->cpin->pagenum);
1970      -                        break;
1971      -                }
1972      -
1973      -                /* Write the buffer if it is full, or we are flushing */
1974      -                if (ps->avail_out == 0 || mode == BZ_FINISH) {
1975      -                        csize = hp->cpout->size - CSIZE - ps->avail_out;
1976      -                        cs = DUMP_SET_TAG(csize, hp->tag);
1977      -                        if (csize > 0) {
1978      -                                (void) memcpy(hp->cpout->buf, &cs, CSIZE);
1979      -                                dumpsys_swrite(hp, hp->cpout, csize + CSIZE);
1980      -                                hp->cpout = NULL;
1981      -                        }
1982      -                }
1983      -
1984      -                /* Check for final complete */
1985      -                if (mode == BZ_FINISH) {
1986      -                        if (rc == BZ_STREAM_END)
1987      -                                break;
1988      -                        if (rc != BZ_FINISH_OK) {
1989      -                                dumpsys_errmsg(hp, "%d: BZ_FINISH error %s\n",
1990      -                                    hp->helper, BZ2_bzErrorString(rc));
1991      -                                break;
1992      -                        }
1993      -                }
1994      -        }
1995      -
1996      -        /* Cleanup state and buffers */
1997      -        if (mode == BZ_FINISH) {
1998      -
1999      -                /* Reset state so that it is re-usable. */
2000      -                (void) BZ2_bzCompressReset(&hp->bzstream);
2001      -
2002      -                /* Give any unused output buffer to the main task */
2003      -                if (hp->cpout != NULL) {
2004      -                        hp->cpout->used = 0;
2005      -                        CQ_PUT(mainq, hp->cpout, CBUF_ERRMSG);
2006      -                        hp->cpout = NULL;
2007      -                }
2008      -        }
2009      -}
2010      -
2011      -static void
2012      -dumpsys_bz2compress(helper_t *hp)
2013      -{
2014      -        dumpsync_t *ds = hp->ds;
2015      -        dumpstreamhdr_t sh;
2016      -
2017      -        (void) strcpy(sh.stream_magic, DUMP_STREAM_MAGIC);
2018      -        sh.stream_pagenum = (pgcnt_t)-1;
2019      -        sh.stream_npages = 0;
2020      -        hp->cpin = NULL;
2021      -        hp->cpout = NULL;
2022      -        hp->cperr = NULL;
2023      -        hp->in = 0;
2024      -        hp->out = 0;
2025      -        hp->bzstream.avail_in = 0;
2026      -
2027      -        /* Bump reference to mainq while we are running */
2028      -        CQ_OPEN(mainq);
2029      -
2030      -        /* Get one page at a time */
2031      -        while (dumpsys_sread(hp)) {
2032      -                if (sh.stream_pagenum != hp->cpin->pagenum) {
2033      -                        sh.stream_pagenum = hp->cpin->pagenum;
2034      -                        sh.stream_npages = btop(hp->cpin->used);
2035      -                        dumpsys_bzrun(hp, &sh, sizeof (sh), BZ_RUN);
2036      -                }
2037      -                dumpsys_bzrun(hp, hp->page, PAGESIZE, 0);
2038      -        }
2039      -
2040      -        /* Done with input, flush any partial buffer */
2041      -        if (sh.stream_pagenum != (pgcnt_t)-1) {
2042      -                dumpsys_bzrun(hp, NULL, 0, BZ_FINISH);
2043      -                dumpsys_errmsg(hp, NULL);
2044      -        }
2045      -
2046      -        ASSERT(hp->cpin == NULL && hp->cpout == NULL && hp->cperr == NULL);
2047      -
2048      -        /* Decrement main queue count, we are done */
2049      -        CQ_CLOSE(mainq);
2050      -}
2051      -
2052      -/*
2053      - * Compress with lzjb
2054      - * write stream block if full or size==0
2055      - * if csize==0 write stream header, else write <csize, data>
2056      - * size==0 is a call to flush a buffer
2057      - * hp->cpout is the buffer we are flushing or filling
2058      - * hp->out is the next index to fill data
2059      - * osize is either csize+data, or the size of a stream header
2060      - */
2061      -static void
2062      -dumpsys_lzjbrun(helper_t *hp, size_t csize, void *buf, size_t size)
2063      -{
2064      -        dumpsync_t *ds = hp->ds;
2065      -        const int CSIZE = sizeof (dumpcsize_t);
2066      -        dumpcsize_t cs;
2067      -        size_t osize = csize > 0 ? CSIZE + size : size;
2068      -
2069      -        /* If flush, and there is no buffer, just return */
2070      -        if (size == 0 && hp->cpout == NULL)
2071      -                return;
2072      -
2073      -        /* If flush, or cpout is full, write it out */
2074      -        if (size == 0 ||
2075      -            hp->cpout != NULL && hp->out + osize > hp->cpout->size) {
2076      -
2077      -                /* Set tag+size word at the front of the stream block. */
2078      -                cs = DUMP_SET_TAG(hp->out - CSIZE, hp->tag);
2079      -                (void) memcpy(hp->cpout->buf, &cs, CSIZE);
2080      -
2081      -                /* Write block to dump file. */
2082      -                dumpsys_swrite(hp, hp->cpout, hp->out);
2083      -
2084      -                /* Clear pointer to indicate we need a new buffer */
2085      -                hp->cpout = NULL;
2086      -
2087      -                /* flushing, we are done */
2088      -                if (size == 0)
2089      -                        return;
2090      -        }
2091      -
2092      -        /* Get an output buffer if we don't have one. */
2093      -        if (hp->cpout == NULL) {
2094      -                HRSTART(hp->perpage, outwait);
2095      -                hp->cpout = CQ_GET(freebufq);
2096      -                HRSTOP(hp->perpage, outwait);
2097      -                hp->out = CSIZE;
2098      -        }
2099      -
2100      -        /* Store csize word. This is the size of compressed data. */
2101      -        if (csize > 0) {
2102      -                cs = DUMP_SET_TAG(csize, 0);
2103      -                (void) memcpy(hp->cpout->buf + hp->out, &cs, CSIZE);
2104      -                hp->out += CSIZE;
2105      -        }
2106      -
2107      -        /* Store the data. */
2108      -        (void) memcpy(hp->cpout->buf + hp->out, buf, size);
2109      -        hp->out += size;
2110      -}
2111      -
2112      -static void
2113      -dumpsys_lzjbcompress(helper_t *hp)
2114      -{
2115      -        dumpsync_t *ds = hp->ds;
2116      -        size_t csize;
2117      -        dumpstreamhdr_t sh;
2118      -
2119      -        (void) strcpy(sh.stream_magic, DUMP_STREAM_MAGIC);
2120      -        sh.stream_pagenum = (pfn_t)-1;
2121      -        sh.stream_npages = 0;
2122      -        hp->cpin = NULL;
2123      -        hp->cpout = NULL;
2124      -        hp->cperr = NULL;
2125      -        hp->in = 0;
2126      -        hp->out = 0;
2127      -
2128      -        /* Bump reference to mainq while we are running */
2129      -        CQ_OPEN(mainq);
2130      -
2131      -        /* Get one page at a time */
2132      -        while (dumpsys_sread(hp)) {
2133      -
2134      -                /* Create a stream header for each new input map */
2135      -                if (sh.stream_pagenum != hp->cpin->pagenum) {
2136      -                        sh.stream_pagenum = hp->cpin->pagenum;
2137      -                        sh.stream_npages = btop(hp->cpin->used);
2138      -                        dumpsys_lzjbrun(hp, 0, &sh, sizeof (sh));
2139      -                }
2140      -
2141      -                /* Compress one page */
2142      -                HRSTART(hp->perpage, compress);
2143      -                csize = compress(hp->page, hp->lzbuf, PAGESIZE);
2144      -                HRSTOP(hp->perpage, compress);
2145      -
2146      -                /* Add csize+data to output block */
2147      -                ASSERT(csize > 0 && csize <= PAGESIZE);
2148      -                dumpsys_lzjbrun(hp, csize, hp->lzbuf, csize);
2149      -        }
2150      -
2151      -        /* Done with input, flush any partial buffer */
2152      -        if (sh.stream_pagenum != (pfn_t)-1) {
2153      -                dumpsys_lzjbrun(hp, 0, NULL, 0);
2154      -                dumpsys_errmsg(hp, NULL);
2155      -        }
2156      -
2157      -        ASSERT(hp->cpin == NULL && hp->cpout == NULL && hp->cperr == NULL);
2158      -
2159      -        /* Decrement main queue count, we are done */
2160      -        CQ_CLOSE(mainq);
2161      -}
2162      -
2163      -/*
2164      - * Dump helper called from panic_idle() to compress pages.  CPUs in
2165      - * this path must not call most kernel services.
2166      - *
2167      - * During panic, all but one of the CPUs are idle. These CPUs are used
2168      - * as helpers working in parallel to copy and compress memory
2169      - * pages. During a panic, however, these processors cannot call any
2170      - * kernel services. This is because mutexes become no-ops during
2171      - * panic, and cross-call interrupts are inhibited.  Therefore, during
2172      - * panic dump the helper CPUs communicate with the panic CPU using
2173      - * memory variables. All memory mapping and I/O is performed by the
2174      - * panic CPU.
2175      - *
2176      - * At dump configuration time, helper_lock is set and helpers_wanted
2177      - * is 0. dumpsys() decides whether to set helpers_wanted before
2178      - * clearing helper_lock.
2179      - *
2180      - * At panic time, idle CPUs spin-wait on helper_lock, then alternately
2181      - * take the lock and become a helper, or return.
2182      - */
2183      -void
2184      -dumpsys_helper()
2185      -{
2186      -        dumpsys_spinlock(&dumpcfg.helper_lock);
2187      -        if (dumpcfg.helpers_wanted) {
2188      -                helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper];
2189      -
2190      -                for (hp = dumpcfg.helper; hp != hpend; hp++) {
2191      -                        if (hp->helper == FREEHELPER) {
2192      -                                hp->helper = CPU->cpu_id;
2193      -                                BT_SET(dumpcfg.helpermap, CPU->cpu_seqid);
2194      -
2195      -                                dumpsys_spinunlock(&dumpcfg.helper_lock);
2196      -
2197      -                                if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2)
2198      -                                        dumpsys_lzjbcompress(hp);
2199      -                                else
2200      -                                        dumpsys_bz2compress(hp);
2201      -
2202      -                                hp->helper = DONEHELPER;
2203      -                                return;
2204      -                        }
2205      -                }
2206      -
2207      -                /* No more helpers are needed. */
2208      -                dumpcfg.helpers_wanted = 0;
2209      -
2210      -        }
2211      -        dumpsys_spinunlock(&dumpcfg.helper_lock);
2212      -}
2213      -
2214      -/*
2215      - * No-wait helper callable in spin loops.
2216      - *
2217      - * Do not wait for helper_lock. Just check helpers_wanted. The caller
2218      - * may decide to continue. This is the "c)ontinue, s)ync, r)eset? s"
2219      - * case.
2220      - */
2221      -void
2222      -dumpsys_helper_nw()
2223      -{
2224      -        if (dumpcfg.helpers_wanted)
2225      -                dumpsys_helper();
2226      -}
2227      -
2228      -/*
2229      - * Dump helper for live dumps.
2230      - * These run as a system task.
2231      - */
2232      -static void
2233      -dumpsys_live_helper(void *arg)
2234      -{
2235      -        helper_t *hp = arg;
2236      -
2237      -        BT_ATOMIC_SET(dumpcfg.helpermap, CPU->cpu_seqid);
2238      -        if (dumpcfg.clevel < DUMP_CLEVEL_BZIP2)
2239      -                dumpsys_lzjbcompress(hp);
2240      -        else
2241      -                dumpsys_bz2compress(hp);
2242      -}
2243      -
2244      -/*
2245      - * Compress one page with lzjb (single threaded case)
2246      - */
2247      -static void
2248      -dumpsys_lzjb_page(helper_t *hp, cbuf_t *cp)
2249      -{
2250      -        dumpsync_t *ds = hp->ds;
2251      -        uint32_t csize;
2252      -
2253      -        hp->helper = MAINHELPER;
2254      -        hp->in = 0;
2255      -        hp->used = 0;
2256      -        hp->cpin = cp;
2257      -        while (hp->used < cp->used) {
2258      -                HRSTART(hp->perpage, copy);
2259      -                hp->in = dumpsys_copy_page(hp, hp->in);
2260      -                hp->used += PAGESIZE;
2261      -                HRSTOP(hp->perpage, copy);
2262      -
2263      -                HRSTART(hp->perpage, compress);
2264      -                csize = compress(hp->page, hp->lzbuf, PAGESIZE);
2265      -                HRSTOP(hp->perpage, compress);
2266      -
2267      -                HRSTART(hp->perpage, write);
2268      -                dumpvp_write(&csize, sizeof (csize));
2269      -                dumpvp_write(hp->lzbuf, csize);
2270      -                HRSTOP(hp->perpage, write);
2271      -        }
2272      -        CQ_PUT(mainq, hp->cpin, CBUF_USEDMAP);
2273      -        hp->cpin = NULL;
2274      -}
2275      -
2276      -/*
2277      - * Main task to dump pages. This is called on the dump CPU.
2278      - */
2279      -static void
2280      -dumpsys_main_task(void *arg)
2281      -{
2282      -        dumpsync_t *ds = arg;
2283      -        pgcnt_t pagenum = 0, bitnum = 0, hibitnum;
2284      -        dumpmlw_t mlw;
2285      -        cbuf_t *cp;
2286      -        pgcnt_t baseoff, pfnoff;
2287      -        pfn_t base, pfn;
2288      -        int sec, i, dumpserial;
2289      -
2290      -        /*
2291      -         * Fall back to serial mode if there are no helpers.
2292      -         * dump_plat_mincpu can be set to 0 at any time.
2293      -         * dumpcfg.helpermap must contain at least one member.
2294      -         */
2295      -        dumpserial = 1;
2296      -
2297      -        if (dump_plat_mincpu != 0 && dumpcfg.clevel != 0) {
2298      -                for (i = 0; i < BT_BITOUL(NCPU); ++i) {
2299      -                        if (dumpcfg.helpermap[i] != 0) {
2300      -                                dumpserial = 0;
2301      -                                break;
2302      -                        }
2303      -                }
2304      -        }
2305      -
2306      -        if (dumpserial) {
2307      -                dumpcfg.clevel = 0;
2308      -                if (dumpcfg.helper[0].lzbuf == NULL)
2309      -                        dumpcfg.helper[0].lzbuf = dumpcfg.helper[1].page;
2310      -        }
2311      -
2312      -        dump_init_memlist_walker(&mlw);
2313      -
2314      -        /* CONSTCOND */
2315      -        while (1) {
2316      -
2317      -                if (ds->percent > ds->percent_done) {
2318      -                        ds->percent_done = ds->percent;
2319      -                        sec = (gethrtime() - ds->start) / 1000 / 1000 / 1000;
2320      -                        uprintf("^\r%2d:%02d %3d%% done",
2321      -                            sec / 60, sec % 60, ds->percent);
2322      -                        ds->neednl = 1;
2323      -                }
2324      -
2325      -                while (CQ_IS_EMPTY(mainq) && !CQ_IS_EMPTY(writerq)) {
2326      -
2327      -                        /* the writerq never blocks */
2328      -                        cp = CQ_GET(writerq);
2329      -                        if (cp == NULL)
2330      -                                break;
2331      -
2332      -                        dump_timeleft = dump_timeout;
2333      -
2334      -                        HRSTART(ds->perpage, write);
2335      -                        dumpvp_write(cp->buf, cp->used);
2336      -                        HRSTOP(ds->perpage, write);
2337      -
2338      -                        CQ_PUT(freebufq, cp, CBUF_FREEBUF);
2339      -                }
2340      -
2341      -                /*
2342      -                 * Wait here for some buffers to process. Returns NULL
2343      -                 * when all helpers have terminated and all buffers
2344      -                 * have been processed.
2345      -                 */
2346      -                cp = CQ_GET(mainq);
2347      -
2348      -                if (cp == NULL) {
2349      -
2350      -                        /* Drain the write queue. */
2351      -                        if (!CQ_IS_EMPTY(writerq))
2352      -                                continue;
2353      -
2354      -                        /* Main task exits here. */
2355      -                        break;
2356      -                }
2357      -
2358      -                dump_timeleft = dump_timeout;
2359      -
2360      -                switch (cp->state) {
2361      -
2362      -                case CBUF_FREEMAP:
2363      -
2364      -                        /*
2365      -                         * Note that we drop CBUF_FREEMAP buffers on
2366      -                         * the floor (they will not be on any cqueue)
2367      -                         * when we no longer need them.
2368      -                         */
2369      -                        if (bitnum >= dumpcfg.bitmapsize)
2370      -                                break;
2371      -
2372      -                        if (dump_ioerr) {
2373      -                                bitnum = dumpcfg.bitmapsize;
2374      -                                CQ_CLOSE(helperq);
2375      -                                break;
2376      -                        }
2377      -
2378      -                        HRSTART(ds->perpage, bitmap);
2379      -                        for (; bitnum < dumpcfg.bitmapsize; bitnum++)
2380      -                                if (BT_TEST(dumpcfg.bitmap, bitnum))
2381      -                                        break;
2382      -                        HRSTOP(ds->perpage, bitmap);
2383      -                        dump_timeleft = dump_timeout;
2384      -
2385      -                        if (bitnum >= dumpcfg.bitmapsize) {
2386      -                                CQ_CLOSE(helperq);
2387      -                                break;
2388      -                        }
2389      -
2390      -                        /*
2391      -                         * Try to map CBUF_MAPSIZE ranges. Can't
2392      -                         * assume that memory segment size is a
2393      -                         * multiple of CBUF_MAPSIZE. Can't assume that
2394      -                         * the segment starts on a CBUF_MAPSIZE
2395      -                         * boundary.
2396      -                         */
2397      -                        pfn = dump_bitnum_to_pfn(bitnum, &mlw);
2398      -                        ASSERT(pfn != PFN_INVALID);
2399      -                        ASSERT(bitnum + mlw.mpleft <= dumpcfg.bitmapsize);
2400      -
2401      -                        base = P2ALIGN(pfn, CBUF_MAPNP);
2402      -                        if (base < mlw.mpaddr) {
2403      -                                base = mlw.mpaddr;
2404      -                                baseoff = P2PHASE(base, CBUF_MAPNP);
2405      -                        } else {
2406      -                                baseoff = 0;
2407      -                        }
2408      -
2409      -                        pfnoff = pfn - base;
2410      -                        if (pfnoff + mlw.mpleft < CBUF_MAPNP) {
2411      -                                hibitnum = bitnum + mlw.mpleft;
2412      -                                cp->size = ptob(pfnoff + mlw.mpleft);
2413      -                        } else {
2414      -                                hibitnum = bitnum - pfnoff + CBUF_MAPNP -
2415      -                                    baseoff;
2416      -                                cp->size = CBUF_MAPSIZE - ptob(baseoff);
2417      -                        }
2418      -
2419      -                        cp->pfn = pfn;
2420      -                        cp->bitnum = bitnum++;
2421      -                        cp->pagenum = pagenum++;
2422      -                        cp->off = ptob(pfnoff);
2423      -
2424      -                        for (; bitnum < hibitnum; bitnum++)
2425      -                                if (BT_TEST(dumpcfg.bitmap, bitnum))
2426      -                                        pagenum++;
2427      -
2428      -                        dump_timeleft = dump_timeout;
2429      -                        cp->used = ptob(pagenum - cp->pagenum);
2430      -
2431      -                        HRSTART(ds->perpage, map);
2432      -                        hat_devload(kas.a_hat, cp->buf, cp->size, base,
2433      -                            PROT_READ, HAT_LOAD_NOCONSIST);
2434      -                        HRSTOP(ds->perpage, map);
2435      -
2436      -                        ds->pages_mapped += btop(cp->size);
2437      -                        ds->pages_used += pagenum - cp->pagenum;
2438      -
2439      -                        CQ_OPEN(mainq);
2440      -
2441      -                        /*
2442      -                         * If there are no helpers the main task does
2443      -                         * non-streams lzjb compress.
2444      -                         */
2445      -                        if (dumpserial) {
2446      -                                dumpsys_lzjb_page(dumpcfg.helper, cp);
2447      -                                break;
2448      -                        }
2449      -
2450      -                        /* pass mapped pages to a helper */
2451      -                        CQ_PUT(helperq, cp, CBUF_INREADY);
2452      -
2453      -                        /* the last page was done */
2454      -                        if (bitnum >= dumpcfg.bitmapsize)
2455      -                                CQ_CLOSE(helperq);
2456      -
2457      -                        break;
2458      -
2459      -                case CBUF_USEDMAP:
2460      -
2461      -                        ds->npages += btop(cp->used);
2462      -
2463      -                        HRSTART(ds->perpage, unmap);
2464      -                        hat_unload(kas.a_hat, cp->buf, cp->size, HAT_UNLOAD);
2465      -                        HRSTOP(ds->perpage, unmap);
2466      -
2467      -                        if (bitnum < dumpcfg.bitmapsize)
2468      -                                CQ_PUT(mainq, cp, CBUF_FREEMAP);
2469      -                        CQ_CLOSE(mainq);
2470      -
2471      -                        ASSERT(ds->npages <= dumphdr->dump_npages);
2472      -                        ds->percent = ds->npages * 100LL / dumphdr->dump_npages;
2473      -                        break;
2474      -
2475      -                case CBUF_WRITE:
2476      -
2477      -                        CQ_PUT(writerq, cp, CBUF_WRITE);
2478      -                        break;
2479      -
2480      -                case CBUF_ERRMSG:
2481      -
2482      -                        if (cp->used > 0) {
2483      -                                cp->buf[cp->size - 2] = '\n';
2484      -                                cp->buf[cp->size - 1] = '\0';
2485      -                                if (ds->neednl) {
2486      -                                        uprintf("\n%s", cp->buf);
2487      -                                        ds->neednl = 0;
2488      -                                } else {
2489      -                                        uprintf("%s", cp->buf);
2490      -                                }
2491      -                                /* wait for console output */
2492      -                                drv_usecwait(200000);
2493      -                                dump_timeleft = dump_timeout;
2494      -                        }
2495      -                        CQ_PUT(freebufq, cp, CBUF_FREEBUF);
2496      -                        break;
2497      -
2498      -                default:
2499      -                        uprintf("dump: unexpected buffer state %d, "
2500      -                            "buffer will be lost\n", cp->state);
2501      -                        break;
2502      -
2503      -                } /* end switch */
2504      -
2505      -        } /* end while(1) */
2506      -}
2507      -
2508  887  #ifdef  COLLECT_METRICS
2509  888  size_t
2510  889  dumpsys_metrics(dumpsync_t *ds, char *buf, size_t size)
2511  890  {
2512  891          dumpcfg_t *cfg = &dumpcfg;
2513  892          int myid = CPU->cpu_seqid;
2514  893          int i, compress_ratio;
2515  894          int sec, iorate;
2516      -        helper_t *hp, *hpend = &cfg->helper[cfg->nhelper];
2517  895          char *e = buf + size;
2518  896          char *p = buf;
2519  897  
2520  898          sec = ds->elapsed / (1000 * 1000 * 1000ULL);
2521  899          if (sec < 1)
2522  900                  sec = 1;
2523  901  
2524  902          if (ds->iotime < 1)
2525  903                  ds->iotime = 1;
2526  904          iorate = (ds->nwrite * 100000ULL) / ds->iotime;
2527  905  
2528  906          compress_ratio = 100LL * ds->npages / btopr(ds->nwrite + 1);
2529  907  
2530  908  #define P(...) (p += p < e ? snprintf(p, e - p, __VA_ARGS__) : 0)
2531  909  
2532  910          P("Master cpu_seqid,%d\n", CPU->cpu_seqid);
2533  911          P("Master cpu_id,%d\n", CPU->cpu_id);
2534  912          P("dump_flags,0x%x\n", dumphdr->dump_flags);
2535  913          P("dump_ioerr,%d\n", dump_ioerr);
2536  914  
2537      -        P("Helpers:\n");
2538      -        for (i = 0; i < ncpus; i++) {
2539      -                if ((i & 15) == 0)
2540      -                        P(",,%03d,", i);
2541      -                if (i == myid)
2542      -                        P("   M");
2543      -                else if (BT_TEST(cfg->helpermap, i))
2544      -                        P("%4d", cpu_seq[i]->cpu_id);
2545      -                else
2546      -                        P("   *");
2547      -                if ((i & 15) == 15)
2548      -                        P("\n");
2549      -        }
2550      -
2551      -        P("ncbuf_used,%d\n", cfg->ncbuf_used);
2552      -        P("ncmap,%d\n", cfg->ncmap);
2553      -
2554      -        P("Found %ldM ranges,%ld\n", (CBUF_MAPSIZE / DUMP_1MB), cfg->found4m);
2555      -        P("Found small pages,%ld\n", cfg->foundsm);
2556      -
2557      -        P("Compression level,%d\n", cfg->clevel);
2558      -        P("Compression type,%s %s\n", cfg->clevel == 0 ? "serial" : "parallel",
2559      -            cfg->clevel >= DUMP_CLEVEL_BZIP2 ? "bzip2" : "lzjb");
      915 +        P("Compression type,serial lzjb\n");
2560  916          P("Compression ratio,%d.%02d\n", compress_ratio / 100, compress_ratio %
2561  917              100);
2562      -        P("nhelper_used,%d\n", cfg->nhelper_used);
2563  918  
2564  919          P("Dump I/O rate MBS,%d.%02d\n", iorate / 100, iorate % 100);
2565  920          P("..total bytes,%lld\n", (u_longlong_t)ds->nwrite);
2566  921          P("..total nsec,%lld\n", (u_longlong_t)ds->iotime);
2567  922          P("dumpbuf.iosize,%ld\n", dumpbuf.iosize);
2568  923          P("dumpbuf.size,%ld\n", dumpbuf.size);
2569  924  
2570  925          P("Dump pages/sec,%llu\n", (u_longlong_t)ds->npages / sec);
2571  926          P("Dump pages,%llu\n", (u_longlong_t)ds->npages);
2572  927          P("Dump time,%d\n", sec);
2573  928  
2574  929          if (ds->pages_mapped > 0)
2575  930                  P("per-cent map utilization,%d\n", (int)((100 * ds->pages_used)
2576  931                      / ds->pages_mapped));
2577  932  
2578  933          P("\nPer-page metrics:\n");
2579  934          if (ds->npages > 0) {
2580      -                for (hp = cfg->helper; hp != hpend; hp++) {
2581      -#define PERPAGE(x)      ds->perpage.x += hp->perpage.x;
2582      -                        PERPAGES;
      935 +#define PERPAGE(x)      ds->perpage.x += cfg->perpage.x;
      936 +                PERPAGES;
2583  937  #undef PERPAGE
2584      -                }
2585  938  #define PERPAGE(x) \
2586  939                  P("%s nsec/page,%d\n", #x, (int)(ds->perpage.x / ds->npages));
2587  940                  PERPAGES;
2588  941  #undef PERPAGE
2589      -                P("freebufq.empty,%d\n", (int)(ds->freebufq.empty /
2590      -                    ds->npages));
2591      -                P("helperq.empty,%d\n", (int)(ds->helperq.empty /
2592      -                    ds->npages));
2593      -                P("writerq.empty,%d\n", (int)(ds->writerq.empty /
2594      -                    ds->npages));
2595      -                P("mainq.empty,%d\n", (int)(ds->mainq.empty / ds->npages));
2596  942  
2597  943                  P("I/O wait nsec/page,%llu\n", (u_longlong_t)(ds->iowait /
2598  944                      ds->npages));
2599  945          }
2600  946  #undef P
2601  947          if (p < e)
2602  948                  bzero(p, e - p);
2603  949          return (p - buf);
2604  950  }
2605  951  #endif  /* COLLECT_METRICS */
2606  952  
2607  953  /*
2608  954   * Dump the system.
2609  955   */
2610  956  void
2611  957  dumpsys(void)
2612  958  {
2613  959          dumpsync_t *ds = &dumpsync;
2614      -        taskq_t *livetaskq = NULL;
2615  960          pfn_t pfn;
2616  961          pgcnt_t bitnum;
2617  962          proc_t *p;
2618      -        helper_t *hp, *hpend = &dumpcfg.helper[dumpcfg.nhelper];
2619      -        cbuf_t *cp;
2620  963          pid_t npids, pidx;
2621  964          char *content;
2622  965          char *buf;
2623  966          size_t size;
2624      -        int save_dump_clevel;
2625  967          dumpmlw_t mlw;
2626  968          dumpcsize_t datatag;
2627  969          dumpdatahdr_t datahdr;
2628  970  
2629  971          if (dumpvp == NULL || dumphdr == NULL) {
2630  972                  uprintf("skipping system dump - no dump device configured\n");
2631      -                if (panicstr) {
2632      -                        dumpcfg.helpers_wanted = 0;
2633      -                        dumpsys_spinunlock(&dumpcfg.helper_lock);
2634      -                }
2635  973                  return;
2636  974          }
2637  975          dumpbuf.cur = dumpbuf.start;
2638  976  
2639  977          /* clear the sync variables */
2640      -        ASSERT(dumpcfg.nhelper > 0);
2641  978          bzero(ds, sizeof (*ds));
2642      -        ds->dumpcpu = CPU->cpu_id;
2643  979  
2644  980          /*
2645  981           * Calculate the starting block for dump.  If we're dumping on a
2646  982           * swap device, start 1/5 of the way in; otherwise, start at the
2647  983           * beginning.  And never use the first page -- it may be a disk label.
2648  984           */
2649  985          if (dumpvp->v_flag & VISSWAP)
2650  986                  dumphdr->dump_start = P2ROUNDUP(dumpvp_size / 5, DUMP_OFFSET);
2651  987          else
2652  988                  dumphdr->dump_start = DUMP_OFFSET;
↓ open down ↓ 152 lines elided ↑ open up ↑
2805 1141                          continue;
2806 1142                  pfn = dump_bitnum_to_pfn(bitnum, &mlw);
2807 1143                  ASSERT(pfn != PFN_INVALID);
2808 1144                  dumpvp_write(&pfn, sizeof (pfn_t));
2809 1145          }
2810 1146          dump_plat_pfn();
2811 1147  
2812 1148          /*
2813 1149           * Write out all the pages.
2814 1150           * Map pages, copy them handling UEs, compress, and write them out.
2815      -         * Cooperate with any helpers running on CPUs in panic_idle().
2816 1151           */
2817 1152          dumphdr->dump_data = dumpvp_flush();
2818 1153  
2819      -        bzero(dumpcfg.helpermap, BT_SIZEOFMAP(NCPU));
2820      -        ds->live = dumpcfg.clevel > 0 &&
2821      -            (dumphdr->dump_flags & DF_LIVE) != 0;
     1154 +        ASSERT(dumpcfg.page);
     1155 +        bzero(&dumpcfg.perpage, sizeof (dumpcfg.perpage));
2822 1156  
2823      -        save_dump_clevel = dumpcfg.clevel;
     1157 +        ds->start = gethrtime();
     1158 +        ds->iowaitts = ds->start;
     1159 +
2824 1160          if (panicstr)
2825      -                dumpsys_get_maxmem();
2826      -        else if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2)
2827      -                dumpcfg.clevel = DUMP_CLEVEL_LZJB;
     1161 +                kmem_dump_begin();
2828 1162  
2829      -        dumpcfg.nhelper_used = 0;
2830      -        for (hp = dumpcfg.helper; hp != hpend; hp++) {
2831      -                if (hp->page == NULL) {
2832      -                        hp->helper = DONEHELPER;
     1163 +        dump_init_memlist_walker(&mlw);
     1164 +        for (bitnum = 0; bitnum < dumpcfg.bitmapsize; bitnum++) {
     1165 +                size_t csize;
     1166 +
     1167 +                dump_timeleft = dump_timeout;
     1168 +                HRSTART(ds->perpage, bitmap);
     1169 +                if (!BT_TEST(dumpcfg.bitmap, bitnum)) {
     1170 +                        HRSTOP(ds->perpage, bitmap);
2833 1171                          continue;
2834 1172                  }
2835      -                ++dumpcfg.nhelper_used;
2836      -                hp->helper = FREEHELPER;
2837      -                hp->taskqid = NULL;
2838      -                hp->ds = ds;
2839      -                bzero(&hp->perpage, sizeof (hp->perpage));
2840      -                if (dumpcfg.clevel >= DUMP_CLEVEL_BZIP2)
2841      -                        (void) BZ2_bzCompressReset(&hp->bzstream);
2842      -        }
     1173 +                HRSTOP(ds->perpage, bitmap);
2843 1174  
2844      -        CQ_OPEN(freebufq);
2845      -        CQ_OPEN(helperq);
     1175 +                pfn = dump_bitnum_to_pfn(bitnum, &mlw);
     1176 +                ASSERT(pfn != PFN_INVALID);
2846 1177  
2847      -        dumpcfg.ncbuf_used = 0;
2848      -        for (cp = dumpcfg.cbuf; cp != &dumpcfg.cbuf[dumpcfg.ncbuf]; cp++) {
2849      -                if (cp->buf != NULL) {
2850      -                        CQ_PUT(freebufq, cp, CBUF_FREEBUF);
2851      -                        ++dumpcfg.ncbuf_used;
2852      -                }
2853      -        }
     1178 +                HRSTART(ds->perpage, map);
     1179 +                hat_devload(kas.a_hat, dumpcfg.cmap, PAGESIZE, pfn, PROT_READ,
     1180 +                            HAT_LOAD_NOCONSIST);
     1181 +                HRSTOP(ds->perpage, map);
2854 1182  
2855      -        for (cp = dumpcfg.cmap; cp != &dumpcfg.cmap[dumpcfg.ncmap]; cp++)
2856      -                CQ_PUT(mainq, cp, CBUF_FREEMAP);
     1183 +                dump_pagecopy(dumpcfg.cmap, dumpcfg.page);
2857 1184  
2858      -        ds->start = gethrtime();
2859      -        ds->iowaitts = ds->start;
     1185 +                HRSTART(ds->perpage, unmap);
     1186 +                hat_unload(kas.a_hat, dumpcfg.cmap, PAGESIZE, HAT_UNLOAD);
     1187 +                HRSTOP(ds->perpage, unmap);
2860 1188  
2861      -        /* start helpers */
2862      -        if (ds->live) {
2863      -                int n = dumpcfg.nhelper_used;
2864      -                int pri = MINCLSYSPRI - 25;
     1189 +                HRSTART(dumpcfg.perpage, compress);
     1190 +                csize = compress(dumpcfg.page, dumpcfg.lzbuf, PAGESIZE);
     1191 +                HRSTOP(dumpcfg.perpage, compress);
2865 1192  
2866      -                livetaskq = taskq_create("LiveDump", n, pri, n, n,
2867      -                    TASKQ_PREPOPULATE);
2868      -                for (hp = dumpcfg.helper; hp != hpend; hp++) {
2869      -                        if (hp->page == NULL)
2870      -                                continue;
2871      -                        hp->helper = hp - dumpcfg.helper;
2872      -                        hp->taskqid = taskq_dispatch(livetaskq,
2873      -                            dumpsys_live_helper, (void *)hp, TQ_NOSLEEP);
     1193 +                HRSTART(dumpcfg.perpage, write);
     1194 +                dumpvp_write(&csize, sizeof (csize));
     1195 +                dumpvp_write(dumpcfg.lzbuf, csize);
     1196 +                HRSTOP(dumpcfg.perpage, write);
     1197 +
     1198 +                if (dump_ioerr) {
     1199 +                        dumphdr->dump_flags &= ~DF_COMPLETE;
     1200 +                        dumphdr->dump_npages = ds->npages;
     1201 +                        break;
2874 1202                  }
     1203 +                if (++ds->npages * 100LL / dumphdr->dump_npages > ds->percent_done) {
     1204 +                        int sec;
2875 1205  
2876      -        } else {
2877      -                if (panicstr)
2878      -                        kmem_dump_begin();
2879      -                dumpcfg.helpers_wanted = dumpcfg.clevel > 0;
2880      -                dumpsys_spinunlock(&dumpcfg.helper_lock);
     1206 +                        sec = (gethrtime() - ds->start) / 1000 / 1000 / 1000;
     1207 +                        uprintf("^\r%2d:%02d %3d%% done", sec / 60, sec % 60,
     1208 +                                ++ds->percent_done);
     1209 +                        if (!panicstr)
     1210 +                                delay(1);       /* let the output be sent */
     1211 +                }
2881 1212          }
2882 1213  
2883      -        /* run main task */
2884      -        dumpsys_main_task(ds);
2885      -
2886 1214          ds->elapsed = gethrtime() - ds->start;
2887 1215          if (ds->elapsed < 1)
2888 1216                  ds->elapsed = 1;
2889 1217  
2890      -        if (livetaskq != NULL)
2891      -                taskq_destroy(livetaskq);
2892      -
2893      -        if (ds->neednl) {
2894      -                uprintf("\n");
2895      -                ds->neednl = 0;
2896      -        }
2897      -
2898 1218          /* record actual pages dumped */
2899 1219          dumphdr->dump_npages = ds->npages;
2900 1220  
2901 1221          /* platform-specific data */
2902      -        dumphdr->dump_npages += dump_plat_data(dumpcfg.cbuf[0].buf);
     1222 +        dumphdr->dump_npages += dump_plat_data(dumpcfg.page);
2903 1223  
2904 1224          /* note any errors by clearing DF_COMPLETE */
2905 1225          if (dump_ioerr || ds->npages < dumphdr->dump_npages)
2906 1226                  dumphdr->dump_flags &= ~DF_COMPLETE;
2907 1227  
2908 1228          /* end of stream blocks */
2909 1229          datatag = 0;
2910 1230          dumpvp_write(&datatag, sizeof (datatag));
2911 1231  
2912 1232          bzero(&datahdr, sizeof (datahdr));
2913 1233  
2914 1234          /* buffer for metrics */
2915      -        buf = dumpcfg.cbuf[0].buf;
2916      -        size = MIN(dumpcfg.cbuf[0].size, DUMP_OFFSET - sizeof (dumphdr_t) -
     1235 +        buf = dumpcfg.page;
     1236 +        size = MIN(PAGESIZE, DUMP_OFFSET - sizeof (dumphdr_t) -
2917 1237              sizeof (dumpdatahdr_t));
2918 1238  
2919 1239          /* finish the kmem intercepts, collect kmem verbose info */
2920 1240          if (panicstr) {
2921 1241                  datahdr.dump_metrics = kmem_dump_finish(buf, size);
2922 1242                  buf += datahdr.dump_metrics;
2923 1243                  size -= datahdr.dump_metrics;
2924 1244          }
2925 1245  
2926 1246          /* record in the header whether this is a fault-management panic */
2927 1247          if (panicstr)
2928 1248                  dumphdr->dump_fm_panic = is_fm_panic();
2929 1249  
2930 1250          /* compression info in data header */
2931 1251          datahdr.dump_datahdr_magic = DUMP_DATAHDR_MAGIC;
2932 1252          datahdr.dump_datahdr_version = DUMP_DATAHDR_VERSION;
2933      -        datahdr.dump_maxcsize = CBUF_SIZE;
2934      -        datahdr.dump_maxrange = CBUF_MAPSIZE / PAGESIZE;
2935      -        datahdr.dump_nstreams = dumpcfg.nhelper_used;
2936      -        datahdr.dump_clevel = dumpcfg.clevel;
     1253 +        datahdr.dump_maxcsize = PAGESIZE;
     1254 +        datahdr.dump_maxrange = 1;
     1255 +        datahdr.dump_nstreams = 1;
     1256 +        datahdr.dump_clevel = 0;
2937 1257  #ifdef COLLECT_METRICS
2938 1258          if (dump_metrics_on)
2939 1259                  datahdr.dump_metrics += dumpsys_metrics(ds, buf, size);
2940 1260  #endif
2941 1261          datahdr.dump_data_csize = dumpvp_flush() - dumphdr->dump_data;
2942 1262  
2943 1263          /*
2944 1264           * Write out the initial and terminal dump headers.
2945 1265           */
2946 1266          dumpbuf.vp_off = dumphdr->dump_start;
2947 1267          dumpvp_write(dumphdr, sizeof (dumphdr_t));
2948 1268          (void) dumpvp_flush();
2949 1269  
2950 1270          dumpbuf.vp_limit = dumpvp_size;
2951 1271          dumpbuf.vp_off = dumpbuf.vp_limit - DUMP_OFFSET;
2952 1272          dumpvp_write(dumphdr, sizeof (dumphdr_t));
2953 1273          dumpvp_write(&datahdr, sizeof (dumpdatahdr_t));
2954      -        dumpvp_write(dumpcfg.cbuf[0].buf, datahdr.dump_metrics);
     1274 +        dumpvp_write(dumpcfg.page, datahdr.dump_metrics);
2955 1275  
2956 1276          (void) dumpvp_flush();
2957 1277  
2958 1278          uprintf("\r%3d%% done: %llu pages dumped, ",
2959 1279              ds->percent_done, (u_longlong_t)ds->npages);
2960 1280  
2961 1281          if (dump_ioerr == 0) {
2962 1282                  uprintf("dump succeeded\n");
2963 1283          } else {
2964 1284                  uprintf("dump failed: error %d\n", dump_ioerr);
↓ open down ↓ 12 lines elided ↑ open up ↑
2977 1297                  dump_ereports();
2978 1298                  dump_messages();
2979 1299          }
2980 1300  
2981 1301          delay(2 * hz);  /* let people see the 'done' message */
2982 1302          dump_timeleft = 0;
2983 1303          dump_ioerr = 0;
2984 1304  
2985 1305          /* restore settings after live dump completes */
2986 1306          if (!panicstr) {
2987      -                dumpcfg.clevel = save_dump_clevel;
2988      -
2989 1307                  /* release any VCHR open of the dump device */
2990 1308                  if (dumpbuf.cdev_vp != NULL) {
2991 1309                          (void) VOP_CLOSE(dumpbuf.cdev_vp, FREAD | FWRITE, 1, 0,
2992 1310                              kcred, NULL);
2993 1311                          VN_RELE(dumpbuf.cdev_vp);
2994 1312                          dumpbuf.cdev_vp = NULL;
2995 1313                  }
2996 1314          }
2997 1315  }
2998 1316  
↓ open down ↓ 84 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX