1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2012 by Delphix. All rights reserved.
  25  */
  26 
  27 #include <mdb/mdb_modapi.h>
  28 #include <mdb/mdb_ctf.h>
  29 
  30 #include <sys/types.h>
  31 #include <sys/regset.h>
  32 #include <sys/stack.h>
  33 #include <sys/thread.h>
  34 #include <sys/modctl.h>
  35 
  36 #include "findstack.h"
  37 #include "thread.h"
  38 #include "sobj.h"
  39 
/*
 * Upper bound (1 MB) on a believable thread stack size; stacks_findstack()
 * treats anything larger as a corrupt thread.
 */
#define TOO_BIG_FOR_A_STACK (1024 * 1024)

/*
 * Translate an address between the target's stack (starting at kbase) and
 * the debugger's local copy of it (starting at ubase).  Both macros rely on
 * variables named "kbase" and "ubase" being in scope at the point of use.
 */
#define KTOU(p) ((p) - kbase + ubase)
#define UTOK(p) ((p) - ubase + kbase)

/* Returned by crawl() when the frame chain reaches the top of the stack. */
#define CRAWL_FOUNDALL  (-1)

/*
 * On x86 a frame is described here by just the saved frame pointer and the
 * return address.  NOTE(review): other platforms presumably get
 * struct rwindow (and rw_fp/rw_rtn) from a system header -- confirm.
 */
#if defined(__i386) || defined(__amd64)
struct rwindow {
        uintptr_t rw_fp;
        uintptr_t rw_rtn;
};
#endif

/* Default to an unbiased stack when the platform does not define a bias. */
#ifndef STACK_BIAS
#define STACK_BIAS      0
#endif
  57 
  58 /*
  59  * Given a stack pointer, try to crawl down it to the bottom.
  60  * "frame" is a VA in MDB's address space.
  61  *
  62  * Returns the number of frames successfully crawled down, or
  63  * CRAWL_FOUNDALL if it got to the bottom of the stack.
  64  */
  65 static int
  66 crawl(uintptr_t frame, uintptr_t kbase, uintptr_t ktop, uintptr_t ubase,
  67     int kill_fp, findstack_info_t *fsip)
  68 {
  69         int levels = 0;
  70 
  71         fsip->fsi_depth = 0;
  72         fsip->fsi_overflow = 0;
  73 
  74         fs_dprintf(("<0> frame = %p, kbase = %p, ktop = %p, ubase = %p\n",
  75             frame, kbase, ktop, ubase));
  76         for (;;) {
  77                 uintptr_t fp;
  78                 long *fpp = (long *)&((struct rwindow *)frame)->rw_fp;
  79 
  80                 fs_dprintf(("<1> fpp = %p, frame = %p\n", fpp, frame));
  81 
  82                 if ((frame & (STACK_ALIGN - 1)) != 0)
  83                         break;
  84 
  85                 fp = ((struct rwindow *)frame)->rw_fp + STACK_BIAS;
  86                 if (fsip->fsi_depth < fsip->fsi_max_depth)
  87                         fsip->fsi_stack[fsip->fsi_depth++] =
  88                             ((struct rwindow *)frame)->rw_rtn;
  89                 else
  90                         fsip->fsi_overflow = 1;
  91 
  92                 fs_dprintf(("<2> fp = %p\n", fp));
  93 
  94                 if (fp == ktop)
  95                         return (CRAWL_FOUNDALL);
  96                 fs_dprintf(("<3> not at base\n"));
  97 
  98 #if defined(__i386) || defined(__amd64)
  99                 if (ktop - fp == sizeof (struct rwindow)) {
 100                         fs_dprintf(("<4> found base\n"));
 101                         return (CRAWL_FOUNDALL);
 102                 }
 103 #endif
 104 
 105                 fs_dprintf(("<5> fp = %p, kbase = %p, ktop - size = %p\n",
 106                     fp, kbase, ktop - sizeof (struct rwindow)));
 107 
 108                 if (fp < kbase || fp >= (ktop - sizeof (struct rwindow)))
 109                         break;
 110 
 111                 frame = KTOU(fp);
 112                 fs_dprintf(("<6> frame = %p\n", frame));
 113 
 114                 /*
 115                  * NULL out the old %fp so we don't go down this stack
 116                  * more than once.
 117                  */
 118                 if (kill_fp) {
 119                         fs_dprintf(("<7> fpp = %p\n", fpp));
 120                         *fpp = NULL;
 121                 }
 122 
 123                 fs_dprintf(("<8> levels = %d\n", levels));
 124                 levels++;
 125         }
 126 
 127         return (levels);
 128 }
 129 
/*
 * The subset of thread fields that stacks_findstack() needs.  It is filled
 * in by mdb_ctf_vread(), which is passed the target type name "kthread_t"
 * and this type's name.
 *
 * NOTE(review): stacks_findstack() also refers to thr.t_sp and thr.t_pc,
 * which are not members here; presumably they are macros over t_pcb
 * supplied by <sys/thread.h> -- confirm.
 */
typedef struct mdb_findstack_kthread {
        /* copied to fsi_sobj_ops */
        struct _sobj_ops *t_sobj_ops;
        /* copied to fsi_tstate */
        uint_t  t_state;
        /* tested against T_PANIC */
        ushort_t t_flag;
        /* tested against TS_LOAD */
        ushort_t t_schedflag;
        /* used as the top (ktop) of the stack */
        caddr_t t_stk;
        /* used as the base (kbase) of the stack */
        caddr_t t_stkbase;
        label_t t_pcb;
} mdb_findstack_kthread_t;
 139 
 140 /*ARGSUSED*/
 141 int
 142 stacks_findstack(uintptr_t addr, findstack_info_t *fsip, uint_t print_warnings)
 143 {
 144         mdb_findstack_kthread_t thr;
 145         size_t stksz;
 146         uintptr_t ubase, utop;
 147         uintptr_t kbase, ktop;
 148         uintptr_t win, sp;
 149 
 150         fsip->fsi_failed = 0;
 151         fsip->fsi_pc = 0;
 152         fsip->fsi_sp = 0;
 153         fsip->fsi_depth = 0;
 154         fsip->fsi_overflow = 0;
 155 
 156         if (mdb_ctf_vread(&thr, "kthread_t", "mdb_findstack_kthread_t",
 157             addr, print_warnings ? 0 : MDB_CTF_VREAD_QUIET) == -1) {
 158                 fsip->fsi_failed = FSI_FAIL_BADTHREAD;
 159                 return (DCMD_ERR);
 160         }
 161 
 162         fsip->fsi_sobj_ops = (uintptr_t)thr.t_sobj_ops;
 163         fsip->fsi_tstate = thr.t_state;
 164         fsip->fsi_panic = !!(thr.t_flag & T_PANIC);
 165 
 166         if ((thr.t_schedflag & TS_LOAD) == 0) {
 167                 if (print_warnings)
 168                         mdb_warn("thread %p isn't in memory\n", addr);
 169                 fsip->fsi_failed = FSI_FAIL_NOTINMEMORY;
 170                 return (DCMD_ERR);
 171         }
 172 
 173         if (thr.t_stk < thr.t_stkbase) {
 174                 if (print_warnings)
 175                         mdb_warn(
 176                             "stack base or stack top corrupt for thread %p\n",
 177                             addr);
 178                 fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
 179                 return (DCMD_ERR);
 180         }
 181 
 182         kbase = (uintptr_t)thr.t_stkbase;
 183         ktop = (uintptr_t)thr.t_stk;
 184         stksz = ktop - kbase;
 185 
 186 #ifdef __amd64
 187         /*
 188          * The stack on amd64 is intentionally misaligned, so ignore the top
 189          * half-frame.  See thread_stk_init().  When handling traps, the frame
 190          * is automatically aligned by the hardware, so we only alter ktop if
 191          * needed.
 192          */
 193         if ((ktop & (STACK_ALIGN - 1)) != 0)
 194                 ktop -= STACK_ENTRY_ALIGN;
 195 #endif
 196 
 197         /*
 198          * If the stack size is larger than a meg, assume that it's bogus.
 199          */
 200         if (stksz > TOO_BIG_FOR_A_STACK) {
 201                 if (print_warnings)
 202                         mdb_warn("stack size for thread %p is too big to be "
 203                             "reasonable\n", addr);
 204                 fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
 205                 return (DCMD_ERR);
 206         }
 207 
 208         /*
 209          * This could be (and was) a UM_GC allocation.  Unfortunately,
 210          * stksz tends to be very large.  As currently implemented, dcmds
 211          * invoked as part of pipelines don't have their UM_GC-allocated
 212          * memory freed until the pipeline completes.  With stksz in the
 213          * neighborhood of 20k, the popular ::walk thread |::findstack
 214          * pipeline can easily run memory-constrained debuggers (kmdb) out
 215          * of memory.  This can be changed back to a gc-able allocation when
 216          * the debugger is changed to free UM_GC memory more promptly.
 217          */
 218         ubase = (uintptr_t)mdb_alloc(stksz, UM_SLEEP);
 219         utop = ubase + stksz;
 220         if (mdb_vread((caddr_t)ubase, stksz, kbase) != stksz) {
 221                 mdb_free((void *)ubase, stksz);
 222                 if (print_warnings)
 223                         mdb_warn("couldn't read entire stack for thread %p\n",
 224                             addr);
 225                 fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
 226                 return (DCMD_ERR);
 227         }
 228 
 229         /*
 230          * Try the saved %sp first, if it looks reasonable.
 231          */
 232         sp = KTOU((uintptr_t)thr.t_sp + STACK_BIAS);
 233         if (sp >= ubase && sp <= utop) {
 234                 if (crawl(sp, kbase, ktop, ubase, 0, fsip) == CRAWL_FOUNDALL) {
 235                         fsip->fsi_sp = (uintptr_t)thr.t_sp;
 236 #if !defined(__i386)
 237                         fsip->fsi_pc = (uintptr_t)thr.t_pc;
 238 #endif
 239                         goto found;
 240                 }
 241         }
 242 
 243         /*
 244          * Now walk through the whole stack, starting at the base,
 245          * trying every possible "window".
 246          */
 247         for (win = ubase;
 248             win + sizeof (struct rwindow) <= utop;
 249             win += sizeof (struct rwindow *)) {
 250                 if (crawl(win, kbase, ktop, ubase, 1, fsip) == CRAWL_FOUNDALL) {
 251                         fsip->fsi_sp = UTOK(win) - STACK_BIAS;
 252                         goto found;
 253                 }
 254         }
 255 
 256         /*
 257          * We didn't conclusively find the stack.  So we'll take another lap,
 258          * and print out anything that looks possible.
 259          */
 260         if (print_warnings)
 261                 mdb_printf("Possible stack pointers for thread %p:\n", addr);
 262         (void) mdb_vread((caddr_t)ubase, stksz, kbase);
 263 
 264         for (win = ubase;
 265             win + sizeof (struct rwindow) <= utop;
 266             win += sizeof (struct rwindow *)) {
 267                 uintptr_t fp = ((struct rwindow *)win)->rw_fp;
 268                 int levels;
 269 
 270                 if ((levels = crawl(win, kbase, ktop, ubase, 1, fsip)) > 1) {
 271                         if (print_warnings)
 272                                 mdb_printf("  %p (%d)\n", fp, levels);
 273                 } else if (levels == CRAWL_FOUNDALL) {
 274                         /*
 275                          * If this is a live system, the stack could change
 276                          * between the two mdb_vread(ubase, utop, kbase)'s,
 277                          * and we could have a fully valid stack here.
 278                          */
 279                         fsip->fsi_sp = UTOK(win) - STACK_BIAS;
 280                         goto found;
 281                 }
 282         }
 283 
 284         fsip->fsi_depth = 0;
 285         fsip->fsi_overflow = 0;
 286         fsip->fsi_failed = FSI_FAIL_STACKNOTFOUND;
 287 
 288         mdb_free((void *)ubase, stksz);
 289         return (DCMD_ERR);
 290 found:
 291         mdb_free((void *)ubase, stksz);
 292         return (DCMD_OK);
 293 }
 294 
 295 void
 296 stacks_findstack_cleanup()
 297 {}
 298 
 299 /*ARGSUSED*/
 300 int
 301 stacks_module_cb(uintptr_t addr, const modctl_t *mp, stacks_module_t *smp)
 302 {
 303         char mod_modname[MODMAXNAMELEN + 1];
 304 
 305         if (!mp->mod_modname)
 306                 return (WALK_NEXT);
 307 
 308         if (mdb_readstr(mod_modname, sizeof (mod_modname),
 309             (uintptr_t)mp->mod_modname) == -1) {
 310                 mdb_warn("failed to read mod_modname in \"modctl\" walk");
 311                 return (WALK_ERR);
 312         }
 313 
 314         if (strcmp(smp->sm_name, mod_modname))
 315                 return (WALK_NEXT);
 316 
 317         smp->sm_text = (uintptr_t)mp->mod_text;
 318         smp->sm_size = mp->mod_text_size;
 319 
 320         return (WALK_DONE);
 321 }
 322 
 323 int
 324 stacks_module(stacks_module_t *smp)
 325 {
 326         if (mdb_walk("modctl", (mdb_walk_cb_t)stacks_module_cb, smp) != 0) {
 327                 mdb_warn("cannot walk \"modctl\"");
 328                 return (-1);
 329         }
 330 
 331         return (0);
 332 }
 333 
 334 /*ARGSUSED*/
 335 static void
 336 print_sobj_help(int type, const char *name, const char *ops_name, void *ign)
 337 {
 338         mdb_printf(" %s", name);
 339 }
 340 
 341 /*ARGSUSED*/
 342 static void
 343 print_tstate_help(uint_t state, const char *name, void *ignored)
 344 {
 345         mdb_printf(" %s", name);
 346 }
 347 
 348 void
 349 stacks_help(void)
 350 {
 351         mdb_printf(
 352 "::stacks processes all of the thread stacks on the system, grouping\n"
 353 "together threads which have the same:\n"
 354 "\n"
 355 "  * Thread state,\n"
 356 "  * Sync object type, and\n"
 357 "  * PCs in their stack trace.\n"
 358 "\n"
 359 "The default output (no address or options) is just a dump of the thread\n"
 360 "groups in the system.  For a view of active threads, use \"::stacks -i\",\n"
 361 "which filters out FREE threads (interrupt threads which are currently\n"
 362 "inactive) and threads sleeping on a CV. (Note that those threads may still\n"
 363 "be noteworthy; this is just for a first glance.)  More general filtering\n"
 364 "options are described below, in the \"FILTERS\" section.\n"
 365 "\n"
 366 "::stacks can be used in a pipeline.  The input to ::stacks is one or more\n"
 367 "thread pointers.  For example, to get a summary of threads in a process,\n"
 368 "you can do:\n"
 369 "\n"
 370 "  %<b>procp%</b>::walk thread | ::stacks\n"
 371 "\n"
 372 "When output into a pipe, ::stacks prints all of the threads input,\n"
 373 "filtered by the given filtering options.  This means that multiple\n"
 374 "::stacks invocations can be piped together to achieve more complicated\n"
 375 "filters.  For example, to get threads which have both 'fop_read' and\n"
 376 "'cv_wait_sig_swap' in their stack trace, you could do:\n"
 377 "\n"
 378 "  ::stacks -c fop_read | ::stacks -c cv_wait_sig_swap_core\n"
 379 "\n"
 380 "To get the full list of threads in each group, use the '-a' flag:\n"
 381 "\n"
 382 "  ::stacks -a\n"
 383 "\n");
 384         mdb_dec_indent(2);
 385         mdb_printf("%<b>OPTIONS%</b>\n");
 386         mdb_inc_indent(2);
 387         mdb_printf("%s",
 388 "  -a    Print all of the grouped threads, instead of just a count.\n"
 389 "  -f    Force a re-run of the thread stack gathering.\n"
 390 "  -v    Be verbose about thread stack gathering.\n"
 391 "\n");
 392         mdb_dec_indent(2);
 393         mdb_printf("%<b>FILTERS%</b>\n");
 394         mdb_inc_indent(2);
 395         mdb_printf("%s",
 396 "  -i    Show active threads; equivalent to '-S CV -T FREE'.\n"
 397 "  -c func[+offset]\n"
 398 "        Only print threads whose stacks contain func/func+offset.\n"
 399 "  -C func[+offset]\n"
 400 "        Only print threads whose stacks do not contain func/func+offset.\n"
 401 "  -m module\n"
 402 "        Only print threads whose stacks contain functions from module.\n"
 403 "  -M module\n"
 404 "        Only print threads whose stacks do not contain functions from\n"
 405 "        module.\n"
 406 "  -s {type | ALL}\n"
 407 "        Only print threads which are on a 'type' synchronization object\n"
 408 "        (SOBJ).\n"
 409 "  -S {type | ALL}\n"
 410 "        Only print threads which are not on a 'type' SOBJ.\n"
 411 "  -t tstate\n"
 412 "        Only print threads which are in thread state 'tstate'.\n"
 413 "  -T tstate\n"
 414 "        Only print threads which are not in thread state 'tstate'.\n"
 415 "\n");
 416         mdb_printf("   SOBJ types:");
 417         sobj_type_walk(print_sobj_help, NULL);
 418         mdb_printf("\n");
 419         mdb_printf("Thread states:");
 420         thread_walk_states(print_tstate_help, NULL);
 421         mdb_printf(" panic\n");
 422 }