1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2012 by Delphix. All rights reserved.
  25  */
  26 
  27 #include <mdb/mdb_modapi.h>
  28 #include <mdb/mdb_ctf.h>
  29 
  30 #include <sys/types.h>
  31 #include <sys/regset.h>
  32 #include <sys/stack.h>
  33 #include <sys/thread.h>
  34 #include <sys/modctl.h>
  35 
  36 #include "findstack.h"
  37 #include "thread.h"
  38 #include "sobj.h"
  39 
/*
 * Upper bound (1 MB) on a believable kernel thread stack size.
 * stacks_findstack() treats anything larger as a corrupt thread.
 */
#define TOO_BIG_FOR_A_STACK (1024 * 1024)

/*
 * Translate between a kernel stack address and the matching address in
 * the debugger's local copy of that stack (KTOU: kernel -> local copy,
 * UTOK: local copy -> kernel).  Both macros silently depend on variables
 * named "kbase" and "ubase" being in scope at the point of expansion.
 */
#define KTOU(p) ((p) - kbase + ubase)
#define UTOK(p) ((p) - ubase + kbase)

/*
 * Value returned by crawl() when it reached the bottom of the stack, as
 * opposed to a non-negative count of frames walked before giving up.
 */
#define CRAWL_FOUNDALL  (-1)
  46 
/*
 * On x86 a stack frame is described here by just two words: rw_fp, which
 * crawl() follows to the next frame, and rw_rtn, which crawl() records as
 * the frame's return address.
 * NOTE(review): on other architectures struct rwindow presumably comes
 * from a system header -- confirm.
 */
#if defined(__i386) || defined(__amd64)
struct rwindow {
        uintptr_t rw_fp;
        uintptr_t rw_rtn;
};
#endif

/*
 * Fall back to a stack bias of zero when the included headers do not
 * define STACK_BIAS.
 */
#ifndef STACK_BIAS
#define STACK_BIAS      0
#endif
  57 
  58 /*
  59  * Given a stack pointer, try to crawl down it to the bottom.
  60  * "frame" is a VA in MDB's address space.
  61  *
  62  * Returns the number of frames successfully crawled down, or
  63  * CRAWL_FOUNDALL if it got to the bottom of the stack.
  64  */
  65 static int
  66 crawl(uintptr_t frame, uintptr_t kbase, uintptr_t ktop, uintptr_t ubase,
  67     int kill_fp, findstack_info_t *fsip)
  68 {
  69         int levels = 0;
  70 
  71         fsip->fsi_depth = 0;
  72         fsip->fsi_overflow = 0;
  73 
  74         fs_dprintf(("<0> frame = %p, kbase = %p, ktop = %p, ubase = %p\n",
  75             frame, kbase, ktop, ubase));
  76         for (;;) {
  77                 uintptr_t fp;
  78                 long *fpp = (long *)&((struct rwindow *)frame)->rw_fp;
  79 
  80                 fs_dprintf(("<1> fpp = %p, frame = %p\n", fpp, frame));
  81 
  82                 if ((frame & (STACK_ALIGN - 1)) != 0)
  83                         break;
  84 
  85                 fp = ((struct rwindow *)frame)->rw_fp + STACK_BIAS;
  86                 if (fsip->fsi_depth < fsip->fsi_max_depth)
  87                         fsip->fsi_stack[fsip->fsi_depth++] =
  88                             ((struct rwindow *)frame)->rw_rtn;
  89                 else
  90                         fsip->fsi_overflow = 1;
  91 
  92                 fs_dprintf(("<2> fp = %p\n", fp));
  93 
  94                 if (fp == ktop)
  95                         return (CRAWL_FOUNDALL);
  96                 fs_dprintf(("<3> not at base\n"));
  97 
  98 #if defined(__i386) || defined(__amd64)
  99                 if (ktop - fp == sizeof (struct rwindow)) {
 100                         fs_dprintf(("<4> found base\n"));
 101                         return (CRAWL_FOUNDALL);
 102                 }
 103 #endif
 104 
 105                 fs_dprintf(("<5> fp = %p, kbase = %p, ktop - size = %p\n",
 106                     fp, kbase, ktop - sizeof (struct rwindow)));
 107 
 108                 if (fp < kbase || fp >= (ktop - sizeof (struct rwindow)))
 109                         break;
 110 
 111                 frame = KTOU(fp);
 112                 fs_dprintf(("<6> frame = %p\n", frame));
 113 
 114                 /*
 115                  * NULL out the old %fp so we don't go down this stack
 116                  * more than once.
 117                  */
 118                 if (kill_fp) {
 119                         fs_dprintf(("<7> fpp = %p\n", fpp));
 120                         *fpp = NULL;
 121                 }
 122 
 123                 fs_dprintf(("<8> levels = %d\n", levels));
 124                 levels++;
 125         }
 126 
 127         return (levels);
 128 }
 129 
/*
 * The subset of kthread_t that stacks_findstack() needs.  It is filled in
 * by mdb_ctf_vread() with "kthread_t" as the target type, so the member
 * names here are significant and must not be changed.
 */
typedef struct mdb_findstack_kthread {
        struct _sobj_ops *t_sobj_ops;   /* reported as fsi_sobj_ops */
        uint_t  t_state;                /* reported as fsi_tstate */
        ushort_t t_flag;                /* tested for T_PANIC */
        ushort_t t_schedflag;           /* not referenced in this file's */
                                        /* visible code */
        caddr_t t_stk;                  /* top of the thread's stack */
        caddr_t t_stkbase;              /* base (lowest address) of stack */
        /*
         * NOTE(review): stacks_findstack() reads thr.t_sp and thr.t_pc;
         * those are presumably macros from <sys/thread.h> that resolve to
         * elements of t_pcb -- confirm.
         */
        label_t t_pcb;
} mdb_findstack_kthread_t;
 139 
 140 /*ARGSUSED*/
 141 int
 142 stacks_findstack(uintptr_t addr, findstack_info_t *fsip, uint_t print_warnings)
 143 {
 144         mdb_findstack_kthread_t thr;
 145         size_t stksz;
 146         uintptr_t ubase, utop;
 147         uintptr_t kbase, ktop;
 148         uintptr_t win, sp;
 149 
 150         fsip->fsi_failed = 0;
 151         fsip->fsi_pc = 0;
 152         fsip->fsi_sp = 0;
 153         fsip->fsi_depth = 0;
 154         fsip->fsi_overflow = 0;
 155 
 156         if (mdb_ctf_vread(&thr, "kthread_t", "mdb_findstack_kthread_t",
 157             addr, print_warnings ? 0 : MDB_CTF_VREAD_QUIET) == -1) {
 158                 fsip->fsi_failed = FSI_FAIL_BADTHREAD;
 159                 return (DCMD_ERR);
 160         }
 161 
 162         fsip->fsi_sobj_ops = (uintptr_t)thr.t_sobj_ops;
 163         fsip->fsi_tstate = thr.t_state;
 164         fsip->fsi_panic = !!(thr.t_flag & T_PANIC);
 165 
 166         if (thr.t_stk < thr.t_stkbase) {
 167                 if (print_warnings)
 168                         mdb_warn(
 169                             "stack base or stack top corrupt for thread %p\n",
 170                             addr);
 171                 fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
 172                 return (DCMD_ERR);
 173         }
 174 
 175         kbase = (uintptr_t)thr.t_stkbase;
 176         ktop = (uintptr_t)thr.t_stk;
 177         stksz = ktop - kbase;
 178 
 179 #ifdef __amd64
 180         /*
 181          * The stack on amd64 is intentionally misaligned, so ignore the top
 182          * half-frame.  See thread_stk_init().  When handling traps, the frame
 183          * is automatically aligned by the hardware, so we only alter ktop if
 184          * needed.
 185          */
 186         if ((ktop & (STACK_ALIGN - 1)) != 0)
 187                 ktop -= STACK_ENTRY_ALIGN;
 188 #endif
 189 
 190         /*
 191          * If the stack size is larger than a meg, assume that it's bogus.
 192          */
 193         if (stksz > TOO_BIG_FOR_A_STACK) {
 194                 if (print_warnings)
 195                         mdb_warn("stack size for thread %p is too big to be "
 196                             "reasonable\n", addr);
 197                 fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
 198                 return (DCMD_ERR);
 199         }
 200 
 201         /*
 202          * This could be (and was) a UM_GC allocation.  Unfortunately,
 203          * stksz tends to be very large.  As currently implemented, dcmds
 204          * invoked as part of pipelines don't have their UM_GC-allocated
 205          * memory freed until the pipeline completes.  With stksz in the
 206          * neighborhood of 20k, the popular ::walk thread |::findstack
 207          * pipeline can easily run memory-constrained debuggers (kmdb) out
 208          * of memory.  This can be changed back to a gc-able allocation when
 209          * the debugger is changed to free UM_GC memory more promptly.
 210          */
 211         ubase = (uintptr_t)mdb_alloc(stksz, UM_SLEEP);
 212         utop = ubase + stksz;
 213         if (mdb_vread((caddr_t)ubase, stksz, kbase) != stksz) {
 214                 mdb_free((void *)ubase, stksz);
 215                 if (print_warnings)
 216                         mdb_warn("couldn't read entire stack for thread %p\n",
 217                             addr);
 218                 fsip->fsi_failed = FSI_FAIL_THREADCORRUPT;
 219                 return (DCMD_ERR);
 220         }
 221 
 222         /*
 223          * Try the saved %sp first, if it looks reasonable.
 224          */
 225         sp = KTOU((uintptr_t)thr.t_sp + STACK_BIAS);
 226         if (sp >= ubase && sp <= utop) {
 227                 if (crawl(sp, kbase, ktop, ubase, 0, fsip) == CRAWL_FOUNDALL) {
 228                         fsip->fsi_sp = (uintptr_t)thr.t_sp;
 229 #if !defined(__i386)
 230                         fsip->fsi_pc = (uintptr_t)thr.t_pc;
 231 #endif
 232                         goto found;
 233                 }
 234         }
 235 
 236         /*
 237          * Now walk through the whole stack, starting at the base,
 238          * trying every possible "window".
 239          */
 240         for (win = ubase;
 241             win + sizeof (struct rwindow) <= utop;
 242             win += sizeof (struct rwindow *)) {
 243                 if (crawl(win, kbase, ktop, ubase, 1, fsip) == CRAWL_FOUNDALL) {
 244                         fsip->fsi_sp = UTOK(win) - STACK_BIAS;
 245                         goto found;
 246                 }
 247         }
 248 
 249         /*
 250          * We didn't conclusively find the stack.  So we'll take another lap,
 251          * and print out anything that looks possible.
 252          */
 253         if (print_warnings)
 254                 mdb_printf("Possible stack pointers for thread %p:\n", addr);
 255         (void) mdb_vread((caddr_t)ubase, stksz, kbase);
 256 
 257         for (win = ubase;
 258             win + sizeof (struct rwindow) <= utop;
 259             win += sizeof (struct rwindow *)) {
 260                 uintptr_t fp = ((struct rwindow *)win)->rw_fp;
 261                 int levels;
 262 
 263                 if ((levels = crawl(win, kbase, ktop, ubase, 1, fsip)) > 1) {
 264                         if (print_warnings)
 265                                 mdb_printf("  %p (%d)\n", fp, levels);
 266                 } else if (levels == CRAWL_FOUNDALL) {
 267                         /*
 268                          * If this is a live system, the stack could change
 269                          * between the two mdb_vread(ubase, utop, kbase)'s,
 270                          * and we could have a fully valid stack here.
 271                          */
 272                         fsip->fsi_sp = UTOK(win) - STACK_BIAS;
 273                         goto found;
 274                 }
 275         }
 276 
 277         fsip->fsi_depth = 0;
 278         fsip->fsi_overflow = 0;
 279         fsip->fsi_failed = FSI_FAIL_STACKNOTFOUND;
 280 
 281         mdb_free((void *)ubase, stksz);
 282         return (DCMD_ERR);
 283 found:
 284         mdb_free((void *)ubase, stksz);
 285         return (DCMD_OK);
 286 }
 287 
 288 void
 289 stacks_findstack_cleanup()
 290 {}
 291 
 292 /*ARGSUSED*/
 293 int
 294 stacks_module_cb(uintptr_t addr, const modctl_t *mp, stacks_module_t *smp)
 295 {
 296         char mod_modname[MODMAXNAMELEN + 1];
 297 
 298         if (!mp->mod_modname)
 299                 return (WALK_NEXT);
 300 
 301         if (mdb_readstr(mod_modname, sizeof (mod_modname),
 302             (uintptr_t)mp->mod_modname) == -1) {
 303                 mdb_warn("failed to read mod_modname in \"modctl\" walk");
 304                 return (WALK_ERR);
 305         }
 306 
 307         if (strcmp(smp->sm_name, mod_modname))
 308                 return (WALK_NEXT);
 309 
 310         smp->sm_text = (uintptr_t)mp->mod_text;
 311         smp->sm_size = mp->mod_text_size;
 312 
 313         return (WALK_DONE);
 314 }
 315 
 316 int
 317 stacks_module(stacks_module_t *smp)
 318 {
 319         if (mdb_walk("modctl", (mdb_walk_cb_t)stacks_module_cb, smp) != 0) {
 320                 mdb_warn("cannot walk \"modctl\"");
 321                 return (-1);
 322         }
 323 
 324         return (0);
 325 }
 326 
 327 /*ARGSUSED*/
 328 static void
 329 print_sobj_help(int type, const char *name, const char *ops_name, void *ign)
 330 {
 331         mdb_printf(" %s", name);
 332 }
 333 
 334 /*ARGSUSED*/
 335 static void
 336 print_tstate_help(uint_t state, const char *name, void *ignored)
 337 {
 338         mdb_printf(" %s", name);
 339 }
 340 
/*
 * Print the help text for ::stacks: an overview with examples, the
 * OPTIONS and FILTERS sections, and finally the lists of sync object
 * type names and thread state names accepted by the filters.
 */
void
stacks_help(void)
{
        /*
         * The overview is passed as the format string itself, so the
         * %<b> ... %</b> sequences in it are interpreted by mdb_printf().
         */
        mdb_printf(
"::stacks processes all of the thread stacks on the system, grouping\n"
"together threads which have the same:\n"
"\n"
"  * Thread state,\n"
"  * Sync object type, and\n"
"  * PCs in their stack trace.\n"
"\n"
"The default output (no address or options) is just a dump of the thread\n"
"groups in the system.  For a view of active threads, use \"::stacks -i\",\n"
"which filters out FREE threads (interrupt threads which are currently\n"
"inactive) and threads sleeping on a CV. (Note that those threads may still\n"
"be noteworthy; this is just for a first glance.)  More general filtering\n"
"options are described below, in the \"FILTERS\" section.\n"
"\n"
"::stacks can be used in a pipeline.  The input to ::stacks is one or more\n"
"thread pointers.  For example, to get a summary of threads in a process,\n"
"you can do:\n"
"\n"
"  %<b>procp%</b>::walk thread | ::stacks\n"
"\n"
"When output into a pipe, ::stacks prints all of the threads input,\n"
"filtered by the given filtering options.  This means that multiple\n"
"::stacks invocations can be piped together to achieve more complicated\n"
"filters.  For example, to get threads which have both 'fop_read' and\n"
"'cv_wait_sig_swap' in their stack trace, you could do:\n"
"\n"
"  ::stacks -c fop_read | ::stacks -c cv_wait_sig_swap_core\n"
"\n"
"To get the full list of threads in each group, use the '-a' flag:\n"
"\n"
"  ::stacks -a\n"
"\n");
        /*
         * Section headings are printed with the indent reduced by two,
         * and the indent is restored before the section body.
         */
        mdb_dec_indent(2);
        mdb_printf("%<b>OPTIONS%</b>\n");
        mdb_inc_indent(2);
        /* Section bodies go through "%s", so they are printed verbatim. */
        mdb_printf("%s",
"  -a    Print all of the grouped threads, instead of just a count.\n"
"  -f    Force a re-run of the thread stack gathering.\n"
"  -v    Be verbose about thread stack gathering.\n"
"\n");
        mdb_dec_indent(2);
        mdb_printf("%<b>FILTERS%</b>\n");
        mdb_inc_indent(2);
        mdb_printf("%s",
"  -i    Show active threads; equivalent to '-S CV -T FREE'.\n"
"  -c func[+offset]\n"
"        Only print threads whose stacks contain func/func+offset.\n"
"  -C func[+offset]\n"
"        Only print threads whose stacks do not contain func/func+offset.\n"
"  -m module\n"
"        Only print threads whose stacks contain functions from module.\n"
"  -M module\n"
"        Only print threads whose stacks do not contain functions from\n"
"        module.\n"
"  -s {type | ALL}\n"
"        Only print threads which are on a 'type' synchronization object\n"
"        (SOBJ).\n"
"  -S {type | ALL}\n"
"        Only print threads which are not on a 'type' SOBJ.\n"
"  -t tstate\n"
"        Only print threads which are in thread state 'tstate'.\n"
"  -T tstate\n"
"        Only print threads which are not in thread state 'tstate'.\n"
"\n");
        /*
         * The two name lists are produced by walker callbacks that each
         * print " <name>"; "panic" is appended to the thread states by
         * hand rather than by the walker.
         */
        mdb_printf("   SOBJ types:");
        sobj_type_walk(print_sobj_help, NULL);
        mdb_printf("\n");
        mdb_printf("Thread states:");
        thread_walk_states(print_tstate_help, NULL);
        mdb_printf(" panic\n");
}