Print this page
6659 nvlist_free(NULL) is a no-op
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/cmd/fm/fmd/common/fmd_case.c
+++ new/usr/src/cmd/fm/fmd/common/fmd_case.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 24 */
25 25
26 26 /*
27 27 * FMD Case Subsystem
28 28 *
29 29 * Diagnosis engines are expected to group telemetry events related to the
30 30 * diagnosis of a particular problem on the system into a set of cases. The
31 31 * diagnosis engine may have any number of cases open at a given point in time.
32 32 * Some cases may eventually be *solved* by associating a suspect list of one
33 33 * or more problems with the case, at which point fmd publishes a list.suspect
34 34 * event for the case and it becomes visible to administrators and agents.
35 35 *
36 36 * Every case is named using a UUID, and is globally visible in the case hash.
37 37 * Cases are reference-counted, except for the reference from the case hash
38 38 * itself. Consumers of case references include modules, which store active
39 39 * cases on the mod_cases list, ASRUs in the resource cache, and the RPC code.
40 40 *
41 41 * Cases obey the following state machine. In states UNSOLVED, SOLVED, and
42 42 * CLOSE_WAIT, a case's module refers to the owning module (a diagnosis engine
43 43 * or transport) and the case is referenced by the mod_cases list. Once the
44 44 * case reaches the CLOSED or REPAIRED states, a case's module changes to refer
45 45 * to the root module (fmd.d_rmod) and is deleted from the owner's mod_cases.
46 46 *
47 47 * +------------+
48 48 * +----------| UNSOLVED |
49 49 * | +------------+
50 50 * | 1 |
51 51 * | |
52 52 * | +-------v----+
53 53 * 2 | | SOLVED |
54 54 * | +------------+
55 55 * | 3 | 5 |
56 56 * +------------+ | |
57 57 * | | |
58 58 * +-v---v----v-+
59 59 * | CLOSE_WAIT |
60 60 * +------------+
61 61 * | | |
62 62 * +-----------+ | +------------+
63 63 * | 4 | |
64 64 * v +-----v------+ |
65 65 * discard | CLOSED | 6 |
66 66 * +------------+ |
67 67 * | |
68 68 * | +------------+
69 69 * 7 | |
70 70 * +-----v----v-+
71 71 * | REPAIRED |
72 72 * +------------+
73 73 * |
74 74 * 8 |
75 75 * +-----v------+
76 76 * | RESOLVED |
77 77 * +------------+
78 78 * |
79 79 * v
80 80 * discard
81 81 *
82 82 * The state machine changes are triggered by calls to fmd_case_transition()
83 83 * from various locations inside of fmd, as described below:
84 84 *
85 85 * [1] Called by: fmd_case_solve()
86 86 * Actions: FMD_CF_SOLVED flag is set in ci_flags
87 87 * conviction policy is applied to suspect list
88 88 * suspects convicted are marked faulty (F) in R$
89 89 * list.suspect event logged and dispatched
90 90 *
91 91 * [2] Called by: fmd_case_close(), fmd_case_uuclose()
92 92 * Actions: diagnosis engine fmdo_close() entry point scheduled
93 93 * case discarded upon exit from CLOSE_WAIT
94 94 *
95 95 * [3] Called by: fmd_case_close(), fmd_case_uuclose(), fmd_xprt_event_uuclose()
96 96 * Actions: FMD_CF_ISOLATED flag is set in ci_flags
97 97 * suspects convicted (F) are marked unusable (U) in R$
98 98 * diagnosis engine fmdo_close() entry point scheduled
99 99 * case transitions to CLOSED [4] upon exit from CLOSE_WAIT
100 100 *
101 101 * [4] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
102 102 * Actions: list.isolated event dispatched
103 103 * case deleted from module's list of open cases
104 104 *
105 105 * [5] Called by: fmd_case_repair(), fmd_case_update()
106 106 * Actions: FMD_CF_REPAIR flag is set in ci_flags
107 107 * diagnosis engine fmdo_close() entry point scheduled
108 108 * case transitions to REPAIRED [6] upon exit from CLOSE_WAIT
109 109 *
110 110 * [6] Called by: fmd_case_delete() (after fmdo_close() entry point returns)
111 111 * Actions: suspects convicted are marked non faulty (!F) in R$
112 112 * list.repaired or list.updated event dispatched
113 113 *
114 114 * [7] Called by: fmd_case_repair(), fmd_case_update()
115 115 * Actions: FMD_CF_REPAIR flag is set in ci_flags
116 116 * suspects convicted are marked non faulty (!F) in R$
117 117 * list.repaired or list.updated event dispatched
118 118 *
119 119 * [8] Called by: fmd_case_uuresolve()
120 120 * Actions: list.resolved event dispatched
121 121 * case is discarded
122 122 */
123 123
124 124 #include <sys/fm/protocol.h>
125 125 #include <uuid/uuid.h>
126 126 #include <alloca.h>
127 127
128 128 #include <fmd_alloc.h>
129 129 #include <fmd_module.h>
130 130 #include <fmd_error.h>
131 131 #include <fmd_conf.h>
132 132 #include <fmd_case.h>
133 133 #include <fmd_string.h>
134 134 #include <fmd_subr.h>
135 135 #include <fmd_protocol.h>
136 136 #include <fmd_event.h>
137 137 #include <fmd_eventq.h>
138 138 #include <fmd_dispq.h>
139 139 #include <fmd_buf.h>
140 140 #include <fmd_log.h>
141 141 #include <fmd_asru.h>
142 142 #include <fmd_fmri.h>
143 143 #include <fmd_xprt.h>
144 144
145 145 #include <fmd.h>
146 146
/*
 * Printable names for the case states.  The entries are listed in the order
 * of the FMD_CASE_* constants named beside them; presumably the table is
 * indexed by ci_state -- confirm against its users.
 */
static const char *const _fmd_case_snames[] = {
	"UNSOLVED",	/* FMD_CASE_UNSOLVED */
	"SOLVED",	/* FMD_CASE_SOLVED */
	"CLOSE_WAIT",	/* FMD_CASE_CLOSE_WAIT */
	"CLOSED",	/* FMD_CASE_CLOSED */
	"REPAIRED",	/* FMD_CASE_REPAIRED */
	"RESOLVED"	/* FMD_CASE_RESOLVED */
};

/*
 * Forward declaration: fmd_case_hash_apply() below calls this before the
 * definition appears later in this translation unit.
 */
static fmd_case_impl_t *fmd_case_tryhold(fmd_case_impl_t *);
157 157
158 158 fmd_case_hash_t *
159 159 fmd_case_hash_create(void)
160 160 {
161 161 fmd_case_hash_t *chp = fmd_alloc(sizeof (fmd_case_hash_t), FMD_SLEEP);
162 162
163 163 (void) pthread_rwlock_init(&chp->ch_lock, NULL);
164 164 chp->ch_hashlen = fmd.d_str_buckets;
165 165 chp->ch_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen, FMD_SLEEP);
166 166 chp->ch_code_hash = fmd_zalloc(sizeof (void *) * chp->ch_hashlen,
167 167 FMD_SLEEP);
168 168 chp->ch_count = 0;
169 169
170 170 return (chp);
171 171 }
172 172
173 173 /*
174 174 * Destroy the case hash. Unlike most of our hash tables, no active references
175 175 * are kept by the case hash itself; all references come from other subsystems.
176 176 * The hash must be destroyed after all modules are unloaded; if anything was
177 177 * present in the hash it would be by definition a reference count leak.
178 178 */
179 179 void
180 180 fmd_case_hash_destroy(fmd_case_hash_t *chp)
181 181 {
182 182 fmd_free(chp->ch_hash, sizeof (void *) * chp->ch_hashlen);
183 183 fmd_free(chp->ch_code_hash, sizeof (void *) * chp->ch_hashlen);
184 184 fmd_free(chp, sizeof (fmd_case_hash_t));
185 185 }
186 186
187 187 /*
188 188 * Take a snapshot of the case hash by placing an additional hold on each
189 189 * member in an auxiliary array, and then call 'func' for each case.
190 190 */
191 191 void
192 192 fmd_case_hash_apply(fmd_case_hash_t *chp,
193 193 void (*func)(fmd_case_t *, void *), void *arg)
194 194 {
195 195 fmd_case_impl_t *cp, **cps, **cpp;
196 196 uint_t cpc, i;
197 197
198 198 (void) pthread_rwlock_rdlock(&chp->ch_lock);
199 199
200 200 cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP);
201 201 cpc = chp->ch_count;
202 202
203 203 for (i = 0; i < chp->ch_hashlen; i++) {
204 204 for (cp = chp->ch_hash[i]; cp != NULL; cp = cp->ci_next)
205 205 *cpp++ = fmd_case_tryhold(cp);
206 206 }
207 207
208 208 ASSERT(cpp == cps + cpc);
209 209 (void) pthread_rwlock_unlock(&chp->ch_lock);
210 210
211 211 for (i = 0; i < cpc; i++) {
212 212 if (cps[i] != NULL) {
213 213 func((fmd_case_t *)cps[i], arg);
214 214 fmd_case_rele((fmd_case_t *)cps[i]);
215 215 }
216 216 }
217 217
218 218 fmd_free(cps, cpc * sizeof (fmd_case_t *));
219 219 }
220 220
221 221 static void
222 222 fmd_case_code_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
223 223 {
224 224 uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen;
225 225
226 226 cip->ci_code_next = chp->ch_code_hash[h];
227 227 chp->ch_code_hash[h] = cip;
228 228 }
229 229
230 230 static void
231 231 fmd_case_code_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
232 232 {
233 233 fmd_case_impl_t **pp, *cp;
234 234
235 235 if (cip->ci_code) {
236 236 uint_t h = fmd_strhash(cip->ci_code) % chp->ch_hashlen;
237 237
238 238 pp = &chp->ch_code_hash[h];
239 239 for (cp = *pp; cp != NULL; cp = cp->ci_code_next) {
240 240 if (cp != cip)
241 241 pp = &cp->ci_code_next;
242 242 else
243 243 break;
244 244 }
245 245 if (cp != NULL) {
246 246 *pp = cp->ci_code_next;
247 247 cp->ci_code_next = NULL;
248 248 }
249 249 }
250 250 }
251 251
252 252 /*
253 253 * Look up the diagcode for this case and cache it in ci_code. If no suspects
254 254 * were defined for this case or if the lookup fails, the event dictionary or
255 255 * module code is broken, and we set the event code to a precomputed default.
256 256 */
257 257 static const char *
258 258 fmd_case_mkcode(fmd_case_t *cp)
259 259 {
260 260 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
261 261 fmd_case_susp_t *cis;
262 262 fmd_case_hash_t *chp = fmd.d_cases;
263 263
264 264 char **keys, **keyp;
265 265 const char *s;
266 266
267 267 ASSERT(MUTEX_HELD(&cip->ci_lock));
268 268 ASSERT(cip->ci_state >= FMD_CASE_SOLVED);
269 269
270 270 /*
271 271 * delete any existing entry from code hash if it is on it
272 272 */
273 273 fmd_case_code_hash_delete(chp, cip);
274 274
275 275 fmd_free(cip->ci_code, cip->ci_codelen);
276 276 cip->ci_codelen = cip->ci_mod->mod_codelen;
277 277 cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
278 278 keys = keyp = alloca(sizeof (char *) * (cip->ci_nsuspects + 1));
279 279
280 280 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
281 281 if (nvlist_lookup_string(cis->cis_nvl, FM_CLASS, keyp) == 0)
282 282 keyp++;
283 283 }
284 284
285 285 *keyp = NULL; /* mark end of keys[] array for libdiagcode */
286 286
287 287 if (cip->ci_nsuspects == 0 || fmd_module_dc_key2code(
288 288 cip->ci_mod, keys, cip->ci_code, cip->ci_codelen) != 0) {
289 289 (void) fmd_conf_getprop(fmd.d_conf, "nodiagcode", &s);
290 290 fmd_free(cip->ci_code, cip->ci_codelen);
291 291 cip->ci_codelen = strlen(s) + 1;
292 292 cip->ci_code = fmd_zalloc(cip->ci_codelen, FMD_SLEEP);
293 293 (void) strcpy(cip->ci_code, s);
294 294 }
295 295
296 296 /*
297 297 * add into hash of solved cases
298 298 */
299 299 fmd_case_code_hash_insert(chp, cip);
300 300
301 301 return (cip->ci_code);
302 302 }
303 303
/*
 * Argument block for fmd_case_set_lst(), filled in by fmd_case_mkevent().
 * The callback appends one entry per suspect to the two parallel arrays.
 */
typedef struct {
	int *fcl_countp;	/* entries filled in so far; bumped per suspect */
	int fcl_maxcount;	/* capacity of fcl_ba[] and fcl_nva[] */
	uint8_t *fcl_ba;	/* per-suspect FM_SUSPECT_* status bits */
	nvlist_t **fcl_nva;	/* per-suspect fault event (al_event) */
	int *fcl_msgp;		/* set to B_FALSE if a suspect declines messaging */
} fmd_case_lst_t;
311 311
312 312 static void
313 313 fmd_case_set_lst(fmd_asru_link_t *alp, void *arg)
314 314 {
315 315 fmd_case_lst_t *entryp = (fmd_case_lst_t *)arg;
316 316 boolean_t b;
317 317 int state;
318 318
319 319 if (*entryp->fcl_countp >= entryp->fcl_maxcount)
320 320 return;
321 321 if (nvlist_lookup_boolean_value(alp->al_event, FM_SUSPECT_MESSAGE,
322 322 &b) == 0 && b == B_FALSE)
323 323 *entryp->fcl_msgp = B_FALSE;
324 324 entryp->fcl_ba[*entryp->fcl_countp] = 0;
325 325 state = fmd_asru_al_getstate(alp);
326 326 if (state & FMD_ASRU_DEGRADED)
327 327 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_DEGRADED;
328 328 if (state & FMD_ASRU_UNUSABLE)
329 329 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_UNUSABLE;
330 330 if (state & FMD_ASRU_FAULTY)
331 331 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_FAULTY;
332 332 if (!(state & FMD_ASRU_PRESENT))
333 333 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_NOT_PRESENT;
334 334 if (alp->al_reason == FMD_ASRU_REPAIRED)
335 335 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPAIRED;
336 336 else if (alp->al_reason == FMD_ASRU_REPLACED)
337 337 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_REPLACED;
338 338 else if (alp->al_reason == FMD_ASRU_ACQUITTED)
339 339 entryp->fcl_ba[*entryp->fcl_countp] |= FM_SUSPECT_ACQUITTED;
340 340 entryp->fcl_nva[*entryp->fcl_countp] = alp->al_event;
341 341 (*entryp->fcl_countp)++;
342 342 }
343 343
344 344 static void
345 345 fmd_case_faulty(fmd_asru_link_t *alp, void *arg)
346 346 {
347 347 int *faultyp = (int *)arg;
348 348
349 349 *faultyp |= (alp->al_flags & FMD_ASRU_FAULTY);
350 350 }
351 351
352 352 static void
353 353 fmd_case_usable(fmd_asru_link_t *alp, void *arg)
354 354 {
355 355 int *usablep = (int *)arg;
356 356
357 357 *usablep |= !(fmd_asru_al_getstate(alp) & FMD_ASRU_UNUSABLE);
358 358 }
359 359
360 360 static void
361 361 fmd_case_not_faulty(fmd_asru_link_t *alp, void *arg)
362 362 {
363 363 int *not_faultyp = (int *)arg;
364 364
365 365 *not_faultyp |= !(alp->al_flags & FMD_ASRU_FAULTY);
366 366 }
367 367
368 368 /*
369 369 * Have we got any suspects with an asru that are still unusable and present?
370 370 */
371 371 static void
372 372 fmd_case_unusable_and_present(fmd_asru_link_t *alp, void *arg)
373 373 {
374 374 int *rvalp = (int *)arg;
375 375 int state;
376 376 nvlist_t *asru;
377 377
378 378 /*
379 379 * if this a proxy case and this suspect doesn't have an local asru
380 380 * then state is unknown so we must assume it may still be unusable.
381 381 */
382 382 if ((alp->al_flags & FMD_ASRU_PROXY) &&
383 383 !(alp->al_flags & FMD_ASRU_PROXY_WITH_ASRU)) {
384 384 *rvalp |= B_TRUE;
385 385 return;
386 386 }
387 387
388 388 state = fmd_asru_al_getstate(alp);
389 389 if (nvlist_lookup_nvlist(alp->al_event, FM_FAULT_ASRU, &asru) != 0)
390 390 return;
391 391 *rvalp |= ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_PRESENT));
392 392 }
393 393
394 394 nvlist_t *
395 395 fmd_case_mkevent(fmd_case_t *cp, const char *class)
396 396 {
397 397 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
398 398 nvlist_t **nva, *nvl;
399 399 uint8_t *ba;
400 400 int msg = B_TRUE;
401 401 const char *code;
402 402 fmd_case_lst_t fcl;
403 403 int count = 0;
404 404
405 405 (void) pthread_mutex_lock(&cip->ci_lock);
406 406 ASSERT(cip->ci_state >= FMD_CASE_SOLVED);
407 407
408 408 nva = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects);
409 409 ba = alloca(sizeof (uint8_t) * cip->ci_nsuspects);
410 410
411 411 /*
412 412 * For each suspect associated with the case, store its fault event
413 413 * nvlist in 'nva'. We also look to see if any of the suspect faults
414 414 * have asked not to be messaged. If any of them have made such a
415 415 * request, propagate that attribute to the composite list.* event.
416 416 * Finally, store each suspect's faulty status into the bitmap 'ba'.
417 417 */
418 418 fcl.fcl_countp = &count;
419 419 fcl.fcl_maxcount = cip->ci_nsuspects;
420 420 fcl.fcl_msgp = &msg;
421 421 fcl.fcl_ba = ba;
422 422 fcl.fcl_nva = nva;
423 423 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_set_lst, &fcl);
424 424
425 425 if (cip->ci_code == NULL)
426 426 (void) fmd_case_mkcode(cp);
427 427 /*
428 428 * For repair and updated event, we lookup diagcode from dict using key
429 429 * "list.repaired" or "list.updated" or "list.resolved".
430 430 */
431 431 if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0)
432 432 (void) fmd_conf_getprop(fmd.d_conf, "repaircode", &code);
433 433 else if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0)
434 434 (void) fmd_conf_getprop(fmd.d_conf, "resolvecode", &code);
435 435 else if (strcmp(class, FM_LIST_UPDATED_CLASS) == 0)
436 436 (void) fmd_conf_getprop(fmd.d_conf, "updatecode", &code);
437 437 else
438 438 code = cip->ci_code;
439 439
440 440 if (msg == B_FALSE)
441 441 cip->ci_flags |= FMD_CF_INVISIBLE;
442 442
443 443 /*
444 444 * Use the ci_diag_de if one has been saved (eg for an injected fault).
445 445 * Otherwise use the authority for the current module.
446 446 */
447 447 nvl = fmd_protocol_list(class, cip->ci_diag_de == NULL ?
448 448 cip->ci_mod->mod_fmri : cip->ci_diag_de, cip->ci_uuid, code, count,
449 449 nva, ba, msg, &cip->ci_tv, cip->ci_injected);
450 450
451 451 (void) pthread_mutex_unlock(&cip->ci_lock);
452 452 return (nvl);
453 453 }
454 454
/*
 * Tunables consulted by fmd_case_check_for_dups() when deciding whether a new
 * case duplicates an existing one and how to adjust the suspects afterwards.
 */

/* treat an overlap of faulty, non-isolated suspects as a match */
static int fmd_case_match_on_faulty_overlap = 1;
/* treat an overlap of acquitted suspects as a match */
static int fmd_case_match_on_acquit_overlap = 1;
/* allow isolated suspects of the old case to be acquitted automatically */
static int fmd_case_auto_acquit_isolated = 1;
/* when the old case is already repaired, flag the new case for adjustment */
static int fmd_case_auto_acquit_non_acquitted = 1;
/* new cases this close to the old case's last change skip auto-acquit */
static int fmd_case_too_recent = 10; /* time in seconds */
460 460
461 461 static boolean_t
462 462 fmd_case_compare_elem(nvlist_t *nvl, nvlist_t *xnvl, const char *elem)
463 463 {
464 464 nvlist_t *new_rsrc;
465 465 nvlist_t *rsrc;
466 466 char *new_name = NULL;
467 467 char *name = NULL;
468 468 ssize_t new_namelen;
469 469 ssize_t namelen;
470 470 int fmri_present = 1;
471 471 int new_fmri_present = 1;
472 472 int match = B_FALSE;
473 473 fmd_topo_t *ftp = fmd_topo_hold();
474 474
475 475 if (nvlist_lookup_nvlist(xnvl, elem, &rsrc) != 0)
476 476 fmri_present = 0;
477 477 else {
478 478 if ((namelen = fmd_fmri_nvl2str(rsrc, NULL, 0)) == -1)
479 479 goto done;
480 480 name = fmd_alloc(namelen + 1, FMD_SLEEP);
481 481 if (fmd_fmri_nvl2str(rsrc, name, namelen + 1) == -1)
482 482 goto done;
483 483 }
484 484 if (nvlist_lookup_nvlist(nvl, elem, &new_rsrc) != 0)
485 485 new_fmri_present = 0;
486 486 else {
487 487 if ((new_namelen = fmd_fmri_nvl2str(new_rsrc, NULL, 0)) == -1)
488 488 goto done;
489 489 new_name = fmd_alloc(new_namelen + 1, FMD_SLEEP);
490 490 if (fmd_fmri_nvl2str(new_rsrc, new_name, new_namelen + 1) == -1)
491 491 goto done;
492 492 }
493 493 match = (fmri_present == new_fmri_present &&
494 494 (fmri_present == 0 ||
495 495 topo_fmri_strcmp(ftp->ft_hdl, name, new_name)));
496 496 done:
497 497 if (name != NULL)
498 498 fmd_free(name, namelen + 1);
499 499 if (new_name != NULL)
500 500 fmd_free(new_name, new_namelen + 1);
501 501 fmd_topo_rele(ftp);
502 502 return (match);
503 503 }
504 504
505 505 static int
506 506 fmd_case_match_suspect(nvlist_t *nvl1, nvlist_t *nvl2)
507 507 {
508 508 char *class, *new_class;
509 509
510 510 if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_ASRU))
511 511 return (0);
512 512 if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_RESOURCE))
513 513 return (0);
514 514 if (!fmd_case_compare_elem(nvl1, nvl2, FM_FAULT_FRU))
515 515 return (0);
516 516 (void) nvlist_lookup_string(nvl2, FM_CLASS, &class);
517 517 (void) nvlist_lookup_string(nvl1, FM_CLASS, &new_class);
518 518 return (strcmp(class, new_class) == 0);
519 519 }
520 520
/*
 * Argument block for fmd_case_match_suspects(), set up by
 * fmd_case_check_for_dups().  The "old" arrays have one slot per suspect of
 * the existing case being walked; the "new" array has one slot per suspect of
 * the new case in fcms_cip.
 */
typedef struct {
	int *fcms_countp;		/* old suspects processed so far */
	int fcms_maxcount;		/* capacity of the "old" arrays */
	fmd_case_impl_t *fcms_cip;	/* the new case being compared */
	uint8_t *fcms_new_susp_state;	/* state of each matched new suspect */
	uint8_t *fcms_old_susp_state;	/* state of each old suspect */
	uint8_t *fcms_old_match_state;	/* NO_MATCH for unmatched old ones */
} fcms_t;

/* Values stored in the *_susp_state arrays. */
#define	SUSPECT_STATE_FAULTY 0x1
#define	SUSPECT_STATE_ISOLATED 0x2
#define	SUSPECT_STATE_REMOVED 0x4
#define	SUSPECT_STATE_ACQUITED 0x8
#define	SUSPECT_STATE_REPAIRED 0x10
#define	SUSPECT_STATE_REPLACED 0x20
/*
 * Value stored in the *_match_state arrays.  NOTE(review): this shares the
 * value 0x1 with SUSPECT_STATE_FAULTY, so the two are only distinguishable by
 * which array they are stored in.
 */
#define	SUSPECT_STATE_NO_MATCH 0x1
536 536
537 537 /*
538 538 * This is called for each suspect in the old case. Compare it against each
539 539 * suspect in the new case, setting fcms_old_susp_state and fcms_new_susp_state
540 540 * as appropriate. fcms_new_susp_state will left as 0 if the suspect is not
541 541 * found in the old case.
542 542 */
543 543 static void
544 544 fmd_case_match_suspects(fmd_asru_link_t *alp, void *arg)
545 545 {
546 546 fcms_t *fcmsp = (fcms_t *)arg;
547 547 fmd_case_impl_t *cip = fcmsp->fcms_cip;
548 548 fmd_case_susp_t *cis;
549 549 int i = 0;
550 550 int state = fmd_asru_al_getstate(alp);
551 551
552 552 if (*fcmsp->fcms_countp >= fcmsp->fcms_maxcount)
553 553 return;
554 554
555 555 if (!(state & FMD_ASRU_PRESENT) || (!(state & FMD_ASRU_FAULTY) &&
556 556 alp->al_reason == FMD_ASRU_REMOVED))
557 557 fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
558 558 SUSPECT_STATE_REMOVED;
559 559 else if ((state & FMD_ASRU_UNUSABLE) && (state & FMD_ASRU_FAULTY))
560 560 fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
561 561 SUSPECT_STATE_ISOLATED;
562 562 else if (state & FMD_ASRU_FAULTY)
563 563 fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
564 564 SUSPECT_STATE_FAULTY;
565 565 else if (alp->al_reason == FMD_ASRU_REPLACED)
566 566 fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
567 567 SUSPECT_STATE_REPLACED;
568 568 else if (alp->al_reason == FMD_ASRU_ACQUITTED)
569 569 fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
570 570 SUSPECT_STATE_ACQUITED;
571 571 else
572 572 fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp] =
573 573 SUSPECT_STATE_REPAIRED;
574 574
575 575 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next, i++)
576 576 if (fmd_case_match_suspect(cis->cis_nvl, alp->al_event) == 1)
577 577 break;
578 578 if (cis != NULL)
579 579 fcmsp->fcms_new_susp_state[i] =
580 580 fcmsp->fcms_old_susp_state[*fcmsp->fcms_countp];
581 581 else
582 582 fcmsp->fcms_old_match_state[*fcmsp->fcms_countp] |=
583 583 SUSPECT_STATE_NO_MATCH;
584 584 (*fcmsp->fcms_countp)++;
585 585 }
586 586
/*
 * Argument block for the re-fault/acquit callbacks below that need to consult
 * the suspect list of the new case.
 */
typedef struct {
	int *fca_do_update;		/* set to 1 when a suspect was changed */
	fmd_case_impl_t *fca_cip;	/* case whose suspects are matched */
} fca_t;
591 591
592 592 /*
593 593 * Re-fault all acquitted suspects that are still present in the new list.
594 594 */
595 595 static void
596 596 fmd_case_fault_acquitted_matching(fmd_asru_link_t *alp, void *arg)
597 597 {
598 598 fca_t *fcap = (fca_t *)arg;
599 599 fmd_case_impl_t *cip = fcap->fca_cip;
600 600 fmd_case_susp_t *cis;
601 601 int state = fmd_asru_al_getstate(alp);
602 602
603 603 if (!(state & FMD_ASRU_FAULTY) &&
604 604 alp->al_reason == FMD_ASRU_ACQUITTED) {
605 605 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
606 606 if (fmd_case_match_suspect(cis->cis_nvl,
607 607 alp->al_event) == 1)
608 608 break;
609 609 if (cis != NULL) {
610 610 (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
611 611 *fcap->fca_do_update = 1;
612 612 }
613 613 }
614 614 }
615 615
616 616 /*
617 617 * Re-fault all suspects that are still present in the new list.
618 618 */
619 619 static void
620 620 fmd_case_fault_all_matching(fmd_asru_link_t *alp, void *arg)
621 621 {
622 622 fca_t *fcap = (fca_t *)arg;
623 623 fmd_case_impl_t *cip = fcap->fca_cip;
624 624 fmd_case_susp_t *cis;
625 625 int state = fmd_asru_al_getstate(alp);
626 626
627 627 if (!(state & FMD_ASRU_FAULTY)) {
628 628 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
629 629 if (fmd_case_match_suspect(cis->cis_nvl,
630 630 alp->al_event) == 1)
631 631 break;
632 632 if (cis != NULL) {
633 633 (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
634 634 *fcap->fca_do_update = 1;
635 635 }
636 636 }
637 637 }
638 638
639 639 /*
640 640 * Acquit all suspects that are no longer present in the new list.
641 641 */
642 642 static void
643 643 fmd_case_acquit_no_match(fmd_asru_link_t *alp, void *arg)
644 644 {
645 645 fca_t *fcap = (fca_t *)arg;
646 646 fmd_case_impl_t *cip = fcap->fca_cip;
647 647 fmd_case_susp_t *cis;
648 648 int state = fmd_asru_al_getstate(alp);
649 649
650 650 if (state & FMD_ASRU_FAULTY) {
651 651 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next)
652 652 if (fmd_case_match_suspect(cis->cis_nvl,
653 653 alp->al_event) == 1)
654 654 break;
655 655 if (cis == NULL) {
656 656 (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
657 657 FMD_ASRU_ACQUITTED);
658 658 *fcap->fca_do_update = 1;
659 659 }
660 660 }
661 661 }
662 662
663 663 /*
664 664 * Acquit all isolated suspects.
665 665 */
666 666 static void
667 667 fmd_case_acquit_isolated(fmd_asru_link_t *alp, void *arg)
668 668 {
669 669 int *do_update = (int *)arg;
670 670 int state = fmd_asru_al_getstate(alp);
671 671
672 672 if ((state & FMD_ASRU_PRESENT) && (state & FMD_ASRU_UNUSABLE) &&
673 673 (state & FMD_ASRU_FAULTY)) {
674 674 (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
675 675 FMD_ASRU_ACQUITTED);
676 676 *do_update = 1;
677 677 }
678 678 }
679 679
680 680 /*
681 681 * Acquit suspect which matches specified nvlist
682 682 */
683 683 static void
684 684 fmd_case_acquit_suspect(fmd_asru_link_t *alp, void *arg)
685 685 {
686 686 nvlist_t *nvl = (nvlist_t *)arg;
687 687 int state = fmd_asru_al_getstate(alp);
688 688
689 689 if ((state & FMD_ASRU_FAULTY) &&
690 690 fmd_case_match_suspect(nvl, alp->al_event) == 1)
691 691 (void) fmd_asru_clrflags(alp, FMD_ASRU_FAULTY,
692 692 FMD_ASRU_ACQUITTED);
693 693 }
694 694
/*
 * Argument block for fmd_case_check_for_dups(), which is invoked once per
 * existing case and reports its findings about the new case through the
 * pointers below.
 */
typedef struct {
	fmd_case_impl_t *fccd_cip;	/* the new case being checked */
	uint8_t *fccd_new_susp_state;	/* accumulated state per new suspect */
	uint8_t *fccd_new_match_state;	/* not used by the code visible here */
	int *fccd_discard_new;		/* set to 1: drop new, keep old case */
	int *fccd_adjust_new;		/* set to 1: keep new, adjust suspects */
} fccd_t;
702 702
703 703 /*
704 704 * see if a matching suspect list already exists in the cache
705 705 */
706 706 static void
707 707 fmd_case_check_for_dups(fmd_case_t *old_cp, void *arg)
708 708 {
709 709 fccd_t *fccdp = (fccd_t *)arg;
710 710 fmd_case_impl_t *new_cip = fccdp->fccd_cip;
711 711 fmd_case_impl_t *old_cip = (fmd_case_impl_t *)old_cp;
712 712 int i, count = 0, do_update = 0, got_isolated_overlap = 0;
713 713 int got_faulty_overlap = 0;
714 714 int got_acquit_overlap = 0;
715 715 boolean_t too_recent;
716 716 uint64_t most_recent = 0;
717 717 fcms_t fcms;
718 718 fca_t fca;
719 719 uint8_t *new_susp_state;
720 720 uint8_t *old_susp_state;
721 721 uint8_t *old_match_state;
722 722
723 723 new_susp_state = alloca(new_cip->ci_nsuspects * sizeof (uint8_t));
724 724 for (i = 0; i < new_cip->ci_nsuspects; i++)
725 725 new_susp_state[i] = 0;
726 726 old_susp_state = alloca(old_cip->ci_nsuspects * sizeof (uint8_t));
727 727 for (i = 0; i < old_cip->ci_nsuspects; i++)
728 728 old_susp_state[i] = 0;
729 729 old_match_state = alloca(old_cip->ci_nsuspects * sizeof (uint8_t));
730 730 for (i = 0; i < old_cip->ci_nsuspects; i++)
731 731 old_match_state[i] = 0;
732 732
733 733 /*
734 734 * Compare with each suspect in the existing case.
735 735 */
736 736 fcms.fcms_countp = &count;
737 737 fcms.fcms_maxcount = old_cip->ci_nsuspects;
738 738 fcms.fcms_cip = new_cip;
739 739 fcms.fcms_new_susp_state = new_susp_state;
740 740 fcms.fcms_old_susp_state = old_susp_state;
741 741 fcms.fcms_old_match_state = old_match_state;
742 742 fmd_asru_hash_apply_by_case(fmd.d_asrus, (fmd_case_t *)old_cip,
743 743 fmd_case_match_suspects, &fcms);
744 744
745 745 /*
746 746 * If we have some faulty, non-isolated suspects that overlap, then most
747 747 * likely it is the suspects that overlap in the suspect lists that are
748 748 * to blame. So we can consider this to be a match.
749 749 */
750 750 for (i = 0; i < new_cip->ci_nsuspects; i++)
751 751 if (new_susp_state[i] == SUSPECT_STATE_FAULTY)
752 752 got_faulty_overlap = 1;
753 753 if (got_faulty_overlap && fmd_case_match_on_faulty_overlap)
754 754 goto got_match;
755 755
756 756 /*
757 757 * If we have no faulty, non-isolated suspects in the old case, but we
758 758 * do have some acquitted suspects that overlap, then most likely it is
759 759 * the acquitted suspects that overlap in the suspect lists that are
760 760 * to blame. So we can consider this to be a match.
761 761 */
762 762 for (i = 0; i < new_cip->ci_nsuspects; i++)
763 763 if (new_susp_state[i] == SUSPECT_STATE_ACQUITED)
764 764 got_acquit_overlap = 1;
765 765 for (i = 0; i < old_cip->ci_nsuspects; i++)
766 766 if (old_susp_state[i] == SUSPECT_STATE_FAULTY)
767 767 got_acquit_overlap = 0;
768 768 if (got_acquit_overlap && fmd_case_match_on_acquit_overlap)
769 769 goto got_match;
770 770
771 771 /*
772 772 * Check that all suspects in the new list are present in the old list.
773 773 * Return if we find one that isn't.
774 774 */
775 775 for (i = 0; i < new_cip->ci_nsuspects; i++)
776 776 if (new_susp_state[i] == 0)
777 777 return;
778 778
779 779 /*
780 780 * Check that all suspects in the old list are present in the new list
781 781 * *or* they are isolated or removed/replaced (which would explain why
782 782 * they are not present in the new list). Return if we find one that is
783 783 * faulty and unisolated or repaired or acquitted, and that is not
784 784 * present in the new case.
785 785 */
786 786 for (i = 0; i < old_cip->ci_nsuspects; i++)
787 787 if (old_match_state[i] == SUSPECT_STATE_NO_MATCH &&
788 788 (old_susp_state[i] == SUSPECT_STATE_FAULTY ||
789 789 old_susp_state[i] == SUSPECT_STATE_ACQUITED ||
790 790 old_susp_state[i] == SUSPECT_STATE_REPAIRED))
791 791 return;
792 792
793 793 got_match:
794 794 /*
795 795 * If the old case is already in repaired/resolved state, we can't
796 796 * do anything more with it, so keep the new case, but acquit some
797 797 * of the suspects if appropriate.
798 798 */
799 799 if (old_cip->ci_state >= FMD_CASE_REPAIRED) {
800 800 if (fmd_case_auto_acquit_non_acquitted) {
801 801 *fccdp->fccd_adjust_new = 1;
802 802 for (i = 0; i < new_cip->ci_nsuspects; i++) {
803 803 fccdp->fccd_new_susp_state[i] |=
804 804 new_susp_state[i];
805 805 if (new_susp_state[i] == 0)
806 806 fccdp->fccd_new_susp_state[i] =
807 807 SUSPECT_STATE_NO_MATCH;
808 808 }
809 809 }
810 810 return;
811 811 }
812 812
813 813 /*
814 814 * Otherwise discard the new case and keep the old, again updating the
815 815 * state of the suspects as appropriate
816 816 */
817 817 *fccdp->fccd_discard_new = 1;
818 818 fca.fca_cip = new_cip;
819 819 fca.fca_do_update = &do_update;
820 820
821 821 /*
822 822 * See if new case occurred within fmd_case_too_recent seconds of the
823 823 * most recent modification to the old case and if so don't do
824 824 * auto-acquit. This avoids problems if a flood of ereports come in and
825 825 * they don't all get diagnosed before the first case causes some of
826 826 * the devices to be isolated making it appear that an isolated device
827 827 * was in the suspect list.
828 828 */
829 829 fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
830 830 fmd_asru_most_recent, &most_recent);
831 831 too_recent = (new_cip->ci_tv.tv_sec - most_recent <
832 832 fmd_case_too_recent);
833 833
834 834 if (got_faulty_overlap) {
835 835 /*
836 836 * Acquit any suspects not present in the new list, plus
837 837 * any that are are present but are isolated.
838 838 */
839 839 fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
840 840 fmd_case_acquit_no_match, &fca);
841 841 if (fmd_case_auto_acquit_isolated && !too_recent)
842 842 fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
843 843 fmd_case_acquit_isolated, &do_update);
844 844 } else if (got_acquit_overlap) {
845 845 /*
846 846 * Re-fault the acquitted matching suspects and acquit all
847 847 * isolated suspects.
848 848 */
849 849 if (fmd_case_auto_acquit_isolated && !too_recent) {
850 850 fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
851 851 fmd_case_fault_acquitted_matching, &fca);
852 852 fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
853 853 fmd_case_acquit_isolated, &do_update);
854 854 }
855 855 } else if (fmd_case_auto_acquit_isolated) {
856 856 /*
857 857 * To get here, there must be no faulty or acquitted suspects,
858 858 * but there must be at least one isolated suspect. Just acquit
859 859 * non-matching isolated suspects. If there are no matching
860 860 * isolated suspects, then re-fault all matching suspects.
861 861 */
862 862 for (i = 0; i < new_cip->ci_nsuspects; i++)
863 863 if (new_susp_state[i] == SUSPECT_STATE_ISOLATED)
864 864 got_isolated_overlap = 1;
865 865 if (!got_isolated_overlap)
866 866 fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
867 867 fmd_case_fault_all_matching, &fca);
868 868 fmd_asru_hash_apply_by_case(fmd.d_asrus, old_cp,
869 869 fmd_case_acquit_no_match, &fca);
870 870 }
871 871
872 872 /*
873 873 * If we've updated anything in the old case, call fmd_case_update()
874 874 */
875 875 if (do_update)
876 876 fmd_case_update(old_cp);
877 877 }
878 878
879 879 /*
880 880 * Convict suspects in a case by applying a conviction policy and updating the
881 881 * resource cache prior to emitting the list.suspect event for the given case.
882 882 * At present, our policy is very simple: convict every suspect in the case.
883 883 * In the future, this policy can be extended and made configurable to permit:
884 884 *
885 885 * - convicting the suspect with the highest FIT rate
886 886 * - convicting the suspect with the cheapest FRU
887 887 * - convicting the suspect with the FRU that is in a depot's inventory
888 888 * - convicting the suspect with the longest lifetime
889 889 *
890 890 * and so forth. A word to the wise: this problem is significantly harder than
891 891 * it seems at first glance. Future work should heed the following advice:
892 892 *
893 893 * Hacking the policy into C code here is a very bad idea. The policy needs to
894 894 * be decided upon very carefully and fundamentally encodes knowledge of what
895 895 * suspect list combinations can be emitted by what diagnosis engines. As such
896 896 * fmd's code is the wrong location, because that would require fmd itself to
897 897 * be updated for every diagnosis engine change, defeating the entire design.
898 898 * The FMA Event Registry knows the suspect list combinations: policy inputs
899 899 * can be derived from it and used to produce per-module policy configuration.
900 900 *
901 901 * If the policy needs to be dynamic and not statically fixed at either fmd
902 902 * startup or module load time, any implementation of dynamic policy retrieval
903 903 * must employ some kind of caching mechanism or be part of a built-in module.
904 904 * The fmd_case_convict() function is called with locks held inside of fmd and
905 905 * is not a place where unbounded blocking on some inter-process or inter-
906 906 * system communication to another service (e.g. another daemon) can occur.
907 907 */
908 908 static int
909 909 fmd_case_convict(fmd_case_t *cp)
910 910 {
911 911 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
912 912 fmd_asru_hash_t *ahp = fmd.d_asrus;
913 913 int discard_new = 0, i;
914 914 fmd_case_susp_t *cis;
915 915 fmd_asru_link_t *alp;
916 916 uint8_t *new_susp_state;
917 917 uint8_t *new_match_state;
918 918 int adjust_new = 0;
919 919 fccd_t fccd;
920 920 fmd_case_impl_t *ncp, **cps, **cpp;
921 921 uint_t cpc;
922 922 fmd_case_hash_t *chp;
923 923
924 924 /*
925 925 * First we must see if any matching cases already exist.
926 926 */
927 927 new_susp_state = alloca(cip->ci_nsuspects * sizeof (uint8_t));
928 928 for (i = 0; i < cip->ci_nsuspects; i++)
929 929 new_susp_state[i] = 0;
930 930 new_match_state = alloca(cip->ci_nsuspects * sizeof (uint8_t));
931 931 for (i = 0; i < cip->ci_nsuspects; i++)
932 932 new_match_state[i] = 0;
933 933 fccd.fccd_cip = cip;
934 934 fccd.fccd_adjust_new = &adjust_new;
935 935 fccd.fccd_new_susp_state = new_susp_state;
936 936 fccd.fccd_new_match_state = new_match_state;
937 937 fccd.fccd_discard_new = &discard_new;
938 938
939 939 /*
940 940 * Hold all cases
941 941 */
942 942 chp = fmd.d_cases;
943 943 (void) pthread_rwlock_rdlock(&chp->ch_lock);
944 944 cps = cpp = fmd_alloc(chp->ch_count * sizeof (fmd_case_t *), FMD_SLEEP);
945 945 cpc = chp->ch_count;
946 946 for (i = 0; i < chp->ch_hashlen; i++)
947 947 for (ncp = chp->ch_hash[i]; ncp != NULL; ncp = ncp->ci_next)
948 948 *cpp++ = fmd_case_tryhold(ncp);
949 949 ASSERT(cpp == cps + cpc);
950 950 (void) pthread_rwlock_unlock(&chp->ch_lock);
951 951
952 952 /*
953 953 * Run fmd_case_check_for_dups() on all cases except the current one.
954 954 */
955 955 for (i = 0; i < cpc; i++) {
956 956 if (cps[i] != NULL) {
957 957 if (cps[i] != (fmd_case_impl_t *)cp)
958 958 fmd_case_check_for_dups((fmd_case_t *)cps[i],
959 959 &fccd);
960 960 fmd_case_rele((fmd_case_t *)cps[i]);
961 961 }
962 962 }
963 963 fmd_free(cps, cpc * sizeof (fmd_case_t *));
964 964
965 965 (void) pthread_mutex_lock(&cip->ci_lock);
966 966 if (cip->ci_code == NULL)
967 967 (void) fmd_case_mkcode(cp);
968 968 else if (cip->ci_precanned)
969 969 fmd_case_code_hash_insert(fmd.d_cases, cip);
970 970
971 971 if (discard_new) {
972 972 /*
973 973 * We've found an existing case that is a match and it is not
974 974 * already in repaired or resolved state. So we can close this
975 975 * one as a duplicate.
976 976 */
977 977 (void) pthread_mutex_unlock(&cip->ci_lock);
978 978 return (1);
979 979 }
980 980
981 981 /*
982 982 * Allocate new cache entries
983 983 */
984 984 for (cis = cip->ci_suspects; cis != NULL; cis = cis->cis_next) {
985 985 if ((alp = fmd_asru_hash_create_entry(ahp,
986 986 cp, cis->cis_nvl)) == NULL) {
987 987 fmd_error(EFMD_CASE_EVENT, "cannot convict suspect in "
988 988 "%s: %s\n", cip->ci_uuid, fmd_strerror(errno));
989 989 continue;
990 990 }
991 991 alp->al_flags |= FMD_ASRU_PRESENT;
992 992 alp->al_asru->asru_flags |= FMD_ASRU_PRESENT;
993 993 (void) fmd_asru_clrflags(alp, FMD_ASRU_UNUSABLE, 0);
994 994 (void) fmd_asru_setflags(alp, FMD_ASRU_FAULTY);
995 995 }
996 996
997 997 if (adjust_new) {
998 998 int some_suspect = 0, some_not_suspect = 0;
999 999
1000 1000 /*
1001 1001 * There is one or more matching case but they are already in
1002 1002 * repaired or resolved state. So we need to keep the new
1003 1003 * case, but we can adjust it. Repaired/removed/replaced
1004 1004 * suspects are unlikely to be to blame (unless there are
1005 1005 * actually two separate faults). So if we have a combination of
1006 1006 * repaired/replaced/removed suspects and acquitted suspects in
1007 1007 * the old lists, then we should acquit in the new list those
1008 1008 * that were repaired/replaced/removed in the old.
1009 1009 */
1010 1010 for (i = 0; i < cip->ci_nsuspects; i++) {
1011 1011 if ((new_susp_state[i] & SUSPECT_STATE_REPLACED) ||
1012 1012 (new_susp_state[i] & SUSPECT_STATE_REPAIRED) ||
1013 1013 (new_susp_state[i] & SUSPECT_STATE_REMOVED) ||
1014 1014 (new_match_state[i] & SUSPECT_STATE_NO_MATCH))
1015 1015 some_not_suspect = 1;
1016 1016 else
1017 1017 some_suspect = 1;
1018 1018 }
1019 1019 if (some_suspect && some_not_suspect) {
1020 1020 for (cis = cip->ci_suspects, i = 0; cis != NULL;
1021 1021 cis = cis->cis_next, i++)
1022 1022 if ((new_susp_state[i] &
1023 1023 SUSPECT_STATE_REPLACED) ||
1024 1024 (new_susp_state[i] &
1025 1025 SUSPECT_STATE_REPAIRED) ||
1026 1026 (new_susp_state[i] &
1027 1027 SUSPECT_STATE_REMOVED) ||
1028 1028 (new_match_state[i] &
1029 1029 SUSPECT_STATE_NO_MATCH))
1030 1030 fmd_asru_hash_apply_by_case(fmd.d_asrus,
1031 1031 cp, fmd_case_acquit_suspect,
1032 1032 cis->cis_nvl);
1033 1033 }
1034 1034 }
1035 1035
1036 1036 (void) pthread_mutex_unlock(&cip->ci_lock);
1037 1037 return (0);
1038 1038 }
1039 1039
1040 1040 void
1041 1041 fmd_case_publish(fmd_case_t *cp, uint_t state)
1042 1042 {
1043 1043 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1044 1044 fmd_event_t *e;
1045 1045 nvlist_t *nvl;
1046 1046 char *class;
1047 1047
1048 1048 if (state == FMD_CASE_CURRENT)
1049 1049 state = cip->ci_state; /* use current state */
1050 1050
1051 1051 switch (state) {
1052 1052 case FMD_CASE_SOLVED:
1053 1053 (void) pthread_mutex_lock(&cip->ci_lock);
1054 1054
1055 1055 /*
1056 1056 * If we already have a code, then case is already solved.
1057 1057 */
1058 1058 if (cip->ci_precanned == 0 && cip->ci_xprt == NULL &&
1059 1059 cip->ci_code != NULL) {
1060 1060 (void) pthread_mutex_unlock(&cip->ci_lock);
1061 1061 break;
1062 1062 }
1063 1063
1064 1064 if (cip->ci_tv_valid == 0) {
1065 1065 fmd_time_gettimeofday(&cip->ci_tv);
1066 1066 cip->ci_tv_valid = 1;
1067 1067 }
1068 1068 (void) pthread_mutex_unlock(&cip->ci_lock);
1069 1069
1070 1070 if (fmd_case_convict(cp) == 1) { /* dupclose */
1071 1071 cip->ci_flags &= ~FMD_CF_SOLVED;
1072 1072 fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, 0);
1073 1073 break;
1074 1074 }
1075 1075 if (cip->ci_xprt != NULL) {
1076 1076 /*
1077 1077 * For proxy, save some information about the transport
1078 1078 * in the resource cache.
1079 1079 */
1080 1080 int count = 0;
1081 1081 fmd_asru_set_on_proxy_t fasp;
1082 1082 fmd_xprt_impl_t *xip = (fmd_xprt_impl_t *)cip->ci_xprt;
1083 1083
1084 1084 fasp.fasp_countp = &count;
1085 1085 fasp.fasp_maxcount = cip->ci_nsuspects;
1086 1086 fasp.fasp_proxy_asru = cip->ci_proxy_asru;
1087 1087 fasp.fasp_proxy_external = xip->xi_flags &
1088 1088 FMD_XPRT_EXTERNAL;
1089 1089 fasp.fasp_proxy_rdonly = ((xip->xi_flags &
1090 1090 FMD_XPRT_RDWR) == FMD_XPRT_RDONLY);
1091 1091 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
1092 1092 fmd_asru_set_on_proxy, &fasp);
1093 1093 }
1094 1094 nvl = fmd_case_mkevent(cp, FM_LIST_SUSPECT_CLASS);
1095 1095 (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1096 1096
1097 1097 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1098 1098 (void) pthread_rwlock_rdlock(&fmd.d_log_lock);
1099 1099 fmd_log_append(fmd.d_fltlog, e, cp);
1100 1100 (void) pthread_rwlock_unlock(&fmd.d_log_lock);
1101 1101 fmd_dispq_dispatch(fmd.d_disp, e, class);
1102 1102
1103 1103 (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
1104 1104 cip->ci_mod->mod_stats->ms_casesolved.fmds_value.ui64++;
1105 1105 (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
1106 1106
1107 1107 break;
1108 1108
1109 1109 case FMD_CASE_CLOSE_WAIT:
1110 1110 fmd_case_hold(cp);
1111 1111 e = fmd_event_create(FMD_EVT_CLOSE, FMD_HRT_NOW, NULL, cp);
1112 1112 fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
1113 1113
1114 1114 (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
1115 1115 cip->ci_mod->mod_stats->ms_caseclosed.fmds_value.ui64++;
1116 1116 (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
1117 1117
1118 1118 break;
1119 1119
1120 1120 case FMD_CASE_CLOSED:
1121 1121 nvl = fmd_case_mkevent(cp, FM_LIST_ISOLATED_CLASS);
1122 1122 (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1123 1123 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1124 1124 fmd_dispq_dispatch(fmd.d_disp, e, class);
1125 1125 break;
1126 1126
1127 1127 case FMD_CASE_REPAIRED:
1128 1128 nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
1129 1129 (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1130 1130 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1131 1131 (void) pthread_rwlock_rdlock(&fmd.d_log_lock);
1132 1132 fmd_log_append(fmd.d_fltlog, e, cp);
1133 1133 (void) pthread_rwlock_unlock(&fmd.d_log_lock);
1134 1134 fmd_dispq_dispatch(fmd.d_disp, e, class);
1135 1135 break;
1136 1136
1137 1137 case FMD_CASE_RESOLVED:
1138 1138 nvl = fmd_case_mkevent(cp, FM_LIST_RESOLVED_CLASS);
1139 1139 (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
1140 1140 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
1141 1141 (void) pthread_rwlock_rdlock(&fmd.d_log_lock);
1142 1142 fmd_log_append(fmd.d_fltlog, e, cp);
1143 1143 (void) pthread_rwlock_unlock(&fmd.d_log_lock);
1144 1144 fmd_dispq_dispatch(fmd.d_disp, e, class);
1145 1145 break;
1146 1146 }
1147 1147 }
1148 1148
1149 1149 fmd_case_t *
1150 1150 fmd_case_hash_lookup(fmd_case_hash_t *chp, const char *uuid)
1151 1151 {
1152 1152 fmd_case_impl_t *cip;
1153 1153 uint_t h;
1154 1154
1155 1155 (void) pthread_rwlock_rdlock(&chp->ch_lock);
1156 1156 h = fmd_strhash(uuid) % chp->ch_hashlen;
1157 1157
1158 1158 for (cip = chp->ch_hash[h]; cip != NULL; cip = cip->ci_next) {
1159 1159 if (strcmp(cip->ci_uuid, uuid) == 0)
1160 1160 break;
1161 1161 }
1162 1162
1163 1163 /*
1164 1164 * If deleting bit is set, treat the case as if it doesn't exist.
1165 1165 */
1166 1166 if (cip != NULL)
1167 1167 cip = fmd_case_tryhold(cip);
1168 1168
1169 1169 if (cip == NULL)
1170 1170 (void) fmd_set_errno(EFMD_CASE_INVAL);
1171 1171
1172 1172 (void) pthread_rwlock_unlock(&chp->ch_lock);
1173 1173 return ((fmd_case_t *)cip);
1174 1174 }
1175 1175
1176 1176 static fmd_case_impl_t *
1177 1177 fmd_case_hash_insert(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
1178 1178 {
1179 1179 fmd_case_impl_t *eip;
1180 1180 uint_t h;
1181 1181
1182 1182 (void) pthread_rwlock_wrlock(&chp->ch_lock);
1183 1183 h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
1184 1184
1185 1185 for (eip = chp->ch_hash[h]; eip != NULL; eip = eip->ci_next) {
1186 1186 if (strcmp(cip->ci_uuid, eip->ci_uuid) == 0 &&
1187 1187 fmd_case_tryhold(eip) != NULL) {
1188 1188 (void) pthread_rwlock_unlock(&chp->ch_lock);
1189 1189 return (eip); /* uuid already present */
1190 1190 }
1191 1191 }
1192 1192
1193 1193 cip->ci_next = chp->ch_hash[h];
1194 1194 chp->ch_hash[h] = cip;
1195 1195
1196 1196 chp->ch_count++;
1197 1197 ASSERT(chp->ch_count != 0);
1198 1198
1199 1199 (void) pthread_rwlock_unlock(&chp->ch_lock);
1200 1200 return (cip);
1201 1201 }
1202 1202
1203 1203 static void
1204 1204 fmd_case_hash_delete(fmd_case_hash_t *chp, fmd_case_impl_t *cip)
1205 1205 {
1206 1206 fmd_case_impl_t *cp, **pp;
1207 1207 uint_t h;
1208 1208
1209 1209 ASSERT(MUTEX_HELD(&cip->ci_lock));
1210 1210
1211 1211 cip->ci_flags |= FMD_CF_DELETING;
1212 1212 (void) pthread_mutex_unlock(&cip->ci_lock);
1213 1213
1214 1214 (void) pthread_rwlock_wrlock(&chp->ch_lock);
1215 1215
1216 1216 h = fmd_strhash(cip->ci_uuid) % chp->ch_hashlen;
1217 1217 pp = &chp->ch_hash[h];
1218 1218
1219 1219 for (cp = *pp; cp != NULL; cp = cp->ci_next) {
1220 1220 if (cp != cip)
1221 1221 pp = &cp->ci_next;
1222 1222 else
1223 1223 break;
1224 1224 }
1225 1225
1226 1226 if (cp == NULL) {
1227 1227 fmd_panic("case %p (%s) not found on hash chain %u\n",
1228 1228 (void *)cip, cip->ci_uuid, h);
1229 1229 }
1230 1230
1231 1231 *pp = cp->ci_next;
1232 1232 cp->ci_next = NULL;
1233 1233
1234 1234 /*
1235 1235 * delete from code hash if it is on it
1236 1236 */
1237 1237 fmd_case_code_hash_delete(chp, cip);
1238 1238
1239 1239 ASSERT(chp->ch_count != 0);
1240 1240 chp->ch_count--;
1241 1241
1242 1242 (void) pthread_rwlock_unlock(&chp->ch_lock);
1243 1243
1244 1244 (void) pthread_mutex_lock(&cip->ci_lock);
1245 1245 ASSERT(cip->ci_flags & FMD_CF_DELETING);
1246 1246 }
1247 1247
1248 1248 fmd_case_t *
1249 1249 fmd_case_create(fmd_module_t *mp, const char *uuidstr, void *data)
1250 1250 {
1251 1251 fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
1252 1252 fmd_case_impl_t *eip = NULL;
1253 1253 uuid_t uuid;
1254 1254
1255 1255 (void) pthread_mutex_init(&cip->ci_lock, NULL);
1256 1256 fmd_buf_hash_create(&cip->ci_bufs);
1257 1257
1258 1258 fmd_module_hold(mp);
1259 1259 cip->ci_mod = mp;
1260 1260 cip->ci_refs = 1;
1261 1261 cip->ci_state = FMD_CASE_UNSOLVED;
1262 1262 cip->ci_flags = FMD_CF_DIRTY;
1263 1263 cip->ci_data = data;
1264 1264
1265 1265 /*
1266 1266 * Calling libuuid: get a clue. The library interfaces cleverly do not
1267 1267 * define any constant for the length of an unparse string, and do not
1268 1268 * permit the caller to specify a buffer length for safety. The spec
1269 1269 * says it will be 36 bytes, but we make it tunable just in case.
1270 1270 */
1271 1271 (void) fmd_conf_getprop(fmd.d_conf, "uuidlen", &cip->ci_uuidlen);
1272 1272 cip->ci_uuid = fmd_zalloc(cip->ci_uuidlen + 1, FMD_SLEEP);
1273 1273
1274 1274 if (uuidstr == NULL) {
1275 1275 /*
1276 1276 * We expect this loop to execute only once, but code it
1277 1277 * defensively against the possibility of libuuid bugs.
1278 1278 * Keep generating uuids and attempting to do a hash insert
1279 1279 * until we get a unique one.
1280 1280 */
1281 1281 do {
1282 1282 if (eip != NULL)
1283 1283 fmd_case_rele((fmd_case_t *)eip);
1284 1284 uuid_generate(uuid);
1285 1285 uuid_unparse(uuid, cip->ci_uuid);
1286 1286 } while ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip);
1287 1287 } else {
1288 1288 /*
1289 1289 * If a uuid was specified we must succeed with that uuid,
1290 1290 * or return NULL indicating a case with that uuid already
1291 1291 * exists.
1292 1292 */
1293 1293 (void) strncpy(cip->ci_uuid, uuidstr, cip->ci_uuidlen + 1);
1294 1294 if (fmd_case_hash_insert(fmd.d_cases, cip) != cip) {
1295 1295 fmd_free(cip->ci_uuid, cip->ci_uuidlen + 1);
1296 1296 (void) fmd_buf_hash_destroy(&cip->ci_bufs);
1297 1297 fmd_module_rele(mp);
1298 1298 pthread_mutex_destroy(&cip->ci_lock);
1299 1299 fmd_free(cip, sizeof (*cip));
1300 1300 return (NULL);
1301 1301 }
1302 1302 }
1303 1303
1304 1304 ASSERT(fmd_module_locked(mp));
1305 1305 fmd_list_append(&mp->mod_cases, cip);
1306 1306 fmd_module_setcdirty(mp);
1307 1307
1308 1308 (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
1309 1309 cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++;
1310 1310 (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
1311 1311
1312 1312 return ((fmd_case_t *)cip);
1313 1313 }
1314 1314
↓ open down ↓ |
1314 lines elided |
↑ open up ↑ |
1315 1315 static void
1316 1316 fmd_case_destroy_suspects(fmd_case_impl_t *cip)
1317 1317 {
1318 1318 fmd_case_susp_t *cis, *ncis;
1319 1319
1320 1320 ASSERT(MUTEX_HELD(&cip->ci_lock));
1321 1321
1322 1322 if (cip->ci_proxy_asru)
1323 1323 fmd_free(cip->ci_proxy_asru, sizeof (uint8_t) *
1324 1324 cip->ci_nsuspects);
1325 - if (cip->ci_diag_de)
1326 - nvlist_free(cip->ci_diag_de);
1325 + nvlist_free(cip->ci_diag_de);
1327 1326 if (cip->ci_diag_asru)
1328 1327 fmd_free(cip->ci_diag_asru, sizeof (uint8_t) *
1329 1328 cip->ci_nsuspects);
1330 1329
1331 1330 for (cis = cip->ci_suspects; cis != NULL; cis = ncis) {
1332 1331 ncis = cis->cis_next;
1333 1332 nvlist_free(cis->cis_nvl);
1334 1333 fmd_free(cis, sizeof (fmd_case_susp_t));
1335 1334 }
1336 1335
1337 1336 cip->ci_suspects = NULL;
1338 1337 cip->ci_nsuspects = 0;
1339 1338 }
1340 1339
1341 1340 fmd_case_t *
1342 1341 fmd_case_recreate(fmd_module_t *mp, fmd_xprt_t *xp,
1343 1342 uint_t state, const char *uuid, const char *code)
1344 1343 {
1345 1344 fmd_case_impl_t *cip = fmd_zalloc(sizeof (fmd_case_impl_t), FMD_SLEEP);
1346 1345 fmd_case_impl_t *eip;
1347 1346
1348 1347 (void) pthread_mutex_init(&cip->ci_lock, NULL);
1349 1348 fmd_buf_hash_create(&cip->ci_bufs);
1350 1349
1351 1350 fmd_module_hold(mp);
1352 1351 cip->ci_mod = mp;
1353 1352 cip->ci_xprt = xp;
1354 1353 cip->ci_refs = 1;
1355 1354 cip->ci_state = state;
1356 1355 cip->ci_uuid = fmd_strdup(uuid, FMD_SLEEP);
1357 1356 cip->ci_uuidlen = strlen(cip->ci_uuid);
1358 1357 cip->ci_code = fmd_strdup(code, FMD_SLEEP);
1359 1358 cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0;
1360 1359
1361 1360 if (state > FMD_CASE_CLOSE_WAIT)
1362 1361 cip->ci_flags |= FMD_CF_SOLVED;
1363 1362
1364 1363 /*
1365 1364 * Insert the case into the global case hash. If the specified UUID is
1366 1365 * already present, check to see if it is an orphan: if so, reclaim it;
1367 1366 * otherwise if it is owned by a different module then return NULL.
1368 1367 */
1369 1368 if ((eip = fmd_case_hash_insert(fmd.d_cases, cip)) != cip) {
1370 1369 (void) pthread_mutex_lock(&cip->ci_lock);
1371 1370 cip->ci_refs--; /* decrement to zero */
1372 1371 fmd_case_destroy((fmd_case_t *)cip, B_FALSE);
1373 1372
1374 1373 cip = eip; /* switch 'cip' to the existing case */
1375 1374 (void) pthread_mutex_lock(&cip->ci_lock);
1376 1375
1377 1376 /*
1378 1377 * If the ASRU cache is trying to recreate an orphan, then just
1379 1378 * return the existing case that we found without changing it.
1380 1379 */
1381 1380 if (mp == fmd.d_rmod) {
1382 1381 /*
1383 1382 * In case the case has already been created from
1384 1383 * a checkpoint file we need to set up code now.
1385 1384 */
1386 1385 if (cip->ci_state < FMD_CASE_CLOSED) {
1387 1386 if (code != NULL && cip->ci_code == NULL) {
1388 1387 cip->ci_code = fmd_strdup(code,
1389 1388 FMD_SLEEP);
1390 1389 cip->ci_codelen = cip->ci_code ?
1391 1390 strlen(cip->ci_code) + 1 : 0;
1392 1391 fmd_case_code_hash_insert(fmd.d_cases,
1393 1392 cip);
1394 1393 }
1395 1394 }
1396 1395
1397 1396 /*
1398 1397 * When recreating an orphan case, state passed in may
1399 1398 * be CLOSED (faulty) or REPAIRED/RESOLVED (!faulty). If
1400 1399 * any suspects are still CLOSED (faulty) then the
1401 1400 * overall state needs to be CLOSED.
1402 1401 */
1403 1402 if ((cip->ci_state == FMD_CASE_REPAIRED ||
1404 1403 cip->ci_state == FMD_CASE_RESOLVED) &&
1405 1404 state == FMD_CASE_CLOSED)
1406 1405 cip->ci_state = FMD_CASE_CLOSED;
1407 1406 (void) pthread_mutex_unlock(&cip->ci_lock);
1408 1407 fmd_case_rele((fmd_case_t *)cip);
1409 1408 return ((fmd_case_t *)cip);
1410 1409 }
1411 1410
1412 1411 /*
1413 1412 * If the existing case isn't an orphan or is being proxied,
1414 1413 * then we have a UUID conflict: return failure to the caller.
1415 1414 */
1416 1415 if (cip->ci_mod != fmd.d_rmod || xp != NULL) {
1417 1416 (void) pthread_mutex_unlock(&cip->ci_lock);
1418 1417 fmd_case_rele((fmd_case_t *)cip);
1419 1418 return (NULL);
1420 1419 }
1421 1420
1422 1421 /*
1423 1422 * If the new module is reclaiming an orphaned case, remove
1424 1423 * the case from the root module, switch ci_mod, and then fall
1425 1424 * through to adding the case to the new owner module 'mp'.
1426 1425 */
1427 1426 fmd_module_lock(cip->ci_mod);
1428 1427 fmd_list_delete(&cip->ci_mod->mod_cases, cip);
1429 1428 fmd_module_unlock(cip->ci_mod);
1430 1429
1431 1430 fmd_module_rele(cip->ci_mod);
1432 1431 cip->ci_mod = mp;
1433 1432 fmd_module_hold(mp);
1434 1433
1435 1434 /*
1436 1435 * It's possible that fmd crashed or was restarted during a
1437 1436 * previous solve operation between the asru cache being created
1438 1437 * and the ckpt file being updated to SOLVED. Thus when the DE
1439 1438 * recreates the case here from the checkpoint file, the state
1440 1439 * will be UNSOLVED and yet we are having to reclaim because
1441 1440 * the case was in the asru cache. If this happens, revert the
1442 1441 * case back to the UNSOLVED state and let the DE solve it again
1443 1442 */
1444 1443 if (state == FMD_CASE_UNSOLVED) {
1445 1444 fmd_asru_hash_delete_case(fmd.d_asrus,
1446 1445 (fmd_case_t *)cip);
1447 1446 fmd_case_destroy_suspects(cip);
1448 1447 fmd_case_code_hash_delete(fmd.d_cases, cip);
1449 1448 fmd_free(cip->ci_code, cip->ci_codelen);
1450 1449 cip->ci_code = NULL;
1451 1450 cip->ci_codelen = 0;
1452 1451 cip->ci_tv_valid = 0;
1453 1452 }
1454 1453
1455 1454 cip->ci_state = state;
1456 1455
1457 1456 (void) pthread_mutex_unlock(&cip->ci_lock);
1458 1457 fmd_case_rele((fmd_case_t *)cip);
1459 1458 } else {
1460 1459 /*
1461 1460 * add into hash of solved cases
1462 1461 */
1463 1462 if (cip->ci_code)
1464 1463 fmd_case_code_hash_insert(fmd.d_cases, cip);
1465 1464 }
1466 1465
1467 1466 ASSERT(fmd_module_locked(mp));
1468 1467 fmd_list_append(&mp->mod_cases, cip);
1469 1468
1470 1469 (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
1471 1470 cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64++;
1472 1471 (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
1473 1472
1474 1473 return ((fmd_case_t *)cip);
1475 1474 }
1476 1475
1477 1476 void
1478 1477 fmd_case_destroy(fmd_case_t *cp, int visible)
1479 1478 {
1480 1479 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1481 1480 fmd_case_item_t *cit, *ncit;
1482 1481
1483 1482 ASSERT(MUTEX_HELD(&cip->ci_lock));
1484 1483 ASSERT(cip->ci_refs == 0);
1485 1484
1486 1485 if (visible) {
1487 1486 TRACE((FMD_DBG_CASE, "deleting case %s", cip->ci_uuid));
1488 1487 fmd_case_hash_delete(fmd.d_cases, cip);
1489 1488 }
1490 1489
1491 1490 for (cit = cip->ci_items; cit != NULL; cit = ncit) {
1492 1491 ncit = cit->cit_next;
1493 1492 fmd_event_rele(cit->cit_event);
1494 1493 fmd_free(cit, sizeof (fmd_case_item_t));
1495 1494 }
1496 1495
1497 1496 fmd_case_destroy_suspects(cip);
1498 1497
1499 1498 if (cip->ci_principal != NULL)
1500 1499 fmd_event_rele(cip->ci_principal);
1501 1500
1502 1501 fmd_free(cip->ci_uuid, cip->ci_uuidlen + 1);
1503 1502 fmd_free(cip->ci_code, cip->ci_codelen);
1504 1503 (void) fmd_buf_hash_destroy(&cip->ci_bufs);
1505 1504
1506 1505 fmd_module_rele(cip->ci_mod);
1507 1506 fmd_free(cip, sizeof (fmd_case_impl_t));
1508 1507 }
1509 1508
1510 1509 void
1511 1510 fmd_case_hold(fmd_case_t *cp)
1512 1511 {
1513 1512 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1514 1513
1515 1514 (void) pthread_mutex_lock(&cip->ci_lock);
1516 1515 fmd_case_hold_locked(cp);
1517 1516 (void) pthread_mutex_unlock(&cip->ci_lock);
1518 1517 }
1519 1518
1520 1519 void
1521 1520 fmd_case_hold_locked(fmd_case_t *cp)
1522 1521 {
1523 1522 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1524 1523
1525 1524 ASSERT(MUTEX_HELD(&cip->ci_lock));
1526 1525 if (cip->ci_flags & FMD_CF_DELETING)
1527 1526 fmd_panic("attempt to hold a deleting case %p (%s)\n",
1528 1527 (void *)cip, cip->ci_uuid);
1529 1528 cip->ci_refs++;
1530 1529 ASSERT(cip->ci_refs != 0);
1531 1530 }
1532 1531
1533 1532 static fmd_case_impl_t *
1534 1533 fmd_case_tryhold(fmd_case_impl_t *cip)
1535 1534 {
1536 1535 /*
1537 1536 * If the case's "deleting" bit is unset, hold and return case,
1538 1537 * otherwise, return NULL.
1539 1538 */
1540 1539 (void) pthread_mutex_lock(&cip->ci_lock);
1541 1540 if (cip->ci_flags & FMD_CF_DELETING) {
1542 1541 (void) pthread_mutex_unlock(&cip->ci_lock);
1543 1542 cip = NULL;
1544 1543 } else {
1545 1544 fmd_case_hold_locked((fmd_case_t *)cip);
1546 1545 (void) pthread_mutex_unlock(&cip->ci_lock);
1547 1546 }
1548 1547 return (cip);
1549 1548 }
1550 1549
1551 1550 void
1552 1551 fmd_case_rele(fmd_case_t *cp)
1553 1552 {
1554 1553 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1555 1554
1556 1555 (void) pthread_mutex_lock(&cip->ci_lock);
1557 1556 ASSERT(cip->ci_refs != 0);
1558 1557
1559 1558 if (--cip->ci_refs == 0)
1560 1559 fmd_case_destroy((fmd_case_t *)cip, B_TRUE);
1561 1560 else
1562 1561 (void) pthread_mutex_unlock(&cip->ci_lock);
1563 1562 }
1564 1563
1565 1564 void
1566 1565 fmd_case_rele_locked(fmd_case_t *cp)
1567 1566 {
1568 1567 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1569 1568
1570 1569 ASSERT(MUTEX_HELD(&cip->ci_lock));
1571 1570 --cip->ci_refs;
1572 1571 ASSERT(cip->ci_refs != 0);
1573 1572 }
1574 1573
1575 1574 int
1576 1575 fmd_case_insert_principal(fmd_case_t *cp, fmd_event_t *ep)
1577 1576 {
1578 1577 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1579 1578 fmd_case_item_t *cit;
1580 1579 fmd_event_t *oep;
1581 1580 uint_t state;
1582 1581 int new;
1583 1582
1584 1583 fmd_event_hold(ep);
1585 1584 (void) pthread_mutex_lock(&cip->ci_lock);
1586 1585
1587 1586 if (cip->ci_flags & FMD_CF_SOLVED)
1588 1587 state = FMD_EVS_DIAGNOSED;
1589 1588 else
1590 1589 state = FMD_EVS_ACCEPTED;
1591 1590
1592 1591 oep = cip->ci_principal;
1593 1592 cip->ci_principal = ep;
1594 1593
1595 1594 for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
1596 1595 if (cit->cit_event == ep)
1597 1596 break;
1598 1597 }
1599 1598
1600 1599 cip->ci_flags |= FMD_CF_DIRTY;
1601 1600 new = cit == NULL && ep != oep;
1602 1601
1603 1602 (void) pthread_mutex_unlock(&cip->ci_lock);
1604 1603
1605 1604 fmd_module_setcdirty(cip->ci_mod);
1606 1605 fmd_event_transition(ep, state);
1607 1606
1608 1607 if (oep != NULL)
1609 1608 fmd_event_rele(oep);
1610 1609
1611 1610 return (new);
1612 1611 }
1613 1612
1614 1613 int
1615 1614 fmd_case_insert_event(fmd_case_t *cp, fmd_event_t *ep)
1616 1615 {
1617 1616 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1618 1617 fmd_case_item_t *cit;
1619 1618 uint_t state;
1620 1619 int new;
1621 1620 boolean_t injected;
1622 1621
1623 1622 (void) pthread_mutex_lock(&cip->ci_lock);
1624 1623
1625 1624 if (cip->ci_flags & FMD_CF_SOLVED)
1626 1625 state = FMD_EVS_DIAGNOSED;
1627 1626 else
1628 1627 state = FMD_EVS_ACCEPTED;
1629 1628
1630 1629 for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
1631 1630 if (cit->cit_event == ep)
1632 1631 break;
1633 1632 }
1634 1633
1635 1634 new = cit == NULL && ep != cip->ci_principal;
1636 1635
1637 1636 /*
1638 1637 * If the event is already in the case or the case is already solved,
1639 1638 * there is no reason to save it: just transition it appropriately.
1640 1639 */
1641 1640 if (cit != NULL || (cip->ci_flags & FMD_CF_SOLVED)) {
1642 1641 (void) pthread_mutex_unlock(&cip->ci_lock);
1643 1642 fmd_event_transition(ep, state);
1644 1643 return (new);
1645 1644 }
1646 1645
1647 1646 cit = fmd_alloc(sizeof (fmd_case_item_t), FMD_SLEEP);
1648 1647 fmd_event_hold(ep);
1649 1648
1650 1649 if (nvlist_lookup_boolean_value(((fmd_event_impl_t *)ep)->ev_nvl,
1651 1650 "__injected", &injected) == 0 && injected)
1652 1651 fmd_case_set_injected(cp);
1653 1652
1654 1653 cit->cit_next = cip->ci_items;
1655 1654 cit->cit_event = ep;
1656 1655
1657 1656 cip->ci_items = cit;
1658 1657 cip->ci_nitems++;
1659 1658
1660 1659 cip->ci_flags |= FMD_CF_DIRTY;
1661 1660 (void) pthread_mutex_unlock(&cip->ci_lock);
1662 1661
1663 1662 fmd_module_setcdirty(cip->ci_mod);
1664 1663 fmd_event_transition(ep, state);
1665 1664
1666 1665 return (new);
1667 1666 }
1668 1667
1669 1668 void
1670 1669 fmd_case_insert_suspect(fmd_case_t *cp, nvlist_t *nvl)
1671 1670 {
1672 1671 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1673 1672 fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP);
1674 1673
1675 1674 (void) pthread_mutex_lock(&cip->ci_lock);
1676 1675 ASSERT(cip->ci_state < FMD_CASE_CLOSE_WAIT);
1677 1676 cip->ci_flags |= FMD_CF_DIRTY;
1678 1677
1679 1678 cis->cis_next = cip->ci_suspects;
1680 1679 cis->cis_nvl = nvl;
1681 1680
1682 1681 cip->ci_suspects = cis;
1683 1682 cip->ci_nsuspects++;
1684 1683
1685 1684 (void) pthread_mutex_unlock(&cip->ci_lock);
1686 1685 if (cip->ci_xprt == NULL)
1687 1686 fmd_module_setcdirty(cip->ci_mod);
1688 1687 }
1689 1688
1690 1689 void
1691 1690 fmd_case_recreate_suspect(fmd_case_t *cp, nvlist_t *nvl)
1692 1691 {
1693 1692 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1694 1693 fmd_case_susp_t *cis = fmd_alloc(sizeof (fmd_case_susp_t), FMD_SLEEP);
1695 1694 boolean_t b;
1696 1695
1697 1696 (void) pthread_mutex_lock(&cip->ci_lock);
1698 1697
1699 1698 cis->cis_next = cip->ci_suspects;
1700 1699 cis->cis_nvl = nvl;
1701 1700
1702 1701 if (nvlist_lookup_boolean_value(nvl,
1703 1702 FM_SUSPECT_MESSAGE, &b) == 0 && b == B_FALSE)
1704 1703 cip->ci_flags |= FMD_CF_INVISIBLE;
1705 1704
1706 1705 cip->ci_suspects = cis;
1707 1706 cip->ci_nsuspects++;
1708 1707
1709 1708 (void) pthread_mutex_unlock(&cip->ci_lock);
1710 1709 }
1711 1710
1712 1711 void
1713 1712 fmd_case_reset_suspects(fmd_case_t *cp)
1714 1713 {
1715 1714 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1716 1715
1717 1716 (void) pthread_mutex_lock(&cip->ci_lock);
1718 1717 ASSERT(cip->ci_state < FMD_CASE_SOLVED);
1719 1718
1720 1719 fmd_case_destroy_suspects(cip);
1721 1720 cip->ci_flags |= FMD_CF_DIRTY;
1722 1721
1723 1722 (void) pthread_mutex_unlock(&cip->ci_lock);
1724 1723 fmd_module_setcdirty(cip->ci_mod);
1725 1724 }
1726 1725
1727 1726 /*ARGSUSED*/
1728 1727 static void
1729 1728 fmd_case_unusable(fmd_asru_link_t *alp, void *arg)
1730 1729 {
1731 1730 (void) fmd_asru_setflags(alp, FMD_ASRU_UNUSABLE);
1732 1731 }
1733 1732
1734 1733 /*
1735 1734 * Grab ci_lock and update the case state and set the dirty bit. Then perform
1736 1735 * whatever actions and emit whatever events are appropriate for the state.
1737 1736 * Refer to the topmost block comment explaining the state machine for details.
1738 1737 */
1739 1738 void
1740 1739 fmd_case_transition(fmd_case_t *cp, uint_t state, uint_t flags)
1741 1740 {
1742 1741 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1743 1742 fmd_case_item_t *cit;
1744 1743 fmd_event_t *e;
1745 1744 int resolved = 0;
1746 1745 int any_unusable_and_present = 0;
1747 1746
1748 1747 ASSERT(state <= FMD_CASE_RESOLVED);
1749 1748 (void) pthread_mutex_lock(&cip->ci_lock);
1750 1749
1751 1750 if (!(cip->ci_flags & FMD_CF_SOLVED) && !(flags & FMD_CF_SOLVED))
1752 1751 flags &= ~(FMD_CF_ISOLATED | FMD_CF_REPAIRED | FMD_CF_RESOLVED);
1753 1752
1754 1753 cip->ci_flags |= flags;
1755 1754
1756 1755 if (cip->ci_state >= state) {
1757 1756 (void) pthread_mutex_unlock(&cip->ci_lock);
1758 1757 return; /* already in specified state */
1759 1758 }
1760 1759
1761 1760 TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid,
1762 1761 _fmd_case_snames[cip->ci_state], _fmd_case_snames[state]));
1763 1762
1764 1763 cip->ci_state = state;
1765 1764 cip->ci_flags |= FMD_CF_DIRTY;
1766 1765
1767 1766 if (cip->ci_xprt == NULL && cip->ci_mod != fmd.d_rmod)
1768 1767 fmd_module_setcdirty(cip->ci_mod);
1769 1768
1770 1769 switch (state) {
1771 1770 case FMD_CASE_SOLVED:
1772 1771 for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
1773 1772 fmd_event_transition(cit->cit_event, FMD_EVS_DIAGNOSED);
1774 1773
1775 1774 if (cip->ci_principal != NULL) {
1776 1775 fmd_event_transition(cip->ci_principal,
1777 1776 FMD_EVS_DIAGNOSED);
1778 1777 }
1779 1778 break;
1780 1779
1781 1780 case FMD_CASE_CLOSE_WAIT:
1782 1781 /*
1783 1782 * If the case was never solved, do not change ASRUs.
1784 1783 * If the case was never fmd_case_closed, do not change ASRUs.
1785 1784 * If the case was repaired, do not change ASRUs.
1786 1785 */
1787 1786 if ((cip->ci_flags & (FMD_CF_SOLVED | FMD_CF_ISOLATED |
1788 1787 FMD_CF_REPAIRED)) == (FMD_CF_SOLVED | FMD_CF_ISOLATED))
1789 1788 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
1790 1789 fmd_case_unusable, NULL);
1791 1790
1792 1791 /*
1793 1792 * If an orphaned case transitions to CLOSE_WAIT, the owning
1794 1793 * module is no longer loaded: continue on to CASE_CLOSED or
1795 1794 * CASE_REPAIRED as appropriate.
1796 1795 */
1797 1796 if (fmd_case_orphaned(cp)) {
1798 1797 if (cip->ci_flags & FMD_CF_REPAIRED) {
1799 1798 state = cip->ci_state = FMD_CASE_REPAIRED;
1800 1799 TRACE((FMD_DBG_CASE, "case %s %s->%s",
1801 1800 cip->ci_uuid,
1802 1801 _fmd_case_snames[FMD_CASE_CLOSE_WAIT],
1803 1802 _fmd_case_snames[FMD_CASE_REPAIRED]));
1804 1803 goto do_repair;
1805 1804 } else {
1806 1805 state = cip->ci_state = FMD_CASE_CLOSED;
1807 1806 TRACE((FMD_DBG_CASE, "case %s %s->%s",
1808 1807 cip->ci_uuid,
1809 1808 _fmd_case_snames[FMD_CASE_CLOSE_WAIT],
1810 1809 _fmd_case_snames[FMD_CASE_CLOSED]));
1811 1810 }
1812 1811 }
1813 1812 break;
1814 1813
1815 1814 case FMD_CASE_REPAIRED:
1816 1815 do_repair:
1817 1816 ASSERT(cip->ci_xprt != NULL || fmd_case_orphaned(cp));
1818 1817
1819 1818 /*
1820 1819 * If we've been requested to transition straight on to the
1821 1820 * RESOLVED state (which can happen with fault proxying where a
1822 1821 * list.resolved or a uuresolved is received from the other
1823 1822 * side), or if all suspects are already either usable or not
1824 1823 * present then transition straight to RESOLVED state,
1825 1824 * publishing both the list.repaired and list.resolved. For a
1826 1825 * proxy, if we discover here that all suspects are already
1827 1826 * either usable or not present, notify the diag side instead
1828 1827 * using fmd_xprt_uuresolved().
1829 1828 */
1830 1829 if (flags & FMD_CF_RESOLVED) {
1831 1830 if (cip->ci_xprt != NULL)
1832 1831 fmd_list_delete(&cip->ci_mod->mod_cases, cip);
1833 1832 } else {
1834 1833 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
1835 1834 fmd_case_unusable_and_present,
1836 1835 &any_unusable_and_present);
1837 1836 if (any_unusable_and_present)
1838 1837 break;
1839 1838 if (cip->ci_xprt != NULL) {
1840 1839 fmd_xprt_uuresolved(cip->ci_xprt, cip->ci_uuid);
1841 1840 break;
1842 1841 }
1843 1842 }
1844 1843
1845 1844 cip->ci_state = FMD_CASE_RESOLVED;
1846 1845 (void) pthread_mutex_unlock(&cip->ci_lock);
1847 1846 fmd_case_publish(cp, state);
1848 1847 TRACE((FMD_DBG_CASE, "case %s %s->%s", cip->ci_uuid,
1849 1848 _fmd_case_snames[FMD_CASE_REPAIRED],
1850 1849 _fmd_case_snames[FMD_CASE_RESOLVED]));
1851 1850 state = FMD_CASE_RESOLVED;
1852 1851 resolved = 1;
1853 1852 (void) pthread_mutex_lock(&cip->ci_lock);
1854 1853 break;
1855 1854
1856 1855 case FMD_CASE_RESOLVED:
1857 1856 /*
1858 1857 * For a proxy, no need to check that all suspects are already
1859 1858 * either usable or not present - this request has come from
1860 1859 * the diagnosing side which makes the final decision on this.
1861 1860 */
1862 1861 if (cip->ci_xprt != NULL) {
1863 1862 fmd_list_delete(&cip->ci_mod->mod_cases, cip);
1864 1863 resolved = 1;
1865 1864 break;
1866 1865 }
1867 1866
1868 1867 ASSERT(fmd_case_orphaned(cp));
1869 1868
1870 1869 /*
1871 1870 * If all suspects are already either usable or not present then
1872 1871 * carry on, publish list.resolved and discard the case.
1873 1872 */
1874 1873 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
1875 1874 fmd_case_unusable_and_present, &any_unusable_and_present);
1876 1875 if (any_unusable_and_present) {
1877 1876 (void) pthread_mutex_unlock(&cip->ci_lock);
1878 1877 return;
1879 1878 }
1880 1879
1881 1880 resolved = 1;
1882 1881 break;
1883 1882 }
1884 1883
1885 1884 (void) pthread_mutex_unlock(&cip->ci_lock);
1886 1885
1887 1886 /*
1888 1887 * If the module has initialized, then publish the appropriate event
1889 1888 * for the new case state. If not, we are being called from the
1890 1889 * checkpoint code during module load, in which case the module's
1891 1890 * _fmd_init() routine hasn't finished yet, and our event dictionaries
1892 1891 * may not be open yet, which will prevent us from computing the event
1893 1892 * code. Defer the call to fmd_case_publish() by enqueuing a PUBLISH
1894 1893 * event in our queue: this won't be processed until _fmd_init is done.
1895 1894 */
1896 1895 if (cip->ci_mod->mod_flags & FMD_MOD_INIT)
1897 1896 fmd_case_publish(cp, state);
1898 1897 else {
1899 1898 fmd_case_hold(cp);
1900 1899 e = fmd_event_create(FMD_EVT_PUBLISH, FMD_HRT_NOW, NULL, cp);
1901 1900 fmd_eventq_insert_at_head(cip->ci_mod->mod_queue, e);
1902 1901 }
1903 1902
1904 1903 if (resolved) {
1905 1904 if (cip->ci_xprt != NULL) {
1906 1905 /*
1907 1906 * If we transitioned to RESOLVED, adjust the reference
1908 1907 * count to reflect our removal from
1909 1908 * fmd.d_rmod->mod_cases above. If the caller has not
1910 1909 * placed an additional hold on the case, it will now
1911 1910 * be freed.
1912 1911 */
1913 1912 (void) pthread_mutex_lock(&cip->ci_lock);
1914 1913 fmd_asru_hash_delete_case(fmd.d_asrus, cp);
1915 1914 (void) pthread_mutex_unlock(&cip->ci_lock);
1916 1915 fmd_case_rele(cp);
1917 1916 } else {
1918 1917 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
1919 1918 fmd_asru_log_resolved, NULL);
1920 1919 (void) pthread_mutex_lock(&cip->ci_lock);
1921 1920 /* mark as "ready to be discarded" */
1922 1921 cip->ci_flags |= FMD_CF_RES_CMPL;
1923 1922 (void) pthread_mutex_unlock(&cip->ci_lock);
1924 1923 }
1925 1924 }
1926 1925 }
1927 1926
1928 1927 /*
1929 1928 * Discard any case if it is in RESOLVED state (and, if the check_if_aged
1930 1929 * argument is set, only if all suspects have passed the rsrc.aged time).
1931 1930 */
1932 1931 void
1933 1932 fmd_case_discard_resolved(fmd_case_t *cp, void *arg)
1934 1933 {
1935 1934 int check_if_aged = *(int *)arg;
1936 1935 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1937 1936
1938 1937 /*
1939 1938 * First check if case has completed transition to resolved.
1940 1939 */
1941 1940 (void) pthread_mutex_lock(&cip->ci_lock);
1942 1941 if (!(cip->ci_flags & FMD_CF_RES_CMPL)) {
1943 1942 (void) pthread_mutex_unlock(&cip->ci_lock);
1944 1943 return;
1945 1944 }
1946 1945
1947 1946 /*
1948 1947 * Now if check_if_aged is set, see if all suspects have aged.
1949 1948 */
1950 1949 if (check_if_aged) {
1951 1950 int aged = 1;
1952 1951
1953 1952 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
1954 1953 fmd_asru_check_if_aged, &aged);
1955 1954 if (!aged) {
1956 1955 (void) pthread_mutex_unlock(&cip->ci_lock);
1957 1956 return;
1958 1957 }
1959 1958 }
1960 1959
1961 1960 /*
1962 1961 * Finally discard the case, clearing FMD_CF_RES_CMPL so we don't
1963 1962 * do it twice.
1964 1963 */
1965 1964 fmd_module_lock(cip->ci_mod);
1966 1965 fmd_list_delete(&cip->ci_mod->mod_cases, cip);
1967 1966 fmd_module_unlock(cip->ci_mod);
1968 1967 fmd_asru_hash_delete_case(fmd.d_asrus, cp);
1969 1968 cip->ci_flags &= ~FMD_CF_RES_CMPL;
1970 1969 (void) pthread_mutex_unlock(&cip->ci_lock);
1971 1970 fmd_case_rele(cp);
1972 1971 }
1973 1972
1974 1973 /*
1975 1974 * Transition the specified case to *at least* the specified state by first
1976 1975 * re-validating the suspect list using the resource cache. This function is
1977 1976 * employed by the checkpoint code when restoring a saved, solved case to see
1978 1977 * if the state of the case has effectively changed while fmd was not running
1979 1978 * or the module was not loaded.
1980 1979 */
1981 1980 void
1982 1981 fmd_case_transition_update(fmd_case_t *cp, uint_t state, uint_t flags)
1983 1982 {
1984 1983 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
1985 1984
1986 1985 int usable = 0; /* are any suspects usable? */
1987 1986
1988 1987 ASSERT(state >= FMD_CASE_SOLVED);
1989 1988 (void) pthread_mutex_lock(&cip->ci_lock);
1990 1989
1991 1990 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_usable, &usable);
1992 1991
1993 1992 (void) pthread_mutex_unlock(&cip->ci_lock);
1994 1993
1995 1994 if (!usable) {
1996 1995 state = MAX(state, FMD_CASE_CLOSE_WAIT);
1997 1996 flags |= FMD_CF_ISOLATED;
1998 1997 }
1999 1998
2000 1999 fmd_case_transition(cp, state, flags);
2001 2000 }
2002 2001
2003 2002 void
2004 2003 fmd_case_setdirty(fmd_case_t *cp)
2005 2004 {
2006 2005 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2007 2006
2008 2007 (void) pthread_mutex_lock(&cip->ci_lock);
2009 2008 cip->ci_flags |= FMD_CF_DIRTY;
2010 2009 (void) pthread_mutex_unlock(&cip->ci_lock);
2011 2010
2012 2011 fmd_module_setcdirty(cip->ci_mod);
2013 2012 }
2014 2013
2015 2014 void
2016 2015 fmd_case_clrdirty(fmd_case_t *cp)
2017 2016 {
2018 2017 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2019 2018
2020 2019 (void) pthread_mutex_lock(&cip->ci_lock);
2021 2020 cip->ci_flags &= ~FMD_CF_DIRTY;
2022 2021 (void) pthread_mutex_unlock(&cip->ci_lock);
2023 2022 }
2024 2023
2025 2024 void
2026 2025 fmd_case_commit(fmd_case_t *cp)
2027 2026 {
2028 2027 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2029 2028 fmd_case_item_t *cit;
2030 2029
2031 2030 (void) pthread_mutex_lock(&cip->ci_lock);
2032 2031
2033 2032 if (cip->ci_flags & FMD_CF_DIRTY) {
2034 2033 for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next)
2035 2034 fmd_event_commit(cit->cit_event);
2036 2035
2037 2036 if (cip->ci_principal != NULL)
2038 2037 fmd_event_commit(cip->ci_principal);
2039 2038
2040 2039 fmd_buf_hash_commit(&cip->ci_bufs);
2041 2040 cip->ci_flags &= ~FMD_CF_DIRTY;
2042 2041 }
2043 2042
2044 2043 (void) pthread_mutex_unlock(&cip->ci_lock);
2045 2044 }
2046 2045
2047 2046 /*
2048 2047 * On proxy side, send back repair/acquit/etc request to diagnosing side
2049 2048 */
2050 2049 void
2051 2050 fmd_case_xprt_updated(fmd_case_t *cp)
2052 2051 {
2053 2052 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2054 2053 nvlist_t **nva;
2055 2054 uint8_t *ba;
2056 2055 int msg = B_TRUE;
2057 2056 int count = 0;
2058 2057 fmd_case_lst_t fcl;
2059 2058
2060 2059 ASSERT(cip->ci_xprt != NULL);
2061 2060 (void) pthread_mutex_lock(&cip->ci_lock);
2062 2061 ba = alloca(sizeof (uint8_t) * cip->ci_nsuspects);
2063 2062 nva = alloca(sizeof (nvlist_t *) * cip->ci_nsuspects);
2064 2063 fcl.fcl_countp = &count;
2065 2064 fcl.fcl_maxcount = cip->ci_nsuspects;
2066 2065 fcl.fcl_msgp = &msg;
2067 2066 fcl.fcl_ba = ba;
2068 2067 fcl.fcl_nva = nva;
2069 2068 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_set_lst, &fcl);
2070 2069 (void) pthread_mutex_unlock(&cip->ci_lock);
2071 2070 fmd_xprt_updated(cip->ci_xprt, cip->ci_uuid, ba, cip->ci_proxy_asru,
2072 2071 count);
2073 2072 }
2074 2073
2075 2074 /*
2076 2075 * fmd_case_update_status() can be called on either the proxy side when a
2077 2076 * list.suspect is received, or on the diagnosing side when an update request
2078 2077 * is received from the proxy. It updates the status in the resource cache.
2079 2078 */
2080 2079 void
2081 2080 fmd_case_update_status(fmd_case_t *cp, uint8_t *statusp, uint8_t *proxy_asrup,
2082 2081 uint8_t *diag_asrup)
2083 2082 {
2084 2083 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2085 2084 int count = 0;
2086 2085 fmd_asru_update_status_t faus;
2087 2086
2088 2087 /*
2089 2088 * update status of resource cache entries
2090 2089 */
2091 2090 faus.faus_countp = &count;
2092 2091 faus.faus_maxcount = cip->ci_nsuspects;
2093 2092 faus.faus_ba = statusp;
2094 2093 faus.faus_proxy_asru = proxy_asrup;
2095 2094 faus.faus_diag_asru = diag_asrup;
2096 2095 faus.faus_is_proxy = (cip->ci_xprt != NULL);
2097 2096 (void) pthread_mutex_lock(&cip->ci_lock);
2098 2097 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_update_status,
2099 2098 &faus);
2100 2099 (void) pthread_mutex_unlock(&cip->ci_lock);
2101 2100 }
2102 2101
2103 2102 /*
2104 2103 * Called on either the proxy side or the diag side when a repair has taken
2105 2104 * place on the other side but this side may know the asru "contains"
2106 2105 * relationships.
2107 2106 */
2108 2107 void
2109 2108 fmd_case_update_containees(fmd_case_t *cp)
2110 2109 {
2111 2110 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2112 2111
2113 2112 (void) pthread_mutex_lock(&cip->ci_lock);
2114 2113 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
2115 2114 fmd_asru_update_containees, NULL);
2116 2115 (void) pthread_mutex_unlock(&cip->ci_lock);
2117 2116 }
2118 2117
2119 2118 /*
2120 2119 * fmd_case_close_status() is called on diagnosing side when proxy side
2121 2120 * has had a uuclose. It updates the status in the resource cache.
2122 2121 */
2123 2122 void
2124 2123 fmd_case_close_status(fmd_case_t *cp)
2125 2124 {
2126 2125 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2127 2126 int count = 0;
2128 2127 fmd_asru_close_status_t facs;
2129 2128
2130 2129 /*
2131 2130 * update status of resource cache entries
2132 2131 */
2133 2132 facs.facs_countp = &count;
2134 2133 facs.facs_maxcount = cip->ci_nsuspects;
2135 2134 (void) pthread_mutex_lock(&cip->ci_lock);
2136 2135 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_close_status,
2137 2136 &facs);
2138 2137 (void) pthread_mutex_unlock(&cip->ci_lock);
2139 2138 }
2140 2139
2141 2140 /*
2142 2141 * Indicate that the case may need to change state because one or more of the
2143 2142 * ASRUs named as a suspect has changed state. We examine all the suspects
2144 2143 * and if none are still faulty, we initiate a case close transition.
2145 2144 */
2146 2145 void
2147 2146 fmd_case_update(fmd_case_t *cp)
2148 2147 {
2149 2148 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2150 2149 uint_t cstate;
2151 2150 int faulty = 0;
2152 2151
2153 2152 (void) pthread_mutex_lock(&cip->ci_lock);
2154 2153 cstate = cip->ci_state;
2155 2154
2156 2155 if (cip->ci_state < FMD_CASE_SOLVED) {
2157 2156 (void) pthread_mutex_unlock(&cip->ci_lock);
2158 2157 return; /* update is not appropriate */
2159 2158 }
2160 2159
2161 2160 if (cip->ci_flags & FMD_CF_REPAIRED) {
2162 2161 (void) pthread_mutex_unlock(&cip->ci_lock);
2163 2162 return; /* already repaired */
2164 2163 }
2165 2164
2166 2165 TRACE((FMD_DBG_CASE, "case update %s", cip->ci_uuid));
2167 2166 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
2168 2167 (void) pthread_mutex_unlock(&cip->ci_lock);
2169 2168
2170 2169 if (faulty) {
2171 2170 nvlist_t *nvl;
2172 2171 fmd_event_t *e;
2173 2172 char *class;
2174 2173
2175 2174 TRACE((FMD_DBG_CASE, "sending list.updated %s", cip->ci_uuid));
2176 2175 nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS);
2177 2176 (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
2178 2177 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
2179 2178 (void) pthread_rwlock_rdlock(&fmd.d_log_lock);
2180 2179 fmd_log_append(fmd.d_fltlog, e, cp);
2181 2180 (void) pthread_rwlock_unlock(&fmd.d_log_lock);
2182 2181 fmd_dispq_dispatch(fmd.d_disp, e, class);
2183 2182 return; /* one or more suspects are still marked faulty */
2184 2183 }
2185 2184
2186 2185 if (cstate == FMD_CASE_CLOSED)
2187 2186 fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
2188 2187 else
2189 2188 fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
2190 2189 }
2191 2190
2192 2191 /*
2193 2192 * Delete a closed case from the module's case list once the fmdo_close() entry
2194 2193 * point has run to completion. If the case is owned by a transport module,
2195 2194 * tell the transport to proxy a case close on the other end of the transport.
2196 2195 * Transition to the appropriate next state based on ci_flags. This
2197 2196 * function represents the end of CLOSE_WAIT and transitions the case to either
2198 2197 * CLOSED or REPAIRED or discards it entirely because it was never solved;
2199 2198 * refer to the topmost block comment explaining the state machine for details.
2200 2199 */
2201 2200 void
2202 2201 fmd_case_delete(fmd_case_t *cp)
2203 2202 {
2204 2203 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2205 2204 fmd_modstat_t *msp;
2206 2205 size_t buftotal;
2207 2206
2208 2207 TRACE((FMD_DBG_CASE, "case delete %s", cip->ci_uuid));
2209 2208 ASSERT(fmd_module_locked(cip->ci_mod));
2210 2209 fmd_list_delete(&cip->ci_mod->mod_cases, cip);
2211 2210 buftotal = fmd_buf_hash_destroy(&cip->ci_bufs);
2212 2211
2213 2212 (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
2214 2213 msp = cip->ci_mod->mod_stats;
2215 2214
2216 2215 ASSERT(msp->ms_caseopen.fmds_value.ui64 != 0);
2217 2216 msp->ms_caseopen.fmds_value.ui64--;
2218 2217
2219 2218 ASSERT(msp->ms_buftotal.fmds_value.ui64 >= buftotal);
2220 2219 msp->ms_buftotal.fmds_value.ui64 -= buftotal;
2221 2220
2222 2221 (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
2223 2222
2224 2223 if (cip->ci_xprt == NULL)
2225 2224 fmd_module_setcdirty(cip->ci_mod);
2226 2225
2227 2226 fmd_module_rele(cip->ci_mod);
2228 2227 cip->ci_mod = fmd.d_rmod;
2229 2228 fmd_module_hold(cip->ci_mod);
2230 2229
2231 2230 /*
2232 2231 * If the case has been solved, then retain it
2233 2232 * on the root module's case list at least until we're transitioned.
2234 2233 * Otherwise free the case with our final fmd_case_rele() below.
2235 2234 */
2236 2235 if (cip->ci_flags & FMD_CF_SOLVED) {
2237 2236 fmd_module_lock(cip->ci_mod);
2238 2237 fmd_list_append(&cip->ci_mod->mod_cases, cip);
2239 2238 fmd_module_unlock(cip->ci_mod);
2240 2239 fmd_case_hold(cp);
2241 2240 }
2242 2241
2243 2242 /*
2244 2243 * Transition onwards to REPAIRED or CLOSED as originally requested.
2245 2244 * Note that for proxy case if we're transitioning to CLOSED it means
2246 2245 * the case was isolated locally, so call fmd_xprt_uuclose() to notify
2247 2246 * the diagnosing side. No need to notify the diagnosing side if we are
2248 2247 * transitioning to REPAIRED as we only do this when requested to do
2249 2248 * so by the diagnosing side anyway.
2250 2249 */
2251 2250 if (cip->ci_flags & FMD_CF_REPAIRED)
2252 2251 fmd_case_transition(cp, FMD_CASE_REPAIRED, 0);
2253 2252 else if (cip->ci_flags & FMD_CF_ISOLATED) {
2254 2253 fmd_case_transition(cp, FMD_CASE_CLOSED, 0);
2255 2254 if (cip->ci_xprt != NULL)
2256 2255 fmd_xprt_uuclose(cip->ci_xprt, cip->ci_uuid);
2257 2256 }
2258 2257
2259 2258 fmd_case_rele(cp);
2260 2259 }
2261 2260
2262 2261 void
2263 2262 fmd_case_discard(fmd_case_t *cp, boolean_t delete_from_asru_cache)
2264 2263 {
2265 2264 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2266 2265
2267 2266 (void) pthread_mutex_lock(&cip->ci_mod->mod_stats_lock);
2268 2267 cip->ci_mod->mod_stats->ms_caseopen.fmds_value.ui64--;
2269 2268 (void) pthread_mutex_unlock(&cip->ci_mod->mod_stats_lock);
2270 2269
2271 2270 ASSERT(fmd_module_locked(cip->ci_mod));
2272 2271 fmd_list_delete(&cip->ci_mod->mod_cases, cip);
2273 2272 if (delete_from_asru_cache) {
2274 2273 (void) pthread_mutex_lock(&cip->ci_lock);
2275 2274 fmd_asru_hash_delete_case(fmd.d_asrus, cp);
2276 2275 (void) pthread_mutex_unlock(&cip->ci_lock);
2277 2276 }
2278 2277 fmd_case_rele(cp);
2279 2278 }
2280 2279
2281 2280 /*
2282 2281 * Indicate that the problem corresponding to a case has been repaired by
2283 2282 * clearing the faulty bit on each ASRU named as a suspect. If the case hasn't
2284 2283 * already been closed, this function initiates the transition to CLOSE_WAIT.
2285 2284 * The caller must have the case held from fmd_case_hash_lookup(), so we can
2286 2285 * grab and drop ci_lock without the case being able to be freed in between.
2287 2286 */
2288 2287 int
2289 2288 fmd_case_repair(fmd_case_t *cp)
2290 2289 {
2291 2290 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2292 2291 uint_t cstate;
2293 2292 fmd_asru_rep_arg_t fara;
2294 2293
2295 2294 (void) pthread_mutex_lock(&cip->ci_lock);
2296 2295 cstate = cip->ci_state;
2297 2296
2298 2297 if (cstate < FMD_CASE_SOLVED) {
2299 2298 (void) pthread_mutex_unlock(&cip->ci_lock);
2300 2299 return (fmd_set_errno(EFMD_CASE_STATE));
2301 2300 }
2302 2301
2303 2302 if (cip->ci_flags & FMD_CF_REPAIRED) {
2304 2303 (void) pthread_mutex_unlock(&cip->ci_lock);
2305 2304 return (0); /* already repaired */
2306 2305 }
2307 2306
2308 2307 TRACE((FMD_DBG_CASE, "case repair %s", cip->ci_uuid));
2309 2308 fara.fara_reason = FMD_ASRU_REPAIRED;
2310 2309 fara.fara_bywhat = FARA_BY_CASE;
2311 2310 fara.fara_rval = NULL;
2312 2311 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, &fara);
2313 2312 (void) pthread_mutex_unlock(&cip->ci_lock);
2314 2313
2315 2314 /*
2316 2315 * If this is a proxied case, send the repair across the transport.
2317 2316 * The remote side will then do the repair and send a list.repaired back
2318 2317 * again such that we can finally repair the case on this side.
2319 2318 */
2320 2319 if (cip->ci_xprt != NULL) {
2321 2320 fmd_case_xprt_updated(cp);
2322 2321 return (0);
2323 2322 }
2324 2323
2325 2324 if (cstate == FMD_CASE_CLOSED)
2326 2325 fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
2327 2326 else
2328 2327 fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
2329 2328
2330 2329 return (0);
2331 2330 }
2332 2331
2333 2332 int
2334 2333 fmd_case_acquit(fmd_case_t *cp)
2335 2334 {
2336 2335 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2337 2336 uint_t cstate;
2338 2337 fmd_asru_rep_arg_t fara;
2339 2338
2340 2339 (void) pthread_mutex_lock(&cip->ci_lock);
2341 2340 cstate = cip->ci_state;
2342 2341
2343 2342 if (cstate < FMD_CASE_SOLVED) {
2344 2343 (void) pthread_mutex_unlock(&cip->ci_lock);
2345 2344 return (fmd_set_errno(EFMD_CASE_STATE));
2346 2345 }
2347 2346
2348 2347 if (cip->ci_flags & FMD_CF_REPAIRED) {
2349 2348 (void) pthread_mutex_unlock(&cip->ci_lock);
2350 2349 return (0); /* already repaired */
2351 2350 }
2352 2351
2353 2352 TRACE((FMD_DBG_CASE, "case acquit %s", cip->ci_uuid));
2354 2353 fara.fara_reason = FMD_ASRU_ACQUITTED;
2355 2354 fara.fara_bywhat = FARA_BY_CASE;
2356 2355 fara.fara_rval = NULL;
2357 2356 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_asru_repaired, &fara);
2358 2357 (void) pthread_mutex_unlock(&cip->ci_lock);
2359 2358
2360 2359 /*
2361 2360 * If this is a proxied case, send the repair across the transport.
2362 2361 * The remote side will then do the repair and send a list.repaired back
2363 2362 * again such that we can finally repair the case on this side.
2364 2363 */
2365 2364 if (cip->ci_xprt != NULL) {
2366 2365 fmd_case_xprt_updated(cp);
2367 2366 return (0);
2368 2367 }
2369 2368
2370 2369 if (cstate == FMD_CASE_CLOSED)
2371 2370 fmd_case_transition(cp, FMD_CASE_REPAIRED, FMD_CF_REPAIRED);
2372 2371 else
2373 2372 fmd_case_transition(cp, FMD_CASE_CLOSE_WAIT, FMD_CF_REPAIRED);
2374 2373
2375 2374 return (0);
2376 2375 }
2377 2376
2378 2377 int
2379 2378 fmd_case_contains(fmd_case_t *cp, fmd_event_t *ep)
2380 2379 {
2381 2380 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2382 2381 fmd_case_item_t *cit;
2383 2382 uint_t state;
2384 2383 int rv = 0;
2385 2384
2386 2385 (void) pthread_mutex_lock(&cip->ci_lock);
2387 2386
2388 2387 if (cip->ci_state >= FMD_CASE_SOLVED)
2389 2388 state = FMD_EVS_DIAGNOSED;
2390 2389 else
2391 2390 state = FMD_EVS_ACCEPTED;
2392 2391
2393 2392 for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
2394 2393 if ((rv = fmd_event_equal(ep, cit->cit_event)) != 0)
2395 2394 break;
2396 2395 }
2397 2396
2398 2397 if (rv == 0 && cip->ci_principal != NULL)
2399 2398 rv = fmd_event_equal(ep, cip->ci_principal);
2400 2399
2401 2400 (void) pthread_mutex_unlock(&cip->ci_lock);
2402 2401
2403 2402 if (rv != 0)
2404 2403 fmd_event_transition(ep, state);
2405 2404
2406 2405 return (rv);
2407 2406 }
2408 2407
2409 2408 int
2410 2409 fmd_case_orphaned(fmd_case_t *cp)
2411 2410 {
2412 2411 return (((fmd_case_impl_t *)cp)->ci_mod == fmd.d_rmod);
2413 2412 }
2414 2413
2415 2414 void
2416 2415 fmd_case_settime(fmd_case_t *cp, time_t tv_sec, suseconds_t tv_usec)
2417 2416 {
2418 2417 ((fmd_case_impl_t *)cp)->ci_tv.tv_sec = tv_sec;
2419 2418 ((fmd_case_impl_t *)cp)->ci_tv.tv_usec = tv_usec;
2420 2419 ((fmd_case_impl_t *)cp)->ci_tv_valid = 1;
2421 2420 }
2422 2421
2423 2422 void
↓ open down ↓ |
1087 lines elided |
↑ open up ↑ |
2424 2423 fmd_case_set_injected(fmd_case_t *cp)
2425 2424 {
2426 2425 ((fmd_case_impl_t *)cp)->ci_injected = 1;
2427 2426 }
2428 2427
2429 2428 void
2430 2429 fmd_case_set_de_fmri(fmd_case_t *cp, nvlist_t *nvl)
2431 2430 {
2432 2431 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2433 2432
2434 - if (cip->ci_diag_de)
2435 - nvlist_free(cip->ci_diag_de);
2433 + nvlist_free(cip->ci_diag_de);
2436 2434 cip->ci_diag_de = nvl;
2437 2435 }
2438 2436
2439 2437 void
2440 2438 fmd_case_setcode(fmd_case_t *cp, char *code)
2441 2439 {
2442 2440 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2443 2441
2444 2442 cip->ci_code = fmd_strdup(code, FMD_SLEEP);
2445 2443 cip->ci_codelen = cip->ci_code ? strlen(cip->ci_code) + 1 : 0;
2446 2444 }
2447 2445
2448 2446 /*ARGSUSED*/
2449 2447 static void
2450 2448 fmd_case_repair_replay_case(fmd_case_t *cp, void *arg)
2451 2449 {
2452 2450 int not_faulty = 0;
2453 2451 int faulty = 0;
2454 2452 nvlist_t *nvl;
2455 2453 fmd_event_t *e;
2456 2454 char *class;
2457 2455 int any_unusable_and_present = 0;
2458 2456 fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
2459 2457
2460 2458 if (cip->ci_state < FMD_CASE_SOLVED || cip->ci_xprt != NULL)
2461 2459 return;
2462 2460
2463 2461 if (cip->ci_state == FMD_CASE_RESOLVED) {
2464 2462 cip->ci_flags |= FMD_CF_RES_CMPL;
2465 2463 return;
2466 2464 }
2467 2465
2468 2466 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_faulty, &faulty);
2469 2467 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp, fmd_case_not_faulty,
2470 2468 &not_faulty);
2471 2469
2472 2470 if (cip->ci_state >= FMD_CASE_REPAIRED && !faulty) {
2473 2471 /*
2474 2472 * If none of the suspects is faulty, replay the list.repaired.
2475 2473 * If all suspects are already either usable or not present then
2476 2474 * also transition straight to RESOLVED state.
2477 2475 */
2478 2476 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
2479 2477 fmd_case_unusable_and_present, &any_unusable_and_present);
2480 2478 if (!any_unusable_and_present) {
2481 2479 cip->ci_state = FMD_CASE_RESOLVED;
2482 2480
2483 2481 TRACE((FMD_DBG_CASE, "replay sending list.repaired %s",
2484 2482 cip->ci_uuid));
2485 2483 nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
2486 2484 (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
2487 2485 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl,
2488 2486 class);
2489 2487 fmd_dispq_dispatch(fmd.d_disp, e, class);
2490 2488
2491 2489 TRACE((FMD_DBG_CASE, "replay sending list.resolved %s",
2492 2490 cip->ci_uuid));
2493 2491 fmd_case_publish(cp, FMD_CASE_RESOLVED);
2494 2492 fmd_asru_hash_apply_by_case(fmd.d_asrus, cp,
2495 2493 fmd_asru_log_resolved, NULL);
2496 2494 cip->ci_flags |= FMD_CF_RES_CMPL;
2497 2495 } else {
2498 2496 TRACE((FMD_DBG_CASE, "replay sending list.repaired %s",
2499 2497 cip->ci_uuid));
2500 2498 nvl = fmd_case_mkevent(cp, FM_LIST_REPAIRED_CLASS);
2501 2499 (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
2502 2500 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl,
2503 2501 class);
2504 2502 fmd_dispq_dispatch(fmd.d_disp, e, class);
2505 2503 }
2506 2504 } else if (faulty && not_faulty) {
2507 2505 /*
2508 2506 * If some but not all of the suspects are not faulty, replay
2509 2507 * the list.updated.
2510 2508 */
2511 2509 TRACE((FMD_DBG_CASE, "replay sending list.updated %s",
2512 2510 cip->ci_uuid));
2513 2511 nvl = fmd_case_mkevent(cp, FM_LIST_UPDATED_CLASS);
2514 2512 (void) nvlist_lookup_string(nvl, FM_CLASS, &class);
2515 2513 e = fmd_event_create(FMD_EVT_PROTOCOL, FMD_HRT_NOW, nvl, class);
2516 2514 fmd_dispq_dispatch(fmd.d_disp, e, class);
2517 2515 }
2518 2516 }
2519 2517
2520 2518 void
2521 2519 fmd_case_repair_replay()
2522 2520 {
2523 2521 fmd_case_hash_apply(fmd.d_cases, fmd_case_repair_replay_case, NULL);
2524 2522 }
↓ open down ↓ |
79 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX