xref: /illumos-gate/usr/src/cmd/fm/fmadm/common/faulty.c (revision 7adb68a6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2020 Joyent, Inc.
24  */
25 
26 #include <sys/types.h>
27 #include <fmadm.h>
28 #include <errno.h>
29 #include <limits.h>
30 #include <strings.h>
31 #include <stdio.h>
32 #include <unistd.h>
33 #include <sys/wait.h>
34 #include <sys/stat.h>
35 #include <fcntl.h>
36 #include <fm/fmd_log.h>
37 #include <sys/fm/protocol.h>
38 #include <fm/libtopo.h>
39 #include <fm/fmd_adm.h>
40 #include <fm/fmd_msg.h>
41 #include <dlfcn.h>
42 #include <sys/systeminfo.h>
43 #include <sys/utsname.h>
44 #include <libintl.h>
45 #include <locale.h>
46 #include <sys/smbios.h>
47 #include <libdevinfo.h>
48 #include <stdlib.h>
49 #include <stddef.h>
50 
51 /*
52  * Fault records are added to the catalog by add_fault_record_to_catalog();
53  * records are stored in order of importance to the system.
54  * If -g flag is set or not_suppressed is not set and the class fru, fault,
55  * type are the same then details are merged into an existing record, with
56  * uuid records stored in time order.
57  * For each record, information is extracted from the nvlist and merged into
58  * linked lists; each list is checked for identical entries, whose percentage
59  * certainties are added together.
60  * print_catalog() is called to print out catalog and release external resources
61  *
62  *                         /---------------\
63  *	status_rec_list -> |               | -|
64  *                         \---------------/
65  *                                \/
66  *                         /---------------\    /-------\    /-------\
67  *      status_fru_list    | status_record | -> | uurec | -> | uurec | -|
68  *            \/           |               | |- |       | <- |       |
69  *      /-------------\    |               |    \-------/    \-------/
70  *      |             | -> |               |       \/           \/
71  *      \-------------/    |               |    /-------\    /-------\
72  *            \/           |               | -> | asru  | -> | asru  |
73  *            ---          |               |    |       | <- |       |
74  *                         |               |    \-------/    \-------/
75  *      status_asru_list   |  class        |
76  *            \/           |  resource     |    /-------\    /-------\
77  *      /-------------\    |  fru          | -> | list  | -> | list  |
78  *      |             | -> |  serial       |    |       | <- |       |
79  *      \-------------/    |               |    \-------/    \-------/
80  *            \/           \---------------/
81  *            ---               \/    /\
82  *                         /---------------\
83  *                         | status_record |
84  *                         \---------------/
85  *
86  * Fmadm faulty takes a number of options which affect the format of the
87  * output displayed. By default, the display reports the FRU and ASRU along
88  * with other information on per-case basis as in the example below.
89  *
90  * --------------- ------------------------------------  -------------- -------
91  * TIME            EVENT-ID                              MSG-ID         SEVERITY
92  * --------------- ------------------------------------  -------------- -------
93  * Sep 21 10:01:36 d482f935-5c8f-e9ab-9f25-d0aaafec1e6c  AMD-8000-2F    Major
94  *
95  * Fault class	: fault.memory.dimm_sb
96  * Affects	: mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0
97  *		    faulted but still in service
98  * FRU		: "CPU 0 DIMM 0" (hc://.../memory-controller=0/dimm=0)
99  *		    faulty
100  *
101  * Description	: The number of errors associated with this memory module has
102  *		exceeded acceptable levels.  Refer to
103  *		http://illumos.org/msg/AMD-8000-2F for more information.
104  *
105  * Response	: Pages of memory associated with this memory module are being
106  *		removed from service as errors are reported.
107  *
108  * Impact	: Total system memory capacity will be reduced as pages are
109  *		retired.
110  *
111  * Action	: Schedule a repair procedure to replace the affected memory
112  *		module.  Use fmdump -v -u <EVENT_ID> to identify the module.
113  *
114  * The -v flag is similar, but adds some additional information such as the
115  * resource. The -s flag is also similar but just gives the top line summary.
116  * All these options (ie without the -f or -r flags) use the print_catalog()
117  * function to do the display.
118  *
119  * The -f flag changes the output so that it appears sorted on a per-fru basis.
120  * The output is somewhat cut down compared to the default output. If -f is
121  * used, then print_fru() is used to print the output.
122  *
123  * -----------------------------------------------------------------------------
124  * "SLOT 2" (hc://.../hostbridge=3/pciexrc=3/pciexbus=4/pciexdev=0) faulty
125  * 5ca4aeb3-36...f6be-c2e8166dc484 2 suspects in this FRU total certainty 100%
126  *
127  * Description	: A problem was detected for a PCI device.
128  *		Refer to http://illumos.org/msg/PCI-8000-7J
129  *		for more information.
130  *
131  * Response	: One or more device instances may be disabled
132  *
133  * Impact	: Possible loss of services provided by the device instances
134  *		associated with this fault
135  *
136  * Action	: Schedule a repair procedure to replace the affected device.
137  *		Use fmdump -v -u <EVENT_ID> to identify the device or contact
138  *		Sun for support.
139  *
140  * The -r flag changes the output so that it appears sorted on a per-asru basis.
141  * The output is very much cut down compared to the default output, just giving
142  * the asru fmri and state. Here print_asru() is used to print the output.
143  *
144  * mem:///motherboard=0/chip=0/memory-controller=0/dimm=0/rank=0	degraded
145  *
146  * For all fmadm faulty options, the sequence of events is
147  *
148  * 1) Walk through all the cases in the system using fmd_adm_case_iter() and
149  * for each case call dfault_rec(). This will call add_fault_record_to_catalog()
150  * This will extract the data from the nvlist and call catalog_new_record() to
151  * save the data away in various linked lists in the catalogue.
152  *
153  * 2) Once this is done, the data can be supplemented by using
154  * fmd_adm_rsrc_iter(). However this is now only necessary for the -i option.
155  *
156  * 3) Finally print_catalog(), print_fru() or print_asru() are called as
157  * appropriate to display the information from the catalogue sorted in the
158  * requested way.
159  *
160  */
161 
162 typedef struct name_list {
163 	struct name_list *next;
164 	struct name_list *prev;
165 	char *name;
166 	uint8_t pct;
167 	uint8_t max_pct;
168 	ushort_t count;
169 	int status;
170 	char *label;
171 } name_list_t;
172 
/*
 * Singly-linked, NULL-terminated list of uuid strings hung off a uurec_t by
 * update_asru_state_in_catalog().
 */
typedef struct ari_list {
	char *ari_uuid;		/* strdup'd uuid string */
	struct ari_list *next;	/* next entry, NULL at the end of the list */
} ari_list_t;
177 
178 typedef struct uurec {
179 	struct uurec *next;
180 	struct uurec *prev;
181 	char *uuid;
182 	ari_list_t *ari_uuid_list;
183 	name_list_t *asru;
184 	uint64_t sec;
185 	nvlist_t *event;
186 } uurec_t;
187 
/*
 * Singly-linked list of uuid strings.
 * NOTE(review): no user of this type is visible in this part of the file;
 * presumably it holds uuids selected on the command line - confirm.
 */
typedef struct uurec_select {
	struct uurec_select *next;	/* next entry in the list */
	char *uuid;			/* uuid string */
} uurec_select_t;
192 
/*
 * Identity of the host a diagnosis came from, built by find_hostid() from
 * the authority of the diagnosis engine FMRI.  platform and server are
 * stored as "-" when absent; the other members are NULL when absent.
 */
typedef struct host_id {
	char *chassis;		/* FM_FMRI_AUTH_CHASSIS, or NULL */
	char *server;		/* FM_FMRI_AUTH_SERVER, or "-" */
	char *platform;		/* FM_FMRI_AUTH_PRODUCT, or "-" */
	char *domain;		/* FM_FMRI_AUTH_DOMAIN, or NULL */
	char *product_sn;	/* FM_FMRI_AUTH_PRODUCT_SN, or NULL */
} hostid_t;
200 
201 typedef struct host_id_list {
202 	hostid_t hostid;
203 	struct host_id_list *next;
204 } host_id_list_t;
205 
206 typedef struct status_record {
207 	hostid_t *host;
208 	int nrecs;
209 	uurec_t *uurec;
210 	char *severity;			/* in C locale */
211 	char *msgid;
212 	name_list_t *class;
213 	name_list_t *resource;
214 	name_list_t *asru;
215 	name_list_t *fru;
216 	name_list_t *serial;
217 	uint8_t not_suppressed;
218 	uint8_t injected;
219 } status_record_t;
220 
/*
 * Node of a circular, doubly-linked list of status records; see
 * add_rec_list() for how the list is ordered.
 */
typedef struct sr_list {
	struct sr_list *next;			/* next node (circular) */
	struct sr_list *prev;			/* previous node (circular) */
	struct status_record *status_record;	/* record held by this node */
} sr_list_t;
226 
227 typedef struct resource_list {
228 	struct resource_list *next;
229 	struct resource_list *prev;
230 	sr_list_t *status_rec_list;
231 	char *resource;
232 	uint8_t not_suppressed;
233 	uint8_t injected;
234 	uint8_t max_pct;
235 } resource_list_t;
236 
237 sr_list_t *status_rec_list;
238 resource_list_t *status_fru_list;
239 resource_list_t *status_asru_list;
240 
241 static int max_display;
242 static int max_fault = 0;
243 static topo_hdl_t *topo_handle;
244 static host_id_list_t *host_list;
245 static int n_server;
246 static int opt_g;
247 static fmd_msg_hdl_t *fmadm_msghdl = NULL; /* handle for libfmd_msg calls */
248 
/*
 * Format the time 'sec' (seconds since the epoch) into buf, which holds len
 * bytes, and return buf.
 *
 * Times more than a minute in the future or more than about six months in
 * the past are shown with the year ("%b %d %Y    "); anything else is shown
 * with the time of day ("%b %d %T").  A value that does not fit in a long is
 * reported on stderr and rendered in hex.  Hex is also used if the time
 * cannot be broken down or the formatted text does not fit in buf.
 */
static char *
format_date(char *buf, size_t len, uint64_t sec)
{
	const char *fmt;
	struct tm *tmp;
	time_t tod, now;

	if (sec > LONG_MAX) {
		(void) fprintf(stderr,
		    "record time is too large for 32-bit utility\n");
		/* uint64_t need not be unsigned long long; cast for %llx */
		(void) snprintf(buf, len, "0x%llx", (unsigned long long)sec);
		return (buf);
	}

	tod = (time_t)sec;
	now = time(NULL);
	if (tod > now + 60 ||
	    tod < now - 6L*30L*24L*60L*60L) {	/* 6 months ago */
		fmt = "%b %d %Y    ";
	} else {
		fmt = "%b %d %T";
	}

	/*
	 * localtime() may return NULL, and strftime() returns 0 and leaves
	 * buf undefined when the result does not fit.
	 */
	tmp = localtime(&tod);
	if (tmp == NULL || strftime(buf, len, fmt, tmp) == 0)
		(void) snprintf(buf, len, "0x%llx", (unsigned long long)sec);

	return (buf);
}
270 
271 static hostid_t *
find_hostid_in_list(char * platform,char * chassis,char * server,char * domain,char * product_sn)272 find_hostid_in_list(char *platform, char *chassis, char *server, char *domain,
273     char *product_sn)
274 {
275 	hostid_t *rt = NULL;
276 	host_id_list_t *hostp;
277 
278 	if (platform == NULL)
279 		platform = "-";
280 	if (server == NULL)
281 		server = "-";
282 	hostp = host_list;
283 	while (hostp) {
284 		if (hostp->hostid.platform &&
285 		    strcmp(hostp->hostid.platform, platform) == 0 &&
286 		    hostp->hostid.server &&
287 		    strcmp(hostp->hostid.server, server) == 0 &&
288 		    (chassis == NULL || hostp->hostid.chassis == NULL ||
289 		    strcmp(chassis, hostp->hostid.chassis) == 0) &&
290 		    (product_sn == NULL || hostp->hostid.product_sn == NULL ||
291 		    strcmp(product_sn, hostp->hostid.product_sn) == 0) &&
292 		    (domain == NULL || hostp->hostid.domain == NULL ||
293 		    strcmp(domain, hostp->hostid.domain) == 0)) {
294 			rt = &hostp->hostid;
295 			break;
296 		}
297 		hostp = hostp->next;
298 	}
299 	if (rt == NULL) {
300 		hostp = malloc(sizeof (host_id_list_t));
301 		hostp->hostid.platform = strdup(platform);
302 		hostp->hostid.product_sn =
303 		    product_sn ? strdup(product_sn) : NULL;
304 		hostp->hostid.server = strdup(server);
305 		hostp->hostid.chassis = chassis ? strdup(chassis) : NULL;
306 		hostp->hostid.domain = domain ? strdup(domain) : NULL;
307 		hostp->next = host_list;
308 		host_list = hostp;
309 		rt = &hostp->hostid;
310 		n_server++;
311 	}
312 	return (rt);
313 }
314 
315 static hostid_t *
find_hostid(nvlist_t * nvl)316 find_hostid(nvlist_t *nvl)
317 {
318 	char *platform = NULL, *chassis = NULL, *server = NULL, *domain = NULL;
319 	char *product_sn = NULL;
320 	nvlist_t *auth, *fmri;
321 	hostid_t *rt = NULL;
322 
323 	if (nvlist_lookup_nvlist(nvl, FM_SUSPECT_DE, &fmri) == 0 &&
324 	    nvlist_lookup_nvlist(fmri, FM_FMRI_AUTHORITY, &auth) == 0) {
325 		(void) nvlist_lookup_string(auth, FM_FMRI_AUTH_PRODUCT,
326 		    &platform);
327 		(void) nvlist_lookup_string(auth, FM_FMRI_AUTH_PRODUCT_SN,
328 		    &product_sn);
329 		(void) nvlist_lookup_string(auth, FM_FMRI_AUTH_SERVER, &server);
330 		(void) nvlist_lookup_string(auth, FM_FMRI_AUTH_CHASSIS,
331 		    &chassis);
332 		(void) nvlist_lookup_string(auth, FM_FMRI_AUTH_DOMAIN, &domain);
333 		rt = find_hostid_in_list(platform, chassis, server,
334 		    domain, product_sn);
335 	}
336 	return (rt);
337 }
338 
339 static char *
get_nvl2str_topo(nvlist_t * nvl)340 get_nvl2str_topo(nvlist_t *nvl)
341 {
342 	char *name = NULL;
343 	char *tname;
344 	int err;
345 	char *scheme = NULL;
346 	char *mod_name = NULL;
347 	char buf[128];
348 
349 	if (topo_handle == NULL)
350 		topo_handle = topo_open(TOPO_VERSION, 0, &err);
351 	if (topo_fmri_nvl2str(topo_handle, nvl, &tname, &err) == 0) {
352 		name = strdup(tname);
353 		topo_hdl_strfree(topo_handle, tname);
354 	} else {
355 		(void) nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &scheme);
356 		(void) nvlist_lookup_string(nvl, FM_FMRI_MOD_NAME, &mod_name);
357 		if (scheme && strcmp(scheme, FM_FMRI_SCHEME_FMD) == 0 &&
358 		    mod_name) {
359 			(void) snprintf(buf, sizeof (buf), "%s:///module/%s",
360 			    scheme, mod_name);
361 			name = strdup(buf);
362 		}
363 	}
364 	return (name);
365 }
366 
/*
 * Map a severity name to a numeric rank: "Critical" is 100, "Major" is 10,
 * "Minor" is 1.  NULL and any other string rank as 0.
 */
static int
set_priority(char *s)
{
	if (s == NULL)
		return (0);
	if (strcmp(s, "Critical") == 0)
		return (100);
	if (strcmp(s, "Major") == 0)
		return (10);
	if (strcmp(s, "Minor") == 0)
		return (1);
	return (0);
}
382 
/*
 * Compare two entries for ordering.  Severity (s1 against s2) decides first,
 * then time (t1 against t2, later is greater), then certainty (p1 against
 * p2).  The result is positive if the first entry ranks higher, negative if
 * the second does, and zero if they tie on all three.
 */
static int
cmp_priority(char *s1, char *s2, uint64_t t1, uint64_t t2, uint8_t p1,
    uint8_t p2)
{
	int diff = set_priority(s1) - set_priority(s2);

	if (diff != 0)
		return (diff);
	if (t1 != t2)
		return (t1 > t2 ? 1 : -1);
	return (p1 - p2);
}
403 
404 /*
405  * merge two lists into one, by comparing enties in new and moving into list if
406  * name is not there or free off memory for names which are already there
407  * add_pct indicates if pct is the sum or highest pct
408  */
409 static name_list_t *
merge_name_list(name_list_t ** list,name_list_t * new,int add_pct)410 merge_name_list(name_list_t **list, name_list_t *new, int add_pct)
411 {
412 	name_list_t *lp, *np, *sp, *rt = NULL;
413 	int max_pct;
414 
415 	rt = *list;
416 	np = new;
417 	while (np) {
418 		lp = *list;
419 		while (lp) {
420 			if (strcmp(lp->name, np->name) == 0)
421 				break;
422 			lp = lp->next;
423 			if (lp == *list)
424 				lp = NULL;
425 		}
426 		if (np->next == new)
427 			sp = NULL;
428 		else
429 			sp = np->next;
430 		if (lp) {
431 			lp->status |= (np->status & FM_SUSPECT_FAULTY);
432 			if (add_pct) {
433 				lp->pct += np->pct;
434 				lp->count += np->count;
435 			} else if (np->pct > lp->pct) {
436 				lp->pct = np->pct;
437 			}
438 			max_pct = np->max_pct;
439 			if (np->label)
440 				free(np->label);
441 			free(np->name);
442 			free(np);
443 			np = NULL;
444 			if (max_pct > lp->max_pct) {
445 				lp->max_pct = max_pct;
446 				if (lp->max_pct > lp->prev->max_pct &&
447 				    lp != *list) {
448 					lp->prev->next = lp->next;
449 					lp->next->prev = lp->prev;
450 					np = lp;
451 				}
452 			}
453 		}
454 		if (np) {
455 			lp = *list;
456 			if (lp) {
457 				if (np->max_pct > lp->max_pct) {
458 					np->next = lp;
459 					np->prev = lp->prev;
460 					lp->prev->next = np;
461 					lp->prev = np;
462 					*list = np;
463 					rt = np;
464 				} else {
465 					lp = lp->next;
466 					while (lp != *list &&
467 					    np->max_pct < lp->max_pct) {
468 						lp = lp->next;
469 					}
470 					np->next = lp;
471 					np->prev = lp->prev;
472 					lp->prev->next = np;
473 					lp->prev = np;
474 				}
475 			} else {
476 				*list = np;
477 				np->next = np;
478 				np->prev = np;
479 				rt = np;
480 			}
481 		}
482 		np = sp;
483 	}
484 	return (rt);
485 }
486 
487 static name_list_t *
alloc_name_list(char * name,uint8_t pct)488 alloc_name_list(char *name, uint8_t pct)
489 {
490 	name_list_t *nlp;
491 
492 	nlp = malloc(sizeof (*nlp));
493 	nlp->name = strdup(name);
494 	nlp->pct = pct;
495 	nlp->max_pct = pct;
496 	nlp->count = 1;
497 	nlp->next = nlp;
498 	nlp->prev = nlp;
499 	nlp->status = 0;
500 	nlp->label = NULL;
501 	return (nlp);
502 }
503 
504 static status_record_t *
new_record_init(uurec_t * uurec_p,char * msgid,name_list_t * class,name_list_t * fru,name_list_t * asru,name_list_t * resource,name_list_t * serial,boolean_t not_suppressed,hostid_t * hostid,boolean_t injected)505 new_record_init(uurec_t *uurec_p, char *msgid, name_list_t *class,
506     name_list_t *fru, name_list_t *asru, name_list_t *resource,
507     name_list_t *serial, boolean_t not_suppressed,
508     hostid_t *hostid, boolean_t injected)
509 {
510 	status_record_t *status_rec_p;
511 
512 	status_rec_p = (status_record_t *)malloc(sizeof (status_record_t));
513 	status_rec_p->nrecs = 1;
514 	status_rec_p->host = hostid;
515 	status_rec_p->uurec = uurec_p;
516 	uurec_p->next = NULL;
517 	uurec_p->prev = NULL;
518 	uurec_p->asru = asru;
519 	if ((status_rec_p->severity = fmd_msg_getitem_id(fmadm_msghdl, NULL,
520 	    msgid, FMD_MSG_ITEM_SEVERITY)) == NULL)
521 		status_rec_p->severity = strdup("unknown");
522 	status_rec_p->class = class;
523 	status_rec_p->fru = fru;
524 	status_rec_p->asru = asru;
525 	status_rec_p->resource = resource;
526 	status_rec_p->serial = serial;
527 	status_rec_p->msgid = strdup(msgid);
528 	status_rec_p->not_suppressed = not_suppressed;
529 	status_rec_p->injected = injected;
530 	return (status_rec_p);
531 }
532 
533 /*
534  * add record to given list maintaining order higher priority first.
535  */
536 static void
add_rec_list(status_record_t * status_rec_p,sr_list_t ** list_pp)537 add_rec_list(status_record_t *status_rec_p, sr_list_t **list_pp)
538 {
539 	sr_list_t *tp, *np, *sp;
540 	int order;
541 	uint64_t sec;
542 
543 	np = malloc(sizeof (sr_list_t));
544 	np->status_record = status_rec_p;
545 	sec = status_rec_p->uurec->sec;
546 	if ((sp = *list_pp) == NULL) {
547 		*list_pp = np;
548 		np->next = np;
549 		np->prev = np;
550 	} else {
551 		/* insert new record in front of lower priority */
552 		tp = sp;
553 		order = cmp_priority(status_rec_p->severity,
554 		    sp->status_record->severity, sec,
555 		    tp->status_record->uurec->sec, 0, 0);
556 		if (order > 0) {
557 			*list_pp = np;
558 		} else {
559 			tp = sp->next;
560 			while (tp != sp &&
561 			    cmp_priority(status_rec_p->severity,
562 			    tp->status_record->severity, sec,
563 			    tp->status_record->uurec->sec, 0, 0)) {
564 				tp = tp->next;
565 			}
566 		}
567 		np->next = tp;
568 		np->prev = tp->prev;
569 		tp->prev->next = np;
570 		tp->prev = np;
571 	}
572 }
573 
574 static void
add_resource(status_record_t * status_rec_p,resource_list_t ** rp,resource_list_t * np)575 add_resource(status_record_t *status_rec_p, resource_list_t **rp,
576     resource_list_t *np)
577 {
578 	int order;
579 	uint64_t sec;
580 	resource_list_t *sp, *tp;
581 	status_record_t *srp;
582 	char *severity = status_rec_p->severity;
583 
584 	add_rec_list(status_rec_p, &np->status_rec_list);
585 	if ((sp = *rp) == NULL) {
586 		np->next = np;
587 		np->prev = np;
588 		*rp = np;
589 	} else {
590 		/*
591 		 * insert new record in front of lower priority
592 		 */
593 		tp = sp->next;
594 		srp = sp->status_rec_list->status_record;
595 		sec = status_rec_p->uurec->sec;
596 		order = cmp_priority(severity, srp->severity, sec,
597 		    srp->uurec->sec, np->max_pct, sp->max_pct);
598 		if (order > 0) {
599 			*rp = np;
600 		} else {
601 			srp = tp->status_rec_list->status_record;
602 			while (tp != sp &&
603 			    cmp_priority(severity, srp->severity, sec,
604 			    srp->uurec->sec, np->max_pct, sp->max_pct) < 0) {
605 				tp = tp->next;
606 				srp = tp->status_rec_list->status_record;
607 			}
608 		}
609 		np->next = tp;
610 		np->prev = tp->prev;
611 		tp->prev->next = np;
612 		tp->prev = np;
613 	}
614 }
615 
616 static void
add_resource_list(status_record_t * status_rec_p,name_list_t * fp,resource_list_t ** rpp)617 add_resource_list(status_record_t *status_rec_p, name_list_t *fp,
618     resource_list_t **rpp)
619 {
620 	int order;
621 	resource_list_t *np, *end;
622 	status_record_t *srp;
623 
624 	np = *rpp;
625 	end = np;
626 	while (np) {
627 		if (strcmp(fp->name, np->resource) == 0) {
628 			np->not_suppressed |= status_rec_p->not_suppressed;
629 			np->injected |= status_rec_p->injected;
630 			srp = np->status_rec_list->status_record;
631 			order = cmp_priority(status_rec_p->severity,
632 			    srp->severity, status_rec_p->uurec->sec,
633 			    srp->uurec->sec, fp->max_pct, np->max_pct);
634 			if (order > 0 && np != end) {
635 				/*
636 				 * remove from list and add again using
637 				 * new priority
638 				 */
639 				np->prev->next = np->next;
640 				np->next->prev = np->prev;
641 				add_resource(status_rec_p,
642 				    rpp, np);
643 			} else {
644 				add_rec_list(status_rec_p,
645 				    &np->status_rec_list);
646 			}
647 			break;
648 		}
649 		np = np->next;
650 		if (np == end) {
651 			np = NULL;
652 			break;
653 		}
654 	}
655 	if (np == NULL) {
656 		np = malloc(sizeof (resource_list_t));
657 		np->resource = fp->name;
658 		np->not_suppressed = status_rec_p->not_suppressed;
659 		np->injected = status_rec_p->injected;
660 		np->status_rec_list = NULL;
661 		np->max_pct = fp->max_pct;
662 		add_resource(status_rec_p, rpp, np);
663 	}
664 }
665 
666 static void
add_list(status_record_t * status_rec_p,name_list_t * listp,resource_list_t ** glistp)667 add_list(status_record_t *status_rec_p, name_list_t *listp,
668     resource_list_t **glistp)
669 {
670 	name_list_t *fp, *end;
671 
672 	fp = listp;
673 	end = fp;
674 	while (fp) {
675 		add_resource_list(status_rec_p, fp, glistp);
676 		fp = fp->next;
677 		if (fp == end)
678 			break;
679 	}
680 }
681 
682 /*
683  * add record to rec, fru and asru lists.
684  */
685 static void
catalog_new_record(uurec_t * uurec_p,char * msgid,name_list_t * class,name_list_t * fru,name_list_t * asru,name_list_t * resource,name_list_t * serial,boolean_t not_suppressed,hostid_t * hostid,boolean_t injected,boolean_t dummy_fru)686 catalog_new_record(uurec_t *uurec_p, char *msgid, name_list_t *class,
687     name_list_t *fru, name_list_t *asru, name_list_t *resource,
688     name_list_t *serial, boolean_t not_suppressed,
689     hostid_t *hostid, boolean_t injected, boolean_t dummy_fru)
690 {
691 	status_record_t *status_rec_p;
692 
693 	status_rec_p = new_record_init(uurec_p, msgid, class, fru, asru,
694 	    resource, serial, not_suppressed, hostid, injected);
695 	add_rec_list(status_rec_p, &status_rec_list);
696 	if (status_rec_p->fru && !dummy_fru)
697 		add_list(status_rec_p, status_rec_p->fru, &status_fru_list);
698 	if (status_rec_p->asru)
699 		add_list(status_rec_p, status_rec_p->asru, &status_asru_list);
700 }
701 
702 static void
get_serial_no(nvlist_t * nvl,name_list_t ** serial_p,uint8_t pct)703 get_serial_no(nvlist_t *nvl, name_list_t **serial_p, uint8_t pct)
704 {
705 	char *name;
706 	char *serial = NULL;
707 	char **lserial = NULL;
708 	uint64_t serint;
709 	name_list_t *nlp;
710 	int j;
711 	uint_t nelem;
712 	char buf[64];
713 
714 	if (nvlist_lookup_string(nvl, FM_FMRI_SCHEME, &name) == 0) {
715 		if (strcmp(name, FM_FMRI_SCHEME_CPU) == 0) {
716 			if (nvlist_lookup_uint64(nvl, FM_FMRI_CPU_SERIAL_ID,
717 			    &serint) == 0) {
718 				(void) snprintf(buf, sizeof (buf), "%llX",
719 				    serint);
720 				nlp = alloc_name_list(buf, pct);
721 				(void) merge_name_list(serial_p, nlp, 1);
722 			}
723 		} else if (strcmp(name, FM_FMRI_SCHEME_MEM) == 0) {
724 			if (nvlist_lookup_string_array(nvl,
725 			    FM_FMRI_MEM_SERIAL_ID, &lserial, &nelem) == 0) {
726 				nlp = alloc_name_list(lserial[0], pct);
727 				for (j = 1; j < nelem; j++) {
728 					name_list_t *n1lp;
729 					n1lp = alloc_name_list(lserial[j], pct);
730 					(void) merge_name_list(&nlp, n1lp, 1);
731 				}
732 				(void) merge_name_list(serial_p, nlp, 1);
733 			}
734 		} else if (strcmp(name, FM_FMRI_SCHEME_HC) == 0) {
735 			if (nvlist_lookup_string(nvl, FM_FMRI_HC_SERIAL_ID,
736 			    &serial) == 0) {
737 				nlp = alloc_name_list(serial, pct);
738 				(void) merge_name_list(serial_p, nlp, 1);
739 			}
740 		}
741 	}
742 }
743 
744 static void
extract_record_info(nvlist_t * nvl,name_list_t ** class_p,name_list_t ** fru_p,name_list_t ** serial_p,name_list_t ** resource_p,name_list_t ** asru_p,boolean_t * dummy_fru,uint8_t status)745 extract_record_info(nvlist_t *nvl, name_list_t **class_p,
746     name_list_t **fru_p, name_list_t **serial_p, name_list_t **resource_p,
747     name_list_t **asru_p, boolean_t *dummy_fru, uint8_t status)
748 {
749 	nvlist_t *lfru, *lasru, *rsrc;
750 	name_list_t *nlp;
751 	char *name;
752 	uint8_t lpct = 0;
753 	char *lclass = NULL;
754 	char *label;
755 
756 	(void) nvlist_lookup_uint8(nvl, FM_FAULT_CERTAINTY, &lpct);
757 	if (nvlist_lookup_string(nvl, FM_CLASS, &lclass) == 0) {
758 		nlp = alloc_name_list(lclass, lpct);
759 		(void) merge_name_list(class_p, nlp, 1);
760 	}
761 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_FRU, &lfru) == 0) {
762 		name = get_nvl2str_topo(lfru);
763 		if (name != NULL) {
764 			nlp = alloc_name_list(name, lpct);
765 			nlp->status = status & ~(FM_SUSPECT_UNUSABLE |
766 			    FM_SUSPECT_DEGRADED);
767 			free(name);
768 			if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION,
769 			    &label) == 0)
770 				nlp->label = strdup(label);
771 			(void) merge_name_list(fru_p, nlp, 1);
772 		}
773 		get_serial_no(lfru, serial_p, lpct);
774 	} else if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) != 0) {
775 		/*
776 		 * No FRU or resource. But we want to display the repair status
777 		 * somehow, so create a dummy FRU field.
778 		 */
779 		*dummy_fru = 1;
780 		nlp = alloc_name_list(dgettext("FMD", "None"), lpct);
781 		nlp->status = status & ~(FM_SUSPECT_UNUSABLE |
782 		    FM_SUSPECT_DEGRADED);
783 		(void) merge_name_list(fru_p, nlp, 1);
784 	}
785 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_ASRU, &lasru) == 0) {
786 		name = get_nvl2str_topo(lasru);
787 		if (name != NULL) {
788 			nlp = alloc_name_list(name, lpct);
789 			nlp->status = status & ~(FM_SUSPECT_NOT_PRESENT |
790 			    FM_SUSPECT_REPAIRED | FM_SUSPECT_REPLACED |
791 			    FM_SUSPECT_ACQUITTED);
792 			free(name);
793 			(void) merge_name_list(asru_p, nlp, 1);
794 		}
795 		get_serial_no(lasru, serial_p, lpct);
796 	}
797 	if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0) {
798 		name = get_nvl2str_topo(rsrc);
799 		if (name != NULL) {
800 			nlp = alloc_name_list(name, lpct);
801 			nlp->status = status;
802 			free(name);
803 			if (nvlist_lookup_string(nvl, FM_FAULT_LOCATION,
804 			    &label) == 0)
805 				nlp->label = strdup(label);
806 			(void) merge_name_list(resource_p, nlp, 1);
807 		}
808 	}
809 }
810 
811 static void
add_fault_record_to_catalog(nvlist_t * nvl,uint64_t sec,char * uuid)812 add_fault_record_to_catalog(nvlist_t *nvl, uint64_t sec, char *uuid)
813 {
814 	char *msgid = "-";
815 	uint_t i, size = 0;
816 	name_list_t *class = NULL, *resource = NULL;
817 	name_list_t *asru = NULL, *fru = NULL, *serial = NULL;
818 	nvlist_t **nva;
819 	uint8_t *ba;
820 	uurec_t *uurec_p;
821 	hostid_t *host;
822 	boolean_t not_suppressed = 1;
823 	boolean_t any_present = 0;
824 	boolean_t injected = 0;
825 	boolean_t dummy_fru = 0;
826 
827 	(void) nvlist_lookup_string(nvl, FM_SUSPECT_DIAG_CODE, &msgid);
828 	(void) nvlist_lookup_uint32(nvl, FM_SUSPECT_FAULT_SZ, &size);
829 	(void) nvlist_lookup_boolean_value(nvl, FM_SUSPECT_MESSAGE,
830 	    &not_suppressed);
831 	(void) nvlist_lookup_boolean_value(nvl, FM_SUSPECT_INJECTED, &injected);
832 
833 	if (size != 0) {
834 		(void) nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST,
835 		    &nva, &size);
836 		(void) nvlist_lookup_uint8_array(nvl, FM_SUSPECT_FAULT_STATUS,
837 		    &ba, &size);
838 		for (i = 0; i < size; i++) {
839 			extract_record_info(nva[i], &class, &fru, &serial,
840 			    &resource, &asru, &dummy_fru, ba[i]);
841 			if (!(ba[i] & FM_SUSPECT_NOT_PRESENT) &&
842 			    (ba[i] & FM_SUSPECT_FAULTY))
843 				any_present = 1;
844 		}
845 		/*
846 		 * also suppress if no resources present
847 		 */
848 		if (any_present == 0)
849 			not_suppressed = 0;
850 	}
851 
852 	uurec_p = (uurec_t *)malloc(sizeof (uurec_t));
853 	uurec_p->uuid = strdup(uuid);
854 	uurec_p->sec = sec;
855 	uurec_p->ari_uuid_list = NULL;
856 	uurec_p->event = NULL;
857 	(void) nvlist_dup(nvl, &uurec_p->event, 0);
858 	host = find_hostid(nvl);
859 	catalog_new_record(uurec_p, msgid, class, fru, asru,
860 	    resource, serial, not_suppressed, host, injected, dummy_fru);
861 }
862 
863 static void
update_asru_state_in_catalog(const char * uuid,const char * ari_uuid)864 update_asru_state_in_catalog(const char *uuid, const char *ari_uuid)
865 {
866 	sr_list_t *srp;
867 	uurec_t *uurp;
868 	ari_list_t *ari_list;
869 
870 	srp = status_rec_list;
871 	if (srp) {
872 		for (;;) {
873 			uurp = srp->status_record->uurec;
874 			while (uurp) {
875 				if (strcmp(uuid, uurp->uuid) == 0) {
876 					ari_list = (ari_list_t *)
877 					    malloc(sizeof (ari_list_t));
878 					ari_list->ari_uuid = strdup(ari_uuid);
879 					ari_list->next = uurp->ari_uuid_list;
880 					uurp->ari_uuid_list = ari_list;
881 					return;
882 				}
883 				uurp = uurp->next;
884 			}
885 			if (srp->next == status_rec_list)
886 				break;
887 			srp = srp->next;
888 		}
889 	}
890 }
891 
/*
 * Print 'buf' to stdout after a blank line, word-wrapped at 80 columns.
 * The first output line starts with 'label'; continuation lines are indented
 * by the width of the label.  A newline in buf forces a line break and the
 * spaces that follow it are skipped.  A word too long for the line is not
 * split.
 *
 * buf is modified: the characters at which lines are broken are overwritten
 * with NUL.  An empty buf prints only the blank line.
 */
static void
print_line(char *label, char *buf)
{
	const int maxcol = 80;
	char *cp, *ep, *wp;
	char c;
	int i;
	int lsz;
	int first = 1;

	lsz = strlen(label);
	cp = buf;
	ep = buf;
	c = *ep;
	(void) printf("\n");
	while (c) {
		i = lsz;
		wp = NULL;	/* last space seen on this line */
		while ((c = *ep) != '\0' && (wp == NULL || i < maxcol)) {
			if (c == ' ')
				wp = ep;
			else if (c == '\n') {
				i = 0;
				*ep = 0;
				do {
					ep++;
				} while ((c = *ep) != '\0' && c == ' ');
				break;
			}
			ep++;
			i++;
		}
		if (i >= maxcol && wp) {
			/* line is full: break at the last space */
			*wp = 0;
			ep = wp + 1;
			c = *ep;
		}
		if (first) {
			(void) printf("%s%s\n", label, cp);
			first = 0;
		} else {
			/* pad with lsz spaces; needs no scratch buffer */
			(void) printf("%*s%s\n", lsz, "", cp);
		}
		cp = ep;
	}
}
938 
939 static void
print_dict_info_line(nvlist_t * e,fmd_msg_item_t what,const char * linehdr)940 print_dict_info_line(nvlist_t *e, fmd_msg_item_t what, const char *linehdr)
941 {
942 	char *cp = fmd_msg_getitem_nv(fmadm_msghdl, NULL, e, what);
943 
944 	if (cp) {
945 		print_line(dgettext("FMD", linehdr), cp);
946 		free(cp);
947 	}
948 }
949 
950 static void
print_dict_info(nvlist_t * nvl)951 print_dict_info(nvlist_t *nvl)
952 {
953 	print_dict_info_line(nvl, FMD_MSG_ITEM_DESC, "Description : ");
954 	print_dict_info_line(nvl, FMD_MSG_ITEM_RESPONSE, "Response    : ");
955 	print_dict_info_line(nvl, FMD_MSG_ITEM_IMPACT, "Impact      : ");
956 	print_dict_info_line(nvl, FMD_MSG_ITEM_ACTION, "Action      : ");
957 }
958 
959 static void
print_name(name_list_t * list,char * padding,int * np,int pct,int full)960 print_name(name_list_t *list, char *padding, int *np, int pct, int full)
961 {
962 	char *name;
963 
964 	name = list->name;
965 	if (list->label) {
966 		(void) printf("%s \"%s\" (%s)", padding, list->label, name);
967 		*np += 1;
968 	} else {
969 		(void) printf("%s %s", padding, name);
970 		*np += 1;
971 	}
972 	if (list->pct && pct > 0 && pct < 100) {
973 		if (list->count > 1) {
974 			if (full) {
975 				(void) printf(" %d @ %s %d%%\n", list->count,
976 				    dgettext("FMD", "max"),
977 				    list->max_pct);
978 			} else {
979 				(void) printf(" %s %d%%\n",
980 				    dgettext("FMD", "max"),
981 				    list->max_pct);
982 			}
983 		} else {
984 			(void) printf(" %d%%\n", list->pct);
985 		}
986 	} else {
987 		(void) printf("\n");
988 	}
989 }
990 
991 static void
print_asru_status(int status,char * label)992 print_asru_status(int status, char *label)
993 {
994 	char *msg = NULL;
995 
996 	switch (status) {
997 	case 0:
998 		msg = dgettext("FMD", "ok and in service");
999 		break;
1000 	case FM_SUSPECT_DEGRADED:
1001 		msg = dgettext("FMD", "service degraded, "
1002 		    "but associated components no longer faulty");
1003 		break;
1004 	case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED:
1005 		msg = dgettext("FMD", "faulted but still "
1006 		    "providing degraded service");
1007 		break;
1008 	case FM_SUSPECT_FAULTY:
1009 		msg = dgettext("FMD", "faulted but still in service");
1010 		break;
1011 	case FM_SUSPECT_UNUSABLE:
1012 		msg = dgettext("FMD", "out of service, "
1013 		    "but associated components no longer faulty");
1014 		break;
1015 	case FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE:
1016 		msg = dgettext("FMD", "faulted and taken out of service");
1017 		break;
1018 	default:
1019 		break;
1020 	}
1021 	if (msg) {
1022 		(void) printf("%s     %s\n", label, msg);
1023 	}
1024 }
1025 
1026 static void
print_fru_status(int status,char * label)1027 print_fru_status(int status, char *label)
1028 {
1029 	char *msg = NULL;
1030 
1031 	if (status & FM_SUSPECT_NOT_PRESENT)
1032 		msg = dgettext("FMD", "not present");
1033 	else if (status & FM_SUSPECT_FAULTY)
1034 		msg = dgettext("FMD", "faulty");
1035 	else if (status & FM_SUSPECT_REPLACED)
1036 		msg = dgettext("FMD", "replaced");
1037 	else if (status & FM_SUSPECT_REPAIRED)
1038 		msg = dgettext("FMD", "repair attempted");
1039 	else if (status & FM_SUSPECT_ACQUITTED)
1040 		msg = dgettext("FMD", "acquitted");
1041 	else
1042 		msg = dgettext("FMD", "removed");
1043 	(void) printf("%s     %s\n", label, msg);
1044 }
1045 
1046 static void
print_rsrc_status(int status,char * label)1047 print_rsrc_status(int status, char *label)
1048 {
1049 	char *msg = "";
1050 
1051 	if (status & FM_SUSPECT_NOT_PRESENT)
1052 		msg = dgettext("FMD", "not present");
1053 	else if (status & FM_SUSPECT_FAULTY) {
1054 		if (status & FM_SUSPECT_DEGRADED)
1055 			msg = dgettext("FMD",
1056 			    "faulted but still providing degraded service");
1057 		else if (status & FM_SUSPECT_UNUSABLE)
1058 			msg = dgettext("FMD",
1059 			    "faulted and taken out of service");
1060 		else
1061 			msg = dgettext("FMD", "faulted but still in service");
1062 	} else if (status & FM_SUSPECT_REPLACED)
1063 		msg = dgettext("FMD", "replaced");
1064 	else if (status & FM_SUSPECT_REPAIRED)
1065 		msg = dgettext("FMD", "repair attempted");
1066 	else if (status & FM_SUSPECT_ACQUITTED)
1067 		msg = dgettext("FMD", "acquitted");
1068 	else
1069 		msg = dgettext("FMD", "removed");
1070 	(void) printf("%s     %s\n", label, msg);
1071 }
1072 
1073 static void
print_name_list(name_list_t * list,char * label,int limit,int pct,void (func1)(int,char *),int full)1074 print_name_list(name_list_t *list, char *label,
1075     int limit, int pct, void (func1)(int, char *), int full)
1076 {
1077 	char *name;
1078 	char *padding;
1079 	int i, j, l, n;
1080 	name_list_t *end = list;
1081 
1082 	l = strlen(label);
1083 	padding = malloc(l + 1);
1084 	for (i = 0; i < l; i++)
1085 		padding[i] = ' ';
1086 	padding[l] = 0;
1087 	(void) printf("%s", label);
1088 	name = list->name;
1089 	if (list->label)
1090 		(void) printf(" \"%s\" (%s)", list->label, name);
1091 	else
1092 		(void) printf(" %s", name);
1093 	if (list->pct && pct > 0 && pct < 100) {
1094 		if (list->count > 1) {
1095 			if (full) {
1096 				(void) printf(" %d @ %s %d%%\n", list->count,
1097 				    dgettext("FMD", "max"), list->max_pct);
1098 			} else {
1099 				(void) printf(" %s %d%%\n",
1100 				    dgettext("FMD", "max"), list->max_pct);
1101 			}
1102 		} else {
1103 			(void) printf(" %d%%\n", list->pct);
1104 		}
1105 	} else {
1106 		(void) printf("\n");
1107 	}
1108 	if (func1)
1109 		func1(list->status, padding);
1110 	n = 1;
1111 	j = 0;
1112 	while ((list = list->next) != end) {
1113 		if (limit == 0 || n < limit) {
1114 			print_name(list, padding, &n, pct, full);
1115 			if (func1)
1116 				func1(list->status, padding);
1117 		} else
1118 			j++;
1119 	}
1120 	if (j == 1) {
1121 		print_name(list->prev, padding, &n, pct, full);
1122 	} else if (j > 1) {
1123 		(void) printf("%s... %d %s\n", padding, j,
1124 		    dgettext("FMD", "more entries suppressed,"
1125 		    " use -v option for full list"));
1126 	}
1127 	free(padding);
1128 }
1129 
1130 static int
asru_same_status(name_list_t * list)1131 asru_same_status(name_list_t *list)
1132 {
1133 	name_list_t *end = list;
1134 	int status = list->status;
1135 
1136 	while ((list = list->next) != end) {
1137 		if (status == -1) {
1138 			status = list->status;
1139 			continue;
1140 		}
1141 		if (list->status != -1 && status != list->status) {
1142 			status = -1;
1143 			break;
1144 		}
1145 	}
1146 	return (status);
1147 }
1148 
1149 static int
serial_in_fru(name_list_t * fru,name_list_t * serial)1150 serial_in_fru(name_list_t *fru, name_list_t *serial)
1151 {
1152 	name_list_t *sp = serial;
1153 	name_list_t *fp;
1154 	int nserial = 0;
1155 	int found = 0;
1156 	char buf[128];
1157 
1158 	while (sp) {
1159 		fp = fru;
1160 		nserial++;
1161 		(void) snprintf(buf, sizeof (buf), "serial=%s", sp->name);
1162 		buf[sizeof (buf) - 1] = 0;
1163 		while (fp) {
1164 			if (strstr(fp->name, buf) != NULL) {
1165 				found++;
1166 				break;
1167 			}
1168 			fp = fp->next;
1169 			if (fp == fru)
1170 				break;
1171 		}
1172 		sp = sp->next;
1173 		if (sp == serial)
1174 			break;
1175 	}
1176 	return (found == nserial ? 1 : 0);
1177 }
1178 
1179 static void
print_sup_record(status_record_t * srp,int opt_i,int full)1180 print_sup_record(status_record_t *srp, int opt_i, int full)
1181 {
1182 	char buf[32];
1183 	uurec_t *uurp = srp->uurec;
1184 	int n, j, k, max;
1185 	int status;
1186 	ari_list_t *ari_list;
1187 
1188 	n = 0;
1189 	max = max_fault;
1190 	if (max < 0) {
1191 		max = 0;
1192 	}
1193 	j = max / 2;
1194 	max -= j;
1195 	k = srp->nrecs - max;
1196 	while ((uurp = uurp->next) != NULL) {
1197 		if (full || n < j || n >= k || max_fault == 0 ||
1198 		    srp->nrecs == max_fault+1) {
1199 			if (opt_i) {
1200 				ari_list = uurp->ari_uuid_list;
1201 				while (ari_list) {
1202 					(void) printf("%-15s %s\n",
1203 					    format_date(buf, sizeof (buf),
1204 					    uurp->sec), ari_list->ari_uuid);
1205 					ari_list = ari_list->next;
1206 				}
1207 			} else {
1208 				(void) printf("%-15s %s\n",
1209 				    format_date(buf, sizeof (buf), uurp->sec),
1210 				    uurp->uuid);
1211 			}
1212 		} else if (n == j)
1213 			(void) printf("... %d %s\n", srp->nrecs - max_fault,
1214 			    dgettext("FMD", "more entries suppressed"));
1215 		n++;
1216 	}
1217 	(void) printf("\n");
1218 	if (srp->host) {
1219 		(void) printf("%s %s", dgettext("FMD", "Host        :"),
1220 		    srp->host->server);
1221 		if (srp->host->domain)
1222 			(void) printf("\t%s %s", dgettext("FMD",
1223 			    "Domain      :"), srp->host->domain);
1224 		(void) printf("\n%s %s", dgettext("FMD", "Platform    :"),
1225 		    srp->host->platform);
1226 		(void) printf("\t%s %s", dgettext("FMD", "Chassis_id  :"),
1227 		    srp->host->chassis ? srp->host->chassis : "");
1228 		(void) printf("\n%s %s\n\n", dgettext("FMD", "Product_sn  :"),
1229 		    srp->host->product_sn ? srp->host->product_sn : "");
1230 	}
1231 	if (srp->class)
1232 		print_name_list(srp->class,
1233 		    dgettext("FMD", "Fault class :"), 0, srp->class->pct,
1234 		    NULL, full);
1235 	if (srp->asru) {
1236 		status = asru_same_status(srp->asru);
1237 		if (status != -1) {
1238 			print_name_list(srp->asru,
1239 			    dgettext("FMD", "Affects     :"),
1240 			    full ? 0 : max_display, 0, NULL, full);
1241 			print_asru_status(status, "             ");
1242 		} else
1243 			print_name_list(srp->asru,
1244 			    dgettext("FMD", "Affects     :"),
1245 			    full ? 0 : max_display, 0, print_asru_status, full);
1246 	}
1247 	if (full || srp->fru == NULL || srp->asru == NULL) {
1248 		if (srp->resource) {
1249 			status = asru_same_status(srp->resource);
1250 			if (status != -1) {
1251 				print_name_list(srp->resource,
1252 				    dgettext("FMD", "Problem in  :"),
1253 				    full ? 0 : max_display, 0, NULL, full);
1254 				print_rsrc_status(status, "             ");
1255 			} else
1256 				print_name_list(srp->resource,
1257 				    dgettext("FMD", "Problem in  :"),
1258 				    full ? 0 : max_display, 0,
1259 				    print_rsrc_status, full);
1260 		}
1261 	}
1262 	if (srp->fru) {
1263 		status = asru_same_status(srp->fru);
1264 		if (status != -1) {
1265 			print_name_list(srp->fru, dgettext("FMD",
1266 			    "FRU         :"), 0,
1267 			    srp->fru->pct == 100 ? 100 : srp->fru->max_pct,
1268 			    NULL, full);
1269 			print_fru_status(status, "             ");
1270 		} else
1271 			print_name_list(srp->fru, dgettext("FMD",
1272 			    "FRU         :"), 0,
1273 			    srp->fru->pct == 100 ? 100 : srp->fru->max_pct,
1274 			    print_fru_status, full);
1275 	}
1276 	if (srp->serial && !serial_in_fru(srp->fru, srp->serial) &&
1277 	    !serial_in_fru(srp->asru, srp->serial)) {
1278 		print_name_list(srp->serial, dgettext("FMD", "Serial ID.  :"),
1279 		    0, 0, NULL, full);
1280 	}
1281 	print_dict_info(srp->uurec->event);
1282 	(void) printf("\n");
1283 }
1284 
1285 static void
print_status_record(status_record_t * srp,int summary,int opt_i,int full)1286 print_status_record(status_record_t *srp, int summary, int opt_i, int full)
1287 {
1288 	char buf[32];
1289 	uurec_t *uurp = srp->uurec;
1290 	static int header = 0;
1291 	char *head;
1292 	ari_list_t *ari_list;
1293 
1294 	if (!summary || !header) {
1295 		if (opt_i) {
1296 			head = "--------------- "
1297 			    "------------------------------------  "
1298 			    "-------------- ---------\n"
1299 			    "TIME            CACHE-ID"
1300 			    "                              MSG-ID"
1301 			    "         SEVERITY\n--------------- "
1302 			    "------------------------------------ "
1303 			    " -------------- ---------";
1304 		} else {
1305 			head = "--------------- "
1306 			    "------------------------------------  "
1307 			    "-------------- ---------\n"
1308 			    "TIME            EVENT-ID"
1309 			    "                              MSG-ID"
1310 			    "         SEVERITY\n--------------- "
1311 			    "------------------------------------ "
1312 			    " -------------- ---------";
1313 		}
1314 		(void) printf("%s\n", dgettext("FMD", head));
1315 		header = 1;
1316 	}
1317 	if (opt_i) {
1318 		ari_list = uurp->ari_uuid_list;
1319 		while (ari_list) {
1320 			(void) printf("%-15s %-37s %-14s %-9s %s\n",
1321 			    format_date(buf, sizeof (buf), uurp->sec),
1322 			    ari_list->ari_uuid, srp->msgid, srp->severity,
1323 			    srp->injected ? dgettext("FMD", "injected") : "");
1324 			ari_list = ari_list->next;
1325 		}
1326 	} else {
1327 		(void) printf("%-15s %-37s %-14s %-9s %s\n",
1328 		    format_date(buf, sizeof (buf), uurp->sec),
1329 		    uurp->uuid, srp->msgid, srp->severity,
1330 		    srp->injected ? dgettext("FMD", "injected") : "");
1331 	}
1332 
1333 	if (!summary)
1334 		print_sup_record(srp, opt_i, full);
1335 }
1336 
1337 static void
print_catalog(int summary,int opt_a,int full,int opt_i,int page_feed)1338 print_catalog(int summary, int opt_a, int full, int opt_i, int page_feed)
1339 {
1340 	status_record_t *srp;
1341 	sr_list_t *slp;
1342 
1343 	slp = status_rec_list;
1344 	if (slp) {
1345 		for (;;) {
1346 			srp = slp->status_record;
1347 			if (opt_a || srp->not_suppressed) {
1348 				if (page_feed)
1349 					(void) printf("\f\n");
1350 				print_status_record(srp, summary, opt_i, full);
1351 			}
1352 			if (slp->next == status_rec_list)
1353 				break;
1354 			slp = slp->next;
1355 		}
1356 	}
1357 }
1358 
1359 static name_list_t *
find_fru(status_record_t * srp,char * resource)1360 find_fru(status_record_t *srp, char *resource)
1361 {
1362 	name_list_t *rt = NULL;
1363 	name_list_t *fru = srp->fru;
1364 
1365 	while (fru) {
1366 		if (strcmp(resource, fru->name) == 0) {
1367 			rt = fru;
1368 			break;
1369 		}
1370 		fru = fru->next;
1371 		if (fru == srp->fru)
1372 			break;
1373 	}
1374 	return (rt);
1375 }
1376 
1377 static void
print_fru_line(name_list_t * fru,char * uuid)1378 print_fru_line(name_list_t *fru, char *uuid)
1379 {
1380 	if (fru->pct == 100) {
1381 		(void) printf("%s %d %s %d%%\n", uuid, fru->count,
1382 		    dgettext("FMD", "suspects in this FRU total certainty"),
1383 		    100);
1384 	} else {
1385 		(void) printf("%s %d %s %d%%\n", uuid, fru->count,
1386 		    dgettext("FMD", "suspects in this FRU max certainty"),
1387 		    fru->max_pct);
1388 	}
1389 }
1390 
1391 static void
print_fru(int summary,int opt_a,int opt_i,int page_feed)1392 print_fru(int summary, int opt_a, int opt_i, int page_feed)
1393 {
1394 	resource_list_t *tp = status_fru_list;
1395 	status_record_t *srp;
1396 	sr_list_t *slp, *end;
1397 	uurec_t *uurp;
1398 	name_list_t *fru;
1399 	int status;
1400 	ari_list_t *ari_list;
1401 
1402 	while (tp) {
1403 		if (opt_a || tp->not_suppressed) {
1404 			if (page_feed)
1405 				(void) printf("\f\n");
1406 			if (!summary)
1407 				(void) printf("-----------------------------"
1408 				    "---------------------------------------"
1409 				    "----------\n");
1410 			slp = tp->status_rec_list;
1411 			end = slp;
1412 			do {
1413 				srp = slp->status_record;
1414 				if (!srp->not_suppressed) {
1415 					slp = slp->next;
1416 					continue;
1417 				}
1418 				fru = find_fru(srp, tp->resource);
1419 				if (fru) {
1420 					if (fru->label)
1421 						(void) printf("\"%s\" (%s) ",
1422 						    fru->label, fru->name);
1423 					else
1424 						(void) printf("%s ",
1425 						    fru->name);
1426 					break;
1427 				}
1428 				slp = slp->next;
1429 			} while (slp != end);
1430 
1431 			slp = tp->status_rec_list;
1432 			end = slp;
1433 			status = 0;
1434 			do {
1435 				srp = slp->status_record;
1436 				if (!srp->not_suppressed) {
1437 					slp = slp->next;
1438 					continue;
1439 				}
1440 				fru = srp->fru;
1441 				while (fru) {
1442 					if (strcmp(tp->resource,
1443 					    fru->name) == 0)
1444 						status |= fru->status;
1445 					fru = fru->next;
1446 					if (fru == srp->fru)
1447 						break;
1448 				}
1449 				slp = slp->next;
1450 			} while (slp != end);
1451 			if (status & FM_SUSPECT_NOT_PRESENT)
1452 				(void) printf(dgettext("FMD", "not present"));
1453 			else if (status & FM_SUSPECT_FAULTY)
1454 				(void) printf(dgettext("FMD", "faulty"));
1455 			else if (status & FM_SUSPECT_REPLACED)
1456 				(void) printf(dgettext("FMD", "replaced"));
1457 			else if (status & FM_SUSPECT_REPAIRED)
1458 				(void) printf(dgettext("FMD",
1459 				    "repair attempted"));
1460 			else if (status & FM_SUSPECT_ACQUITTED)
1461 				(void) printf(dgettext("FMD", "acquitted"));
1462 			else
1463 				(void) printf(dgettext("FMD", "removed"));
1464 
1465 			if (tp->injected)
1466 				(void) printf(dgettext("FMD", " injected\n"));
1467 			else
1468 				(void) printf(dgettext("FMD", "\n"));
1469 
1470 			slp = tp->status_rec_list;
1471 			end = slp;
1472 			do {
1473 				srp = slp->status_record;
1474 				if (!srp->not_suppressed) {
1475 					slp = slp->next;
1476 					continue;
1477 				}
1478 				uurp = srp->uurec;
1479 				fru = find_fru(srp, tp->resource);
1480 				if (fru) {
1481 					if (opt_i) {
1482 						ari_list = uurp->ari_uuid_list;
1483 						while (ari_list) {
1484 							print_fru_line(fru,
1485 							    ari_list->ari_uuid);
1486 							ari_list =
1487 							    ari_list->next;
1488 						}
1489 					} else {
1490 						print_fru_line(fru, uurp->uuid);
1491 					}
1492 				}
1493 				slp = slp->next;
1494 			} while (slp != end);
1495 			if (!summary) {
1496 				slp = tp->status_rec_list;
1497 				end = slp;
1498 				do {
1499 					srp = slp->status_record;
1500 					if (!srp->not_suppressed) {
1501 						slp = slp->next;
1502 						continue;
1503 					}
1504 					if (srp->serial &&
1505 					    !serial_in_fru(srp->fru,
1506 					    srp->serial)) {
1507 						print_name_list(srp->serial,
1508 						    dgettext("FMD",
1509 						    "Serial ID.  :"),
1510 						    0, 0, NULL, 1);
1511 						break;
1512 					}
1513 					slp = slp->next;
1514 				} while (slp != end);
1515 			}
1516 		}
1517 		tp = tp->next;
1518 		if (tp == status_fru_list)
1519 			break;
1520 	}
1521 }
1522 
1523 static void
print_asru(int opt_a)1524 print_asru(int opt_a)
1525 {
1526 	resource_list_t *tp = status_asru_list;
1527 	status_record_t *srp;
1528 	sr_list_t *slp, *end;
1529 	char *msg;
1530 	int status;
1531 	name_list_t *asru;
1532 
1533 	while (tp) {
1534 		if (opt_a || tp->not_suppressed) {
1535 			status = 0;
1536 			slp = tp->status_rec_list;
1537 			end = slp;
1538 			do {
1539 				srp = slp->status_record;
1540 				if (!srp->not_suppressed) {
1541 					slp = slp->next;
1542 					continue;
1543 				}
1544 				asru = srp->asru;
1545 				while (asru) {
1546 					if (strcmp(tp->resource,
1547 					    asru->name) == 0)
1548 						status |= asru->status;
1549 					asru = asru->next;
1550 					if (asru == srp->asru)
1551 						break;
1552 				}
1553 				slp = slp->next;
1554 			} while (slp != end);
1555 			switch (status) {
1556 			case 0:
1557 				msg = dgettext("FMD", "ok");
1558 				break;
1559 			case FM_SUSPECT_DEGRADED:
1560 				msg = dgettext("FMD", "degraded");
1561 				break;
1562 			case FM_SUSPECT_FAULTY | FM_SUSPECT_DEGRADED:
1563 				msg = dgettext("FMD", "degraded");
1564 				break;
1565 			case FM_SUSPECT_FAULTY:
1566 				msg = dgettext("FMD", "degraded");
1567 				break;
1568 			case FM_SUSPECT_UNUSABLE:
1569 				msg = dgettext("FMD", "unknown");
1570 				break;
1571 			case FM_SUSPECT_FAULTY | FM_SUSPECT_UNUSABLE:
1572 				msg = dgettext("FMD", "faulted");
1573 				break;
1574 			default:
1575 				msg = "";
1576 				break;
1577 			}
1578 			(void) printf("%-69s %s", tp->resource, msg);
1579 			if (tp->injected)
1580 				(void) printf(dgettext("FMD", " injected\n"));
1581 			else
1582 				(void) printf(dgettext("FMD", "\n"));
1583 		}
1584 		tp = tp->next;
1585 		if (tp == status_asru_list)
1586 			break;
1587 	}
1588 }
1589 
1590 static int
uuid_in_list(char * uuid,uurec_select_t * uurecp)1591 uuid_in_list(char *uuid, uurec_select_t *uurecp)
1592 {
1593 	while (uurecp) {
1594 		if (strcmp(uuid, uurecp->uuid) == 0)
1595 			return (1);
1596 		uurecp = uurecp->next;
1597 	}
1598 	return (0);
1599 }
1600 
1601 static int
dfault_rec(const fmd_adm_caseinfo_t * acp,void * arg)1602 dfault_rec(const fmd_adm_caseinfo_t *acp, void *arg)
1603 {
1604 	int64_t *diag_time;
1605 	uint_t nelem;
1606 	int rt = 0;
1607 	char *uuid = "-";
1608 	uurec_select_t *uurecp = (uurec_select_t *)arg;
1609 
1610 	if (nvlist_lookup_int64_array(acp->aci_event, FM_SUSPECT_DIAG_TIME,
1611 	    &diag_time, &nelem) == 0 && nelem >= 2) {
1612 		(void) nvlist_lookup_string(acp->aci_event, FM_SUSPECT_UUID,
1613 		    &uuid);
1614 		if (uurecp == NULL || uuid_in_list(uuid, uurecp))
1615 			add_fault_record_to_catalog(acp->aci_event, *diag_time,
1616 			    uuid);
1617 	} else {
1618 		rt = -1;
1619 	}
1620 	return (rt);
1621 }
1622 
1623 /*ARGSUSED*/
1624 static int
dstatus_rec(const fmd_adm_rsrcinfo_t * ari,void * unused)1625 dstatus_rec(const fmd_adm_rsrcinfo_t *ari, void *unused)
1626 {
1627 	update_asru_state_in_catalog(ari->ari_case, ari->ari_uuid);
1628 	return (0);
1629 }
1630 
1631 static int
get_cases_from_fmd(fmd_adm_t * adm,uurec_select_t * uurecp,int opt_i)1632 get_cases_from_fmd(fmd_adm_t *adm, uurec_select_t *uurecp, int opt_i)
1633 {
1634 	int rt = FMADM_EXIT_SUCCESS;
1635 
1636 	/*
1637 	 * These calls may fail with Protocol error if message payload is
1638 	 * too big
1639 	 */
1640 	if (fmd_adm_case_iter(adm, NULL, dfault_rec, uurecp) != 0)
1641 		die("failed to get case list from fmd");
1642 	if (opt_i && fmd_adm_rsrc_iter(adm, 1, dstatus_rec, NULL) != 0)
1643 		die("failed to get case status from fmd");
1644 	return (rt);
1645 }
1646 
1647 /*
1648  * fmadm faulty command
1649  *
1650  *	-a		show hidden fault records
1651  *	-f		show faulty fru's
1652  *	-g		force grouping of similar faults on the same fru
1653  *	-n		number of fault records to display
1654  *	-p		pipe output through pager
1655  *	-r		show faulty asru's
1656  *	-s		print summary of first fault
1657  *	-u		print listed uuid's only
1658  *	-v		full output
1659  */
1660 
1661 int
cmd_faulty(fmd_adm_t * adm,int argc,char * argv[])1662 cmd_faulty(fmd_adm_t *adm, int argc, char *argv[])
1663 {
1664 	int opt_a = 0, opt_v = 0, opt_p = 0, opt_s = 0, opt_r = 0, opt_f = 0;
1665 	int opt_i = 0;
1666 	char *pager;
1667 	FILE *fp;
1668 	int rt, c, stat;
1669 	uurec_select_t *tp;
1670 	uurec_select_t *uurecp = NULL;
1671 
1672 	while ((c = getopt(argc, argv, "afgin:prsu:v")) != EOF) {
1673 		switch (c) {
1674 		case 'a':
1675 			opt_a++;
1676 			break;
1677 		case 'f':
1678 			opt_f++;
1679 			break;
1680 		case 'g':
1681 			opt_g++;
1682 			break;
1683 		case 'i':
1684 			opt_i++;
1685 			break;
1686 		case 'n':
1687 			max_fault = atoi(optarg);
1688 			break;
1689 		case 'p':
1690 			opt_p++;
1691 			break;
1692 		case 'r':
1693 			opt_r++;
1694 			break;
1695 		case 's':
1696 			opt_s++;
1697 			break;
1698 		case 'u':
1699 			tp = (uurec_select_t *)malloc(sizeof (uurec_select_t));
1700 			tp->uuid = optarg;
1701 			tp->next = uurecp;
1702 			uurecp = tp;
1703 			opt_a = 1;
1704 			break;
1705 		case 'v':
1706 			opt_v++;
1707 			break;
1708 		default:
1709 			return (FMADM_EXIT_USAGE);
1710 		}
1711 	}
1712 	if (optind < argc)
1713 		return (FMADM_EXIT_USAGE);
1714 
1715 	if ((fmadm_msghdl = fmd_msg_init(NULL, FMD_MSG_VERSION)) == NULL)
1716 		return (FMADM_EXIT_ERROR);
1717 	rt = get_cases_from_fmd(adm, uurecp, opt_i);
1718 	if (opt_p) {
1719 		if ((pager = getenv("PAGER")) == NULL)
1720 			pager = "/usr/bin/more";
1721 		fp = popen(pager, "w");
1722 		if (fp == NULL) {
1723 			rt = FMADM_EXIT_ERROR;
1724 			opt_p = 0;
1725 		} else {
1726 			(void) dup2(fileno(fp), 1);
1727 			setbuf(stdout, NULL);
1728 			(void) fclose(fp);
1729 		}
1730 	}
1731 	max_display = max_fault;
1732 	if (opt_f)
1733 		print_fru(opt_s, opt_a, opt_i, opt_p && !opt_s);
1734 	if (opt_r)
1735 		print_asru(opt_a);
1736 	if (opt_f == 0 && opt_r == 0)
1737 		print_catalog(opt_s, opt_a, opt_v, opt_i, opt_p && !opt_s);
1738 	fmd_msg_fini(fmadm_msghdl);
1739 	if (topo_handle)
1740 		topo_close(topo_handle);
1741 	if (opt_p) {
1742 		(void) fclose(stdout);
1743 		(void) wait(&stat);
1744 	}
1745 	return (rt);
1746 }
1747 
1748 int
cmd_flush(fmd_adm_t * adm,int argc,char * argv[])1749 cmd_flush(fmd_adm_t *adm, int argc, char *argv[])
1750 {
1751 	int i, status = FMADM_EXIT_SUCCESS;
1752 
1753 	if (argc < 2 || (i = getopt(argc, argv, "")) != EOF)
1754 		return (FMADM_EXIT_USAGE);
1755 
1756 	for (i = 1; i < argc; i++) {
1757 		if (fmd_adm_rsrc_flush(adm, argv[i]) != 0) {
1758 			warn("failed to flush %s", argv[i]);
1759 			status = FMADM_EXIT_ERROR;
1760 		} else
1761 			note("flushed resource history for %s\n", argv[i]);
1762 	}
1763 
1764 	return (status);
1765 }
1766 
1767 int
cmd_repair(fmd_adm_t * adm,int argc,char * argv[])1768 cmd_repair(fmd_adm_t *adm, int argc, char *argv[])
1769 {
1770 	int err;
1771 
1772 	if (getopt(argc, argv, "") != EOF)
1773 		return (FMADM_EXIT_USAGE);
1774 
1775 	if (argc - optind != 1)
1776 		return (FMADM_EXIT_USAGE);
1777 
1778 	/*
1779 	 * argument could be a uuid, an fmri (asru, fru or resource)
1780 	 * or a label. Try uuid first, If that fails try the others.
1781 	 */
1782 	err = fmd_adm_case_repair(adm, argv[optind]);
1783 	if (err != 0)
1784 		err = fmd_adm_rsrc_repaired(adm, argv[optind]);
1785 
1786 	if (err != 0)
1787 		die("failed to record repair to %s", argv[optind]);
1788 
1789 	note("recorded repair to %s\n", argv[optind]);
1790 	return (FMADM_EXIT_SUCCESS);
1791 }
1792 
1793 int
cmd_repaired(fmd_adm_t * adm,int argc,char * argv[])1794 cmd_repaired(fmd_adm_t *adm, int argc, char *argv[])
1795 {
1796 	int err;
1797 
1798 	if (getopt(argc, argv, "") != EOF)
1799 		return (FMADM_EXIT_USAGE);
1800 
1801 	if (argc - optind != 1)
1802 		return (FMADM_EXIT_USAGE);
1803 
1804 	/*
1805 	 * argument could be an fmri (asru, fru or resource) or a label.
1806 	 */
1807 	err = fmd_adm_rsrc_repaired(adm, argv[optind]);
1808 	if (err != 0)
1809 		die("failed to record repair to %s", argv[optind]);
1810 
1811 	note("recorded repair to of %s\n", argv[optind]);
1812 	return (FMADM_EXIT_SUCCESS);
1813 }
1814 
1815 int
cmd_replaced(fmd_adm_t * adm,int argc,char * argv[])1816 cmd_replaced(fmd_adm_t *adm, int argc, char *argv[])
1817 {
1818 	int err;
1819 
1820 	if (getopt(argc, argv, "") != EOF)
1821 		return (FMADM_EXIT_USAGE);
1822 
1823 	if (argc - optind != 1)
1824 		return (FMADM_EXIT_USAGE);
1825 
1826 	/*
1827 	 * argument could be an fmri (asru, fru or resource) or a label.
1828 	 */
1829 	err = fmd_adm_rsrc_replaced(adm, argv[optind]);
1830 	if (err != 0)
1831 		die("failed to record replacement of %s", argv[optind]);
1832 
1833 	note("recorded replacement of %s\n", argv[optind]);
1834 	return (FMADM_EXIT_SUCCESS);
1835 }
1836 
1837 int
cmd_acquit(fmd_adm_t * adm,int argc,char * argv[])1838 cmd_acquit(fmd_adm_t *adm, int argc, char *argv[])
1839 {
1840 	int err;
1841 
1842 	if (getopt(argc, argv, "") != EOF)
1843 		return (FMADM_EXIT_USAGE);
1844 
1845 	if (argc - optind != 1 && argc - optind != 2)
1846 		return (FMADM_EXIT_USAGE);
1847 
1848 	/*
1849 	 * argument could be a uuid, an fmri (asru, fru or resource)
1850 	 * or a label. Or it could be a uuid and an fmri or label.
1851 	 */
1852 	if (argc - optind == 2) {
1853 		err = fmd_adm_rsrc_acquit(adm, argv[optind], argv[optind + 1]);
1854 		if (err != 0)
1855 			err = fmd_adm_rsrc_acquit(adm, argv[optind + 1],
1856 			    argv[optind]);
1857 	} else {
1858 		err = fmd_adm_case_acquit(adm, argv[optind]);
1859 		if (err != 0)
1860 			err = fmd_adm_rsrc_acquit(adm, argv[optind], "");
1861 	}
1862 
1863 	if (err != 0)
1864 		die("failed to record acquital of %s", argv[optind]);
1865 
1866 	note("recorded acquital of %s\n", argv[optind]);
1867 	return (FMADM_EXIT_SUCCESS);
1868 }
1869