1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 /*
28  * Ereport-handling routines for memory errors
29  */
30 
31 #include <gmem_mem.h>
32 #include <gmem_dimm.h>
33 #include <gmem_page.h>
34 #include <gmem.h>
35 
36 #include <strings.h>
37 #include <string.h>
38 #include <errno.h>
39 #include <assert.h>
40 #include <fm/fmd_api.h>
41 #include <fm/libtopo.h>
42 #include <sys/fm/protocol.h>
43 #include <sys/async.h>
44 #include <sys/errclassify.h>
45 
46 struct ce_name2type {
47 	const char *name;
48 	ce_dispact_t type;
49 };
50 
51 static ce_dispact_t
52 gmem_mem_name2type(const char *name)
53 {
54 	static const struct ce_name2type new[] = {
55 		{ "mem-unk",		CE_DISP_UNKNOWN },
56 		{ "mem-is",		CE_DISP_INTERMITTENT },
57 		{ "mem-cs",		CE_DISP_PERS },
58 		{ "mem-ss",		CE_DISP_STICKY },
59 		{ NULL }
60 	};
61 	const struct ce_name2type *names = &new[0];
62 	const struct ce_name2type *tp;
63 
64 	for (tp = names; tp->name != NULL; tp++) {
65 		if (strcasecmp(name, tp->name) == 0)
66 			return (tp->type);
67 	}
68 
69 	return (CE_DISP_UNKNOWN);
70 }
71 
72 static void
73 ce_thresh_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
74 {
75 	fmd_case_t *cp;
76 	nvlist_t *dflt, *rsc;
77 	uint_t nret;
78 
79 	if (dimm->dimm_flags & GMEM_F_FAULTING) {
80 		/* We've already complained about this DIMM */
81 		return;
82 	}
83 
84 	nret = dimm->dimm_nretired;
85 	/*
86 	 * fault the dimm if number retired page >= max_retired_pages
87 	 */
88 	if (nret < gmem.gm_max_retired_pages)
89 		return;
90 
91 	dimm->dimm_flags |= GMEM_F_FAULTING;
92 	gmem_dimm_dirty(hdl, dimm);
93 
94 	cp = fmd_case_open(hdl, NULL);
95 	rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
96 	dflt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_PAGES, GMEM_FLTMAXCONF,
97 	    NULL, gmem_dimm_fru(dimm), rsc);
98 	fmd_case_add_suspect(hdl, cp, dflt);
99 	fmd_case_solve(hdl, cp);
100 	if (rsc != NULL)
101 		nvlist_free(rsc);
102 }
103 
104 /*
105  * Create a fresh index block for MQSC CE correlation.
106  */
107 gmem_mq_t *
108 mq_create(fmd_hdl_t *hdl, fmd_event_t *ep,
109     uint64_t afar, uint16_t upos, uint16_t dram, uint16_t ckwd, uint64_t now)
110 {
111 	gmem_mq_t *cp;
112 	cp = fmd_hdl_zalloc(hdl, sizeof (gmem_mq_t), FMD_SLEEP);
113 	cp->mq_tstamp = now;
114 	cp->mq_ckwd = ckwd;
115 	cp->mq_phys_addr = afar;
116 	cp->mq_unit_position = upos;
117 	cp->mq_dram = (int16_t)dram;
118 	cp->mq_ep = ep;
119 	cp->mq_serdnm =
120 	    gmem_mq_serdnm_create(hdl, "mq", afar, ckwd, upos);
121 
122 	/*
123 	 * Create SERD to keep this event from being removed
124 	 * by fmd which may not know there is an event pointer
125 	 * saved here. This SERD is *never* meant to fire.
126 	 */
127 	if (fmd_serd_exists(hdl, cp->mq_serdnm))
128 		fmd_serd_destroy(hdl, cp->mq_serdnm);
129 
130 	fmd_serd_create(hdl, cp->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
131 	(void) fmd_serd_record(hdl, cp->mq_serdnm, ep);
132 
133 	return (cp);
134 }
135 
136 gmem_mq_t *
137 mq_destroy(fmd_hdl_t *hdl, gmem_list_t *lp, gmem_mq_t *ip)
138 {
139 	gmem_mq_t *jp = gmem_list_next(ip);
140 
141 	if (ip->mq_serdnm != NULL) {
142 		if (fmd_serd_exists(hdl, ip->mq_serdnm))
143 			fmd_serd_destroy(hdl, ip->mq_serdnm);
144 		fmd_hdl_strfree(hdl, ip->mq_serdnm);
145 		ip->mq_serdnm = NULL;
146 	}
147 	gmem_list_delete(lp, &ip->mq_l);
148 	fmd_hdl_free(hdl, ip, sizeof (gmem_mq_t));
149 
150 	return (jp);
151 }
152 
153 
154 /*
155  * Add an index block for a new CE, sorted
156  * a) by ascending unit position
157  * b) order of arrival (~= time order)
158  */
159 void
160 mq_add(fmd_hdl_t *hdl, gmem_dimm_t *dimm, fmd_event_t *ep,
161     uint64_t afar, uint16_t unit_position, uint16_t dram, uint16_t ckwd,
162     uint64_t now)
163 {
164 	gmem_mq_t *ip, *jp;
165 	int cw = (int)ckwd;
166 
167 	for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
168 		if (ip->mq_unit_position > unit_position) {
169 			/* list is in unit position order */
170 			break;
171 		} else if (ip->mq_unit_position == unit_position &&
172 		    ip->mq_phys_addr == afar) {
173 			/*
174 			 * Found a duplicate cw, unit_position, and afar.
175 			 * Delete this node, to be superseded by the new
176 			 * node added below.
177 			 */
178 			ip = mq_destroy(hdl, &dimm->mq_root[cw], ip);
179 		} else {
180 			ip = gmem_list_next(ip);
181 		}
182 	}
183 	jp = mq_create(hdl, ep, afar, unit_position, dram, cw, now);
184 	if (ip == NULL)
185 		gmem_list_append(&dimm->mq_root[cw], jp);
186 	else
187 		gmem_list_insert_before(&dimm->mq_root[cw], ip, jp);
188 }
189 
190 /*
191  * Prune the MQSC index lists (one for each checkword), by deleting
192  * outdated index blocks from each list.
193  */
194 
195 void
196 mq_prune(fmd_hdl_t *hdl, gmem_dimm_t *dimm, uint64_t now)
197 {
198 	gmem_mq_t *ip;
199 	int cw;
200 
201 	for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
202 		for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
203 			if (ip->mq_tstamp < now - GMEM_MQ_TIMELIM) {
204 				/*
205 				 * This event has timed out - delete the
206 				 * mq block as well as serd for the event.
207 				 */
208 				ip = mq_destroy(hdl, &dimm->mq_root[cw], ip);
209 			} else {
				/* event is still within GMEM_MQ_TIMELIM */
211 				ip = gmem_list_next(ip);
212 			}
		} /* walk one checkword's list */
	} /* for each checkword */
215 }
216 
217 /*
218  * Check the MQSC index lists (one for each checkword) by making a
219  * complete pass through each list, checking if the criteria for either
220  * Rule 4A or 4B have been met.  Rule 4A checking is done for each checkword;
 * the rule 4B check is done at the end.
222  *
223  * Rule 4A: fault a DIMM  "whenever Solaris reports two or more CEs from
224  * two or more different physical addresses on each of two or more different
225  * bit positions from the same DIMM within 72 hours of each other, and all
226  * the addresses are in the same relative checkword (that is, the AFARs
227  * are all the same modulo 64).  [Note: This means at least 4 CEs; two
228  * from one bit position, with unique addresses, and two from another,
229  * also with unique addresses, and the lower 6 bits of all the addresses
 * are the same.]"
231  *
232  * Rule 4B: fault a DIMM "whenever Solaris reports two or more CEs from
233  * two or more different physical addresses on each of three or more
234  * different outputs from the same DRAM within 72 hours of each other, as
235  * long as the three outputs do not all correspond to the same relative
236  * bit position in their respective checkwords.  [Note: This means at least
237  * 6 CEs; two from one DRAM output signal, with unique addresses, two from
238  * another output from the same DRAM, also with unique addresses, and two
239  * more from yet another output from the same DRAM, again with unique
240  * addresses, as long as the three outputs do not all correspond to the
241  * same relative bit position in their respective checkwords.]"
242  */
243 
244 void
245 mq_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm, int16_t dram)
246 {
247 	int upos_pairs, curr_upos, cw, i, j, k;
248 	nvlist_t *flt, *rsc;
249 	typedef struct upos_pair {
250 		int upos;
251 		int dram;
252 		gmem_mq_t *mq1;
253 		gmem_mq_t *mq2;
254 	} upos_pair_t;
255 	upos_pair_t upos_array[16]; /* max per cw = 2, * 8 cw's */
256 	gmem_mq_t *ip;
257 
258 	/*
259 	 * Each upos_array[] member represents a pair of CEs for the same
260 	 * unit position (symbol) which is a 4 bit nibble.
261 	 * MQSC rule 4 requires pairs of CEs from the same symbol (same DIMM
262 	 * for rule 4A, and same DRAM for rule 4B) for a violation - this
263 	 * is why CE pairs are tracked.
264 	 */
265 	upos_pairs = 0;
266 	upos_array[0].mq1 = NULL;
267 
268 	for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
269 		i = upos_pairs;
270 		curr_upos = -1;
271 
272 		/*
273 		 * mq_root[] is an array of cumulative lists of CEs
274 		 * indexed by checkword where the list is in unit position
275 		 * order. Loop through checking for duplicate unit position
276 		 * entries (filled in at mq_create()).
277 		 * The upos_array[] is filled in each time a duplicate
278 		 * unit position is found; the first time through the loop
279 		 * of a unit position sets curr_upos but does not fill in
280 		 * upos_array[] until the second symbol is found.
281 		 */
282 		for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL;
283 		    ip = gmem_list_next(ip)) {
284 			if (curr_upos != ip->mq_unit_position) {
				/* Starting a new unit position */
286 				curr_upos = ip->mq_unit_position;
287 			} else if (i > upos_pairs &&
288 			    curr_upos == upos_array[i-1].upos) {
289 				/*
290 				 * Only keep track of CE pairs; skip
291 				 * triples, quads, etc...
292 				 */
293 				continue;
294 			} else if (upos_array[i].mq1 == NULL) {
295 				/* Have a pair. Add to upos_array[] */
296 				fmd_hdl_debug(hdl, "pair:upos=%d dram=%d",
297 				    curr_upos, ip->mq_dram);
298 				upos_array[i].upos = curr_upos;
299 				upos_array[i].dram = ip->mq_dram;
300 				upos_array[i].mq1 = gmem_list_prev(ip);
301 				upos_array[i].mq2 = ip;
302 				upos_array[++i].mq1 = NULL;
303 			}
304 		}
305 		if (i - upos_pairs >= 2) {
306 			/* Rule 4A violation */
307 			rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
308 			flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_4A,
309 			    GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsc);
310 			for (j = upos_pairs; j < i; j++) {
311 				fmd_case_add_ereport(hdl,
312 				    dimm->dimm_case.cc_cp,
313 				    upos_array[j].mq1->mq_ep);
314 				fmd_case_add_ereport(hdl,
315 				    dimm->dimm_case.cc_cp,
316 				    upos_array[j].mq2->mq_ep);
317 			}
318 			dimm->dimm_flags |= GMEM_F_FAULTING;
319 			gmem_dimm_dirty(hdl, dimm);
320 			fmd_case_add_suspect(hdl, dimm->dimm_case.cc_cp, flt);
321 			fmd_case_solve(hdl, dimm->dimm_case.cc_cp);
322 			if (rsc != NULL)
323 				nvlist_free(rsc);
324 			return;
325 		}
326 		upos_pairs = i;
327 		assert(upos_pairs < 16);
328 	}
329 
	if ((dram == INVALID_DRAM) || (upos_pairs < 3)) {
		fmd_hdl_debug(hdl, "Skip rule 4B, upos_pairs=%d\n", upos_pairs);
		return; /* a 4B violation needs at least 3 pairs */
	}
334 
335 	/*
336 	 * Walk through checking for a rule 4B violation.
337 	 * Since we only keep track of two CE pairs per CW we'll only have
338 	 * a max of potentially 16 lements in the array. So as not to run
339 	 * off the end of the array, need to be careful with i and j indexes.
340 	 */
341 	for (i = 0; i < (upos_pairs - 2); i++) {
342 		for (j = i+1; j < (upos_pairs - 1); j++) {
343 			if (upos_array[i].dram != upos_array[j].dram)
344 				/*
345 				 * These two pairs aren't the same dram;
346 				 * continue looking for pairs that are.
347 				 */
348 				continue;
349 			for (k = j+1; k < upos_pairs; k++) {
350 				if (upos_array[j].dram != upos_array[k].dram)
351 					/*
352 					 * DRAMs must be the same for a rule
353 					 * 4B violation. Continue looking for
354 					 * pairs that have the same DRAMs.
355 					 */
356 					continue;
357 				if ((upos_array[i].upos !=
358 				    upos_array[j].upos) ||
359 				    (upos_array[j].upos !=
360 				    upos_array[k].upos)) {
361 					rsc = gmem_find_dimm_rsc(hdl,
362 					    dimm->dimm_serial);
363 					flt = fmd_nvl_create_fault(hdl,
364 					    GMEM_FAULT_DIMM_4B, GMEM_FLTMAXCONF,
365 					    NULL, gmem_dimm_fru(dimm), rsc);
366 					fmd_case_add_ereport(hdl,
367 					    dimm->dimm_case.cc_cp,
368 					    upos_array[i].mq1->mq_ep);
369 					fmd_case_add_ereport(hdl,
370 					    dimm->dimm_case.cc_cp,
371 					    upos_array[i].mq2->mq_ep);
372 					fmd_case_add_ereport(hdl,
373 					    dimm->dimm_case.cc_cp,
374 					    upos_array[j].mq1->mq_ep);
375 					fmd_case_add_ereport(hdl,
376 					    dimm->dimm_case.cc_cp,
377 					    upos_array[j].mq2->mq_ep);
378 					fmd_case_add_ereport(hdl,
379 					    dimm->dimm_case.cc_cp,
380 					    upos_array[k].mq1->mq_ep);
381 					fmd_case_add_ereport(hdl,
382 					    dimm->dimm_case.cc_cp,
383 					    upos_array[k].mq2->mq_ep);
384 					fmd_case_add_suspect(hdl,
385 					    dimm->dimm_case.cc_cp, flt);
386 					fmd_case_solve(hdl,
387 					    dimm->dimm_case.cc_cp);
388 					dimm->dimm_flags |= GMEM_F_FAULTING;
389 					gmem_dimm_dirty(hdl, dimm);
390 					if (rsc != NULL)
391 						nvlist_free(rsc);
392 					return;
393 				}
394 			}
395 		}
396 	}
397 }
398 
399 /*ARGSUSED*/
400 gmem_evdisp_t
401 gmem_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
402 {
403 	uint16_t symbol_pos, erpt_dram, cw;
404 	uint64_t phyaddr, offset;
405 	uint32_t filter_ratio = 0;
406 	int16_t dram;
407 	gmem_dimm_t *dimm;
408 	gmem_page_t *page;
409 	nvlist_t *fru = NULL;
410 	nvlist_t *topo_rsc = NULL;
411 	nvlist_t *rsrc;
412 	const char *uuid;
413 	ce_dispact_t type;
414 	boolean_t diagnose;
415 	char *sn;
416 	int err, rc;
417 
418 	err = nvlist_lookup_boolean_value(nvl, GMEM_ERPT_PAYLOAD_DIAGNOSE,
419 	    &diagnose);
420 	if (err != 0 || diagnose == 0)
421 		return (GMEM_EVD_UNUSED);
422 
423 	if ((nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_PHYSADDR,
424 	    &phyaddr) != 0) ||
425 	    (nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_OFFSET,
426 	    &offset) != 0)) {
427 		fmd_hdl_debug(hdl, "Can't get page phyaddr or offset");
428 		return (GMEM_EVD_BAD);
429 	}
430 
431 	fmd_hdl_debug(hdl, "phyaddr %llx offset %llx", phyaddr, offset);
432 
433 	if ((page = gmem_page_lookup(phyaddr)) != NULL &&
434 	    page->page_case.cc_cp != NULL &&
435 	    fmd_case_solved(hdl, page->page_case.cc_cp))
436 		return (GMEM_EVD_REDUND);
437 
438 	if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_RESOURCE,
439 	    &rsrc) != 0 ||
440 	    nvlist_lookup_string(rsrc, FM_FMRI_HC_SERIAL_ID, &sn) != 0) {
441 		fmd_hdl_debug(hdl, "Can't get dimm serial\n");
442 		return (GMEM_EVD_BAD);
443 	}
444 
445 	fmd_hdl_debug(hdl, "serial %s", sn);
446 
447 	/*
448 	 * Find dimm fru by serial number.
449 	 */
450 	fru = gmem_find_dimm_fru(hdl, sn);
451 
452 	if (fru == NULL) {
453 		fmd_hdl_debug(hdl, "Dimm is not present\n");
454 		return (GMEM_EVD_UNUSED);
455 	}
456 
457 	if ((dimm = gmem_dimm_lookup(hdl, fru)) == NULL &&
458 	    (dimm = gmem_dimm_create(hdl, fru)) == NULL) {
459 		nvlist_free(fru);
460 		return (GMEM_EVD_UNUSED);
461 	}
462 
463 	if (dimm->dimm_case.cc_cp == NULL) {
464 		dimm->dimm_case.cc_cp = gmem_case_create(hdl,
465 		    &dimm->dimm_header, GMEM_PTR_DIMM_CASE, &uuid);
466 	}
467 
468 	/*
469 	 * Add to MQSC correlation lists all CEs which pass validity
470 	 * checks above. If there is no symbol_pos & relative ckword
471 	 * in the ereport, skip rules 4A & 4B checking.
472 	 * If there is no dram in the ereport, skip the rule 4B checking.
473 	 */
474 	if (nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_DRAM, &erpt_dram) != 0)
475 		dram = INVALID_DRAM;
476 	else
477 		dram = (int16_t)erpt_dram;
478 
479 	err = nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_SYMBOLPOS,
480 	    &symbol_pos);
481 	err |= nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_CKW, &cw);
482 
483 	if (err == 0)
484 		fmd_hdl_debug(hdl, "symbol_pos=%d dram=%d cw=%d",
485 		    symbol_pos, dram, cw);
486 
487 	if (!(dimm->dimm_flags & GMEM_F_FAULTING) && (err == 0)) {
488 		uint64_t *now;
489 		uint_t nelem;
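		/*
		 * "__tod" is the time-of-day payload fmd attaches to each
		 * ereport; element 0 is taken here as the seconds value,
		 * which becomes the MQSC correlation timestamp (mq_tstamp).
		 */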
490 		if (nvlist_lookup_uint64_array(nvl,
491 		    "__tod", &now, &nelem) == 0) {
492 			mq_add(hdl, dimm, ep, phyaddr, symbol_pos, dram,
493 			    cw, *now);
494 			mq_prune(hdl, dimm, *now);
495 			mq_check(hdl, dimm, dram);
496 		}
497 	}
498 
499 	type = gmem_mem_name2type(strstr(class, "mem"));
500 
501 	switch (type) {
502 	case CE_DISP_UNKNOWN:
503 		GMEM_STAT_BUMP(ce_unknown);
504 		nvlist_free(fru);
505 		return (GMEM_EVD_UNUSED);
506 	case CE_DISP_INTERMITTENT:
507 		GMEM_STAT_BUMP(ce_interm);
508 		nvlist_free(fru);
509 		return (GMEM_EVD_UNUSED);
510 	case CE_DISP_PERS:
511 		GMEM_STAT_BUMP(ce_clearable_persis);
512 		break;
513 	case CE_DISP_STICKY:
514 		GMEM_STAT_BUMP(ce_sticky);
515 		break;
516 	default:
517 		nvlist_free(fru);
518 		return (GMEM_EVD_BAD);
519 	}
520 
521 	if (page == NULL) {
522 		page = gmem_page_create(hdl, fru, phyaddr, offset);
523 		if (page == NULL) {
524 			nvlist_free(fru);
525 			return (GMEM_EVD_UNUSED);
526 		}
527 	}
528 
529 	nvlist_free(fru);
530 
531 	if (page->page_case.cc_cp == NULL) {
532 		page->page_case.cc_cp = gmem_case_create(hdl,
533 		    &page->page_header, GMEM_PTR_PAGE_CASE, &uuid);
534 	}
535 
536 	switch (type) {
537 	case CE_DISP_PERS:
538 		fmd_hdl_debug(hdl, "adding persistent event to CE serd");
539 		if (page->page_case.cc_serdnm == NULL)
540 			gmem_page_serd_create(hdl, page, nvl);
541 
542 		filter_ratio = gmem_get_serd_filter_ratio(nvl);
543 
544 		fmd_hdl_debug(hdl, "filter_ratio %d\n", filter_ratio);
545 
		if (gmem_serd_record(hdl, page->page_case.cc_serdnm,
		    filter_ratio, ep) == FMD_B_FALSE) {
			return (GMEM_EVD_OK); /* engine hasn't fired */
		}
550 
551 		fmd_hdl_debug(hdl, "ce page serd fired\n");
552 		fmd_case_add_serd(hdl, page->page_case.cc_cp,
553 		    page->page_case.cc_serdnm);
554 		fmd_serd_reset(hdl, page->page_case.cc_serdnm);
555 		break;	/* to retire */
556 
557 	case CE_DISP_STICKY:
558 		fmd_case_add_ereport(hdl, page->page_case.cc_cp, ep);
559 		break;	/* to retire */
560 	}
561 
562 
563 	topo_rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
564 	rc = gmem_page_fault(hdl, gmem_dimm_fru(dimm), topo_rsc,
565 	    ep, phyaddr, offset);
566 
567 	if (rc) {
568 		dimm->dimm_nretired++;
569 		dimm->dimm_retstat.fmds_value.ui64++;
570 		gmem_dimm_dirty(hdl, dimm);
571 		ce_thresh_check(hdl, dimm);
572 	}
573 	return (GMEM_EVD_OK);
574 }
575 
576 void
577 gmem_dimm_close(fmd_hdl_t *hdl, void *arg)
578 {
579 	gmem_dimm_destroy(hdl, arg);
580 }
581