1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 
26 /*
27  * Ereport-handling routines for memory errors
28  */
29 
30 #include <gmem_mem.h>
31 #include <gmem_dimm.h>
32 #include <gmem_page.h>
33 #include <gmem.h>
34 
35 #include <strings.h>
36 #include <string.h>
37 #include <errno.h>
38 #include <assert.h>
39 #include <fm/fmd_api.h>
40 #include <fm/libtopo.h>
41 #include <sys/fm/protocol.h>
42 #include <sys/async.h>
43 #include <sys/errclassify.h>
44 
/*
 * Bit-field masks used to convert an unhashed physical address into its
 * hashed form.  Each mask is spelled as (field-width mask << lowest bit).
 */
#define	OFFBIT		(~(0x7FULL << 11))	/* all bits except PA[17:11] */
#define	BIT28_32	(0x1FULL << 28)		/* PA[32:28] */
#define	BIT13_17	(0x1FULL << 13)		/* PA[17:13] */
#define	BIT18_19	(0x3ULL << 18)		/* PA[19:18] */
#define	BIT11_12	(0x3ULL << 11)		/* PA[12:11] */
50 
/*
 * One row of the lookup table that maps the "mem-*" part of a CE ereport
 * class name to its CE disposition.
 */
struct ce_name2type {
	const char *name;	/* class-name fragment, e.g. "mem-cs" */
	ce_dispact_t type;	/* disposition associated with that fragment */
};

/*
 * Result slot for the topology walk: find_fault_fru() stores a copy of the
 * matching node's FRU here and gmem_find_fault_fru() returns it.
 */
nvlist_t *fru_nvl;
57 
58 static ce_dispact_t
gmem_mem_name2type(const char * name)59 gmem_mem_name2type(const char *name)
60 {
61 	static const struct ce_name2type new[] = {
62 		{ "mem-unk",		CE_DISP_UNKNOWN },
63 		{ "mem-is",		CE_DISP_INTERMITTENT },
64 		{ "mem-cs",		CE_DISP_PERS },
65 		{ "mem-ss",		CE_DISP_STICKY },
66 		{ NULL }
67 	};
68 	const struct ce_name2type *names = &new[0];
69 	const struct ce_name2type *tp;
70 
71 	for (tp = names; tp->name != NULL; tp++) {
72 		if (strcasecmp(name, tp->name) == 0)
73 			return (tp->type);
74 	}
75 
76 	return (CE_DISP_UNKNOWN);
77 }
78 
79 /*ARGSUSED*/
80 static int
find_fault_fru(topo_hdl_t * thp,tnode_t * node,void * arg)81 find_fault_fru(topo_hdl_t *thp, tnode_t *node, void *arg)
82 {
83 	nvlist_t *nvl = (nvlist_t *)arg;
84 	nvlist_t *rsc = NULL, *fru = NULL;
85 	nvlist_t **hcl, **topo_hcl;
86 	uint_t n1, n2;
87 	char *name, *name1, *name2;
88 	char *id1, *id2;
89 	int err, i;
90 
91 	if (topo_node_resource(node, &rsc, &err) < 0)
92 		return (TOPO_WALK_NEXT);
93 
94 	err = nvlist_lookup_nvlist_array(rsc, FM_FMRI_HC_LIST, &topo_hcl, &n1);
95 
96 	if (err != 0) {
97 		nvlist_free(rsc);
98 		return (TOPO_WALK_NEXT);
99 	}
100 
101 	(void) nvlist_lookup_string(topo_hcl[n1 - 1], FM_FMRI_HC_NAME, &name);
102 	if (strcmp(name, "chip") != 0) {
103 		nvlist_free(rsc);
104 		return (TOPO_WALK_NEXT);
105 	}
106 
107 	(void) nvlist_lookup_nvlist_array(nvl, FM_FMRI_HC_LIST, &hcl, &n2);
108 
109 	if (n1 != n2) {
110 		nvlist_free(rsc);
111 		return (TOPO_WALK_NEXT);
112 	}
113 
114 	for (i = 0; i < n1; i++) {
115 		(void) nvlist_lookup_string(topo_hcl[i], FM_FMRI_HC_NAME,
116 		    &name1);
117 		(void) nvlist_lookup_string(topo_hcl[i], FM_FMRI_HC_ID, &id1);
118 		(void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME, &name2);
119 		(void) nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &id2);
120 		if (strcmp(name1, name2) != 0 || strcmp(id1, id2) != 0) {
121 			nvlist_free(rsc);
122 			return (TOPO_WALK_NEXT);
123 		}
124 	}
125 
126 	(void) topo_node_fru(node, &fru, NULL, &err);
127 	if (fru != NULL) {
128 		(void) nvlist_dup(fru, &fru_nvl, NV_UNIQUE_NAME);
129 		nvlist_free(fru);
130 	}
131 	nvlist_free(rsc);
132 	return (TOPO_WALK_TERMINATE);
133 }
134 
135 nvlist_t *
gmem_find_fault_fru(fmd_hdl_t * hdl,nvlist_t * nvl)136 gmem_find_fault_fru(fmd_hdl_t *hdl, nvlist_t *nvl) {
137 	topo_hdl_t *thp;
138 	topo_walk_t *twp;
139 	int err;
140 	fru_nvl = NULL;
141 
142 	if ((thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION)) == NULL)
143 		return (NULL);
144 
145 	if ((twp = topo_walk_init(thp, FM_FMRI_SCHEME_HC,
146 	    find_fault_fru, nvl, &err)) == NULL) {
147 		fmd_hdl_topo_rele(hdl, thp);
148 		return (NULL);
149 	}
150 
151 	(void) topo_walk_step(twp, TOPO_WALK_CHILD);
152 	topo_walk_fini(twp);
153 	fmd_hdl_topo_rele(hdl, thp);
154 	return (fru_nvl);
155 }
156 
157 /*
158  * fault the FRU of the common detector between two DIMMs
159  */
160 void
gmem_gen_datapath_fault(fmd_hdl_t * hdl,nvlist_t * det)161 gmem_gen_datapath_fault(fmd_hdl_t *hdl, nvlist_t *det)
162 {
163 	char *name, *id;
164 	nvlist_t **hcl1, **hcl;
165 	uint_t n;
166 	int i, j;
167 	fmd_case_t *cp;
168 	nvlist_t *fltlist, *rsrc;
169 	nvlist_t *fru = NULL;
170 
171 	if (nvlist_lookup_nvlist_array(det, FM_FMRI_HC_LIST, &hcl1, &n) < 0)
172 		return;
173 
174 	for (i = 0; i < n; i++) {
175 		(void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
176 		if (strcmp(name, "chip") == 0)
177 			break;
178 	}
179 
180 	n = i + 1;
181 	hcl = fmd_hdl_zalloc(hdl, sizeof (nvlist_t *) * n, FMD_SLEEP);
182 	if (hcl == NULL)
183 		return;
184 
185 	for (i = 0; i < n; i++) {
186 		(void) nvlist_alloc(&hcl[i],
187 		    NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0);
188 	}
189 
190 	for (i = 0, j = 0; i < n; i++) {
191 		(void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_NAME, &name);
192 		(void) nvlist_lookup_string(hcl1[i], FM_FMRI_HC_ID, &id);
193 		(void) nvlist_add_string(hcl[j], FM_FMRI_HC_NAME, name);
194 		(void) nvlist_add_string(hcl[j], FM_FMRI_HC_ID, id);
195 		j++;
196 		if (strcmp(name, "chip") == 0)
197 			break;
198 	}
199 
200 	if (nvlist_alloc(&rsrc,  NV_UNIQUE_NAME|NV_UNIQUE_NAME_TYPE, 0) != 0) {
201 		for (i = 0; i < n; i++) {
202 			nvlist_free(hcl[i]);
203 		}
204 		fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
205 	}
206 
207 	if (nvlist_add_uint8(rsrc, FM_VERSION, FM_HC_SCHEME_VERSION) != 0 ||
208 	    nvlist_add_string(rsrc, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0 ||
209 	    nvlist_add_string(rsrc, FM_FMRI_HC_ROOT, "") != 0 ||
210 	    nvlist_add_uint32(rsrc, FM_FMRI_HC_LIST_SZ, n) != 0 ||
211 	    nvlist_add_nvlist_array(rsrc, FM_FMRI_HC_LIST, hcl, n) != 0) {
212 		for (i = 0; i < n; i++) {
213 			nvlist_free(hcl[i]);
214 		}
215 		fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
216 		nvlist_free(rsrc);
217 	}
218 
219 	fru = gmem_find_fault_fru(hdl, rsrc);
220 	if (fru != NULL) {
221 		cp = fmd_case_open(hdl, NULL);
222 		fltlist = fmd_nvl_create_fault(hdl, "fault.memory.datapath",
223 		    100, fru, fru, fru);
224 		fmd_case_add_suspect(hdl, cp, fltlist);
225 		fmd_case_solve(hdl, cp);
226 		nvlist_free(fru);
227 	}
228 
229 	for (i = 0; i < n; i++) {
230 		nvlist_free(hcl[i]);
231 	}
232 
233 	fmd_hdl_free(hdl, hcl, sizeof (nvlist_t *) * n);
234 	nvlist_free(rsrc);
235 }
236 
237 /*
238  * formula to conver an unhashed address to hashed address
239  * PA[17:11] = (PA[32:28] xor PA[17:13]) :: ((PA[19:18] xor PA[12:11])
240  */
241 static void
gmem_to_hashed_addr(uint64_t * addr,uint64_t afar)242 gmem_to_hashed_addr(uint64_t *addr, uint64_t afar)
243 {
244 
245 	*addr = (afar & OFFBIT) | ((afar & BIT28_32) >> 15) ^ (afar & BIT13_17)
246 	    | ((afar & BIT18_19) >> 7) ^ (afar & BIT11_12);
247 }
248 
249 /*
250  * check if a dimm has n CEs that have the same symbol-in-error
251  */
252 int
upos_thresh_check(gmem_dimm_t * dimm,uint16_t upos,uint32_t threshold)253 upos_thresh_check(gmem_dimm_t *dimm, uint16_t upos, uint32_t threshold)
254 {
255 	int i;
256 	gmem_mq_t *ip, *next;
257 	int count = 0;
258 
259 	for (i = 0; i < GMEM_MAX_CKWDS; i++) {
260 		for (ip = gmem_list_next(&dimm->mq_root[i]); ip != NULL;
261 		    ip = next) {
262 			next = gmem_list_next(ip);
263 			if (ip->mq_unit_position == upos) {
264 				count++;
265 				if (count >= threshold)
266 					return (1);
267 			}
268 		}
269 	}
270 	return (0);
271 }
272 
273 /*
274  * check if smaller number of retired pages > 1/16 of larger number of
275  * retired pages
276  */
277 int
check_bad_rw_retired_pages(fmd_hdl_t * hdl,gmem_dimm_t * d1,gmem_dimm_t * d2)278 check_bad_rw_retired_pages(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2)
279 {
280 	uint_t sret, lret;
281 	double ratio;
282 
283 	sret = lret = 0;
284 
285 	if (d2->dimm_nretired < d1->dimm_nretired) {
286 		sret = d2->dimm_nretired;
287 		lret = d1->dimm_nretired;
288 	} else if (d2->dimm_nretired > d1->dimm_nretired) {
289 		sret = d1->dimm_nretired;
290 		lret = d2->dimm_nretired;
291 	} else
292 		return (0);
293 
294 	ratio = lret * GMEM_MQ_RATIO;
295 
296 	if (sret > ratio) {
297 		fmd_hdl_debug(hdl, "sret=%d lret=%d ratio=%.3f",
298 		    sret, lret, ratio);
299 		return (1);
300 	}
301 	return (0);
302 }
303 
304 /*
305  * check bad rw on any two DIMMs. The check succeeds if
306  * - each DIMM has a n CEs which have the same symbol-in-error,
307  * - the smaller number of retired pages > 1/16 larger number of retired pages
308  */
309 static int
check_bad_rw_between_dimms(fmd_hdl_t * hdl,gmem_dimm_t * d1,gmem_dimm_t * d2,uint16_t * rupos)310 check_bad_rw_between_dimms(fmd_hdl_t *hdl, gmem_dimm_t *d1, gmem_dimm_t *d2,
311     uint16_t *rupos)
312 {
313 	int i;
314 	gmem_mq_t *ip, *next;
315 	uint16_t upos;
316 
317 	for (i = 0; i < GMEM_MAX_CKWDS; i++) {
318 		for (ip = gmem_list_next(&d1->mq_root[i]); ip != NULL;
319 		    ip = next) {
320 			next = gmem_list_next(ip);
321 			upos = ip->mq_unit_position;
322 			if (upos_thresh_check(d1, upos, gmem.gm_nupos)) {
323 				if (upos_thresh_check(d2, upos,
324 				    gmem.gm_nupos)) {
325 					if (check_bad_rw_retired_pages(hdl,
326 					    d1, d2)) {
327 						*rupos = upos;
328 						return (1);
329 					}
330 				}
331 			}
332 		}
333 	}
334 
335 	return (0);
336 }
337 
338 static void
bad_reader_writer_check(fmd_hdl_t * hdl,nvlist_t * det,gmem_dimm_t * ce_dimm)339 bad_reader_writer_check(fmd_hdl_t *hdl, nvlist_t *det, gmem_dimm_t *ce_dimm)
340 {
341 	gmem_dimm_t *d, *next;
342 	uint16_t upos;
343 
344 	for (d = gmem_list_next(&gmem.gm_dimms); d != NULL; d = next) {
345 		next = gmem_list_next(d);
346 		if (d == ce_dimm)
347 			continue;
348 		if (!gmem_same_datapath_dimms(hdl, ce_dimm, d))
349 			continue;
350 		if (check_bad_rw_between_dimms(hdl, ce_dimm, d, &upos)) {
351 			gmem_gen_datapath_fault(hdl, det);
352 			gmem_save_symbol_error(hdl, ce_dimm, upos);
353 			fmd_hdl_debug(hdl,
354 			    "check_bad_rw_dimms succeeded: %s %s\n",
355 			    ce_dimm->dimm_serial, d->dimm_serial);
356 			return;
357 		}
358 	}
359 }
360 
361 /*
362  * rule 5a checking. The check succeeds if
363  * - nretired >= 512
364  * - nretired >= 128 and (addr_hi - addr_low) / (nretired -1 ) > 512KB
365  */
366 static void
ce_thresh_check(fmd_hdl_t * hdl,gmem_dimm_t * dimm)367 ce_thresh_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
368 {
369 	nvlist_t *flt, *rsrc;
370 	fmd_case_t *cp;
371 	uint_t nret;
372 	uint64_t delta_addr = 0;
373 
374 	if (dimm->dimm_flags & GMEM_F_FAULTING)
375 		return;
376 
377 	nret = dimm->dimm_nretired;
378 
379 	if (nret < gmem.gm_low_ce_thresh)
380 		return;
381 
382 	if (dimm->dimm_phys_addr_hi >= dimm->dimm_phys_addr_low)
383 		delta_addr =
384 		    (dimm->dimm_phys_addr_hi - dimm->dimm_phys_addr_low) /
385 		    (nret - 1);
386 
387 	if (nret >= gmem.gm_max_retired_pages || delta_addr > GMEM_MQ_512KB) {
388 
389 		fmd_hdl_debug(hdl, "ce_thresh_check succeeded nret=%d", nret);
390 		dimm->dimm_flags |= GMEM_F_FAULTING;
391 		gmem_dimm_dirty(hdl, dimm);
392 
393 		cp = fmd_case_open(hdl, NULL);
394 		rsrc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
395 		flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_PAGES,
396 		    GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsrc);
397 		fmd_case_add_suspect(hdl, cp, flt);
398 		fmd_case_solve(hdl, cp);
399 		nvlist_free(rsrc);
400 	}
401 }
402 
403 /*
404  * rule 5b checking. The check succeeds if more than 120
405  * non-intermittent CEs are reported against one symbol
406  * position of one afar in 72 hours
407  */
408 static void
mq_5b_check(fmd_hdl_t * hdl,gmem_dimm_t * dimm)409 mq_5b_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
410 {
411 	nvlist_t *flt, *rsrc;
412 	fmd_case_t *cp;
413 	gmem_mq_t *ip, *next;
414 	int cw;
415 
416 	for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
417 		for (ip = gmem_list_next(&dimm->mq_root[cw]);
418 		    ip != NULL; ip = next) {
419 			next = gmem_list_next(ip);
420 			if (ip->mq_dupce_count >= gmem.gm_dupce) {
421 				fmd_hdl_debug(hdl,
422 				    "mq_5b_check succeeded: duplicate CE=%d",
423 				    ip->mq_dupce_count);
424 				cp = fmd_case_open(hdl, NULL);
425 				rsrc = gmem_find_dimm_rsc(hdl,
426 				    dimm->dimm_serial);
427 				flt = fmd_nvl_create_fault(hdl,
428 				    GMEM_FAULT_DIMM_PAGES, GMEM_FLTMAXCONF,
429 				    NULL, gmem_dimm_fru(dimm), rsrc);
430 				dimm->dimm_flags |= GMEM_F_FAULTING;
431 				gmem_dimm_dirty(hdl, dimm);
432 				fmd_case_add_suspect(hdl, cp, flt);
433 				fmd_case_solve(hdl, cp);
434 				nvlist_free(rsrc);
435 				return;
436 			}
437 		}
438 	}
439 }
440 
441 /*
442  * delete the expired duplicate CE time stamps
443  */
444 static void
mq_prune_dup(fmd_hdl_t * hdl,gmem_mq_t * ip,uint64_t now)445 mq_prune_dup(fmd_hdl_t *hdl, gmem_mq_t *ip, uint64_t now)
446 {
447 	tstamp_t *tsp, *next;
448 
449 	for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
450 	    tsp = next) {
451 		next = gmem_list_next(tsp);
452 		if (tsp->tstamp < now - GMEM_MQ_TIMELIM) {
453 			gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);
454 			fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
455 			ip->mq_dupce_count--;
456 		}
457 	}
458 }
459 
460 static void
mq_update(fmd_hdl_t * hdl,fmd_event_t * ep,gmem_mq_t * ip,uint64_t now)461 mq_update(fmd_hdl_t *hdl, fmd_event_t *ep, gmem_mq_t *ip, uint64_t now)
462 {
463 	tstamp_t *tsp;
464 
465 	ip->mq_tstamp = now;
466 	ip->mq_ep = ep;
467 	if (fmd_serd_exists(hdl, ip->mq_serdnm))
468 		fmd_serd_destroy(hdl, ip->mq_serdnm);
469 
470 	fmd_serd_create(hdl, ip->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
471 	(void) fmd_serd_record(hdl, ip->mq_serdnm, ep);
472 
473 	tsp = fmd_hdl_zalloc(hdl, sizeof (tstamp_t), FMD_SLEEP);
474 	tsp->tstamp = now;
475 	gmem_list_append(&ip->mq_dupce_tstamp, tsp);
476 	ip->mq_dupce_count++;
477 }
478 
479 /*
480  * Create a fresh index block for MQSC CE correlation.
481  */
482 gmem_mq_t *
mq_create(fmd_hdl_t * hdl,fmd_event_t * ep,uint64_t afar,uint16_t upos,uint16_t ckwd,uint64_t now)483 mq_create(fmd_hdl_t *hdl, fmd_event_t *ep,
484     uint64_t afar, uint16_t upos, uint16_t ckwd, uint64_t now)
485 {
486 	gmem_mq_t *cp;
487 	tstamp_t *tsp;
488 
489 	cp = fmd_hdl_zalloc(hdl, sizeof (gmem_mq_t), FMD_SLEEP);
490 	cp->mq_tstamp = now;
491 	cp->mq_ckwd = ckwd;
492 	cp->mq_phys_addr = afar;
493 	cp->mq_unit_position = upos;
494 	cp->mq_ep = ep;
495 	cp->mq_serdnm =
496 	    gmem_mq_serdnm_create(hdl, "mq", afar, ckwd, upos);
497 
498 	tsp = fmd_hdl_zalloc(hdl, sizeof (tstamp_t), FMD_SLEEP);
499 	tsp->tstamp = now;
500 	gmem_list_append(&cp->mq_dupce_tstamp, tsp);
501 	cp->mq_dupce_count = 1;
502 
503 	/*
504 	 * Create SERD to keep this event from being removed
505 	 * by fmd which may not know there is an event pointer
506 	 * saved here. This SERD is *never* meant to fire.
507 	 */
508 	if (fmd_serd_exists(hdl, cp->mq_serdnm))
509 		fmd_serd_destroy(hdl, cp->mq_serdnm);
510 
511 	fmd_serd_create(hdl, cp->mq_serdnm, GMEM_MQ_SERDN, GMEM_MQ_SERDT);
512 	(void) fmd_serd_record(hdl, cp->mq_serdnm, ep);
513 
514 	return (cp);
515 }
516 
517 gmem_mq_t *
mq_destroy(fmd_hdl_t * hdl,gmem_list_t * lp,gmem_mq_t * ip)518 mq_destroy(fmd_hdl_t *hdl, gmem_list_t *lp, gmem_mq_t *ip)
519 {
520 	gmem_mq_t *jp = gmem_list_next(ip);
521 	tstamp_t *tsp, *next;
522 
523 
524 	if (ip->mq_serdnm != NULL) {
525 		if (fmd_serd_exists(hdl, ip->mq_serdnm))
526 			fmd_serd_destroy(hdl, ip->mq_serdnm);
527 		fmd_hdl_strfree(hdl, ip->mq_serdnm);
528 		ip->mq_serdnm = NULL;
529 	}
530 
531 	for (tsp = gmem_list_next(&ip->mq_dupce_tstamp); tsp != NULL;
532 	    tsp = next) {
533 		next = gmem_list_next(tsp);
534 		gmem_list_delete(&ip->mq_dupce_tstamp, &tsp->ts_l);
535 		fmd_hdl_free(hdl, tsp, sizeof (tstamp_t));
536 	}
537 
538 	gmem_list_delete(lp, &ip->mq_l);
539 	fmd_hdl_free(hdl, ip, sizeof (gmem_mq_t));
540 
541 	return (jp);
542 }
543 
544 
545 /*
546  * Add an index block for a new CE, sorted
547  * a) by ascending unit position
548  * b) order of arrival (~= time order)
549  */
550 void
mq_add(fmd_hdl_t * hdl,gmem_dimm_t * dimm,fmd_event_t * ep,uint64_t afar,uint16_t unit_position,uint16_t ckwd,uint64_t now)551 mq_add(fmd_hdl_t *hdl, gmem_dimm_t *dimm, fmd_event_t *ep,
552     uint64_t afar, uint16_t unit_position, uint16_t ckwd,
553     uint64_t now)
554 {
555 	gmem_mq_t *ip, *jp;
556 	int cw = (int)ckwd;
557 
558 	for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
559 		if (ip->mq_unit_position > unit_position) {
560 			/* list is in unit position order */
561 			break;
562 		} else if (ip->mq_unit_position == unit_position &&
563 		    ip->mq_phys_addr == afar) {
564 			/*
565 			 * Found a duplicate cw, unit_position, and afar.
566 			 * Delete this node, to be superseded by the new
567 			 * node added below.
568 			 * update the mq_t structure
569 			 */
570 			mq_update(hdl, ep, ip, now);
571 			return;
572 		} else {
573 			ip = gmem_list_next(ip);
574 		}
575 	}
576 
577 	jp = mq_create(hdl, ep, afar, unit_position, cw, now);
578 	if (ip == NULL)
579 		gmem_list_append(&dimm->mq_root[cw], jp);
580 	else
581 		gmem_list_insert_before(&dimm->mq_root[cw], ip, jp);
582 }
583 
584 /*
585  * Prune the MQSC index lists (one for each checkword), by deleting
586  * outdated index blocks from each list.
587  */
588 
589 void
mq_prune(fmd_hdl_t * hdl,gmem_dimm_t * dimm,uint64_t now)590 mq_prune(fmd_hdl_t *hdl, gmem_dimm_t *dimm, uint64_t now)
591 {
592 	gmem_mq_t *ip;
593 	int cw;
594 
595 	for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
596 		for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL; ) {
597 			if (ip->mq_tstamp < now - GMEM_MQ_TIMELIM) {
598 				/*
599 				 * This event has timed out - delete the
600 				 * mq block as well as serd for the event.
601 				 */
602 				ip = mq_destroy(hdl, &dimm->mq_root[cw], ip);
603 			} else {
604 				mq_prune_dup(hdl, ip, now);
605 				/* tstamp < now - ce_t */
606 				ip = gmem_list_next(ip);
607 			}
608 		} /* per checkword */
609 	} /* cw = 0...3 */
610 }
611 
/*
 * Check the MQSC index lists (one for each checkword) by making a
 * complete pass through each list, checking if the criteria for
 * Rule 4A has been met.  Rule 4A checking is done for each checkword.
 *
 * Rule 4A: fault a DIMM  "whenever Solaris reports two or more CEs from
 * two or more different physical addresses on each of two or more different
 * bit positions from the same DIMM within 72 hours of each other, and all
 * the addresses are in the same relative checkword (that is, the AFARs
 * are all the same modulo 64).  [Note: This means at least 4 CEs; two
 * from one bit position, with unique addresses, and two from another,
 * also with unique addresses, and the lower 6 bits of all the addresses
 * are the same."
 *
 * hdl		fmd module handle
 * dimm		DIMM whose mq_root[] lists are examined
 *
 * As implemented here: within one checkword list, the first two entries
 * sharing a unit position form a "pair".  As soon as a checkword is found
 * to hold two or more pairs, the ereports of those pairs are added to the
 * DIMM's case, the DIMM is marked GMEM_F_FAULTING, a GMEM_FAULT_DIMM_4A
 * suspect is added, the case is solved, and the function returns.
 */

void
mq_check(fmd_hdl_t *hdl, gmem_dimm_t *dimm)
{
	int upos_pairs, curr_upos, cw, i, j;
	nvlist_t *flt, *rsc;
	typedef struct upos_pair {
		int upos;		/* unit position shared by the pair */
		gmem_mq_t *mq1;		/* first CE of the pair */
		gmem_mq_t *mq2;		/* second CE of the pair */
	} upos_pair_t;
	/*
	 * NOTE(review): i is bounded only by the assert at the bottom of
	 * the checkword loop; this presumably relies on the function being
	 * run after every insertion so that no list ever holds many pairs.
	 * Confirm against callers.
	 */
	upos_pair_t upos_array[16]; /* max per cw = 2, * 8 cw's */
	gmem_mq_t *ip;

	/*
	 * Each upos_array[] member represents a pair of CEs for the same
	 * unit position (symbol) which is a 4 bit nibble.
	 * MQSC rule 4 requires pairs of CEs from the same symbol (same DIMM
	 * for rule 4A, and same DRAM for rule 4B) for a violation - this
	 * is why CE pairs are tracked.
	 */
	upos_pairs = 0;
	upos_array[0].mq1 = NULL;

	for (cw = 0; cw < GMEM_MAX_CKWDS; cw++) {
		/* Pairs found in this checkword occupy [upos_pairs, i). */
		i = upos_pairs;
		curr_upos = -1;

		/*
		 * mq_root[] is an array of cumulative lists of CEs
		 * indexed by checkword where the list is in unit position
		 * order. Loop through checking for duplicate unit position
		 * entries (filled in at mq_create()).
		 * The upos_array[] is filled in each time a duplicate
		 * unit position is found; the first time through the loop
		 * of a unit position sets curr_upos but does not fill in
		 * upos_array[] until the second symbol is found.
		 */
		for (ip = gmem_list_next(&dimm->mq_root[cw]); ip != NULL;
		    ip = gmem_list_next(ip)) {
			if (curr_upos != ip->mq_unit_position) {
				/* Set initial current position */
				curr_upos = ip->mq_unit_position;
			} else if (i > upos_pairs &&
			    curr_upos == upos_array[i-1].upos) {
				/*
				 * Only keep track of CE pairs; skip
				 * triples, quads, etc...
				 */
				continue;
			} else if (upos_array[i].mq1 == NULL) {
				/* Have a pair. Add to upos_array[] */
				fmd_hdl_debug(hdl, "pair:upos=%d",
				    curr_upos);
				upos_array[i].upos = curr_upos;
				/* The previous entry has the same upos. */
				upos_array[i].mq1 = gmem_list_prev(ip);
				upos_array[i].mq2 = ip;
				/* Mark the next slot as empty. */
				upos_array[++i].mq1 = NULL;
			}
		}
		if (i - upos_pairs >= 2) {
			/* Rule 4A violation */
			rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
			flt = fmd_nvl_create_fault(hdl, GMEM_FAULT_DIMM_4A,
			    GMEM_FLTMAXCONF, NULL, gmem_dimm_fru(dimm), rsc);
			/* Attach both ereports of every pair in this cw. */
			for (j = upos_pairs; j < i; j++) {
				fmd_case_add_ereport(hdl,
				    dimm->dimm_case.cc_cp,
				    upos_array[j].mq1->mq_ep);
				fmd_case_add_ereport(hdl,
				    dimm->dimm_case.cc_cp,
				    upos_array[j].mq2->mq_ep);
			}
			dimm->dimm_flags |= GMEM_F_FAULTING;
			gmem_dimm_dirty(hdl, dimm);
			fmd_case_add_suspect(hdl, dimm->dimm_case.cc_cp, flt);
			fmd_case_solve(hdl, dimm->dimm_case.cc_cp);
			nvlist_free(rsc);
			return;
		}
		/* Keep this checkword's pairs; the next one appends. */
		upos_pairs = i;
		assert(upos_pairs < 16);
	}
}
710 
711 /*ARGSUSED*/
712 gmem_evdisp_t
gmem_ce(fmd_hdl_t * hdl,fmd_event_t * ep,nvlist_t * nvl,const char * class)713 gmem_ce(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, const char *class)
714 {
715 	uint16_t symbol_pos, cw;
716 	uint64_t phyaddr, offset, addr;
717 	uint32_t filter_ratio = 0;
718 	gmem_dimm_t *dimm;
719 	gmem_page_t *page;
720 	nvlist_t *fru = NULL;
721 	nvlist_t *topo_rsc = NULL;
722 	nvlist_t *rsrc, *det;
723 	const char *uuid;
724 	ce_dispact_t type;
725 	boolean_t diagnose;
726 	char *sn;
727 	int err, rc;
728 	uint64_t *now;
729 	uint_t nelem;
730 	int skip_error = 0;
731 
732 	err = nvlist_lookup_boolean_value(nvl, GMEM_ERPT_PAYLOAD_DIAGNOSE,
733 	    &diagnose);
734 	if (err != 0 || diagnose == 0)
735 		return (GMEM_EVD_UNUSED);
736 
737 	if ((nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_PHYSADDR,
738 	    &phyaddr) != 0) ||
739 	    (nvlist_lookup_uint64(nvl, GMEM_ERPT_PAYLOAD_OFFSET,
740 	    &offset) != 0)) {
741 		fmd_hdl_debug(hdl, "Can't get page phyaddr or offset");
742 		return (GMEM_EVD_BAD);
743 	}
744 
745 	fmd_hdl_debug(hdl, "phyaddr %llx offset %llx", phyaddr, offset);
746 
747 	if ((page = gmem_page_lookup(phyaddr)) != NULL &&
748 	    page->page_case.cc_cp != NULL &&
749 	    fmd_case_solved(hdl, page->page_case.cc_cp))
750 		return (GMEM_EVD_REDUND);
751 
752 	if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_RESOURCE,
753 	    &rsrc) != 0 ||
754 	    nvlist_lookup_string(rsrc, FM_FMRI_HC_SERIAL_ID, &sn) != 0) {
755 		fmd_hdl_debug(hdl, "Can't get dimm serial\n");
756 		return (GMEM_EVD_BAD);
757 	}
758 
759 	fmd_hdl_debug(hdl, "serial %s", sn);
760 
761 	if (nvlist_lookup_nvlist(nvl, GMEM_ERPT_PAYLOAD_DETECTOR, &det) != 0)
762 		return (GMEM_EVD_BAD);
763 
764 	/*
765 	 * Find dimm fru by serial number.
766 	 */
767 	fru = gmem_find_dimm_fru(hdl, sn);
768 
769 	if (fru == NULL) {
770 		fmd_hdl_debug(hdl, "Dimm is not present\n");
771 		return (GMEM_EVD_UNUSED);
772 	}
773 
774 	if ((dimm = gmem_dimm_lookup(hdl, fru)) == NULL &&
775 	    (dimm = gmem_dimm_create(hdl, fru, det)) == NULL) {
776 		nvlist_free(fru);
777 		return (GMEM_EVD_UNUSED);
778 	}
779 
780 	if (dimm->dimm_case.cc_cp == NULL) {
781 		dimm->dimm_case.cc_cp = gmem_case_create(hdl,
782 		    &dimm->dimm_header, GMEM_PTR_DIMM_CASE, &uuid);
783 	}
784 
785 	/*
786 	 * Add to MQSC correlation lists all CEs which pass validity
787 	 * checks above. If there is no symbol_pos & relative ckword
788 	 * in the ereport, skip rule 4A checking.
789 	 */
790 
791 	err = nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_SYMBOLPOS,
792 	    &symbol_pos);
793 	err |= nvlist_lookup_uint16(nvl, GMEM_ERPT_PAYLOAD_CKW, &cw);
794 
795 	if (err == 0) {
796 		fmd_hdl_debug(hdl, "symbol_pos=%d cw=%d", symbol_pos, cw);
797 
798 		if (nvlist_lookup_uint64_array(nvl,
799 		    "__tod", &now, &nelem) == 0) {
800 			skip_error = gmem_check_symbol_error(hdl, dimm,
801 			    symbol_pos);
802 
803 			if (!skip_error ||
804 			    !(dimm->dimm_flags & GMEM_F_FAULTING))
805 				mq_add(hdl, dimm, ep, phyaddr, symbol_pos,
806 				    cw, *now);
807 
808 			mq_prune(hdl, dimm, *now);
809 
810 			if (!skip_error)
811 				bad_reader_writer_check(hdl, det, dimm);
812 			if (!(dimm->dimm_flags & GMEM_F_FAULTING)) {
813 				mq_check(hdl, dimm);
814 				mq_5b_check(hdl, dimm);
815 			}
816 		}
817 	}
818 
819 	type = gmem_mem_name2type(strstr(class, "mem"));
820 
821 	switch (type) {
822 	case CE_DISP_UNKNOWN:
823 		GMEM_STAT_BUMP(ce_unknown);
824 		nvlist_free(fru);
825 		return (GMEM_EVD_UNUSED);
826 	case CE_DISP_INTERMITTENT:
827 		GMEM_STAT_BUMP(ce_interm);
828 		nvlist_free(fru);
829 		return (GMEM_EVD_UNUSED);
830 	case CE_DISP_PERS:
831 		GMEM_STAT_BUMP(ce_clearable_persis);
832 		break;
833 	case CE_DISP_STICKY:
834 		GMEM_STAT_BUMP(ce_sticky);
835 		break;
836 	default:
837 		nvlist_free(fru);
838 		return (GMEM_EVD_BAD);
839 	}
840 
841 	if (gmem_check_symbol_error(hdl, dimm, symbol_pos)) {
842 		nvlist_free(fru);
843 		return (GMEM_EVD_REDUND);
844 	}
845 
846 	if (page == NULL) {
847 		page = gmem_page_create(hdl, fru, phyaddr, offset);
848 		if (page == NULL) {
849 			nvlist_free(fru);
850 			return (GMEM_EVD_UNUSED);
851 		}
852 	}
853 
854 	nvlist_free(fru);
855 
856 	if (page->page_case.cc_cp == NULL) {
857 		page->page_case.cc_cp = gmem_case_create(hdl,
858 		    &page->page_header, GMEM_PTR_PAGE_CASE, &uuid);
859 	}
860 
861 	switch (type) {
862 	case CE_DISP_PERS:
863 		fmd_hdl_debug(hdl, "adding persistent event to CE serd");
864 		if (page->page_case.cc_serdnm == NULL)
865 			gmem_page_serd_create(hdl, page, nvl);
866 
867 		filter_ratio = gmem_get_serd_filter_ratio(nvl);
868 
869 		fmd_hdl_debug(hdl, "filter_ratio %d\n", filter_ratio);
870 
871 		if (gmem_serd_record(hdl, page->page_case.cc_serdnm,
872 		    filter_ratio, ep) == FMD_B_FALSE) {
873 				return (GMEM_EVD_OK); /* engine hasn't fired */
874 		}
875 
876 		fmd_hdl_debug(hdl, "ce page serd fired\n");
877 		fmd_case_add_serd(hdl, page->page_case.cc_cp,
878 		    page->page_case.cc_serdnm);
879 		fmd_serd_reset(hdl, page->page_case.cc_serdnm);
880 		break;	/* to retire */
881 
882 	case CE_DISP_STICKY:
883 		fmd_case_add_ereport(hdl, page->page_case.cc_cp, ep);
884 		break;	/* to retire */
885 	}
886 
887 
888 	topo_rsc = gmem_find_dimm_rsc(hdl, dimm->dimm_serial);
889 	rc = gmem_page_fault(hdl, gmem_dimm_fru(dimm), topo_rsc,
890 	    ep, phyaddr, offset);
891 
892 	if (rc) {
893 		gmem_to_hashed_addr(&addr, phyaddr);
894 
895 		if (addr > dimm->dimm_phys_addr_hi)
896 			dimm->dimm_phys_addr_hi = addr;
897 		if (addr < dimm->dimm_phys_addr_low)
898 			dimm->dimm_phys_addr_low = addr;
899 
900 		dimm->dimm_nretired++;
901 		dimm->dimm_retstat.fmds_value.ui64++;
902 		gmem_dimm_dirty(hdl, dimm);
903 		ce_thresh_check(hdl, dimm);
904 	}
905 	return (GMEM_EVD_OK);
906 }
907 
/*
 * Close hook for a DIMM: hands arg straight to gmem_dimm_destroy().
 * NOTE(review): arg is presumably the gmem_dimm_t being closed - confirm
 * against the caller.
 */
void
gmem_dimm_close(fmd_hdl_t *hdl, void *arg)
{
	gmem_dimm_destroy(hdl, arg);
}
913