xref: /illumos-gate/usr/src/uts/i86pc/cpu/genuineintel/gintel_main.c (revision ee9ef9e5478646701c1f0cc347324b1c7bad1efa)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
/*
 * Intel model-specific support.  Right now all this consists of is
 * modifying the ereport subclass to produce different ereport classes,
 * so that we can have different diagnosis rules and corresponding faults.
 */
32 
33 #include <sys/types.h>
34 #include <sys/cmn_err.h>
35 #include <sys/modctl.h>
36 #include <sys/mca_x86.h>
37 #include <sys/cpu_module_ms_impl.h>
38 #include <sys/mc_intel.h>
39 #include <sys/pci_cfgspace.h>
40 #include <sys/fm/protocol.h>
41 
42 int gintel_ms_support_disable = 0;
43 int gintel_error_action_return = 0;
44 int gintel_ms_unconstrained = 0;
45 
46 int quickpath;
47 int max_bus_number = 0xff;
48 
49 #define	ERR_COUNTER_INDEX	2
50 #define	MAX_CPU_NODES		2
51 #define	N_MC_COR_ECC_CNT	6
52 uint32_t err_counter_array[MAX_CPU_NODES][ERR_COUNTER_INDEX][N_MC_COR_ECC_CNT];
53 uint8_t	err_counter_index[MAX_CPU_NODES];
54 
55 #define	MAX_BUS_NUMBER  max_bus_number
56 #define	SOCKET_BUS(cpu) (MAX_BUS_NUMBER - (cpu))
57 
58 #define	MC_COR_ECC_CNT(chipid, reg)	(*pci_getl_func)(SOCKET_BUS(chipid), \
59     NEHALEM_EP_MEMORY_CONTROLLER_DEV, NEHALEM_EP_MEMORY_CONTROLLER_FUNC, \
60     0x80 + (reg) * 4)
61 
62 #define	MSCOD_MEM_ECC_READ	0x1
63 #define	MSCOD_MEM_ECC_SCRUB	0x2
64 #define	MSCOD_MEM_WR_PARITY	0x4
65 #define	MSCOD_MEM_REDUNDANT_MEM	0x8
66 #define	MSCOD_MEM_SPARE_MEM	0x10
67 #define	MSCOD_MEM_ILLEGAL_ADDR	0x20
68 #define	MSCOD_MEM_BAD_ID	0x40
69 #define	MSCOD_MEM_ADDR_PARITY	0x80
70 #define	MSCOD_MEM_BYTE_PARITY	0x100
71 
72 #define	GINTEL_ERROR_MEM	0x1000
73 #define	GINTEL_ERROR_QUICKPATH	0x2000
74 
75 #define	GINTEL_ERR_SPARE_MEM	(GINTEL_ERROR_MEM | 1)
76 #define	GINTEL_ERR_MEM_UE	(GINTEL_ERROR_MEM | 2)
77 #define	GINTEL_ERR_MEM_CE	(GINTEL_ERROR_MEM | 3)
78 #define	GINTEL_ERR_MEM_PARITY	(GINTEL_ERROR_MEM | 4)
79 #define	GINTEL_ERR_MEM_ADDR_PARITY	(GINTEL_ERROR_MEM | 5)
80 #define	GINTEL_ERR_MEM_REDUNDANT (GINTEL_ERROR_MEM | 6)
81 #define	GINTEL_ERR_MEM_BAD_ADDR	(GINTEL_ERROR_MEM | 7)
82 #define	GINTEL_ERR_MEM_BAD_ID	(GINTEL_ERROR_MEM | 8)
83 #define	GINTEL_ERR_MEM_UNKNOWN	(GINTEL_ERROR_MEM | 0xfff)
84 
85 #define	MSR_MC_MISC_MEM_CHANNEL_MASK	0x00000000000c0000ULL
86 #define	MSR_MC_MISC_MEM_CHANNEL_SHIFT	18
87 #define	MSR_MC_MISC_MEM_DIMM_MASK	0x0000000000030000ULL
88 #define	MSR_MC_MISC_MEM_DIMM_SHIFT	16
89 #define	MSR_MC_MISC_MEM_SYNDROME_MASK	0xffffffff00000000ULL
90 #define	MSR_MC_MISC_MEM_SYNDROME_SHIFT	32
91 
92 #define	CPU_GENERATION_DONT_CARE	0
93 #define	CPU_GENERATION_NEHALEM_EP	1
94 
95 #define	INTEL_NEHALEM_CPU_FAMILY_ID	0x6
96 #define	INTEL_NEHALEM_CPU_MODEL_ID	0x1A
97 
98 #define	NEHALEM_EP_MEMORY_CONTROLLER_DEV	0x3
99 #define	NEHALEM_EP_MEMORY_CONTROLLER_FUNC	0x2
100 
101 /*ARGSUSED*/
102 int
103 gintel_init(cmi_hdl_t hdl, void **datap)
104 {
105 	uint32_t nb_chipset;
106 
107 	if (gintel_ms_support_disable)
108 		return (ENOTSUP);
109 
110 	if (!(x86_feature & X86_MCA))
111 		return (ENOTSUP);
112 
113 	nb_chipset = (*pci_getl_func)(0, 0, 0, 0x0);
114 	switch (nb_chipset) {
115 	case INTEL_NB_7300:
116 	case INTEL_NB_5000P:
117 	case INTEL_NB_5000X:
118 	case INTEL_NB_5000V:
119 	case INTEL_NB_5000Z:
120 	case INTEL_NB_5400:
121 	case INTEL_NB_5400A:
122 	case INTEL_NB_5400B:
123 		if (!gintel_ms_unconstrained)
124 			gintel_error_action_return |= CMS_ERRSCOPE_POISONED;
125 		break;
126 	case INTEL_QP_IO:
127 	case INTEL_QP_WP:
128 	case INTEL_QP_36D:
129 	case INTEL_QP_24D:
130 	case INTEL_QP_U1:
131 	case INTEL_QP_U2:
132 	case INTEL_QP_U3:
133 	case INTEL_QP_U4:
134 		quickpath = 1;
135 		break;
136 	default:
137 		break;
138 	}
139 	return (0);
140 }
141 
142 /*ARGSUSED*/
143 uint32_t
144 gintel_error_action(cmi_hdl_t hdl, int ismc, int bank,
145     uint64_t status, uint64_t addr, uint64_t misc, void *mslogout)
146 {
147 	if ((status & MSR_MC_STATUS_PCC) == 0)
148 		return (gintel_error_action_return);
149 	else
150 		return (gintel_error_action_return & ~CMS_ERRSCOPE_POISONED);
151 }
152 
153 /*ARGSUSED*/
154 cms_cookie_t
155 gintel_disp_match(cmi_hdl_t hdl, int bank, uint64_t status,
156     uint64_t addr, uint64_t misc, void *mslogout)
157 {
158 	cms_cookie_t rt = (cms_cookie_t)NULL;
159 	uint16_t mcacode = MCAX86_ERRCODE(status);
160 	uint16_t mscode = MCAX86_MSERRCODE(status);
161 
162 	if (MCAX86_ERRCODE_ISMEMORY_CONTROLLER(mcacode)) {
163 		/*
164 		 * memory controller errors
165 		 */
166 		if (mscode & MSCOD_MEM_SPARE_MEM) {
167 			rt = (cms_cookie_t)GINTEL_ERR_SPARE_MEM;
168 		} else if (mscode & (MSCOD_MEM_ECC_READ |
169 		    MSCOD_MEM_ECC_SCRUB)) {
170 			if (status & MSR_MC_STATUS_UC)
171 				rt = (cms_cookie_t)GINTEL_ERR_MEM_UE;
172 			else
173 				rt = (cms_cookie_t)GINTEL_ERR_MEM_CE;
174 		} else if (mscode & (MSCOD_MEM_WR_PARITY |
175 		    MSCOD_MEM_BYTE_PARITY)) {
176 			rt = (cms_cookie_t)GINTEL_ERR_MEM_PARITY;
177 		} else if (mscode & MSCOD_MEM_ADDR_PARITY) {
178 			rt = (cms_cookie_t)GINTEL_ERR_MEM_ADDR_PARITY;
179 		} else if (mscode & MSCOD_MEM_REDUNDANT_MEM) {
180 			rt = (cms_cookie_t)GINTEL_ERR_MEM_REDUNDANT;
181 		} else if (mscode & MSCOD_MEM_ILLEGAL_ADDR) {
182 			rt = (cms_cookie_t)GINTEL_ERR_MEM_BAD_ADDR;
183 		} else if (mscode & MSCOD_MEM_BAD_ID) {
184 			rt = (cms_cookie_t)GINTEL_ERR_MEM_BAD_ID;
185 		} else {
186 			rt = (cms_cookie_t)GINTEL_ERR_MEM_UNKNOWN;
187 		}
188 	} else if (quickpath &&
189 	    MCAX86_ERRCODE_ISBUS_INTERCONNECT(MCAX86_ERRCODE(status))) {
190 		rt = (cms_cookie_t)GINTEL_ERROR_QUICKPATH;
191 	}
192 	return (rt);
193 }
194 
195 /*ARGSUSED*/
196 void
197 gintel_ereport_class(cmi_hdl_t hdl, cms_cookie_t mscookie,
198     const char **cpuclsp, const char **leafclsp)
199 {
200 	*cpuclsp = FM_EREPORT_CPU_INTEL;
201 	switch ((uintptr_t)mscookie) {
202 	case GINTEL_ERROR_QUICKPATH:
203 		*leafclsp = "quickpath.interconnect";
204 		break;
205 	case GINTEL_ERR_SPARE_MEM:
206 		*leafclsp = "quickpath.mem_spare";
207 		break;
208 	case GINTEL_ERR_MEM_UE:
209 		*leafclsp = "quickpath.mem_ue";
210 		break;
211 	case GINTEL_ERR_MEM_CE:
212 		*leafclsp = "quickpath.mem_ce";
213 		break;
214 	case GINTEL_ERR_MEM_PARITY:
215 		*leafclsp = "quickpath.mem_parity";
216 		break;
217 	case GINTEL_ERR_MEM_ADDR_PARITY:
218 		*leafclsp = "quickpath.mem_addr_parity";
219 		break;
220 	case GINTEL_ERR_MEM_REDUNDANT:
221 		*leafclsp = "quickpath.mem_redundant";
222 		break;
223 	case GINTEL_ERR_MEM_BAD_ADDR:
224 		*leafclsp = "quickpath.mem_bad_addr";
225 		break;
226 	case GINTEL_ERR_MEM_BAD_ID:
227 		*leafclsp = "quickpath.mem_bad_id";
228 		break;
229 	case GINTEL_ERR_MEM_UNKNOWN:
230 		*leafclsp = "quickpath.mem_unknown";
231 		break;
232 	}
233 }
234 
235 nvlist_t *
236 gintel_ereport_detector(cmi_hdl_t hdl, cms_cookie_t mscookie, nv_alloc_t *nva)
237 {
238 	nvlist_t *nvl = (nvlist_t *)NULL;
239 
240 	if (mscookie) {
241 		if ((nvl = fm_nvlist_create(nva)) == NULL)
242 			return (NULL);
243 		if ((uintptr_t)mscookie & GINTEL_ERROR_QUICKPATH) {
244 			fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 2,
245 			    "motherboard", 0,
246 			    "chip", cmi_hdl_chipid(hdl));
247 		} else {
248 			fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, NULL, 3,
249 			    "motherboard", 0,
250 			    "chip", cmi_hdl_chipid(hdl),
251 			    "memory-controller", 0);
252 		}
253 	}
254 	return (nvl);
255 }
256 
257 static nvlist_t *
258 gintel_ereport_create_resource_elem(nv_alloc_t *nva, mc_unum_t *unump)
259 {
260 	nvlist_t *nvl, *snvl;
261 
262 	if ((nvl = fm_nvlist_create(nva)) == NULL)	/* freed by caller */
263 		return (NULL);
264 
265 	if ((snvl = fm_nvlist_create(nva)) == NULL) {
266 		fm_nvlist_destroy(nvl, nva ? FM_NVA_RETAIN : FM_NVA_FREE);
267 		return (NULL);
268 	}
269 
270 	(void) nvlist_add_uint64(snvl, FM_FMRI_HC_SPECIFIC_OFFSET,
271 	    unump->unum_offset);
272 
273 	if (unump->unum_chan == -1) {
274 		fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 3,
275 		    "motherboard", unump->unum_board,
276 		    "chip", unump->unum_chip,
277 		    "memory-controller", unump->unum_mc);
278 	} else if (unump->unum_cs == -1) {
279 		fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 4,
280 		    "motherboard", unump->unum_board,
281 		    "chip", unump->unum_chip,
282 		    "memory-controller", unump->unum_mc,
283 		    "dram-channel", unump->unum_chan);
284 	} else if (unump->unum_rank == -1) {
285 		fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 5,
286 		    "motherboard", unump->unum_board,
287 		    "chip", unump->unum_chip,
288 		    "memory-controller", unump->unum_mc,
289 		    "dram-channel", unump->unum_chan,
290 		    "dimm", unump->unum_cs);
291 	} else {
292 		fm_fmri_hc_set(nvl, FM_HC_SCHEME_VERSION, NULL, snvl, 6,
293 		    "motherboard", unump->unum_board,
294 		    "chip", unump->unum_chip,
295 		    "memory-controller", unump->unum_mc,
296 		    "dram-channel", unump->unum_chan,
297 		    "dimm", unump->unum_cs,
298 		    "rank", unump->unum_rank);
299 	}
300 
301 	fm_nvlist_destroy(snvl, nva ? FM_NVA_RETAIN : FM_NVA_FREE);
302 
303 	return (nvl);
304 }
305 
306 static void
307 nehalem_ep_ereport_add_memory_error_counter(uint_t  chipid,
308     uint32_t *this_err_counter_array)
309 {
310 	int	index;
311 
312 	for (index = 0; index < N_MC_COR_ECC_CNT; index ++)
313 		this_err_counter_array[index] = MC_COR_ECC_CNT(chipid, index);
314 }
315 
316 static int
317 gintel_cpu_generation(cmi_hdl_t hdl)
318 {
319 	int	cpu_generation = CPU_GENERATION_DONT_CARE;
320 
321 	if ((cmi_hdl_family(hdl) == INTEL_NEHALEM_CPU_FAMILY_ID) &&
322 	    (cmi_hdl_model(hdl) == INTEL_NEHALEM_CPU_MODEL_ID))
323 		cpu_generation = CPU_GENERATION_NEHALEM_EP;
324 
325 	return (cpu_generation);
326 }
327 
328 /*ARGSUSED*/
329 void
330 gintel_ereport_add_logout(cmi_hdl_t hdl, nvlist_t *ereport,
331     nv_alloc_t *nva, int banknum, uint64_t status, uint64_t addr,
332     uint64_t misc, void *mslogout, cms_cookie_t mscookie)
333 {
334 	mc_unum_t unum;
335 	nvlist_t *resource;
336 	uint32_t synd = 0;
337 	int  chan = MCAX86_ERRCODE_CCCC(status);
338 	uint8_t last_index, this_index;
339 	int chipid;
340 
341 	if (chan == 0xf)
342 		chan = -1;
343 
344 	if ((uintptr_t)mscookie & GINTEL_ERROR_MEM) {
345 		unum.unum_board = 0;
346 		unum.unum_chip = cmi_hdl_chipid(hdl);
347 		unum.unum_mc = 0;
348 		unum.unum_chan = chan;
349 		unum.unum_cs = -1;
350 		unum.unum_rank = -1;
351 		unum.unum_offset = -1ULL;
352 		if (status & MSR_MC_STATUS_MISCV) {
353 			unum.unum_chan =
354 			    (misc & MSR_MC_MISC_MEM_CHANNEL_MASK) >>
355 			    MSR_MC_MISC_MEM_CHANNEL_SHIFT;
356 			unum.unum_cs =
357 			    (misc & MSR_MC_MISC_MEM_DIMM_MASK) >>
358 			    MSR_MC_MISC_MEM_DIMM_SHIFT;
359 			synd = (misc & MSR_MC_MISC_MEM_SYNDROME_MASK) >>
360 			    MSR_MC_MISC_MEM_SYNDROME_SHIFT;
361 			fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ECC_SYND,
362 			    DATA_TYPE_UINT32, synd, 0);
363 		}
364 		if (status & MSR_MC_STATUS_ADDRV) {
365 			fm_payload_set(ereport, FM_FMRI_MEM_PHYSADDR,
366 			    DATA_TYPE_UINT64, addr, NULL);
367 			(void) cmi_mc_patounum(addr, 0, 0, synd, 0, &unum);
368 		}
369 		resource = gintel_ereport_create_resource_elem(nva, &unum);
370 		fm_payload_set(ereport, FM_EREPORT_PAYLOAD_NAME_RESOURCE,
371 		    DATA_TYPE_NVLIST_ARRAY, 1, &resource, NULL);
372 		fm_nvlist_destroy(resource, nva ? FM_NVA_RETAIN:FM_NVA_FREE);
373 
374 		if (gintel_cpu_generation(hdl) == CPU_GENERATION_NEHALEM_EP) {
375 
376 			chipid = unum.unum_chip;
377 			if (chipid < MAX_CPU_NODES) {
378 				last_index = err_counter_index[chipid];
379 				this_index =
380 				    (last_index + 1) % ERR_COUNTER_INDEX;
381 				err_counter_index[chipid] = this_index;
382 				nehalem_ep_ereport_add_memory_error_counter(
383 				    chipid,
384 				    err_counter_array[chipid][this_index]);
385 				fm_payload_set(ereport,
386 				    FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_THIS,
387 				    DATA_TYPE_UINT32_ARRAY, N_MC_COR_ECC_CNT,
388 				    err_counter_array[chipid][this_index],
389 				    NULL);
390 				fm_payload_set(ereport,
391 				    FM_EREPORT_PAYLOAD_MEM_ECC_COUNTER_LAST,
392 				    DATA_TYPE_UINT32_ARRAY, N_MC_COR_ECC_CNT,
393 				    err_counter_array[chipid][last_index],
394 				    NULL);
395 			}
396 		}
397 	}
398 }
399 
400 boolean_t
401 gintel_bankctl_skipinit(cmi_hdl_t hdl, int banknum)
402 {
403 	/*
404 	 * On Intel family 6 before QuickPath we must not enable machine check
405 	 * from bank 0 detectors. bank 0 is reserved for the platform
406 	 */
407 
408 	if (banknum == 0 &&
409 	    cmi_hdl_family(hdl) == INTEL_NEHALEM_CPU_FAMILY_ID &&
410 	    cmi_hdl_model(hdl) < INTEL_NEHALEM_CPU_MODEL_ID)
411 		return (1);
412 	else
413 		return (0);
414 }
415 
416 cms_api_ver_t _cms_api_version = CMS_API_VERSION_0;
417 
418 const cms_ops_t _cms_ops = {
419 	gintel_init,		/* cms_init */
420 	NULL,			/* cms_post_startup */
421 	NULL,			/* cms_post_mpstartup */
422 	NULL,			/* cms_logout_size */
423 	NULL,			/* cms_mcgctl_val */
424 	gintel_bankctl_skipinit, /* cms_bankctl_skipinit */
425 	NULL,			/* cms_bankctl_val */
426 	NULL,			/* cms_bankstatus_skipinit */
427 	NULL,			/* cms_bankstatus_val */
428 	NULL,			/* cms_mca_init */
429 	NULL,			/* cms_poll_ownermask */
430 	NULL,			/* cms_bank_logout */
431 	gintel_error_action,	/* cms_error_action */
432 	gintel_disp_match,	/* cms_disp_match */
433 	gintel_ereport_class,	/* cms_ereport_class */
434 	gintel_ereport_detector,	/* cms_ereport_detector */
435 	NULL,			/* cms_ereport_includestack */
436 	gintel_ereport_add_logout,	/* cms_ereport_add_logout */
437 	NULL,			/* cms_msrinject */
438 	NULL,			/* cms_fini */
439 };
440 
441 static struct modlcpu modlcpu = {
442 	&mod_cpuops,
443 	"Generic Intel model-specific MCA"
444 };
445 
446 static struct modlinkage modlinkage = {
447 	MODREV_1,
448 	(void *)&modlcpu,
449 	NULL
450 };
451 
452 int
453 _init(void)
454 {
455 	return (mod_install(&modlinkage));
456 }
457 
458 int
459 _info(struct modinfo *modinfop)
460 {
461 	return (mod_info(&modlinkage, modinfop));
462 }
463 
464 int
465 _fini(void)
466 {
467 	return (mod_remove(&modlinkage));
468 }
469