xref: /illumos-gate/usr/src/uts/sun4u/opl/io/mc-opl.c (revision 25cf1a30)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * All Rights Reserved, Copyright (c) FUJITSU LIMITED 2006
23  */
24 
25 #pragma ident	"%Z%%M%	%I%	%E% SMI"
26 
27 #include <sys/types.h>
28 #include <sys/sysmacros.h>
29 #include <sys/conf.h>
30 #include <sys/modctl.h>
31 #include <sys/stat.h>
32 #include <sys/async.h>
33 #include <sys/machsystm.h>
34 #include <sys/ksynch.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/ddifm.h>
38 #include <sys/fm/protocol.h>
39 #include <sys/fm/util.h>
40 #include <sys/kmem.h>
41 #include <sys/fm/io/opl_mc_fm.h>
42 #include <sys/memlist.h>
43 #include <sys/param.h>
44 #include <sys/ontrap.h>
45 #include <vm/page.h>
46 #include <sys/mc-opl.h>
47 
48 /*
49  * Function prototypes
50  */
51 static int mc_open(dev_t *, int, int, cred_t *);
52 static int mc_close(dev_t, int, int, cred_t *);
53 static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
54 static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
55 static int mc_detach(dev_info_t *, ddi_detach_cmd_t);
56 
57 static int mc_board_add(mc_opl_t *mcp);
58 static int mc_board_del(mc_opl_t *mcp);
59 static int mc_suspend(mc_opl_t *mcp, uint32_t flag);
60 static int mc_resume(mc_opl_t *mcp, uint32_t flag);
61 
62 static void insert_mcp(mc_opl_t *mcp);
63 static void delete_mcp(mc_opl_t *mcp);
64 
65 static int pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr);
66 
67 static int mc_valid_pa(mc_opl_t *mcp, uint64_t pa);
68 
69 int mc_get_mem_unum(int, uint64_t, char *, int, int *);
70 extern int plat_max_boards(void);
71 
72 static void mc_get_mlist(mc_opl_t *);
73 
74 #pragma weak opl_get_physical_board
75 extern int opl_get_physical_board(int);
76 static int mc_opl_get_physical_board(int);
77 
78 /*
79  * Configuration data structures
80  */
81 static struct cb_ops mc_cb_ops = {
82 	mc_open,			/* open */
83 	mc_close,			/* close */
84 	nulldev,			/* strategy */
85 	nulldev,			/* print */
86 	nodev,				/* dump */
87 	nulldev,			/* read */
88 	nulldev,			/* write */
89 	mc_ioctl,			/* ioctl */
90 	nodev,				/* devmap */
91 	nodev,				/* mmap */
92 	nodev,				/* segmap */
93 	nochpoll,			/* poll */
94 	ddi_prop_op,			/* cb_prop_op */
95 	0,				/* streamtab */
96 	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
97 	CB_REV,				/* rev */
98 	nodev,				/* cb_aread */
99 	nodev				/* cb_awrite */
100 };
101 
102 static struct dev_ops mc_ops = {
103 	DEVO_REV,			/* rev */
104 	0,				/* refcnt  */
105 	ddi_getinfo_1to1,		/* getinfo */
106 	nulldev,			/* identify */
107 	nulldev,			/* probe */
108 	mc_attach,			/* attach */
109 	mc_detach,			/* detach */
110 	nulldev,			/* reset */
111 	&mc_cb_ops,			/* cb_ops */
112 	(struct bus_ops *)0,		/* bus_ops */
113 	nulldev				/* power */
114 };
115 
116 /*
117  * Driver globals
118  */
119 int mc_patrol_interval_sec = 10;
120 
121 int inject_op_delay = 5;
122 
123 mc_inst_list_t *mc_instances;
124 static kmutex_t mcmutex;
125 
126 void *mc_statep;
127 
128 #ifdef	DEBUG
129 int oplmc_debug = 1;
130 #endif
131 
132 static int mc_debug_show_all;
133 
134 extern struct mod_ops mod_driverops;
135 
136 static struct modldrv modldrv = {
137 	&mod_driverops,			/* module type, this one is a driver */
138 	"OPL Memory-controller 1.1",	/* module name */
139 	&mc_ops,			/* driver ops */
140 };
141 
142 static struct modlinkage modlinkage = {
143 	MODREV_1,		/* rev */
144 	(void *)&modldrv,
145 	NULL
146 };
147 
148 #pragma weak opl_get_mem_unum
149 extern int (*opl_get_mem_unum)(int, uint64_t, char *, int, int *);
150 
151 /*
152  * pseudo-mc node portid format
153  *
154  *		[10]   = 0
155  *		[9]    = 1
156  *		[8]    = LSB_ID[4] = 0
157  *		[7:4]  = LSB_ID[3:0]
158  *		[3:0]  = 0
159  *
160  */
161 
162 /*
163  * These are the module initialization routines.
164  */
165 int
166 _init(void)
167 {
168 	int error;
169 
170 
171 	if ((error = ddi_soft_state_init(&mc_statep,
172 	    sizeof (mc_opl_t), 1)) != 0)
173 		return (error);
174 
175 	mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
176 	if (&opl_get_mem_unum)
177 		opl_get_mem_unum = mc_get_mem_unum;
178 
179 	error =  mod_install(&modlinkage);
180 	if (error != 0) {
181 		if (&opl_get_mem_unum)
182 			opl_get_mem_unum = NULL;
183 		mutex_destroy(&mcmutex);
184 		ddi_soft_state_fini(&mc_statep);
185 	}
186 
187 	return (error);
188 }
189 
190 int
191 _fini(void)
192 {
193 	int error;
194 
195 	if ((error = mod_remove(&modlinkage)) != 0)
196 		return (error);
197 
198 	mutex_destroy(&mcmutex);
199 
200 	if (&opl_get_mem_unum)
201 		opl_get_mem_unum = NULL;
202 
203 	ddi_soft_state_fini(&mc_statep);
204 
205 	return (0);
206 }
207 
208 int
209 _info(struct modinfo *modinfop)
210 {
211 	return (mod_info(&modlinkage, modinfop));
212 }
213 
214 static int
215 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
216 {
217 	mc_opl_t *mcp;
218 	int instance;
219 
220 	/* get the instance of this devi */
221 	instance = ddi_get_instance(devi);
222 
223 	switch (cmd) {
224 	case DDI_ATTACH:
225 		break;
226 	case DDI_RESUME:
227 		mcp = ddi_get_soft_state(mc_statep, instance);
228 		return (mc_resume(mcp, MC_DRIVER_SUSPENDED));
229 	default:
230 		return (DDI_FAILURE);
231 	}
232 
233 
234 	if (ddi_soft_state_zalloc(mc_statep, instance) != DDI_SUCCESS)
235 		return (DDI_FAILURE);
236 
237 	if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) {
238 		goto bad;
239 	}
240 
241 	/* set informations in mc state */
242 	mcp->mc_dip = devi;
243 
244 	if (mc_board_add(mcp))
245 		goto bad;
246 
247 	insert_mcp(mcp);
248 	ddi_report_dev(devi);
249 
250 	return (DDI_SUCCESS);
251 
252 bad:
253 	ddi_soft_state_free(mc_statep, instance);
254 	return (DDI_FAILURE);
255 }
256 
257 /* ARGSUSED */
258 static int
259 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
260 {
261 	int instance;
262 	mc_opl_t *mcp;
263 
264 	/* get the instance of this devi */
265 	instance = ddi_get_instance(devi);
266 	if ((mcp = ddi_get_soft_state(mc_statep, instance)) == NULL) {
267 		return (DDI_FAILURE);
268 	}
269 
270 	switch (cmd) {
271 	case DDI_SUSPEND:
272 		return (mc_suspend(mcp, MC_DRIVER_SUSPENDED));
273 	case DDI_DETACH:
274 		break;
275 	default:
276 		return (DDI_FAILURE);
277 	}
278 
279 	mutex_enter(&mcmutex);
280 	if (mc_board_del(mcp) != DDI_SUCCESS) {
281 		mutex_exit(&mcmutex);
282 		return (DDI_FAILURE);
283 	}
284 
285 	delete_mcp(mcp);
286 	mutex_exit(&mcmutex);
287 
288 	/* free up the soft state */
289 	ddi_soft_state_free(mc_statep, instance);
290 
291 	return (DDI_SUCCESS);
292 }
293 
294 /* ARGSUSED */
295 static int
296 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
297 {
298 	return (0);
299 }
300 
301 /* ARGSUSED */
302 static int
303 mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
304 {
305 	return (0);
306 }
307 
308 /* ARGSUSED */
309 static int
310 mc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
311 	int *rvalp)
312 {
313 	return (ENXIO);
314 }
315 
316 /*
317  * PA validity check:
318  * This function return 1 if the PA is valid, otherwise
319  * return 0.
320  */
321 
322 /* ARGSUSED */
323 static int
324 pa_is_valid(mc_opl_t *mcp, uint64_t addr)
325 {
326 	/*
327 	 * Check if the addr is on the board.
328 	 */
329 	if ((addr < mcp->mc_start_address) ||
330 	    (mcp->mc_start_address + mcp->mc_size <= addr))
331 		return (0);
332 
333 	if (mcp->mlist == NULL)
334 		mc_get_mlist(mcp);
335 
336 	if (mcp->mlist && address_in_memlist(mcp->mlist, addr, 0)) {
337 		return (1);
338 	}
339 	return (0);
340 }
341 
342 /*
343  * mac-pa translation routines.
344  *
345  *    Input: mc driver state, (LSB#, Bank#, DIMM address)
346  *    Output: physical address
347  *
348  *    Valid   - return value:  0
349  *    Invalid - return value: -1
350  */
351 static int
352 mcaddr_to_pa(mc_opl_t *mcp, mc_addr_t *maddr, uint64_t *pa)
353 {
354 	int i;
355 	uint64_t pa_offset = 0;
356 	int cs = (maddr->ma_dimm_addr >> CS_SHIFT) & 1;
357 	int bank = maddr->ma_bank;
358 	mc_addr_t maddr1;
359 	int bank0, bank1;
360 
361 	MC_LOG("mcaddr /LSB%d/B%d/%x\n", maddr->ma_bd, bank,
362 		maddr->ma_dimm_addr);
363 
364 	/* loc validity check */
365 	ASSERT(maddr->ma_bd >= 0 && OPL_BOARD_MAX > maddr->ma_bd);
366 	ASSERT(bank >= 0 && OPL_BANK_MAX > bank);
367 
368 	/* Do translation */
369 	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
370 		int pa_bit = 0;
371 		int mc_bit = mcp->mc_trans_table[cs][i];
372 		if (mc_bit < MC_ADDRESS_BITS) {
373 			pa_bit = (maddr->ma_dimm_addr >> mc_bit) & 1;
374 		} else if (mc_bit == MP_NONE) {
375 			pa_bit = 0;
376 		} else if (mc_bit == MP_BANK_0) {
377 			pa_bit = bank & 1;
378 		} else if (mc_bit == MP_BANK_1) {
379 			pa_bit = (bank >> 1) & 1;
380 		} else if (mc_bit == MP_BANK_2) {
381 			pa_bit = (bank >> 2) & 1;
382 		}
383 		pa_offset |= ((uint64_t)pa_bit) << i;
384 	}
385 	*pa = mcp->mc_start_address + pa_offset;
386 	MC_LOG("pa = %lx\n", *pa);
387 
388 	if (pa_to_maddr(mcp, *pa, &maddr1) == -1) {
389 		return (-1);
390 	}
391 
392 
393 	if (IS_MIRROR(mcp, maddr->ma_bank)) {
394 		bank0 = maddr->ma_bank & ~(1);
395 		bank1 = maddr1.ma_bank & ~(1);
396 	} else {
397 		bank0 = maddr->ma_bank;
398 		bank1 = maddr1.ma_bank;
399 	}
400 	/*
401 	 * there is no need to check ma_bd because it is generated from
402 	 * mcp.  They are the same.
403 	 */
404 	if ((bank0 == bank1) &&
405 		(maddr->ma_dimm_addr == maddr1.ma_dimm_addr)) {
406 		return (0);
407 	} else {
408 		cmn_err(CE_WARN, "Translation error source /LSB%d/B%d/%x, "
409 			"PA %lx, target /LSB%d/B%d/%x\n",
410 			maddr->ma_bd, bank, maddr->ma_dimm_addr,
411 			*pa, maddr1.ma_bd, maddr1.ma_bank,
412 			maddr1.ma_dimm_addr);
413 		return (-1);
414 	}
415 }
416 
417 /*
418  * PA to CS (used by pa_to_maddr).
419  */
420 static int
421 pa_to_cs(mc_opl_t *mcp, uint64_t pa_offset)
422 {
423 	int i;
424 	int cs = 0;
425 
426 	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
427 		/* MAC address bit<29> is arranged on the same PA bit */
428 		/* on both table. So we may use any table. */
429 		if (mcp->mc_trans_table[0][i] == CS_SHIFT) {
430 			cs = (pa_offset >> i) & 1;
431 			break;
432 		}
433 	}
434 	return (cs);
435 }
436 
437 /*
438  * PA to DIMM (used by pa_to_maddr).
439  */
440 /* ARGSUSED */
441 static uint32_t
442 pa_to_dimm(mc_opl_t *mcp, uint64_t pa_offset)
443 {
444 	int i;
445 	int cs = pa_to_cs(mcp, pa_offset);
446 	uint32_t dimm_addr = 0;
447 
448 	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
449 		int pa_bit_value = (pa_offset >> i) & 1;
450 		int mc_bit = mcp->mc_trans_table[cs][i];
451 		if (mc_bit < MC_ADDRESS_BITS) {
452 			dimm_addr |= pa_bit_value << mc_bit;
453 		}
454 	}
455 	return (dimm_addr);
456 }
457 
458 /*
459  * PA to Bank (used by pa_to_maddr).
460  */
461 static int
462 pa_to_bank(mc_opl_t *mcp, uint64_t pa_offset)
463 {
464 	int i;
465 	int cs = pa_to_cs(mcp, pa_offset);
466 	int bankno = mcp->mc_trans_table[cs][INDEX_OF_BANK_SUPPLEMENT_BIT];
467 
468 
469 	for (i = 0; i < PA_BITS_FOR_MAC; i++) {
470 		int pa_bit_value = (pa_offset >> i) & 1;
471 		int mc_bit = mcp->mc_trans_table[cs][i];
472 		switch (mc_bit) {
473 		case MP_BANK_0:
474 			bankno |= pa_bit_value;
475 			break;
476 		case MP_BANK_1:
477 			bankno |= pa_bit_value << 1;
478 			break;
479 		case MP_BANK_2:
480 			bankno |= pa_bit_value << 2;
481 			break;
482 		}
483 	}
484 
485 	return (bankno);
486 }
487 
488 /*
489  * PA to MAC address translation
490  *
491  *   Input: MAC driver state, physicall adress
492  *   Output: LSB#, Bank id, mac address
493  *
494  *    Valid   - return value:  0
495  *    Invalid - return value: -1
496  */
497 
498 int
499 pa_to_maddr(mc_opl_t *mcp, uint64_t pa, mc_addr_t *maddr)
500 {
501 	uint64_t pa_offset;
502 
503 	/* PA validity check */
504 	if (!pa_is_valid(mcp, pa))
505 		return (-1);
506 
507 
508 	/* Do translation */
509 	pa_offset = pa - mcp->mc_start_address;
510 
511 	maddr->ma_bd = mcp->mc_board_num;
512 	maddr->ma_bank = pa_to_bank(mcp, pa_offset);
513 	maddr->ma_dimm_addr = pa_to_dimm(mcp, pa_offset);
514 	MC_LOG("pa %lx -> mcaddr /LSB%d/B%d/%x\n",
515 		pa_offset, maddr->ma_bd, maddr->ma_bank, maddr->ma_dimm_addr);
516 	return (0);
517 }
518 
519 static void
520 mc_ereport_post(mc_aflt_t *mc_aflt)
521 {
522 	char buf[FM_MAX_CLASS];
523 	char device_path[MAXPATHLEN];
524 	nv_alloc_t *nva = NULL;
525 	nvlist_t *ereport, *detector, *resource;
526 	errorq_elem_t *eqep;
527 	int nflts;
528 	mc_flt_stat_t *flt_stat;
529 	int i, n, blen;
530 	char *p;
531 	uint32_t values[2], synd[2], dslot[2];
532 
533 	if (panicstr) {
534 		eqep = errorq_reserve(ereport_errorq);
535 		if (eqep == NULL)
536 			return;
537 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
538 		nva = errorq_elem_nva(ereport_errorq, eqep);
539 	} else {
540 		ereport = fm_nvlist_create(nva);
541 	}
542 
543 	/*
544 	 * Create the scheme "dev" FMRI.
545 	 */
546 	detector = fm_nvlist_create(nva);
547 	resource = fm_nvlist_create(nva);
548 
549 	nflts = mc_aflt->mflt_nflts;
550 
551 	ASSERT(nflts >= 1 && nflts <= 2);
552 
553 	flt_stat = mc_aflt->mflt_stat[0];
554 	(void) ddi_pathname(mc_aflt->mflt_mcp->mc_dip, device_path);
555 	(void) fm_fmri_dev_set(detector, FM_DEV_SCHEME_VERSION, NULL,
556 	    device_path, NULL);
557 
558 	/*
559 	 * Encode all the common data into the ereport.
560 	 */
561 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s-%s",
562 		MC_OPL_ERROR_CLASS,
563 		mc_aflt->mflt_is_ptrl ? MC_OPL_PTRL_SUBCLASS :
564 		MC_OPL_MI_SUBCLASS,
565 		mc_aflt->mflt_erpt_class);
566 
567 	MC_LOG("mc_ereport_post: ereport %s\n", buf);
568 
569 
570 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
571 		fm_ena_generate(mc_aflt->mflt_id, FM_ENA_FMT1),
572 		detector, NULL);
573 
574 	/*
575 	 * Set payload.
576 	 */
577 	fm_payload_set(ereport, MC_OPL_BOARD, DATA_TYPE_UINT32,
578 		flt_stat->mf_flt_maddr.ma_bd, NULL);
579 
580 	fm_payload_set(ereport, MC_OPL_PA, DATA_TYPE_UINT64,
581 		flt_stat->mf_flt_paddr, NULL);
582 
583 	if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
584 		fm_payload_set(ereport, MC_OPL_FLT_TYPE,
585 			DATA_TYPE_UINT8, ECC_STICKY, NULL);
586 	}
587 
588 	for (i = 0; i < nflts; i++)
589 		values[i] = mc_aflt->mflt_stat[i]->mf_flt_maddr.ma_bank;
590 
591 	fm_payload_set(ereport, MC_OPL_BANK, DATA_TYPE_UINT32_ARRAY,
592 		nflts, values, NULL);
593 
594 	for (i = 0; i < nflts; i++)
595 		values[i] = mc_aflt->mflt_stat[i]->mf_cntl;
596 
597 	fm_payload_set(ereport, MC_OPL_STATUS, DATA_TYPE_UINT32_ARRAY,
598 		nflts, values, NULL);
599 
600 	for (i = 0; i < nflts; i++)
601 		values[i] = mc_aflt->mflt_stat[i]->mf_err_add;
602 
603 	fm_payload_set(ereport, MC_OPL_ERR_ADD, DATA_TYPE_UINT32_ARRAY,
604 		nflts, values, NULL);
605 
606 	for (i = 0; i < nflts; i++)
607 		values[i] = mc_aflt->mflt_stat[i]->mf_err_log;
608 
609 	fm_payload_set(ereport, MC_OPL_ERR_LOG, DATA_TYPE_UINT32_ARRAY,
610 		nflts, values, NULL);
611 
612 	for (i = 0; i < nflts; i++) {
613 		flt_stat = mc_aflt->mflt_stat[i];
614 		if (flt_stat->mf_errlog_valid) {
615 			synd[i] = flt_stat->mf_synd;
616 			dslot[i] = flt_stat->mf_dimm_slot;
617 			values[i] = flt_stat->mf_dram_place;
618 		} else {
619 			synd[i] = 0;
620 			dslot[i] = 0;
621 			values[i] = 0;
622 		}
623 	}
624 
625 	fm_payload_set(ereport, MC_OPL_ERR_SYND,
626 		DATA_TYPE_UINT32_ARRAY, nflts, synd, NULL);
627 
628 	fm_payload_set(ereport, MC_OPL_ERR_DIMMSLOT,
629 		DATA_TYPE_UINT32_ARRAY, nflts, dslot, NULL);
630 
631 	fm_payload_set(ereport, MC_OPL_ERR_DRAM,
632 		DATA_TYPE_UINT32_ARRAY, nflts, values, NULL);
633 
634 	blen = MAXPATHLEN;
635 	device_path[0] = 0;
636 	p = &device_path[0];
637 
638 	for (i = 0; i < nflts; i++) {
639 		int bank = flt_stat->mf_flt_maddr.ma_bank;
640 		int psb = -1;
641 
642 		flt_stat = mc_aflt->mflt_stat[i];
643 		psb = mc_opl_get_physical_board(
644 		    flt_stat->mf_flt_maddr.ma_bd);
645 
646 		if (psb != -1) {
647 			snprintf(p, blen, "/CMU%d/B%d", psb, bank);
648 		} else {
649 			snprintf(p, blen, "/CMU/B%d", bank);
650 		}
651 
652 		if (flt_stat->mf_errlog_valid) {
653 			snprintf(p + strlen(p), blen, "/MEM%d%d%c",
654 			    bank/2, (bank & 0x1) * 2 + dslot[i] & 1,
655 			    (dslot[i] & 0x2) ? 'B' : 'A');
656 		}
657 
658 		n = strlen(&device_path[0]);
659 		blen = MAXPATHLEN - n;
660 		p = &device_path[n];
661 		if (i < (nflts - 1)) {
662 			snprintf(p, blen, " ");
663 			n += 1; blen -= 1; p += 1;
664 		}
665 	}
666 
667 	/*
668 	 * UNUM format /LSB#/B#/MEMxyZ
669 	 * where x is the MAC# = Bank#/2
670 	 * y is slot info = (Bank# & 0x1)*2 + {0, 1} 0 for DIMM-L, 1 for DIMM-H
671 	 * DIMM-L is 0 in bit 13, DIMM-H is 1 in bit 13.
672 	 * Z is A(CS0) or B(CS1) given by bit 14
673 	 */
674 	(void) fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
675 		NULL, device_path, NULL, 0);
676 
677 	fm_payload_set(ereport, MC_OPL_RESOURCE, DATA_TYPE_NVLIST,
678 		resource, NULL);
679 
680 	if (panicstr) {
681 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
682 	} else {
683 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
684 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
685 		fm_nvlist_destroy(detector, FM_NVA_FREE);
686 		fm_nvlist_destroy(resource, FM_NVA_FREE);
687 	}
688 }
689 
690 static void
691 mc_err_drain(mc_aflt_t *mc_aflt)
692 {
693 	int rv;
694 	page_t *pp;
695 	uint64_t errors;
696 	uint64_t pa = (uint64_t)(-1);
697 
698 	MC_LOG("mc_err_drain: %s\n",
699 		mc_aflt->mflt_erpt_class);
700 	/*
701 	 * we come here only when we have:
702 	 * In mirror mode: CMPE, MUE, SUE
703 	 * In normal mode: UE, Permanent CE
704 	 */
705 	rv = mcaddr_to_pa(mc_aflt->mflt_mcp,
706 		&(mc_aflt->mflt_stat[0]->mf_flt_maddr), &pa);
707 	if (rv == 0)
708 		mc_aflt->mflt_stat[0]->mf_flt_paddr = pa;
709 	else
710 		mc_aflt->mflt_stat[0]->mf_flt_paddr = (uint64_t)-1;
711 	if (rv == 0) {
712 		MC_LOG("mc_err_drain:pa = %lx\n", pa);
713 		pp = page_numtopp_nolock(pa >> PAGESHIFT);
714 
715 		if (pp) {
716 			/*
717 			 * Don't keep retiring and make ereports
718 			 * on bad pages in PTRL case
719 			 */
720 			MC_LOG("mc_err_drain:pp = %p\n", pp);
721 			if (mc_aflt->mflt_is_ptrl) {
722 				errors = 0;
723 				if (page_retire_check(pa, &errors) == 0) {
724 					MC_LOG("Page retired\n");
725 					return;
726 				}
727 				if (errors & mc_aflt->mflt_pr) {
728 					MC_LOG("errors %lx, mflt_pr %x\n",
729 						errors, mc_aflt->mflt_pr);
730 					return;
731 				}
732 			}
733 			MC_LOG("offline page %p error %x\n", pp,
734 				mc_aflt->mflt_pr);
735 			(void) page_retire(pa, mc_aflt->mflt_pr);
736 		}
737 	}
738 	mc_ereport_post(mc_aflt);
739 }
740 
/* DIMM addresses wrap modulo this size. */
#define	DIMM_SIZE 0x80000000

/*
 * Advance (p)->ma_dimm_addr by n, wrapping within DIMM_SIZE.
 *
 * Written as a single expression so that it behaves as one statement
 * wherever it is used (e.g. as the body of an unbraced if/else), and
 * with n parenthesized so any expression may be passed.  p is
 * evaluated twice; do not pass an argument with side effects.
 */
#define	INC_DIMM_ADDR(p, n) \
	((p)->ma_dimm_addr = \
	    ((p)->ma_dimm_addr + (n)) & (DIMM_SIZE - 1))
746 
747 /*
748  * The restart address is actually defined in unit of PA[37:6]
749  * the mac patrol will convert that to dimm offset.  If the
750  * address is not in the bank, it will continue to search for
751  * the next PA that is within the bank.
752  *
753  * Also the mac patrol scans the dimms based on PA, not
754  * dimm offset.
755  */
756 
757 static int
758 restart_patrol(mc_opl_t *mcp, int bank, mc_addr_info_t *maddr_info)
759 {
760 	page_t *pp;
761 	uint32_t reg;
762 	uint64_t pa;
763 	int rv;
764 	int loop_count = 0;
765 
766 	reg = ldphysio(MAC_PTRL_CNTL(mcp, bank));
767 
768 	/* already running, so we just return */
769 	if (reg & MAC_CNTL_PTRL_START)
770 		return (0);
771 
772 	if (maddr_info == NULL || (maddr_info->mi_valid == 0)) {
773 		MAC_PTRL_START(mcp, bank);
774 		return (0);
775 	}
776 
777 
778 	rv = mcaddr_to_pa(mcp, &maddr_info->mi_maddr, &pa);
779 	if (rv != 0) {
780 		MC_LOG("cannot convert mcaddr to pa. use auto restart\n");
781 		MAC_PTRL_START(mcp, bank);
782 		return (0);
783 	}
784 
785 	/*
786 	 * pa is the last address scanned by the mac patrol
787 	 * we  calculate the next restart address as follows:
788 	 * first we always advance it by 64 byte. Then begin the loop.
789 	 * loop {
790 	 * if it is not in phys_install, we advance to next 64 MB boundary
791 	 * if it is not backed by a page structure, done
792 	 * if the page is bad, advance to the next page boundary.
793 	 * else done
794 	 * if the new address exceeds the board, wrap around.
795 	 * } <stop if we come back to the same page>
796 	 */
797 
798 	if (pa < mcp->mc_start_address || pa >= (mcp->mc_start_address
799 		+ mcp->mc_size)) {
800 		/* pa is not on this board, just retry */
801 		cmn_err(CE_WARN, "restart_patrol: invalid address %lx "
802 			"on board %d\n", pa, mcp->mc_board_num);
803 		MAC_PTRL_START(mcp, bank);
804 		return (0);
805 	}
806 
807 	MC_LOG("restart_patrol: pa = %lx\n", pa);
808 	if (maddr_info->mi_advance) {
809 		uint64_t new_pa;
810 
811 		if (IS_MIRROR(mcp, bank))
812 			new_pa = pa + 64 * 2;
813 		else
814 			new_pa = pa + 64;
815 
816 		if (!mc_valid_pa(mcp, new_pa)) {
817 			/* Isolation unit size is 64 MB */
818 #define	MC_ISOLATION_BSIZE	(64 * 1024 * 1024)
819 			MC_LOG("Invalid PA\n");
820 			pa = roundup(new_pa + 1, MC_ISOLATION_BSIZE);
821 		} else {
822 			pp = page_numtopp_nolock(new_pa >> PAGESHIFT);
823 			if (pp != NULL) {
824 				uint64_t errors = 0;
825 				if (page_retire_check(new_pa, &errors) &&
826 					(errors == 0)) {
827 					MC_LOG("Page has no error\n");
828 					MAC_PTRL_START(mcp, bank);
829 					return (0);
830 				}
831 				/*
832 				 * skip bad pages
833 				 * and let the following loop to take care
834 				 */
835 				pa = roundup(new_pa + 1, PAGESIZE);
836 				MC_LOG("Skipping bad page to %lx\n", pa);
837 			} else {
838 				MC_LOG("Page has no page structure\n");
839 				MAC_PTRL_START(mcp, bank);
840 				return (0);
841 			}
842 		}
843 	}
844 
845 	/*
846 	 * if we wrap around twice, we just give up and let
847 	 * mac patrol decide.
848 	 */
849 	MC_LOG("pa is now %lx\n", pa);
850 	while (loop_count <= 1) {
851 		if (!mc_valid_pa(mcp, pa)) {
852 			MC_LOG("pa is not valid. round up to 64 MB\n");
853 			pa = roundup(pa + 1, 64 * 1024 * 1024);
854 		} else {
855 			pp = page_numtopp_nolock(pa >> PAGESHIFT);
856 			if (pp != NULL) {
857 				uint64_t errors = 0;
858 				if (page_retire_check(pa, &errors) &&
859 					(errors == 0)) {
860 					MC_LOG("Page has no error\n");
861 					break;
862 				}
863 				/* skip bad pages */
864 				pa = roundup(pa + 1, PAGESIZE);
865 				MC_LOG("Skipping bad page to %lx\n", pa);
866 			} else {
867 				MC_LOG("Page has no page structure\n");
868 				break;
869 			}
870 		}
871 		if (pa >= (mcp->mc_start_address + mcp->mc_size)) {
872 			MC_LOG("Wrap around\n");
873 			pa = mcp->mc_start_address;
874 			loop_count++;
875 		}
876 	}
877 
878 	/* retstart MAC patrol: PA[37:6] */
879 	MC_LOG("restart at pa = %lx\n", pa);
880 	ST_MAC_REG(MAC_RESTART_ADD(mcp, bank), MAC_RESTART_PA(pa));
881 	MAC_PTRL_START_ADD(mcp, bank);
882 
883 	return (0);
884 }
885 
886 /*
887  * Rewriting is used for two purposes.
888  *  - to correct the error in memory.
889  *  - to determine whether the error is permanent or intermittent.
890  * It's done by writing the address in MAC_BANKm_REWRITE_ADD
891  * and issuing REW_REQ command in MAC_BANKm_PTRL_CNRL. After that,
892  * REW_END (and REW_CE/REW_UE if some error detected) is set when
893  * rewrite operation is done. See 4.7.3 and 4.7.11 in Columbus2 PRM.
894  *
895  * Note that rewrite operation doesn't change RAW_UE to Marked UE.
896  * Therefore, we use it only CE case.
897  */
898 static uint32_t
899 do_rewrite(mc_opl_t *mcp, int bank, uint32_t dimm_addr)
900 {
901 	uint32_t cntl;
902 	int count = 0;
903 
904 	/* first wait to make sure PTRL_STATUS is 0 */
905 	while (count++ < MAX_MC_LOOP_COUNT) {
906 		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
907 		if (!(cntl & MAC_CNTL_PTRL_STATUS))
908 			break;
909 		delay(drv_usectohz(10 * 1000));	/* 10 m.s. */
910 	}
911 	if (count >= MAX_MC_LOOP_COUNT)
912 		goto bad;
913 
914 	count = 0;
915 
916 	ST_MAC_REG(MAC_REWRITE_ADD(mcp, bank), dimm_addr);
917 	MAC_REW_REQ(mcp, bank);
918 
919 	do {
920 		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
921 		if (count++ >= MAX_MC_LOOP_COUNT) {
922 			goto bad;
923 		} else
924 			delay(drv_usectohz(10 * 1000));	/* 10 m.s. */
925 	/*
926 	 * If there are other MEMORY or PCI activities, this
927 	 * will be BUSY, else it should be set immediately
928 	 */
929 	} while (!(cntl & MAC_CNTL_REW_END));
930 
931 	MAC_CLEAR_ERRS(mcp, bank, MAC_CNTL_REW_ERRS);
932 	return (cntl);
933 bad:
934 	/* This is bad.  Just reset the circuit */
935 	cmn_err(CE_WARN, "mc-opl rewrite timeout on /LSB%d/B%d\n",
936 		mcp->mc_board_num, bank);
937 	cntl = MAC_CNTL_REW_END;
938 	MAC_CMD(mcp, bank, MAC_CNTL_PTRL_RESET);
939 	MAC_CLEAR_ERRS(mcp, bank, MAC_CNTL_REW_ERRS);
940 	return (cntl);
941 }
942 
943 void
944 mc_process_scf_log(mc_opl_t *mcp)
945 {
946 	int count = 0;
947 	scf_log_t *p;
948 	int bank;
949 
950 	while ((p = mcp->mc_scf_log) != NULL) {
951 		bank = p->sl_bank;
952 		while ((LD_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank))
953 			& MAC_STATIC_ERR_VLD)) {
954 			if (count++ >= (MAX_MC_LOOP_COUNT)) {
955 				break;
956 			}
957 			delay(drv_usectohz(10 * 1000));	/* 10 m.s. */
958 		}
959 
960 		if (count < MAX_MC_LOOP_COUNT) {
961 			ST_MAC_REG(MAC_STATIC_ERR_LOG(mcp, p->sl_bank),
962 				p->sl_err_log);
963 
964 			ST_MAC_REG(MAC_STATIC_ERR_ADD(mcp, p->sl_bank),
965 				p->sl_err_add|MAC_STATIC_ERR_VLD);
966 			mcp->mc_scf_retry[bank] = 0;
967 		} else {
968 			/* if we try too many times, just drop the req */
969 			if (mcp->mc_scf_retry[bank]++ <= MAX_SCF_RETRY) {
970 				return;
971 			} else {
972 				cmn_err(CE_WARN, "SCF is not responding. "
973 					"Dropping the SCF LOG\n");
974 			}
975 		}
976 		mcp->mc_scf_log = p->sl_next;
977 		mcp->mc_scf_total--;
978 		ASSERT(mcp->mc_scf_total >= 0);
979 		kmem_free(p, sizeof (scf_log_t));
980 	}
981 }
982 
983 void
984 mc_queue_scf_log(mc_opl_t *mcp, mc_flt_stat_t *flt_stat, int bank)
985 {
986 	scf_log_t *p;
987 
988 	if (mcp->mc_scf_total >= MAX_SCF_LOGS) {
989 		cmn_err(CE_WARN,
990 			"Max# SCF logs excceded on /LSB%d/B%d\n",
991 			mcp->mc_board_num, bank);
992 		return;
993 	}
994 	p = kmem_zalloc(sizeof (scf_log_t), KM_SLEEP);
995 	p->sl_next = 0;
996 	p->sl_err_add = flt_stat->mf_err_add;
997 	p->sl_err_log = flt_stat->mf_err_log;
998 	p->sl_bank = bank;
999 
1000 	if (mcp->mc_scf_log == NULL) {
1001 		/*
1002 		 * we rely on mc_scf_log to detect NULL queue.
1003 		 * mc_scf_log_tail is irrelevant is such case.
1004 		 */
1005 		mcp->mc_scf_log_tail = mcp->mc_scf_log = p;
1006 	} else {
1007 		mcp->mc_scf_log_tail->sl_next = p;
1008 		mcp->mc_scf_log_tail = p;
1009 	}
1010 	mcp->mc_scf_total++;
1011 }
1012 
1013 /*
1014  * This routine determines what kind of CE happens, intermittent
1015  * or permanent as follows. (See 4.7.3 in Columbus2 PRM.)
1016  * - Do rewrite by issuing REW_REQ command to MAC_PTRL_CNTL register.
1017  * - If CE is still detected on the same address even after doing
1018  *   rewrite operation twice, it is determined as permanent error.
1019  * - If error is not detected anymore, it is determined as intermittent
1020  *   error.
1021  * - If UE is detected due to rewrite operation, it should be treated
1022  *   as UE.
1023  */
1024 
1025 /* ARGSUSED */
1026 static void
1027 mc_scrub_ce(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat, int ptrl_error)
1028 {
1029 	uint32_t cntl;
1030 	int i;
1031 
1032 	flt_stat->mf_type = FLT_TYPE_PERMANENT_CE;
1033 	/*
1034 	 * rewrite request 1st time reads and correct error data
1035 	 * and write to DIMM.  2nd rewrite request must be issued
1036 	 * after REW_CE/UE/END is 0.  When the 2nd request is completed,
1037 	 * if REW_CE = 1, then it is permanent CE.
1038 	 */
1039 	for (i = 0; i < 2; i++) {
1040 		cntl = do_rewrite(mcp, bank, flt_stat->mf_err_add);
1041 		/*
1042 		 * If the error becomes UE or CMPE
1043 		 * we return to the caller immediately.
1044 		 */
1045 		if (cntl & MAC_CNTL_REW_UE) {
1046 			if (ptrl_error)
1047 				flt_stat->mf_cntl |= MAC_CNTL_PTRL_UE;
1048 			else
1049 				flt_stat->mf_cntl |= MAC_CNTL_MI_UE;
1050 			flt_stat->mf_type = FLT_TYPE_UE;
1051 			return;
1052 		}
1053 		if (cntl & MAC_CNTL_REW_CMPE) {
1054 			if (ptrl_error)
1055 				flt_stat->mf_cntl |= MAC_CNTL_PTRL_CMPE;
1056 			else
1057 				flt_stat->mf_cntl |= MAC_CNTL_MI_CMPE;
1058 			flt_stat->mf_type = FLT_TYPE_CMPE;
1059 			return;
1060 		}
1061 	}
1062 	if (!(cntl & MAC_CNTL_REW_CE)) {
1063 		flt_stat->mf_type = FLT_TYPE_INTERMITTENT_CE;
1064 	}
1065 
1066 	if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
1067 		/* report PERMANENT_CE to SP via SCF */
1068 		if (!(flt_stat->mf_err_log & MAC_ERR_LOG_INVALID)) {
1069 			mc_queue_scf_log(mcp, flt_stat, bank);
1070 		}
1071 	}
1072 }
1073 
1074 #define	IS_CMPE(cntl, f)	((cntl) & ((f) ? MAC_CNTL_PTRL_CMPE :\
1075 				MAC_CNTL_MI_CMPE))
1076 #define	IS_UE(cntl, f)	((cntl) & ((f) ? MAC_CNTL_PTRL_UE : MAC_CNTL_MI_UE))
1077 #define	IS_CE(cntl, f)	((cntl) & ((f) ? MAC_CNTL_PTRL_CE : MAC_CNTL_MI_CE))
1078 #define	IS_OK(cntl, f)	(!((cntl) & ((f) ? MAC_CNTL_PTRL_ERRS : \
1079 			MAC_CNTL_MI_ERRS)))
1080 
1081 
1082 static int
1083 IS_CE_ONLY(uint32_t cntl, int ptrl_error)
1084 {
1085 	if (ptrl_error) {
1086 		return ((cntl & MAC_CNTL_PTRL_ERRS) == MAC_CNTL_PTRL_CE);
1087 	} else {
1088 		return ((cntl & MAC_CNTL_MI_ERRS) == MAC_CNTL_MI_CE);
1089 	}
1090 }
1091 
1092 void
1093 mc_write_cntl(mc_opl_t *mcp, int bank, uint32_t value)
1094 {
1095 	value |= mcp->mc_bank[bank].mcb_ptrl_cntl;
1096 	ST_MAC_REG(MAC_PTRL_CNTL(mcp, bank), value);
1097 }
1098 
1099 static int
1100 mc_stop(mc_opl_t *mcp, int bank)
1101 {
1102 	uint32_t reg;
1103 	int count = 0;
1104 
1105 	reg = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
1106 
1107 	if (reg & MAC_CNTL_PTRL_START)
1108 		MAC_PTRL_STOP(mcp, bank);
1109 
1110 	while (count++ <= MAX_MC_LOOP_COUNT) {
1111 		reg = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
1112 		if ((reg & MAC_CNTL_PTRL_STATUS) == 0)
1113 			return (0);
1114 		delay(drv_usectohz(10 * 1000));	/* 10 m.s. */
1115 	}
1116 	return (-1);
1117 }
1118 
1119 static void
1120 mc_read_ptrl_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat)
1121 {
1122 	flt_stat->mf_cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) &
1123 		MAC_CNTL_PTRL_ERRS;
1124 	flt_stat->mf_err_add = LD_MAC_REG(MAC_PTRL_ERR_ADD(mcp, bank));
1125 	flt_stat->mf_err_log = LD_MAC_REG(MAC_PTRL_ERR_LOG(mcp, bank));
1126 	flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num;
1127 	flt_stat->mf_flt_maddr.ma_bank = bank;
1128 	flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add;
1129 }
1130 
1131 static void
1132 mc_read_mi_reg(mc_opl_t *mcp, int bank, mc_flt_stat_t *flt_stat)
1133 {
1134 	uint32_t status, old_status;
1135 
1136 	status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) &
1137 		MAC_CNTL_MI_ERRS;
1138 	old_status = 0;
1139 
1140 	/* we keep reading until the status is stable */
1141 	while (old_status != status) {
1142 		old_status = status;
1143 		flt_stat->mf_err_add =
1144 			LD_MAC_REG(MAC_MI_ERR_ADD(mcp, bank));
1145 		flt_stat->mf_err_log =
1146 			LD_MAC_REG(MAC_MI_ERR_LOG(mcp, bank));
1147 		status = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank)) &
1148 			MAC_CNTL_MI_ERRS;
1149 		if (status == old_status) {
1150 			break;
1151 		}
1152 	}
1153 
1154 	flt_stat->mf_cntl = status;
1155 	flt_stat->mf_flt_maddr.ma_bd = mcp->mc_board_num;
1156 	flt_stat->mf_flt_maddr.ma_bank = bank;
1157 	flt_stat->mf_flt_maddr.ma_dimm_addr = flt_stat->mf_err_add;
1158 }
1159 
1160 
/*
 * Error philosophy for mirror mode:
 *
 * PTRL (The error addresses for both banks are the same, since ptrl
 * stops if it detects an error.)
 * - Compare error   Report CMPE.
 *
 * - UE-UE           Report MUE.  No rewrite.
 *
 * - UE-*            UE-(CE/OK).  Rewrite to scrub UE.  Report SUE.
 *
 * - CE-*            CE-(CE/OK).  Scrub to determine if CE is permanent.
 *                   If CE is permanent, inform SCF.  Once for each
 *                   Dimm.  If CE becomes UE or CMPE, go back to above.
 *
 *
 * MI (The error addresses for each bank are the same or different.)
 * - Compare error   If addresses are the same.  Just CMPE.
 *                   If addresses are different (this could happen
 *                   as a result of scrubbing).  Report each separately.
 *                   Only report error info on each side.
 *
 * - UE-UE           Addresses are the same.  Report MUE.
 *                   Addresses are different.  Report SUE on each bank.
 *                   Rewrite to clear UE.
 *
 * - UE-*            UE-(CE/OK)
 *                   Rewrite to clear UE.  Report SUE for the bank.
 *
 * - CE-*            CE-(CE/OK).  Scrub to determine if CE is permanent.
 *                   If CE becomes UE or CMPE, go back to above.
 *
 */
1194 
1195 static int
1196 mc_process_error_mir(mc_opl_t *mcp, mc_aflt_t *mc_aflt, mc_flt_stat_t *flt_stat)
1197 {
1198 	int ptrl_error = mc_aflt->mflt_is_ptrl;
1199 	int i;
1200 	int rv = 0;
1201 
1202 	MC_LOG("process mirror errors cntl[0] = %x, cntl[1] = %x\n",
1203 		flt_stat[0].mf_cntl, flt_stat[1].mf_cntl);
1204 
1205 	if (ptrl_error) {
1206 		if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl)
1207 			& MAC_CNTL_PTRL_ERRS) == 0)
1208 			return (0);
1209 	} else {
1210 		if (((flt_stat[0].mf_cntl | flt_stat[1].mf_cntl)
1211 			& MAC_CNTL_MI_ERRS) == 0)
1212 			return (0);
1213 	}
1214 
1215 	/*
1216 	 * First we take care of the case of CE
1217 	 * because they can become UE or CMPE
1218 	 */
1219 	for (i = 0; i < 2; i++) {
1220 		if (IS_CE_ONLY(flt_stat[i].mf_cntl, ptrl_error)) {
1221 			MC_LOG("CE detected on bank %d\n",
1222 				flt_stat[i].mf_flt_maddr.ma_bank);
1223 			mc_scrub_ce(mcp, flt_stat[i].mf_flt_maddr.ma_bank,
1224 				&flt_stat[i], ptrl_error);
1225 			rv = 1;
1226 		}
1227 	}
1228 
1229 	/* The above scrubbing can turn CE into UE or CMPE */
1230 
1231 	/*
1232 	 * Now we distinguish two cases: same address or not
1233 	 * the same address.  It might seem more intuitive to
1234 	 * distinguish PTRL v.s. MI error but it is more
1235 	 * complicated that way.
1236 	 */
1237 
1238 	if (flt_stat[0].mf_err_add == flt_stat[1].mf_err_add) {
1239 
1240 		if (IS_CMPE(flt_stat[0].mf_cntl, ptrl_error) ||
1241 		    IS_CMPE(flt_stat[1].mf_cntl, ptrl_error)) {
1242 			flt_stat[0].mf_type = FLT_TYPE_CMPE;
1243 			flt_stat[1].mf_type = FLT_TYPE_CMPE;
1244 			mc_aflt->mflt_erpt_class = MC_OPL_CMPE;
1245 			MC_LOG("cmpe error detected\n");
1246 			mc_aflt->mflt_nflts = 2;
1247 			mc_aflt->mflt_stat[0] = &flt_stat[0];
1248 			mc_aflt->mflt_stat[1] = &flt_stat[1];
1249 			mc_aflt->mflt_pr = PR_UE;
1250 			mc_err_drain(mc_aflt);
1251 			return (1);
1252 		}
1253 
1254 		if (IS_UE(flt_stat[0].mf_cntl, ptrl_error) &&
1255 			IS_UE(flt_stat[1].mf_cntl, ptrl_error)) {
1256 			/* Both side are UE's */
1257 
1258 			MAC_SET_ERRLOG_INFO(&flt_stat[0]);
1259 			MAC_SET_ERRLOG_INFO(&flt_stat[1]);
1260 			MC_LOG("MUE detected\n");
1261 			flt_stat[0].mf_type = flt_stat[1].mf_type =
1262 				FLT_TYPE_MUE;
1263 			mc_aflt->mflt_erpt_class = MC_OPL_MUE;
1264 			mc_aflt->mflt_nflts = 2;
1265 			mc_aflt->mflt_stat[0] = &flt_stat[0];
1266 			mc_aflt->mflt_stat[1] = &flt_stat[1];
1267 			mc_aflt->mflt_pr = PR_UE;
1268 			mc_err_drain(mc_aflt);
1269 			return (1);
1270 		}
1271 
1272 		/* Now the only case is UE/CE, UE/OK, or don't care */
1273 		for (i = 0; i < 2; i++) {
1274 		    if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) {
1275 			/* If we have CE, we would have done REW */
1276 			if (IS_OK(flt_stat[i^1].mf_cntl, ptrl_error)) {
1277 				(void) do_rewrite(mcp,
1278 				    flt_stat[i].mf_flt_maddr.ma_bank,
1279 				    flt_stat[i].mf_flt_maddr.ma_dimm_addr);
1280 			}
1281 			flt_stat[i].mf_type = FLT_TYPE_UE;
1282 			MAC_SET_ERRLOG_INFO(&flt_stat[i]);
1283 			mc_aflt->mflt_erpt_class = MC_OPL_SUE;
1284 			mc_aflt->mflt_stat[0] = &flt_stat[i];
1285 			mc_aflt->mflt_nflts = 1;
1286 			mc_aflt->mflt_pr = PR_MCE;
1287 			mc_err_drain(mc_aflt);
1288 			/* Once we hit a UE/CE or UE/OK case, done */
1289 			return (1);
1290 		    }
1291 		}
1292 
1293 	} else {
1294 		/*
1295 		 * addresses are different. That means errors
1296 		 * on the 2 banks are not related at all.
1297 		 */
1298 		for (i = 0; i < 2; i++) {
1299 		    if (IS_CMPE(flt_stat[i].mf_cntl, ptrl_error)) {
1300 			flt_stat[i].mf_type = FLT_TYPE_CMPE;
1301 			mc_aflt->mflt_erpt_class = MC_OPL_CMPE;
1302 			MC_LOG("cmpe error detected\n");
1303 			mc_aflt->mflt_nflts = 1;
1304 			mc_aflt->mflt_stat[0] = &flt_stat[i];
1305 			mc_aflt->mflt_pr = PR_UE;
1306 			mc_err_drain(mc_aflt);
1307 			/* no more report on this bank */
1308 			flt_stat[i].mf_cntl = 0;
1309 			rv = 1;
1310 		    }
1311 		}
1312 
1313 		for (i = 0; i < 2; i++) {
1314 		    if (IS_UE(flt_stat[i].mf_cntl, ptrl_error)) {
1315 			(void) do_rewrite(mcp,
1316 				flt_stat[i].mf_flt_maddr.ma_bank,
1317 				flt_stat[i].mf_flt_maddr.ma_dimm_addr);
1318 			flt_stat[i].mf_type = FLT_TYPE_UE;
1319 			MAC_SET_ERRLOG_INFO(&flt_stat[i]);
1320 			mc_aflt->mflt_erpt_class = MC_OPL_SUE;
1321 			mc_aflt->mflt_stat[0] = &flt_stat[i];
1322 			mc_aflt->mflt_nflts = 1;
1323 			mc_aflt->mflt_pr = PR_MCE;
1324 			mc_err_drain(mc_aflt);
1325 			rv = 1;
1326 		    }
1327 		}
1328 	}
1329 	return (rv);
1330 }
1331 
1332 static void
1333 mc_error_handler_mir(mc_opl_t *mcp, int bank, mc_addr_info_t *maddr)
1334 {
1335 	mc_aflt_t mc_aflt;
1336 	mc_flt_stat_t flt_stat[2], mi_flt_stat[2];
1337 	int other_bank;
1338 
1339 	if (mc_stop(mcp, bank)) {
1340 		cmn_err(CE_WARN, "Cannot stop Memory Patrol at /LSB%d/B%d\n",
1341 			mcp->mc_board_num, bank);
1342 		return;
1343 	}
1344 	bzero(&mc_aflt, sizeof (mc_aflt_t));
1345 	bzero(&flt_stat, 2 * sizeof (mc_flt_stat_t));
1346 	bzero(&mi_flt_stat, 2 * sizeof (mc_flt_stat_t));
1347 
1348 	mc_aflt.mflt_mcp = mcp;
1349 	mc_aflt.mflt_id = gethrtime();
1350 
1351 	/* Now read all the registers into flt_stat */
1352 
1353 	MC_LOG("Reading registers of bank %d\n", bank);
1354 	/* patrol registers */
1355 	mc_read_ptrl_reg(mcp, bank, &flt_stat[0]);
1356 
1357 	ASSERT(maddr);
1358 	maddr->mi_maddr = flt_stat[0].mf_flt_maddr;
1359 
1360 	MC_LOG("ptrl registers cntl %x add %x log %x\n",
1361 		flt_stat[0].mf_cntl,
1362 		flt_stat[0].mf_err_add,
1363 		flt_stat[0].mf_err_log);
1364 
1365 	/* MI registers */
1366 	mc_read_mi_reg(mcp, bank, &mi_flt_stat[0]);
1367 
1368 	MC_LOG("MI registers cntl %x add %x log %x\n",
1369 		mi_flt_stat[0].mf_cntl,
1370 		mi_flt_stat[0].mf_err_add,
1371 		mi_flt_stat[0].mf_err_log);
1372 
1373 	other_bank = bank^1;
1374 
1375 	MC_LOG("Reading registers of bank %d\n", other_bank);
1376 
1377 	ASSERT(mcp->mc_bank[other_bank].mcb_status & BANK_INSTALLED);
1378 
1379 	mc_read_ptrl_reg(mcp, other_bank, &flt_stat[1]);
1380 	MC_LOG("ptrl registers cntl %x add %x log %x\n",
1381 		flt_stat[1].mf_cntl,
1382 		flt_stat[1].mf_err_add,
1383 		flt_stat[1].mf_err_log);
1384 
1385 	/* MI registers */
1386 	mc_read_mi_reg(mcp, other_bank, &mi_flt_stat[1]);
1387 	MC_LOG("MI registers cntl %x add %x log %x\n",
1388 		mi_flt_stat[1].mf_cntl,
1389 		mi_flt_stat[1].mf_err_add,
1390 		mi_flt_stat[1].mf_err_log);
1391 
1392 	/* clear errors once we read all the registers */
1393 	MAC_CLEAR_ERRS(mcp, other_bank,
1394 		(MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));
1395 
1396 	MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));
1397 
1398 	/* Process PTRL errors first */
1399 
1400 	/* if not error mode, cntl1 is 0 */
1401 	if ((flt_stat[0].mf_err_add & MAC_ERR_ADD_INVALID) ||
1402 		(flt_stat[0].mf_err_log & MAC_ERR_LOG_INVALID))
1403 		flt_stat[0].mf_cntl = 0;
1404 
1405 	if ((flt_stat[1].mf_err_add & MAC_ERR_ADD_INVALID) ||
1406 		(flt_stat[1].mf_err_log & MAC_ERR_LOG_INVALID))
1407 		flt_stat[1].mf_cntl = 0;
1408 
1409 	mc_aflt.mflt_is_ptrl = 1;
1410 	maddr->mi_valid = mc_process_error_mir(mcp, &mc_aflt, &flt_stat[0]);
1411 
1412 	mc_aflt.mflt_is_ptrl = 0;
1413 	mc_process_error_mir(mcp, &mc_aflt, &mi_flt_stat[0]);
1414 }
1415 
1416 static int
1417 mc_process_error(mc_opl_t *mcp, int bank, mc_aflt_t *mc_aflt,
1418 	mc_flt_stat_t *flt_stat)
1419 {
1420 	int ptrl_error = mc_aflt->mflt_is_ptrl;
1421 	int rv = 0;
1422 
1423 	mc_aflt->mflt_erpt_class = NULL;
1424 	if (IS_UE(flt_stat->mf_cntl, ptrl_error)) {
1425 		MC_LOG("UE deteceted\n");
1426 		flt_stat->mf_type = FLT_TYPE_UE;
1427 		mc_aflt->mflt_erpt_class = MC_OPL_UE;
1428 		mc_aflt->mflt_pr = PR_UE;
1429 		MAC_SET_ERRLOG_INFO(flt_stat);
1430 		rv = 1;
1431 	} else if (IS_CE(flt_stat->mf_cntl, ptrl_error)) {
1432 		MC_LOG("CE deteceted\n");
1433 		MAC_SET_ERRLOG_INFO(flt_stat);
1434 
1435 		/* Error type can change after scrubing */
1436 		mc_scrub_ce(mcp, bank, flt_stat, ptrl_error);
1437 
1438 		if (flt_stat->mf_type == FLT_TYPE_PERMANENT_CE) {
1439 			mc_aflt->mflt_erpt_class = MC_OPL_CE;
1440 			mc_aflt->mflt_pr = PR_MCE;
1441 		} else if (flt_stat->mf_type == FLT_TYPE_UE) {
1442 			mc_aflt->mflt_erpt_class = MC_OPL_UE;
1443 			mc_aflt->mflt_pr = PR_UE;
1444 		}
1445 		rv = 1;
1446 	}
1447 	MC_LOG("mc_process_error: fault type %x erpt %s\n",
1448 		flt_stat->mf_type,
1449 		mc_aflt->mflt_erpt_class);
1450 	if (mc_aflt->mflt_erpt_class) {
1451 		mc_aflt->mflt_stat[0] = flt_stat;
1452 		mc_aflt->mflt_nflts = 1;
1453 		mc_err_drain(mc_aflt);
1454 	}
1455 	return (rv);
1456 }
1457 
1458 static void
1459 mc_error_handler(mc_opl_t *mcp, int bank, mc_addr_info_t *maddr)
1460 {
1461 	mc_aflt_t mc_aflt;
1462 	mc_flt_stat_t flt_stat, mi_flt_stat;
1463 
1464 	if (mc_stop(mcp, bank)) {
1465 		cmn_err(CE_WARN, "Cannot stop Memory Patrol at /LSB%d/B%d\n",
1466 			mcp->mc_board_num, bank);
1467 		return;
1468 	}
1469 
1470 	bzero(&mc_aflt, sizeof (mc_aflt_t));
1471 	bzero(&flt_stat, sizeof (mc_flt_stat_t));
1472 	bzero(&mi_flt_stat, sizeof (mc_flt_stat_t));
1473 
1474 	mc_aflt.mflt_mcp = mcp;
1475 	mc_aflt.mflt_id = gethrtime();
1476 
1477 	/* patrol registers */
1478 	mc_read_ptrl_reg(mcp, bank, &flt_stat);
1479 
1480 	ASSERT(maddr);
1481 	maddr->mi_maddr = flt_stat.mf_flt_maddr;
1482 
1483 	MC_LOG("ptrl registers cntl %x add %x log %x\n",
1484 		flt_stat.mf_cntl,
1485 		flt_stat.mf_err_add,
1486 		flt_stat.mf_err_log);
1487 
1488 	/* MI registers */
1489 	mc_read_mi_reg(mcp, bank, &mi_flt_stat);
1490 
1491 	MC_LOG("MI registers cntl %x add %x log %x\n",
1492 		mi_flt_stat.mf_cntl,
1493 		mi_flt_stat.mf_err_add,
1494 		mi_flt_stat.mf_err_log);
1495 
1496 	/* clear errors once we read all the registers */
1497 	MAC_CLEAR_ERRS(mcp, bank, (MAC_CNTL_PTRL_ERRS|MAC_CNTL_MI_ERRS));
1498 
1499 	mc_aflt.mflt_is_ptrl = 1;
1500 	if ((flt_stat.mf_cntl & MAC_CNTL_PTRL_ERRS) &&
1501 		((flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) &&
1502 		((flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) {
1503 		maddr->mi_valid = mc_process_error(mcp, bank,
1504 			&mc_aflt, &flt_stat);
1505 	}
1506 	mc_aflt.mflt_is_ptrl = 0;
1507 	if ((mi_flt_stat.mf_cntl & MAC_CNTL_MI_ERRS) &&
1508 		((mi_flt_stat.mf_err_add & MAC_ERR_ADD_INVALID) == 0) &&
1509 		((mi_flt_stat.mf_err_log & MAC_ERR_LOG_INVALID) == 0)) {
1510 		mc_process_error(mcp, bank, &mc_aflt, &mi_flt_stat);
1511 	}
1512 }
1513 
/*
 *	memory patrol error handling algorithm:
 *	timeout() is used to do periodic polling
 *	This is the flow chart.
 *	timeout ->
 *	mc_check_errors()
 *	    if memory bank is installed, read the status register
 *	    if any error bit is set,
 *	    -> mc_error_handler()
 *	        -> mc_stop()
 *		-> read all error registers
 *	        -> mc_process_error()
 *	            determine error type
 *	            rewrite to clear error or scrub to determine CE type
 *	            inform SCF on permanent CE
 *	        -> mc_err_drain
 *	            page offline processing
 *	            -> mc_ereport_post()
 */
1533 
1534 static void
1535 mc_check_errors_func(mc_opl_t *mcp)
1536 {
1537 	mc_addr_info_t maddr_info;
1538 	int i, error_count = 0;
1539 	uint32_t stat, cntl;
1540 
1541 	/*
1542 	 * scan errors.
1543 	 */
1544 	for (i = 0; i < BANKNUM_PER_SB; i++) {
1545 		if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
1546 			stat = ldphysio(MAC_PTRL_STAT(mcp, i));
1547 			cntl = ldphysio(MAC_PTRL_CNTL(mcp, i));
1548 			if (cntl & MAC_CNTL_PTRL_ADD_MAX) {
1549 				mcp->mc_period++;
1550 				MC_LOG("mc period %ld on "
1551 				    "/LSB%d/B%d\n", mcp->mc_period,
1552 				    mcp->mc_board_num, i);
1553 				MAC_CLEAR_MAX(mcp, i);
1554 			}
1555 			if (mc_debug_show_all) {
1556 				MC_LOG("/LSB%d/B%d stat %x cntl %x\n",
1557 					mcp->mc_board_num, i,
1558 					stat, cntl);
1559 			}
1560 			if (stat & (MAC_STAT_PTRL_ERRS|MAC_STAT_MI_ERRS)) {
1561 				maddr_info.mi_valid = 0;
1562 				maddr_info.mi_advance = 1;
1563 				if (IS_MIRROR(mcp, i))
1564 					mc_error_handler_mir(mcp, i,
1565 						&maddr_info);
1566 				else
1567 					mc_error_handler(mcp, i, &maddr_info);
1568 
1569 				error_count++;
1570 				restart_patrol(mcp, i, &maddr_info);
1571 			} else {
1572 				restart_patrol(mcp, i, NULL);
1573 			}
1574 		}
1575 	}
1576 	mc_process_scf_log(mcp);
1577 	if (error_count > 0)
1578 		mcp->mc_last_error += error_count;
1579 	else
1580 		mcp->mc_last_error = 0;
1581 }
1582 
1583 /* this is just a wrapper for the above func */
1584 
1585 static void
1586 mc_check_errors(void *arg)
1587 {
1588 	mc_opl_t *mcp = (mc_opl_t *)arg;
1589 	clock_t interval;
1590 
1591 	/*
1592 	 * scan errors.
1593 	 */
1594 	mutex_enter(&mcp->mc_lock);
1595 	mcp->mc_tid = 0;
1596 	if ((mcp->mc_status & MC_POLL_RUNNING) &&
1597 		!(mcp->mc_status & MC_SOFT_SUSPENDED)) {
1598 		mc_check_errors_func(mcp);
1599 
1600 		if (mcp->mc_last_error > 0) {
1601 			interval = (mcp->mc_interval_hz) >> mcp->mc_last_error;
1602 			if (interval < 1)
1603 				interval = 1;
1604 		} else
1605 			interval = mcp->mc_interval_hz;
1606 
1607 		mcp->mc_tid = timeout(mc_check_errors, mcp,
1608 		    interval);
1609 	}
1610 	mutex_exit(&mcp->mc_lock);
1611 }
1612 
1613 static void
1614 get_ptrl_start_address(mc_opl_t *mcp, int bank, mc_addr_t *maddr)
1615 {
1616 	maddr->ma_bd = mcp->mc_board_num;
1617 	maddr->ma_bank = bank;
1618 	maddr->ma_dimm_addr = 0;
1619 }
1620 
/*
 * Layout of the "sb-mem-ranges" property value as read by
 * get_base_address().
 */
struct mc_mem_range {
	uint64_t	addr;	/* stored into mc_start_address */
	uint64_t	size;	/* stored into mc_size */
};

typedef struct mc_mem_range mc_mem_range_t;
1625 
1626 static int
1627 get_base_address(mc_opl_t *mcp)
1628 {
1629 	mc_mem_range_t *mem_range;
1630 	int len;
1631 
1632 	if (ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS,
1633 		"sb-mem-ranges", (caddr_t)&mem_range, &len) != DDI_SUCCESS) {
1634 		return (DDI_FAILURE);
1635 	}
1636 
1637 	mcp->mc_start_address = mem_range->addr;
1638 	mcp->mc_size = mem_range->size;
1639 
1640 	kmem_free(mem_range, len);
1641 	return (DDI_SUCCESS);
1642 }
1643 
/*
 * One entry of the "mc-addr" property: a bank number and the physical
 * address of that bank's registers, split into two 32-bit halves.
 */
struct mc_addr_spec {
	uint32_t bank;
	uint32_t phys_hi;
	uint32_t phys_lo;
};

/*
 * 64-bit register base address of entry 'i' in the mc_addr_spec array
 * 'm'.  Both arguments are parenthesized so that expressions such as
 * "p + 1" may be passed safely.
 */
#define	REGS_PA(m, i)	\
	((((uint64_t)(m)[(i)].phys_hi) << 32) | (m)[(i)].phys_lo)
1651 
/*
 * Names of the translation table properties; mc_board_add() reads
 * entry i into mc_trans_table[i].
 */
static char *mc_tbl_name[] = {
	"cs0-mc-pa-trans-table",	/* index 0 */
	"cs1-mc-pa-trans-table"		/* index 1 */
};
1656 
1657 static int
1658 mc_valid_pa(mc_opl_t *mcp, uint64_t pa)
1659 {
1660 	struct memlist *ml;
1661 
1662 	if (mcp->mlist == NULL)
1663 		mc_get_mlist(mcp);
1664 
1665 	for (ml = mcp->mlist; ml; ml = ml->next) {
1666 		if (ml->address <= pa && pa < (ml->address + ml->size))
1667 			return (1);
1668 	}
1669 	return (0);
1670 }
1671 
1672 static void
1673 mc_memlist_delete(struct memlist *mlist)
1674 {
1675 	struct memlist *ml;
1676 
1677 	for (ml = mlist; ml; ml = mlist) {
1678 		mlist = ml->next;
1679 		kmem_free(ml, sizeof (struct memlist));
1680 	}
1681 }
1682 
1683 static struct memlist *
1684 mc_memlist_dup(struct memlist *mlist)
1685 {
1686 	struct memlist *hl = NULL, *tl, **mlp;
1687 
1688 	if (mlist == NULL)
1689 		return (NULL);
1690 
1691 	mlp = &hl;
1692 	tl = *mlp;
1693 	for (; mlist; mlist = mlist->next) {
1694 		*mlp = kmem_alloc(sizeof (struct memlist), KM_SLEEP);
1695 		(*mlp)->address = mlist->address;
1696 		(*mlp)->size = mlist->size;
1697 		(*mlp)->prev = tl;
1698 		tl = *mlp;
1699 		mlp = &((*mlp)->next);
1700 	}
1701 	*mlp = NULL;
1702 
1703 	return (hl);
1704 }
1705 
1706 
1707 static struct memlist *
1708 mc_memlist_del_span(struct memlist *mlist, uint64_t base, uint64_t len)
1709 {
1710 	uint64_t	end;
1711 	struct memlist	*ml, *tl, *nlp;
1712 
1713 	if (mlist == NULL)
1714 		return (NULL);
1715 
1716 	end = base + len;
1717 	if ((end <= mlist->address) || (base == end))
1718 		return (mlist);
1719 
1720 	for (tl = ml = mlist; ml; tl = ml, ml = nlp) {
1721 		uint64_t	mend;
1722 
1723 		nlp = ml->next;
1724 
1725 		if (end <= ml->address)
1726 			break;
1727 
1728 		mend = ml->address + ml->size;
1729 		if (base < mend) {
1730 			if (base <= ml->address) {
1731 				ml->address = end;
1732 				if (end >= mend)
1733 					ml->size = 0ull;
1734 				else
1735 					ml->size = mend - ml->address;
1736 			} else {
1737 				ml->size = base - ml->address;
1738 				if (end < mend) {
1739 					struct memlist	*nl;
1740 					/*
1741 					 * splitting an memlist entry.
1742 					 */
1743 					nl = kmem_alloc(sizeof (struct memlist),
1744 						KM_SLEEP);
1745 					nl->address = end;
1746 					nl->size = mend - nl->address;
1747 					if ((nl->next = nlp) != NULL)
1748 						nlp->prev = nl;
1749 					nl->prev = ml;
1750 					ml->next = nl;
1751 					nlp = nl;
1752 				}
1753 			}
1754 			if (ml->size == 0ull) {
1755 				if (ml == mlist) {
1756 					if ((mlist = nlp) != NULL)
1757 						nlp->prev = NULL;
1758 					kmem_free(ml, sizeof (struct memlist));
1759 					if (mlist == NULL)
1760 						break;
1761 					ml = nlp;
1762 				} else {
1763 					if ((tl->next = nlp) != NULL)
1764 						nlp->prev = tl;
1765 					kmem_free(ml, sizeof (struct memlist));
1766 					ml = tl;
1767 				}
1768 			}
1769 		}
1770 	}
1771 
1772 	return (mlist);
1773 }
1774 
1775 static void
1776 mc_get_mlist(mc_opl_t *mcp)
1777 {
1778 	struct memlist *mlist;
1779 
1780 	memlist_read_lock();
1781 	mlist = mc_memlist_dup(phys_install);
1782 	memlist_read_unlock();
1783 
1784 	if (mlist) {
1785 		mlist = mc_memlist_del_span(mlist, 0ull, mcp->mc_start_address);
1786 	}
1787 
1788 	if (mlist) {
1789 		uint64_t startpa, endpa;
1790 
1791 		startpa = mcp->mc_start_address + mcp->mc_size;
1792 		endpa = ptob(physmax + 1);
1793 		if (endpa > startpa) {
1794 			mlist = mc_memlist_del_span(mlist,
1795 				startpa, endpa - startpa);
1796 		}
1797 	}
1798 
1799 	if (mlist) {
1800 		mcp->mlist = mlist;
1801 	}
1802 }
1803 
1804 int
1805 mc_board_add(mc_opl_t *mcp)
1806 {
1807 	struct mc_addr_spec *macaddr;
1808 	int len, i, bk, cc;
1809 	mc_addr_info_t maddr;
1810 	uint32_t mirr;
1811 
1812 	mutex_init(&mcp->mc_lock, NULL, MUTEX_DRIVER, NULL);
1813 
1814 	/*
1815 	 * Get configurations from "pseudo-mc" node which includes:
1816 	 * board# : LSB number
1817 	 * mac-addr : physical base address of MAC registers
1818 	 * csX-mac-pa-trans-table: translation table from DIMM address
1819 	 *			to physical address or vice versa.
1820 	 */
1821 	mcp->mc_board_num = (int)ddi_getprop(DDI_DEV_T_ANY, mcp->mc_dip,
1822 		DDI_PROP_DONTPASS, "board#", -1);
1823 
1824 	/*
1825 	 * Get start address in this CAB. It can be gotten from
1826 	 * "sb-mem-ranges" property.
1827 	 */
1828 
1829 	if (get_base_address(mcp) == DDI_FAILURE) {
1830 		mutex_destroy(&mcp->mc_lock);
1831 		return (DDI_FAILURE);
1832 	}
1833 	/* get mac-pa trans tables */
1834 	for (i = 0; i < MC_TT_CS; i++) {
1835 		len = MC_TT_ENTRIES;
1836 		cc = ddi_getlongprop_buf(DDI_DEV_T_ANY, mcp->mc_dip,
1837 			DDI_PROP_DONTPASS, mc_tbl_name[i],
1838 			(caddr_t)mcp->mc_trans_table[i], &len);
1839 
1840 		if (cc != DDI_SUCCESS) {
1841 			bzero(mcp->mc_trans_table[i], MC_TT_ENTRIES);
1842 		}
1843 	}
1844 	mcp->mlist = NULL;
1845 
1846 	mc_get_mlist(mcp);
1847 
1848 	/* initialize bank informations */
1849 	cc = ddi_getlongprop(DDI_DEV_T_ANY, mcp->mc_dip, DDI_PROP_DONTPASS,
1850 		"mc-addr", (caddr_t)&macaddr, &len);
1851 	if (cc != DDI_SUCCESS) {
1852 		cmn_err(CE_WARN, "Cannot get mc-addr. err=%d\n", cc);
1853 		mutex_destroy(&mcp->mc_lock);
1854 		return (DDI_FAILURE);
1855 	}
1856 
1857 	for (i = 0; i < len / sizeof (struct mc_addr_spec); i++) {
1858 		struct mc_bank *bankp;
1859 		uint32_t reg;
1860 
1861 		/*
1862 		 * setup bank
1863 		 */
1864 		bk = macaddr[i].bank;
1865 		bankp = &(mcp->mc_bank[bk]);
1866 		bankp->mcb_status = BANK_INSTALLED;
1867 		bankp->mcb_reg_base = REGS_PA(macaddr, i);
1868 
1869 		reg = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bk));
1870 		bankp->mcb_ptrl_cntl = (reg & MAC_CNTL_PTRL_PRESERVE_BITS);
1871 
1872 		/*
1873 		 * check if mirror mode
1874 		 */
1875 		mirr = LD_MAC_REG(MAC_MIRR(mcp, bk));
1876 
1877 		if (mirr & MAC_MIRR_MIRROR_MODE) {
1878 			MC_LOG("Mirror -> /LSB%d/B%d\n",
1879 				mcp->mc_board_num, bk);
1880 			bankp->mcb_status |= BANK_MIRROR_MODE;
1881 			/*
1882 			 * The following bit is only used for
1883 			 * error injection.  We should clear it
1884 			 */
1885 			if (mirr & MAC_MIRR_BANK_EXCLUSIVE)
1886 				ST_MAC_REG(MAC_MIRR(mcp, bk),
1887 					0);
1888 		}
1889 
1890 		/*
1891 		 * restart if not mirror mode or the other bank
1892 		 * of the mirror is not running
1893 		 */
1894 		if (!(mirr & MAC_MIRR_MIRROR_MODE) ||
1895 			!(mcp->mc_bank[bk^1].mcb_status &
1896 			BANK_PTRL_RUNNING)) {
1897 			MC_LOG("Starting up /LSB%d/B%d\n",
1898 				mcp->mc_board_num, bk);
1899 			get_ptrl_start_address(mcp, bk, &maddr.mi_maddr);
1900 			maddr.mi_valid = 1;
1901 			maddr.mi_advance = 0;
1902 			restart_patrol(mcp, bk, &maddr);
1903 		} else {
1904 			MC_LOG("Not starting up /LSB%d/B%d\n",
1905 				mcp->mc_board_num, bk);
1906 		}
1907 		bankp->mcb_status |= BANK_PTRL_RUNNING;
1908 	}
1909 	kmem_free(macaddr, len);
1910 
1911 	/*
1912 	 * set interval in HZ.
1913 	 */
1914 	for (i = 0; i < BANKNUM_PER_SB; i++) {
1915 		mcp->mc_scf_retry[i] = 0;
1916 	}
1917 	mcp->mc_last_error = 0;
1918 	mcp->mc_period = 0;
1919 
1920 	mcp->mc_interval_hz = drv_usectohz(mc_patrol_interval_sec * 1000000);
1921 	/* restart memory patrol checking */
1922 	mcp->mc_status |= MC_POLL_RUNNING;
1923 	mcp->mc_tid = timeout(mc_check_errors, mcp, mcp->mc_interval_hz);
1924 
1925 	return (DDI_SUCCESS);
1926 }
1927 
1928 int
1929 mc_board_del(mc_opl_t *mcp)
1930 {
1931 	int i;
1932 	scf_log_t *p;
1933 	timeout_id_t tid = 0;
1934 
1935 	/*
1936 	 * cleanup mac state
1937 	 */
1938 	mutex_enter(&mcp->mc_lock);
1939 	for (i = 0; i < BANKNUM_PER_SB; i++) {
1940 		if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
1941 			if (mc_stop(mcp, i)) {
1942 				mutex_exit(&mcp->mc_lock);
1943 				return (-1);
1944 			}
1945 			mcp->mc_bank[i].mcb_status &= ~BANK_INSTALLED;
1946 		}
1947 	}
1948 
1949 	/* stop memory patrol checking */
1950 	if (mcp->mc_status & MC_POLL_RUNNING) {
1951 		mcp->mc_status &= ~MC_POLL_RUNNING;
1952 		tid = mcp->mc_tid;
1953 		mcp->mc_tid = 0;
1954 	}
1955 
1956 	/* just throw away all the scf logs */
1957 	while ((p = mcp->mc_scf_log) != NULL) {
1958 		mcp->mc_scf_log = p->sl_next;
1959 		mcp->mc_scf_total--;
1960 		kmem_free(p, sizeof (scf_log_t));
1961 	}
1962 
1963 	if (mcp->mlist)
1964 		mc_memlist_delete(mcp->mlist);
1965 
1966 	mutex_exit(&mcp->mc_lock);
1967 	if (tid)
1968 		(void) untimeout(tid);
1969 
1970 	mutex_destroy(&mcp->mc_lock);
1971 	return (DDI_SUCCESS);
1972 }
1973 
1974 int
1975 mc_suspend(mc_opl_t *mcp, uint32_t flag)
1976 {
1977 	timeout_id_t tid = 0;
1978 	int i;
1979 	/* stop memory patrol checking */
1980 	mutex_enter(&mcp->mc_lock);
1981 	if (mcp->mc_status & MC_POLL_RUNNING) {
1982 		for (i = 0; i < BANKNUM_PER_SB; i++) {
1983 			if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
1984 				if (mc_stop(mcp, i)) {
1985 					mutex_exit(&mcp->mc_lock);
1986 					return (-1);
1987 				}
1988 			}
1989 		}
1990 		mcp->mc_status &= ~MC_POLL_RUNNING;
1991 		tid = mcp->mc_tid;
1992 	}
1993 	mcp->mc_status |= flag;
1994 	mcp->mc_tid = 0;
1995 	mutex_exit(&mcp->mc_lock);
1996 	if (tid)
1997 		(void) untimeout(tid);
1998 
1999 	return (DDI_SUCCESS);
2000 }
2001 
2002 /* caller must clear the SUSPEND bits or this will do nothing */
2003 
2004 int
2005 mc_resume(mc_opl_t *mcp, uint32_t flag)
2006 {
2007 	int i;
2008 	uint64_t basepa;
2009 
2010 	mutex_enter(&mcp->mc_lock);
2011 	basepa = mcp->mc_start_address;
2012 	if (get_base_address(mcp) == DDI_FAILURE) {
2013 		mutex_exit(&mcp->mc_lock);
2014 		return (DDI_FAILURE);
2015 	}
2016 
2017 	if (basepa != mcp->mc_start_address) {
2018 		if (mcp->mlist)
2019 			mc_memlist_delete(mcp->mlist);
2020 		mcp->mlist = NULL;
2021 		mc_get_mlist(mcp);
2022 	}
2023 
2024 	mcp->mc_status &= ~flag;
2025 	mcp->mc_list->mc_start_address = mcp->mc_start_address;
2026 
2027 	if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) {
2028 		mutex_exit(&mcp->mc_lock);
2029 		return (DDI_SUCCESS);
2030 	}
2031 
2032 	if (!(mcp->mc_status & MC_POLL_RUNNING)) {
2033 		/* restart memory patrol checking */
2034 		mcp->mc_status |= MC_POLL_RUNNING;
2035 		for (i = 0; i < BANKNUM_PER_SB; i++) {
2036 			if (mcp->mc_bank[i].mcb_status & BANK_INSTALLED) {
2037 				restart_patrol(mcp, i, NULL);
2038 			}
2039 		}
2040 		/* check error asap */
2041 		mcp->mc_tid = timeout(mc_check_errors, mcp, 1);
2042 	}
2043 	mutex_exit(&mcp->mc_lock);
2044 
2045 	return (DDI_SUCCESS);
2046 }
2047 
2048 static mc_opl_t *
2049 mc_pa_to_mcp(uint64_t pa)
2050 {
2051 	mc_inst_list_t *p;
2052 	ASSERT(MUTEX_HELD(&mcmutex));
2053 	for (p = mc_instances; p; p = p->next) {
2054 		/* if mac patrol is suspended, we cannot rely on it */
2055 		if (!(p->mc_opl->mc_status & MC_POLL_RUNNING) ||
2056 			(p->mc_opl->mc_status & MC_SOFT_SUSPENDED))
2057 			continue;
2058 		if ((p->mc_start_address <= pa) &&
2059 			(pa < (p->mc_start_address + p->mc_size))) {
2060 			return (p->mc_opl);
2061 		}
2062 	}
2063 	return (NULL);
2064 }
2065 
2066 /*
2067  * Get Physical Board number from Logical one.
2068  */
2069 static int
2070 mc_opl_get_physical_board(int sb)
2071 {
2072 	if (&opl_get_physical_board) {
2073 		return (opl_get_physical_board(sb));
2074 	}
2075 
2076 	cmn_err(CE_NOTE, "!opl_get_physical_board() not loaded\n");
2077 	return (-1);
2078 }
2079 
2080 /* ARGSUSED */
2081 int
2082 mc_get_mem_unum(int synd_code, uint64_t flt_addr, char *buf, int buflen,
2083 	int *lenp)
2084 {
2085 	mc_opl_t *mcp;
2086 	int bank;
2087 	int sb;
2088 
2089 	mutex_enter(&mcmutex);
2090 
2091 	if (((mcp = mc_pa_to_mcp(flt_addr)) == NULL) ||
2092 		(!pa_is_valid(mcp, flt_addr))) {
2093 		mutex_exit(&mcmutex);
2094 		if (snprintf(buf, buflen, "UNKNOWN") >= buflen) {
2095 			return (ENOSPC);
2096 		} else {
2097 			if (lenp)
2098 				*lenp = strlen(buf);
2099 		}
2100 		return (0);
2101 	}
2102 
2103 	bank = pa_to_bank(mcp, flt_addr - mcp->mc_start_address);
2104 	sb = mc_opl_get_physical_board(mcp->mc_board_num);
2105 
2106 	if (sb == -1) {
2107 		mutex_exit(&mcmutex);
2108 		return (ENXIO);
2109 	}
2110 
2111 	if (snprintf(buf, buflen, "/CMU%d/B%d", sb, bank) >= buflen) {
2112 		mutex_exit(&mcmutex);
2113 		return (ENOSPC);
2114 	} else {
2115 		if (lenp)
2116 			*lenp = strlen(buf);
2117 	}
2118 	mutex_exit(&mcmutex);
2119 	return (0);
2120 }
2121 
2122 int
2123 opl_mc_suspend()
2124 {
2125 	mc_opl_t *mcp;
2126 	mc_inst_list_t *p;
2127 
2128 	mutex_enter(&mcmutex);
2129 
2130 	for (p = mc_instances; p; p = p->next) {
2131 		mcp = p->mc_opl;
2132 		(void) mc_suspend(mcp, MC_SOFT_SUSPENDED);
2133 	}
2134 
2135 	mutex_exit(&mcmutex);
2136 	return (0);
2137 }
2138 
2139 int
2140 opl_mc_resume()
2141 {
2142 	mc_opl_t *mcp;
2143 	mc_inst_list_t *p;
2144 
2145 	mutex_enter(&mcmutex);
2146 
2147 	for (p = mc_instances; p; p = p->next) {
2148 		mcp = p->mc_opl;
2149 		(void) mc_resume(mcp, MC_SOFT_SUSPENDED);
2150 	}
2151 
2152 	mutex_exit(&mcmutex);
2153 	return (0);
2154 }
2155 
2156 static void
2157 insert_mcp(mc_opl_t *mcp)
2158 {
2159 	mc_inst_list_t	*p;
2160 
2161 	p = kmem_zalloc(sizeof (mc_inst_list_t), KM_SLEEP);
2162 	p->mc_opl = mcp;
2163 	p->mc_board_num = mcp->mc_board_num;
2164 	p->mc_start_address = mcp->mc_start_address;
2165 	p->mc_size = mcp->mc_size;
2166 	mcp->mc_list = p;
2167 
2168 	mutex_enter(&mcmutex);
2169 
2170 	p->next = mc_instances;
2171 	mc_instances = p;
2172 
2173 	mutex_exit(&mcmutex);
2174 }
2175 
2176 static void
2177 delete_mcp(mc_opl_t *mcp)
2178 {
2179 	mc_inst_list_t *prev, *current;
2180 	mc_inst_list_t *p;
2181 
2182 	p = mcp->mc_list;
2183 
2184 	if (mc_instances == p) {
2185 		mc_instances = p->next;
2186 		kmem_free(p, sizeof (mc_inst_list_t));
2187 		return;
2188 	}
2189 	prev = mc_instances;
2190 	for (current = mc_instances; current != NULL; current = current->next) {
2191 		if (current == p) {
2192 			prev->next = p->next;
2193 			kmem_free(p, sizeof (mc_inst_list_t));
2194 			return;
2195 		}
2196 		prev = current;
2197 	}
2198 }
2199 
2200 /* Error injection interface */
2201 
2202 /* ARGSUSED */
2203 int
2204 mc_inject_error(int error_type, uint64_t pa, uint32_t flags)
2205 {
2206 	mc_opl_t *mcp;
2207 	int bank;
2208 	uint32_t dimm_addr;
2209 	uint32_t cntl;
2210 	mc_addr_info_t maddr;
2211 	uint32_t data, stat;
2212 	int both_sides = 0;
2213 	uint64_t pa0;
2214 	on_trap_data_t otd;
2215 	extern void cpu_flush_ecache(void);
2216 
2217 	MC_LOG("HW mc_inject_error(%x, %lx, %x)\n", error_type, pa, flags);
2218 
2219 	mutex_enter(&mcmutex);
2220 
2221 	if ((mcp = mc_pa_to_mcp(pa)) == NULL) {
2222 		mutex_exit(&mcmutex);
2223 		MC_LOG("mc_inject_error: invalid pa\n");
2224 		return (ENOTSUP);
2225 	}
2226 
2227 	mutex_enter(&mcp->mc_lock);
2228 	mutex_exit(&mcmutex);
2229 
2230 	if (mcp->mc_status & (MC_SOFT_SUSPENDED | MC_DRIVER_SUSPENDED)) {
2231 		mutex_exit(&mcp->mc_lock);
2232 		MC_LOG("mc-opl has been suspended.  No error injection.\n");
2233 		return (EBUSY);
2234 	}
2235 
2236 	/* convert pa to offset within the board */
2237 	MC_LOG("pa %lx, offset %lx\n", pa, pa - mcp->mc_start_address);
2238 
2239 	if (!pa_is_valid(mcp, pa)) {
2240 		mutex_exit(&mcp->mc_lock);
2241 		return (EINVAL);
2242 	}
2243 
2244 	pa0 = pa - mcp->mc_start_address;
2245 
2246 	bank = pa_to_bank(mcp, pa0);
2247 
2248 	if (flags & MC_INJECT_FLAG_OTHER)
2249 		bank = bank ^ 1;
2250 
2251 	if (MC_INJECT_MIRROR(error_type) && !IS_MIRROR(mcp, bank)) {
2252 		mutex_exit(&mcp->mc_lock);
2253 		MC_LOG("Not mirror mode\n");
2254 		return (EINVAL);
2255 	}
2256 
2257 	dimm_addr = pa_to_dimm(mcp, pa0);
2258 
2259 	MC_LOG("injecting error to /LSB%d/B%d/D%x\n",
2260 		mcp->mc_board_num, bank, dimm_addr);
2261 
2262 
2263 	switch (error_type) {
2264 	case MC_INJECT_INTERMITTENT_MCE:
2265 	case MC_INJECT_PERMANENT_MCE:
2266 	case MC_INJECT_MUE:
2267 		both_sides = 1;
2268 	}
2269 
2270 	if (flags & MC_INJECT_FLAG_RESET)
2271 		ST_MAC_REG(MAC_EG_CNTL(mcp, bank), 0);
2272 
2273 	ST_MAC_REG(MAC_EG_ADD(mcp, bank), dimm_addr & MAC_EG_ADD_MASK);
2274 
2275 	if (both_sides) {
2276 		ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), 0);
2277 		ST_MAC_REG(MAC_EG_ADD(mcp, bank^1),
2278 			dimm_addr & MAC_EG_ADD_MASK);
2279 	}
2280 
2281 	switch (error_type) {
2282 	case MC_INJECT_UE:
2283 	case MC_INJECT_SUE:
2284 	case MC_INJECT_MUE:
2285 		if (flags & MC_INJECT_FLAG_PATH) {
2286 			cntl = MAC_EG_ADD_FIX
2287 				|MAC_EG_FORCE_READ00|MAC_EG_FORCE_READ16
2288 				|MAC_EG_DERR_ONCE;
2289 		} else {
2290 			cntl = MAC_EG_ADD_FIX|MAC_EG_FORCE_DERR00
2291 				|MAC_EG_FORCE_DERR16|MAC_EG_DERR_ONCE;
2292 		}
2293 		flags |= MC_INJECT_FLAG_ST;
2294 		break;
2295 	case MC_INJECT_INTERMITTENT_CE:
2296 	case MC_INJECT_INTERMITTENT_MCE:
2297 		if (flags & MC_INJECT_FLAG_PATH) {
2298 			cntl = MAC_EG_ADD_FIX
2299 				|MAC_EG_FORCE_READ00
2300 				|MAC_EG_DERR_ONCE;
2301 		} else {
2302 			cntl = MAC_EG_ADD_FIX
2303 				|MAC_EG_FORCE_DERR16
2304 				|MAC_EG_DERR_ONCE;
2305 		}
2306 		flags |= MC_INJECT_FLAG_ST;
2307 		break;
2308 	case MC_INJECT_PERMANENT_CE:
2309 	case MC_INJECT_PERMANENT_MCE:
2310 		if (flags & MC_INJECT_FLAG_PATH) {
2311 			cntl = MAC_EG_ADD_FIX
2312 				|MAC_EG_FORCE_READ00
2313 				|MAC_EG_DERR_ALWAYS;
2314 		} else {
2315 			cntl = MAC_EG_ADD_FIX
2316 				|MAC_EG_FORCE_DERR16
2317 				|MAC_EG_DERR_ALWAYS;
2318 		}
2319 		flags |= MC_INJECT_FLAG_ST;
2320 		break;
2321 	case MC_INJECT_CMPE:
2322 		data = 0xabcdefab;
2323 		stphys(pa, data);
2324 		cpu_flush_ecache();
2325 		MC_LOG("CMPE: writing data %x to %lx\n", data, pa);
2326 		ST_MAC_REG(MAC_MIRR(mcp, bank), MAC_MIRR_BANK_EXCLUSIVE);
2327 		stphys(pa, data ^ 0xffffffff);
2328 		cpu_flush_ecache();
2329 		ST_MAC_REG(MAC_MIRR(mcp, bank), 0);
2330 		MC_LOG("CMPE: write new data %xto %lx\n", data, pa);
2331 		cntl = 0;
2332 		break;
2333 	case MC_INJECT_NOP:
2334 		cntl = 0;
2335 		break;
2336 	default:
2337 		MC_LOG("mc_inject_error: invalid option\n");
2338 		cntl = 0;
2339 	}
2340 
2341 	if (cntl) {
2342 		ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl & MAC_EG_SETUP_MASK);
2343 		ST_MAC_REG(MAC_EG_CNTL(mcp, bank), cntl);
2344 
2345 		if (both_sides) {
2346 			ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl &
2347 				MAC_EG_SETUP_MASK);
2348 			ST_MAC_REG(MAC_EG_CNTL(mcp, bank^1), cntl);
2349 		}
2350 	}
2351 
2352 	/*
2353 	 * For all injection cases except compare error, we
2354 	 * must write to the PA to trigger the error.
2355 	 */
2356 
2357 	if (flags & MC_INJECT_FLAG_ST) {
2358 		data = 0xf0e0d0c0;
2359 		MC_LOG("Writing %x to %lx\n", data, pa);
2360 		stphys(pa, data);
2361 		cpu_flush_ecache();
2362 	}
2363 
2364 	delay(inject_op_delay * drv_usectohz(1000 * 1000));
2365 
2366 
2367 	if (flags & MC_INJECT_FLAG_LD) {
2368 		if (flags & MC_INJECT_FLAG_NO_TRAP) {
2369 			if (on_trap(&otd, OT_DATA_EC)) {
2370 				no_trap();
2371 				MC_LOG("Trap occurred\n");
2372 			} else {
2373 				MC_LOG("On-trap Reading from %lx\n", pa);
2374 				data = ldphys(pa);
2375 				no_trap();
2376 				MC_LOG("data = %x\n", data);
2377 			}
2378 		} else {
2379 			MC_LOG("Reading from %lx\n", pa);
2380 			data = ldphys(pa);
2381 			MC_LOG("data = %x\n", data);
2382 		}
2383 	}
2384 
2385 	if (flags & MC_INJECT_FLAG_RESTART) {
2386 		delay(inject_op_delay * drv_usectohz(1000 * 1000));
2387 
2388 		MC_LOG("Restart patrol\n");
2389 		if (mc_stop(mcp, bank)) {
2390 			cmn_err(CE_WARN, "Cannot stop Memory Patrol at "
2391 				"/LSB%d/B%d\n", mcp->mc_board_num, bank);
2392 			mutex_exit(&mcp->mc_lock);
2393 			return (EIO);
2394 		}
2395 		maddr.mi_maddr.ma_bd = mcp->mc_board_num;
2396 		maddr.mi_maddr.ma_bank = bank;
2397 		maddr.mi_maddr.ma_dimm_addr = dimm_addr;
2398 		maddr.mi_valid = 1;
2399 		maddr.mi_advance = 0;
2400 		restart_patrol(mcp, bank, &maddr);
2401 	}
2402 
2403 	if (flags & MC_INJECT_FLAG_POLL) {
2404 		delay(inject_op_delay * drv_usectohz(1000 * 1000));
2405 
2406 		MC_LOG("Poll patrol error\n");
2407 		stat = LD_MAC_REG(MAC_PTRL_STAT(mcp, bank));
2408 		cntl = LD_MAC_REG(MAC_PTRL_CNTL(mcp, bank));
2409 		if (stat & (MAC_STAT_PTRL_ERRS|MAC_STAT_MI_ERRS)) {
2410 			maddr.mi_valid = 0;
2411 			maddr.mi_advance = 1;
2412 			if (IS_MIRROR(mcp, bank))
2413 				mc_error_handler_mir(mcp, bank,
2414 					&maddr);
2415 			else
2416 				mc_error_handler(mcp, bank, &maddr);
2417 
2418 			restart_patrol(mcp, bank, &maddr);
2419 		} else
2420 			restart_patrol(mcp, bank, NULL);
2421 	}
2422 
2423 	mutex_exit(&mcp->mc_lock);
2424 	return (0);
2425 }
2426 
/*
 * Logging wrapper around stphysio(): report the 32-bit value and the
 * destination address through MC_LOG, then store data at pa.
 */
void
mc_stphysio(uint64_t pa, uint32_t data)
{
	/* Record the pending write, then perform it. */
	MC_LOG("0x%x -> pa(%lx)\n", data, pa);
	stphysio(pa, data);
}
2433 
/*
 * Logging wrapper around ldphysio(): load a 32-bit value from pa,
 * report the address/value pair through MC_LOG, and return the value
 * that was read.
 */
uint32_t
mc_ldphysio(uint64_t pa)
{
	uint32_t value = ldphysio(pa);

	MC_LOG("pa(%lx) = 0x%x\n", pa, value);
	return (value);
}
2443