xref: /illumos-gate/usr/src/uts/sun4u/io/mc-us3i.c (revision 446fbcf9529309df2961e3c88cbe7512114ab117)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/conf.h>
30 #include <sys/ddi.h>
31 #include <sys/stat.h>
32 #include <sys/sunddi.h>
33 #include <sys/ddi_impldefs.h>
34 #include <sys/obpdefs.h>
35 #include <sys/cmn_err.h>
36 #include <sys/errno.h>
37 #include <sys/kmem.h>
38 #include <sys/open.h>
39 #include <sys/thread.h>
40 #include <sys/cpuvar.h>
41 #include <sys/x_call.h>
42 #include <sys/debug.h>
43 #include <sys/sysmacros.h>
44 #include <sys/ivintr.h>
45 #include <sys/intr.h>
46 #include <sys/intreg.h>
47 #include <sys/autoconf.h>
48 #include <sys/modctl.h>
49 #include <sys/spl.h>
50 #include <sys/async.h>
51 #include <sys/mc.h>
52 #include <sys/mc-us3i.h>
53 #include <sys/note.h>
54 #include <sys/cpu_module.h>
55 
/*
 * Value given to the "pm-hardware-state" property created in mc_attach().
 */
#define	NO_SUSPEND_RESUME	"no-suspend-resume"
60 
61 /*
62  * Function prototypes
63  */
64 
65 static int mc_open(dev_t *, int, int, cred_t *);
66 static int mc_close(dev_t, int, int, cred_t *);
67 static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
68 static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
69 static int mc_detach(dev_info_t *, ddi_detach_cmd_t);
70 
71 /*
72  * Configuration data structures
73  */
74 static struct cb_ops mc_cb_ops = {
75 	mc_open,			/* open */
76 	mc_close,			/* close */
77 	nulldev,			/* strategy */
78 	nulldev,			/* print */
79 	nodev,				/* dump */
80 	nulldev,			/* read */
81 	nulldev,			/* write */
82 	mc_ioctl,			/* ioctl */
83 	nodev,				/* devmap */
84 	nodev,				/* mmap */
85 	nodev,				/* segmap */
86 	nochpoll,			/* poll */
87 	ddi_prop_op,			/* cb_prop_op */
88 	0,				/* streamtab */
89 	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
90 	CB_REV,				/* rev */
91 	nodev,				/* cb_aread */
92 	nodev				/* cb_awrite */
93 };
94 
95 static struct dev_ops mc_ops = {
96 	DEVO_REV,			/* rev */
97 	0,				/* refcnt  */
98 	ddi_no_info,			/* getinfo */
99 	nulldev,			/* identify */
100 	nulldev,			/* probe */
101 	mc_attach,			/* attach */
102 	mc_detach,			/* detach */
103 	nulldev,			/* reset */
104 	&mc_cb_ops,			/* cb_ops */
105 	(struct bus_ops *)0,		/* bus_ops */
106 	nulldev				/* power */
107 };
108 
109 /*
110  * Driver globals
111  */
112 static void *mcp;
113 static int nmcs = 0;
114 static int seg_id;
115 static int nsegments;
116 static uint64_t	memsize;
117 
118 static uint_t	mc_debug = 0;
119 
120 static int getreg;
121 static int nregs;
122 struct memory_reg_info *reg_info;
123 
124 static mc_dlist_t *seg_head, *seg_tail, *bank_head, *bank_tail;
125 static mc_dlist_t *mctrl_head, *mctrl_tail, *dgrp_head, *dgrp_tail;
126 static mc_dlist_t *device_head, *device_tail;
127 
128 static kmutex_t	mcmutex;
129 static kmutex_t	mcdatamutex;
130 
131 extern struct mod_ops mod_driverops;
132 
133 static struct modldrv modldrv = {
134 	&mod_driverops,			/* module type, this one is a driver */
135 	"Memory-controller: %I%",	/* module name */
136 	&mc_ops,			/* driver ops */
137 };
138 
139 static struct modlinkage modlinkage = {
140 	MODREV_1,		/* rev */
141 	(void *)&modldrv,
142 	NULL
143 };
144 
145 static int mc_get_memory_reg_info(struct mc_soft_state *softsp);
146 static void mc_construct(struct mc_soft_state *softsp);
147 static void mc_delete(int mc_id);
148 static void mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
149 static void mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
150 static void *mc_node_get(int id, mc_dlist_t *head);
151 static void mc_add_mem_unum_label(char *unum, int mcid, int bank, int dimm);
152 static int mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf,
153     int buflen, int *lenp);
154 static int mc_get_mem_info(int synd_code, uint64_t paddr,
155     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
156     int *segsp, int *banksp, int *mcidp);
157 
158 #pragma weak p2get_mem_unum
159 #pragma weak p2get_mem_info
160 #pragma weak plat_add_mem_unum_label
161 
162 /* For testing only */
163 struct test_unum {
164 	int		synd_code;
165 	uint64_t	paddr;
166 	char 		unum[UNUM_NAMLEN];
167 	int		len;
168 };
169 
170 /*
171  * These are the module initialization routines.
172  */
173 
174 int
175 _init(void)
176 {
177 	int error;
178 
179 	if ((error = ddi_soft_state_init(&mcp,
180 	    sizeof (struct mc_soft_state), 1)) != 0)
181 		return (error);
182 
183 	error =  mod_install(&modlinkage);
184 	if (error == 0) {
185 		mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
186 		mutex_init(&mcdatamutex, NULL, MUTEX_DRIVER, NULL);
187 	}
188 
189 	return (error);
190 }
191 
192 int
193 _fini(void)
194 {
195 	int error;
196 
197 	if ((error = mod_remove(&modlinkage)) != 0)
198 		return (error);
199 
200 	ddi_soft_state_fini(&mcp);
201 	mutex_destroy(&mcmutex);
202 	mutex_destroy(&mcdatamutex);
203 	return (0);
204 }
205 
206 int
207 _info(struct modinfo *modinfop)
208 {
209 	return (mod_info(&modlinkage, modinfop));
210 }
211 
212 static int
213 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
214 {
215 	struct mc_soft_state *softsp;
216 	struct dimm_info *dimminfop;
217 	int instance, len, err;
218 	int mcreg1_len;
219 
220 	switch (cmd) {
221 	case DDI_ATTACH:
222 		break;
223 
224 	case DDI_RESUME:
225 		return (DDI_SUCCESS);
226 
227 	default:
228 		return (DDI_FAILURE);
229 	}
230 
231 	instance = ddi_get_instance(devi);
232 
233 	if (ddi_soft_state_zalloc(mcp, instance) != DDI_SUCCESS)
234 		return (DDI_FAILURE);
235 
236 	softsp = ddi_get_soft_state(mcp, instance);
237 
238 	/* Set the dip in the soft state */
239 	softsp->dip = devi;
240 
241 	if ((softsp->portid = (int)ddi_getprop(DDI_DEV_T_ANY, softsp->dip,
242 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
243 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to get %s property\n",
244 		    instance, "portid"));
245 		goto bad;
246 	}
247 
248 	DPRINTF(MC_ATTACH_DEBUG, ("mc_attach: mc %d portid %d, cpuid %d\n",
249 	    instance, softsp->portid, CPU->cpu_id));
250 
251 	/* Get the content of Memory Control Register I from obp */
252 	mcreg1_len = sizeof (uint64_t);
253 	if ((ddi_getlongprop_buf(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS,
254 	    "memory-control-register-1", (caddr_t)&(softsp->mcreg1),
255 	    &mcreg1_len) == DDI_PROP_SUCCESS) &&
256 	    (mcreg1_len == sizeof (uint64_t))) {
257 		softsp->mcr_read_ok = 1;
258 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d from obp: Reg1: 0x%lx\n",
259 		instance, softsp->mcreg1));
260 	}
261 
262 	/* attach fails if mcreg1 cannot be accessed */
263 	if (!softsp->mcr_read_ok) {
264 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to get mcreg1\n",
265 		    instance));
266 		goto bad;
267 	}
268 
269 	/* nothing to suspend/resume here */
270 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
271 	    "pm-hardware-state", NO_SUSPEND_RESUME,
272 	    sizeof (NO_SUSPEND_RESUME));
273 
274 	/*
275 	 * Get the label of dimms and pin routing information from the
276 	 * memory-layout property of the memory controller.
277 	 */
278 	err = ddi_getlongprop(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS,
279 	    "memory-layout", (caddr_t)&dimminfop, &len);
280 	if (err == DDI_PROP_SUCCESS && dimminfop->table_width == 1) {
281 		/* Set the pointer and size of property in the soft state */
282 		softsp->memlayoutp = dimminfop;
283 		softsp->memlayoutlen = len;
284 	} else {
285 		/*
286 		 * memory-layout property was not found or some other
287 		 * error occured, plat_get_mem_unum() will not work
288 		 * for this mc.
289 		 */
290 		softsp->memlayoutp = NULL;
291 		softsp->memlayoutlen = 0;
292 		DPRINTF(MC_ATTACH_DEBUG,
293 		    ("mc %d: missing or unsupported memory-layout property\n",
294 		    instance));
295 	}
296 
297 	mutex_enter(&mcmutex);
298 
299 	/* Get the physical segments from memory/reg, just once for all MC */
300 	if (!getreg) {
301 		if (mc_get_memory_reg_info(softsp) != 0) {
302 			goto bad1;
303 		}
304 		getreg = 1;
305 	}
306 
307 	/* Construct the physical and logical layout of the MC */
308 	mc_construct(softsp);
309 
310 	if (nmcs == 1) {
311 		if (&p2get_mem_unum)
312 			p2get_mem_unum = mc_get_mem_unum;
313 		if (&p2get_mem_info)
314 			p2get_mem_info = mc_get_mem_info;
315 	}
316 
317 	if (ddi_create_minor_node(devi, "mc-us3i", S_IFCHR, instance,
318 	    "ddi_mem_ctrl", 0) != DDI_SUCCESS) {
319 		DPRINTF(MC_ATTACH_DEBUG, ("mc_attach: create_minor_node"
320 		    " failed \n"));
321 		goto bad1;
322 	}
323 	mutex_exit(&mcmutex);
324 
325 	ddi_report_dev(devi);
326 	return (DDI_SUCCESS);
327 
328 bad1:
329 	/* release all allocated data struture for this MC */
330 	mc_delete(softsp->portid);
331 	mutex_exit(&mcmutex);
332 	if (softsp->memlayoutp != NULL)
333 		kmem_free(softsp->memlayoutp, softsp->memlayoutlen);
334 
335 bad:
336 	cmn_err(CE_WARN, "mc-us3i: attach failed for instance %d\n", instance);
337 	ddi_soft_state_free(mcp, instance);
338 	return (DDI_FAILURE);
339 }
340 
341 /* ARGSUSED */
342 static int
343 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
344 {
345 	int instance;
346 	struct mc_soft_state *softsp;
347 
348 	/* get the instance of this devi */
349 	instance = ddi_get_instance(devi);
350 
351 	/* get the soft state pointer for this device node */
352 	softsp = ddi_get_soft_state(mcp, instance);
353 
354 	switch (cmd) {
355 	case DDI_SUSPEND:
356 		return (DDI_SUCCESS);
357 
358 	case DDI_DETACH:
359 		break;
360 
361 	default:
362 		return (DDI_FAILURE);
363 	}
364 
365 	DPRINTF(MC_DETACH_DEBUG, ("mc %d DETACH: portid %d\n", instance,
366 	    softsp->portid));
367 
368 	mutex_enter(&mcmutex);
369 
370 	/* release all allocated data struture for this MC */
371 	mc_delete(softsp->portid);
372 
373 	if (softsp->memlayoutp != NULL)
374 		kmem_free(softsp->memlayoutp, softsp->memlayoutlen);
375 
376 	if (nmcs == 0) {
377 		if (&p2get_mem_unum)
378 			p2get_mem_unum = NULL;
379 		if (&p2get_mem_info)
380 			p2get_mem_info = NULL;
381 	}
382 
383 	mutex_exit(&mcmutex);
384 
385 	ddi_remove_minor_node(devi, NULL);
386 	/* free up the soft state */
387 	ddi_soft_state_free(mcp, instance);
388 
389 	return (DDI_SUCCESS);
390 }
391 
392 /* ARGSUSED */
393 static int
394 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
395 {
396 	int status = 0;
397 
398 	/* verify that otyp is appropriate */
399 	if (otyp != OTYP_CHR) {
400 		return (EINVAL);
401 	}
402 
403 	mutex_enter(&mcmutex);
404 	/* At least one attached? */
405 	if (nmcs == 0) {
406 		status = ENXIO;
407 	}
408 	mutex_exit(&mcmutex);
409 
410 	return (status);
411 }
412 
413 /* ARGSUSED */
414 static int
415 mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
416 {
417 	return (0);
418 }
419 
420 /*
421  * cmd includes MCIOC_MEMCONF, MCIOC_MEM, MCIOC_SEG, MCIOC_BANK, MCIOC_DEVGRP,
422  * MCIOC_CTRLCONF, MCIOC_CONTROL.
423  *
424  * MCIOC_MEM, MCIOC_SEG, MCIOC_CTRLCONF, and MCIOC_CONTROL are
425  * associated with various length struct. If given number is less than the
426  * number in kernel, update the number and return EINVAL so that user could
427  * allocate enough space for it.
428  *
429  */
430 
431 /* ARGSUSED */
432 static int
433 mc_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p,
434 	int *rval_p)
435 {
436 	size_t	size;
437 	struct mc_memconf mcmconf;
438 	struct mc_memory *mcmem, mcmem_in;
439 	struct mc_segment *mcseg, mcseg_in;
440 	struct mc_bank mcbank;
441 	struct mc_devgrp mcdevgrp;
442 	struct mc_ctrlconf *mcctrlconf, mcctrlconf_in;
443 	struct mc_control *mccontrol, mccontrol_in;
444 	struct seg_info *seg = NULL;
445 	struct bank_info *bank = NULL;
446 	struct dgrp_info *dgrp = NULL;
447 	struct mctrl_info *mcport;
448 	mc_dlist_t *mctrl;
449 	int i, status = 0;
450 	cpu_t *cpu;
451 
452 	switch (cmd) {
453 	case MCIOC_MEMCONF:
454 		mutex_enter(&mcdatamutex);
455 
456 		mcmconf.nmcs = nmcs;
457 		mcmconf.nsegments = nsegments;
458 		mcmconf.nbanks = NLOGBANKS_PER_SEG;
459 		mcmconf.ndevgrps = NDGRPS_PER_MC;
460 		mcmconf.ndevs = NDIMMS_PER_DGRP;
461 		mcmconf.len_dev = MAX_DEVLEN;
462 		mcmconf.xfer_size = TRANSFER_SIZE;
463 
464 		mutex_exit(&mcdatamutex);
465 
466 		if (copyout(&mcmconf, (void *)arg, sizeof (mcmconf)))
467 			return (EFAULT);
468 		return (0);
469 
470 	/*
471 	 * input: nsegments and allocate space for various length of segmentids
472 	 *
473 	 * return    0: size, number of segments, and all segment ids,
474 	 *		where glocal and local ids are identical.
475 	 *	EINVAL: if the given nsegments is less than that in kernel and
476 	 *		nsegments of struct will be updated.
477 	 *	EFAULT: if other errors in kernel.
478 	 */
479 	case MCIOC_MEM:
480 		if (copyin((void *)arg, &mcmem_in, sizeof (mcmem_in)) != 0)
481 			return (EFAULT);
482 
483 		mutex_enter(&mcdatamutex);
484 		if (mcmem_in.nsegments < nsegments) {
485 			mcmem_in.nsegments = nsegments;
486 			mutex_exit(&mcdatamutex);
487 			if (copyout(&mcmem_in, (void *)arg, sizeof (mcmem_in)))
488 				status = EFAULT;
489 			else
490 				status = EINVAL;
491 
492 			return (status);
493 		}
494 
495 		size = sizeof (*mcmem) + (nsegments - 1) *
496 		    sizeof (mcmem->segmentids[0]);
497 		mcmem = kmem_zalloc(size, KM_SLEEP);
498 
499 		mcmem->size = memsize;
500 		mcmem->nsegments = nsegments;
501 		seg = (struct seg_info *)seg_head;
502 		for (i = 0; i < nsegments; i++) {
503 			ASSERT(seg != NULL);
504 			mcmem->segmentids[i].globalid = seg->seg_node.id;
505 			mcmem->segmentids[i].localid = seg->seg_node.id;
506 			seg = (struct seg_info *)seg->seg_node.next;
507 		}
508 		mutex_exit(&mcdatamutex);
509 
510 		if (copyout(mcmem, (void *)arg, size))
511 			status = EFAULT;
512 
513 		kmem_free(mcmem, size);
514 		return (status);
515 
516 	/*
517 	 * input: id, nbanks and allocate space for various length of bankids
518 	 *
519 	 * return    0: base, size, number of banks, and all bank ids,
520 	 *		where global id is unique of all banks and local id
521 	 *		is only unique for mc.
522 	 *	EINVAL: either id isn't found or if given nbanks is less than
523 	 *		that in kernel and nbanks of struct will be updated.
524 	 *	EFAULT: if other errors in kernel.
525 	 */
526 	case MCIOC_SEG:
527 
528 		if (copyin((void *)arg, &mcseg_in, sizeof (mcseg_in)) != 0)
529 			return (EFAULT);
530 
531 		mutex_enter(&mcdatamutex);
532 		if ((seg = mc_node_get(mcseg_in.id, seg_head)) == NULL) {
533 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG: seg not match, "
534 			    "id %d\n", mcseg_in.id));
535 			mutex_exit(&mcdatamutex);
536 			return (EFAULT);
537 		}
538 
539 		if (mcseg_in.nbanks < seg->nbanks) {
540 			mcseg_in.nbanks = seg->nbanks;
541 			mutex_exit(&mcdatamutex);
542 			if (copyout(&mcseg_in, (void *)arg, sizeof (mcseg_in)))
543 				status = EFAULT;
544 			else
545 				status = EINVAL;
546 
547 			return (status);
548 		}
549 
550 		size = sizeof (*mcseg) + (seg->nbanks - 1) *
551 		    sizeof (mcseg->bankids[0]);
552 		mcseg = kmem_zalloc(size, KM_SLEEP);
553 
554 		mcseg->id = seg->seg_node.id;
555 		mcseg->ifactor = seg->ifactor;
556 		mcseg->base = seg->base;
557 		mcseg->size = seg->size;
558 		mcseg->nbanks = seg->nbanks;
559 
560 		bank = seg->head;
561 
562 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:nbanks %d seg %p bank %p\n",
563 		    seg->nbanks, (void *) seg, (void *) bank));
564 
565 		i = 0;
566 		while (bank != NULL) {
567 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:idx %d bank_id %d\n",
568 			    i, bank->bank_node.id));
569 			mcseg->bankids[i].globalid = bank->bank_node.id;
570 			mcseg->bankids[i++].localid = bank->local_id;
571 			bank = bank->next;
572 		}
573 		ASSERT(i == seg->nbanks);
574 		mutex_exit(&mcdatamutex);
575 
576 		if (copyout(mcseg, (void *)arg, size))
577 			status = EFAULT;
578 
579 		kmem_free(mcseg, size);
580 		return (status);
581 
582 	/*
583 	 * input: id
584 	 *
585 	 * return    0: mask, match, size, and devgrpid,
586 	 *		where global id is unique of all devgrps and local id
587 	 *		is only unique for mc.
588 	 *	EINVAL: if id isn't found
589 	 *	EFAULT: if other errors in kernel.
590 	 */
591 	case MCIOC_BANK:
592 		if (copyin((void *)arg, &mcbank, sizeof (mcbank)) != 0)
593 			return (EFAULT);
594 
595 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank id %d\n", mcbank.id));
596 
597 		mutex_enter(&mcdatamutex);
598 
599 		if ((bank = mc_node_get(mcbank.id, bank_head)) == NULL) {
600 			mutex_exit(&mcdatamutex);
601 			return (EINVAL);
602 		}
603 
604 		mcbank.mask = bank->mask;
605 		mcbank.match = bank->match;
606 		mcbank.size = bank->size;
607 		mcbank.devgrpid.globalid = bank->devgrp_id;
608 		mcbank.devgrpid.localid =
609 		    bank->bank_node.id % NLOGBANKS_PER_SEG;
610 
611 		mutex_exit(&mcdatamutex);
612 
613 		if (copyout(&mcbank, (void *)arg, sizeof (mcbank)))
614 			return (EFAULT);
615 		return (0);
616 
617 	/*
618 	 * input:id and allocate space for various length of deviceids
619 	 *
620 	 * return    0: size and number of devices.
621 	 *	EINVAL: id isn't found
622 	 *	EFAULT: if other errors in kernel.
623 	 */
624 	case MCIOC_DEVGRP:
625 
626 		if (copyin((void *)arg, &mcdevgrp, sizeof (mcdevgrp)) != 0)
627 			return (EFAULT);
628 
629 		mutex_enter(&mcdatamutex);
630 		if ((dgrp = mc_node_get(mcdevgrp.id, dgrp_head)) == NULL) {
631 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_DEVGRP: not match, id "
632 			    "%d\n", mcdevgrp.id));
633 			mutex_exit(&mcdatamutex);
634 			return (EINVAL);
635 		}
636 
637 		mcdevgrp.ndevices = dgrp->ndevices;
638 		mcdevgrp.size = dgrp->size;
639 
640 		mutex_exit(&mcdatamutex);
641 
642 		if (copyout(&mcdevgrp, (void *)arg, sizeof (mcdevgrp)))
643 			status = EFAULT;
644 
645 		return (status);
646 
647 	/*
648 	 * input: nmcs and allocate space for various length of mcids
649 	 *
650 	 * return    0: number of mc, and all mcids,
651 	 *		where glocal and local ids are identical.
652 	 *	EINVAL: if the given nmcs is less than that in kernel and
653 	 *		nmcs of struct will be updated.
654 	 *	EFAULT: if other errors in kernel.
655 	 */
656 	case MCIOC_CTRLCONF:
657 		if (copyin((void *)arg, &mcctrlconf_in,
658 		    sizeof (mcctrlconf_in)) != 0)
659 			return (EFAULT);
660 
661 		mutex_enter(&mcdatamutex);
662 		if (mcctrlconf_in.nmcs < nmcs) {
663 			mcctrlconf_in.nmcs = nmcs;
664 			mutex_exit(&mcdatamutex);
665 			if (copyout(&mcctrlconf_in, (void *)arg,
666 			    sizeof (mcctrlconf_in)))
667 				status = EFAULT;
668 			else
669 				status = EINVAL;
670 
671 			return (status);
672 		}
673 
674 		/*
675 		 * Cannot just use the size of the struct because of the various
676 		 * length struct
677 		 */
678 		size = sizeof (*mcctrlconf) + ((nmcs - 1) *
679 		    sizeof (mcctrlconf->mcids[0]));
680 		mcctrlconf = kmem_zalloc(size, KM_SLEEP);
681 
682 		mcctrlconf->nmcs = nmcs;
683 
684 		/* Get all MC ids and add to mcctrlconf */
685 		mctrl = mctrl_head;
686 		i = 0;
687 		while (mctrl != NULL) {
688 			mcctrlconf->mcids[i].globalid = mctrl->id;
689 			mcctrlconf->mcids[i].localid = mctrl->id;
690 			i++;
691 			mctrl = mctrl->next;
692 		}
693 		ASSERT(i == nmcs);
694 
695 		mutex_exit(&mcdatamutex);
696 
697 		if (copyout(mcctrlconf, (void *)arg, size))
698 			status = EFAULT;
699 
700 		kmem_free(mcctrlconf, size);
701 		return (status);
702 
703 	/*
704 	 * input:id, ndevgrps and allocate space for various length of devgrpids
705 	 *
706 	 * return    0: number of devgrp, and all devgrpids,
707 	 *		is unique of all devgrps and local id is only unique
708 	 *		for mc.
709 	 *	EINVAL: either if id isn't found or if the given ndevgrps is
710 	 *		less than that in kernel and ndevgrps of struct will
711 	 *		be updated.
712 	 *	EFAULT: if other errors in kernel.
713 	 */
714 	case MCIOC_CONTROL:
715 		if (copyin((void *)arg, &mccontrol_in,
716 		    sizeof (mccontrol_in)) != 0)
717 			return (EFAULT);
718 
719 		mutex_enter(&mcdatamutex);
720 		if ((mcport = mc_node_get(mccontrol_in.id,
721 		    mctrl_head)) == NULL) {
722 			mutex_exit(&mcdatamutex);
723 			return (EINVAL);
724 		}
725 
726 		/*
727 		 * mcport->ndevgrps zero means Memory Controller is disable.
728 		 */
729 		if ((mccontrol_in.ndevgrps < mcport->ndevgrps) ||
730 		    (mcport->ndevgrps == 0)) {
731 			mccontrol_in.ndevgrps = mcport->ndevgrps;
732 			mutex_exit(&mcdatamutex);
733 			if (copyout(&mccontrol_in, (void *)arg,
734 			    sizeof (mccontrol_in)))
735 				status = EFAULT;
736 			else if (mcport->ndevgrps != 0)
737 				status = EINVAL;
738 
739 			return (status);
740 		}
741 
742 		size = sizeof (*mccontrol) + (mcport->ndevgrps - 1) *
743 		    sizeof (mccontrol->devgrpids[0]);
744 		mccontrol = kmem_zalloc(size, KM_SLEEP);
745 
746 		mccontrol->id = mcport->mctrl_node.id;
747 		mccontrol->ndevgrps = mcport->ndevgrps;
748 		for (i = 0; i < mcport->ndevgrps; i++) {
749 			mccontrol->devgrpids[i].globalid = mcport->devgrpids[i];
750 			mccontrol->devgrpids[i].localid =
751 			    mcport->devgrpids[i] % NDGRPS_PER_MC;
752 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_CONTROL: devgrp id %d\n",
753 			    i));
754 		}
755 		mutex_exit(&mcdatamutex);
756 
757 		if (copyout(mccontrol, (void *)arg, size))
758 			status = EFAULT;
759 
760 		kmem_free(mccontrol, size);
761 		return (status);
762 
763 	/*
764 	 * input:id
765 	 *
766 	 * return    0: CPU flushed successfully.
767 	 *	EINVAL: the id wasn't found
768 	 */
769 	case MCIOC_ECFLUSH:
770 		mutex_enter(&cpu_lock);
771 		cpu = cpu_get((processorid_t)arg);
772 		mutex_exit(&cpu_lock);
773 		if (cpu == NULL)
774 			return (EINVAL);
775 
776 		xc_one(arg, (xcfunc_t *)cpu_flush_ecache, 0, 0);
777 
778 		return (0);
779 
780 	default:
781 		DPRINTF(MC_CMD_DEBUG, ("DEFAULT: cmd is wrong\n"));
782 		return (EFAULT);
783 	}
784 }
785 
786 /*
787  * Gets the reg property from the memory node. This provides the various
788  * memory segments, at bank-boundries, dimm-pair boundries, in the form
789  * of [base, size] pairs. Continuous segments, spanning boundries are
790  * merged into one.
791  * Returns 0 for success and -1 for failure.
792  */
793 static int
794 mc_get_memory_reg_info(struct mc_soft_state *softsp)
795 {
796 	dev_info_t *devi;
797 	int len;
798 	int i;
799 	struct memory_reg_info *mregi;
800 
801 	_NOTE(ARGUNUSED(softsp))
802 
803 	if ((devi = ddi_find_devinfo("memory", -1, 0)) == NULL) {
804 		DPRINTF(MC_REG_DEBUG,
805 		    ("mc-us3i: cannot find memory node under root\n"));
806 		return (-1);
807 	}
808 
809 	if (ddi_getlongprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
810 	    "reg", (caddr_t)&reg_info, &len) != DDI_PROP_SUCCESS) {
811 		DPRINTF(MC_REG_DEBUG,
812 		    ("mc-us3i: reg undefined under memory\n"));
813 		return (-1);
814 	}
815 
816 	nregs = len/sizeof (*mregi);
817 
818 	DPRINTF(MC_REG_DEBUG, ("mc_get_memory_reg_info: nregs %d"
819 	    "reg_info %p\n", nregs, (void *) reg_info));
820 
821 	mregi = reg_info;
822 
823 	/* debug printfs  */
824 	for (i = 0; i < nregs; i++) {
825 		DPRINTF(MC_REG_DEBUG, (" [0x%lx, 0x%lx] ",
826 		    mregi->base, mregi->size));
827 		mregi++;
828 	}
829 
830 	return (0);
831 }
832 
833 /*
834  * Initialize a logical bank
835  */
836 static struct bank_info *
837 mc_add_bank(int bankid, uint64_t mask, uint64_t match, uint64_t size,
838     int dgrpid)
839 {
840 	struct bank_info *banki;
841 
842 	if ((banki = mc_node_get(bankid, bank_head)) != NULL) {
843 		DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_bank: bank %d exists\n",
844 		    bankid));
845 		return (banki);
846 	}
847 
848 	banki = kmem_zalloc(sizeof (*banki), KM_SLEEP);
849 
850 	banki->bank_node.id = bankid;
851 	banki->devgrp_id = dgrpid;
852 	banki->mask = mask;
853 	banki->match = match;
854 	banki->base = match;
855 	banki->size = size;
856 
857 	mc_node_add((mc_dlist_t *)banki, &bank_head, &bank_tail);
858 
859 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_bank: id %d mask 0x%lx match 0x%lx"
860 	    " base 0x%lx size 0x%lx\n", bankid, mask, match,
861 	    banki->base, banki->size));
862 
863 	return (banki);
864 }
865 
866 /*
867  * Use the bank's base address to find out whether to initialize a new segment,
868  * or weave the bank into an existing segment. If the tail bank of a previous
869  * segment is not continuous with the new bank, the new bank goes into a new
870  * segment.
871  */
872 static void
873 mc_add_segment(struct bank_info *banki)
874 {
875 	struct seg_info *segi;
876 	struct bank_info *tb;
877 
878 	/* does this bank start a new segment? */
879 	if ((segi = mc_node_get(seg_id, seg_head)) == NULL) {
880 		/* this should happen for the first segment only */
881 		goto new_seg;
882 	}
883 
884 	tb = segi->tail;
885 	/* discontiguous banks go into a new segment, increment the seg_id */
886 	if (banki->base > (tb->base + tb->size)) {
887 		seg_id++;
888 		goto new_seg;
889 	}
890 
891 	/* weave the bank into the segment */
892 	segi->nbanks++;
893 	tb->next = banki;
894 
895 	banki->seg_id = segi->seg_node.id;
896 	banki->local_id = tb->local_id + 1;
897 
898 	/* contiguous or interleaved? */
899 	if (banki->base != (tb->base + tb->size))
900 		segi->ifactor++;
901 
902 	segi->size += banki->size;
903 	segi->tail = banki;
904 
905 	memsize += banki->size;
906 
907 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_segment: id %d add bank: id %d"
908 	    "size 0x%lx\n", segi->seg_node.id, banki->bank_node.id,
909 	    banki->size));
910 
911 	return;
912 
913 new_seg:
914 	segi = kmem_zalloc(sizeof (*segi), KM_SLEEP);
915 
916 	segi->seg_node.id = seg_id;
917 	segi->nbanks = 1;
918 	segi->ifactor = 1;
919 	segi->base = banki->base;
920 	segi->size = banki->size;
921 	segi->head = banki;
922 	segi->tail = banki;
923 
924 	banki->seg_id = segi->seg_node.id;
925 	banki->local_id = 0;
926 
927 	mc_node_add((mc_dlist_t *)segi, &seg_head, &seg_tail);
928 	nsegments++;
929 
930 	memsize += banki->size;
931 
932 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_segment: id %d new bank: id %d"
933 	    "size 0x%lx\n", segi->seg_node.id, banki->bank_node.id,
934 	    banki->size));
935 }
936 
937 /*
938  * Returns the address bit number (row index) that controls the logical/external
939  * bank assignment in interleave of kind internal-external same dimm-pair,
940  * internal-external both dimm-pair. This is done by using the dimm-densities
941  * and part-type.
942  */
943 static int
944 get_row_shift(int row_index, struct dgrp_info *dgrp)
945 {
946 	int shift;
947 
948 	switch (dgrp->base_device) {
949 	case BASE_DEVICE_128Mb:
950 	case BASE_DEVICE_256Mb:
951 		/* 128Mb and 256Mb devices have same bank select mask */
952 		shift = ADDR_GEN_128Mb_X8_ROW_0;
953 		break;
954 	case BASE_DEVICE_512Mb:
955 	case BASE_DEVICE_1Gb:
956 		/* 512 and 1Gb devices have same bank select mask */
957 		shift = ADDR_GEN_512Mb_X8_ROW_0;
958 		break;
959 	}
960 
961 	if (dgrp->part_type == PART_TYPE_X4)
962 		shift += 1;
963 
964 	shift += row_index;
965 
966 	return (shift);
967 }
968 
969 
970 static void
971 get_device_select(int interleave, struct dgrp_info *dgrp,
972     int *ds_shift, int *bs_shift)
973 {
974 
975 	switch (interleave) {
976 	case INTERLEAVE_DISABLE:
977 	/* Fall Through */
978 	case INTERLEAVE_INTERNAL:
979 		/* Bit 33 selects the dimm group/pair */
980 		*ds_shift = DIMM_PAIR_SELECT_SHIFT;
981 		if (dgrp->nlogbanks == 2) {
982 			/* Bit 32 selects the logical bank */
983 			*bs_shift = LOG_BANK_SELECT_SHIFT;
984 		}
985 		break;
986 	case INTERLEAVE_INTEXT_SAME_DIMM_PAIR:
987 		/* Bit 33 selects the dimm group/pair */
988 		*ds_shift =  DIMM_PAIR_SELECT_SHIFT;
989 		if (dgrp->nlogbanks == 2) {
990 			/* Row[2] selects the logical bank */
991 			*bs_shift = get_row_shift(2, dgrp);
992 		}
993 		break;
994 	case INTERLEAVE_INTEXT_BOTH_DIMM_PAIR:
995 		if (dgrp->nlogbanks == 2) {
996 			/* Row[3] selects the dimm group/pair */
997 			*ds_shift = get_row_shift(3, dgrp);
998 
999 			/* Row[2] selects the logical bank */
1000 			*bs_shift = get_row_shift(2, dgrp);
1001 		} else {
1002 			/* Row[2] selects the dimm group/pair */
1003 			*ds_shift = get_row_shift(2, dgrp);
1004 		}
1005 		break;
1006 	}
1007 }
1008 
1009 static void
1010 mc_add_xor_banks(struct mctrl_info *mctrl,
1011     uint64_t mask, uint64_t match, int interleave)
1012 {
1013 	int i, j, nbits, nbanks;
1014 	int bankid;
1015 	int dselect[4];
1016 	int ds_shift = -1, bs_shift = -1;
1017 	uint64_t id, size, xmatch;
1018 	struct bank_info *banki;
1019 	struct dgrp_info *dgrp;
1020 
1021 	/* xor mode - assume 2 identical dimm-pairs */
1022 	if ((dgrp = mc_node_get(mctrl->devgrpids[0], dgrp_head)) == NULL) {
1023 		return;
1024 	}
1025 
1026 	get_device_select(interleave, dgrp, &ds_shift, &bs_shift);
1027 
1028 	mask |= (ds_shift == -1 ? 0 : (1ULL << ds_shift));
1029 	mask |= (bs_shift == -1 ? 0 : (1ULL << bs_shift));
1030 
1031 	/* xor enable means, bit 21 is used for dimm-pair select */
1032 	mask |= XOR_DEVICE_SELECT_MASK;
1033 	if (dgrp->nlogbanks == NLOGBANKS_PER_DGRP) {
1034 		/* bit 20 is used for logbank select */
1035 		mask |= XOR_BANK_SELECT_MASK;
1036 	}
1037 
1038 	/* find out the bits set to 1 in mask, nbits can be 2 or 4 */
1039 	nbits = 0;
1040 	for (i = 0; i <= DIMM_PAIR_SELECT_SHIFT; i++) {
1041 		if ((((mask >> i) & 1) == 1) && (nbits < 4)) {
1042 			dselect[nbits] = i;
1043 			nbits++;
1044 		}
1045 	}
1046 
1047 	/* number or banks can be 4 or 16 */
1048 	nbanks = 1 << nbits;
1049 
1050 	size = (dgrp->size * 2)/nbanks;
1051 
1052 	bankid = mctrl->mctrl_node.id * NLOGBANKS_PER_MC;
1053 
1054 	/* each bit position of the mask decides the match & base for bank */
1055 	for (i = 0; i < nbanks; i++) {
1056 		xmatch = 0;
1057 		for (j = 0; j < nbits; j++) {
1058 			xmatch |= (i & (1ULL << j)) << (dselect[j] - j);
1059 		}
1060 		/* xor ds bits to get the dimm-pair */
1061 		id = ((xmatch & (1ULL << ds_shift)) >> ds_shift) ^
1062 			((xmatch & (1ULL << XOR_DEVICE_SELECT_SHIFT)) >>
1063 			XOR_DEVICE_SELECT_SHIFT);
1064 		banki = mc_add_bank(bankid, mask, match | xmatch, size,
1065 		    mctrl->devgrpids[id]);
1066 		mc_add_segment(banki);
1067 		bankid++;
1068 	}
1069 }
1070 
1071 /*
1072  * Based on interleave, dimm-densities, part-type determine the mask
1073  * and match per bank, construct the logical layout by adding segments
1074  * and banks
1075  */
1076 static int
1077 mc_add_dgrp_banks(uint64_t bankid, uint64_t dgrpid,
1078     uint64_t mask, uint64_t match, int interleave)
1079 {
1080 	int nbanks = 0;
1081 	struct bank_info *banki;
1082 	struct dgrp_info *dgrp;
1083 	int ds_shift = -1, bs_shift = -1;
1084 	uint64_t size;
1085 	uint64_t match_save;
1086 
1087 	if ((dgrp = mc_node_get(dgrpid, dgrp_head)) == NULL) {
1088 		return (0);
1089 	}
1090 
1091 	get_device_select(interleave, dgrp, &ds_shift, &bs_shift);
1092 
1093 	mask |= (ds_shift == -1 ? 0 : (1ULL << ds_shift));
1094 	mask |= (bs_shift == -1 ? 0 : (1ULL << bs_shift));
1095 	match |= (ds_shift == -1 ? 0 : ((dgrpid & 1) << ds_shift));
1096 	match_save = match;
1097 	size = dgrp->size/dgrp->nlogbanks;
1098 
1099 	/* for bankid 0, 2, 4 .. */
1100 	match |= (bs_shift == -1 ? 0 : ((bankid & 1) << bs_shift));
1101 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_segments: interleave %d"
1102 	    " mask 0x%lx bs_shift %d match 0x%lx\n",
1103 	    interleave, mask, bs_shift, match));
1104 	banki = mc_add_bank(bankid, mask, match, size, dgrpid);
1105 	nbanks++;
1106 	mc_add_segment(banki);
1107 
1108 	if (dgrp->nlogbanks == 2) {
1109 		/*
1110 		 * Set match value to original before adding second
1111 		 * logical bank interleaving information.
1112 		 */
1113 		match = match_save;
1114 		bankid++;
1115 		match |= (bs_shift == -1 ? 0 : ((bankid & 1) << bs_shift));
1116 		DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_segments: interleave %d"
1117 		    " mask 0x%lx shift %d match 0x%lx\n",
1118 		    interleave, mask, bs_shift, match));
1119 		banki = mc_add_bank(bankid, mask, match, size, dgrpid);
1120 		nbanks++;
1121 		mc_add_segment(banki);
1122 	}
1123 
1124 	return (nbanks);
1125 }
1126 
1127 /*
1128  * Construct the logical layout
1129  */
1130 static void
1131 mc_logical_layout(struct mctrl_info *mctrl, struct mc_soft_state *softsp)
1132 {
1133 	int i;
1134 	uint64_t mcid, bankid, interleave, mask, match;
1135 
1136 	if (mctrl->ndevgrps == 0)
1137 		return;
1138 
1139 	mcid = mctrl->mctrl_node.id;
1140 	mask = MC_SELECT_MASK;
1141 	match = mcid << MC_SELECT_SHIFT;
1142 
1143 	interleave = (softsp->mcreg1 & MCREG1_INTERLEAVE_MASK) >>
1144 	    MCREG1_INTERLEAVE_SHIFT;
1145 
1146 	/* Two dimm pairs and xor bit set */
1147 	if (mctrl->ndevgrps == NDGRPS_PER_MC &&
1148 	    (softsp->mcreg1 & MCREG1_XOR_ENABLE)) {
1149 		mc_add_xor_banks(mctrl, mask, match, interleave);
1150 		return;
1151 	}
1152 
1153 	/*
1154 	 * For xor bit unset or only one dimm pair.
1155 	 * In one dimm pair case, even if xor bit is set, xor
1156 	 * interleaving is only taking place in dimm's internal
1157 	 * banks. Dimm and external bank select bits are the
1158 	 * same as those without xor bit set.
1159 	 */
1160 	bankid = mcid * NLOGBANKS_PER_MC;
1161 	for (i = 0; i < mctrl->ndevgrps; i++) {
1162 		bankid += mc_add_dgrp_banks(bankid, mctrl->devgrpids[i],
1163 				mask, match, interleave);
1164 	}
1165 }
1166 
1167 /*
1168  * Get the dimm-pair's size from the reg_info
1169  */
1170 static uint64_t
1171 get_devgrp_size(uint64_t start)
1172 {
1173 	int i;
1174 	uint64_t size;
1175 	uint64_t end, reg_start, reg_end;
1176 	struct memory_reg_info *regi;
1177 
1178 	/* dgrp end address */
1179 	end = start + DGRP_SIZE_MAX - 1;
1180 
1181 	regi = reg_info;
1182 	size = 0;
1183 	for (i = 0; i < nregs; i++) {
1184 		reg_start = regi->base;
1185 		reg_end = regi->base + regi->size - 1;
1186 
1187 		/* completely outside */
1188 		if ((reg_end < start) || (reg_start > end)) {
1189 			regi++;
1190 			continue;
1191 		}
1192 
1193 		/* completely inside */
1194 		if ((reg_start <= start) && (reg_end >= end)) {
1195 			return (DGRP_SIZE_MAX);
1196 		}
1197 
1198 		/* start is inside, but not the end, get the remainder */
1199 		if (reg_start < start) {
1200 			size = regi->size - (start - reg_start);
1201 			regi++;
1202 			continue;
1203 		}
1204 
1205 		/* add up size for all within range */
1206 		size += regi->size;
1207 		regi++;
1208 	}
1209 
1210 	return (size);
1211 }
1212 
1213 /*
1214  * Each device group is a pair (dimm-pair) of identical single/dual dimms.
1215  * Determine the dimm-pair's dimm-densities and part-type using the MCR-I.
1216  */
1217 static void
1218 mc_add_devgrp(int dgrpid, struct mc_soft_state *softsp)
1219 {
1220 	int i, mcid, devid, dgrpoffset;
1221 	struct dgrp_info *dgrp;
1222 	struct device_info *dev;
1223 	struct dimm_info *dimmp = (struct dimm_info *)softsp->memlayoutp;
1224 
1225 	mcid = softsp->portid;
1226 
1227 	/* add the entry on dgrp_info list */
1228 	if ((dgrp = mc_node_get(dgrpid, dgrp_head)) != NULL) {
1229 		DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_devgrp: devgrp %d exists\n",
1230 		    dgrpid));
1231 		return;
1232 	}
1233 
1234 	dgrp = kmem_zalloc(sizeof (*dgrp), KM_SLEEP);
1235 
1236 	dgrp->dgrp_node.id = dgrpid;
1237 
1238 	/* a devgrp has identical (type & size) pair */
1239 	if ((dgrpid & 1) == 0) {
1240 		/* dimm-pair 0, 2, 4, 6 */
1241 		if (softsp->mcreg1 & MCREG1_DIMM1_BANK1)
1242 			dgrp->nlogbanks = 2;
1243 		else
1244 			dgrp->nlogbanks = 1;
1245 		dgrp->base_device = (softsp->mcreg1 & MCREG1_ADDRGEN1_MASK) >>
1246 		    MCREG1_ADDRGEN1_SHIFT;
1247 		dgrp->part_type = (softsp->mcreg1 & MCREG1_X4DIMM1_MASK) >>
1248 		    MCREG1_X4DIMM1_SHIFT;
1249 	} else {
1250 		/* dimm-pair 1, 3, 5, 7 */
1251 		if (softsp->mcreg1 & MCREG1_DIMM2_BANK3)
1252 			dgrp->nlogbanks = 2;
1253 		else
1254 			dgrp->nlogbanks = 1;
1255 		dgrp->base_device = (softsp->mcreg1 & MCREG1_ADDRGEN2_MASK) >>
1256 		    MCREG1_ADDRGEN2_SHIFT;
1257 		dgrp->part_type = (softsp->mcreg1 & MCREG1_X4DIMM2_MASK) >>
1258 		    MCREG1_X4DIMM2_SHIFT;
1259 	}
1260 
1261 	dgrp->base = MC_BASE(mcid) + DGRP_BASE(dgrpid);
1262 	dgrp->size = get_devgrp_size(dgrp->base);
1263 
1264 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_devgrp: id %d size %ld logbanks %d"
1265 	    " base_device %d part_type %d\n", dgrpid, dgrp->size,
1266 	    dgrp->nlogbanks, dgrp->base_device, dgrp->part_type));
1267 
1268 	dgrpoffset = dgrpid % NDGRPS_PER_MC;
1269 	dgrp->ndevices = NDIMMS_PER_DGRP;
1270 	/* add the entry for the (identical) pair of dimms/device */
1271 	for (i = 0; i < NDIMMS_PER_DGRP; i++) {
1272 		devid = dgrpid * NDIMMS_PER_DGRP + i;
1273 		dgrp->deviceids[i] = devid;
1274 
1275 		if ((dev = mc_node_get(devid, device_head)) != NULL) {
1276 			DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_devgrp: device %d "
1277 			    "exists\n", devid));
1278 			continue;
1279 		}
1280 
1281 		dev = kmem_zalloc(sizeof (*dev), KM_SLEEP);
1282 
1283 		dev->dev_node.id = devid;
1284 
1285 		dev->size = dgrp->size/2;
1286 
1287 		if (dimmp) {
1288 			(void) strncpy(dev->label, (char *)dimmp->label[
1289 			    i + NDIMMS_PER_DGRP * dgrpoffset],
1290 			    MAX_DEVLEN);
1291 
1292 			DPRINTF(MC_CNSTRC_DEBUG, ("mc_add_devgrp: dimm %d %s\n",
1293 			    dev->dev_node.id, dev->label));
1294 		}
1295 
1296 		mc_node_add((mc_dlist_t *)dev, &device_head, &device_tail);
1297 	}
1298 
1299 	mc_node_add((mc_dlist_t *)dgrp, &dgrp_head, &dgrp_tail);
1300 }
1301 
1302 /*
1303  * Construct the physical and logical layout
1304  */
1305 static void
1306 mc_construct(struct mc_soft_state *softsp)
1307 {
1308 	int i, mcid, dgrpid;
1309 	struct mctrl_info *mctrl;
1310 
1311 	mcid = softsp->portid;
1312 
1313 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_construct: mcid %d, mcreg1 0x%lx\n",
1314 	    mcid, softsp->mcreg1));
1315 
1316 	/*
1317 	 * Construct the Physical & Logical Layout
1318 	 */
1319 	mutex_enter(&mcdatamutex);
1320 
1321 	/* allocate for mctrl_info */
1322 	if ((mctrl = mc_node_get(mcid, mctrl_head)) != NULL) {
1323 		DPRINTF(MC_CNSTRC_DEBUG, ("mc_construct: mctrl %d exists\n",
1324 		    mcid));
1325 		mutex_exit(&mcdatamutex);
1326 		return;
1327 	}
1328 
1329 	mctrl = kmem_zalloc(sizeof (*mctrl), KM_SLEEP);
1330 
1331 	mctrl->mctrl_node.id = mcid;
1332 
1333 	i = 0;
1334 	dgrpid = mcid * NDGRPS_PER_MC;
1335 	if (softsp->mcreg1 & MCREG1_DIMM1_BANK0) {
1336 		mc_add_devgrp(dgrpid, softsp);
1337 		mctrl->devgrpids[i] = dgrpid;
1338 		mctrl->ndevgrps++;
1339 		i++;
1340 	}
1341 
1342 	if (softsp->mcreg1 & MCREG1_DIMM2_BANK2) {
1343 		dgrpid++;
1344 		mc_add_devgrp(dgrpid, softsp);
1345 		mctrl->devgrpids[i] = dgrpid;
1346 		mctrl->ndevgrps++;
1347 	}
1348 
1349 	mc_logical_layout(mctrl, softsp);
1350 
1351 	mctrl->dimminfop = (struct dimm_info *)softsp->memlayoutp;
1352 
1353 	nmcs++;
1354 	mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1355 
1356 	mutex_exit(&mcdatamutex);
1357 
1358 	DPRINTF(MC_CNSTRC_DEBUG, ("mc_construct: nmcs %d memsize %ld"
1359 	    "nsegments %d\n", nmcs, memsize, nsegments));
1360 }
1361 
1362 /*
1363  * Delete nodes related to the given MC on mc, device group, device,
1364  * and bank lists. Moreover, delete corresponding segment if its connected
1365  * banks are all removed.
1366  */
1367 static void
1368 mc_delete(int mc_id)
1369 {
1370 	int i, j, dgrpid, devid, bankid;
1371 	struct mctrl_info *mctrl;
1372 	struct dgrp_info *dgrp;
1373 	struct device_info *devp;
1374 	struct seg_info *segi;
1375 	struct bank_info *banki;
1376 
1377 	mutex_enter(&mcdatamutex);
1378 
1379 	/* delete mctrl_info */
1380 	if ((mctrl = mc_node_get(mc_id, mctrl_head)) != NULL) {
1381 		mc_node_del((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1382 		kmem_free(mctrl, sizeof (*mctrl));
1383 		nmcs--;
1384 	} else
1385 		DPRINTF(MC_DESTRC_DEBUG, ("mc_delete: mctrl is not found\n"));
1386 
1387 	/* delete device groups and devices of the detached MC */
1388 	for (i = 0; i < NDGRPS_PER_MC; i++) {
1389 		dgrpid = mc_id * NDGRPS_PER_MC + i;
1390 		if (!(dgrp = mc_node_get(dgrpid, dgrp_head))) {
1391 			continue;
1392 		}
1393 
1394 		for (j = 0; j < NDIMMS_PER_DGRP; j++) {
1395 			devid = dgrpid * NDIMMS_PER_DGRP + j;
1396 			if (devp = mc_node_get(devid, device_head)) {
1397 				mc_node_del((mc_dlist_t *)devp,
1398 				    &device_head, &device_tail);
1399 				kmem_free(devp, sizeof (*devp));
1400 			} else
1401 				DPRINTF(MC_DESTRC_DEBUG,
1402 				    ("mc_delete: no dev %d\n", devid));
1403 		}
1404 
1405 		mc_node_del((mc_dlist_t *)dgrp, &dgrp_head, &dgrp_tail);
1406 		kmem_free(dgrp, sizeof (*dgrp));
1407 	}
1408 
1409 	/* delete all banks and associated segments */
1410 	for (i = 0; i < NLOGBANKS_PER_MC; i++) {
1411 		bankid = mc_id * NLOGBANKS_PER_MC + i;
1412 		if (!(banki = mc_node_get(bankid, bank_head))) {
1413 			continue;
1414 		}
1415 
1416 		/* bank and segments go together */
1417 		if ((segi = mc_node_get(banki->seg_id, seg_head)) != NULL) {
1418 			mc_node_del((mc_dlist_t *)segi, &seg_head, &seg_tail);
1419 			kmem_free(segi, sizeof (*segi));
1420 			nsegments--;
1421 		}
1422 
1423 		mc_node_del((mc_dlist_t *)banki, &bank_head, &bank_tail);
1424 		kmem_free(banki, sizeof (*banki));
1425 	}
1426 
1427 	mutex_exit(&mcdatamutex);
1428 }
1429 
1430 /*
1431  * mc_dlist is a double linking list, including unique id, and pointers to
1432  * next, and previous nodes. seg_info, bank_info, dgrp_info, device_info,
1433  * and mctrl_info has it at the top to share the operations, add, del, and get.
1434  *
1435  * The new node is added at the tail and is not sorted.
1436  *
1437  * Input: The pointer of node to be added, head and tail of the list
1438  */
1439 
1440 static void
1441 mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1442 {
1443 	DPRINTF(MC_LIST_DEBUG, ("mc_node_add: node->id %d head %p tail %p\n",
1444 	    node->id, (void *) *head, (void *) *tail));
1445 
1446 	if (*head != NULL) {
1447 		node->prev = *tail;
1448 		node->next = (*tail)->next;
1449 		(*tail)->next = node;
1450 		*tail = node;
1451 	} else {
1452 		node->next = node->prev = NULL;
1453 		*head = *tail = node;
1454 	}
1455 }
1456 
1457 /*
1458  * Input: The pointer of node to be deleted, head and tail of the list
1459  *
1460  * Deleted node will be at the following positions
1461  * 1. At the tail of the list
1462  * 2. At the head of the list
1463  * 3. At the head and tail of the list, i.e. only one left.
1464  * 4. At the middle of the list
1465  */
1466 
1467 static void
1468 mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1469 {
1470 	if (node->next == NULL) {
1471 		/* deleted node is at the tail of list */
1472 		*tail = node->prev;
1473 	} else {
1474 		node->next->prev = node->prev;
1475 	}
1476 
1477 	if (node->prev == NULL) {
1478 		/* deleted node is at the head of list */
1479 		*head = node->next;
1480 	} else {
1481 		node->prev->next = node->next;
1482 	}
1483 }
1484 
1485 /*
1486  * Search the list from the head of the list to match the given id
1487  * Input: id and the head of the list
1488  * Return: pointer of found node
1489  */
1490 static void *
1491 mc_node_get(int id, mc_dlist_t *head)
1492 {
1493 	mc_dlist_t *node;
1494 
1495 	node = head;
1496 	while (node != NULL) {
1497 		DPRINTF(MC_LIST_DEBUG, ("mc_node_get: id %d, given id %d\n",
1498 		    node->id, id));
1499 		if (node->id == id)
1500 			break;
1501 		node = node->next;
1502 	}
1503 	return (node);
1504 }
1505 
/*
 * Memory subsystem provides 144 bits (128 Data bits, 9 ECC bits and 7
 * unused bits) interface via a pair of DIMMs. Mapping of Data/ECC bits
 * to a specific DIMM pin is described by the memory-layout property
 * via two tables: dimm table and pin table.
 *
 * Memory-layout property arranges data/ecc bits in the following order:
 *
 *   Bit#  143                          16 15       7 6           0
 *        |      Data[127:0]              | ECC[8:0] | Unused[6:0] |
 *
 * dimm table: 1 bit is used to store DIMM number (2 possible DIMMs) for
 *	each Data/ECC bit. Thus, it needs 18 bytes (144/8) to represent
 *	all Data/ECC bits in this table. Information is stored in big
 *	endian order, i.e. dimm_table[0] represents information for
 *	logical bit# 143 to 136.
 *
 * pin table: 1 byte is used to store pin position for each Data/ECC bit.
 *	Thus, this table is 144 bytes long. Information is stored in little
 *	endian order, i.e, pin_table[0] represents pin number of logical
 *	bit 0 and pin_table[143] contains pin number for logical bit 143
 *	(i.e. data bit# 127).
 *
 * qwordmap table below is used to map mc_get_mem_unum "synd_code" value into
 * logical bit position assigned above by the memory-layout property.
 *
 * The table is indexed by synd_code (0 .. QWORD_SIZE-1) and is a
 * permutation of 0 .. QWORD_SIZE-1: codes 0-127 map to logical bits
 * 16-143, codes 128-136 map to logical bits 7-15 and codes 137-143 map to
 * logical bits 4, 5, 6, 0, 1, 2, 3.  It is only ever read, hence const.
 */

#define	QWORD_SIZE	144
static const uint8_t qwordmap[QWORD_SIZE] =
{
16,   17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,
32,   33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,
48,   49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,
64,   65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,  79,
80,   81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,
96,   97,  98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
7,    8,   9,  10,  11,  12,  13,  14,  15,   4,   5,   6,   0,   1,   2,   3
};
1546 
1547 
1548 /* ARGSUSED */
1549 static int
1550 mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf, int buflen, int *lenp)
1551 {
1552 	int i;
1553 	int pos_cacheline, position, index, idx4dimm;
1554 	int qwlayout = synd_code;
1555 	short offset, data;
1556 	char unum[UNUM_NAMLEN];
1557 	struct dimm_info *dimmp;
1558 	struct pin_info *pinp;
1559 	struct bank_info *bank;
1560 	struct mctrl_info *mctrl;
1561 
1562 	/*
1563 	 * Enforce old Openboot requirement for synd code, either a single-bit
1564 	 * code from 0..QWORD_SIZE-1 or -1 (multi-bit error).
1565 	 */
1566 	if (qwlayout < -1 || qwlayout >= QWORD_SIZE)
1567 		return (EINVAL);
1568 
1569 	unum[0] = '\0';
1570 
1571 	DPRINTF(MC_GUNUM_DEBUG, ("mc_get_mem_unum:qwlayout %d phyaddr 0x%lx\n",
1572 	    qwlayout, paddr));
1573 
1574 	/*
1575 	 * Scan all logical banks to get one responding to the physical
1576 	 * address. Then compute the index to look up dimm and pin tables
1577 	 * to generate the unmuber.
1578 	 */
1579 	mutex_enter(&mcdatamutex);
1580 	bank = (struct bank_info *)bank_head;
1581 	while (bank != NULL) {
1582 		int mcid, mcdgrpid, dimmoffset;
1583 
1584 		/*
1585 		 * Physical Address is in a bank if (Addr & Mask) == Match
1586 		 */
1587 		if ((paddr & bank->mask) != bank->match) {
1588 			bank = (struct bank_info *)bank->bank_node.next;
1589 			continue;
1590 		}
1591 
1592 		mcid = bank->bank_node.id / NLOGBANKS_PER_MC;
1593 		mctrl = mc_node_get(mcid, mctrl_head);
1594 		ASSERT(mctrl != NULL);
1595 
1596 		DPRINTF(MC_GUNUM_DEBUG, ("mc_get_mem_unum:mc %d bank %d "
1597 		    "dgrp %d\n", mcid, bank->bank_node.id, bank->devgrp_id));
1598 
1599 		mcdgrpid = bank->devgrp_id % NDGRPS_PER_MC;
1600 		dimmoffset = mcdgrpid * NDIMMS_PER_DGRP;
1601 
1602 		dimmp = (struct dimm_info *)mctrl->dimminfop;
1603 		if (dimmp == NULL) {
1604 			mutex_exit(&mcdatamutex);
1605 			return (ENXIO);
1606 		}
1607 
1608 		if ((qwlayout >= 0) && (qwlayout < QWORD_SIZE)) {
1609 			/*
1610 			 * single-bit error handling, we can identify specific
1611 			 * DIMM.
1612 			 */
1613 
1614 			pinp = (struct pin_info *)&dimmp->data[0];
1615 
1616 			pos_cacheline = qwordmap[qwlayout];
1617 			position = 143 - pos_cacheline;
1618 			index = position / 8;
1619 			offset = 7 - (position % 8);
1620 
1621 			DPRINTF(MC_GUNUM_DEBUG, ("mc_get_mem_unum:position "
1622 			    "%d\n", position));
1623 			/*
1624 			 * Trade-off: We cound't add pin number to
1625 			 * unumber string because statistic number
1626 			 * pumps up at the corresponding dimm not pin.
1627 			 * (void) sprintf(unum, "Pin %1u ", (uint_t)
1628 			 * pinp->pintable[pos_cacheline]);
1629 			 */
1630 			DPRINTF(MC_GUNUM_DEBUG, ("mc_get_mem_unum:pin number "
1631 			    "%1u\n", (uint_t)pinp->pintable[pos_cacheline]));
1632 			data = pinp->dimmtable[index];
1633 			idx4dimm = (data >> offset) & 1;
1634 
1635 			(void) strncpy(unum,
1636 			    (char *)dimmp->label[dimmoffset + idx4dimm],
1637 			    UNUM_NAMLEN);
1638 
1639 			DPRINTF(MC_GUNUM_DEBUG,
1640 				("mc_get_mem_unum:unum %s\n", unum));
1641 
1642 			/*
1643 			 * platform hook for adding label information to unum.
1644 			 */
1645 			mc_add_mem_unum_label(unum, mcid, mcdgrpid, idx4dimm);
1646 		} else {
1647 			char *p = unum;
1648 			size_t res = UNUM_NAMLEN;
1649 
1650 			/*
1651 			 * multi-bit error handling, we can only identify
1652 			 * bank of DIMMs.
1653 			 */
1654 
1655 			for (i = 0; (i < NDIMMS_PER_DGRP) && (res > 0); i++) {
1656 				(void) snprintf(p, res, "%s%s",
1657 				    i == 0 ? "" : " ",
1658 				    (char *)dimmp->label[dimmoffset + i]);
1659 				res -= strlen(p);
1660 				p += strlen(p);
1661 			}
1662 
1663 			/*
1664 			 * platform hook for adding label information
1665 			 * to unum.
1666 			 */
1667 			mc_add_mem_unum_label(unum, mcid, mcdgrpid, -1);
1668 		}
1669 		mutex_exit(&mcdatamutex);
1670 		if ((strlen(unum) >= UNUM_NAMLEN) ||
1671 		    (strlen(unum) >= buflen)) {
1672 			return (ENOSPC);
1673 		} else {
1674 			(void) strncpy(buf, unum, UNUM_NAMLEN);
1675 			*lenp = strlen(buf);
1676 			return (0);
1677 		}
1678 	}	/* end of while loop for logic bank list */
1679 
1680 	mutex_exit(&mcdatamutex);
1681 	return (ENXIO);
1682 }
1683 
1684 static int
1685 mc_get_mem_info(int synd_code, uint64_t paddr,
1686     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1687     int *segsp, int *banksp, int *mcidp)
1688 {
1689 	struct bank_info *bankp;
1690 
1691 	if (synd_code < -1 || synd_code >= QWORD_SIZE)
1692 		return (EINVAL);
1693 
1694 	/*
1695 	 * Scan all logical banks to get one responding to the physical
1696 	 * address. Then compute the index to look up dimm and pin tables
1697 	 * to generate the unmuber.
1698 	 */
1699 	mutex_enter(&mcdatamutex);
1700 	bankp = (struct bank_info *)bank_head;
1701 	while (bankp != NULL) {
1702 		struct seg_info *segp;
1703 		int mcid;
1704 
1705 		/*
1706 		 * Physical Address is in a bank if (Addr & Mask) == Match
1707 		 */
1708 		if ((paddr & bankp->mask) != bankp->match) {
1709 			bankp = (struct bank_info *)bankp->bank_node.next;
1710 			continue;
1711 		}
1712 
1713 		mcid = bankp->bank_node.id / NLOGBANKS_PER_MC;
1714 
1715 		/*
1716 		 * Get the corresponding segment.
1717 		 */
1718 		if ((segp = (struct seg_info *)mc_node_get(bankp->seg_id,
1719 		    seg_head)) == NULL) {
1720 			mutex_exit(&mcdatamutex);
1721 			return (EFAULT);
1722 		}
1723 
1724 		*mem_sizep = memsize;
1725 		*seg_sizep = segp->size;
1726 		*bank_sizep = bankp->size;
1727 		*segsp = nsegments;
1728 		*banksp = segp->nbanks;
1729 		*mcidp = mcid;
1730 
1731 		mutex_exit(&mcdatamutex);
1732 		return (0);
1733 
1734 	}	/* end of while loop for logic bank list */
1735 
1736 	mutex_exit(&mcdatamutex);
1737 	return (ENXIO);
1738 }
1739 /*
1740  * mc-us3i driver allows a platform to add extra label
1741  * information to the unum string. If a platform implements a
1742  * kernel function called plat_add_mem_unum_label() it will be
1743  * executed. This would typically be implemented in the platmod.
1744  */
1745 static void
1746 mc_add_mem_unum_label(char *unum, int mcid, int bank, int dimm)
1747 {
1748 	if (&plat_add_mem_unum_label)
1749 		plat_add_mem_unum_label(unum, mcid, bank, dimm);
1750 }
1751