xref: /illumos-gate/usr/src/uts/sun4u/io/mc-us3.c (revision 193974072f41a843678abf5f61979c748687e66b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/conf.h>
28 #include <sys/ddi.h>
29 #include <sys/stat.h>
30 #include <sys/sunddi.h>
31 #include <sys/ddi_impldefs.h>
32 #include <sys/obpdefs.h>
33 #include <sys/cmn_err.h>
34 #include <sys/errno.h>
35 #include <sys/kmem.h>
36 #include <sys/open.h>
37 #include <sys/thread.h>
38 #include <sys/cpuvar.h>
39 #include <sys/x_call.h>
40 #include <sys/debug.h>
41 #include <sys/sysmacros.h>
42 #include <sys/ivintr.h>
43 #include <sys/intr.h>
44 #include <sys/intreg.h>
45 #include <sys/autoconf.h>
46 #include <sys/modctl.h>
47 #include <sys/spl.h>
48 #include <sys/async.h>
49 #include <sys/mc.h>
50 #include <sys/mc-us3.h>
51 #include <sys/cpu_module.h>
52 #include <sys/platform_module.h>
53 
54 /*
55  * Function prototypes
56  */
57 
/* Character-device entry points, installed in mc_cb_ops below. */
static int mc_open(dev_t *, int, int, cred_t *);
static int mc_close(dev_t, int, int, cred_t *);
static int mc_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
/* Autoconfiguration entry points, installed in mc_ops below. */
static int mc_attach(dev_info_t *, ddi_attach_cmd_t);
static int mc_detach(dev_info_t *, ddi_detach_cmd_t);
63 
64 /*
65  * Configuration data structures
66  */
/*
 * Character/block entry points.  Only open, close and ioctl are implemented
 * by this driver; every other slot is filled with one of the system-provided
 * nulldev/nodev/nochpoll/ddi_prop_op routines.
 */
static struct cb_ops mc_cb_ops = {
	mc_open,			/* open */
	mc_close,			/* close */
	nulldev,			/* strategy */
	nulldev,			/* print */
	nodev,				/* dump */
	nulldev,			/* read */
	nulldev,			/* write */
	mc_ioctl,			/* ioctl */
	nodev,				/* devmap */
	nodev,				/* mmap */
	nodev,				/* segmap */
	nochpoll,			/* poll */
	ddi_prop_op,			/* cb_prop_op */
	0,				/* streamtab */
	D_MP | D_NEW | D_HOTPLUG,	/* Driver compatibility flag */
	CB_REV,				/* rev */
	nodev,				/* cb_aread */
	nodev				/* cb_awrite */
};
87 
/*
 * Device operations.  The driver supplies attach and detach; it is a leaf
 * driver (no bus_ops) and exposes its cb_ops through mc_cb_ops.
 */
static struct dev_ops mc_ops = {
	DEVO_REV,			/* rev */
	0,				/* refcnt  */
	ddi_getinfo_1to1,		/* getinfo */
	nulldev,			/* identify */
	nulldev,			/* probe */
	mc_attach,			/* attach */
	mc_detach,			/* detach */
	nulldev,			/* reset */
	&mc_cb_ops,			/* cb_ops */
	(struct bus_ops *)0,		/* bus_ops */
	nulldev,			/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};
102 
103 /*
104  * Driver globals
105  */
/* Soft-state anchor handed to the ddi_soft_state_*() routines. */
static void *mcp;
/*
 * Counters describing the current memory configuration.  nmcs is bumped
 * under mcdatamutex by mc_get_mcregs(); nmcs, nsegments, memsize and maxbanks
 * are reported to user land by mc_ioctl().
 * NOTE(review): seg_id is not referenced in this part of the file; it is
 * presumably the next segment id to hand out -- confirm against its users.
 */
static int nmcs = 0;
static int seg_id = 0;
static int nsegments = 0;
static uint64_t memsize = 0;
static int maxbanks = 0;

/*
 * Head/tail pointers of the doubly linked lists of segments, banks, memory
 * controllers, device groups and devices.  mc_ioctl() walks the segment,
 * bank, controller and device-group lists while holding mcdatamutex.
 */
static mc_dlist_t *seg_head, *seg_tail, *bank_head, *bank_tail;
static mc_dlist_t *mctrl_head, *mctrl_tail, *dgrp_head, *dgrp_tail;
static mc_dlist_t *device_head, *device_tail;

/*
 * mcmutex is held by attach/detach while the p2get_* and p2init_sid_cache
 * hooks are installed or removed.
 * mcdatamutex is held around the list walks in mc_ioctl() and the nmcs update.
 */
static kmutex_t	mcmutex;
static kmutex_t	mcdatamutex;

/* Taken as writer around mc_populate_sid_cache() in mc_attach(). */
static krwlock_t mcdimmsids_rw;

/* pointer to cache of DIMM serial ids */
static dimm_sid_cache_t	*mc_dimm_sids;
/* number of dimm_sid_cache_t entries in mc_dimm_sids (see _fini()) */
static int		max_entries;
125 
extern struct mod_ops mod_driverops;

/* Module linkage: describes this module as a device driver using mc_ops. */
static struct modldrv modldrv = {
	&mod_driverops,			/* module type, this one is a driver */
	"Memory-controller",		/* module name */
	&mc_ops,			/* driver ops */
};

/* Passed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1,		/* rev */
	(void *)&modldrv,
	NULL
};
139 
/*
 * Routines that mc_attach() installs into the p2get_* and p2init_sid_cache
 * hooks once the first memory controller has been set up.
 */
static int mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf,
    int buflen, int *lenp);
static int mc_get_mem_info(int synd_code, uint64_t paddr,
    uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
    int *segsp, int *banksp, int *mcidp);
static int mc_get_mem_sid(int mcid, int dimm, char *buf, int buflen, int *lenp);
static int mc_get_mem_offset(uint64_t paddr, uint64_t *offp);
static int mc_get_mem_addr(int mcid, char *sid, uint64_t off, uint64_t *paddr);
static int mc_init_sid_cache(void);
/* Internal helpers that build and tear down the memory-layout lists. */
static int mc_get_mcregs(struct mc_soft_state *);
static void mc_construct(int mc_id, void *dimminfop);
static int mlayout_add(int mc_id, int bank_no, uint64_t reg, void *dimminfop);
static void mlayout_del(int mc_id, int delete);
static struct seg_info *seg_match_base(u_longlong_t base);
static void mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
static void mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail);
static mc_dlist_t *mc_node_get(int id, mc_dlist_t *head);
static void mc_add_mem_unum_label(char *buf, int mcid, int bank, int dimm);
static int mc_populate_sid_cache(void);
static int mc_get_sid_cache_index(int mcid);
static void mc_update_bank(struct bank_info *bank);

/*
 * These symbols are defined outside this module and may be absent, hence the
 * weak binding: mc_attach()/mc_detach() test the address of each p2* hook
 * before storing to it.
 * NOTE(review): the plat_* routines are not called in this part of the file;
 * presumably they are optional platform-module hooks -- confirm at call sites.
 */
#pragma weak p2get_mem_unum
#pragma weak p2get_mem_info
#pragma weak p2get_mem_sid
#pragma weak p2get_mem_offset
#pragma	weak p2get_mem_addr
#pragma weak p2init_sid_cache
#pragma weak plat_add_mem_unum_label
#pragma weak plat_alloc_sid_cache
#pragma weak plat_populate_sid_cache
171 
/*
 * Size of one checkword in bits and in bytes: 144 bits (18 bytes).  Used by
 * mc_offset_to_addr()/mc_addr_to_offset() as the per-line stride within a
 * half-dimm.
 */
#define	QWORD_SIZE		144
#define	QWORD_SIZE_BYTES	(QWORD_SIZE / 8)
174 
175 /*
176  * These are the module initialization routines.
177  */
178 
179 int
180 _init(void)
181 {
182 	int error;
183 
184 	if ((error = ddi_soft_state_init(&mcp,
185 	    sizeof (struct mc_soft_state), 1)) != 0)
186 		return (error);
187 
188 	error =  mod_install(&modlinkage);
189 	if (error == 0) {
190 		mutex_init(&mcmutex, NULL, MUTEX_DRIVER, NULL);
191 		mutex_init(&mcdatamutex, NULL, MUTEX_DRIVER, NULL);
192 		rw_init(&mcdimmsids_rw, NULL, RW_DRIVER, NULL);
193 	}
194 
195 	return (error);
196 }
197 
198 int
199 _fini(void)
200 {
201 	int error;
202 
203 	if ((error = mod_remove(&modlinkage)) != 0)
204 		return (error);
205 
206 	ddi_soft_state_fini(&mcp);
207 	mutex_destroy(&mcmutex);
208 	mutex_destroy(&mcdatamutex);
209 	rw_destroy(&mcdimmsids_rw);
210 
211 	if (mc_dimm_sids)
212 		kmem_free(mc_dimm_sids, sizeof (dimm_sid_cache_t) *
213 		    max_entries);
214 
215 	return (0);
216 }
217 
218 int
219 _info(struct modinfo *modinfop)
220 {
221 	return (mod_info(&modlinkage, modinfop));
222 }
223 
224 static int
225 mc_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
226 {
227 	struct mc_soft_state *softsp;
228 	struct dimm_info *dimminfop;
229 	int instance, len, err;
230 
231 	/* get the instance of this devi */
232 	instance = ddi_get_instance(devi);
233 
234 	switch (cmd) {
235 	case DDI_ATTACH:
236 		break;
237 
238 	case DDI_RESUME:
239 		/* get the soft state pointer for this device node */
240 		softsp = ddi_get_soft_state(mcp, instance);
241 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: DDI_RESUME: updating MADRs\n",
242 		    instance));
243 		/*
244 		 * During resume, the source and target board's bank_infos
245 		 * need to be updated with the new mc MADR values.  This is
246 		 * implemented with existing functionality by first removing
247 		 * the props and allocated data structs, and then adding them
248 		 * back in.
249 		 */
250 		if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
251 		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
252 		    MEM_CFG_PROP_NAME) == 1) {
253 			(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
254 			    MEM_CFG_PROP_NAME);
255 		}
256 		mlayout_del(softsp->portid, 0);
257 		if (mc_get_mcregs(softsp) == -1) {
258 			cmn_err(CE_WARN, "mc_attach: mc%d DDI_RESUME failure\n",
259 			    instance);
260 		}
261 		return (DDI_SUCCESS);
262 
263 	default:
264 		return (DDI_FAILURE);
265 	}
266 
267 	if (ddi_soft_state_zalloc(mcp, instance) != DDI_SUCCESS)
268 		return (DDI_FAILURE);
269 
270 	softsp = ddi_get_soft_state(mcp, instance);
271 
272 	/* Set the dip in the soft state */
273 	softsp->dip = devi;
274 
275 	if ((softsp->portid = (int)ddi_getprop(DDI_DEV_T_ANY, softsp->dip,
276 	    DDI_PROP_DONTPASS, "portid", -1)) == -1) {
277 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to get %s property",
278 		    instance, "portid"));
279 		goto bad;
280 	}
281 
282 	DPRINTF(MC_ATTACH_DEBUG, ("mc%d ATTACH: portid %d, cpuid %d\n",
283 	    instance, softsp->portid, CPU->cpu_id));
284 
285 	/* map in the registers for this device. */
286 	if (ddi_map_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0)) {
287 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d: unable to map registers",
288 		    instance));
289 		goto bad;
290 	}
291 
292 	/*
293 	 * Get the label of dimms and pin routing information at memory-layout
294 	 * property if the memory controller is enabled.
295 	 *
296 	 * Basically every memory-controller node on every machine should
297 	 * have one of these properties unless the memory controller is
298 	 * physically not capable of having memory attached to it, e.g.
299 	 * Excalibur's slave processor.
300 	 */
301 	err = ddi_getlongprop(DDI_DEV_T_ANY, softsp->dip, DDI_PROP_DONTPASS,
302 	    "memory-layout", (caddr_t)&dimminfop, &len);
303 	if (err == DDI_PROP_SUCCESS) {
304 		/*
305 		 * Set the pointer and size of property in the soft state
306 		 */
307 		softsp->memlayoutp = dimminfop;
308 		softsp->size = len;
309 	} else if (err == DDI_PROP_NOT_FOUND) {
310 		/*
311 		 * This is a disable MC. Clear out the pointer and size
312 		 * of property in the soft state
313 		 */
314 		softsp->memlayoutp = NULL;
315 		softsp->size = 0;
316 	} else {
317 		DPRINTF(MC_ATTACH_DEBUG, ("mc%d is disabled: dimminfop %p\n",
318 		    instance, (void *)dimminfop));
319 		goto bad2;
320 	}
321 
322 	DPRINTF(MC_ATTACH_DEBUG, ("mc%d: dimminfop=0x%p data=0x%lx len=%d\n",
323 	    instance, (void *)dimminfop, *(uint64_t *)dimminfop, len));
324 
325 	/* Get MC registers and construct all needed data structure */
326 	if (mc_get_mcregs(softsp) == -1)
327 		goto bad1;
328 
329 	mutex_enter(&mcmutex);
330 	if (nmcs == 1) {
331 		if (&p2get_mem_unum)
332 			p2get_mem_unum = mc_get_mem_unum;
333 		if (&p2get_mem_info)
334 			p2get_mem_info = mc_get_mem_info;
335 		if (&p2get_mem_sid)
336 			p2get_mem_sid = mc_get_mem_sid;
337 		if (&p2get_mem_offset)
338 			p2get_mem_offset = mc_get_mem_offset;
339 		if (&p2get_mem_addr)
340 			p2get_mem_addr = mc_get_mem_addr;
341 		if (&p2init_sid_cache)
342 			p2init_sid_cache = mc_init_sid_cache;
343 	}
344 
345 	mutex_exit(&mcmutex);
346 
347 	/*
348 	 * Update DIMM serial id information if the DIMM serial id
349 	 * cache has already been initialized.
350 	 */
351 	if (mc_dimm_sids) {
352 		rw_enter(&mcdimmsids_rw, RW_WRITER);
353 		(void) mc_populate_sid_cache();
354 		rw_exit(&mcdimmsids_rw);
355 	}
356 
357 	if (ddi_create_minor_node(devi, "mc-us3", S_IFCHR, instance,
358 	    "ddi_mem_ctrl", 0) != DDI_SUCCESS) {
359 		DPRINTF(MC_ATTACH_DEBUG, ("mc_attach: create_minor_node"
360 		    " failed \n"));
361 		goto bad1;
362 	}
363 
364 	ddi_report_dev(devi);
365 	return (DDI_SUCCESS);
366 
367 bad1:
368 	/* release all allocated data struture for this MC */
369 	mlayout_del(softsp->portid, 0);
370 	if (softsp->memlayoutp != NULL)
371 		kmem_free(softsp->memlayoutp, softsp->size);
372 
373 	/* remove the libdevinfo property */
374 	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
375 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
376 	    MEM_CFG_PROP_NAME) == 1) {
377 		(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
378 		    MEM_CFG_PROP_NAME);
379 	}
380 
381 bad2:
382 	/* unmap the registers for this device. */
383 	ddi_unmap_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0);
384 
385 bad:
386 	ddi_soft_state_free(mcp, instance);
387 	return (DDI_FAILURE);
388 }
389 
390 /* ARGSUSED */
391 static int
392 mc_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
393 {
394 	int instance;
395 	struct mc_soft_state *softsp;
396 
397 	/* get the instance of this devi */
398 	instance = ddi_get_instance(devi);
399 
400 	/* get the soft state pointer for this device node */
401 	softsp = ddi_get_soft_state(mcp, instance);
402 
403 	switch (cmd) {
404 	case DDI_SUSPEND:
405 		return (DDI_SUCCESS);
406 
407 	case DDI_DETACH:
408 		break;
409 
410 	default:
411 		return (DDI_FAILURE);
412 	}
413 
414 	DPRINTF(MC_DETACH_DEBUG, ("mc%d DETACH: portid= %d, table 0x%p\n",
415 	    instance, softsp->portid, softsp->memlayoutp));
416 
417 	/* remove the libdevinfo property */
418 	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
419 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
420 	    MEM_CFG_PROP_NAME) == 1) {
421 		(void) ddi_prop_remove(DDI_DEV_T_NONE, softsp->dip,
422 		    MEM_CFG_PROP_NAME);
423 	}
424 
425 	/* release all allocated data struture for this MC */
426 	mlayout_del(softsp->portid, 1);
427 	if (softsp->memlayoutp != NULL)
428 		kmem_free(softsp->memlayoutp, softsp->size);
429 
430 	/* unmap the registers */
431 	ddi_unmap_regs(softsp->dip, 0, (caddr_t *)&softsp->mc_base, 0, 0);
432 
433 	mutex_enter(&mcmutex);
434 	if (nmcs == 0) {
435 		if (&p2get_mem_unum)
436 			p2get_mem_unum = NULL;
437 		if (&p2get_mem_info)
438 			p2get_mem_info = NULL;
439 		if (&p2get_mem_sid)
440 			p2get_mem_sid = NULL;
441 		if (&p2get_mem_offset)
442 			p2get_mem_offset = NULL;
443 		if (&p2get_mem_addr)
444 			p2get_mem_addr = NULL;
445 		if (&p2init_sid_cache)
446 			p2init_sid_cache = NULL;
447 	}
448 
449 	mutex_exit(&mcmutex);
450 
451 	ddi_remove_minor_node(devi, NULL);
452 
453 	/* free up the soft state */
454 	ddi_soft_state_free(mcp, instance);
455 
456 	return (DDI_SUCCESS);
457 }
458 
459 /* ARGSUSED */
460 static int
461 mc_open(dev_t *devp, int flag, int otyp, cred_t *credp)
462 {
463 
464 	/* verify that otyp is appropriate */
465 	if (otyp != OTYP_CHR) {
466 		return (EINVAL);
467 	}
468 
469 	return (0);
470 }
471 
472 /* ARGSUSED */
473 static int
474 mc_close(dev_t devp, int flag, int otyp, cred_t *credp)
475 {
476 	return (0);
477 }
478 
479 /*
480  * cmd includes MCIOC_MEMCONF, MCIOC_MEM, MCIOC_SEG, MCIOC_BANK, MCIOC_DEVGRP,
481  * MCIOC_CTRLCONF, MCIOC_CONTROL.
482  *
483  * MCIOC_MEM, MCIOC_SEG, MCIOC_CTRLCONF, and MCIOC_CONTROL are
484  * associated with various length struct. If given number is less than the
485  * number in kernel, update the number and return EINVAL so that user could
486  * allocate enough space for it.
487  *
488  */
489 
490 /* ARGSUSED */
491 static int
492 mc_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p,
493 	int *rval_p)
494 {
495 	size_t	size;
496 	struct mc_memconf mcmconf;
497 	struct mc_memory *mcmem, mcmem_in;
498 	struct mc_segment *mcseg, mcseg_in;
499 	struct mc_bank mcbank;
500 	struct mc_devgrp mcdevgrp;
501 	struct mc_ctrlconf *mcctrlconf, mcctrlconf_in;
502 	struct mc_control *mccontrol, mccontrol_in;
503 	struct seg_info *seg = NULL;
504 	struct bank_info *bank = NULL;
505 	struct dgrp_info *dgrp = NULL;
506 	struct mctrl_info *mcport;
507 	mc_dlist_t *mctrl;
508 	int i, status = 0;
509 	cpu_t *cpu;
510 
511 	switch (cmd) {
512 	case MCIOC_MEMCONF:
513 		mutex_enter(&mcdatamutex);
514 
515 		mcmconf.nmcs = nmcs;
516 		mcmconf.nsegments = nsegments;
517 		mcmconf.nbanks = maxbanks;
518 		mcmconf.ndevgrps = NDGRPS;
519 		mcmconf.ndevs = NDIMMS;
520 		mcmconf.len_dev = MAX_DEVLEN;
521 		mcmconf.xfer_size = TRANSFER_SIZE;
522 
523 		mutex_exit(&mcdatamutex);
524 
525 		if (copyout(&mcmconf, (void *)arg, sizeof (struct mc_memconf)))
526 			return (EFAULT);
527 		return (0);
528 
529 	/*
530 	 * input: nsegments and allocate space for various length of segmentids
531 	 *
532 	 * return    0: size, number of segments, and all segment ids,
533 	 *		where glocal and local ids are identical.
534 	 *	EINVAL: if the given nsegments is less than that in kernel and
535 	 *		nsegments of struct will be updated.
536 	 *	EFAULT: if other errors in kernel.
537 	 */
538 	case MCIOC_MEM:
539 		if (copyin((void *)arg, &mcmem_in,
540 		    sizeof (struct mc_memory)) != 0)
541 			return (EFAULT);
542 
543 		mutex_enter(&mcdatamutex);
544 		if (mcmem_in.nsegments < nsegments) {
545 			mcmem_in.nsegments = nsegments;
546 			if (copyout(&mcmem_in, (void *)arg,
547 			    sizeof (struct mc_memory)))
548 				status = EFAULT;
549 			else
550 				status = EINVAL;
551 
552 			mutex_exit(&mcdatamutex);
553 			return (status);
554 		}
555 
556 		size = sizeof (struct mc_memory) + (nsegments - 1) *
557 		    sizeof (mcmem->segmentids[0]);
558 		mcmem = kmem_zalloc(size, KM_SLEEP);
559 
560 		mcmem->size = memsize;
561 		mcmem->nsegments = nsegments;
562 		seg = (struct seg_info *)seg_head;
563 		for (i = 0; i < nsegments; i++) {
564 			ASSERT(seg != NULL);
565 			mcmem->segmentids[i].globalid = seg->seg_node.id;
566 			mcmem->segmentids[i].localid = seg->seg_node.id;
567 			seg = (struct seg_info *)seg->seg_node.next;
568 		}
569 		mutex_exit(&mcdatamutex);
570 
571 		if (copyout(mcmem, (void *)arg, size))
572 			status = EFAULT;
573 
574 		kmem_free(mcmem, size);
575 		return (status);
576 
577 	/*
578 	 * input: id, nbanks and allocate space for various length of bankids
579 	 *
580 	 * return    0: base, size, number of banks, and all bank ids,
581 	 *		where global id is unique of all banks and local id
582 	 *		is only unique for mc.
583 	 *	EINVAL: either id isn't found or if given nbanks is less than
584 	 *		that in kernel and nbanks of struct will be updated.
585 	 *	EFAULT: if other errors in kernel.
586 	 */
587 	case MCIOC_SEG:
588 
589 		if (copyin((void *)arg, &mcseg_in,
590 		    sizeof (struct mc_segment)) != 0)
591 			return (EFAULT);
592 
593 		mutex_enter(&mcdatamutex);
594 		if ((seg = (struct seg_info *)mc_node_get(mcseg_in.id,
595 		    seg_head)) == NULL) {
596 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG: seg not match, "
597 			    "id %d\n", mcseg_in.id));
598 			mutex_exit(&mcdatamutex);
599 			return (EFAULT);
600 		}
601 
602 		if (mcseg_in.nbanks < seg->nbanks) {
603 			mcseg_in.nbanks = seg->nbanks;
604 			if (copyout(&mcseg_in, (void *)arg,
605 			    sizeof (struct mc_segment)))
606 				status = EFAULT;
607 			else
608 				status = EINVAL;
609 
610 			mutex_exit(&mcdatamutex);
611 			return (status);
612 		}
613 
614 		size = sizeof (struct mc_segment) + (seg->nbanks - 1) *
615 		    sizeof (mcseg->bankids[0]);
616 		mcseg = kmem_zalloc(size, KM_SLEEP);
617 
618 		mcseg->id = seg->seg_node.id;
619 		mcseg->ifactor = seg->ifactor;
620 		mcseg->base = seg->base;
621 		mcseg->size = seg->size;
622 		mcseg->nbanks = seg->nbanks;
623 
624 		bank = seg->hb_inseg;
625 
626 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:nbanks %d seg 0x%p bank %p\n",
627 		    seg->nbanks, (void *)seg, (void *)bank));
628 
629 		i = 0;
630 		while (bank != NULL) {
631 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_SEG:idx %d bank_id %d\n",
632 			    i, bank->bank_node.id));
633 			mcseg->bankids[i].globalid = bank->bank_node.id;
634 			mcseg->bankids[i++].localid =
635 			    bank->local_id;
636 			bank = bank->n_inseg;
637 		}
638 		ASSERT(i == seg->nbanks);
639 		mutex_exit(&mcdatamutex);
640 
641 		if (copyout(mcseg, (void *)arg, size))
642 			status = EFAULT;
643 
644 		kmem_free(mcseg, size);
645 		return (status);
646 
647 	/*
648 	 * input: id
649 	 *
650 	 * return    0: mask, match, size, and devgrpid,
651 	 *		where global id is unique of all devgrps and local id
652 	 *		is only unique for mc.
653 	 *	EINVAL: if id isn't found
654 	 *	EFAULT: if other errors in kernel.
655 	 */
656 	case MCIOC_BANK:
657 		if (copyin((void *)arg, &mcbank, sizeof (struct mc_bank)) != 0)
658 			return (EFAULT);
659 
660 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank id %d\n", mcbank.id));
661 
662 		mutex_enter(&mcdatamutex);
663 
664 		if ((bank = (struct bank_info *)mc_node_get(mcbank.id,
665 		    bank_head)) == NULL) {
666 			mutex_exit(&mcdatamutex);
667 			return (EINVAL);
668 		}
669 
670 		DPRINTF(MC_CMD_DEBUG, ("MCIOC_BANK: bank %d (0x%p) valid %hu\n",
671 		    bank->bank_node.id, (void *)bank, bank->valid));
672 
673 		/*
674 		 * If (Physic Address & MASK) == MATCH, Physic Address is
675 		 * located at this bank. The lower physical address bits
676 		 * are at [9-6].
677 		 */
678 		mcbank.mask = (~(bank->lk | ~(MADR_LK_MASK >>
679 		    MADR_LK_SHIFT))) << MADR_LPA_SHIFT;
680 		mcbank.match = bank->lm << MADR_LPA_SHIFT;
681 		mcbank.size = bank->size;
682 		mcbank.devgrpid.globalid = bank->devgrp_id;
683 		mcbank.devgrpid.localid = bank->devgrp_id % NDGRPS;
684 
685 		mutex_exit(&mcdatamutex);
686 
687 		if (copyout(&mcbank, (void *)arg, sizeof (struct mc_bank)))
688 			return (EFAULT);
689 		return (0);
690 
691 	/*
692 	 * input:id and allocate space for various length of deviceids
693 	 *
694 	 * return    0: size and number of devices.
695 	 *	EINVAL: id isn't found
696 	 *	EFAULT: if other errors in kernel.
697 	 */
698 	case MCIOC_DEVGRP:
699 
700 		if (copyin((void *)arg, &mcdevgrp,
701 		    sizeof (struct mc_devgrp)) != 0)
702 			return (EFAULT);
703 
704 		mutex_enter(&mcdatamutex);
705 		if ((dgrp = (struct dgrp_info *)mc_node_get(mcdevgrp.id,
706 		    dgrp_head)) == NULL) {
707 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_DEVGRP: not match, id "
708 			    "%d\n", mcdevgrp.id));
709 			mutex_exit(&mcdatamutex);
710 			return (EINVAL);
711 		}
712 
713 		mcdevgrp.ndevices = dgrp->ndevices;
714 		mcdevgrp.size = dgrp->size;
715 
716 		mutex_exit(&mcdatamutex);
717 
718 		if (copyout(&mcdevgrp, (void *)arg, sizeof (struct mc_devgrp)))
719 			status = EFAULT;
720 
721 		return (status);
722 
723 	/*
724 	 * input: nmcs and allocate space for various length of mcids
725 	 *
726 	 * return    0: number of mc, and all mcids,
727 	 *		where glocal and local ids are identical.
728 	 *	EINVAL: if the given nmcs is less than that in kernel and
729 	 *		nmcs of struct will be updated.
730 	 *	EFAULT: if other errors in kernel.
731 	 */
732 	case MCIOC_CTRLCONF:
733 		if (copyin((void *)arg, &mcctrlconf_in,
734 		    sizeof (struct mc_ctrlconf)) != 0)
735 			return (EFAULT);
736 
737 		mutex_enter(&mcdatamutex);
738 		if (mcctrlconf_in.nmcs < nmcs) {
739 			mcctrlconf_in.nmcs = nmcs;
740 			if (copyout(&mcctrlconf_in, (void *)arg,
741 			    sizeof (struct mc_ctrlconf)))
742 				status = EFAULT;
743 			else
744 				status = EINVAL;
745 
746 			mutex_exit(&mcdatamutex);
747 			return (status);
748 		}
749 
750 		/*
751 		 * Cannot just use the size of the struct because of the various
752 		 * length struct
753 		 */
754 		size = sizeof (struct mc_ctrlconf) + ((nmcs - 1) *
755 		    sizeof (mcctrlconf->mcids[0]));
756 		mcctrlconf = kmem_zalloc(size, KM_SLEEP);
757 
758 		mcctrlconf->nmcs = nmcs;
759 
760 		/* Get all MC ids and add to mcctrlconf */
761 		mctrl = mctrl_head;
762 		i = 0;
763 		while (mctrl != NULL) {
764 			mcctrlconf->mcids[i].globalid = mctrl->id;
765 			mcctrlconf->mcids[i].localid = mctrl->id;
766 			i++;
767 			mctrl = mctrl->next;
768 		}
769 		ASSERT(i == nmcs);
770 
771 		mutex_exit(&mcdatamutex);
772 
773 		if (copyout(mcctrlconf, (void *)arg, size))
774 			status = EFAULT;
775 
776 		kmem_free(mcctrlconf, size);
777 		return (status);
778 
779 	/*
780 	 * input:id, ndevgrps and allocate space for various length of devgrpids
781 	 *
782 	 * return    0: number of devgrp, and all devgrpids,
783 	 *		is unique of all devgrps and local id is only unique
784 	 *		for mc.
785 	 *	EINVAL: either if id isn't found or if the given ndevgrps is
786 	 *		less than that in kernel and ndevgrps of struct will
787 	 *		be updated.
788 	 *	EFAULT: if other errors in kernel.
789 	 */
790 	case MCIOC_CONTROL:
791 		if (copyin((void *)arg, &mccontrol_in,
792 		    sizeof (struct mc_control)) != 0)
793 			return (EFAULT);
794 
795 		mutex_enter(&mcdatamutex);
796 		if ((mcport = (struct mctrl_info *)mc_node_get(mccontrol_in.id,
797 		    mctrl_head)) == NULL) {
798 			mutex_exit(&mcdatamutex);
799 			return (EINVAL);
800 		}
801 
802 		/*
803 		 * mcport->ndevgrps zero means Memory Controller is disable.
804 		 */
805 		if ((mccontrol_in.ndevgrps < mcport->ndevgrps) ||
806 		    (mcport->ndevgrps == 0)) {
807 			mccontrol_in.ndevgrps = mcport->ndevgrps;
808 			if (copyout(&mccontrol_in, (void *)arg,
809 			    sizeof (struct mc_control)))
810 				status = EFAULT;
811 			else if (mcport->ndevgrps != 0)
812 				status = EINVAL;
813 
814 			mutex_exit(&mcdatamutex);
815 			return (status);
816 		}
817 
818 		size = sizeof (struct mc_control) + (mcport->ndevgrps - 1) *
819 		    sizeof (mccontrol->devgrpids[0]);
820 		mccontrol = kmem_zalloc(size, KM_SLEEP);
821 
822 		mccontrol->id = mcport->mctrl_node.id;
823 		mccontrol->ndevgrps = mcport->ndevgrps;
824 		for (i = 0; i < mcport->ndevgrps; i++) {
825 			mccontrol->devgrpids[i].globalid = mcport->devgrpids[i];
826 			mccontrol->devgrpids[i].localid =
827 			    mcport->devgrpids[i] % NDGRPS;
828 			DPRINTF(MC_CMD_DEBUG, ("MCIOC_CONTROL: devgrp id %lu\n",
829 			    *(uint64_t *)&mccontrol->devgrpids[i]));
830 		}
831 		mutex_exit(&mcdatamutex);
832 
833 		if (copyout(mccontrol, (void *)arg, size))
834 			status = EFAULT;
835 
836 		kmem_free(mccontrol, size);
837 		return (status);
838 
839 	/*
840 	 * input:id
841 	 *
842 	 * return    0: CPU flushed successfully.
843 	 *	EINVAL: the id wasn't found
844 	 */
845 	case MCIOC_ECFLUSH:
846 		mutex_enter(&cpu_lock);
847 		cpu = cpu_get((processorid_t)arg);
848 		mutex_exit(&cpu_lock);
849 		if (cpu == NULL)
850 			return (EINVAL);
851 
852 		xc_one(arg, (xcfunc_t *)cpu_flush_ecache, 0, 0);
853 
854 		return (0);
855 
856 	default:
857 		DPRINTF(MC_CMD_DEBUG, ("DEFAULT: cmd is wrong\n"));
858 		return (EFAULT);
859 	}
860 }
861 
862 /*
863  * Get Memory Address Decoding Registers and construct list.
864  * flag is to workaround Cheetah's restriction where register cannot be mapped
865  * if port id(MC registers on it) == cpu id(process is running on it).
866  */
867 static int
868 mc_get_mcregs(struct mc_soft_state *softsp)
869 {
870 	int i;
871 	int err = 0;
872 	uint64_t madreg;
873 	uint64_t ma_reg_array[NBANKS];	/* there are NBANKS of madrs */
874 
875 	/* Construct lists for MC, mctrl_info, dgrp_info, and device_info */
876 	mc_construct(softsp->portid, softsp->memlayoutp);
877 
878 	/*
879 	 * If memlayoutp is NULL, the Memory Controller is disable, and
880 	 * doesn't need to create any bank and segment.
881 	 */
882 	if (softsp->memlayoutp == NULL)
883 		goto exit;
884 
885 	/*
886 	 * Get the content of 4 Memory Address Decoding Registers, and
887 	 * construct lists of logical banks and segments.
888 	 */
889 	for (i = 0; i < NBANKS; i++) {
890 		DPRINTF(MC_REG_DEBUG, ("get_mcregs: mapreg=0x%p portid=%d "
891 		    "cpu=%d\n", (void *)softsp->mc_base, softsp->portid,
892 		    CPU->cpu_id));
893 
894 		kpreempt_disable();
895 		if (softsp->portid == (cpunodes[CPU->cpu_id].portid))
896 			madreg = get_mcr(MADR0OFFSET + (i * REGOFFSET));
897 		else
898 			madreg = *((uint64_t *)(softsp->mc_base + MADR0OFFSET +
899 			    (i * REGOFFSET)));
900 		kpreempt_enable();
901 
902 		DPRINTF(MC_REG_DEBUG, ("get_mcregs 2: memlayoutp=0x%p madreg "
903 		    "reg=0x%lx\n", softsp->memlayoutp, madreg));
904 
905 		ma_reg_array[i] = madreg;
906 
907 		if ((err = mlayout_add(softsp->portid, i, madreg,
908 		    softsp->memlayoutp)) == -1)
909 			break;
910 	}
911 
912 	/*
913 	 * Create the logical bank property for this mc node. This
914 	 * property is an encoded array of the madr for each logical
915 	 * bank (there are NBANKS of these).
916 	 */
917 	if (ddi_prop_exists(DDI_DEV_T_ANY, softsp->dip,
918 	    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
919 	    MEM_CFG_PROP_NAME) != 1) {
920 		(void) ddi_prop_create(DDI_DEV_T_NONE, softsp->dip,
921 		    DDI_PROP_CANSLEEP, MEM_CFG_PROP_NAME,
922 		    (caddr_t)&ma_reg_array, sizeof (ma_reg_array));
923 	}
924 
925 exit:
926 	if (!err) {
927 		mutex_enter(&mcdatamutex);
928 		nmcs++;
929 		mutex_exit(&mcdatamutex);
930 	}
931 	return (err);
932 }
933 
934 /*
935  * Translate a <DIMM, offset> pair to a physical address.
936  */
937 static int
938 mc_offset_to_addr(struct seg_info *seg,
939     struct bank_info *bank, uint64_t off, uint64_t *addr)
940 {
941 	uint64_t base, size, line, remainder;
942 	uint32_t ifactor;
943 
944 	/*
945 	 * Compute the half-dimm size in bytes.
946 	 * Note that bank->size represents the number of data bytes,
947 	 * and does not include the additional bits used for ecc, mtag,
948 	 * and mtag ecc information in each 144-bit checkword.
949 	 * For calculating the offset to a checkword we need the size
950 	 * including the additional 8 bytes for each 64 data bytes of
951 	 * a cache line.
952 	 */
953 	size = ((bank->size / 4) / 64) * 72;
954 
955 	/*
956 	 * Check if the offset is within this bank. This depends on the position
957 	 * of the bank, i.e., whether it is the front bank or the back bank.
958 	 */
959 	base = size * bank->pos;
960 
961 	if ((off < base) || (off >= (base + size)))
962 		return (-1);
963 
964 	/*
965 	 * Compute the offset within the half-dimm.
966 	 */
967 	off -= base;
968 
969 	/*
970 	 * Compute the line within the half-dimm. This is the same as the line
971 	 * within the bank since each DIMM in a bank contributes uniformly
972 	 * 144 bits (18 bytes) to a cache line.
973 	 */
974 	line = off / QWORD_SIZE_BYTES;
975 
976 	remainder = off % QWORD_SIZE_BYTES;
977 
978 	/*
979 	 * Compute the line within the segment.
980 	 * The bank->lm field indicates the order in which cache lines are
981 	 * distributed across the banks of a segment (See the Cheetah PRM).
982 	 * The interleave factor the bank is programmed with is used instead
983 	 * of the segment interleave factor since a segment can be composed
984 	 * of banks with different interleave factors if the banks are not
985 	 * uniform in size.
986 	 */
987 	ifactor = (bank->lk ^ 0xF) + 1;
988 	line = (line * ifactor) + bank->lm;
989 
990 	/*
991 	 * Compute the physical address assuming that there are 64 data bytes
992 	 * in a cache line.
993 	 */
994 	*addr = (line << 6) + seg->base;
995 	*addr += remainder * 16;
996 
997 	return (0);
998 }
999 
1000 /*
1001  * Translate a physical address to a <DIMM, offset> pair.
1002  */
1003 static void
1004 mc_addr_to_offset(struct seg_info *seg,
1005     struct bank_info *bank, uint64_t addr, uint64_t *off)
1006 {
1007 	uint64_t base, size, line, remainder;
1008 	uint32_t ifactor;
1009 
1010 	/*
1011 	 * Compute the line within the segment assuming that there are 64 data
1012 	 * bytes in a cache line.
1013 	 */
1014 	line = (addr - seg->base) / 64;
1015 
1016 	/*
1017 	 * The lm (lower match) field from the Memory Address Decoding Register
1018 	 * for this bank determines which lines within a memory segment this
1019 	 * bank should respond to.  These are the actual address bits the
1020 	 * interleave is done over (See the Cheetah PRM).
1021 	 * In other words, the lm field indicates the order in which the cache
1022 	 * lines are distributed across the banks of a segment, and thusly it
1023 	 * can be used to compute the line within this bank. This is the same as
1024 	 * the line within the half-dimm. This is because each DIMM in a bank
1025 	 * contributes uniformly to every cache line.
1026 	 */
1027 	ifactor = (bank->lk ^ 0xF) + 1;
1028 	line = (line - bank->lm)/ifactor;
1029 
1030 	/*
1031 	 * Compute the offset within the half-dimm. This depends on whether
1032 	 * or not the bank is a front logical bank or a back logical bank.
1033 	 */
1034 	*off = line * QWORD_SIZE_BYTES;
1035 
1036 	/*
1037 	 * Compute the half-dimm size in bytes.
1038 	 * Note that bank->size represents the number of data bytes,
1039 	 * and does not include the additional bits used for ecc, mtag,
1040 	 * and mtag ecc information in each 144-bit quadword.
1041 	 * For calculating the offset to a checkword we need the size
1042 	 * including the additional 8 bytes for each 64 data bytes of
1043 	 * a cache line.
1044 	 */
1045 	size = ((bank->size / 4) / 64) * 72;
1046 
1047 	/*
1048 	 * Compute the offset within the dimm to the nearest line. This depends
1049 	 * on whether or not the bank is a front logical bank or a back logical
1050 	 * bank.
1051 	 */
1052 	base = size * bank->pos;
1053 	*off += base;
1054 
1055 	remainder = (addr - seg->base) % 64;
1056 	remainder /= 16;
1057 	*off += remainder;
1058 }
1059 
/*
 * A cache line is composed of four quadwords with the associated ECC, the
 * MTag along with its associated ECC. This is depicted below:
 *
 * |                    Data                    |   ECC   | Mtag |MTag ECC|
 *  127                                         0 8       0 2    0 3      0
 *
 * synd_code will be mapped in the following order by mc_get_mem_unum.
 *  143                                         16        7      4        0
 *
 * |  Quadword  0  |  Quadword  1  |  Quadword  2  |  Quadword  3  |
 *  575         432 431         288 287         144 143		   0
 *
 * dimm table: each bit of a cache line needs two bits to represent one of
 *      four dimms. So it needs 144 bytes (576 * 2 / 8). The content is in
 *      big endian order, i.e. dimm_table[0] represents bits 572 to 575.
 *
 * pin table: each bit of a cache line needs one byte to represent the pin
 *      position, where the maximum is 230. So it needs 576 bytes. The order
 *      of the table index is the same as the bit position in a cache line,
 *      i.e. pin_table[0] represents bit 0, MTag ECC 0 of Quadword 3.
 *
 * This is a mapping from syndrome code to the QuadWord logical layout at
 * Safari. Refer to Figure 3-4, Excalibur Architecture Manual.
 * This table could be moved to cheetah.c if other platform teams agree with
 * the bit layout of the QuadWord.
 *
 * The table has 144 entries and is indexed by the syndrome code:
 * codes 0-127 map to quadword bits 16-143, codes 128-136 to bits 7-15,
 * codes 137-139 to bits 4-6 and codes 140-143 to bits 0-3.
 */

static uint8_t qwordmap[] = {
	/* codes 0 - 127 */
	16,  17,  18,  19,  20,  21,  22,  23,
	24,  25,  26,  27,  28,  29,  30,  31,
	32,  33,  34,  35,  36,  37,  38,  39,
	40,  41,  42,  43,  44,  45,  46,  47,
	48,  49,  50,  51,  52,  53,  54,  55,
	56,  57,  58,  59,  60,  61,  62,  63,
	64,  65,  66,  67,  68,  69,  70,  71,
	72,  73,  74,  75,  76,  77,  78,  79,
	80,  81,  82,  83,  84,  85,  86,  87,
	88,  89,  90,  91,  92,  93,  94,  95,
	96,  97,  98,  99,  100, 101, 102, 103,
	104, 105, 106, 107, 108, 109, 110, 111,
	112, 113, 114, 115, 116, 117, 118, 119,
	120, 121, 122, 123, 124, 125, 126, 127,
	128, 129, 130, 131, 132, 133, 134, 135,
	136, 137, 138, 139, 140, 141, 142, 143,
	/* codes 128 - 143 */
	7,   8,   9,   10,  11,  12,  13,  14,
	15,  4,   5,   6,   0,   1,   2,   3
};
1100 
1101 
1102 /* ARGSUSED */
1103 static int
1104 mc_get_mem_unum(int synd_code, uint64_t paddr, char *buf, int buflen, int *lenp)
1105 {
1106 	int i, upper_pa, lower_pa, dimmoffset;
1107 	int quadword, pos_cacheline, position, index, idx4dimm;
1108 	int qwlayout = synd_code;
1109 	short offset, data;
1110 	char unum[UNUM_NAMLEN];
1111 	struct dimm_info *dimmp;
1112 	struct pin_info *pinp;
1113 	struct bank_info *bank;
1114 
1115 	/*
1116 	 * Enforce old Openboot requirement for synd code, either a single-bit
1117 	 * code from 0..QWORD_SIZE-1 or -1 (multi-bit error).
1118 	 */
1119 	if (qwlayout < -1 || qwlayout >= QWORD_SIZE)
1120 		return (EINVAL);
1121 
1122 	unum[0] = '\0';
1123 
1124 	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
1125 	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
1126 
1127 	DPRINTF(MC_GUNUM_DEBUG, ("qwlayout %d\n", qwlayout));
1128 
1129 	/*
1130 	 * Scan all logical banks to get one responding to the physical
1131 	 * address. Then compute the index to look up dimm and pin tables
1132 	 * to generate the unum.
1133 	 */
1134 	mutex_enter(&mcdatamutex);
1135 	bank = (struct bank_info *)bank_head;
1136 	while (bank != NULL) {
1137 		int bankid, mcid, bankno_permc;
1138 
1139 		bankid = bank->bank_node.id;
1140 		bankno_permc = bankid % NBANKS;
1141 		mcid = bankid / NBANKS;
1142 
1143 		/*
1144 		 * The Address Decoding logic decodes the different fields
1145 		 * in the Memory Address Decoding register to determine
1146 		 * whether a particular logical bank should respond to a
1147 		 * physical address.
1148 		 */
1149 		if ((!bank->valid) || ((~(~(upper_pa ^ bank->um) |
1150 		    bank->uk)) || (~(~(lower_pa ^ bank->lm) | bank->lk)))) {
1151 			bank = (struct bank_info *)bank->bank_node.next;
1152 			continue;
1153 		}
1154 
1155 		dimmoffset = (bankno_permc % NDGRPS) * NDIMMS;
1156 
1157 		dimmp = (struct dimm_info *)bank->dimminfop;
1158 		ASSERT(dimmp != NULL);
1159 
1160 		if ((qwlayout >= 0) && (qwlayout < QWORD_SIZE)) {
1161 			/*
1162 			 * single-bit error handling, we can identify specific
1163 			 * DIMM.
1164 			 */
1165 
1166 			pinp = (struct pin_info *)&dimmp->data[0];
1167 
1168 			if (!dimmp->sym_flag)
1169 				pinp++;
1170 
1171 			quadword = (paddr & 0x3f) / 16;
1172 			/* or quadword = (paddr >> 4) % 4; */
1173 			pos_cacheline = ((3 - quadword) * QWORD_SIZE) +
1174 			    qwordmap[qwlayout];
1175 			position = 575 - pos_cacheline;
1176 			index = position * 2 / 8;
1177 			offset = position % 4;
1178 
1179 			/*
1180 			 * Trade-off: We couldn't add pin number to
1181 			 * unum string because statistic number
1182 			 * pumps up at the corresponding dimm not pin.
1183 			 * (void) sprintf(unum, "Pin %1u ", (uint_t)
1184 			 * pinp->pintable[pos_cacheline]);
1185 			 */
1186 			DPRINTF(MC_GUNUM_DEBUG, ("Pin number %1u\n",
1187 			    (uint_t)pinp->pintable[pos_cacheline]));
1188 			data = pinp->dimmtable[index];
1189 			idx4dimm = (data >> ((3 - offset) * 2)) & 3;
1190 
1191 			(void) strncpy(unum,
1192 			    (char *)dimmp->label[dimmoffset + idx4dimm],
1193 			    UNUM_NAMLEN);
1194 			DPRINTF(MC_GUNUM_DEBUG, ("unum %s\n", unum));
1195 			/*
1196 			 * platform hook for adding label information to unum.
1197 			 */
1198 			mc_add_mem_unum_label(unum, mcid, bankno_permc,
1199 			    idx4dimm);
1200 		} else {
1201 			char *p = unum;
1202 			size_t res = UNUM_NAMLEN;
1203 
1204 			/*
1205 			 * multi-bit error handling, we can only identify
1206 			 * bank of DIMMs.
1207 			 */
1208 
1209 			for (i = 0; (i < NDIMMS) && (res > 0); i++) {
1210 				(void) snprintf(p, res, "%s%s",
1211 				    i == 0 ? "" : " ",
1212 				    (char *)dimmp->label[dimmoffset + i]);
1213 				res -= strlen(p);
1214 				p += strlen(p);
1215 			}
1216 
1217 			/*
1218 			 * platform hook for adding label information
1219 			 * to unum.
1220 			 */
1221 			mc_add_mem_unum_label(unum, mcid, bankno_permc, -1);
1222 		}
1223 		mutex_exit(&mcdatamutex);
1224 		if ((strlen(unum) >= UNUM_NAMLEN) ||
1225 		    (strlen(unum) >= buflen)) {
1226 			return (ENAMETOOLONG);
1227 		} else {
1228 			(void) strncpy(buf, unum, buflen);
1229 			*lenp = strlen(buf);
1230 			return (0);
1231 		}
1232 	}	/* end of while loop for logical bank list */
1233 
1234 	mutex_exit(&mcdatamutex);
1235 	return (ENXIO);
1236 }
1237 
1238 /* ARGSUSED */
1239 static int
1240 mc_get_mem_offset(uint64_t paddr, uint64_t *offp)
1241 {
1242 	int upper_pa, lower_pa;
1243 	struct bank_info *bank;
1244 	struct seg_info *seg;
1245 
1246 	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
1247 	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
1248 
1249 	/*
1250 	 * Scan all logical banks to get one responding to the physical
1251 	 * address.
1252 	 */
1253 	mutex_enter(&mcdatamutex);
1254 	bank = (struct bank_info *)bank_head;
1255 	while (bank != NULL) {
1256 		/*
1257 		 * The Address Decoding logic decodes the different fields
1258 		 * in the Memory Address Decoding register to determine
1259 		 * whether a particular logical bank should respond to a
1260 		 * physical address.
1261 		 */
1262 		if ((!bank->valid) || ((~(~(upper_pa ^ bank->um) |
1263 		    bank->uk)) || (~(~(lower_pa ^ bank->lm) | bank->lk)))) {
1264 			bank = (struct bank_info *)bank->bank_node.next;
1265 			continue;
1266 		}
1267 
1268 		seg = (struct seg_info *)mc_node_get(bank->seg_id, seg_head);
1269 		ASSERT(seg != NULL);
1270 		ASSERT(paddr >= seg->base);
1271 
1272 		mc_addr_to_offset(seg, bank, paddr, offp);
1273 
1274 		mutex_exit(&mcdatamutex);
1275 		return (0);
1276 	}
1277 
1278 	mutex_exit(&mcdatamutex);
1279 	return (ENXIO);
1280 }
1281 
1282 /*
1283  * Translate a DIMM <id, offset> pair to a physical address.
1284  */
1285 static int
1286 mc_get_mem_addr(int mcid, char *sid, uint64_t off, uint64_t *paddr)
1287 {
1288 	struct seg_info *seg;
1289 	struct bank_info *bank;
1290 	int first_seg_id;
1291 	int i, found;
1292 
1293 	ASSERT(sid != NULL);
1294 
1295 	mutex_enter(&mcdatamutex);
1296 
1297 	rw_enter(&mcdimmsids_rw, RW_READER);
1298 
1299 	/*
1300 	 * If DIMM serial ids have not been cached yet, tell the
1301 	 * caller to try again.
1302 	 */
1303 	if (mc_dimm_sids == NULL) {
1304 		rw_exit(&mcdimmsids_rw);
1305 		return (EAGAIN);
1306 	}
1307 
1308 	for (i = 0; i < max_entries; i++) {
1309 		if (mc_dimm_sids[i].mcid == mcid)
1310 			break;
1311 	}
1312 
1313 	if (i == max_entries) {
1314 		rw_exit(&mcdimmsids_rw);
1315 		mutex_exit(&mcdatamutex);
1316 		return (ENODEV);
1317 	}
1318 
1319 	first_seg_id = mc_dimm_sids[i].seg_id;
1320 
1321 	seg = (struct seg_info *)mc_node_get(first_seg_id, seg_head);
1322 
1323 	rw_exit(&mcdimmsids_rw);
1324 
1325 	if (seg == NULL) {
1326 		mutex_exit(&mcdatamutex);
1327 		return (ENODEV);
1328 	}
1329 
1330 	found = 0;
1331 
1332 	for (bank = seg->hb_inseg; bank; bank = bank->n_inseg) {
1333 		ASSERT(bank->valid);
1334 
1335 		for (i = 0; i < NDIMMS; i++) {
1336 			if (strncmp((char *)bank->dimmsidp[i], sid,
1337 			    DIMM_SERIAL_ID_LEN)  == 0)
1338 				break;
1339 		}
1340 
1341 		if (i == NDIMMS)
1342 			continue;
1343 
1344 		if (mc_offset_to_addr(seg, bank, off, paddr) == -1)
1345 			continue;
1346 		found = 1;
1347 		break;
1348 	}
1349 
1350 	if (found) {
1351 		mutex_exit(&mcdatamutex);
1352 		return (0);
1353 	}
1354 
1355 	/*
1356 	 * If a bank wasn't found, it may be in another segment.
1357 	 * This can happen if the different logical banks of an MC
1358 	 * have different interleave factors.  To deal with this
1359 	 * possibility, we'll do a brute-force search for banks
1360 	 * for this MC with a different seg id then above.
1361 	 */
1362 	bank = (struct bank_info *)bank_head;
1363 	while (bank != NULL) {
1364 
1365 		if (!bank->valid) {
1366 			bank = (struct bank_info *)bank->bank_node.next;
1367 			continue;
1368 		}
1369 
1370 		if (bank->bank_node.id / NBANKS != mcid) {
1371 			bank = (struct bank_info *)bank->bank_node.next;
1372 			continue;
1373 		}
1374 
1375 		/* Ignore banks in the segment we looked in above. */
1376 		if (bank->seg_id == mc_dimm_sids[i].seg_id) {
1377 			bank = (struct bank_info *)bank->bank_node.next;
1378 			continue;
1379 		}
1380 
1381 		for (i = 0; i < NDIMMS; i++) {
1382 			if (strncmp((char *)bank->dimmsidp[i], sid,
1383 			    DIMM_SERIAL_ID_LEN)  == 0)
1384 				break;
1385 		}
1386 
1387 		if (i == NDIMMS) {
1388 			bank = (struct bank_info *)bank->bank_node.next;
1389 			continue;
1390 		}
1391 
1392 		seg = (struct seg_info *)mc_node_get(bank->seg_id, seg_head);
1393 
1394 		if (mc_offset_to_addr(seg, bank, off, paddr) == -1) {
1395 			bank = (struct bank_info *)bank->bank_node.next;
1396 			continue;
1397 		}
1398 
1399 		found = 1;
1400 		break;
1401 	}
1402 
1403 	mutex_exit(&mcdatamutex);
1404 
1405 	if (found)
1406 		return (0);
1407 	else
1408 		return (ENOENT);
1409 }
1410 
1411 static int
1412 mc_get_mem_info(int synd_code, uint64_t paddr,
1413     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
1414     int *segsp, int *banksp, int *mcidp)
1415 {
1416 	int upper_pa, lower_pa;
1417 	struct bank_info *bankp;
1418 
1419 	if (synd_code < -1 || synd_code >= QWORD_SIZE)
1420 		return (EINVAL);
1421 
1422 	upper_pa = (paddr & MADR_UPA_MASK) >> MADR_UPA_SHIFT;
1423 	lower_pa = (paddr & MADR_LPA_MASK) >> MADR_LPA_SHIFT;
1424 
1425 	/*
1426 	 * Scan all logical banks to get one responding to the physical
1427 	 * address.
1428 	 */
1429 	mutex_enter(&mcdatamutex);
1430 	bankp = (struct bank_info *)bank_head;
1431 	while (bankp != NULL) {
1432 		struct seg_info *segp;
1433 		int bankid, mcid;
1434 
1435 		bankid = bankp->bank_node.id;
1436 		mcid = bankid / NBANKS;
1437 
1438 		/*
1439 		 * The Address Decoding logic decodes the different fields
1440 		 * in the Memory Address Decoding register to determine
1441 		 * whether a particular logical bank should respond to a
1442 		 * physical address.
1443 		 */
1444 		if ((!bankp->valid) || ((~(~(upper_pa ^ bankp->um) |
1445 		    bankp->uk)) || (~(~(lower_pa ^ bankp->lm) | bankp->lk)))) {
1446 			bankp = (struct bank_info *)bankp->bank_node.next;
1447 			continue;
1448 		}
1449 
1450 		/*
1451 		 * Get the corresponding segment.
1452 		 */
1453 		if ((segp = (struct seg_info *)mc_node_get(bankp->seg_id,
1454 		    seg_head)) == NULL) {
1455 			mutex_exit(&mcdatamutex);
1456 			return (EFAULT);
1457 		}
1458 
1459 		*mem_sizep = memsize;
1460 		*seg_sizep = segp->size;
1461 		*bank_sizep = bankp->size;
1462 		*segsp = nsegments;
1463 		*banksp = segp->nbanks;
1464 		*mcidp = mcid;
1465 
1466 		mutex_exit(&mcdatamutex);
1467 
1468 		return (0);
1469 
1470 	}	/* end of while loop for logical bank list */
1471 
1472 	mutex_exit(&mcdatamutex);
1473 	return (ENXIO);
1474 }
1475 
1476 /*
1477  * Construct lists for an enabled MC where size of memory is 0.
1478  * The lists are connected as follows:
1479  * Attached MC -> device group list -> device list(per devgrp).
1480  */
1481 static void
1482 mc_construct(int mc_id, void *dimminfop)
1483 {
1484 	int i, j, idx, dmidx;
1485 	struct mctrl_info *mctrl;
1486 	struct dgrp_info *dgrp;
1487 	struct device_info *dev;
1488 	struct	dimm_info *dimmp = (struct  dimm_info *)dimminfop;
1489 
1490 	mutex_enter(&mcdatamutex);
1491 	/* allocate for mctrl_info and bank_info */
1492 	if ((mctrl = (struct mctrl_info *)mc_node_get(mc_id,
1493 	    mctrl_head)) != NULL) {
1494 		cmn_err(CE_WARN, "mc_construct: mctrl %d exists\n", mc_id);
1495 		mutex_exit(&mcdatamutex);
1496 		return;
1497 	}
1498 
1499 	mctrl = kmem_zalloc(sizeof (struct mctrl_info), KM_SLEEP);
1500 
1501 	/*
1502 	 * If dimminfop is NULL, the Memory Controller is disable, and
1503 	 * the number of device group will be zero.
1504 	 */
1505 	if (dimminfop == NULL) {
1506 		mctrl->mctrl_node.id = mc_id;
1507 		mctrl->ndevgrps = 0;
1508 		mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1509 		mutex_exit(&mcdatamutex);
1510 		return;
1511 	}
1512 
1513 	/* add the entry on dgrp_info list */
1514 	for (i = 0; i < NDGRPS; i++) {
1515 		idx = mc_id * NDGRPS + i;
1516 		mctrl->devgrpids[i] = idx;
1517 		if ((dgrp = (struct dgrp_info *)mc_node_get(idx, dgrp_head))
1518 		    != NULL) {
1519 			cmn_err(CE_WARN, "mc_construct: devgrp %d exists\n",
1520 			    idx);
1521 			continue;
1522 		}
1523 
1524 		dgrp = kmem_zalloc(sizeof (struct dgrp_info), KM_SLEEP);
1525 
1526 		/* add the entry on device_info list */
1527 		for (j = 0; j < NDIMMS; j++) {
1528 			dmidx = idx * NDIMMS + j;
1529 			dgrp->deviceids[j] = dmidx;
1530 			if ((dev = (struct device_info *)
1531 			    mc_node_get(dmidx, device_head)) != NULL) {
1532 				cmn_err(CE_WARN, "mc_construct: device %d "
1533 				    "exists\n", dmidx);
1534 				continue;
1535 			}
1536 			dev = kmem_zalloc(sizeof (struct device_info),
1537 			    KM_SLEEP);
1538 			dev->dev_node.id = dmidx;
1539 			dev->size = 0;
1540 			(void) strncpy(dev->label, (char *)
1541 			    dimmp->label[i * NDIMMS + j], MAX_DEVLEN);
1542 
1543 			mc_node_add((mc_dlist_t *)dev, &device_head,
1544 			    &device_tail);
1545 		}	/* for loop for constructing device_info */
1546 
1547 		dgrp->dgrp_node.id = idx;
1548 		dgrp->ndevices = NDIMMS;
1549 		dgrp->size = 0;
1550 		mc_node_add((mc_dlist_t *)dgrp, &dgrp_head, &dgrp_tail);
1551 
1552 	}	/* end of for loop for constructing dgrp_info list */
1553 
1554 	mctrl->mctrl_node.id = mc_id;
1555 	mctrl->ndevgrps = NDGRPS;
1556 	mc_node_add((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
1557 	mutex_exit(&mcdatamutex);
1558 }
1559 
/*
 * Construct lists for Memory Configuration at logical viewpoint.
 *
 * Retrieve information from Memory Address Decoding Register and set up
 * bank and segment lists. Link bank to its corresponding device group, and
 * update size of device group and devices. Also connect bank to the segment.
 *
 * Memory Address Decoding Register
 * -------------------------------------------------------------------------
 * |63|62    53|52      41|40  37|36     20|19 18|17  14|13 12|11  8|7     0|
 * |-----------|----------|------|---------|-----|------|-----|-----|-------|
 * |V |    -   |    UK    |   -  |    UM   |  -  |  LK  |  -  | LM  |   -   |
 * -------------------------------------------------------------------------
 *
 * mc_id	id of the memory controller owning the bank
 * bank_no	bank number within the controller
 * reg		raw value of the bank's Memory Address Decoding Register
 * dimminfop	dimm information stored in the new bank entry
 *
 * Returns 0 on success, which includes the cases where the bank already
 * exists (a warning is issued) or the register's valid bit is clear (the
 * bank is only put on the bank list). Returns -1 if the device group or
 * one of its devices is missing.
 *
 * NOTE(review): on the missing-device path the bank is freed after
 * dgrp->size, and the size of any device processed earlier in the loop,
 * have already been increased.
 */

static int
mlayout_add(int mc_id, int bank_no, uint64_t reg, void *dimminfop)
{
	int i, dmidx, idx;
	uint32_t ifactor;
	int status = 0;
	uint64_t size, base;
	struct seg_info *seg_curr;
	struct bank_info *bank_curr;
	struct dgrp_info *dgrp;
	struct device_info *dev;
	/*
	 * Overlay used to pick the fields out of the register value.
	 * NOTE(review): this depends on the compiler allocating bit-fields
	 * starting from the most significant bit - presumably true for the
	 * sun4u target; confirm.
	 */
	union {
		struct {
			uint64_t valid	: 1;
			uint64_t resrv1	: 10;
			uint64_t uk	: 12;
			uint64_t resrv2	: 4;
			uint64_t um	: 17;
			uint64_t resrv3	: 2;
			uint64_t lk	: 4;
			uint64_t resrv4	: 2;
			uint64_t lm	: 4;
			uint64_t resrv5	: 8;
		} _s;
		uint64_t madreg;
	} mcreg;

	mcreg.madreg = reg;

	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add: mc_id %d, bank num "
	    "%d, reg 0x%lx\n", mc_id, bank_no, reg));

	/* add the entry on bank_info list */
	idx = mc_id * NBANKS + bank_no;

	mutex_enter(&mcdatamutex);
	if ((bank_curr = (struct bank_info *)mc_node_get(idx, bank_head))
	    != NULL) {
		cmn_err(CE_WARN, "mlayout_add: bank %d exists\n", bank_no);
		goto exit;
	}

	bank_curr = kmem_zalloc(sizeof (struct bank_info), KM_SLEEP);
	bank_curr->bank_node.id = idx;
	bank_curr->valid = mcreg._s.valid;
	bank_curr->dimminfop = dimminfop;

	/* An invalid bank is recorded on the bank list and nothing else. */
	if (!mcreg._s.valid) {
		mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
		goto exit;
	}

	/*
	 * size of a logical bank = size of segment / interleave factor
	 * This formula works not only for a regular configuration,
	 * i.e. the number of banks in a segment equals the max
	 * interleave factor, but also for special cases, say 3 bank
	 * interleave. One bank is 2 way interleave and the other two are
	 * 4 way. So the sizes of banks are size of segment/2 and /4
	 * respectively.
	 *
	 * The segment size is taken from the low 10 bits of uk, in units
	 * of 0x4000000 (64MB) bytes; the base is um with the uk mask bits
	 * cleared, shifted into place.
	 */
	ifactor = (mcreg._s.lk ^ 0xF) + 1;
	size = (((mcreg._s.uk & 0x3FF) + 1) * 0x4000000) / ifactor;
	base = mcreg._s.um & ~mcreg._s.uk;
	base <<= MADR_UPA_SHIFT;

	bank_curr->uk = mcreg._s.uk;
	bank_curr->um = mcreg._s.um;
	bank_curr->lk = mcreg._s.lk;
	bank_curr->lm = mcreg._s.lm;
	bank_curr->size = size;

	/*
	 * The bank's position depends on which halves of the DIMMs it consists
	 * of. The front-side halves of the 4 DIMMs constitute the front bank
	 * and the back-side halves constitute the back bank. Bank numbers
	 * 0 and 1 are front-side banks and bank numbers 2 and 3 are back side
	 * banks.
	 */
	bank_curr->pos = bank_no >> 1;
	ASSERT((bank_curr->pos == 0) || (bank_curr->pos == 1));

	/*
	 * Workaround to keep gcc and SS12 lint happy.
	 * Lint expects lk, uk and um in the format statement below
	 * to use %lx, but this produces a warning when compiled with
	 * gcc.
	 */

#if defined(lint)
	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add 3: logical bank num %d, "
	    "lk 0x%lx uk 0x%lx um 0x%lx ifactor 0x%x size 0x%lx base 0x%lx\n",
	    idx, mcreg._s.lk, mcreg._s.uk, mcreg._s.um, ifactor, size, base));
#else /* lint */
	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add 3: logical bank num %d, "
	    "lk 0x%x uk 0x%x um 0x%x ifactor 0x%x size 0x%lx base 0x%lx\n",
	    idx, mcreg._s.lk, mcreg._s.uk, mcreg._s.um, ifactor, size, base));
#endif /* lint */

	/* connect the entry and update the size on dgrp_info list */
	idx = mc_id * NDGRPS + (bank_no % NDGRPS);
	if ((dgrp = (struct dgrp_info *)mc_node_get(idx, dgrp_head)) == NULL) {
		/* all available dgrps should be linked at mc_construct */
		cmn_err(CE_WARN, "mlayout_add: dgrp %d doesn't exist\n", idx);
		kmem_free(bank_curr, sizeof (struct bank_info));
		status = -1;
		goto exit;
	}

	bank_curr->devgrp_id = idx;
	dgrp->size += size;

	/* Update the size of entry on device_info list */
	for (i = 0; i < NDIMMS; i++) {
		dmidx = dgrp->dgrp_node.id * NDIMMS + i;
		dgrp->deviceids[i] = dmidx;

		/* available devices should be linked at mc_construct */
		if ((dev = (struct device_info *)mc_node_get(dmidx,
		    device_head)) == NULL) {
			cmn_err(CE_WARN, "mlayout_add:dev %d doesn't exist\n",
			    dmidx);
			kmem_free(bank_curr, sizeof (struct bank_info));
			status = -1;
			goto exit;
		}

		/* Each DIMM of the group holds an equal share of the bank. */
		dev->size += (size / NDIMMS);

		DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add DIMM:id %d, size %lu\n",
		    dmidx, size));
	}

	/*
	 * Get the segment by matching the base address, link this bank
	 * to the segment. If not matched, allocate a new segment and
	 * add it to the segment list.
	 */
	if (seg_curr = seg_match_base(base)) {
		seg_curr->nbanks++;
		seg_curr->size += size;
		/* The segment keeps the largest interleave factor seen. */
		if (ifactor > seg_curr->ifactor)
			seg_curr->ifactor = ifactor;
		bank_curr->seg_id = seg_curr->seg_node.id;
	} else {
		seg_curr = (struct seg_info *)
		    kmem_zalloc(sizeof (struct seg_info), KM_SLEEP);
		bank_curr->seg_id = seg_id;
		seg_curr->seg_node.id = seg_id++;
		seg_curr->base = base;
		seg_curr->size = size;
		seg_curr->nbanks = 1;
		seg_curr->ifactor = ifactor;
		mc_node_add((mc_dlist_t *)seg_curr, &seg_head, &seg_tail);

		nsegments++;
	}

	/* Get the local id of bank which is only unique per segment. */
	bank_curr->local_id = seg_curr->nbanks - 1;

	/* add bank at the end of the list; not sorted by bankid */
	if (seg_curr->hb_inseg != NULL) {
		bank_curr->p_inseg = seg_curr->tb_inseg;
		bank_curr->n_inseg = seg_curr->tb_inseg->n_inseg;
		seg_curr->tb_inseg->n_inseg = bank_curr;
		seg_curr->tb_inseg = bank_curr;
	} else {
		bank_curr->n_inseg = bank_curr->p_inseg = NULL;
		seg_curr->hb_inseg = seg_curr->tb_inseg = bank_curr;
	}
	DPRINTF(MC_CNSTRC_DEBUG, ("mlayout_add: + bank to seg, id %d\n",
	    seg_curr->seg_node.id));

	/*
	 * If the serial id cache exists, hook the new bank up to it.
	 * mc_dimm_sids is tested before mcdimmsids_rw is taken; mcdatamutex
	 * is held at this point.
	 */
	if (mc_dimm_sids) {
		rw_enter(&mcdimmsids_rw, RW_WRITER);
		mc_update_bank(bank_curr);
		rw_exit(&mcdimmsids_rw);
	}
	mc_node_add((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);

	/* Keep the driver-wide totals current. */
	memsize += size;
	if (seg_curr->nbanks > maxbanks)
		maxbanks = seg_curr->nbanks;

exit:
	mutex_exit(&mcdatamutex);
	return (status);
}
1765 
/*
 * Delete nodes related to the given MC on mc, device group, device,
 * and bank lists. Moreover, delete corresponding segment if its connected
 * banks are all removed.
 *
 * The "delete" argument is 1 if this is called as a result of DDI_DETACH. In
 * this case, the DIMM data structures need to be deleted. The argument is
 * 0 if this is called as a result of DDI_SUSPEND/DDI_RESUME. In this case,
 * the DIMM data structures are left alone.
 *
 * Missing list entries are reported with a warning and skipped. The whole
 * operation runs with mcdatamutex held.
 */
static void
mlayout_del(int mc_id, int delete)
{
	int i, j, dgrpid, devid, bankid, ndevgrps;
	struct seg_info *seg;
	struct bank_info *bank_curr;
	struct mctrl_info *mctrl;
	mc_dlist_t *dgrp_ptr;
	mc_dlist_t *dev_ptr;
	uint64_t base;

	mutex_enter(&mcdatamutex);

	/* delete mctrl_info */
	if ((mctrl = (struct mctrl_info *)mc_node_get(mc_id, mctrl_head)) !=
	    NULL) {
		/* Read ndevgrps before the entry is freed. */
		ndevgrps = mctrl->ndevgrps;
		mc_node_del((mc_dlist_t *)mctrl, &mctrl_head, &mctrl_tail);
		kmem_free(mctrl, sizeof (struct mctrl_info));
		nmcs--;

		/*
		 * There is no other list left for disabled MC.
		 */
		if (ndevgrps == 0) {
			mutex_exit(&mcdatamutex);
			return;
		}
	} else
		cmn_err(CE_WARN, "MC mlayout_del: mctrl is not found\n");

	/* Delete device groups and devices of the detached MC */
	for (i = 0; i < NDGRPS; i++) {
		dgrpid = mc_id * NDGRPS + i;
		if (!(dgrp_ptr = mc_node_get(dgrpid, dgrp_head))) {
			cmn_err(CE_WARN, "mlayout_del: no devgrp %d\n", dgrpid);
			continue;
		}

		for (j = 0; j < NDIMMS; j++) {
			devid = dgrpid * NDIMMS + j;
			if (dev_ptr = mc_node_get(devid, device_head)) {
				mc_node_del(dev_ptr, &device_head,
				    &device_tail);
				kmem_free(dev_ptr, sizeof (struct device_info));
			} else {
				cmn_err(CE_WARN, "mlayout_del: no dev %d\n",
				    devid);
			}
		}

		mc_node_del(dgrp_ptr, &dgrp_head, &dgrp_tail);
		kmem_free(dgrp_ptr, sizeof (struct dgrp_info));
	}

	/* Delete banks and segments if it has no bank */
	for (i = 0; i < NBANKS; i++) {
		bankid = mc_id * NBANKS + i;
		DPRINTF(MC_DESTRC_DEBUG, ("bank id %d\n", bankid));
		if (!(bank_curr = (struct bank_info *)mc_node_get(bankid,
		    bank_head))) {
			cmn_err(CE_WARN, "mlayout_del: no bank %d\n", bankid);
			continue;
		}

		/* Only valid banks were linked to a segment and counted. */
		if (bank_curr->valid) {
			/* Same base computation as used when adding. */
			base = bank_curr->um & ~bank_curr->uk;
			base <<= MADR_UPA_SHIFT;
			bank_curr->valid = 0;
			memsize -= bank_curr->size;

			/* Delete bank at segment and segment if no bank left */
			if (!(seg = seg_match_base(base))) {
				cmn_err(CE_WARN, "mlayout_del: no seg\n");
				mc_node_del((mc_dlist_t *)bank_curr, &bank_head,
				    &bank_tail);
				kmem_free(bank_curr, sizeof (struct bank_info));
				continue;
			}

			/* update the bank list at the segment */
			if (bank_curr->n_inseg == NULL) {
				/* node is at the tail of list */
				seg->tb_inseg = bank_curr->p_inseg;
			} else {
				bank_curr->n_inseg->p_inseg =
				    bank_curr->p_inseg;
			}

			if (bank_curr->p_inseg == NULL) {
				/* node is at the head of list */
				seg->hb_inseg = bank_curr->n_inseg;
			} else {
				bank_curr->p_inseg->n_inseg =
				    bank_curr->n_inseg;
			}

			seg->nbanks--;
			seg->size -= bank_curr->size;

			/* The last bank is gone, so the segment goes too. */
			if (seg->nbanks == 0) {
				mc_node_del((mc_dlist_t *)seg, &seg_head,
				    &seg_tail);
				kmem_free(seg, sizeof (struct seg_info));
				nsegments--;
			}

		}
		mc_node_del((mc_dlist_t *)bank_curr, &bank_head, &bank_tail);
		kmem_free(bank_curr, sizeof (struct bank_info));
	}	/* end of for loop for four banks */

	/*
	 * On detach, invalidate this MC's entry in the serial id cache and
	 * release its serial id array.
	 */
	if (mc_dimm_sids && delete) {
		rw_enter(&mcdimmsids_rw, RW_WRITER);
		i = mc_get_sid_cache_index(mc_id);
		if (i >= 0) {
			mc_dimm_sids[i].state = MC_DIMM_SIDS_INVALID;
			if (mc_dimm_sids[i].sids) {
				kmem_free(mc_dimm_sids[i].sids,
				    sizeof (dimm_sid_t) * (NDGRPS * NDIMMS));
				mc_dimm_sids[i].sids = NULL;
			}
		}
		rw_exit(&mcdimmsids_rw);
	}

	mutex_exit(&mcdatamutex);
}
1904 
1905 /*
1906  * Search the segment in the list starting at seg_head by base address
1907  * input: base address
1908  * return: pointer of found segment or null if not found.
1909  */
1910 static struct seg_info *
1911 seg_match_base(u_longlong_t base)
1912 {
1913 	static struct seg_info *seg_ptr;
1914 
1915 	seg_ptr = (struct seg_info *)seg_head;
1916 	while (seg_ptr != NULL) {
1917 		DPRINTF(MC_LIST_DEBUG, ("seg_match: base %lu,given base %llu\n",
1918 		    seg_ptr->base, base));
1919 		if (seg_ptr->base == base)
1920 			break;
1921 		seg_ptr = (struct seg_info *)seg_ptr->seg_node.next;
1922 	}
1923 	return (seg_ptr);
1924 }
1925 
1926 /*
1927  * mc_dlist is a double linking list, including unique id, and pointers to
1928  * next, and previous nodes. seg_info, bank_info, dgrp_info, device_info,
1929  * and mctrl_info has it at the top to share the operations, add, del, and get.
1930  *
1931  * The new node is added at the tail and is not sorted.
1932  *
1933  * Input: The pointer of node to be added, head and tail of the list
1934  */
1935 
1936 static void
1937 mc_node_add(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1938 {
1939 	DPRINTF(MC_LIST_DEBUG, ("mc_node_add: node->id %d head %p tail %p\n",
1940 	    node->id, (void *)*head, (void *)*tail));
1941 
1942 	if (*head != NULL) {
1943 		node->prev = *tail;
1944 		node->next = (*tail)->next;
1945 		(*tail)->next = node;
1946 		*tail = node;
1947 	} else {
1948 		node->next = node->prev = NULL;
1949 		*head = *tail = node;
1950 	}
1951 }
1952 
1953 /*
1954  * Input: The pointer of node to be deleted, head and tail of the list
1955  *
1956  * Deleted node will be at the following positions
1957  * 1. At the tail of the list
1958  * 2. At the head of the list
1959  * 3. At the head and tail of the list, i.e. only one left.
1960  * 4. At the middle of the list
1961  */
1962 
1963 static void
1964 mc_node_del(mc_dlist_t *node, mc_dlist_t **head, mc_dlist_t **tail)
1965 {
1966 	if (node->next == NULL) {
1967 		/* deleted node is at the tail of list */
1968 		*tail = node->prev;
1969 	} else {
1970 		node->next->prev = node->prev;
1971 	}
1972 
1973 	if (node->prev == NULL) {
1974 		/* deleted node is at the head of list */
1975 		*head = node->next;
1976 	} else {
1977 		node->prev->next = node->next;
1978 	}
1979 }
1980 
1981 /*
1982  * Search the list from the head of the list to match the given id
1983  * Input: id and the head of the list
1984  * Return: pointer of found node
1985  */
1986 static mc_dlist_t *
1987 mc_node_get(int id, mc_dlist_t *head)
1988 {
1989 	mc_dlist_t *node;
1990 
1991 	node = head;
1992 	while (node != NULL) {
1993 		DPRINTF(MC_LIST_DEBUG, ("mc_node_get: id %d, given id %d\n",
1994 		    node->id, id));
1995 		if (node->id == id)
1996 			break;
1997 		node = node->next;
1998 	}
1999 	return (node);
2000 }
2001 
2002 /*
2003  * mc-us3 driver allows a platform to add extra label
2004  * information to the unum string. If a platform implements a
2005  * kernel function called plat_add_mem_unum_label() it will be
2006  * executed. This would typically be implemented in the platmod.
2007  */
2008 static void
2009 mc_add_mem_unum_label(char *buf, int mcid, int bank, int dimm)
2010 {
2011 	if (&plat_add_mem_unum_label)
2012 		plat_add_mem_unum_label(buf, mcid, bank, dimm);
2013 }
2014 
2015 static int
2016 mc_get_sid_cache_index(int mcid)
2017 {
2018 	int	i;
2019 
2020 	for (i = 0; i < max_entries; i++) {
2021 		if (mcid == mc_dimm_sids[i].mcid)
2022 			return (i);
2023 	}
2024 
2025 	return (-1);
2026 }
2027 
2028 static void
2029 mc_update_bank(struct bank_info *bank)
2030 {
2031 	int i, j;
2032 	int bankid, mcid, dgrp_no;
2033 
2034 	/*
2035 	 * Mark the MC if DIMM sids are not available.
2036 	 * Mark which segment the DIMMs belong to.  Allocate
2037 	 * space to store DIMM serial ids which are later
2038 	 * provided by the platform layer, and update the bank_info
2039 	 * structure with pointers to its serial ids.
2040 	 */
2041 	bankid = bank->bank_node.id;
2042 	mcid = bankid / NBANKS;
2043 	i = mc_get_sid_cache_index(mcid);
2044 	if (mc_dimm_sids[i].state == MC_DIMM_SIDS_INVALID)
2045 		mc_dimm_sids[i].state = MC_DIMM_SIDS_REQUESTED;
2046 
2047 	mc_dimm_sids[i].seg_id = bank->seg_id;
2048 
2049 	if (mc_dimm_sids[i].sids == NULL) {
2050 		mc_dimm_sids[i].sids = (dimm_sid_t *)kmem_zalloc(
2051 		    sizeof (dimm_sid_t) * (NDGRPS * NDIMMS), KM_SLEEP);
2052 	}
2053 
2054 	dgrp_no = bank->devgrp_id % NDGRPS;
2055 
2056 	for (j = 0; j < NDIMMS; j++) {
2057 		bank->dimmsidp[j] =
2058 		    &mc_dimm_sids[i].sids[j + (NDIMMS * dgrp_no)];
2059 	}
2060 }
2061 
2062 static int
2063 mc_populate_sid_cache(void)
2064 {
2065 	struct bank_info	*bank;
2066 
2067 	if (&plat_populate_sid_cache == 0)
2068 		return (ENOTSUP);
2069 
2070 	ASSERT(RW_WRITE_HELD(&mcdimmsids_rw));
2071 
2072 	bank = (struct bank_info *)bank_head;
2073 	while (bank != NULL) {
2074 		if (!bank->valid) {
2075 			bank = (struct bank_info *)bank->bank_node.next;
2076 			continue;
2077 		}
2078 
2079 		mc_update_bank(bank);
2080 
2081 		bank = (struct bank_info *)bank->bank_node.next;
2082 	}
2083 
2084 
2085 	/*
2086 	 * Call to the platform layer to populate the cache
2087 	 * with DIMM serial ids.
2088 	 */
2089 	return (plat_populate_sid_cache(mc_dimm_sids, max_entries));
2090 }
2091 
2092 static void
2093 mc_init_sid_cache_thr(void)
2094 {
2095 	ASSERT(mc_dimm_sids == NULL);
2096 
2097 	mutex_enter(&mcdatamutex);
2098 	rw_enter(&mcdimmsids_rw, RW_WRITER);
2099 
2100 	mc_dimm_sids = plat_alloc_sid_cache(&max_entries);
2101 	(void) mc_populate_sid_cache();
2102 
2103 	rw_exit(&mcdimmsids_rw);
2104 	mutex_exit(&mcdatamutex);
2105 }
2106 
2107 static int
2108 mc_init_sid_cache(void)
2109 {
2110 	if (&plat_alloc_sid_cache) {
2111 		(void) thread_create(NULL, 0, mc_init_sid_cache_thr, NULL, 0,
2112 		    &p0, TS_RUN, minclsyspri);
2113 		return (0);
2114 	} else
2115 		return (ENOTSUP);
2116 }
2117 
2118 static int
2119 mc_get_mem_sid(int mcid, int dimm, char *buf, int buflen, int *lenp)
2120 {
2121 	int	i;
2122 
2123 	if (buflen < DIMM_SERIAL_ID_LEN)
2124 		return (ENOSPC);
2125 
2126 	/*
2127 	 * If DIMM serial ids have not been cached yet, tell the
2128 	 * caller to try again.
2129 	 */
2130 	if (!rw_tryenter(&mcdimmsids_rw, RW_READER))
2131 		return (EAGAIN);
2132 
2133 	if (mc_dimm_sids == NULL) {
2134 		rw_exit(&mcdimmsids_rw);
2135 		return (EAGAIN);
2136 	}
2137 
2138 	/*
2139 	 * Find dimm serial id using mcid and dimm #
2140 	 */
2141 	for (i = 0; i < max_entries; i++) {
2142 		if (mc_dimm_sids[i].mcid == mcid)
2143 			break;
2144 	}
2145 	if ((i == max_entries) || (!mc_dimm_sids[i].sids)) {
2146 		rw_exit(&mcdimmsids_rw);
2147 		return (ENOENT);
2148 	}
2149 
2150 	(void) strlcpy(buf, mc_dimm_sids[i].sids[dimm],
2151 	    DIMM_SERIAL_ID_LEN);
2152 	*lenp = strlen(buf);
2153 
2154 	rw_exit(&mcdimmsids_rw);
2155 	return (0);
2156 }
2157