xref: /illumos-gate/usr/src/uts/sun4u/io/sbd_mem.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * memory support routines for sbd.
31  */
32 
33 #include <sys/debug.h>
34 #include <sys/types.h>
35 #include <sys/errno.h>
36 #include <sys/param.h>
37 #include <sys/dditypes.h>
38 #include <sys/kmem.h>
39 #include <sys/conf.h>
40 #include <sys/ddi.h>
41 #include <sys/sunddi.h>
42 #include <sys/sunndi.h>
43 #include <sys/ddi_impldefs.h>
44 #include <sys/sysmacros.h>
45 #include <sys/machsystm.h>
46 #include <sys/spitregs.h>
47 #include <sys/cpuvar.h>
48 #include <sys/cpu_module.h>
49 #include <sys/promif.h>
50 #include <sys/memlist_impl.h>
51 #include <sys/mem_cage.h>
52 #include <sys/lgrp.h>
53 #include <sys/platform_module.h>
54 #include <vm/seg_kmem.h>
55 
56 #include <sys/sbdpriv.h>
57 
58 #define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)
59 #define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
60 
61 static int		sbd_post_detach_mem_unit(sbd_mem_unit_t *mp,
62 				sbderror_t *ep);
63 static int		sbd_reserve_mem_spans(memhandle_t *mhp,
64 					struct memlist *mlist);
65 static int		sbd_check_boundaries(struct memlist *orig_memlist,
66 					sbd_mem_unit_t *s_mp,
67 					sbd_mem_unit_t *t_mp);
68 static int		sbd_select_mem_target(sbd_handle_t *hp,
69 				sbd_mem_unit_t *mp, struct memlist *ml);
70 static void		sbd_init_mem_unit_data(sbd_mem_unit_t *mp, sbderror_t
71 					*ep);
72 static int		memlist_canfit(struct memlist *s_mlist,
73 					struct memlist *t_mlist);
74 static void		sbd_mem_cleanup(sbd_mem_unit_t *s_mp,
75 				sbd_mem_unit_t *t_mp, sbderror_t *ep);
76 static void		sbd_flush_ecache(uint64_t a, uint64_t b);
77 
78 struct memlist *
79 sbd_get_memlist(sbd_mem_unit_t *mp, sbderror_t *ep)
80 {
81 	struct memlist	*mlist;
82 	static fn_t	f = "sbd_get_memlist";
83 	sbd_board_t 	*sbp = (sbd_board_t *)mp->sbm_cm.sbdev_sbp;
84 	sbdp_handle_t	*hdp;
85 	sbd_handle_t	*hp = MACHBD2HD(sbp);
86 
87 	PR_MEM("%s...\n", f);
88 
89 	/*
90 	 * Return cached memlist, if present.
91 	 * This memlist will be present following an
92 	 * unconfigure (a.k.a: detach) of this memunit.
93 	 * It should only be used in the case were a configure
94 	 * is bringing this memunit back in without going
95 	 * through the disconnect and connect states.
96 	 */
97 	if (mp->sbm_mlist) {
98 		PR_MEM("%s: found cached memlist\n", f);
99 
100 		mlist = memlist_dup(mp->sbm_mlist);
101 	} else {
102 		/* attempt to construct a memlist using phys_install */
103 
104 		/*
105 		 * NOTE: this code block assumes only one memunit per
106 		 * board.  This is currently safe because the function
107 		 * sbd_init_mem_devlist() forces this assumption to be
108 		 * valid.
109 		 */
110 
111 		/* round down to slice base address */
112 		/* build mlist from the lower layer */
113 		hdp = sbd_get_sbdp_handle(sbp, hp);
114 		mlist = sbdp_get_memlist(hdp, mp->sbm_cm.sbdev_dip);
115 		if (mlist == NULL) {
116 			SBD_GET_PERR(hdp->h_err, ep);
117 			PR_MEM("sbd:%s: failed to get memlist for "
118 				"dip (0x%p) ecode %d errno %d", f,
119 				(void *)mp->sbm_cm.sbdev_dip,
120 				ep->e_code, ep->e_errno);
121 			sbd_release_sbdp_handle(hdp);
122 			return (NULL);
123 		}
124 		sbd_release_sbdp_handle(hdp);
125 	}
126 
127 	PR_MEM("%s: memlist for mem-unit (%d.%d), dip 0x%p:\n",
128 		f, sbp->sb_num,
129 		mp->sbm_cm.sbdev_unum,
130 		(void *)mp->sbm_cm.sbdev_dip);
131 	SBD_MEMLIST_DUMP(mlist);
132 
133 	return (mlist);
134 }
135 
136 int
137 sbd_pre_attach_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum)
138 {
139 	int		err_flag = 0;
140 	sbderror_t	*ep = SBD_HD2ERR(hp);
141 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
142 	int		d, i;
143 	sbdp_handle_t	*hdp;
144 	static fn_t	f = "sbd_pre_attach_mem";
145 
146 	PR_MEM("%s...\n", f);
147 
148 	SBD_SET_ERR(ep, 0);
149 	hdp = sbd_get_sbdp_handle(sbp, hp);
150 
151 	for (d = 0; d < devnum; d++) {
152 		sbd_mem_unit_t	*mp;
153 		int		unit;
154 		dev_info_t	*dip;
155 		sbd_istate_t	state;
156 		int		rv;
157 
158 		/* sbd_get_devlist will not devlist element w/ dip of 0 */
159 		ASSERT(devlist[d].dv_dip != NULL);
160 
161 		dip = devlist[d].dv_dip;
162 		unit = sbdp_get_unit_num(hdp, dip);
163 		if (unit == -1) {
164 			if (hp->h_flags & SBD_IOCTL_FLAG_FORCE)
165 				continue;
166 			else {
167 				SBD_GET_PERR(hdp->h_err, ep);
168 				err_flag = 1;
169 				break;
170 			}
171 		}
172 
173 		mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
174 
175 		ASSERT(mp->sbm_cm.sbdev_sbp == sbp);
176 		ASSERT(unit == mp->sbm_cm.sbdev_unum);
177 
178 		PR_MEM("sbd: OS attach mem-unit (%d.%d)\n",
179 			sbp->sb_num,
180 			mp->sbm_cm.sbdev_unum);
181 
182 		state = mp->sbm_cm.sbdev_state;
183 		switch (state) {
184 		case SBD_STATE_UNCONFIGURED:
185 			/* use memlist cached by sbd_post_detach_mem_unit */
186 			if (mp->sbm_mlist != NULL) {
187 				PR_MEM("%s: recovering from UNCONFIG"
188 					" mem-unit (%d.%d)\n",
189 					f, sbp->sb_num,
190 					mp->sbm_cm.sbdev_unum);
191 
192 				PR_MEM("%s: re-configure cached memlist:\n", f);
193 				SBD_MEMLIST_DUMP(mp->sbm_mlist);
194 
195 				/*
196 				 * kphysm del handle should have been freed
197 				 */
198 				ASSERT((mp->sbm_flags & SBD_MFLAG_RELOWNER)
199 					== 0);
200 			} else {
201 				if (hp->h_flags & SBD_IOCTL_FLAG_FORCE)
202 					continue;
203 				else {
204 					SBD_GET_PERR(hdp->h_err, ep);
205 					err_flag = 1;
206 					PR_MEM("%s: mem-unit (%d.%d)"
207 						" unusable\n",
208 						f, sbp->sb_num,
209 						mp->sbm_cm.sbdev_unum);
210 					break;
211 				}
212 			}
213 
214 			/*FALLTHROUGH*/
215 
216 		case SBD_STATE_CONNECTED:
217 			PR_MEM("%s: reprogramming mem hardware (board %d)\n",
218 				f, sbp->sb_num);
219 
220 			for (i = 0; i < SBD_NUM_MC_PER_BOARD; i++) {
221 				if (mp->sbm_dip[i] == NULL)
222 					continue;
223 				dip = mp->sbm_dip[i];
224 
225 				PR_MEM("%s: enabling mc 0x%p on board %d\n",
226 					f, (void *)dip, sbp->sb_num);
227 
228 				rv = sbdphw_enable_memctrl(hdp, dip);
229 				if (rv < 0) {
230 					SBD_GET_PERR(hdp->h_err, ep);
231 					cmn_err(CE_WARN,
232 					"%s: failed to program mem ctrlr %p on "
233 					"board %d", f, (void *)mp->sbm_dip[i],
234 					sbp->sb_num);
235 					err_flag = 1;
236 				}
237 			}
238 			break;
239 
240 		default:
241 			cmn_err(CE_WARN,
242 				"%s: unexpected state (%d) for mem-unit "
243 				"(%d.%d)", f, state, sbp->sb_num,
244 				mp->sbm_cm.sbdev_unum);
245 			if (SBD_GET_ERR(ep) == 0) {
246 				SBD_SET_ERR(ep, ESBD_STATE);
247 				err_flag = 1;
248 			}
249 			break;
250 		}
251 
252 		/* exit for loop if error encountered */
253 		if (err_flag) {
254 			SBD_SET_ERRSTR(ep,
255 			    sbp->sb_mempath[mp->sbm_cm.sbdev_unum]);
256 			break;
257 		}
258 	}
259 	sbd_release_sbdp_handle(hdp);
260 
261 	return (err_flag ? -1 : 0);
262 }
263 
264 int
265 sbd_post_attach_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum)
266 {
267 	int		d;
268 	sbdp_handle_t	*hdp;
269 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
270 	sbderror_t	*ep = SBD_HD2ERR(hp);
271 	static fn_t	f = "sbd_post_attach_mem";
272 
273 	PR_MEM("%s...\n", f);
274 	hdp = sbd_get_sbdp_handle(sbp, hp);
275 
276 	for (d = 0; d < devnum; d++) {
277 		sbd_mem_unit_t	*mp;
278 		dev_info_t	*dip;
279 		int		unit;
280 		struct memlist	*mlist, *ml;
281 
282 		/* sbd_get_devlist will not devlist element w/ dip of 0 */
283 		ASSERT(devlist[d].dv_dip != NULL);
284 
285 		dip = devlist[d].dv_dip;
286 		unit = sbdp_get_unit_num(hdp, dip);
287 		if (unit == -1) {
288 			if (hp->h_flags & SBD_IOCTL_FLAG_FORCE)
289 				continue;
290 			else {
291 				SBD_GET_PERR(hdp->h_err, ep);
292 				break;
293 			}
294 		}
295 
296 		mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
297 
298 		mlist = sbd_get_memlist(mp, ep);
299 		if (mlist == NULL) {
300 			cmn_err(CE_WARN,
301 				"%s: no memlist for mem-unit (%d.%d)",
302 				f,
303 				sbp->sb_num,
304 				mp->sbm_cm.sbdev_unum);
305 
306 			if (SBD_GET_ERR(ep) == 0) {
307 				SBD_SET_ERR(ep, ESBD_MEMFAIL);
308 				SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
309 			}
310 
311 			continue;
312 		}
313 
314 		/*
315 		 * Verify the memory really did successfully attach
316 		 * by checking for its existence in phys_install.
317 		 */
318 
319 		memlist_read_lock();
320 		if (memlist_intersect(phys_install, mlist) == 0) {
321 			memlist_read_unlock();
322 
323 			cmn_err(CE_WARN,
324 				"%s: mem-unit (%d.%d) memlist not in"
325 				" phys_install", f, sbp->sb_num,
326 				mp->sbm_cm.sbdev_unum);
327 
328 			if (SBD_GET_ERR(ep) == 0) {
329 				SBD_SET_ERR(ep, ESBD_INTERNAL);
330 				SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
331 			}
332 
333 			memlist_delete(mlist);
334 			continue;
335 		}
336 		memlist_read_unlock();
337 
338 		for (ml = mlist; ml != NULL; ml = ml->next) {
339 			sbdp_mem_add_span(hdp, ml->address, ml->size);
340 		}
341 
342 		memlist_delete(mlist);
343 
344 		/*
345 		 * Destroy cached memlist, if any.
346 		 * There will be a cached memlist in sbm_mlist if
347 		 * this board is being configured directly after
348 		 * an unconfigure.
349 		 * To support this transition, sbd_post_detach_mem
350 		 * left a copy of the last known memlist in sbm_mlist.
351 		 * This memlist could differ from any derived from
352 		 * hardware if while this memunit was last configured
353 		 * the system detected and deleted bad pages from
354 		 * phys_install.  The location of those bad pages
355 		 * will be reflected in the cached memlist.
356 		 */
357 		if (mp->sbm_mlist) {
358 			memlist_delete(mp->sbm_mlist);
359 			mp->sbm_mlist = NULL;
360 		}
361 		sbd_init_mem_unit_data(mp, ep);
362 	}
363 
364 	sbd_release_sbdp_handle(hdp);
365 	return (0);
366 }
367 
368 int
369 sbd_pre_detach_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum)
370 {
371 	int		d;
372 	int		unit;
373 	sbdp_handle_t	*hdp;
374 	sbderror_t	*ep = SBD_HD2ERR(hp);
375 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
376 	dev_info_t	*dip;
377 
378 	hdp = sbd_get_sbdp_handle(sbp, hp);
379 
380 	for (d = 0; d < devnum; d++) {
381 		sbd_mem_unit_t *mp;
382 
383 		/* sbd_get_devlist will not devlist element w/ dip of 0 */
384 		ASSERT(devlist[d].dv_dip != NULL);
385 
386 		dip = devlist[d].dv_dip;
387 		unit = sbdp_get_unit_num(hdp, dip);
388 		if (unit == -1) {
389 			if (hp->h_flags & SBD_IOCTL_FLAG_FORCE)
390 				continue;
391 			else {
392 				SBD_GET_PERR(hdp->h_err, ep);
393 				sbd_release_sbdp_handle(hdp);
394 				return (-1);
395 			}
396 		}
397 
398 		mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
399 
400 		/* sanity check */
401 		ASSERT(mp->sbm_cm.sbdev_sbp == sbp);
402 		ASSERT(unit == mp->sbm_cm.sbdev_unum);
403 
404 		PR_MEM("sbd: OS detach mem-unit (%d.%d)\n",
405 			sbp->sb_num, mp->sbm_cm.sbdev_unum);
406 	}
407 
408 	sbd_release_sbdp_handle(hdp);
409 	return (0);
410 }
411 
412 int
413 sbd_post_detach_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum)
414 {
415 	int		d, rv;
416 	sbdp_handle_t	*hdp;
417 	sbd_board_t	*sbp;
418 	sbd_mem_unit_t	*s_mp, *t_mp;
419 	static fn_t	f = "sbd_post_detach_mem";
420 
421 	PR_MEM("%s...\n", f);
422 
423 	sbp = SBDH2BD(hp->h_sbd);
424 
425 	hdp = sbd_get_sbdp_handle(sbp, hp);
426 
427 
428 	rv = 0;
429 	for (d = 0; d < devnum; d++) {
430 		sbderror_t	*ep;
431 		dev_info_t	*dip;
432 		int		unit;
433 
434 		/* sbd_get_devlist will not devlist element w/ dip of 0 */
435 		ASSERT(devlist[d].dv_dip != NULL);
436 
437 		ep = &devlist[d].dv_error;
438 		if ((SBD_GET_ERR(SBD_HD2ERR(hp)) != 0) ||
439 		    (sbd_set_err_in_hdl(hp, ep) == 0)) {
440 			rv = -1;
441 		}
442 
443 		dip = devlist[d].dv_dip;
444 		unit = sbdp_get_unit_num(hdp, dip);
445 		if (unit == -1) {
446 			if (hp->h_flags & SBD_IOCTL_FLAG_FORCE)
447 				continue;
448 			else {
449 				if (rv != -1)
450 					SBD_GET_PERR(hdp->h_err, ep);
451 				break;
452 			}
453 		}
454 
455 		s_mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
456 
457 		ASSERT(s_mp->sbm_cm.sbdev_sbp == sbp);
458 
459 		if (rv == -1) {
460 			if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) {
461 				t_mp = s_mp->sbm_peer;
462 			} else {
463 				/* this is no target unit */
464 				t_mp = NULL;
465 			}
466 
467 			sbd_mem_cleanup(s_mp, t_mp, ep);
468 		} else if (sbd_post_detach_mem_unit(s_mp, ep))
469 			rv = -1;
470 	}
471 
472 	sbd_release_sbdp_handle(hdp);
473 	return (rv);
474 }
475 
476 static void
477 sbd_add_memory_spans(sbd_board_t *sbp, struct memlist *ml)
478 {
479 	sbdp_handle_t	*hdp;
480 	static fn_t	f = "sbd_add_memory_spans";
481 
482 	PR_MEM("%s...", f);
483 	SBD_MEMLIST_DUMP(ml);
484 
485 #ifdef DEBUG
486 	memlist_read_lock();
487 	if (memlist_intersect(phys_install, ml)) {
488 		PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f);
489 	}
490 	memlist_read_unlock();
491 #endif
492 	hdp = sbd_get_sbdp_handle(NULL, NULL);
493 
494 	for (; ml; ml = ml->next) {
495 		update_membounds_t umb;
496 		pfn_t	base;
497 		pgcnt_t	npgs;
498 		int	rv;
499 
500 		base = _b64top(ml->address);
501 		npgs = _b64top(ml->size);
502 
503 		umb.u_board = sbp->sb_num;
504 		umb.u_base = (uint64_t)base << MMU_PAGESHIFT;
505 		umb.u_len = (uint64_t)npgs << MMU_PAGESHIFT;
506 
507 		lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
508 		rv = kphysm_add_memory_dynamic(base, npgs);
509 
510 		(void) sbdp_mem_add_span(hdp, ml->address, ml->size);
511 
512 		if (rv != KPHYSM_OK) {
513 			cmn_err(CE_WARN, "sbd:%s:"
514 				" unexpected kphysm_add_memory_dynamic"
515 				" return value %d;"
516 				" basepfn=0x%lx, npages=%ld\n",
517 				f, rv, base, npgs);
518 
519 			continue;
520 		}
521 		kcage_range_lock();
522 		rv = kcage_range_add(base, npgs, 1);
523 		kcage_range_unlock();
524 		if (rv != 0)
525 			continue;
526 	}
527 	sbd_release_sbdp_handle(hdp);
528 }
529 
530 /* hack for test scripts.  *** remove before code finalized *** */
531 int sbd_last_target;
532 
533 static int
534 sbd_post_detach_mem_unit(sbd_mem_unit_t *s_mp, sbderror_t *ep)
535 {
536 	uint64_t	sz;
537 	uint64_t	sm;
538 	uint64_t	t_basepa;
539 	uint64_t	tmp_basepa;
540 	uint64_t	s_basepa;
541 	sbd_board_t 	*sbp;
542 	sbdp_handle_t	*hdp;
543 	uint64_t	s_nbytes;
544 	uint64_t	s_new_basepa;
545 	sbd_mem_unit_t	*t_mp, *x_mp;
546 	struct memlist	*ml;
547 	int		rv;
548 	static fn_t	f = "sbd_post_detach_mem_unit";
549 	sbd_handle_t	*hp;
550 
551 	PR_MEM("%s...\n", f);
552 
553 	sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
554 	hp = MACHBD2HD(sbp);
555 	hdp = sbd_get_sbdp_handle(sbp, hp);
556 
557 	if (sbdp_get_mem_alignment(hdp, s_mp->sbm_cm.sbdev_dip, &sz)) {
558 		cmn_err(CE_WARN,
559 			"sbd:%s: no alignment for mem-unit (%d.%d)",
560 			f, sbp->sb_num, s_mp->sbm_cm.sbdev_unum);
561 		SBD_GET_PERR(hdp->h_err, ep);
562 		sbd_release_sbdp_handle(hdp);
563 		return (-1);
564 	}
565 	sm = sz - 1;
566 
567 	/* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */
568 	PR_MEM("%s: brd %d: deleted memlist (EMPTY maybe okay):\n",
569 		f, sbp->sb_num);
570 	SBD_MEMLIST_DUMP(s_mp->sbm_del_mlist);
571 
572 	/* sanity check */
573 	ASSERT(s_mp->sbm_del_mlist == NULL ||
574 		(s_mp->sbm_flags & SBD_MFLAG_RELDONE) != 0);
575 
576 	if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) {
577 		t_mp = s_mp->sbm_peer;
578 
579 		ASSERT(t_mp != NULL);
580 		ASSERT(t_mp->sbm_flags & SBD_MFLAG_TARGET);
581 		ASSERT(t_mp->sbm_peer == s_mp);
582 
583 		ASSERT(t_mp->sbm_flags & SBD_MFLAG_RELDONE);
584 		ASSERT(t_mp->sbm_del_mlist);
585 
586 		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
587 		PR_MEM("%s: target brd %d: deleted memlist:\n",
588 			f, sbp->sb_num);
589 		SBD_MEMLIST_DUMP(t_mp->sbm_del_mlist);
590 	} else {
591 		/* this is no target unit */
592 		t_mp = NULL;
593 	}
594 
595 	/*
596 	 * Verify the memory really did successfully detach
597 	 * by checking for its non-existence in phys_install.
598 	 */
599 	rv = 0;
600 	memlist_read_lock();
601 	if (s_mp->sbm_flags & SBD_MFLAG_RELDONE) {
602 		x_mp = s_mp;
603 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
604 	}
605 	if (rv == 0 && t_mp && (t_mp->sbm_flags & SBD_MFLAG_RELDONE)) {
606 		x_mp = t_mp;
607 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
608 	}
609 	memlist_read_unlock();
610 
611 	if (rv) {
612 		sbp = (sbd_board_t *)x_mp->sbm_cm.sbdev_sbp;
613 
614 		cmn_err(CE_WARN,
615 			"%s: %smem-unit (%d.%d) memlist still in phys_install",
616 			f,
617 			x_mp == t_mp ? "target " : "",
618 			sbp->sb_num,
619 			x_mp->sbm_cm.sbdev_unum);
620 		SBD_SET_ERR(ep, ESBD_INTERNAL);
621 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[x_mp->sbm_cm.sbdev_unum]);
622 		sbd_release_sbdp_handle(hdp);
623 		return (-1);
624 	}
625 
626 	s_basepa	= _ptob64(s_mp->sbm_basepfn);
627 	s_nbytes	= _ptob64(s_mp->sbm_npages);
628 
629 	if (t_mp != NULL) {
630 		t_basepa	= _ptob64(t_mp->sbm_basepfn);
631 		s_new_basepa	= (s_basepa & ~ sm) +
632 					_ptob64(t_mp->sbm_slice_offset);
633 
634 		/*
635 		 * We had to swap mem-units, so update
636 		 * memlists accordingly with new base
637 		 * addresses.
638 		 */
639 		for (ml = t_mp->sbm_mlist; ml; ml = ml->next) {
640 			ml->address -= t_basepa;
641 			ml->address += s_new_basepa;
642 		}
643 
644 		/*
645 		 * There is no need to explicitly rename the target delete
646 		 * memlist, because sbm_del_mlist and sbm_mlist always
647 		 * point to the same memlist for a copy/rename operation.
648 		 */
649 		ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
650 
651 		PR_MEM("%s: renamed target memlist and delete memlist", f);
652 		SBD_MEMLIST_DUMP(t_mp->sbm_mlist);
653 
654 		for (ml = s_mp->sbm_mlist; ml; ml = ml->next) {
655 			ml->address -= s_basepa;
656 			ml->address += t_basepa;
657 		}
658 
659 		PR_MEM("%s: renamed source memlist", f);
660 		SBD_MEMLIST_DUMP(s_mp->sbm_mlist);
661 
662 #ifdef DEBUG
663 		ASSERT(s_mp->sbm_mlist != s_mp->sbm_del_mlist);
664 		/*
665 		 * Renaming s_mp->sbm_del_mlist is not necessary.  This
666 		 * list is not used beyond this point, and in fact, is
667 		 *  disposed of at the end of this function.
668 		 */
669 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
670 			ml->address -= s_basepa;
671 			ml->address += t_basepa;
672 		}
673 
674 		PR_MEM("%s: renamed source delete memlist", f);
675 		SBD_MEMLIST_DUMP(s_mp->sbm_del_mlist);
676 #endif
677 
678 		if (s_mp->sbm_flags & SBD_MFLAG_MEMUPSIZE) {
679 			struct memlist	*nl;
680 			int mlret;
681 
682 			/*
683 			 * We had to perform a copy-rename from a
684 			 * small memory node to a big memory node.
685 			 * Need to add back the remaining memory on
686 			 * the big board that wasn't used by that
687 			 * from the small board during the copy.
688 			 * Subtract out the portion of the target memory
689 			 * node that was taken over by the source memory
690 			 * node.
691 			 */
692 			nl = memlist_dup(t_mp->sbm_mlist);
693 			mlret = memlist_delete_span(s_basepa, s_nbytes, &nl);
694 			PR_MEM("%s: mlret = %d\n", f, mlret);
695 
696 			sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
697 			PR_MEM("%s: adding back remaining portion"
698 				" of mem-unit (%d.%d), memlist:\n",
699 				f, sbp->sb_num,
700 				t_mp->sbm_cm.sbdev_unum);
701 
702 			SBD_MEMLIST_DUMP(nl);
703 
704 			sbd_add_memory_spans(sbp, nl);
705 
706 			memlist_delete(nl);
707 		}
708 	}
709 
710 
711 	if (t_mp != NULL) {
712 		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
713 		hdp->h_board = sbp->sb_num;
714 		/* delete target's entire address space */
715 		tmp_basepa = t_basepa & ~ sm;
716 		rv = sbdp_mem_del_span(hdp, tmp_basepa, sz);
717 		ASSERT(rv == 0);
718 
719 		sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
720 		hdp->h_board = sbp->sb_num;
721 		tmp_basepa = s_basepa & ~ sm;
722 		sz = s_new_basepa & sm;
723 		/* delete source board's vacant address space */
724 		rv = sbdp_mem_del_span(hdp, tmp_basepa, sz);
725 		ASSERT(rv == 0);
726 	} else {
727 		sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
728 		hdp->h_board = sbp->sb_num;
729 		tmp_basepa = s_basepa & ~ sm;
730 		/* delete board's entire address space */
731 		rv = sbdp_mem_del_span(hdp, tmp_basepa, sz);
732 		ASSERT(rv == 0);
733 	}
734 
735 #ifdef LINT
736 	rv = rv;
737 #endif
738 
739 	sbd_mem_cleanup(s_mp, t_mp, ep);
740 
741 	sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
742 	PR_MEM("%s: board %d's memlist:", f, sbp->sb_num);
743 	SBD_MEMLIST_DUMP(s_mp->sbm_mlist);
744 
745 	sbd_release_sbdp_handle(hdp);
746 	return (0);
747 }
748 
749 static void
750 sbd_mem_cleanup(sbd_mem_unit_t *s_mp, sbd_mem_unit_t *t_mp, sbderror_t *ep)
751 {
752 	sbd_board_t *sbp;
753 
754 	/* clean up target mem unit */
755 	if (t_mp != NULL) {
756 		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
757 
758 		ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
759 		/*
760 		 * sbm_del_mlist and sbm_mlist point at the same list
761 		 * We only need to delete one and then set both pointers
762 		 * to NULL
763 		 */
764 		memlist_delete(t_mp->sbm_del_mlist);
765 
766 		t_mp->sbm_del_mlist = NULL;
767 		t_mp->sbm_mlist = NULL;
768 		t_mp->sbm_peer = NULL;
769 		t_mp->sbm_flags = 0;
770 		t_mp->sbm_cm.sbdev_busy = 0;
771 		sbd_init_mem_unit_data(t_mp, ep);
772 
773 		/*
774 		 * now that copy/rename has completed, undo this
775 		 * work that was done in sbd_release_mem_done.
776 		 */
777 		/*
778 		 * If error don't set the target to configured
779 		 */
780 		if (SBD_GET_ERR(ep) == 0) {
781 			SBD_DEV_CLR_UNREFERENCED(sbp, SBD_COMP_MEM, 0);
782 			SBD_DEV_CLR_RELEASED(sbp, SBD_COMP_MEM, 0);
783 			SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, 0,
784 				SBD_STATE_CONFIGURED);
785 		}
786 
787 /* hack for test scripts.  *** remove before code finalized *** */
788 sbd_last_target = sbp->sb_num;
789 	}
790 
791 	/*
792 	 * clean up (source) board's mem unit structure.
793 	 * NOTE: sbm_mlist is retained.  It is referred to as the
794 	 * cached memlist.  The cached memlist is used to re-attach
795 	 * (configure back in) this memunit from the unconfigured
796 	 * state.
797 	 */
798 	if (s_mp != NULL) {
799 		sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
800 
801 		/*
802 		 * Don't want to call memlist_delete for sbm_del_mlist,
803 		 * since that list points to the sbm_list
804 		 */
805 		s_mp->sbm_del_mlist = NULL;
806 		s_mp->sbm_peer = NULL;
807 		s_mp->sbm_flags = 0;
808 		s_mp->sbm_cm.sbdev_busy = 0;
809 		sbd_init_mem_unit_data(s_mp, ep);
810 	}
811 }
812 
813 /*
814  * Successful return from this function will have the memory
815  * handle in sbp->sb_dev[..mem-unit...].sbm_memhandle allocated
816  * and waiting.  This routine's job is to select the memory that
817  * actually has to be released (detached) which may not necessarily
818  * be the same memory node that came in in devlist[],
819  * i.e. a copy-rename is needed.
820  */
821 int
822 sbd_pre_release_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum)
823 {
824 	extern int	kcage_on;
825 	int		d;
826 	int		err_flag = 0;
827 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
828 	sbderror_t	*ep = SBD_HD2ERR(hp);
829 	sbderror_t	*lep;
830 	static fn_t	f = "sbd_pre_release_mem";
831 
832 	PR_MEM("%s...\n", f);
833 
834 	if (kcage_on == 0) {
835 		/*
836 		 * Can't Detach memory if Cage is OFF.
837 		 */
838 		cmn_err(CE_WARN, "%s: kernel cage is disabled", f);
839 		SBD_SET_ERR(ep, ESBD_KCAGE_OFF);
840 		return (-1);
841 	}
842 
843 	for (d = 0; d < devnum; d++) {
844 		int		rv;
845 		memquery_t	mq;
846 		sbd_mem_unit_t	*mp;
847 		struct memlist	*ml;
848 
849 		/* sbd_get_devlist will not devlist element w/ dip of 0 */
850 		ASSERT(devlist[d].dv_dip != NULL);
851 
852 		mp = SBD_GET_BOARD_MEMUNIT(sbp, d);
853 
854 		/*
855 		 * If all the mem unit is marked as failed then don't allow the
856 		 * operation
857 		 */
858 		if (mp->sbm_cm.sbdev_cond == SBD_COND_FAILED) {
859 			SBD_SET_ERR(ep, ESBD_STATE);
860 			SBD_SET_ERRSTR(ep, sbp->sb_mempath[d]);
861 			err_flag = -1;
862 			break;
863 		}
864 
865 		ASSERT(d == mp->sbm_cm.sbdev_unum);
866 
867 		/*
868 		 * if interleave is set to across boards fail the op
869 		 */
870 		if (mp->sbm_interleave) {
871 			SBD_SET_ERR(ep, ESBD_MEMINTLV);
872 			SBD_SET_ERRSTR(ep, sbp->sb_mempath[d]);
873 			err_flag = -1;
874 			break;
875 		}
876 
877 		lep = &devlist[d].dv_error;
878 		if (SBD_GET_ERR(lep) != 0) {
879 			err_flag = -1;
880 			(void) sbd_set_err_in_hdl(hp, lep);
881 			break;
882 		}
883 
884 		if (mp->sbm_flags & SBD_MFLAG_RESERVED) {
885 			/*
886 			 * Board is currently involved in a delete
887 			 * memory operation. Can't detach this guy until
888 			 * that operation completes.
889 			 */
890 			cmn_err(CE_WARN,
891 				"%s: ineligible mem-unit (%d.%d) for detach",
892 				f, sbp->sb_num,
893 				mp->sbm_cm.sbdev_unum);
894 
895 			SBD_SET_ERR(lep, ESBD_INVAL);
896 			SBD_SET_ERRSTR(lep, sbp->sb_mempath[d]);
897 			(void) sbd_set_err_in_hdl(hp, lep);
898 			err_flag = -1;
899 			break;
900 		}
901 
902 		/*
903 		 * Check whether the detaching memory requires a
904 		 * copy-rename.
905 		 */
906 		ASSERT(mp->sbm_npages != 0);
907 		rv = kphysm_del_span_query(
908 			mp->sbm_basepfn, mp->sbm_npages, &mq);
909 		if (rv != KPHYSM_OK) {
910 			cmn_err(CE_WARN,
911 				"%s: unexpected kphysm_del_span_query"
912 				" return value %d;"
913 				" basepfn 0x%lx, npages 0x%lx,"
914 				" mem-unit (%d.%d), dip 0x%p",
915 				f,
916 				rv,
917 				mp->sbm_basepfn,
918 				mp->sbm_npages,
919 				sbp->sb_num,
920 				mp->sbm_cm.sbdev_unum,
921 				(void *)mp->sbm_cm.sbdev_dip);
922 
923 			SBD_SET_ERR(lep, ESBD_INTERNAL);
924 			SBD_SET_ERRSTR(lep, sbp->sb_mempath[d]);
925 			(void) sbd_set_err_in_hdl(hp, lep);
926 			err_flag = -1;
927 			break;
928 		}
929 
930 		if (mq.nonrelocatable != 0) {
931 			if (!(hp->h_iap->i_flags & SBD_FLAG_QUIESCE_OKAY)) {
932 				/* caller wasn't prompted for a suspend */
933 					SBD_SET_ERR(lep, ESBD_QUIESCE_REQD);
934 					SBD_SET_ERRSTR(lep, sbp->sb_mempath[d]);
935 					(void) sbd_set_err_in_hdl(hp, lep);
936 					err_flag = 1;
937 					break;
938 			}
939 		}
940 
941 		/* flags should be clean at this time */
942 		ASSERT(mp->sbm_flags == 0);
943 
944 		ASSERT(mp->sbm_del_mlist == NULL);	/* should be null */
945 
946 		if (mp->sbm_mlist != NULL) {
947 			memlist_delete(mp->sbm_mlist);
948 			mp->sbm_mlist = NULL;
949 		}
950 
951 		ml = sbd_get_memlist(mp, lep);
952 		(void) sbd_set_err_in_hdl(hp, lep);
953 		if (ml == NULL) {
954 			PR_MEM("%s: no memlist found for board %d\n",
955 				f, sbp->sb_num);
956 			err_flag = -1;
957 			break;
958 		}
959 
960 		/* allocate a kphysm handle */
961 		rv = kphysm_del_gethandle(&mp->sbm_memhandle);
962 		if (rv != KPHYSM_OK) {
963 			memlist_delete(ml);
964 
965 			cmn_err(CE_WARN,
966 				"%s: unexpected kphysm_del_gethandle"
967 				" return value %d", f, rv);
968 
969 			SBD_SET_ERR(lep, ESBD_INTERNAL);
970 			SBD_SET_ERRSTR(lep, sbp->sb_mempath[d]);
971 			(void) sbd_set_err_in_hdl(hp, lep);
972 			err_flag = -1;
973 			break;
974 		}
975 		mp->sbm_flags |= SBD_MFLAG_RELOWNER;
976 
977 		if ((mq.nonrelocatable != 0) ||
978 			sbd_reserve_mem_spans(&mp->sbm_memhandle, ml)) {
979 			/*
980 			 * Either the detaching memory node contains
981 			 * non-reloc memory or we failed to reserve the
982 			 * detaching memory node (which did _not_ have
983 			 * any non-reloc memory, i.e. some non-reloc mem
984 			 * got onboard).
985 			 */
986 
987 			if (sbd_select_mem_target(hp, mp, ml)) {
988 				int rv;
989 
990 				/*
991 				 * We had no luck locating a target
992 				 * memory node to be the recipient of
993 				 * the non-reloc memory on the node
994 				 * we're trying to detach.
995 				 * Clean up be disposing the mem handle
996 				 * and the mem list.
997 				 */
998 				rv = kphysm_del_release(mp->sbm_memhandle);
999 				if (rv != KPHYSM_OK) {
1000 					/*
1001 					 * can do nothing but complain
1002 					 * and hope helpful for debug
1003 					 */
1004 					cmn_err(CE_WARN, "sbd:%s: unexpected"
1005 						" kphysm_del_release return"
1006 						" value %d",
1007 						f, rv);
1008 				}
1009 				mp->sbm_flags &= ~SBD_MFLAG_RELOWNER;
1010 
1011 				memlist_delete(ml);
1012 
1013 				/* make sure sbm_flags is clean */
1014 				ASSERT(mp->sbm_flags == 0);
1015 
1016 				cmn_err(CE_WARN,
1017 					"%s: no available target for "
1018 					"mem-unit (%d.%d)",
1019 					f, sbp->sb_num,
1020 					mp->sbm_cm.sbdev_unum);
1021 
1022 				SBD_SET_ERR(lep, ESBD_NO_TARGET);
1023 				SBD_SET_ERRSTR(lep,
1024 					sbp->sb_mempath[mp->sbm_cm.sbdev_unum]);
1025 				(void) sbd_set_err_in_hdl(hp, lep);
1026 
1027 				err_flag = -1;
1028 				break;
1029 			}
1030 
1031 			/*
1032 			 * ml is not memlist_deleted here because
1033 			 * it has been assigned to mp->sbm_mlist
1034 			 * by sbd_select_mem_target.
1035 			 */
1036 		} else {
1037 			/* no target needed to detach this board */
1038 			mp->sbm_flags |= SBD_MFLAG_RESERVED;
1039 			mp->sbm_peer = NULL;
1040 			mp->sbm_del_mlist = ml;
1041 			mp->sbm_mlist = ml;
1042 			mp->sbm_cm.sbdev_busy = 1;
1043 		}
1044 #ifdef DEBUG
1045 		ASSERT(mp->sbm_mlist != NULL);
1046 
1047 		if (mp->sbm_flags & SBD_MFLAG_SOURCE) {
1048 			int src, targ;
1049 
1050 			sbp = (sbd_board_t *)
1051 				mp->sbm_peer->sbm_cm.sbdev_sbp;
1052 			targ = sbp->sb_num;
1053 			sbp = (sbd_board_t *)mp->sbm_cm.sbdev_sbp;
1054 			src = sbp->sb_num;
1055 			PR_MEM("%s: release of board %d requires copy/rename;"
1056 				" selected target board %d\n",
1057 				f, src, targ);
1058 		} else {
1059 			sbp = (sbd_board_t *)mp->sbm_cm.sbdev_sbp;
1060 			PR_MEM("%s: copy/rename not required to release"
1061 				" board %d\n", f, sbp->sb_num);
1062 		}
1063 
1064 		ASSERT(mp->sbm_flags & SBD_MFLAG_RELOWNER);
1065 		ASSERT(mp->sbm_flags & SBD_MFLAG_RESERVED);
1066 #endif
1067 	}
1068 
1069 	return (err_flag);
1070 }
1071 
1072 void
1073 sbd_release_mem_done(sbd_handle_t *hp, int unit)
1074 {
1075 	sbd_mem_unit_t	*s_mp, *t_mp, *mp;
1076 	sbderror_t	*ep = SBD_HD2ERR(hp);
1077 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
1078 	int		rv;
1079 	static fn_t	f = "sbd_release_mem_done";
1080 
1081 	s_mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
1082 	sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
1083 
1084 	/*
1085 	 * This unit will be flagged with SBD_MFLAG_SOURCE, if it
1086 	 * has a target unit.
1087 	 */
1088 	if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) {
1089 		t_mp = s_mp->sbm_peer;
1090 		ASSERT(t_mp != NULL);
1091 		ASSERT(t_mp->sbm_peer == s_mp);
1092 		ASSERT(t_mp->sbm_flags & SBD_MFLAG_TARGET);
1093 		ASSERT(t_mp->sbm_flags & SBD_MFLAG_RESERVED);
1094 	} else {
1095 		/* this is no target unit */
1096 		t_mp = NULL;
1097 	}
1098 
1099 	/* free delete handle */
1100 	ASSERT(s_mp->sbm_flags & SBD_MFLAG_RELOWNER);
1101 	ASSERT(s_mp->sbm_flags & SBD_MFLAG_RESERVED);
1102 
1103 	rv = kphysm_del_release(s_mp->sbm_memhandle);
1104 	if (rv != KPHYSM_OK) {
1105 		/*
1106 		 * can do nothing but complain
1107 		 * and hope helpful for debug
1108 		 */
1109 		cmn_err(CE_WARN, "sbd:%s: unexpected kphysm_del_release"
1110 			" return value %d", f, rv);
1111 	}
1112 	s_mp->sbm_flags &= ~SBD_MFLAG_RELOWNER;
1113 
1114 	/*
1115 	 * If an error was encountered during release, clean up
1116 	 * the source (and target, if present) unit data.
1117 	 */
1118 	if (SBD_GET_ERR(ep) != 0) {
1119 
1120 		PR_MEM("%s: unit %d.%d: error %d noted\n",
1121 			f, sbp->sb_num,
1122 			s_mp->sbm_cm.sbdev_unum,
1123 			SBD_GET_ERR(ep));
1124 
1125 		sbd_mem_cleanup(s_mp, t_mp, ep);
1126 
1127 		/* bail out */
1128 		return;
1129 	}
1130 
1131 	SBD_DEV_SET_RELEASED(sbp, SBD_COMP_MEM, unit);
1132 	SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, unit, SBD_STATE_RELEASE);
1133 
1134 	if (t_mp != NULL) {
1135 		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
1136 		/*
1137 		 * the kphysm delete operation that drained the source
1138 		 * board also drained this target board.  Since the source
1139 		 * board drain is now known to have succeeded, we know this
1140 		 * target board is drained too.
1141 		 */
1142 		SBD_DEV_SET_RELEASED(sbp, SBD_COMP_MEM,
1143 			t_mp->sbm_cm.sbdev_unum);
1144 		SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM,
1145 			t_mp->sbm_cm.sbdev_unum,
1146 			SBD_STATE_RELEASE);
1147 
1148 		/*
1149 		 * NOTE: do not transition target's board state,
1150 		 * even if the mem-unit was the last configure
1151 		 * unit of the board.  When copy/rename completes
1152 		 * this mem-unit will transitioned back to
1153 		 * the configured state.  In the meantime, the
1154 		 * board's must remain as is.
1155 		 */
1156 	}
1157 
1158 	/* if board(s) had deleted memory, verify it is gone */
1159 	rv = 0;
1160 	memlist_read_lock();
1161 	if (s_mp->sbm_del_mlist != NULL) {
1162 		sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
1163 		mp = s_mp;
1164 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1165 	}
1166 	if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
1167 		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
1168 		mp = t_mp;
1169 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1170 	}
1171 	memlist_read_unlock();
1172 	if (rv) {
1173 		cmn_err(CE_WARN, "sbd:%s: %smem-unit (%d.%d): "
1174 			"deleted memory still found in phys_install",
1175 			f,
1176 			(mp == t_mp ? "target " : ""),
1177 			sbp->sb_num,
1178 			mp->sbm_cm.sbdev_unum);
1179 
1180 		SBD_SET_ERR(ep, ESBD_INTERNAL);
1181 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[mp->sbm_cm.sbdev_unum]);
1182 		return;
1183 	}
1184 
1185 	s_mp->sbm_flags |= SBD_MFLAG_RELDONE;
1186 	if (t_mp != NULL) {
1187 		t_mp->sbm_flags &= ~SBD_MFLAG_RESERVED;
1188 		t_mp->sbm_flags |= SBD_MFLAG_RELDONE;
1189 	}
1190 
1191 	sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
1192 
1193 	SBD_DEV_SET_UNREFERENCED(sbp, SBD_COMP_MEM, unit);
1194 	SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, unit, SBD_STATE_UNREFERENCED);
1195 
1196 	PR_MEM("%s: marking mem-unit (%d.%d) release DONE\n",
1197 		f, sbp->sb_num,
1198 		s_mp->sbm_cm.sbdev_unum);
1199 
1200 	s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1201 
1202 	if (t_mp != NULL) {
1203 		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
1204 
1205 		SBD_DEV_SET_UNREFERENCED(sbp, SBD_COMP_MEM,
1206 			t_mp->sbm_cm.sbdev_unum);
1207 		SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM,
1208 			t_mp->sbm_cm.sbdev_unum,
1209 			SBD_STATE_UNREFERENCED);
1210 
1211 		sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
1212 
1213 		PR_MEM("%s: marking mem-unit (%d.%d) release DONE\n",
1214 			f, sbp->sb_num,
1215 			t_mp->sbm_cm.sbdev_unum);
1216 
1217 		t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1218 	}
1219 }
1220 
1221 int
1222 sbd_disconnect_mem(sbd_handle_t *hp, int unit)
1223 {
1224 	static fn_t	f = "sbd_disconnect_mem";
1225 	sbd_mem_unit_t	*mp;
1226 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
1227 
1228 	mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
1229 
1230 	ASSERT(mp->sbm_cm.sbdev_state == SBD_STATE_CONNECTED ||
1231 	    mp->sbm_cm.sbdev_state == SBD_STATE_UNCONFIGURED);
1232 
1233 	PR_MEM("%s...\n", f);
1234 
1235 	if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
1236 		memlist_delete(mp->sbm_del_mlist);
1237 	mp->sbm_del_mlist = NULL;
1238 
1239 	if (mp->sbm_mlist) {
1240 		memlist_delete(mp->sbm_mlist);
1241 		mp->sbm_mlist = NULL;
1242 	}
1243 
1244 	return (0);
1245 }
1246 
1247 int
1248 sbd_cancel_mem(sbd_handle_t *hp, int unit)
1249 {
1250 	sbd_mem_unit_t	*s_mp, *t_mp;
1251 	sbd_istate_t	state;
1252 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
1253 	sbd_board_t	*tsbp;
1254 	static fn_t	f = "sbd_cancel_mem";
1255 	sbderror_t	*ep = SBD_HD2ERR(hp);
1256 
1257 	s_mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
1258 
1259 	state = s_mp->sbm_cm.sbdev_state;
1260 
1261 	if (s_mp->sbm_flags & SBD_MFLAG_TARGET) {
1262 		/* must cancel source board, not target board */
1263 		SBD_SET_ERR(ep, ESBD_INTERNAL);
1264 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
1265 		return (-1);
1266 	} else if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) {
1267 		t_mp = s_mp->sbm_peer;
1268 		tsbp = t_mp->sbm_cm.sbdev_sbp;
1269 		ASSERT(t_mp != NULL);
1270 		ASSERT(t_mp->sbm_peer == s_mp);
1271 
1272 		/* must always match the source board's state */
1273 		ASSERT(t_mp->sbm_cm.sbdev_state == state);
1274 	} else {
1275 		/* this is no target unit */
1276 		t_mp = NULL;
1277 	}
1278 
1279 	switch (state) {
1280 	case SBD_STATE_UNREFERENCED:	/* state set by sbd_release_mem_done */
1281 		ASSERT((s_mp->sbm_flags & SBD_MFLAG_RELOWNER) == 0);
1282 
1283 		if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
1284 			PR_MEM("%s: undoing target board %d memory delete\n",
1285 				f, tsbp->sb_num);
1286 			sbd_add_memory_spans(tsbp, t_mp->sbm_del_mlist);
1287 			SBD_DEV_CLR_UNREFERENCED(tsbp, SBD_COMP_MEM,
1288 				t_mp->sbm_cm.sbdev_unum);
1289 		}
1290 
1291 		if (s_mp->sbm_del_mlist != NULL) {
1292 			PR_MEM("%s: undoing board %d memory delete\n",
1293 				f, sbp->sb_num);
1294 			sbd_add_memory_spans(sbp, s_mp->sbm_del_mlist);
1295 		}
1296 
1297 		/*FALLTHROUGH*/
1298 
1299 	case SBD_STATE_CONFIGURED:
1300 		/*
1301 		 * we got here because of an error early in the release process
1302 		 * Just leave the memory as is and report the error
1303 		 */
1304 
1305 		ASSERT((s_mp->sbm_flags & SBD_MFLAG_RELOWNER) == 0);
1306 
1307 		if (t_mp != NULL) {
1308 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1309 			t_mp->sbm_del_mlist = NULL;
1310 
1311 			if (t_mp->sbm_mlist != NULL) {
1312 				memlist_delete(t_mp->sbm_mlist);
1313 				t_mp->sbm_mlist = NULL;
1314 			}
1315 
1316 			t_mp->sbm_peer = NULL;
1317 			t_mp->sbm_flags = 0;
1318 			t_mp->sbm_cm.sbdev_busy = 0;
1319 			sbd_init_mem_unit_data(t_mp, ep);
1320 
1321 			SBD_DEV_CLR_RELEASED(tsbp, SBD_COMP_MEM,
1322 				t_mp->sbm_cm.sbdev_unum);
1323 
1324 			SBD_DEVICE_TRANSITION(tsbp, SBD_COMP_MEM,
1325 				t_mp->sbm_cm.sbdev_unum,
1326 				SBD_STATE_CONFIGURED);
1327 		}
1328 
1329 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1330 			memlist_delete(s_mp->sbm_del_mlist);
1331 		s_mp->sbm_del_mlist = NULL;
1332 
1333 		if (s_mp->sbm_mlist != NULL) {
1334 			memlist_delete(s_mp->sbm_mlist);
1335 			s_mp->sbm_mlist = NULL;
1336 		}
1337 
1338 		s_mp->sbm_peer = NULL;
1339 		s_mp->sbm_flags = 0;
1340 		s_mp->sbm_cm.sbdev_busy = 0;
1341 		sbd_init_mem_unit_data(s_mp, ep);
1342 
1343 		return (0);
1344 	default:
1345 		PR_MEM("%s: WARNING unexpected state (%d) for "
1346 			"mem-unit %d.%d\n",
1347 			f,
1348 			(int)state,
1349 			sbp->sb_num,
1350 			s_mp->sbm_cm.sbdev_unum);
1351 
1352 		return (-1);
1353 	}
1354 	/*NOTREACHED*/
1355 }
1356 
1357 void
1358 sbd_init_mem_unit(sbd_board_t *sbp, int unit, sbderror_t *ep)
1359 {
1360 	sbd_istate_t	new_state;
1361 	sbd_mem_unit_t	*mp;
1362 	dev_info_t	*cur_mc_dip;
1363 	int		failed_mcs = 0, present_mcs = 0;
1364 	sbd_cond_t	mc_cond;
1365 	int		i;
1366 
1367 	mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
1368 
1369 	if (SBD_DEV_IS_ATTACHED(sbp, SBD_COMP_MEM, unit)) {
1370 		new_state = SBD_STATE_CONFIGURED;
1371 	} else if (SBD_DEV_IS_PRESENT(sbp, SBD_COMP_MEM, unit)) {
1372 		new_state = SBD_STATE_CONNECTED;
1373 	} else if (mp->sbm_cm.sbdev_dip != NULL) {
1374 		new_state = SBD_STATE_OCCUPIED;
1375 	} else {
1376 		new_state = SBD_STATE_EMPTY;
1377 	}
1378 
1379 	/*
1380 	 * Check all the possible memory nodes on the board.  If all of them
1381 	 * have a failed status mark memory as failed. Otherwise mem is ok
1382 	 */
1383 	if (!sbp->sb_memaccess_ok) {
1384 		mp->sbm_cm.sbdev_cond = SBD_COND_UNKNOWN;
1385 		return;
1386 	}
1387 
1388 	for (i = 0; i < SBD_NUM_MC_PER_BOARD; i++) {
1389 		cur_mc_dip = mp->sbm_dip[i];
1390 
1391 		if (cur_mc_dip == NULL)
1392 			continue;
1393 
1394 		present_mcs |= (1 << i);
1395 
1396 		mc_cond = sbd_get_comp_cond(cur_mc_dip);
1397 		if (mc_cond == SBD_COND_FAILED) {
1398 			failed_mcs |= (1 << i);
1399 		}
1400 	}
1401 
1402 	if (failed_mcs == present_mcs) {
1403 		/*
1404 		 * All mem nodes failed, therefore mark all mem
1405 		 * as failed
1406 		 */
1407 		mp->sbm_cm.sbdev_cond = SBD_COND_FAILED;
1408 	} else {
1409 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
1410 	}
1411 
1412 	sbd_init_mem_unit_data(mp, ep);
1413 
1414 	/*
1415 	 * Any changes to this memory unit should be performed above
1416 	 * this call to ensure the unit is fully initialized
1417 	 * before transitioning to the new state.
1418 	 */
1419 	SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, unit, new_state);
1420 
1421 }
1422 
1423 static void
1424 sbd_init_mem_unit_data(sbd_mem_unit_t *mp, sbderror_t *ep)
1425 {
1426 	uint64_t	basepa;
1427 	uint64_t	sz;
1428 	sbd_board_t	*sbp = mp->sbm_cm.sbdev_sbp;
1429 	sbdp_handle_t	*hdp;
1430 	static fn_t	f = "sbd_init_mem_unit_data";
1431 	sbd_handle_t	*hp = MACHBD2HD(sbp);
1432 
1433 	PR_MEM("%s...\n", f);
1434 
1435 	/* a little sanity checking */
1436 	ASSERT(mp->sbm_peer == NULL);
1437 	ASSERT(mp->sbm_flags == 0);
1438 
1439 	hdp = sbd_get_sbdp_handle(sbp, hp);
1440 
1441 	/* get basepfn of mem unit */
1442 	if (sbdphw_get_base_physaddr(hdp, mp->sbm_cm.sbdev_dip, &basepa)) {
1443 		cmn_err(CE_WARN, "sbd:%s: failed to get physaddr"
1444 			" for mem-unit (%d.%d)",
1445 			f,
1446 			sbp->sb_num,
1447 			mp->sbm_cm.sbdev_unum);
1448 		SBD_GET_PERR(hdp->h_err, ep);
1449 		sbd_release_sbdp_handle(hdp);
1450 		return;
1451 	}
1452 	mp->sbm_basepfn = _b64top(basepa);
1453 
1454 	/* attempt to get number of pages from PDA */
1455 	mp->sbm_npages = sbdp_get_mem_size(hdp);
1456 
1457 	/* if didn't work, calculate using memlist */
1458 	if (mp->sbm_npages == 0) {
1459 		struct memlist	*ml, *mlist;
1460 		mlist = sbd_get_memlist(mp, ep);
1461 		for (ml = mlist; ml; ml = ml->next)
1462 			mp->sbm_npages += btop(ml->size);
1463 		memlist_delete(mlist);
1464 	}
1465 
1466 
1467 	if (sbdp_get_mem_alignment(hdp, mp->sbm_cm.sbdev_dip, &sz)) {
1468 		cmn_err(CE_WARN,
1469 			"sbd:%s: no alignment for mem-unit (%d.%d)",
1470 			f, sbp->sb_num, mp->sbm_cm.sbdev_unum);
1471 		SBD_GET_PERR(hdp->h_err, ep);
1472 		sbd_release_sbdp_handle(hdp);
1473 		return;
1474 	}
1475 	mp->sbm_alignment_mask = _b64top(sz);
1476 
1477 
1478 	mp->sbm_interleave = sbdp_isinterleaved(hdp,
1479 	    mp->sbm_cm.sbdev_dip);
1480 
1481 	PR_MEM("%s: board %d (basepfn = 0x%lx, npgs = 0x%lx interleave %d)\n",
1482 		f, sbp->sb_num,
1483 		mp->sbm_basepfn,
1484 		mp->sbm_npages,
1485 		mp->sbm_interleave);
1486 
1487 	sbd_release_sbdp_handle(hdp);
1488 }
1489 
1490 static int
1491 sbd_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
1492 {
1493 	int		err;
1494 	pfn_t		base;
1495 	pgcnt_t		npgs;
1496 	struct memlist	*mc;
1497 	static fn_t	f = "sbd_reserve_mem_spans";
1498 
1499 	PR_MEM("%s...\n", f);
1500 
1501 	/*
1502 	 * Walk the supplied memlist scheduling each span for removal
1503 	 * with kphysm_del_span.  It is possible that a span may intersect
1504 	 * an area occupied by the cage.
1505 	 */
1506 	for (mc = ml; mc != NULL; mc = mc->next) {
1507 		base = _b64top(mc->address);
1508 		npgs = _b64top(mc->size);
1509 
1510 		err = kphysm_del_span(*mhp, base, npgs);
1511 		if (err != KPHYSM_OK) {
1512 			cmn_err(CE_WARN, "sbd:%s memory reserve failed."
1513 				" unexpected kphysm_del_span return value %d;"
1514 				" basepfn=0x%lx npages=%ld",
1515 				f, err, base, npgs);
1516 			return (-1);
1517 		}
1518 	}
1519 	return (0);
1520 }
1521 
1522 /* debug counters */
1523 int sbd_smt_realigned;
1524 int sbd_smt_preference[4];
1525 
1526 #ifdef DEBUG
1527 uint_t sbd_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
1528 #endif
1529 
1530 /*
1531  * Verify that there is no memory overlapping if copy-rename is
1532  * done with the selected target board.
1533  *
1534  * Returns 0 if OK, -1 otherwise.
1535  */
1536 static int
1537 sbd_check_boundaries(struct memlist *orig_memlist, sbd_mem_unit_t *s_mp,
1538 	sbd_mem_unit_t *t_mp)
1539 {
1540 	struct memlist	*new_memlist;
1541 	int mlret;
1542 	static fn_t	f = "sbd_check_boundaries";
1543 
1544 	new_memlist = memlist_dup(orig_memlist);
1545 	if (new_memlist == NULL) {
1546 		PR_MEM("%s: can't dup original memlist\n", f);
1547 		return (-1);
1548 	}
1549 
1550 	mlret = memlist_delete_span(
1551 		_ptob64(s_mp->sbm_basepfn),
1552 		_ptob64(s_mp->sbm_npages),
1553 		&new_memlist);
1554 	if (mlret != MEML_SPANOP_OK) {
1555 		PR_MEM("%s: del s/s mlret = %d\n", f, mlret);
1556 		goto check_done;
1557 	}
1558 
1559 	mlret = memlist_delete_span(
1560 		_ptob64(t_mp->sbm_basepfn),
1561 		_ptob64(t_mp->sbm_npages),
1562 		&new_memlist);
1563 	if (mlret != MEML_SPANOP_OK) {
1564 		PR_MEM("%s: del t/t mlret = %d\n", f, mlret);
1565 		goto check_done;
1566 	}
1567 
1568 	mlret = memlist_add_span(
1569 		_ptob64(t_mp->sbm_basepfn),
1570 		_ptob64(s_mp->sbm_npages),
1571 		&new_memlist);
1572 	if (mlret != MEML_SPANOP_OK) {
1573 		PR_MEM("%s: add t/s mlret = %d\n", f, mlret);
1574 		goto check_done;
1575 	}
1576 
1577 	mlret = memlist_add_span(
1578 		_ptob64(s_mp->sbm_basepfn),
1579 		_ptob64(t_mp->sbm_npages),
1580 		&new_memlist);
1581 	if (mlret != MEML_SPANOP_OK) {
1582 		PR_MEM("%s: add s/t mlret = %d\n", f, mlret);
1583 	}
1584 
1585 check_done:
1586 	memlist_delete(new_memlist);
1587 
1588 	if (mlret == MEML_SPANOP_OK)
1589 		return (0);
1590 	else
1591 		return (-1);
1592 }
1593 
1594 /*
1595  * Find and reserve a copy/rename target board suitable for the
1596  * given source board.
1597  * All boards in the system are examined and categorized in relation to
1598  * their memory size versus the source board's memory size.  Order of
1599  * preference is:
1600  *	1st: board has same memory size
1601  * 	2nd: board has larger memory size
1602  *	3rd: board has smaller memory size
1603  *	4th: board has smaller memory size, available memory will be reduced.
1604  * Boards in category 3 and 4 will have their MC's reprogrammed to locate the
1605  * span to which the MC responds to address span that appropriately covers
1606  * the nonrelocatable span of the source board.
1607  */
1608 static int
1609 sbd_select_mem_target(sbd_handle_t *hp,
1610 	sbd_mem_unit_t *s_mp, struct memlist *s_ml)
1611 {
1612 	uint64_t	sz;
1613 	pgcnt_t		sm;
1614 	int		n_sets = 4; /* same, larger, smaller, clipped */
1615 	int		preference; /* lower value is higher preference */
1616 	int		n_units_per_set;
1617 	int		idx;
1618 	sbd_mem_unit_t	**sets;
1619 	sbdp_handle_t	*hdp;
1620 	int		t_bd;
1621 	sbd_softstate_t	*softsp;
1622 	int		t_unit;
1623 	int		max_boards;
1624 	int		rv;
1625 	sbd_board_t	*s_sbp, *t_sbp;
1626 	sbd_mem_unit_t	*t_mp, *c_mp;
1627 	struct memlist	*d_ml, *t_ml, *x_ml;
1628 	memquery_t	s_mq = {0};
1629 	static fn_t	f = "sbd_select_mem_target";
1630 
1631 	PR_MEM("%s...\n", f);
1632 
1633 	ASSERT(s_ml != NULL);
1634 
1635 	s_sbp = s_mp->sbm_cm.sbdev_sbp;
1636 
1637 	hdp = sbd_get_sbdp_handle(s_sbp, hp);
1638 
1639 	if (sbdp_get_mem_alignment(hdp, s_mp->sbm_cm.sbdev_dip, &sz)) {
1640 		sbderror_t	*ep = SBD_HD2ERR(hp);
1641 		cmn_err(CE_WARN,
1642 			"sbd:%s: no alignment for mem-unit (%d.%d)",
1643 			f, s_sbp->sb_num, s_mp->sbm_cm.sbdev_unum);
1644 		SBD_GET_PERR(hdp->h_err, ep);
1645 		sbd_release_sbdp_handle(hdp);
1646 		return (-1);
1647 	}
1648 	sm = sz - 1;
1649 	sbd_release_sbdp_handle(hdp);
1650 
1651 	softsp = (sbd_softstate_t *)s_sbp->sb_softsp;
1652 
1653 	max_boards = softsp->max_boards;
1654 	n_units_per_set = max_boards * MAX_MEM_UNITS_PER_BOARD;
1655 	sets = GETSTRUCT(sbd_mem_unit_t *, n_units_per_set * n_sets);
1656 
1657 	/*
1658 	 * Make one pass through all memory units on all boards
1659 	 * and categorize them with respect to the source board.
1660 	 */
1661 	for (t_bd = 0; t_bd < max_boards; t_bd++) {
1662 		/*
1663 		 * The board structs are a contiguous array
1664 		 * so we take advantage of that to find the
1665 		 * correct board struct pointer for a given
1666 		 * board number.
1667 		 */
1668 		t_sbp = (sbd_board_t *)softsp->sbd_boardlist;
1669 		t_sbp += t_bd;
1670 
1671 		/* source board can not be its own target */
1672 		if (s_sbp->sb_num == t_sbp->sb_num)
1673 			continue;
1674 
1675 		for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) {
1676 
1677 			t_mp = SBD_GET_BOARD_MEMUNIT(t_sbp, t_unit);
1678 
1679 			/* this memory node must be attached */
1680 			if (!SBD_DEV_IS_ATTACHED(t_sbp, SBD_COMP_MEM, t_unit))
1681 				continue;
1682 
1683 			/* source unit can not be its own target */
1684 			if (s_mp == t_mp) {
1685 				/* catch this in debug kernels */
1686 				ASSERT(0);
1687 				continue;
1688 			}
1689 
1690 			/*
1691 			 * this memory node must not already be reserved
1692 			 * by some other memory delete operation.
1693 			 */
1694 			if (t_mp->sbm_flags & SBD_MFLAG_RESERVED)
1695 				continue;
1696 
1697 			/*
1698 			 * categorize the memory node
1699 			 * If this is a smaller memory node, create a
1700 			 * temporary, edited copy of the source board's
1701 			 * memlist containing only the span of the non-
1702 			 * relocatable pages.
1703 			 */
1704 			if (t_mp->sbm_npages == s_mp->sbm_npages) {
1705 				preference = 0;
1706 				t_mp->sbm_slice_offset = 0;
1707 			} else if (t_mp->sbm_npages > s_mp->sbm_npages) {
1708 				preference = 1;
1709 				t_mp->sbm_slice_offset = 0;
1710 			} else {
1711 				/*
1712 				 * We do not allow other options right now
1713 				 */
1714 				continue;
1715 			}
1716 
1717 			sbd_smt_preference[preference]++;
1718 
1719 			/* calculate index to start of preference set */
1720 			idx  = n_units_per_set * preference;
1721 			/* calculate offset to respective element */
1722 			idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;
1723 
1724 			ASSERT(idx < n_units_per_set * n_sets);
1725 			sets[idx] = t_mp;
1726 		}
1727 	}
1728 
1729 	/*
1730 	 * NOTE: this would be a good place to sort each candidate
1731 	 * set in to some desired order, e.g. memory size in ascending
1732 	 * order.  Without an additional sorting step here, the order
1733 	 * within a set is ascending board number order.
1734 	 */
1735 
1736 	c_mp = NULL;
1737 	x_ml = NULL;
1738 	t_ml = NULL;
1739 	for (idx = 0; idx < n_units_per_set * n_sets; idx++) {
1740 		memquery_t mq;
1741 
1742 		/* cleanup t_ml after previous pass */
1743 		if (t_ml != NULL) {
1744 			memlist_delete(t_ml);
1745 			t_ml = NULL;
1746 		}
1747 
1748 		/* get candidate target board mem unit */
1749 		t_mp = sets[idx];
1750 		if (t_mp == NULL)
1751 			continue;
1752 
1753 		t_sbp = t_mp->sbm_cm.sbdev_sbp;
1754 
1755 		/* get target board memlist */
1756 		t_ml = sbd_get_memlist(t_mp, SBD_HD2ERR(hp));
1757 		if (t_ml == NULL) {
1758 			cmn_err(CE_WARN, "sbd:%s: no memlist for"
1759 				" mem-unit %d, board %d",
1760 				f,
1761 				t_sbp->sb_num,
1762 				t_mp->sbm_cm.sbdev_unum);
1763 
1764 			continue;
1765 		}
1766 
1767 		/* get appropriate source board memlist */
1768 		if (t_mp->sbm_npages < s_mp->sbm_npages) {
1769 			spgcnt_t excess;
1770 
1771 			/*
1772 			 * make a copy of the source board memlist
1773 			 * then edit it to remove the spans that
1774 			 * are outside the calculated span of
1775 			 * [pfn..s_mq.last_nonrelocatable].
1776 			 */
1777 			if (x_ml != NULL)
1778 				memlist_delete(x_ml);
1779 
1780 			x_ml = memlist_dup(s_ml);
1781 			if (x_ml == NULL) {
1782 				PR_MEM("%s: memlist_dup failed\n", f);
1783 				/* TODO: should abort */
1784 				continue;
1785 			}
1786 
1787 			/* trim off lower portion */
1788 			excess = t_mp->sbm_slice_offset;
1789 			if (excess > 0) {
1790 				int mlret;
1791 
1792 				mlret = memlist_delete_span(
1793 					_ptob64(s_mp->sbm_basepfn),
1794 					_ptob64(excess),
1795 					&x_ml);
1796 				PR_MEM("%s: mlret = %d\n", f, mlret);
1797 			}
1798 
1799 			/*
1800 			 * Since this candidate target board is smaller
1801 			 * than the source board, s_mq must have been
1802 			 * initialized in previous loop while processing
1803 			 * this or some other candidate board.
1804 			 * FIXME: this is weak.
1805 			 */
1806 			ASSERT(s_mq.phys_pages != 0);
1807 
1808 			/* trim off upper portion */
1809 			excess = (s_mp->sbm_basepfn + s_mp->sbm_npages)
1810 				- (s_mq.last_nonrelocatable + 1);
1811 			if (excess > 0) {
1812 				pfn_t p;
1813 				int mlret;
1814 
1815 				p  = s_mq.last_nonrelocatable + 1;
1816 				p -= excess;
1817 
1818 				mlret = memlist_delete_span(
1819 					_ptob64(p),
1820 					_ptob64(excess),
1821 					&x_ml);
1822 				PR_MEM("%s: mlret = %d\n", f, mlret);
1823 			}
1824 
1825 			PR_MEM("%s: brd %d: edited source memlist:\n",
1826 				f, s_sbp->sb_num);
1827 			SBD_MEMLIST_DUMP(x_ml);
1828 
1829 #ifdef DEBUG
1830 			/* sanity check memlist */
1831 			d_ml = x_ml;
1832 			while (d_ml->next != NULL)
1833 				d_ml = d_ml->next;
1834 			ASSERT(x_ml->address == _ptob64(s_mp->sbm_basepfn) +
1835 				_ptob64(t_mp->sbm_slice_offset));
1836 			ASSERT(d_ml->address + d_ml->size ==
1837 				_ptob64(s_mq.last_nonrelocatable + 1));
1838 #endif
1839 
1840 			/*
1841 			 * x_ml now describes only the portion of the
1842 			 * source board that will be moved during the
1843 			 * copy/rename operation.
1844 			 */
1845 			d_ml = x_ml;
1846 		} else {
1847 			/* use original memlist; all spans will be moved */
1848 			d_ml = s_ml;
1849 		}
1850 
1851 		/* verify target can support source memory spans. */
1852 		if (memlist_canfit(d_ml, t_ml) == 0) {
1853 			PR_MEM("%s: source memlist won't"
1854 				" fit in target memlist\n", f);
1855 			PR_MEM("%s: source memlist:\n", f);
1856 			SBD_MEMLIST_DUMP(d_ml);
1857 			PR_MEM("%s: target memlist:\n", f);
1858 			SBD_MEMLIST_DUMP(t_ml);
1859 
1860 			continue;
1861 		}
1862 
1863 		/* NOTE: the value of d_ml is not used beyond this point */
1864 
1865 		PR_MEM("%s: checking for no-reloc on board %d, "
1866 			" basepfn=0x%lx, npages=%ld\n",
1867 			f,
1868 			t_sbp->sb_num,
1869 			t_mp->sbm_basepfn,
1870 			t_mp->sbm_npages);
1871 
1872 		rv = kphysm_del_span_query(
1873 			t_mp->sbm_basepfn, t_mp->sbm_npages, &mq);
1874 		if (rv != KPHYSM_OK) {
1875 			PR_MEM("%s: kphysm_del_span_query:"
1876 				" unexpected return value %d\n", f, rv);
1877 
1878 			continue;
1879 		}
1880 
1881 		if (mq.nonrelocatable != 0) {
1882 			PR_MEM("%s: candidate board %d has"
1883 				" nonrelocatable span [0x%lx..0x%lx]\n",
1884 				f,
1885 				t_sbp->sb_num,
1886 				mq.first_nonrelocatable,
1887 				mq.last_nonrelocatable);
1888 
1889 			continue;
1890 		}
1891 
1892 #ifdef DEBUG
1893 		/*
1894 		 * This is a debug tool for excluding certain boards
1895 		 * from being selected as a target board candidate.
1896 		 * sbd_ignore_board is only tested by this driver.
1897 		 * It must be set with adb, obp, /etc/system or your
1898 		 * favorite debugger.
1899 		 */
1900 		if (sbd_ignore_board &
1901 			(1 << (t_sbp->sb_num - 1))) {
1902 			PR_MEM("%s: sbd_ignore_board flag set,"
1903 				" ignoring board %d as candidate\n",
1904 				f, t_sbp->sb_num);
1905 			continue;
1906 		}
1907 #endif
1908 
1909 		/*
1910 		 * Make sure there is no memory overlap if this
1911 		 * target board is used for copy-rename.
1912 		 */
1913 		if (sbd_check_boundaries(phys_install, s_mp, t_mp) != 0)
1914 			continue;
1915 
1916 		/*
1917 		 * Reserve excess source board memory, if any.
1918 		 *
1919 		 * When the number of pages on the candidate target
1920 		 * board is less than the number of pages on the source,
1921 		 * then some spans (clearly) of the source board's address
1922 		 * space will not be covered by physical memory after the
1923 		 * copy/rename completes.  The following code block
1924 		 * schedules those spans to be deleted.
1925 		 */
1926 		if (t_mp->sbm_npages < s_mp->sbm_npages) {
1927 			pfn_t pfn;
1928 			int mlret;
1929 
1930 			d_ml = memlist_dup(s_ml);
1931 			if (d_ml == NULL) {
1932 				PR_MEM("%s: cant dup src brd memlist\n", f);
1933 				/* TODO: should abort */
1934 				continue;
1935 			}
1936 
1937 			/* calculate base pfn relative to target board */
1938 			pfn  = s_mp->sbm_basepfn & ~sm;
1939 			pfn += t_mp->sbm_slice_offset;
1940 
1941 			/* remove span that will reside on candidate board */
1942 			mlret = memlist_delete_span(
1943 				_ptob64(pfn),
1944 				_ptob64(t_mp->sbm_npages),
1945 				&d_ml);
1946 			PR_MEM("%s: mlret = %d\n", f, mlret);
1947 
1948 			PR_MEM("%s: brd %d: reserving src brd memlist:\n",
1949 				f, s_sbp->sb_num);
1950 			SBD_MEMLIST_DUMP(d_ml);
1951 
1952 			/* reserve excess spans */
1953 			if (sbd_reserve_mem_spans(
1954 				&s_mp->sbm_memhandle, d_ml) != 0) {
1955 
1956 				/* likely more non-reloc pages appeared */
1957 				/* TODO: restart from top? */
1958 				continue;
1959 			}
1960 		} else {
1961 			/* no excess source board memory */
1962 			d_ml = NULL;
1963 		}
1964 
1965 		s_mp->sbm_flags |= SBD_MFLAG_RESERVED;
1966 
1967 		/*
1968 		 * reserve all memory on target board.
1969 		 * NOTE: source board's memhandle is used.
1970 		 *
1971 		 * If this succeeds (eq 0), then target selection is
1972 		 * complete and all unwanted memory spans, both source and
1973 		 * target, have been reserved.  Loop is terminated.
1974 		 */
1975 		if (sbd_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) {
1976 			PR_MEM("%s: brd %d: target board memory reserved\n",
1977 				f, t_sbp->sb_num);
1978 
1979 			/* a candidate target board is now reserved */
1980 			t_mp->sbm_flags |= SBD_MFLAG_RESERVED;
1981 			c_mp = t_mp;
1982 
1983 			/* *** EXITING LOOP *** */
1984 			break;
1985 		}
1986 
1987 		/* did not successfully reserve the target board. */
1988 		PR_MEM("%s: could not reserve target board %d\n",
1989 			f, t_sbp->sb_num);
1990 
1991 		/*
1992 		 * NOTE: an undo of the sbd_reserve_mem_span work
1993 		 * will happen automatically when the memhandle
1994 		 * (s_mp->sbm_memhandle) is kphysm_del_release'd.
1995 		 */
1996 
1997 		s_mp->sbm_flags &= ~SBD_MFLAG_RESERVED;
1998 	}
1999 
2000 	/* clean up after memlist editing logic */
2001 	if (x_ml != NULL)
2002 		memlist_delete(x_ml);
2003 
2004 	FREESTRUCT(sets, sbd_mem_unit_t *, n_units_per_set * n_sets);
2005 
2006 	/*
2007 	 * c_mp will be NULL when the entire sets[] array
2008 	 * has been searched without reserving a target board.
2009 	 */
2010 	if (c_mp == NULL) {
2011 		PR_MEM("%s: brd %d: target selection failed.\n",
2012 			f, s_sbp->sb_num);
2013 
2014 		if (t_ml != NULL)
2015 			memlist_delete(t_ml);
2016 
2017 		return (-1);
2018 	}
2019 
2020 	PR_MEM("%s: found target board %d for source board %d\n",
2021 		f,
2022 		t_sbp->sb_num,
2023 		s_sbp->sb_num);
2024 
2025 	s_mp->sbm_peer = c_mp;
2026 	s_mp->sbm_flags |= SBD_MFLAG_SOURCE;
2027 	s_mp->sbm_del_mlist = d_ml;	/* spans to be deleted, if any */
2028 	s_mp->sbm_mlist = s_ml;
2029 	s_mp->sbm_cm.sbdev_busy = 1;
2030 
2031 	c_mp->sbm_peer = s_mp;
2032 	c_mp->sbm_flags |= SBD_MFLAG_TARGET;
2033 	c_mp->sbm_del_mlist = t_ml;	/* spans to be deleted */
2034 	c_mp->sbm_mlist = t_ml;
2035 	c_mp->sbm_cm.sbdev_busy = 1;
2036 
2037 	s_mp->sbm_flags &= ~SBD_MFLAG_MEMRESIZE;
2038 	if (c_mp->sbm_npages > s_mp->sbm_npages) {
2039 		s_mp->sbm_flags |= SBD_MFLAG_MEMUPSIZE;
2040 		PR_MEM("%s: upsize (source pgs 0x%lx < target pgs 0x%lx)\n",
2041 			f, s_mp->sbm_npages, c_mp->sbm_npages);
2042 	} else if (c_mp->sbm_npages < s_mp->sbm_npages) {
2043 		s_mp->sbm_flags |= SBD_MFLAG_MEMDOWNSIZE;
2044 		PR_MEM("%s: downsize (source pgs 0x%lx > target pgs 0x%lx)\n",
2045 			f, s_mp->sbm_npages, c_mp->sbm_npages);
2046 	}
2047 
2048 	return (0);
2049 }
2050 
2051 int
2052 sbd_move_memory(sbd_handle_t *hp, sbd_board_t *s_bp, sbd_board_t *t_bp)
2053 {
2054 	int	ret;
2055 	sbdp_handle_t	*hdp;
2056 	sbderror_t	*ep = SBD_HD2ERR(hp);
2057 
2058 	hdp = sbd_get_sbdp_handle(s_bp, hp);
2059 
2060 	ret = sbdp_move_memory(hdp, t_bp->sb_num);
2061 	if (ret != 0)
2062 		SBD_GET_PERR(hdp->h_err, ep);
2063 
2064 	sbd_release_sbdp_handle(hdp);
2065 
2066 	return (ret);
2067 }
2068 
2069 /*
2070  * Memlist support.
2071  */
2072 void
2073 memlist_delete(struct memlist *mlist)
2074 {
2075 	sbdp_handle_t	*hdp;
2076 
2077 	hdp = sbd_get_sbdp_handle(NULL, NULL);
2078 
2079 	(void) sbdp_del_memlist(hdp, mlist);
2080 
2081 	sbd_release_sbdp_handle(hdp);
2082 }
2083 
2084 struct memlist *
2085 memlist_dup(struct memlist *mlist)
2086 {
2087 	struct memlist *hl, *prev;
2088 
2089 	if (mlist == NULL)
2090 		return (NULL);
2091 
2092 	prev = NULL;
2093 	hl = NULL;
2094 	for (; mlist; mlist = mlist->next) {
2095 		struct memlist *mp;
2096 
2097 		mp = memlist_get_one();
2098 		if (mp == NULL) {
2099 			if (hl != NULL)
2100 				memlist_free_list(hl);
2101 			hl = NULL;
2102 			break;
2103 		}
2104 		mp->address = mlist->address;
2105 		mp->size = mlist->size;
2106 		mp->next = NULL;
2107 		mp->prev = prev;
2108 
2109 		if (prev == NULL)
2110 			hl = mp;
2111 		else
2112 			prev->next = mp;
2113 		prev = mp;
2114 	}
2115 
2116 	return (hl);
2117 }
2118 
2119 void
2120 memlist_dump(struct memlist *mlist)
2121 {
2122 	register struct memlist *ml;
2123 
2124 	if (mlist == NULL) {
2125 		PR_MEM("memlist> EMPTY\n");
2126 	} else {
2127 		for (ml = mlist; ml; ml = ml->next)
2128 			PR_MEM("memlist> 0x%" PRIx64 " "
2129 				"0x%" PRIx64 " \n",
2130 				ml->address, ml->size);
2131 	}
2132 }
2133 
2134 int
2135 memlist_intersect(struct memlist *al, struct memlist *bl)
2136 {
2137 	uint64_t	astart, aend, bstart, bend;
2138 
2139 	if ((al == NULL) || (bl == NULL))
2140 		return (0);
2141 
2142 	aend = al->address + al->size;
2143 	bstart = bl->address;
2144 	bend = bl->address + bl->size;
2145 
2146 	while (al && bl) {
2147 		while (al && (aend <= bstart))
2148 			if ((al = al->next) != NULL)
2149 				aend = al->address + al->size;
2150 		if (al == NULL)
2151 			return (0);
2152 
2153 		if ((astart = al->address) <= bstart)
2154 			return (1);
2155 
2156 		while (bl && (bend <= astart))
2157 			if ((bl = bl->next) != NULL)
2158 				bend = bl->address + bl->size;
2159 		if (bl == NULL)
2160 			return (0);
2161 
2162 		if ((bstart = bl->address) <= astart)
2163 			return (1);
2164 	}
2165 
2166 	return (0);
2167 }
2168 
2169 /*
2170  * Determine whether the source memlist (s_mlist) will
2171  * fit into the target memlist (t_mlist) in terms of
2172  * size and holes (i.e. based on same relative base address).
2173  */
2174 static int
2175 memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist)
2176 {
2177 	int		rv = 0;
2178 	uint64_t	s_basepa, t_basepa;
2179 	struct memlist	*s_ml, *t_ml;
2180 
2181 	if ((s_mlist == NULL) || (t_mlist == NULL))
2182 		return (0);
2183 
2184 	/*
2185 	 * Base both memlists on common base address (0).
2186 	 */
2187 	s_basepa = s_mlist->address;
2188 	t_basepa = t_mlist->address;
2189 
2190 	for (s_ml = s_mlist; s_ml; s_ml = s_ml->next)
2191 		s_ml->address -= s_basepa;
2192 
2193 	for (t_ml = t_mlist; t_ml; t_ml = t_ml->next)
2194 		t_ml->address -= t_basepa;
2195 
2196 	s_ml = s_mlist;
2197 	for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->next) {
2198 		uint64_t	s_start, s_end;
2199 		uint64_t	t_start, t_end;
2200 
2201 		t_start = t_ml->address;
2202 		t_end = t_start + t_ml->size;
2203 
2204 		for (; s_ml; s_ml = s_ml->next) {
2205 			s_start = s_ml->address;
2206 			s_end = s_start + s_ml->size;
2207 
2208 			if ((s_start < t_start) || (s_end > t_end))
2209 				break;
2210 		}
2211 	}
2212 	/*
2213 	 * If we ran out of source memlist chunks that mean
2214 	 * we found a home for all of them.
2215 	 */
2216 	if (s_ml == NULL)
2217 		rv = 1;
2218 
2219 	/*
2220 	 * Need to add base addresses back since memlists
2221 	 * are probably in use by caller.
2222 	 */
2223 	for (s_ml = s_mlist; s_ml; s_ml = s_ml->next)
2224 		s_ml->address += s_basepa;
2225 
2226 	for (t_ml = t_mlist; t_ml; t_ml = t_ml->next)
2227 		t_ml->address += t_basepa;
2228 
2229 	return (rv);
2230 }
2231 
2232 void
2233 sbd_attach_mem(sbd_handle_t *hp, sbderror_t *ep)
2234 {
2235 	sbd_mem_unit_t	*mp;
2236 	dev_info_t	*dip;
2237 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
2238 	sbdp_handle_t	*hdp;
2239 	int		err, unit;
2240 	struct memlist	*ml, *mc;
2241 	static fn_t	f = "sbd_attach_mem";
2242 	int		i;
2243 
2244 	PR_MEM("%s...\n", f);
2245 
2246 	/*
2247 	 * all four cpus have to be attached before
2248 	 * configuring mem
2249 	 */
2250 	for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) {
2251 		sbd_cpu_unit_t	*cpup;
2252 		struct cpu	*cp;
2253 
2254 		if (!SBD_DEV_IS_PRESENT(sbp, SBD_COMP_CPU, i))
2255 			continue;
2256 
2257 		if (!SBD_DEV_IS_ATTACHED(sbp, SBD_COMP_CPU, i))
2258 			goto error;
2259 
2260 		cpup = SBD_GET_BOARD_CPUUNIT(sbp, i);
2261 
2262 		if (cpup == NULL)
2263 			goto error;
2264 
2265 		mutex_enter(&cpu_lock);
2266 		cp = cpu_get(cpup->sbc_cpu_id);
2267 		if (cp == NULL) {
2268 			mutex_exit(&cpu_lock);
2269 			cmn_err(CE_WARN,
2270 			    "sbd:%s: cpu_get failed for cpu %d",
2271 			    f, cpup->sbc_cpu_id);
2272 			goto error;
2273 		}
2274 		if (cpu_is_poweredoff(cp)) {
2275 			mutex_exit(&cpu_lock);
2276 			goto error;
2277 		}
2278 		mutex_exit(&cpu_lock);
2279 		continue;
2280 
2281 error:
2282 		SBD_SET_ERR(ep, ESBD_CPUONLINE);
2283 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[i]);
2284 		(void) sbd_set_err_in_hdl(hp, ep);
2285 		return;
2286 	}
2287 
2288 	dip = *(sbp->sb_devlist[NIX(SBD_COMP_MEM)]);
2289 
2290 	hdp = sbd_get_sbdp_handle(sbp, hp);
2291 	unit = sbdp_get_unit_num(hdp, dip);
2292 	if (unit < 0) {
2293 		SBD_GET_PERR(hdp->h_err, ep);
2294 		sbd_release_sbdp_handle(hdp);
2295 		return;
2296 	}
2297 
2298 	ASSERT(sbp->sb_mempath[unit] != NULL);
2299 	ASSERT(e_ddi_branch_held(dip));
2300 
2301 	(void) ddi_pathname(dip, sbp->sb_mempath[unit]);
2302 
2303 	mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
2304 
2305 	ml = sbd_get_memlist(mp, ep);
2306 	if (ml == NULL) {
2307 		cmn_err(CE_WARN,
2308 			"sbd:%s: failed to get memlist for "
2309 			"board %d", f, sbp->sb_num);
2310 		/*
2311 		 * Need to record an error and return.
2312 		 */
2313 		SBD_SET_ERR(ep, ESBD_MEMFAIL);
2314 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2315 		sbd_release_sbdp_handle(hdp);
2316 		return;
2317 	}
2318 
2319 	SBD_MEMLIST_DUMP(ml);
2320 	err = 0;
2321 	for (mc = ml; mc; mc = mc->next) {
2322 		update_membounds_t umb;
2323 		pfn_t	base;
2324 		pgcnt_t npgs;
2325 
2326 		base = (pfn_t)(mc->address >> PAGESHIFT);
2327 		npgs = (pgcnt_t)(mc->size >> PAGESHIFT);
2328 
2329 		umb.u_board = sbp->sb_num;
2330 		umb.u_base = (uint64_t)base << MMU_PAGESHIFT;
2331 		umb.u_len = (uint64_t)npgs << MMU_PAGESHIFT;
2332 
2333 		lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
2334 		err = kphysm_add_memory_dynamic(base, npgs);
2335 
2336 		if (err != KPHYSM_OK) {
2337 			cmn_err(CE_WARN,
2338 			    "%s: kphysm_add_memory_dynamic fail %d", f, err);
2339 
2340 			/* translate kphysm error */
2341 			switch (err) {
2342 			case KPHYSM_ERESOURCE:
2343 				err = ESBD_NOMEM;
2344 				break;
2345 
2346 			case KPHYSM_EFAULT:
2347 				err = ESBD_FAULT;
2348 				break;
2349 
2350 			default:
2351 				err = ESBD_INVAL;
2352 				break;
2353 			}
2354 			break;
2355 		}
2356 
2357 		kcage_range_lock();
2358 		err = kcage_range_add(base, npgs, 1);
2359 		kcage_range_unlock();
2360 		if (err != 0) {
2361 			cmn_err(CE_WARN,
2362 			    "%s: kcage_range_add fail %d", f, err);
2363 
2364 			/* Translate kcage error. */
2365 			switch (err) {
2366 			case ENOMEM:
2367 				err = ESBD_NOMEM;
2368 				break;
2369 			default:
2370 				err = ESBD_INVAL;
2371 				break;
2372 			}
2373 			break;
2374 		}
2375 		(void) sbdp_mem_add_span(hdp, mc->address, mc->size);
2376 	}
2377 
2378 	if (err != 0) {
2379 		SBD_SET_ERR(ep, err);
2380 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2381 	}
2382 
2383 	memlist_delete(ml);
2384 	sbd_release_sbdp_handle(hdp);
2385 
2386 	/*
2387 	 * Now attach all mem devinfo nodes to the device tree.
2388 	 */
2389 	for (i = 0; i < SBD_NUM_MC_PER_BOARD; i++) {
2390 		if (mp->sbm_dip[i] == NULL)
2391 			continue;
2392 		ASSERT(e_ddi_branch_held(mp->sbm_dip[i]));
2393 		if (e_ddi_branch_configure(mp->sbm_dip[i], NULL, 0) &&
2394 		    SBD_GET_ERR(ep) == 0) {
2395 			SBD_SET_ERR(ep, ESBD_INVAL);
2396 			SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2397 		}
2398 	}
2399 }
2400 
2401 typedef struct {
2402 	kcondvar_t cond;
2403 	kmutex_t lock;
2404 	int error;
2405 	int done;
2406 } sbd_release_mem_sync_t;
2407 
2408 /*
2409  * When we reach here the memory being drained should have
2410  * already been reserved in sbd_pre_release_mem().
2411  * Our only task here is to kick off the "drain".
2412  * Returns -1 when error encountered or zero for success.
2413  */
2414 int
2415 sbd_release_mem(sbd_handle_t *hp, dev_info_t *dip, int unit)
2416 {
2417 	memhandle_t	mh;
2418 	int		err;
2419 	int		cancel_flag = 0;
2420 	int		e_code = 0;
2421 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
2422 	sbd_release_mem_sync_t rms;
2423 	static fn_t	f = "sbd_release_mem";
2424 
2425 	/*
2426 	 * If this node has a scheduled memory delete operation,
2427 	 * it will have a memhandle.  If it does have a memhandle (the
2428 	 * return value of sbd_get_memhandle is zero when true),
2429 	 * then perform the delete.
2430 	 */
2431 
2432 	if ((cancel_flag = sbd_get_memhandle(hp, dip, &mh)) != 0) {
2433 		cmn_err(CE_WARN, "%s: couldn't get the memhandle\n", f);
2434 		return (cancel_flag);
2435 	}
2436 
2437 	bzero((void *) &rms, sizeof (rms));
2438 
2439 	mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL);
2440 	cv_init(&rms.cond, NULL, CV_DRIVER, NULL);
2441 
2442 	mutex_enter(&rms.lock);
2443 	err = kphysm_del_start(mh, sbd_release_memory_done, (void *) &rms);
2444 	if (err == KPHYSM_OK) {
2445 		/* wait for completion */
2446 		while (!rms.done) {
2447 			if (cancel_flag) {
2448 				/* previously canceled */
2449 				cv_wait(&rms.cond, &rms.lock);
2450 			} else if (cv_wait_sig(&rms.cond, &rms.lock) == 0) {
2451 				/* interrupted: cancel and wait */
2452 				cancel_flag = -1;
2453 				(void) kphysm_del_cancel(mh);
2454 			}
2455 		}
2456 		/* get the result of the memory delete operation */
2457 		err = rms.error;
2458 	} else {
2459 		(void) kphysm_del_release(mh);
2460 	}
2461 
2462 	mutex_exit(&rms.lock);
2463 
2464 	cv_destroy(&rms.cond);
2465 	mutex_destroy(&rms.lock);
2466 
2467 	if (err != KPHYSM_OK) {
2468 		switch (err) {
2469 			case KPHYSM_ENOWORK:
2470 				e_code = ESBD_NOERROR;
2471 				break;
2472 
2473 			case KPHYSM_EHANDLE:
2474 			case KPHYSM_ESEQUENCE:
2475 				e_code = ESBD_INTERNAL;
2476 				break;
2477 
2478 			case KPHYSM_ENOTVIABLE:
2479 				e_code = ESBD_MEM_NOTVIABLE;
2480 				break;
2481 
2482 			case KPHYSM_EREFUSED:
2483 				e_code = ESBD_MEM_REFUSED;
2484 				break;
2485 
2486 			case KPHYSM_ENONRELOC:
2487 				e_code = ESBD_MEM_NONRELOC;
2488 				break;
2489 
2490 			case KPHYSM_ECANCELLED:
2491 				e_code = ESBD_MEM_CANCELLED;
2492 				break;
2493 
2494 			case KPHYSM_ERESOURCE:
2495 				e_code = ESBD_MEMFAIL;
2496 				break;
2497 
2498 			default:
2499 				cmn_err(CE_WARN, "sbd:%s:"
2500 					" unexpected kphysm error code %d,"
2501 					" dip 0x%p",
2502 					f, err, (void *)dip);
2503 
2504 				e_code = ESBD_IO;
2505 				break;
2506 		}
2507 
2508 		if (e_code != 0) {
2509 			cancel_flag = -1;
2510 			SBD_SET_ERR(SBD_HD2ERR(hp), e_code);
2511 			SBD_SET_ERRSTR(SBD_HD2ERR(hp), sbp->sb_mempath[unit]);
2512 		}
2513 	}
2514 
2515 	return (cancel_flag);
2516 }
2517 
2518 /*
2519  * Memory has been logically removed by the time this routine is called.
2520  */
2521 void
2522 sbd_release_memory_done(void *arg, int error)
2523 {
2524 	sbd_release_mem_sync_t *ds = arg;
2525 
2526 	mutex_enter(&ds->lock);
2527 	ds->error = error;
2528 	ds->done = 1;
2529 	cv_signal(&ds->cond);
2530 	mutex_exit(&ds->lock);
2531 }
2532 
2533 /*
2534  * If detaching node contains memory that is "non-permanent"
2535  * then the memory adr's are simply cleared.  If the memory
2536  * is non-relocatable, then do a copy-rename.
2537  */
2538 int
2539 sbd_detach_memory(sbd_handle_t *hp, sbderror_t *ep, sbd_mem_unit_t *s_mp,
2540 	int unit)
2541 {
2542 	int			rv;
2543 	sbd_mem_unit_t		*t_mp;
2544 	sbd_istate_t		state;
2545 	sbdp_handle_t		*hdp;
2546 	sbd_board_t 		*sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
2547 	sbd_board_t		*tbp;
2548 	static fn_t		f = "sbd_detach_memory";
2549 
2550 	PR_MEM("%s...\n", f);
2551 
2552 	/* lookup target mem unit and target board structure, if any */
2553 	if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) {
2554 		t_mp = s_mp->sbm_peer;
2555 		ASSERT(t_mp != NULL);
2556 		ASSERT(t_mp->sbm_peer == s_mp);
2557 		tbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
2558 	} else {
2559 		t_mp = NULL;
2560 	}
2561 
2562 	/* verify mem unit's state is UNREFERENCED */
2563 	state = s_mp->sbm_cm.sbdev_state;
2564 	if (state != SBD_STATE_UNREFERENCED) {
2565 		cmn_err(CE_WARN, "%s: invalid state transition for"
2566 			" mem-unit (%d.%d)",
2567 			f,
2568 			sbp->sb_num,
2569 			s_mp->sbm_cm.sbdev_unum);
2570 		SBD_SET_ERR(ep, ESBD_STATE);
2571 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2572 		return (-1);
2573 	}
2574 
2575 	/* verify target mem unit's state is UNREFERENCED, if any */
2576 	if (t_mp != NULL) {
2577 		state = t_mp->sbm_cm.sbdev_state;
2578 		if (state != SBD_STATE_UNREFERENCED) {
2579 			cmn_err(CE_WARN, "%s: invalid state transition for"
2580 				" target mem-unit (%d.%d)",
2581 				f,
2582 				tbp->sb_num,
2583 				t_mp->sbm_cm.sbdev_unum);
2584 			SBD_SET_ERR(ep, ESBD_STATE);
2585 			SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2586 			return (-1);
2587 		}
2588 	}
2589 
2590 	/*
2591 	 * Displacement flush all ecaches in the system.
2592 	 * That's the fastest way to remove all cache references
2593 	 * to the detaching memory.
2594 	 */
2595 	xc_all(sbd_flush_ecache, 0, 0);
2596 
2597 	hdp = sbd_get_sbdp_handle(sbp, hp);
2598 
2599 	/*
2600 	 * If there is no target board (no copy/rename was needed), then
2601 	 * we're done!
2602 	 */
2603 	if (t_mp == NULL) {
2604 		/*
2605 		 * Reprogram interconnect hardware and disable
2606 		 * memory controllers for memory node that's going away.
2607 		 */
2608 
2609 		rv = sbdphw_disable_memctrl(hdp, s_mp->sbm_cm.sbdev_dip);
2610 		if (rv) {
2611 			cmn_err(CE_WARN,
2612 				"%s: failed to deprogram mem-unit (%d.%d),"
2613 				" dip 0x%p",
2614 				f,
2615 				sbp->sb_num,
2616 				s_mp->sbm_cm.sbdev_unum,
2617 				(void *)s_mp->sbm_cm.sbdev_dip);
2618 			/*
2619 			 * Make sure we don't rewrite an sbdp error
2620 			 */
2621 			if (SBD_GET_ERR(ep) != 0) {
2622 				SBD_SET_ERR(ep, ESBD_HW_PROGRAM);
2623 				SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2624 			}
2625 		}
2626 	} else {
2627 		rv = sbd_move_memory(hp, sbp, tbp);
2628 		if (rv) {
2629 			int i;
2630 
2631 			cmn_err(CE_WARN, "%s: failed to move memory"
2632 				" from board %d to board %d",
2633 				f,
2634 				sbp->sb_num,
2635 				tbp->sb_num);
2636 			/*
2637 			 * Make sure we don't rewrite an sbdp error
2638 			 */
2639 			if (SBD_GET_ERR(ep) != 0) {
2640 				SBD_SET_ERR(ep, ESBD_INTERNAL);
2641 				SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2642 			}
2643 			/*
2644 			 * If we failed here, it means that the target board's
2645 			 * memory has been unconfigured.  We need to configure
2646 			 * it back
2647 			 */
2648 			for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) {
2649 				int		unit;
2650 				dev_info_t	*dip;
2651 				dev_info_t	**devlist;
2652 
2653 
2654 				devlist = tbp->sb_devlist[NIX(SBD_COMP_MEM)];
2655 				dip = devlist[i];
2656 				sbd_reset_error_sbdph(hdp);
2657 				unit = sbdp_get_unit_num(hdp, dip);
2658 
2659 				/*
2660 				 * We already saved the error that created
2661 				 * this mess.  If we fail, make sure not
2662 				 * to overwrite the original error
2663 				 */
2664 				if (unit == -1) {
2665 					continue;
2666 				}
2667 				if (sbd_cancel_mem(hp, unit) != 0)
2668 					continue;
2669 
2670 				t_mp->sbm_flags = 0;
2671 				/*
2672 				 * clean up
2673 				 */
2674 				sbd_mem_cleanup(s_mp, t_mp, ep);
2675 				if (s_mp->sbm_mlist) {
2676 					memlist_delete(s_mp->sbm_mlist);
2677 					s_mp->sbm_mlist = NULL;
2678 				}
2679 
2680 				SBD_DEVICE_TRANSITION(tbp, SBD_COMP_MEM,
2681 				    unit, SBD_STATE_CONFIGURED);
2682 			}
2683 		}
2684 
2685 		PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n",
2686 			f,
2687 			rv ? "FAILED" : "COMPLETED",
2688 			sbp->sb_num,
2689 			tbp->sb_num);
2690 	}
2691 
2692 	if (rv == 0) {
2693 		update_membounds_t umb;
2694 
2695 		umb.u_board = sbp->sb_num;
2696 		umb.u_base = (uint64_t)-1;
2697 		umb.u_len = (uint64_t)-1;
2698 
2699 		lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);
2700 	}
2701 
2702 	sbd_release_sbdp_handle(hdp);
2703 	return (rv);
2704 }
2705 
2706 /*ARGSUSED*/
2707 static void
2708 sbd_flush_ecache(uint64_t a, uint64_t b)
2709 {
2710 	cpu_flush_ecache();
2711 }
2712