xref: /illumos-gate/usr/src/uts/sun4u/opl/io/dr_mem.c (revision 68ac2337)
125cf1a30Sjl /*
225cf1a30Sjl  * CDDL HEADER START
325cf1a30Sjl  *
425cf1a30Sjl  * The contents of this file are subject to the terms of the
525cf1a30Sjl  * Common Development and Distribution License (the "License").
625cf1a30Sjl  * You may not use this file except in compliance with the License.
725cf1a30Sjl  *
825cf1a30Sjl  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
925cf1a30Sjl  * or http://www.opensolaris.org/os/licensing.
1025cf1a30Sjl  * See the License for the specific language governing permissions
1125cf1a30Sjl  * and limitations under the License.
1225cf1a30Sjl  *
1325cf1a30Sjl  * When distributing Covered Code, include this CDDL HEADER in each
1425cf1a30Sjl  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1525cf1a30Sjl  * If applicable, add the following below this CDDL HEADER, with the
1625cf1a30Sjl  * fields enclosed by brackets "[]" replaced with your own identifying
1725cf1a30Sjl  * information: Portions Copyright [yyyy] [name of copyright owner]
1825cf1a30Sjl  *
1925cf1a30Sjl  * CDDL HEADER END
2025cf1a30Sjl  */
2125cf1a30Sjl /*
22*68ac2337Sjl  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
2325cf1a30Sjl  * Use is subject to license terms.
2425cf1a30Sjl  */
2525cf1a30Sjl 
2625cf1a30Sjl #pragma ident	"%Z%%M%	%I%	%E% SMI"
2725cf1a30Sjl 
2825cf1a30Sjl /*
2925cf1a30Sjl  * DR memory support routines.
3025cf1a30Sjl  */
3125cf1a30Sjl 
3225cf1a30Sjl #include <sys/note.h>
3325cf1a30Sjl #include <sys/debug.h>
3425cf1a30Sjl #include <sys/types.h>
3525cf1a30Sjl #include <sys/errno.h>
3625cf1a30Sjl #include <sys/param.h>
3725cf1a30Sjl #include <sys/dditypes.h>
3825cf1a30Sjl #include <sys/kmem.h>
3925cf1a30Sjl #include <sys/conf.h>
4025cf1a30Sjl #include <sys/ddi.h>
4125cf1a30Sjl #include <sys/sunddi.h>
4225cf1a30Sjl #include <sys/sunndi.h>
4325cf1a30Sjl #include <sys/ddi_impldefs.h>
4425cf1a30Sjl #include <sys/ndi_impldefs.h>
4525cf1a30Sjl #include <sys/sysmacros.h>
4625cf1a30Sjl #include <sys/machsystm.h>
4725cf1a30Sjl #include <sys/spitregs.h>
4825cf1a30Sjl #include <sys/cpuvar.h>
4925cf1a30Sjl #include <sys/promif.h>
5025cf1a30Sjl #include <vm/seg_kmem.h>
5125cf1a30Sjl #include <sys/lgrp.h>
5225cf1a30Sjl #include <sys/platform_module.h>
5325cf1a30Sjl 
5425cf1a30Sjl #include <vm/page.h>
5525cf1a30Sjl 
5625cf1a30Sjl #include <sys/dr.h>
5725cf1a30Sjl #include <sys/dr_util.h>
5825cf1a30Sjl #include <sys/drmach.h>
59*68ac2337Sjl #include <sys/kobj.h>
6025cf1a30Sjl 
6125cf1a30Sjl extern struct memlist	*phys_install;
62*68ac2337Sjl extern vnode_t		*retired_pages;
6325cf1a30Sjl 
6425cf1a30Sjl /* TODO: push this reference below drmach line */
6525cf1a30Sjl extern int		kcage_on;
6625cf1a30Sjl 
6725cf1a30Sjl /* for the DR*INTERNAL_ERROR macros.  see sys/dr.h. */
68*68ac2337Sjl static char *dr_ie_fmt = "dr_mem.c %d";
6925cf1a30Sjl 
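/*
 * Target preference classification returned by dr_get_target_preference()
 * when evaluating a candidate target board for a copy-rename operation.
 */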
7025cf1a30Sjl typedef enum {
7125cf1a30Sjl 	DR_TP_INVALID = -1,
7225cf1a30Sjl 	DR_TP_SAME,
7325cf1a30Sjl 	DR_TP_LARGE,
7425cf1a30Sjl 	DR_TP_NONRELOC,
7525cf1a30Sjl 	DR_TP_FLOATING
7625cf1a30Sjl } dr_target_pref_t;
7725cf1a30Sjl 
7825cf1a30Sjl static int		dr_post_detach_mem_unit(dr_mem_unit_t *mp);
7925cf1a30Sjl static int		dr_reserve_mem_spans(memhandle_t *mhp,
8025cf1a30Sjl 				struct memlist *mlist);
8125cf1a30Sjl static int		dr_select_mem_target(dr_handle_t *hp,
8225cf1a30Sjl 				dr_mem_unit_t *mp, struct memlist *ml);
8325cf1a30Sjl static void		dr_init_mem_unit_data(dr_mem_unit_t *mp);
8425cf1a30Sjl static struct memlist	*dr_memlist_del_retired_pages(struct memlist *ml);
8525cf1a30Sjl static dr_target_pref_t	dr_get_target_preference(dr_handle_t *hp,
8625cf1a30Sjl 				dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
8725cf1a30Sjl 				struct memlist *s_ml, struct memlist *x_ml,
8825cf1a30Sjl 				struct memlist *b_ml);
8925cf1a30Sjl 
9025cf1a30Sjl static int		memlist_canfit(struct memlist *s_mlist,
9125cf1a30Sjl 				struct memlist *t_mlist);
9225cf1a30Sjl static int		dr_del_mlist_query(struct memlist *mlist,
9325cf1a30Sjl 				memquery_t *mp);
9425cf1a30Sjl static struct memlist	*dr_get_copy_mlist(struct memlist *s_ml,
9525cf1a30Sjl 				struct memlist *t_ml, dr_mem_unit_t *s_mp,
9625cf1a30Sjl 				dr_mem_unit_t *t_mp);
9725cf1a30Sjl static struct memlist	*dr_get_nonreloc_mlist(struct memlist *s_ml,
9825cf1a30Sjl 				dr_mem_unit_t *s_mp);
9925cf1a30Sjl static int		dr_memlist_canfit(struct memlist *s_mlist,
10025cf1a30Sjl 				struct memlist *t_mlist, dr_mem_unit_t *s_mp,
10125cf1a30Sjl 				dr_mem_unit_t *t_mp);
10225cf1a30Sjl 
10325cf1a30Sjl /*
10425cf1a30Sjl  * dr_mem_unit_t.sbm_flags
10525cf1a30Sjl  */
10625cf1a30Sjl #define	DR_MFLAG_RESERVED	0x01	/* mem unit reserved for delete */
10725cf1a30Sjl #define	DR_MFLAG_SOURCE		0x02	/* source brd of copy/rename op */
10825cf1a30Sjl #define	DR_MFLAG_TARGET		0x04	/* target brd of copy/rename op */
10925cf1a30Sjl #define	DR_MFLAG_RELOWNER	0x20	/* memory release (delete) owner */
11025cf1a30Sjl #define	DR_MFLAG_RELDONE	0x40	/* memory release (delete) done */
11125cf1a30Sjl 
11225cf1a30Sjl /* helper macros */
11325cf1a30Sjl #define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)	/* pages to 64-bit byte count */
11425cf1a30Sjl #define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))	/* 64-bit byte count to pages */
11525cf1a30Sjl 
11625cf1a30Sjl static struct memlist *
11725cf1a30Sjl dr_get_memlist(dr_mem_unit_t *mp)
11825cf1a30Sjl {
11925cf1a30Sjl 	struct memlist	*mlist = NULL;
12025cf1a30Sjl 	sbd_error_t	*err;
12125cf1a30Sjl 	static fn_t	f = "dr_get_memlist";
12225cf1a30Sjl 
12325cf1a30Sjl 	PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path);
12425cf1a30Sjl 
12525cf1a30Sjl 	/*
12625cf1a30Sjl 	 * Return cached memlist, if present.
12725cf1a30Sjl 	 * This memlist will be present following an
12825cf1a30Sjl 	 * unconfigure (a.k.a. detach) of this memunit.
12925cf1a30Sjl 	 * It should only be used in the case where a configure
13025cf1a30Sjl 	 * is bringing this memunit back in without going
13125cf1a30Sjl 	 * through the disconnect and connect states.
13225cf1a30Sjl 	 */
13325cf1a30Sjl 	if (mp->sbm_mlist) {
13425cf1a30Sjl 		PR_MEM("%s: found cached memlist\n", f);
13525cf1a30Sjl 
13625cf1a30Sjl 		mlist = memlist_dup(mp->sbm_mlist);
13725cf1a30Sjl 	} else {
13825cf1a30Sjl 		uint64_t basepa = _ptob64(mp->sbm_basepfn);
13925cf1a30Sjl 
14025cf1a30Sjl 		/* attempt to construct a memlist using phys_install */
14125cf1a30Sjl 
14225cf1a30Sjl 		/* round down to slice base address */
14325cf1a30Sjl 		basepa &= ~(mp->sbm_slice_size - 1);
14425cf1a30Sjl 
14525cf1a30Sjl 		/* get a copy of phys_install to edit */
14625cf1a30Sjl 		memlist_read_lock();
14725cf1a30Sjl 		mlist = memlist_dup(phys_install);
14825cf1a30Sjl 		memlist_read_unlock();
14925cf1a30Sjl 
15025cf1a30Sjl 		/* trim lower irrelevant span */
15125cf1a30Sjl 		if (mlist)
15225cf1a30Sjl 			mlist = memlist_del_span(mlist, 0ull, basepa);
15325cf1a30Sjl 
15425cf1a30Sjl 		/* trim upper irrelevant span */
15525cf1a30Sjl 		if (mlist) {
15625cf1a30Sjl 			uint64_t endpa;
15725cf1a30Sjl 
15825cf1a30Sjl 			basepa += mp->sbm_slice_size;
15925cf1a30Sjl 			endpa = _ptob64(physmax + 1);
16025cf1a30Sjl 			if (endpa > basepa)
16125cf1a30Sjl 				mlist = memlist_del_span(
16225cf1a30Sjl 					mlist, basepa,
16325cf1a30Sjl 					endpa - basepa);
16425cf1a30Sjl 		}
16525cf1a30Sjl 
16625cf1a30Sjl 		if (mlist) {
16725cf1a30Sjl 			/* successfully built a memlist */
16825cf1a30Sjl 			PR_MEM("%s: derived memlist from phys_install\n", f);
16925cf1a30Sjl 		}
17025cf1a30Sjl 
17125cf1a30Sjl 		/* if no mlist yet, try platform layer */
17225cf1a30Sjl 		if (!mlist) {
17325cf1a30Sjl 			err = drmach_mem_get_memlist(
17425cf1a30Sjl 				mp->sbm_cm.sbdev_id, &mlist);
17525cf1a30Sjl 			if (err) {
17625cf1a30Sjl 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
17725cf1a30Sjl 				mlist = NULL; /* paranoia */
17825cf1a30Sjl 			}
17925cf1a30Sjl 		}
18025cf1a30Sjl 	}
18125cf1a30Sjl 
18225cf1a30Sjl 	PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path);
18325cf1a30Sjl 	PR_MEMLIST_DUMP(mlist);
18425cf1a30Sjl 
18525cf1a30Sjl 	return (mlist);
18625cf1a30Sjl }
18725cf1a30Sjl 
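/*
 * Synchronization state shared between dr_release_mem() and the
 * dr_mem_del_done() callback: the callback records the kphysm result
 * in 'error', sets 'done', and signals the waiter.
 */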
18825cf1a30Sjl typedef struct {
18925cf1a30Sjl 	kcondvar_t cond;
19025cf1a30Sjl 	kmutex_t lock;
19125cf1a30Sjl 	int error;
19225cf1a30Sjl 	int done;
19325cf1a30Sjl } dr_release_mem_sync_t;
19425cf1a30Sjl 
19525cf1a30Sjl /*
19625cf1a30Sjl  * Memory has been logically removed by the time this routine is called.
19725cf1a30Sjl  */
19825cf1a30Sjl static void
19925cf1a30Sjl dr_mem_del_done(void *arg, int error)
20025cf1a30Sjl {
20125cf1a30Sjl 	dr_release_mem_sync_t *ds = arg;
20225cf1a30Sjl 
20325cf1a30Sjl 	mutex_enter(&ds->lock);
20425cf1a30Sjl 	ds->error = error;
20525cf1a30Sjl 	ds->done = 1;
20625cf1a30Sjl 	cv_signal(&ds->cond);
20725cf1a30Sjl 	mutex_exit(&ds->lock);
20825cf1a30Sjl }
20925cf1a30Sjl 
21025cf1a30Sjl /*
21125cf1a30Sjl  * When we reach here the memory being drained should have
21225cf1a30Sjl  * already been reserved in dr_pre_release_mem().
21325cf1a30Sjl  * Our only task here is to kick off the "drain" and wait
21425cf1a30Sjl  * for it to finish.
21525cf1a30Sjl  */
21625cf1a30Sjl void
21725cf1a30Sjl dr_release_mem(dr_common_unit_t *cp)
21825cf1a30Sjl {
21925cf1a30Sjl 	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
22025cf1a30Sjl 	int		err;
22125cf1a30Sjl 	dr_release_mem_sync_t rms;
22225cf1a30Sjl 	static fn_t	f = "dr_release_mem";
22325cf1a30Sjl 
22425cf1a30Sjl 	/* check that this memory unit has been reserved */
22525cf1a30Sjl 	if (!(mp->sbm_flags & DR_MFLAG_RELOWNER)) {
22625cf1a30Sjl 		DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
22725cf1a30Sjl 		return;
22825cf1a30Sjl 	}
22925cf1a30Sjl 
23025cf1a30Sjl 	bzero((void *) &rms, sizeof (rms));
23125cf1a30Sjl 
23225cf1a30Sjl 	mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL);
23325cf1a30Sjl 	cv_init(&rms.cond, NULL, CV_DRIVER, NULL);
23425cf1a30Sjl 
23525cf1a30Sjl 	mutex_enter(&rms.lock);
23625cf1a30Sjl 	err = kphysm_del_start(mp->sbm_memhandle,
23725cf1a30Sjl 		dr_mem_del_done, (void *) &rms);
23825cf1a30Sjl 	if (err == KPHYSM_OK) {
23925cf1a30Sjl 		/* wait for completion or interrupt */
24025cf1a30Sjl 		while (!rms.done) {
24125cf1a30Sjl 			if (cv_wait_sig(&rms.cond, &rms.lock) == 0) {
24225cf1a30Sjl 				/* then there is a pending UNIX signal */
24325cf1a30Sjl 				(void) kphysm_del_cancel(mp->sbm_memhandle);
24425cf1a30Sjl 
24525cf1a30Sjl 				/* wait for completion */
24625cf1a30Sjl 				while (!rms.done)
24725cf1a30Sjl 					cv_wait(&rms.cond, &rms.lock);
24825cf1a30Sjl 			}
24925cf1a30Sjl 		}
25025cf1a30Sjl 		/* get the result of the memory delete operation */
25125cf1a30Sjl 		err = rms.error;
25225cf1a30Sjl 	}
25325cf1a30Sjl 	mutex_exit(&rms.lock);
25425cf1a30Sjl 
25525cf1a30Sjl 	cv_destroy(&rms.cond);
25625cf1a30Sjl 	mutex_destroy(&rms.lock);
25725cf1a30Sjl 
25825cf1a30Sjl 	if (err != KPHYSM_OK) {
25925cf1a30Sjl 		int e_code;
26025cf1a30Sjl 
26125cf1a30Sjl 		switch (err) {
26225cf1a30Sjl 			case KPHYSM_ENOWORK:
26325cf1a30Sjl 				e_code = ESBD_NOERROR;
26425cf1a30Sjl 				break;
26525cf1a30Sjl 
26625cf1a30Sjl 			case KPHYSM_EHANDLE:
26725cf1a30Sjl 			case KPHYSM_ESEQUENCE:
26825cf1a30Sjl 				e_code = ESBD_INTERNAL;
26925cf1a30Sjl 				break;
27025cf1a30Sjl 
27125cf1a30Sjl 			case KPHYSM_ENOTVIABLE:
27225cf1a30Sjl 				e_code = ESBD_MEM_NOTVIABLE;
27325cf1a30Sjl 				break;
27425cf1a30Sjl 
27525cf1a30Sjl 			case KPHYSM_EREFUSED:
27625cf1a30Sjl 				e_code = ESBD_MEM_REFUSED;
27725cf1a30Sjl 				break;
27825cf1a30Sjl 
27925cf1a30Sjl 			case KPHYSM_ENONRELOC:
28025cf1a30Sjl 				e_code = ESBD_MEM_NONRELOC;
28125cf1a30Sjl 				break;
28225cf1a30Sjl 
28325cf1a30Sjl 			case KPHYSM_ECANCELLED:
28425cf1a30Sjl 				e_code = ESBD_MEM_CANCELLED;
28525cf1a30Sjl 				break;
28625cf1a30Sjl 
28725cf1a30Sjl 			case KPHYSM_ERESOURCE:
28825cf1a30Sjl 				e_code = ESBD_MEMFAIL;
28925cf1a30Sjl 				break;
29025cf1a30Sjl 
29125cf1a30Sjl 			default:
29225cf1a30Sjl 				cmn_err(CE_WARN,
29325cf1a30Sjl 					"%s: unexpected kphysm error code %d,"
29425cf1a30Sjl 					" id 0x%p",
29525cf1a30Sjl 					f, err, mp->sbm_cm.sbdev_id);
29625cf1a30Sjl 
29725cf1a30Sjl 				e_code = ESBD_IO;
29825cf1a30Sjl 				break;
29925cf1a30Sjl 		}
30025cf1a30Sjl 
30125cf1a30Sjl 		if (e_code != ESBD_NOERROR) {
30225cf1a30Sjl 			dr_dev_err(CE_IGNORE, &mp->sbm_cm, e_code);
30325cf1a30Sjl 		}
30425cf1a30Sjl 	}
30525cf1a30Sjl }
30625cf1a30Sjl 
30725cf1a30Sjl void
30825cf1a30Sjl dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
30925cf1a30Sjl {
31025cf1a30Sjl 	_NOTE(ARGUNUSED(hp))
31125cf1a30Sjl 
31225cf1a30Sjl 	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
31325cf1a30Sjl 	struct memlist	*ml, *mc;
31425cf1a30Sjl 	sbd_error_t	*err;
31525cf1a30Sjl 	static fn_t	f = "dr_attach_mem";
31625cf1a30Sjl 
31725cf1a30Sjl 	PR_MEM("%s...\n", f);
31825cf1a30Sjl 
31925cf1a30Sjl 	dr_lock_status(hp->h_bd);
32025cf1a30Sjl 	err = drmach_configure(cp->sbdev_id, 0);
32125cf1a30Sjl 	dr_unlock_status(hp->h_bd);
32225cf1a30Sjl 	if (err) {
32325cf1a30Sjl 		DRERR_SET_C(&cp->sbdev_error, &err);
32425cf1a30Sjl 		return;
32525cf1a30Sjl 	}
32625cf1a30Sjl 
32725cf1a30Sjl 	ml = dr_get_memlist(mp);
32825cf1a30Sjl 	for (mc = ml; mc; mc = mc->next) {
32925cf1a30Sjl 		int		 rv;
33025cf1a30Sjl 		sbd_error_t	*err;
33125cf1a30Sjl 
33225cf1a30Sjl 		rv = kphysm_add_memory_dynamic(
33325cf1a30Sjl 			(pfn_t)(mc->address >> PAGESHIFT),
33425cf1a30Sjl 			(pgcnt_t)(mc->size >> PAGESHIFT));
33525cf1a30Sjl 		if (rv != KPHYSM_OK) {
33625cf1a30Sjl 			/*
33725cf1a30Sjl 			 * translate kphysm error and
33825cf1a30Sjl 			 * store in devlist error
33925cf1a30Sjl 			 */
34025cf1a30Sjl 			switch (rv) {
34125cf1a30Sjl 			case KPHYSM_ERESOURCE:
34225cf1a30Sjl 				rv = ESBD_NOMEM;
34325cf1a30Sjl 				break;
34425cf1a30Sjl 
34525cf1a30Sjl 			case KPHYSM_EFAULT:
34625cf1a30Sjl 				rv = ESBD_FAULT;
34725cf1a30Sjl 				break;
34825cf1a30Sjl 
34925cf1a30Sjl 			default:
35025cf1a30Sjl 				rv = ESBD_INTERNAL;
35125cf1a30Sjl 				break;
35225cf1a30Sjl 			}
35325cf1a30Sjl 
35425cf1a30Sjl 			if (rv == ESBD_INTERNAL) {
35525cf1a30Sjl 				DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
35625cf1a30Sjl 			} else
35725cf1a30Sjl 				dr_dev_err(CE_WARN, &mp->sbm_cm, rv);
35825cf1a30Sjl 			break;
35925cf1a30Sjl 		}
36025cf1a30Sjl 
36125cf1a30Sjl 		err = drmach_mem_add_span(
36225cf1a30Sjl 			mp->sbm_cm.sbdev_id, mc->address, mc->size);
36325cf1a30Sjl 		if (err) {
36425cf1a30Sjl 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
36525cf1a30Sjl 			break;
36625cf1a30Sjl 		}
36725cf1a30Sjl 	}
36825cf1a30Sjl 
36925cf1a30Sjl 	memlist_delete(ml);
37025cf1a30Sjl 
37125cf1a30Sjl 	/* back out if configure failed */
37225cf1a30Sjl 	if (mp->sbm_cm.sbdev_error != NULL) {
37325cf1a30Sjl 		dr_lock_status(hp->h_bd);
37425cf1a30Sjl 		err = drmach_unconfigure(cp->sbdev_id, 0);
37525cf1a30Sjl 		if (err)
37625cf1a30Sjl 			sbd_err_clear(&err);
37725cf1a30Sjl 		dr_unlock_status(hp->h_bd);
37825cf1a30Sjl 	}
37925cf1a30Sjl }
38025cf1a30Sjl 
38125cf1a30Sjl static struct memlist *
38225cf1a30Sjl dr_memlist_del_retired_pages(struct memlist *mlist)
38325cf1a30Sjl {
38425cf1a30Sjl 	page_t		*pp;
38525cf1a30Sjl 	pfn_t		pfn;
38625cf1a30Sjl 	kmutex_t	*vphm;
387*68ac2337Sjl 	vnode_t		*vp = retired_pages;
38825cf1a30Sjl 	static fn_t	f = "dr_memlist_del_retired_pages";
38925cf1a30Sjl 
39025cf1a30Sjl 	vphm = page_vnode_mutex(vp);
39125cf1a30Sjl 	mutex_enter(vphm);
39225cf1a30Sjl 
39325cf1a30Sjl 	PR_MEM("%s\n", f);
39425cf1a30Sjl 
39525cf1a30Sjl 	if ((pp = vp->v_pages) == NULL) {
39625cf1a30Sjl 		mutex_exit(vphm);
39725cf1a30Sjl 		return (mlist);
39825cf1a30Sjl 	}
39925cf1a30Sjl 
40025cf1a30Sjl 	do {
40125cf1a30Sjl 		ASSERT(pp != NULL);
40225cf1a30Sjl 		/*
40325cf1a30Sjl 		 * page_downgrade happens after page_hashin, so we
40425cf1a30Sjl 		 * can't assert PAGE_SE. Just assert locked to catch
40525cf1a30Sjl 		 * changes to the retired vnode locking scheme.
40625cf1a30Sjl 		 */
40725cf1a30Sjl 		ASSERT(PAGE_LOCKED(pp));
408*68ac2337Sjl 		ASSERT(pp->p_vnode == retired_pages);
40925cf1a30Sjl 
41025cf1a30Sjl 		if (!page_trylock(pp, SE_SHARED))
41125cf1a30Sjl 			continue;
41225cf1a30Sjl 
41325cf1a30Sjl 		pfn = page_pptonum(pp);
41425cf1a30Sjl 
41525cf1a30Sjl 		ASSERT((pp->p_offset >> PAGESHIFT) == pfn);
41625cf1a30Sjl 		/*
41725cf1a30Sjl 		 * Page retirement currently breaks large pages into PAGESIZE
41825cf1a30Sjl 		 * pages. If this changes, need to remove the assert and deal
41925cf1a30Sjl 		 * with different page sizes.
42025cf1a30Sjl 		 */
42125cf1a30Sjl 		ASSERT(pp->p_szc == 0);
42225cf1a30Sjl 
42325cf1a30Sjl 		if (address_in_memlist(mlist, ptob(pfn), PAGESIZE)) {
42425cf1a30Sjl 			mlist = memlist_del_span(mlist, ptob(pfn), PAGESIZE);
42525cf1a30Sjl 			PR_MEM("deleted retired page 0x%lx (pfn 0x%lx) "
42625cf1a30Sjl 			    "from memlist\n", ptob(pfn), pfn);
42725cf1a30Sjl 		}
42825cf1a30Sjl 
42925cf1a30Sjl 		page_unlock(pp);
43025cf1a30Sjl 	} while ((pp = pp->p_vpnext) != vp->v_pages);
43125cf1a30Sjl 
43225cf1a30Sjl 	mutex_exit(vphm);
43325cf1a30Sjl 
43425cf1a30Sjl 	return (mlist);
43525cf1a30Sjl }
43625cf1a30Sjl 
43725cf1a30Sjl static int
43825cf1a30Sjl dr_move_memory(dr_handle_t *hp, dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
43925cf1a30Sjl {
44025cf1a30Sjl 	int		rv = -1;
44125cf1a30Sjl 	time_t		 copytime;
44225cf1a30Sjl 	drmachid_t	 cr_id;
44325cf1a30Sjl 	dr_sr_handle_t	*srhp = NULL;
44425cf1a30Sjl 	dr_board_t	*t_bp, *s_bp;
44525cf1a30Sjl 	struct memlist	*c_ml, *d_ml;
44625cf1a30Sjl 	sbd_error_t	*err;
44725cf1a30Sjl 	static fn_t	 f = "dr_move_memory";
44825cf1a30Sjl 
44925cf1a30Sjl 	PR_MEM("%s: (INLINE) moving memory from %s to %s\n",
45025cf1a30Sjl 		f,
45125cf1a30Sjl 		s_mp->sbm_cm.sbdev_path,
45225cf1a30Sjl 		t_mp->sbm_cm.sbdev_path);
45325cf1a30Sjl 
45425cf1a30Sjl 	ASSERT(s_mp->sbm_flags & DR_MFLAG_SOURCE);
45525cf1a30Sjl 	ASSERT(s_mp->sbm_peer == t_mp);
45625cf1a30Sjl 	ASSERT(s_mp->sbm_mlist);
45725cf1a30Sjl 
45825cf1a30Sjl 	ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
45925cf1a30Sjl 	ASSERT(t_mp->sbm_peer == s_mp);
46025cf1a30Sjl 
46125cf1a30Sjl 	/*
46225cf1a30Sjl 	 * create a memlist of spans to copy by removing
46325cf1a30Sjl 	 * the spans that have been deleted, if any, from
46425cf1a30Sjl 	 * the full source board memlist.  s_mp->sbm_del_mlist
46525cf1a30Sjl 	 * will be NULL if there were no spans deleted from
46625cf1a30Sjl 	 * the source board.
46725cf1a30Sjl 	 */
46825cf1a30Sjl 	c_ml = memlist_dup(s_mp->sbm_mlist);
46925cf1a30Sjl 	d_ml = s_mp->sbm_del_mlist;
47025cf1a30Sjl 	while (d_ml != NULL) {
47125cf1a30Sjl 		c_ml = memlist_del_span(c_ml, d_ml->address, d_ml->size);
47225cf1a30Sjl 		d_ml = d_ml->next;
47325cf1a30Sjl 	}
47425cf1a30Sjl 
47525cf1a30Sjl 	/*
47625cf1a30Sjl 	 * Remove retired pages from the copy list. The page content
47725cf1a30Sjl 	 * need not be copied since the pages are no longer in use.
47825cf1a30Sjl 	 */
47925cf1a30Sjl 	PR_MEM("%s: copy list before removing retired pages (if any):\n", f);
48025cf1a30Sjl 	PR_MEMLIST_DUMP(c_ml);
48125cf1a30Sjl 
48225cf1a30Sjl 	c_ml = dr_memlist_del_retired_pages(c_ml);
48325cf1a30Sjl 
48425cf1a30Sjl 	PR_MEM("%s: copy list after removing retired pages:\n", f);
48525cf1a30Sjl 	PR_MEMLIST_DUMP(c_ml);
48625cf1a30Sjl 
48725cf1a30Sjl 	/*
48825cf1a30Sjl 	 * With parallel copy, it shouldn't make a difference which
48925cf1a30Sjl 	 * CPU is the actual master during copy-rename since all
49025cf1a30Sjl 	 * CPUs participate in the parallel copy anyway.
49125cf1a30Sjl 	 */
49225cf1a30Sjl 	affinity_set(CPU_CURRENT);
49325cf1a30Sjl 
49425cf1a30Sjl 	err = drmach_copy_rename_init(
49525cf1a30Sjl 		t_mp->sbm_cm.sbdev_id, s_mp->sbm_cm.sbdev_id, c_ml, &cr_id);
49625cf1a30Sjl 	if (err) {
49725cf1a30Sjl 		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
49825cf1a30Sjl 		affinity_clear();
49925cf1a30Sjl 		memlist_delete(c_ml);
50025cf1a30Sjl 		return (-1);
50125cf1a30Sjl 	}
50225cf1a30Sjl 
50325cf1a30Sjl 	srhp = dr_get_sr_handle(hp);
50425cf1a30Sjl 	ASSERT(srhp);
50525cf1a30Sjl 
50625cf1a30Sjl 	copytime = lbolt;
50725cf1a30Sjl 
50825cf1a30Sjl 	/* Quiesce the OS.  */
50925cf1a30Sjl 	if (dr_suspend(srhp)) {
51025cf1a30Sjl 		cmn_err(CE_WARN, "%s: failed to quiesce OS"
51125cf1a30Sjl 			" for copy-rename", f);
51225cf1a30Sjl 
51325cf1a30Sjl 		err = drmach_copy_rename_fini(cr_id);
51425cf1a30Sjl 		if (err) {
51525cf1a30Sjl 			/*
51625cf1a30Sjl 			 * no error is expected since the program has
51725cf1a30Sjl 			 * not yet run.
51825cf1a30Sjl 			 */
51925cf1a30Sjl 
52025cf1a30Sjl 			/* catch this in debug kernels */
52125cf1a30Sjl 			ASSERT(0);
52225cf1a30Sjl 
52325cf1a30Sjl 			sbd_err_clear(&err);
52425cf1a30Sjl 		}
52525cf1a30Sjl 
52625cf1a30Sjl 		/* suspend error reached via hp */
52725cf1a30Sjl 		s_mp->sbm_cm.sbdev_error = hp->h_err;
52825cf1a30Sjl 		hp->h_err = NULL;
52925cf1a30Sjl 		goto done;
53025cf1a30Sjl 	}
53125cf1a30Sjl 
53225cf1a30Sjl 	drmach_copy_rename(cr_id);
53325cf1a30Sjl 
53425cf1a30Sjl 	/* Resume the OS.  */
53525cf1a30Sjl 	dr_resume(srhp);
53625cf1a30Sjl 
53725cf1a30Sjl 	copytime = lbolt - copytime;
53825cf1a30Sjl 
53925cf1a30Sjl 	if (err = drmach_copy_rename_fini(cr_id))
54025cf1a30Sjl 		goto done;
54125cf1a30Sjl 
54225cf1a30Sjl 	/*
54325cf1a30Sjl 	 * Rename memory for lgroup.
54425cf1a30Sjl 	 * Source and target board numbers are packaged in arg.
54525cf1a30Sjl 	 */
54625cf1a30Sjl 	s_bp = s_mp->sbm_cm.sbdev_bp;
54725cf1a30Sjl 	t_bp = t_mp->sbm_cm.sbdev_bp;
54825cf1a30Sjl 
54925cf1a30Sjl 	lgrp_plat_config(LGRP_CONFIG_MEM_RENAME,
55025cf1a30Sjl 		(uintptr_t)(s_bp->b_num | (t_bp->b_num << 16)));
55125cf1a30Sjl 
55225cf1a30Sjl 
55325cf1a30Sjl 	PR_MEM("%s: copy-rename elapsed time = %ld ticks (%ld secs)\n",
55425cf1a30Sjl 		f, copytime, copytime / hz);
55525cf1a30Sjl 
55625cf1a30Sjl 	rv = 0;
55725cf1a30Sjl done:
55825cf1a30Sjl 	if (srhp)
55925cf1a30Sjl 		dr_release_sr_handle(srhp);
56025cf1a30Sjl 	if (err)
56125cf1a30Sjl 		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
56225cf1a30Sjl 	affinity_clear();
56325cf1a30Sjl 
56425cf1a30Sjl 	return (rv);
56525cf1a30Sjl }
56625cf1a30Sjl 
56725cf1a30Sjl /*
56825cf1a30Sjl  * If the detaching node contains memory that is "non-permanent"
56925cf1a30Sjl  * then the memory addresses are simply cleared.  If the memory
57025cf1a30Sjl  * is non-relocatable, then do a copy-rename.
57125cf1a30Sjl  */
57225cf1a30Sjl void
57325cf1a30Sjl dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
57425cf1a30Sjl {
57525cf1a30Sjl 	int			rv = 0;
57625cf1a30Sjl 	dr_mem_unit_t		*s_mp = (dr_mem_unit_t *)cp;
57725cf1a30Sjl 	dr_mem_unit_t		*t_mp;
57825cf1a30Sjl 	dr_state_t		state;
57925cf1a30Sjl 	static fn_t		f = "dr_detach_mem";
58025cf1a30Sjl 
58125cf1a30Sjl 	PR_MEM("%s...\n", f);
58225cf1a30Sjl 
58325cf1a30Sjl 	/* lookup target mem unit and target board structure, if any */
58425cf1a30Sjl 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
58525cf1a30Sjl 		t_mp = s_mp->sbm_peer;
58625cf1a30Sjl 		ASSERT(t_mp != NULL);
58725cf1a30Sjl 		ASSERT(t_mp->sbm_peer == s_mp);
58825cf1a30Sjl 	} else {
58925cf1a30Sjl 		t_mp = NULL;
59025cf1a30Sjl 	}
59125cf1a30Sjl 
59225cf1a30Sjl 	/* verify mem unit's state is UNREFERENCED */
59325cf1a30Sjl 	state = s_mp->sbm_cm.sbdev_state;
59425cf1a30Sjl 	if (state != DR_STATE_UNREFERENCED) {
59525cf1a30Sjl 		dr_dev_err(CE_IGNORE, &s_mp->sbm_cm, ESBD_STATE);
59625cf1a30Sjl 		return;
59725cf1a30Sjl 	}
59825cf1a30Sjl 
59925cf1a30Sjl 	/* verify target mem unit's state is UNREFERENCED, if any */
60025cf1a30Sjl 	if (t_mp != NULL) {
60125cf1a30Sjl 		state = t_mp->sbm_cm.sbdev_state;
60225cf1a30Sjl 		if (state != DR_STATE_UNREFERENCED) {
60325cf1a30Sjl 			dr_dev_err(CE_IGNORE, &t_mp->sbm_cm, ESBD_STATE);
60425cf1a30Sjl 			return;
60525cf1a30Sjl 		}
60625cf1a30Sjl 	}
60725cf1a30Sjl 
60825cf1a30Sjl 	/*
60925cf1a30Sjl 	 * If there is no target board (no copy/rename was needed), then
61025cf1a30Sjl 	 * we're done!
61125cf1a30Sjl 	 */
61225cf1a30Sjl 	if (t_mp == NULL) {
61325cf1a30Sjl 		sbd_error_t *err;
61425cf1a30Sjl 		/*
61525cf1a30Sjl 		 * Reprogram interconnect hardware and disable
61625cf1a30Sjl 		 * memory controllers for memory node that's going away.
61725cf1a30Sjl 		 */
61825cf1a30Sjl 
61925cf1a30Sjl 		err = drmach_mem_disable(s_mp->sbm_cm.sbdev_id);
62025cf1a30Sjl 		if (err) {
62125cf1a30Sjl 			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
62225cf1a30Sjl 			rv = -1;
62325cf1a30Sjl 		}
62425cf1a30Sjl 	} else {
62525cf1a30Sjl 		rv = dr_move_memory(hp, s_mp, t_mp);
62625cf1a30Sjl 		PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n",
62725cf1a30Sjl 			f,
62825cf1a30Sjl 			rv ? "FAILED" : "COMPLETED",
62925cf1a30Sjl 			s_mp->sbm_cm.sbdev_bp->b_num,
63025cf1a30Sjl 			t_mp->sbm_cm.sbdev_bp->b_num);
63125cf1a30Sjl 
63225cf1a30Sjl 		if (rv != 0)
63325cf1a30Sjl 			(void) dr_cancel_mem(s_mp);
63425cf1a30Sjl 	}
63525cf1a30Sjl 
63625cf1a30Sjl 	if (rv == 0) {
63725cf1a30Sjl 		sbd_error_t *err;
63825cf1a30Sjl 
63925cf1a30Sjl 		dr_lock_status(hp->h_bd);
64025cf1a30Sjl 		err = drmach_unconfigure(s_mp->sbm_cm.sbdev_id, 0);
64125cf1a30Sjl 		dr_unlock_status(hp->h_bd);
64225cf1a30Sjl 		if (err)
64325cf1a30Sjl 			sbd_err_clear(&err);
64425cf1a30Sjl 	}
64525cf1a30Sjl }
64625cf1a30Sjl 
64725cf1a30Sjl /*
64825cf1a30Sjl  * This routine acts as a wrapper for kphysm_del_span_query in order to
64925cf1a30Sjl  * support potential memory holes in a board's physical address space.
65025cf1a30Sjl  * It calls kphysm_del_span_query for each node in a memlist and accumulates
65125cf1a30Sjl  * the results in *mp.
65225cf1a30Sjl  */
65325cf1a30Sjl static int
65425cf1a30Sjl dr_del_mlist_query(struct memlist *mlist, memquery_t *mp)
65525cf1a30Sjl {
65625cf1a30Sjl 	struct memlist	*ml;
65725cf1a30Sjl 	int		 rv = 0;
65825cf1a30Sjl 
65925cf1a30Sjl 
66025cf1a30Sjl 	if (mlist == NULL)
66125cf1a30Sjl 		cmn_err(CE_WARN, "dr_del_mlist_query: mlist=NULL\n");
66225cf1a30Sjl 
66325cf1a30Sjl 	mp->phys_pages = 0;
66425cf1a30Sjl 	mp->managed = 0;
66525cf1a30Sjl 	mp->nonrelocatable = 0;
66625cf1a30Sjl 	mp->first_nonrelocatable = (pfn_t)-1;	/* XXX */
66725cf1a30Sjl 	mp->last_nonrelocatable = 0;
66825cf1a30Sjl 
66925cf1a30Sjl 	for (ml = mlist; ml; ml = ml->next) {
67025cf1a30Sjl 		memquery_t mq;
67125cf1a30Sjl 
67225cf1a30Sjl 		rv = kphysm_del_span_query(
67325cf1a30Sjl 			_b64top(ml->address), _b64top(ml->size), &mq);
67425cf1a30Sjl 		if (rv)
67525cf1a30Sjl 			break;
67625cf1a30Sjl 
67725cf1a30Sjl 		mp->phys_pages += mq.phys_pages;
67825cf1a30Sjl 		mp->managed += mq.managed;
67925cf1a30Sjl 		mp->nonrelocatable += mq.nonrelocatable;
68025cf1a30Sjl 
68125cf1a30Sjl 		if (mq.nonrelocatable != 0) {
68225cf1a30Sjl 			if (mq.first_nonrelocatable < mp->first_nonrelocatable)
68325cf1a30Sjl 				mp->first_nonrelocatable =
68425cf1a30Sjl 					mq.first_nonrelocatable;
68525cf1a30Sjl 			if (mq.last_nonrelocatable > mp->last_nonrelocatable)
68625cf1a30Sjl 				mp->last_nonrelocatable =
68725cf1a30Sjl 					mq.last_nonrelocatable;
68825cf1a30Sjl 		}
68925cf1a30Sjl 	}
69025cf1a30Sjl 
69125cf1a30Sjl 	if (mp->nonrelocatable == 0)
69225cf1a30Sjl 		mp->first_nonrelocatable = 0;	/* XXX */
69325cf1a30Sjl 
69425cf1a30Sjl 	return (rv);
69525cf1a30Sjl }
69625cf1a30Sjl 
69725cf1a30Sjl /*
69825cf1a30Sjl  * NOTE: This routine is only partially smart about multiple
69925cf1a30Sjl  *	 mem-units.  Need to make mem-status structure smart
70025cf1a30Sjl  *	 about them also.
70125cf1a30Sjl  */
70225cf1a30Sjl int
70325cf1a30Sjl dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
70425cf1a30Sjl {
70525cf1a30Sjl 	int		m, mix;
70625cf1a30Sjl 	memdelstat_t	mdst;
70725cf1a30Sjl 	memquery_t	mq;
70825cf1a30Sjl 	dr_board_t	*bp;
70925cf1a30Sjl 	dr_mem_unit_t	*mp;
71025cf1a30Sjl 	sbd_mem_stat_t	*msp;
71125cf1a30Sjl 	static fn_t	f = "dr_mem_status";
71225cf1a30Sjl 
71325cf1a30Sjl 	bp = hp->h_bd;
71425cf1a30Sjl 	devset &= DR_DEVS_PRESENT(bp);
71525cf1a30Sjl 
71625cf1a30Sjl 	for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) {
71725cf1a30Sjl 		int		rv;
71825cf1a30Sjl 		sbd_error_t	*err;
71925cf1a30Sjl 		drmach_status_t	 pstat;
72025cf1a30Sjl 		dr_mem_unit_t	*p_mp;
72125cf1a30Sjl 
72225cf1a30Sjl 		if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0)
72325cf1a30Sjl 			continue;
72425cf1a30Sjl 
72525cf1a30Sjl 		mp = dr_get_mem_unit(bp, m);
72625cf1a30Sjl 
72725cf1a30Sjl 		if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) {
72825cf1a30Sjl 			/* present, but not fully initialized */
72925cf1a30Sjl 			continue;
73025cf1a30Sjl 		}
73125cf1a30Sjl 
73225cf1a30Sjl 		if (mp->sbm_cm.sbdev_id == (drmachid_t)0)
73325cf1a30Sjl 			continue;
73425cf1a30Sjl 
73525cf1a30Sjl 		/* fetch platform status */
73625cf1a30Sjl 		err = drmach_status(mp->sbm_cm.sbdev_id, &pstat);
73725cf1a30Sjl 		if (err) {
73825cf1a30Sjl 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
73925cf1a30Sjl 			continue;
74025cf1a30Sjl 		}
74125cf1a30Sjl 
74225cf1a30Sjl 		msp = &dsp->d_mem;
74325cf1a30Sjl 		bzero((caddr_t)msp, sizeof (*msp));
74425cf1a30Sjl 
74525cf1a30Sjl 		strncpy(msp->ms_cm.c_id.c_name, pstat.type,
74625cf1a30Sjl 			sizeof (msp->ms_cm.c_id.c_name));
74725cf1a30Sjl 		msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type;
74825cf1a30Sjl 		msp->ms_cm.c_id.c_unit = SBD_NULL_UNIT;
74925cf1a30Sjl 		msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond;
75025cf1a30Sjl 		msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy;
75125cf1a30Sjl 		msp->ms_cm.c_time = mp->sbm_cm.sbdev_time;
75225cf1a30Sjl 		msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate;
75325cf1a30Sjl 
75425cf1a30Sjl 		msp->ms_totpages = mp->sbm_npages;
75525cf1a30Sjl 		msp->ms_basepfn = mp->sbm_basepfn;
75625cf1a30Sjl 		msp->ms_pageslost = mp->sbm_pageslost;
75725cf1a30Sjl 		msp->ms_cage_enabled = kcage_on;
75825cf1a30Sjl 
75925cf1a30Sjl 		if (mp->sbm_flags & DR_MFLAG_RESERVED)
76025cf1a30Sjl 			p_mp = mp->sbm_peer;
76125cf1a30Sjl 		else
76225cf1a30Sjl 			p_mp = NULL;
76325cf1a30Sjl 
76425cf1a30Sjl 		if (p_mp == NULL) {
76525cf1a30Sjl 			msp->ms_peer_is_target = 0;
76625cf1a30Sjl 			msp->ms_peer_ap_id[0] = '\0';
76725cf1a30Sjl 		} else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) {
76825cf1a30Sjl 			char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
76925cf1a30Sjl 			char *minor;
77025cf1a30Sjl 
77125cf1a30Sjl 			/*
77225cf1a30Sjl 			 * b_dip doesn't have to be held for ddi_pathname()
77325cf1a30Sjl 			 * because the board struct (dr_board_t) will be
77425cf1a30Sjl 			 * destroyed before b_dip detaches.
77525cf1a30Sjl 			 */
77625cf1a30Sjl 			(void) ddi_pathname(bp->b_dip, path);
77725cf1a30Sjl 			minor = strchr(p_mp->sbm_cm.sbdev_path, ':');
77825cf1a30Sjl 
77925cf1a30Sjl 			snprintf(msp->ms_peer_ap_id,
78025cf1a30Sjl 			    sizeof (msp->ms_peer_ap_id), "%s%s",
78125cf1a30Sjl 			    path, (minor == NULL) ? "" : minor);
78225cf1a30Sjl 
78325cf1a30Sjl 			kmem_free(path, MAXPATHLEN);
78425cf1a30Sjl 
78525cf1a30Sjl 			if (p_mp->sbm_flags & DR_MFLAG_TARGET)
78625cf1a30Sjl 				msp->ms_peer_is_target = 1;
78725cf1a30Sjl 		}
78825cf1a30Sjl 
78925cf1a30Sjl 		if (mp->sbm_flags & DR_MFLAG_RELOWNER)
79025cf1a30Sjl 			rv = kphysm_del_status(mp->sbm_memhandle, &mdst);
79125cf1a30Sjl 		else
79225cf1a30Sjl 			rv = KPHYSM_EHANDLE;	/* force 'if' to fail */
79325cf1a30Sjl 
79425cf1a30Sjl 		if (rv == KPHYSM_OK) {
79525cf1a30Sjl 			/*
79625cf1a30Sjl 			 * Any pages above managed are "free",
79725cf1a30Sjl 			 * i.e. they have been collected.
79825cf1a30Sjl 			 */
79925cf1a30Sjl 			msp->ms_detpages += (uint_t)(mdst.collected +
80025cf1a30Sjl 			    mdst.phys_pages - mdst.managed);
80125cf1a30Sjl 		} else {
80225cf1a30Sjl 			/*
80325cf1a30Sjl 			 * If we're UNREFERENCED or UNCONFIGURED,
80425cf1a30Sjl 			 * then the number of detached pages is
80525cf1a30Sjl 			 * however many pages are on the board.
80625cf1a30Sjl 			 * I.e. detached = not in use by OS.
80725cf1a30Sjl 			 */
80825cf1a30Sjl 			switch (msp->ms_cm.c_ostate) {
80925cf1a30Sjl 			/*
81025cf1a30Sjl 			 * changed to use cfgadm states
81125cf1a30Sjl 			 *
81225cf1a30Sjl 			 * was:
81325cf1a30Sjl 			 *	case DR_STATE_UNREFERENCED:
81425cf1a30Sjl 			 *	case DR_STATE_UNCONFIGURED:
81525cf1a30Sjl 			 */
81625cf1a30Sjl 			case SBD_STAT_UNCONFIGURED:
81725cf1a30Sjl 				msp->ms_detpages = msp->ms_totpages;
81825cf1a30Sjl 				break;
81925cf1a30Sjl 
82025cf1a30Sjl 			default:
82125cf1a30Sjl 				break;
82225cf1a30Sjl 			}
82325cf1a30Sjl 		}
82425cf1a30Sjl 
82525cf1a30Sjl 		/*
82625cf1a30Sjl 		 * kphysm_del_span_query can report non-reloc pages = total
82725cf1a30Sjl 		 * pages for memory that is not yet configured
82825cf1a30Sjl 		 */
82925cf1a30Sjl 		if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) {
83025cf1a30Sjl 			struct memlist *ml;
83125cf1a30Sjl 
83225cf1a30Sjl 			ml = dr_get_memlist(mp);
83325cf1a30Sjl 			rv = ml ? dr_del_mlist_query(ml, &mq) : -1;
83425cf1a30Sjl 			memlist_delete(ml);
83525cf1a30Sjl 
83625cf1a30Sjl 			if (rv == KPHYSM_OK) {
83725cf1a30Sjl 				msp->ms_managed_pages = mq.managed;
83825cf1a30Sjl 				msp->ms_noreloc_pages = mq.nonrelocatable;
83925cf1a30Sjl 				msp->ms_noreloc_first =
84025cf1a30Sjl 				    mq.first_nonrelocatable;
84125cf1a30Sjl 				msp->ms_noreloc_last =
84225cf1a30Sjl 				    mq.last_nonrelocatable;
84325cf1a30Sjl 				msp->ms_cm.c_sflags = 0;
84425cf1a30Sjl 				if (mq.nonrelocatable) {
84525cf1a30Sjl 					SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE,
84625cf1a30Sjl 					    msp->ms_cm.c_sflags);
84725cf1a30Sjl 				}
84825cf1a30Sjl 			} else {
84925cf1a30Sjl 				PR_MEM("%s: kphysm_del_span_query() = %d\n",
85025cf1a30Sjl 				    f, rv);
85125cf1a30Sjl 			}
85225cf1a30Sjl 		}
85325cf1a30Sjl 
85425cf1a30Sjl 		/*
85525cf1a30Sjl 		 * Check source unit state during copy-rename
85625cf1a30Sjl 		 */
85725cf1a30Sjl 		if ((mp->sbm_flags & DR_MFLAG_SOURCE) &&
85825cf1a30Sjl 		    (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED ||
85925cf1a30Sjl 		    mp->sbm_cm.sbdev_state == DR_STATE_RELEASE))
86025cf1a30Sjl 			msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED;
86125cf1a30Sjl 
86225cf1a30Sjl 		mix++;
86325cf1a30Sjl 		dsp++;
86425cf1a30Sjl 	}
86525cf1a30Sjl 
86625cf1a30Sjl 	return (mix);
86725cf1a30Sjl }
86825cf1a30Sjl 
86925cf1a30Sjl int
87025cf1a30Sjl dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
87125cf1a30Sjl {
87225cf1a30Sjl 	_NOTE(ARGUNUSED(hp))
87325cf1a30Sjl 
87425cf1a30Sjl 	int		err_flag = 0;
87525cf1a30Sjl 	int		d;
87625cf1a30Sjl 	sbd_error_t	*err;
87725cf1a30Sjl 	static fn_t	f = "dr_pre_attach_mem";
87825cf1a30Sjl 
87925cf1a30Sjl 	PR_MEM("%s...\n", f);
88025cf1a30Sjl 
88125cf1a30Sjl 	for (d = 0; d < devnum; d++) {
88225cf1a30Sjl 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
88325cf1a30Sjl 		dr_state_t	state;
88425cf1a30Sjl 
88525cf1a30Sjl 		cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path);
88625cf1a30Sjl 
88725cf1a30Sjl 		state = mp->sbm_cm.sbdev_state;
88825cf1a30Sjl 		switch (state) {
88925cf1a30Sjl 		case DR_STATE_UNCONFIGURED:
89025cf1a30Sjl 			PR_MEM("%s: recovering from UNCONFIG for %s\n",
89125cf1a30Sjl 				f,
89225cf1a30Sjl 				mp->sbm_cm.sbdev_path);
89325cf1a30Sjl 
89425cf1a30Sjl 			/* use memlist cached by dr_post_detach_mem_unit */
89525cf1a30Sjl 			ASSERT(mp->sbm_mlist != NULL);
89625cf1a30Sjl 			PR_MEM("%s: re-configuring cached memlist for %s:\n",
89725cf1a30Sjl 				f, mp->sbm_cm.sbdev_path);
89825cf1a30Sjl 			PR_MEMLIST_DUMP(mp->sbm_mlist);
89925cf1a30Sjl 
90025cf1a30Sjl 			/* kphysm del handle should have been freed */
90125cf1a30Sjl 			ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
90225cf1a30Sjl 
90325cf1a30Sjl 			/*FALLTHROUGH*/
90425cf1a30Sjl 
90525cf1a30Sjl 		case DR_STATE_CONNECTED:
90625cf1a30Sjl 			PR_MEM("%s: reprogramming mem hardware on %s\n",
90725cf1a30Sjl 				f, mp->sbm_cm.sbdev_bp->b_path);
90825cf1a30Sjl 
90925cf1a30Sjl 			PR_MEM("%s: enabling %s\n",
91025cf1a30Sjl 				f, mp->sbm_cm.sbdev_path);
91125cf1a30Sjl 
91225cf1a30Sjl 			err = drmach_mem_enable(mp->sbm_cm.sbdev_id);
91325cf1a30Sjl 			if (err) {
91425cf1a30Sjl 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
91525cf1a30Sjl 				err_flag = 1;
91625cf1a30Sjl 			}
91725cf1a30Sjl 			break;
91825cf1a30Sjl 
91925cf1a30Sjl 		default:
92025cf1a30Sjl 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE);
92125cf1a30Sjl 			err_flag = 1;
92225cf1a30Sjl 			break;
92325cf1a30Sjl 		}
92425cf1a30Sjl 
92525cf1a30Sjl 		/* exit for loop if error encountered */
92625cf1a30Sjl 		if (err_flag)
92725cf1a30Sjl 			break;
92825cf1a30Sjl 	}
92925cf1a30Sjl 
93025cf1a30Sjl 	return (err_flag ? -1 : 0);
93125cf1a30Sjl }
93225cf1a30Sjl 
933*68ac2337Sjl static void
934*68ac2337Sjl dr_update_mc_memory()
935*68ac2337Sjl {
936*68ac2337Sjl 	void		(*mc_update_mlist)(void);
937*68ac2337Sjl 
938*68ac2337Sjl 	/*
939*68ac2337Sjl 	 * mc-opl is configured during drmach_mem_new but the memory
940*68ac2337Sjl 	 * has not been added to phys_install at that time.
941*68ac2337Sjl 	 * We must inform mc-opl to update the mlist after we
942*68ac2337Sjl 	 * attach or detach a system board.
943*68ac2337Sjl 	 */
944*68ac2337Sjl 
945*68ac2337Sjl 	mc_update_mlist = (void (*)(void))
946*68ac2337Sjl 	    modgetsymvalue("opl_mc_update_mlist", 0);
947*68ac2337Sjl 
948*68ac2337Sjl 	if (mc_update_mlist != NULL) {
949*68ac2337Sjl 		(*mc_update_mlist)();
950*68ac2337Sjl 	}
951*68ac2337Sjl }
952*68ac2337Sjl 
95325cf1a30Sjl int
95425cf1a30Sjl dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
95525cf1a30Sjl {
95625cf1a30Sjl 	_NOTE(ARGUNUSED(hp))
95725cf1a30Sjl 
95825cf1a30Sjl 	int		d;
95925cf1a30Sjl 	static fn_t	f = "dr_post_attach_mem";
96025cf1a30Sjl 
96125cf1a30Sjl 	PR_MEM("%s...\n", f);
96225cf1a30Sjl 
96325cf1a30Sjl 	for (d = 0; d < devnum; d++) {
96425cf1a30Sjl 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
96525cf1a30Sjl 		struct memlist	*mlist, *ml;
96625cf1a30Sjl 
96725cf1a30Sjl 		mlist = dr_get_memlist(mp);
96825cf1a30Sjl 		if (mlist == NULL) {
969*68ac2337Sjl 			/* OPL supports memoryless board */
97025cf1a30Sjl 			continue;
97125cf1a30Sjl 		}
97225cf1a30Sjl 
97325cf1a30Sjl 		/*
97425cf1a30Sjl 		 * Verify the memory really did successfully attach
97525cf1a30Sjl 		 * by checking for its existence in phys_install.
97625cf1a30Sjl 		 */
97725cf1a30Sjl 		memlist_read_lock();
97825cf1a30Sjl 		if (memlist_intersect(phys_install, mlist) == 0) {
97925cf1a30Sjl 			memlist_read_unlock();
98025cf1a30Sjl 
98125cf1a30Sjl 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
98225cf1a30Sjl 
98325cf1a30Sjl 			PR_MEM("%s: %s memlist not in phys_install",
98425cf1a30Sjl 				f, mp->sbm_cm.sbdev_path);
98525cf1a30Sjl 
98625cf1a30Sjl 			memlist_delete(mlist);
98725cf1a30Sjl 			continue;
98825cf1a30Sjl 		}
98925cf1a30Sjl 		memlist_read_unlock();
99025cf1a30Sjl 
99125cf1a30Sjl 		for (ml = mlist; ml != NULL; ml = ml->next) {
99225cf1a30Sjl 			sbd_error_t *err;
99325cf1a30Sjl 
99425cf1a30Sjl 			err = drmach_mem_add_span(
99525cf1a30Sjl 				mp->sbm_cm.sbdev_id,
99625cf1a30Sjl 				ml->address,
99725cf1a30Sjl 				ml->size);
99825cf1a30Sjl 			if (err)
99925cf1a30Sjl 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
100025cf1a30Sjl 		}
100125cf1a30Sjl 
100225cf1a30Sjl 		memlist_delete(mlist);
100325cf1a30Sjl 
100425cf1a30Sjl 		/*
100525cf1a30Sjl 		 * Destroy cached memlist, if any.
100625cf1a30Sjl 		 * There will be a cached memlist in sbm_mlist if
100725cf1a30Sjl 		 * this board is being configured directly after
100825cf1a30Sjl 		 * an unconfigure.
100925cf1a30Sjl 		 * To support this transition, dr_post_detach_mem
101025cf1a30Sjl 		 * left a copy of the last known memlist in sbm_mlist.
101125cf1a30Sjl 		 * This memlist could differ from any derived from
101225cf1a30Sjl 		 * hardware if while this memunit was last configured
101325cf1a30Sjl 		 * the system detected and deleted bad pages from
101425cf1a30Sjl 		 * phys_install.  The location of those bad pages
101525cf1a30Sjl 		 * will be reflected in the cached memlist.
101625cf1a30Sjl 		 */
101725cf1a30Sjl 		if (mp->sbm_mlist) {
101825cf1a30Sjl 			memlist_delete(mp->sbm_mlist);
101925cf1a30Sjl 			mp->sbm_mlist = NULL;
102025cf1a30Sjl 		}
102125cf1a30Sjl 	}
102225cf1a30Sjl 
1023*68ac2337Sjl 	dr_update_mc_memory();
1024*68ac2337Sjl 
102525cf1a30Sjl 	return (0);
102625cf1a30Sjl }
102725cf1a30Sjl 
102825cf1a30Sjl int
102925cf1a30Sjl dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
103025cf1a30Sjl {
103125cf1a30Sjl 	_NOTE(ARGUNUSED(hp))
103225cf1a30Sjl 
103325cf1a30Sjl 	int d;
103425cf1a30Sjl 
103525cf1a30Sjl 	for (d = 0; d < devnum; d++) {
103625cf1a30Sjl 		dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
103725cf1a30Sjl 
103825cf1a30Sjl 		cmn_err(CE_CONT, "OS unconfigure %s", mp->sbm_cm.sbdev_path);
103925cf1a30Sjl 	}
104025cf1a30Sjl 
104125cf1a30Sjl 	return (0);
104225cf1a30Sjl }
104325cf1a30Sjl 
104425cf1a30Sjl int
104525cf1a30Sjl dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
104625cf1a30Sjl {
104725cf1a30Sjl 	_NOTE(ARGUNUSED(hp))
104825cf1a30Sjl 
104925cf1a30Sjl 	int		d, rv;
105025cf1a30Sjl 	static fn_t	f = "dr_post_detach_mem";
105125cf1a30Sjl 
105225cf1a30Sjl 	PR_MEM("%s...\n", f);
105325cf1a30Sjl 
105425cf1a30Sjl 	rv = 0;
105525cf1a30Sjl 	for (d = 0; d < devnum; d++) {
105625cf1a30Sjl 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
105725cf1a30Sjl 
105825cf1a30Sjl 		ASSERT(mp->sbm_cm.sbdev_bp == hp->h_bd);
105925cf1a30Sjl 
106025cf1a30Sjl 		if (dr_post_detach_mem_unit(mp))
106125cf1a30Sjl 			rv = -1;
106225cf1a30Sjl 	}
1063*68ac2337Sjl 	dr_update_mc_memory();
106425cf1a30Sjl 
106525cf1a30Sjl 	return (rv);
106625cf1a30Sjl }
106725cf1a30Sjl 
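/*
 * Add each span in ml back to the running system via
 * kphysm_add_memory_dynamic() and register the span with the
 * platform layer through drmach_mem_add_span().
 */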
106825cf1a30Sjl static void
106925cf1a30Sjl dr_add_memory_spans(dr_mem_unit_t *mp, struct memlist *ml)
107025cf1a30Sjl {
107125cf1a30Sjl 	static fn_t	f = "dr_add_memory_spans";
107225cf1a30Sjl 
107325cf1a30Sjl 	PR_MEM("%s...", f);
107425cf1a30Sjl 	PR_MEMLIST_DUMP(ml);
107525cf1a30Sjl 
107625cf1a30Sjl #ifdef DEBUG
107725cf1a30Sjl 	memlist_read_lock();
107825cf1a30Sjl 	if (memlist_intersect(phys_install, ml)) {
107925cf1a30Sjl 		PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f);
108025cf1a30Sjl 	}
108125cf1a30Sjl 	memlist_read_unlock();
108225cf1a30Sjl #endif
108325cf1a30Sjl 
108425cf1a30Sjl 	for (; ml; ml = ml->next) {
108525cf1a30Sjl 		pfn_t		 base;
108625cf1a30Sjl 		pgcnt_t		 npgs;
108725cf1a30Sjl 		int		 rv;
108825cf1a30Sjl 		sbd_error_t	*err;
108925cf1a30Sjl 
109025cf1a30Sjl 		base = _b64top(ml->address);
109125cf1a30Sjl 		npgs = _b64top(ml->size);
109225cf1a30Sjl 
109325cf1a30Sjl 		rv = kphysm_add_memory_dynamic(base, npgs);
109425cf1a30Sjl 
109525cf1a30Sjl 		err = drmach_mem_add_span(
109625cf1a30Sjl 			mp->sbm_cm.sbdev_id,
109725cf1a30Sjl 			ml->address,
109825cf1a30Sjl 			ml->size);
109925cf1a30Sjl 
110025cf1a30Sjl 		if (err)
110125cf1a30Sjl 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
110225cf1a30Sjl 
110325cf1a30Sjl 		if (rv != KPHYSM_OK) {
110425cf1a30Sjl 			cmn_err(CE_WARN, "%s:"
110525cf1a30Sjl 				" unexpected kphysm_add_memory_dynamic"
110625cf1a30Sjl 				" return value %d;"
110725cf1a30Sjl 				" basepfn=0x%lx, npages=%ld\n",
110825cf1a30Sjl 				f, rv, base, npgs);
110925cf1a30Sjl 
111025cf1a30Sjl 			continue;
111125cf1a30Sjl 		}
111225cf1a30Sjl 	}
111325cf1a30Sjl }
111425cf1a30Sjl 
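/*
 * Return 1 if 'add' falls exactly on the start or end boundary of any
 * span in ml, i.e. a chunk beginning or ending at 'add' would be
 * adjacent to existing memory.
 */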
1115*68ac2337Sjl static int
1116*68ac2337Sjl memlist_touch(struct memlist *ml, uint64_t add)
1117*68ac2337Sjl {
1118*68ac2337Sjl 	while (ml != NULL) {
1119*68ac2337Sjl 		if ((add == ml->address) ||
1120*68ac2337Sjl 			(add == (ml->address + ml->size)))
1121*68ac2337Sjl 			return (1);
1122*68ac2337Sjl 		ml = ml->next;
1123*68ac2337Sjl 	}
1124*68ac2337Sjl 	return (0);
1125*68ac2337Sjl }
1126*68ac2337Sjl 
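/*
 * After a copy-rename between boards of different sizes, delete any
 * excess source spans from the platform layer and add back the portion
 * of the target board that was not taken over by the source board,
 * trimming unaligned chunks so they are not coalesced with adjacent
 * memory (see the alignment note below).
 */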
1127*68ac2337Sjl static sbd_error_t *
1128*68ac2337Sjl dr_process_excess_mlist(dr_mem_unit_t *s_mp,
1129*68ac2337Sjl 	dr_mem_unit_t *t_mp, struct memlist *t_excess_mlist)
1130*68ac2337Sjl {
1131*68ac2337Sjl 	struct memlist	*ml;
1132*68ac2337Sjl 	sbd_error_t	*err;
1133*68ac2337Sjl 	static fn_t	f = "dr_process_excess_mlist";
1134*68ac2337Sjl 	uint64_t	new_pa, nbytes;
1135*68ac2337Sjl 	int rv;
1136*68ac2337Sjl 
1137*68ac2337Sjl 	err = NULL;
1138*68ac2337Sjl 
1139*68ac2337Sjl 	/*
1140*68ac2337Sjl 	 * After the small <-> big copy-rename,
1141*68ac2337Sjl 	 * the original address space for the
1142*68ac2337Sjl 	 * source board may have excess to be
1143*68ac2337Sjl 	 * deleted. This is a case different
1144*68ac2337Sjl 	 * from the big->small excess source
1145*68ac2337Sjl 	 * memory case listed below.
1146*68ac2337Sjl 	 * Remove s_mp->sbm_del_mlist from
1147*68ac2337Sjl 	 * the kernel cage glist.
1148*68ac2337Sjl 	 */
1149*68ac2337Sjl 	for (ml = s_mp->sbm_del_mlist; ml;
1150*68ac2337Sjl 		ml = ml->next) {
1151*68ac2337Sjl 		PR_MEM("%s: delete small<->big copy-"
1152*68ac2337Sjl 		    "rename source excess memory", f);
1153*68ac2337Sjl 		PR_MEMLIST_DUMP(ml);
1154*68ac2337Sjl 
1155*68ac2337Sjl 		err = drmach_mem_del_span(
1156*68ac2337Sjl 			s_mp->sbm_cm.sbdev_id,
1157*68ac2337Sjl 			    ml->address, ml->size);
1158*68ac2337Sjl 		if (err)
1159*68ac2337Sjl 			DRERR_SET_C(&s_mp->
1160*68ac2337Sjl 			    sbm_cm.sbdev_error, &err);
1161*68ac2337Sjl 		ASSERT(err == NULL);
1162*68ac2337Sjl 	}
1163*68ac2337Sjl 
1164*68ac2337Sjl 	PR_MEM("%s: adding back remaining portion"
1165*68ac2337Sjl 		" of %s, memlist:\n",
1166*68ac2337Sjl 		f, t_mp->sbm_cm.sbdev_path);
1167*68ac2337Sjl 	PR_MEMLIST_DUMP(t_excess_mlist);
1168*68ac2337Sjl 
1169*68ac2337Sjl 	for (ml = t_excess_mlist; ml; ml = ml->next) {
1170*68ac2337Sjl 	    struct memlist ml0;
1171*68ac2337Sjl 
1172*68ac2337Sjl 	    ml0.address = ml->address;
1173*68ac2337Sjl 	    ml0.size = ml->size;
1174*68ac2337Sjl 	    ml0.next = ml0.prev = NULL;
1175*68ac2337Sjl 
1176*68ac2337Sjl 	/*
1177*68ac2337Sjl 	 * If the memory object is 256 MB aligned (the max page size
1178*68ac2337Sjl 	 * on OPL), it will not be coalesced with the adjacent memory
1179*68ac2337Sjl 	 * chunks.  The coalesce logic assumes contiguous page
1180*68ac2337Sjl 	 * structures for contiguous memory and we hit a panic.
1181*68ac2337Sjl 	 * For anything less than 256 MB alignment, we have
1182*68ac2337Sjl 	 * to make sure that it is not adjacent to anything.
1183*68ac2337Sjl 	 * If the new chunk is adjacent to phys_install, we
1184*68ac2337Sjl 	 * truncate it to a 4 MB boundary.  4 MB is somewhat
1185*68ac2337Sjl 	 * arbitrary.  However, we do not want to create
1186*68ac2337Sjl 	 * very small segments because they can cause problems.
1187*68ac2337Sjl 	 * For example, the extreme case of an 8K segment will fail
1188*68ac2337Sjl 	 * kphysm_add_memory_dynamic().
1189*68ac2337Sjl 	 */
1190*68ac2337Sjl 	    if ((ml->address & (MH_MPSS_ALIGNMENT - 1)) ||
1191*68ac2337Sjl 		(ml->size & (MH_MPSS_ALIGNMENT - 1))) {
1192*68ac2337Sjl 
1193*68ac2337Sjl 		memlist_read_lock();
1194*68ac2337Sjl 		rv = memlist_touch(phys_install, ml0.address);
1195*68ac2337Sjl 		memlist_read_unlock();
1196*68ac2337Sjl 
1197*68ac2337Sjl 		if (rv) {
1198*68ac2337Sjl 		    new_pa = roundup(ml0.address + 1, MH_MIN_ALIGNMENT);
1199*68ac2337Sjl 		    nbytes = (new_pa -  ml0.address);
1200*68ac2337Sjl 		    if (nbytes >= ml0.size) {
1201*68ac2337Sjl 			t_mp->sbm_dyn_segs =
1202*68ac2337Sjl 			    memlist_del_span(t_mp->sbm_dyn_segs,
1203*68ac2337Sjl 				ml0.address, ml0.size);
1204*68ac2337Sjl 			continue;
1205*68ac2337Sjl 		    }
1206*68ac2337Sjl 		    t_mp->sbm_dyn_segs =
1207*68ac2337Sjl 			memlist_del_span(t_mp->sbm_dyn_segs,
1208*68ac2337Sjl 			    ml0.address, nbytes);
1209*68ac2337Sjl 		    ml0.size -= nbytes;
1210*68ac2337Sjl 		    ml0.address = new_pa;
1211*68ac2337Sjl 		}
1212*68ac2337Sjl 
1213*68ac2337Sjl 		if (ml0.size == 0) {
1214*68ac2337Sjl 		    continue;
1215*68ac2337Sjl 		}
1216*68ac2337Sjl 
1217*68ac2337Sjl 		memlist_read_lock();
1218*68ac2337Sjl 		rv = memlist_touch(phys_install, ml0.address + ml0.size);
1219*68ac2337Sjl 		memlist_read_unlock();
1220*68ac2337Sjl 
1221*68ac2337Sjl 		if (rv) {
1222*68ac2337Sjl 		    new_pa = rounddown(ml0.address + ml0.size - 1,
1223*68ac2337Sjl 			MH_MIN_ALIGNMENT);
1224*68ac2337Sjl 		    nbytes = (ml0.address + ml0.size - new_pa);
1225*68ac2337Sjl 		    if (nbytes >= ml0.size) {
1226*68ac2337Sjl 			t_mp->sbm_dyn_segs =
1227*68ac2337Sjl 			    memlist_del_span(t_mp->sbm_dyn_segs,
1228*68ac2337Sjl 				ml0.address, ml0.size);
1229*68ac2337Sjl 			continue;
1230*68ac2337Sjl 		    }
1231*68ac2337Sjl 		    t_mp->sbm_dyn_segs =
1232*68ac2337Sjl 			memlist_del_span(t_mp->sbm_dyn_segs,
1233*68ac2337Sjl 			    new_pa, nbytes);
1234*68ac2337Sjl 		    ml0.size -= nbytes;
1235*68ac2337Sjl 		}
1236*68ac2337Sjl 
1237*68ac2337Sjl 		if (ml0.size > 0) {
1238*68ac2337Sjl 		    dr_add_memory_spans(s_mp, &ml0);
1239*68ac2337Sjl 		}
1240*68ac2337Sjl 	    } else if (ml0.size > 0) {
1241*68ac2337Sjl 		dr_add_memory_spans(s_mp, &ml0);
1242*68ac2337Sjl 	    }
1243*68ac2337Sjl 	}
1244*68ac2337Sjl 	memlist_delete(t_excess_mlist);
1245*68ac2337Sjl 	return (err);
1246*68ac2337Sjl }
1247*68ac2337Sjl 
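/*
 * Post-detach handling for one memory unit: verify the deleted spans
 * are no longer in phys_install, rename the source/target memlists to
 * their post copy-rename physical addresses, delete excess spans, and
 * reset the unit state.
 */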
124825cf1a30Sjl static int
124925cf1a30Sjl dr_post_detach_mem_unit(dr_mem_unit_t *s_mp)
125025cf1a30Sjl {
125125cf1a30Sjl 	uint64_t	sz = s_mp->sbm_slice_size;
125225cf1a30Sjl 	uint64_t	sm = sz - 1;
125325cf1a30Sjl 	/* old and new below refer to PAs before and after copy-rename */
125425cf1a30Sjl 	uint64_t	s_old_basepa, s_new_basepa;
125525cf1a30Sjl 	uint64_t	t_old_basepa, t_new_basepa;
125625cf1a30Sjl 	dr_mem_unit_t	*t_mp, *x_mp;
125725cf1a30Sjl 	drmach_mem_info_t	minfo;
125825cf1a30Sjl 	struct memlist	*ml;
125925cf1a30Sjl 	struct memlist	*t_excess_mlist;
126025cf1a30Sjl 	int		rv;
126125cf1a30Sjl 	int		s_excess_mem_deleted = 0;
126225cf1a30Sjl 	sbd_error_t	*err;
126325cf1a30Sjl 	static fn_t	f = "dr_post_detach_mem_unit";
126425cf1a30Sjl 
126525cf1a30Sjl 	PR_MEM("%s...\n", f);
126625cf1a30Sjl 
126725cf1a30Sjl 	/* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */
126825cf1a30Sjl 	PR_MEM("%s: %s: deleted memlist (EMPTY maybe okay):\n",
126925cf1a30Sjl 		f, s_mp->sbm_cm.sbdev_path);
127025cf1a30Sjl 	PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
127125cf1a30Sjl 
127225cf1a30Sjl 	/* sanity check */
127325cf1a30Sjl 	ASSERT(s_mp->sbm_del_mlist == NULL ||
127425cf1a30Sjl 		(s_mp->sbm_flags & DR_MFLAG_RELDONE) != 0);
127525cf1a30Sjl 
127625cf1a30Sjl 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
127725cf1a30Sjl 		t_mp = s_mp->sbm_peer;
127825cf1a30Sjl 		ASSERT(t_mp != NULL);
127925cf1a30Sjl 		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
128025cf1a30Sjl 		ASSERT(t_mp->sbm_peer == s_mp);
128125cf1a30Sjl 
128225cf1a30Sjl 		ASSERT(t_mp->sbm_flags & DR_MFLAG_RELDONE);
128325cf1a30Sjl 		ASSERT(t_mp->sbm_del_mlist);
128425cf1a30Sjl 
128525cf1a30Sjl 		PR_MEM("%s: target %s: deleted memlist:\n",
128625cf1a30Sjl 			f, t_mp->sbm_cm.sbdev_path);
128725cf1a30Sjl 		PR_MEMLIST_DUMP(t_mp->sbm_del_mlist);
128825cf1a30Sjl 	} else {
128925cf1a30Sjl 		/* there is no target unit */
129025cf1a30Sjl 		t_mp = NULL;
129125cf1a30Sjl 	}
129225cf1a30Sjl 
129325cf1a30Sjl 	/*
129425cf1a30Sjl 	 * Verify the memory really did successfully detach
129525cf1a30Sjl 	 * by checking for its non-existence in phys_install.
129625cf1a30Sjl 	 */
129725cf1a30Sjl 	rv = 0;
129825cf1a30Sjl 	memlist_read_lock();
129925cf1a30Sjl 	if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
130025cf1a30Sjl 		x_mp = s_mp;
130125cf1a30Sjl 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
130225cf1a30Sjl 	}
130325cf1a30Sjl 	if (rv == 0 && t_mp && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
130425cf1a30Sjl 		x_mp = t_mp;
130525cf1a30Sjl 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
130625cf1a30Sjl 	}
130725cf1a30Sjl 	memlist_read_unlock();
130825cf1a30Sjl 
130925cf1a30Sjl 	if (rv) {
131025cf1a30Sjl 		/* error: memlist still in phys_install */
131125cf1a30Sjl 		DR_DEV_INTERNAL_ERROR(&x_mp->sbm_cm);
131225cf1a30Sjl 	}
131325cf1a30Sjl 
131425cf1a30Sjl 	/*
131525cf1a30Sjl 	 * clean mem unit state and bail out if an error has been recorded.
131625cf1a30Sjl 	 */
131725cf1a30Sjl 	rv = 0;
131825cf1a30Sjl 	if (s_mp->sbm_cm.sbdev_error) {
131925cf1a30Sjl 		PR_MEM("%s: %s flags=%x", f,
132025cf1a30Sjl 			s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
132125cf1a30Sjl 		DR_DEV_CLR_UNREFERENCED(&s_mp->sbm_cm);
132225cf1a30Sjl 		DR_DEV_CLR_RELEASED(&s_mp->sbm_cm);
132325cf1a30Sjl 		dr_device_transition(&s_mp->sbm_cm, DR_STATE_CONFIGURED);
132425cf1a30Sjl 		rv = -1;
132525cf1a30Sjl 	}
132625cf1a30Sjl 	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error != NULL) {
132725cf1a30Sjl 		PR_MEM("%s: %s flags=%x", f,
132825cf1a30Sjl 			s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
132925cf1a30Sjl 		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
133025cf1a30Sjl 		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
133125cf1a30Sjl 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
133225cf1a30Sjl 		rv = -1;
133325cf1a30Sjl 	}
133425cf1a30Sjl 	if (rv)
133525cf1a30Sjl 		goto cleanup;
133625cf1a30Sjl 
133725cf1a30Sjl 	s_old_basepa = _ptob64(s_mp->sbm_basepfn);
133825cf1a30Sjl 	err = drmach_mem_get_info(s_mp->sbm_cm.sbdev_id, &minfo);
133925cf1a30Sjl 	ASSERT(err == NULL);
134025cf1a30Sjl 	s_new_basepa = minfo.mi_basepa;
134125cf1a30Sjl 
134225cf1a30Sjl 	PR_MEM("%s:s_old_basepa: 0x%lx\n", f, s_old_basepa);
134325cf1a30Sjl 	PR_MEM("%s:s_new_basepa: 0x%lx\n", f, s_new_basepa);
134425cf1a30Sjl 
134525cf1a30Sjl 	if (t_mp != NULL) {
134625cf1a30Sjl 		struct memlist *s_copy_mlist;
134725cf1a30Sjl 
134825cf1a30Sjl 		t_old_basepa = _ptob64(t_mp->sbm_basepfn);
134925cf1a30Sjl 		err = drmach_mem_get_info(t_mp->sbm_cm.sbdev_id, &minfo);
135025cf1a30Sjl 		ASSERT(err == NULL);
135125cf1a30Sjl 		t_new_basepa = minfo.mi_basepa;
135225cf1a30Sjl 
135325cf1a30Sjl 		PR_MEM("%s:t_old_basepa: 0x%lx\n", f, t_old_basepa);
135425cf1a30Sjl 		PR_MEM("%s:t_new_basepa: 0x%lx\n", f, t_new_basepa);
135525cf1a30Sjl 
135625cf1a30Sjl 		/*
135725cf1a30Sjl 		 * Construct copy list with original source addresses.
135825cf1a30Sjl 		 * Used to add back excess target mem.
135925cf1a30Sjl 		 */
136025cf1a30Sjl 		s_copy_mlist = memlist_dup(s_mp->sbm_mlist);
136125cf1a30Sjl 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
136225cf1a30Sjl 			s_copy_mlist = memlist_del_span(s_copy_mlist,
136325cf1a30Sjl 			    ml->address, ml->size);
136425cf1a30Sjl 		}
136525cf1a30Sjl 
136625cf1a30Sjl 		PR_MEM("%s: source copy list:\n:", f);
136725cf1a30Sjl 		PR_MEMLIST_DUMP(s_copy_mlist);
136825cf1a30Sjl 
136925cf1a30Sjl 		/*
137025cf1a30Sjl 		 * We had to swap mem-units, so update
137125cf1a30Sjl 		 * memlists accordingly with new base
137225cf1a30Sjl 		 * addresses.
137325cf1a30Sjl 		 */
137425cf1a30Sjl 		for (ml = t_mp->sbm_mlist; ml; ml = ml->next) {
137525cf1a30Sjl 			ml->address -= t_old_basepa;
137625cf1a30Sjl 			ml->address += t_new_basepa;
137725cf1a30Sjl 		}
137825cf1a30Sjl 
137925cf1a30Sjl 		/*
138025cf1a30Sjl 		 * There is no need to explicitly rename the target delete
138125cf1a30Sjl 		 * memlist, because sbm_del_mlist and sbm_mlist always
138225cf1a30Sjl 		 * point to the same memlist for a copy/rename operation.
138325cf1a30Sjl 		 */
138425cf1a30Sjl 		ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
138525cf1a30Sjl 
138625cf1a30Sjl 		PR_MEM("%s: renamed target memlist and delete memlist:\n", f);
138725cf1a30Sjl 		PR_MEMLIST_DUMP(t_mp->sbm_mlist);
138825cf1a30Sjl 
138925cf1a30Sjl 		for (ml = s_mp->sbm_mlist; ml; ml = ml->next) {
139025cf1a30Sjl 			ml->address -= s_old_basepa;
139125cf1a30Sjl 			ml->address += s_new_basepa;
139225cf1a30Sjl 		}
139325cf1a30Sjl 
139425cf1a30Sjl 		PR_MEM("%s: renamed source memlist:\n", f);
139525cf1a30Sjl 		PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1396*68ac2337Sjl 		PR_MEM("%s: source dyn seg memlist:\n", f);
1397*68ac2337Sjl 		PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
139825cf1a30Sjl 
139925cf1a30Sjl 		/*
140025cf1a30Sjl 		 * Keep track of dynamically added segments
140125cf1a30Sjl 		 * since they cannot be split if we need to delete
140225cf1a30Sjl 		 * excess source memory later for this board.
140325cf1a30Sjl 		 */
140425cf1a30Sjl 		if (t_mp->sbm_dyn_segs)
140525cf1a30Sjl 			memlist_delete(t_mp->sbm_dyn_segs);
140625cf1a30Sjl 		t_mp->sbm_dyn_segs = s_mp->sbm_dyn_segs;
140725cf1a30Sjl 		s_mp->sbm_dyn_segs = NULL;
140825cf1a30Sjl 
140925cf1a30Sjl 		/*
141025cf1a30Sjl 		 * Add back excess target memory.
141125cf1a30Sjl 		 * Subtract out the portion of the target memory
141225cf1a30Sjl 		 * node that was taken over by the source memory
141325cf1a30Sjl 		 * node.
141425cf1a30Sjl 		 */
141525cf1a30Sjl 		t_excess_mlist = memlist_dup(t_mp->sbm_mlist);
141625cf1a30Sjl 		for (ml = s_copy_mlist; ml; ml = ml->next) {
141725cf1a30Sjl 			t_excess_mlist =
141825cf1a30Sjl 			    memlist_del_span(t_excess_mlist,
141925cf1a30Sjl 			    ml->address, ml->size);
142025cf1a30Sjl 		}
1421*68ac2337Sjl 		PR_MEM("%s: excess memlist:\n", f);
1422*68ac2337Sjl 		PR_MEMLIST_DUMP(t_excess_mlist);
142325cf1a30Sjl 
142425cf1a30Sjl 		/*
142525cf1a30Sjl 		 * Update dynamically added segs
142625cf1a30Sjl 		 */
142725cf1a30Sjl 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
142825cf1a30Sjl 			t_mp->sbm_dyn_segs =
142925cf1a30Sjl 			    memlist_del_span(t_mp->sbm_dyn_segs,
143025cf1a30Sjl 			    ml->address, ml->size);
143125cf1a30Sjl 		}
143225cf1a30Sjl 		for (ml = t_excess_mlist; ml; ml = ml->next) {
143325cf1a30Sjl 			t_mp->sbm_dyn_segs =
143425cf1a30Sjl 			    memlist_cat_span(t_mp->sbm_dyn_segs,
143525cf1a30Sjl 			    ml->address, ml->size);
143625cf1a30Sjl 		}
143725cf1a30Sjl 		PR_MEM("%s: %s: updated dynamic seg list:\n",
143825cf1a30Sjl 		    f, t_mp->sbm_cm.sbdev_path);
143925cf1a30Sjl 		PR_MEMLIST_DUMP(t_mp->sbm_dyn_segs);
144025cf1a30Sjl 
144125cf1a30Sjl 		if (t_excess_mlist != NULL) {
1442*68ac2337Sjl 			err = dr_process_excess_mlist(s_mp, t_mp,
1443*68ac2337Sjl 				t_excess_mlist);
144425cf1a30Sjl 			s_excess_mem_deleted = 1;
144525cf1a30Sjl 		}
1446*68ac2337Sjl 
144725cf1a30Sjl 		memlist_delete(s_copy_mlist);
144825cf1a30Sjl 
144925cf1a30Sjl #ifdef DEBUG
145025cf1a30Sjl 		/*
145125cf1a30Sjl 		 * s_mp->sbm_del_mlist may still be needed
145225cf1a30Sjl 		 */
145325cf1a30Sjl 		PR_MEM("%s: source delete memory flag %d",
145425cf1a30Sjl 		    f, s_excess_mem_deleted);
145525cf1a30Sjl 		PR_MEM("%s: source delete memlist", f);
145625cf1a30Sjl 		PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
145725cf1a30Sjl #endif
145825cf1a30Sjl 
145925cf1a30Sjl 	}
146025cf1a30Sjl 
146125cf1a30Sjl 	if (t_mp != NULL) {
146225cf1a30Sjl 		/* delete target's entire address space */
146325cf1a30Sjl 		err = drmach_mem_del_span(
146425cf1a30Sjl 			t_mp->sbm_cm.sbdev_id, t_old_basepa & ~ sm, sz);
146525cf1a30Sjl 		if (err)
146625cf1a30Sjl 			DRERR_SET_C(&t_mp->sbm_cm.sbdev_error, &err);
146725cf1a30Sjl 		ASSERT(err == NULL);
146825cf1a30Sjl 
146925cf1a30Sjl 		/*
147025cf1a30Sjl 		 * After the copy/rename, the original address space
147125cf1a30Sjl 		 * for the source board (which is now located on the
147225cf1a30Sjl 		 * target board) may now have some excess to be deleted.
147325cf1a30Sjl 		 * That excess memory is kept in the source
147425cf1a30Sjl 		 * board's sbm_del_mlist.
147525cf1a30Sjl 		 */
147625cf1a30Sjl 		for (ml = s_mp->sbm_del_mlist; !s_excess_mem_deleted && ml;
147725cf1a30Sjl 			ml = ml->next) {
147825cf1a30Sjl 			PR_MEM("%s: delete source excess memory", f);
147925cf1a30Sjl 			PR_MEMLIST_DUMP(ml);
148025cf1a30Sjl 
148125cf1a30Sjl 			err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
148225cf1a30Sjl 				ml->address, ml->size);
148325cf1a30Sjl 			if (err)
148425cf1a30Sjl 				DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
148525cf1a30Sjl 			ASSERT(err == NULL);
148625cf1a30Sjl 		}
148725cf1a30Sjl 
148825cf1a30Sjl 	} else {
148925cf1a30Sjl 		/* delete board's entire address space */
149025cf1a30Sjl 		err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
149125cf1a30Sjl 						s_old_basepa & ~ sm, sz);
149225cf1a30Sjl 		if (err)
149325cf1a30Sjl 			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
149425cf1a30Sjl 		ASSERT(err == NULL);
149525cf1a30Sjl 	}
149625cf1a30Sjl 
149725cf1a30Sjl cleanup:
149825cf1a30Sjl 	/* clean up target mem unit */
149925cf1a30Sjl 	if (t_mp != NULL) {
150025cf1a30Sjl 		memlist_delete(t_mp->sbm_del_mlist);
150125cf1a30Sjl 		/* no need to delete sbm_mlist, it shares sbm_del_mlist */
150225cf1a30Sjl 
150325cf1a30Sjl 		t_mp->sbm_del_mlist = NULL;
150425cf1a30Sjl 		t_mp->sbm_mlist = NULL;
150525cf1a30Sjl 		t_mp->sbm_peer = NULL;
150625cf1a30Sjl 		t_mp->sbm_flags = 0;
150725cf1a30Sjl 		t_mp->sbm_cm.sbdev_busy = 0;
150825cf1a30Sjl 		dr_init_mem_unit_data(t_mp);
150925cf1a30Sjl 
151025cf1a30Sjl 	}
151125cf1a30Sjl 	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error == NULL) {
151225cf1a30Sjl 		/*
151325cf1a30Sjl 		 * now that copy/rename has completed, undo this
151425cf1a30Sjl 		 * work that was done in dr_release_mem_done.
151525cf1a30Sjl 		 */
151625cf1a30Sjl 		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
151725cf1a30Sjl 		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
151825cf1a30Sjl 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
151925cf1a30Sjl 	}
152025cf1a30Sjl 
152125cf1a30Sjl 	/*
152225cf1a30Sjl 	 * clean up (source) board's mem unit structure.
152325cf1a30Sjl 	 * NOTE: sbm_mlist is retained if no error has been recorded (in other
152425cf1a30Sjl 	 * words, when s_mp->sbm_cm.sbdev_error is NULL). This memlist is
152525cf1a30Sjl 	 * referred to elsewhere as the cached memlist.  The cached memlist
152625cf1a30Sjl 	 * is used to re-attach (configure back in) this memunit from the
152725cf1a30Sjl 	 * unconfigured state.  The memlist is retained because it may
152825cf1a30Sjl 	 * represent bad pages that were detected while the memory was
152925cf1a30Sjl 	 * configured into the OS.  The OS deletes bad pages from phys_install.
153025cf1a30Sjl 	 * Those deletes, if any, will be represented in the cached mlist.
153125cf1a30Sjl 	 */
153225cf1a30Sjl 	if (s_mp->sbm_del_mlist && s_mp->sbm_del_mlist != s_mp->sbm_mlist)
153325cf1a30Sjl 		memlist_delete(s_mp->sbm_del_mlist);
153425cf1a30Sjl 
153525cf1a30Sjl 	if (s_mp->sbm_cm.sbdev_error && s_mp->sbm_mlist) {
153625cf1a30Sjl 		memlist_delete(s_mp->sbm_mlist);
153725cf1a30Sjl 		s_mp->sbm_mlist = NULL;
153825cf1a30Sjl 	}
153925cf1a30Sjl 
154025cf1a30Sjl 	if (s_mp->sbm_dyn_segs != NULL && s_mp->sbm_cm.sbdev_error == 0) {
154125cf1a30Sjl 		memlist_delete(s_mp->sbm_dyn_segs);
154225cf1a30Sjl 		s_mp->sbm_dyn_segs = NULL;
154325cf1a30Sjl 	}
154425cf1a30Sjl 
154525cf1a30Sjl 	s_mp->sbm_del_mlist = NULL;
154625cf1a30Sjl 	s_mp->sbm_peer = NULL;
154725cf1a30Sjl 	s_mp->sbm_flags = 0;
154825cf1a30Sjl 	s_mp->sbm_cm.sbdev_busy = 0;
154925cf1a30Sjl 	dr_init_mem_unit_data(s_mp);
155025cf1a30Sjl 
155125cf1a30Sjl 	PR_MEM("%s: cached memlist for %s:", f, s_mp->sbm_cm.sbdev_path);
155225cf1a30Sjl 	PR_MEMLIST_DUMP(s_mp->sbm_mlist);
155325cf1a30Sjl 
155425cf1a30Sjl 	return (0);
155525cf1a30Sjl }
155625cf1a30Sjl 
155725cf1a30Sjl /*
155825cf1a30Sjl  * Successful return from this function will have the memory
155925cf1a30Sjl  * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated
156025cf1a30Sjl  * and waiting.  This routine's job is to select the memory that
156125cf1a30Sjl  * actually has to be released (detached), which may not necessarily
156225cf1a30Sjl  * be the same memory node that came in via devlist[];
156325cf1a30Sjl  * in that case a copy-rename is needed.
156425cf1a30Sjl  */
156525cf1a30Sjl int
156625cf1a30Sjl dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
156725cf1a30Sjl {
156825cf1a30Sjl 	int		d;
156925cf1a30Sjl 	int		err_flag = 0;
157025cf1a30Sjl 	static fn_t	f = "dr_pre_release_mem";
157125cf1a30Sjl 
157225cf1a30Sjl 	PR_MEM("%s...\n", f);
157325cf1a30Sjl 
157425cf1a30Sjl 	for (d = 0; d < devnum; d++) {
157525cf1a30Sjl 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
157625cf1a30Sjl 		int		rv;
157725cf1a30Sjl 		memquery_t	mq;
157825cf1a30Sjl 		struct memlist	*ml;
157925cf1a30Sjl 
158025cf1a30Sjl 		if (mp->sbm_cm.sbdev_error) {
158125cf1a30Sjl 			err_flag = 1;
158225cf1a30Sjl 			continue;
158325cf1a30Sjl 		} else if (!kcage_on) {
158425cf1a30Sjl 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_KCAGE_OFF);
158525cf1a30Sjl 			err_flag = 1;
158625cf1a30Sjl 			continue;
158725cf1a30Sjl 		}
158825cf1a30Sjl 
158925cf1a30Sjl 		if (mp->sbm_flags & DR_MFLAG_RESERVED) {
159025cf1a30Sjl 			/*
159125cf1a30Sjl 			 * Board is currently involved in a delete
159225cf1a30Sjl 			 * memory operation. Can't detach this guy until
159325cf1a30Sjl 			 * that operation completes.
159425cf1a30Sjl 			 */
159525cf1a30Sjl 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_INVAL);
159625cf1a30Sjl 			err_flag = 1;
159725cf1a30Sjl 			break;
159825cf1a30Sjl 		}
159925cf1a30Sjl 
160025cf1a30Sjl 		/* flags should be clean at this time */
160125cf1a30Sjl 		ASSERT(mp->sbm_flags == 0);
160225cf1a30Sjl 
160325cf1a30Sjl 		ASSERT(mp->sbm_mlist == NULL);
160425cf1a30Sjl 		ASSERT(mp->sbm_del_mlist == NULL);
160525cf1a30Sjl 		if (mp->sbm_mlist != NULL) {
160625cf1a30Sjl 			memlist_delete(mp->sbm_mlist);
160725cf1a30Sjl 			mp->sbm_mlist = NULL;
160825cf1a30Sjl 		}
160925cf1a30Sjl 
161025cf1a30Sjl 		ml = dr_get_memlist(mp);
161125cf1a30Sjl 		if (ml == NULL) {
161225cf1a30Sjl 			err_flag = 1;
161325cf1a30Sjl 			PR_MEM("%s: no memlist found for %s\n",
161425cf1a30Sjl 			    f, mp->sbm_cm.sbdev_path);
161525cf1a30Sjl 			continue;
161625cf1a30Sjl 		}
161725cf1a30Sjl 
161825cf1a30Sjl 		/*
161925cf1a30Sjl 		 * Check whether the detaching memory requires a
162025cf1a30Sjl 		 * copy-rename.
162125cf1a30Sjl 		 */
162225cf1a30Sjl 		ASSERT(mp->sbm_npages != 0);
1623*68ac2337Sjl 
162425cf1a30Sjl 		rv = dr_del_mlist_query(ml, &mq);
162525cf1a30Sjl 		if (rv != KPHYSM_OK) {
162625cf1a30Sjl 			memlist_delete(ml);
162725cf1a30Sjl 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
162825cf1a30Sjl 			err_flag = 1;
162925cf1a30Sjl 			break;
163025cf1a30Sjl 		}
163125cf1a30Sjl 
163225cf1a30Sjl 		if (mq.nonrelocatable != 0) {
163325cf1a30Sjl 			if (!(dr_cmd_flags(hp) &
163425cf1a30Sjl 				(SBD_FLAG_FORCE | SBD_FLAG_QUIESCE_OKAY))) {
163525cf1a30Sjl 				memlist_delete(ml);
163625cf1a30Sjl 				/* caller wasn't prompted for a suspend */
163725cf1a30Sjl 				dr_dev_err(CE_WARN, &mp->sbm_cm,
163825cf1a30Sjl 					ESBD_QUIESCE_REQD);
163925cf1a30Sjl 				err_flag = 1;
164025cf1a30Sjl 				break;
164125cf1a30Sjl 			}
164225cf1a30Sjl 		}
164325cf1a30Sjl 
164425cf1a30Sjl 		/* allocate a kphysm handle */
164525cf1a30Sjl 		rv = kphysm_del_gethandle(&mp->sbm_memhandle);
164625cf1a30Sjl 		if (rv != KPHYSM_OK) {
164725cf1a30Sjl 			memlist_delete(ml);
164825cf1a30Sjl 
164925cf1a30Sjl 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
165025cf1a30Sjl 			err_flag = 1;
165125cf1a30Sjl 			break;
165225cf1a30Sjl 		}
165325cf1a30Sjl 		mp->sbm_flags |= DR_MFLAG_RELOWNER;
165425cf1a30Sjl 
165525cf1a30Sjl 		if ((mq.nonrelocatable != 0) ||
165625cf1a30Sjl 			dr_reserve_mem_spans(&mp->sbm_memhandle, ml)) {
165725cf1a30Sjl 			/*
165825cf1a30Sjl 			 * Either the detaching memory node contains
165925cf1a30Sjl 			 * non-reloc memory or we failed to reserve the
166025cf1a30Sjl 			 * detaching memory node (which did _not_ have
166125cf1a30Sjl 			 * any non-reloc memory at query time, i.e. some
166225cf1a30Sjl 			 * non-reloc memory has since appeared on the board).
166325cf1a30Sjl 			 */
166425cf1a30Sjl 
166525cf1a30Sjl 			if (dr_select_mem_target(hp, mp, ml)) {
166625cf1a30Sjl 				int rv;
166725cf1a30Sjl 
166825cf1a30Sjl 				/*
166925cf1a30Sjl 				 * We had no luck locating a target
167025cf1a30Sjl 				 * memory node to be the recipient of
167125cf1a30Sjl 				 * the non-reloc memory on the node
167225cf1a30Sjl 				 * we're trying to detach.
167325cf1a30Sjl 				 * Clean up by disposing of the mem handle
167425cf1a30Sjl 				 * and the mem list.
167525cf1a30Sjl 				 */
167625cf1a30Sjl 				rv = kphysm_del_release(mp->sbm_memhandle);
167725cf1a30Sjl 				if (rv != KPHYSM_OK) {
167825cf1a30Sjl 					/*
167925cf1a30Sjl 					 * can do nothing but complain
168025cf1a30Sjl 					 * and hope it helps debugging
168125cf1a30Sjl 					 */
168225cf1a30Sjl 					cmn_err(CE_WARN, "%s: unexpected"
168325cf1a30Sjl 						" kphysm_del_release return"
168425cf1a30Sjl 						" value %d",
168525cf1a30Sjl 						f, rv);
168625cf1a30Sjl 				}
168725cf1a30Sjl 				mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
168825cf1a30Sjl 
168925cf1a30Sjl 				memlist_delete(ml);
169025cf1a30Sjl 
169125cf1a30Sjl 				/* make sure sbm_flags is clean */
169225cf1a30Sjl 				ASSERT(mp->sbm_flags == 0);
169325cf1a30Sjl 
169425cf1a30Sjl 				dr_dev_err(CE_WARN,
169525cf1a30Sjl 					&mp->sbm_cm, ESBD_NO_TARGET);
169625cf1a30Sjl 
169725cf1a30Sjl 				err_flag = 1;
169825cf1a30Sjl 				break;
169925cf1a30Sjl 			}
170025cf1a30Sjl 
170125cf1a30Sjl 			/*
170225cf1a30Sjl 			 * ml is not memlist_delete'd here because
170325cf1a30Sjl 			 * it has been assigned to mp->sbm_mlist
170425cf1a30Sjl 			 * by dr_select_mem_target.
170525cf1a30Sjl 			 */
170625cf1a30Sjl 		} else {
170725cf1a30Sjl 			/* no target needed to detach this board */
170825cf1a30Sjl 			mp->sbm_flags |= DR_MFLAG_RESERVED;
170925cf1a30Sjl 			mp->sbm_peer = NULL;
171025cf1a30Sjl 			mp->sbm_del_mlist = ml;
171125cf1a30Sjl 			mp->sbm_mlist = ml;
171225cf1a30Sjl 			mp->sbm_cm.sbdev_busy = 1;
171325cf1a30Sjl 		}
171425cf1a30Sjl #ifdef DEBUG
171525cf1a30Sjl 		ASSERT(mp->sbm_mlist != NULL);
171625cf1a30Sjl 
171725cf1a30Sjl 		if (mp->sbm_flags & DR_MFLAG_SOURCE) {
171825cf1a30Sjl 			PR_MEM("%s: release of %s requires copy/rename;"
171925cf1a30Sjl 				" selected target board %s\n",
172025cf1a30Sjl 				f,
172125cf1a30Sjl 				mp->sbm_cm.sbdev_path,
172225cf1a30Sjl 				mp->sbm_peer->sbm_cm.sbdev_path);
172325cf1a30Sjl 		} else {
172425cf1a30Sjl 			PR_MEM("%s: copy/rename not required to release %s\n",
172525cf1a30Sjl 				f, mp->sbm_cm.sbdev_path);
172625cf1a30Sjl 		}
172725cf1a30Sjl 
172825cf1a30Sjl 		ASSERT(mp->sbm_flags & DR_MFLAG_RELOWNER);
172925cf1a30Sjl 		ASSERT(mp->sbm_flags & DR_MFLAG_RESERVED);
173025cf1a30Sjl #endif
173125cf1a30Sjl 	}
173225cf1a30Sjl 
173325cf1a30Sjl 	return (err_flag ? -1 : 0);
173425cf1a30Sjl }
173525cf1a30Sjl 
173625cf1a30Sjl void
173725cf1a30Sjl dr_release_mem_done(dr_common_unit_t *cp)
173825cf1a30Sjl {
173925cf1a30Sjl 	dr_mem_unit_t	*s_mp = (dr_mem_unit_t *)cp;
174025cf1a30Sjl 	dr_mem_unit_t *t_mp, *mp;
174125cf1a30Sjl 	int		rv;
174225cf1a30Sjl 	static fn_t	f = "dr_release_mem_done";
174325cf1a30Sjl 
174425cf1a30Sjl 	/*
174525cf1a30Sjl 	 * This unit will be flagged with DR_MFLAG_SOURCE if it
174625cf1a30Sjl 	 * has a target unit.
174725cf1a30Sjl 	 */
174825cf1a30Sjl 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
174925cf1a30Sjl 		t_mp = s_mp->sbm_peer;
175025cf1a30Sjl 		ASSERT(t_mp != NULL);
175125cf1a30Sjl 		ASSERT(t_mp->sbm_peer == s_mp);
175225cf1a30Sjl 		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
175325cf1a30Sjl 		ASSERT(t_mp->sbm_flags & DR_MFLAG_RESERVED);
175425cf1a30Sjl 	} else {
175525cf1a30Sjl 		/* there is no target unit */
175625cf1a30Sjl 		t_mp = NULL;
175725cf1a30Sjl 	}
175825cf1a30Sjl 
175925cf1a30Sjl 	/* free delete handle */
176025cf1a30Sjl 	ASSERT(s_mp->sbm_flags & DR_MFLAG_RELOWNER);
176125cf1a30Sjl 	ASSERT(s_mp->sbm_flags & DR_MFLAG_RESERVED);
176225cf1a30Sjl 	rv = kphysm_del_release(s_mp->sbm_memhandle);
176325cf1a30Sjl 	if (rv != KPHYSM_OK) {
176425cf1a30Sjl 		/*
176525cf1a30Sjl 		 * can do nothing but complain
176625cf1a30Sjl 		 * and hope it helps debugging
176725cf1a30Sjl 		 */
176825cf1a30Sjl 		cmn_err(CE_WARN, "%s: unexpected kphysm_del_release"
176925cf1a30Sjl 			" return value %d", f, rv);
177025cf1a30Sjl 	}
177125cf1a30Sjl 	s_mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
177225cf1a30Sjl 
177325cf1a30Sjl 	/*
177425cf1a30Sjl 	 * If an error was encountered during release, clean up
177525cf1a30Sjl 	 * the source (and target, if present) unit data.
177625cf1a30Sjl 	 */
177725cf1a30Sjl /* XXX Can we know that sbdev_error was encountered during release? */
177825cf1a30Sjl 	if (s_mp->sbm_cm.sbdev_error != NULL) {
177925cf1a30Sjl 		PR_MEM("%s: %s: error %d noted\n",
178025cf1a30Sjl 			f,
178125cf1a30Sjl 			s_mp->sbm_cm.sbdev_path,
178225cf1a30Sjl 			s_mp->sbm_cm.sbdev_error->e_code);
178325cf1a30Sjl 
178425cf1a30Sjl 		if (t_mp != NULL) {
178525cf1a30Sjl 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
178625cf1a30Sjl 			t_mp->sbm_del_mlist = NULL;
178725cf1a30Sjl 
178825cf1a30Sjl 			if (t_mp->sbm_mlist != NULL) {
178925cf1a30Sjl 				memlist_delete(t_mp->sbm_mlist);
179025cf1a30Sjl 				t_mp->sbm_mlist = NULL;
179125cf1a30Sjl 			}
179225cf1a30Sjl 
179325cf1a30Sjl 			t_mp->sbm_peer = NULL;
179425cf1a30Sjl 			t_mp->sbm_flags = 0;
179525cf1a30Sjl 			t_mp->sbm_cm.sbdev_busy = 0;
179625cf1a30Sjl 		}
179725cf1a30Sjl 
179825cf1a30Sjl 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
179925cf1a30Sjl 			memlist_delete(s_mp->sbm_del_mlist);
180025cf1a30Sjl 		s_mp->sbm_del_mlist = NULL;
180125cf1a30Sjl 
180225cf1a30Sjl 		if (s_mp->sbm_mlist != NULL) {
180325cf1a30Sjl 			memlist_delete(s_mp->sbm_mlist);
180425cf1a30Sjl 			s_mp->sbm_mlist = NULL;
180525cf1a30Sjl 		}
180625cf1a30Sjl 
180725cf1a30Sjl 		s_mp->sbm_peer = NULL;
180825cf1a30Sjl 		s_mp->sbm_flags = 0;
180925cf1a30Sjl 		s_mp->sbm_cm.sbdev_busy = 0;
181025cf1a30Sjl 
181125cf1a30Sjl 		/* bail out */
181225cf1a30Sjl 		return;
181325cf1a30Sjl 	}
181425cf1a30Sjl 
181525cf1a30Sjl 	DR_DEV_SET_RELEASED(&s_mp->sbm_cm);
181625cf1a30Sjl 	dr_device_transition(&s_mp->sbm_cm, DR_STATE_RELEASE);
181725cf1a30Sjl 
181825cf1a30Sjl 	if (t_mp != NULL) {
181925cf1a30Sjl 		/*
182025cf1a30Sjl 		 * the kphysm delete operation that drained the source
182125cf1a30Sjl 		 * board also drained this target board.  Since the source
182225cf1a30Sjl 		 * board drain is now known to have succeeded, we know this
182325cf1a30Sjl 		 * target board is drained too.
182425cf1a30Sjl 		 *
182525cf1a30Sjl 		 * Because DR_DEV_SET_RELEASED and dr_device_transition
182625cf1a30Sjl 		 * are done here, dr_release_dev_done should not
182725cf1a30Sjl 		 * fail.
182825cf1a30Sjl 		 */
182925cf1a30Sjl 		DR_DEV_SET_RELEASED(&t_mp->sbm_cm);
183025cf1a30Sjl 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_RELEASE);
183125cf1a30Sjl 
183225cf1a30Sjl 		/*
183325cf1a30Sjl 		 * NOTE: do not transition the target's board state,
183425cf1a30Sjl 		 * even if the mem-unit was the last configured
183525cf1a30Sjl 		 * unit of the board.  When copy/rename completes,
183625cf1a30Sjl 		 * this mem-unit will be transitioned back to
183725cf1a30Sjl 		 * the configured state.  In the meantime, the
183825cf1a30Sjl 		 * board's state must remain as is.
183925cf1a30Sjl 		 */
184025cf1a30Sjl 	}
184125cf1a30Sjl 
184225cf1a30Sjl 	/* if board(s) had deleted memory, verify it is gone */
184325cf1a30Sjl 	rv = 0;
184425cf1a30Sjl 	memlist_read_lock();
184525cf1a30Sjl 	if (s_mp->sbm_del_mlist != NULL) {
184625cf1a30Sjl 		mp = s_mp;
184725cf1a30Sjl 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
184825cf1a30Sjl 	}
184925cf1a30Sjl 	if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
185025cf1a30Sjl 		mp = t_mp;
185125cf1a30Sjl 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
185225cf1a30Sjl 	}
185325cf1a30Sjl 	memlist_read_unlock();
185425cf1a30Sjl 	if (rv) {
185525cf1a30Sjl 		cmn_err(CE_WARN, "%s: %smem-unit (%d.%d): "
185625cf1a30Sjl 			"deleted memory still found in phys_install",
185725cf1a30Sjl 			f,
185825cf1a30Sjl 			(mp == t_mp ? "target " : ""),
185925cf1a30Sjl 			mp->sbm_cm.sbdev_bp->b_num,
186025cf1a30Sjl 			mp->sbm_cm.sbdev_unum);
186125cf1a30Sjl 
186225cf1a30Sjl 		DR_DEV_INTERNAL_ERROR(&s_mp->sbm_cm);
186325cf1a30Sjl 		return;
186425cf1a30Sjl 	}
186525cf1a30Sjl 
186625cf1a30Sjl 	s_mp->sbm_flags |= DR_MFLAG_RELDONE;
186725cf1a30Sjl 	if (t_mp != NULL)
186825cf1a30Sjl 		t_mp->sbm_flags |= DR_MFLAG_RELDONE;
186925cf1a30Sjl 
187025cf1a30Sjl 	/* this should not fail */
187125cf1a30Sjl 	if (dr_release_dev_done(&s_mp->sbm_cm) != 0) {
187225cf1a30Sjl 		/* catch this in debug kernels */
187325cf1a30Sjl 		ASSERT(0);
187425cf1a30Sjl 		return;
187525cf1a30Sjl 	}
187625cf1a30Sjl 
187725cf1a30Sjl 	PR_MEM("%s: marking %s release DONE\n",
187825cf1a30Sjl 		f, s_mp->sbm_cm.sbdev_path);
187925cf1a30Sjl 
188025cf1a30Sjl 	s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
188125cf1a30Sjl 
188225cf1a30Sjl 	if (t_mp != NULL) {
188325cf1a30Sjl 		/* should not fail */
188425cf1a30Sjl 		rv = dr_release_dev_done(&t_mp->sbm_cm);
188525cf1a30Sjl 		if (rv != 0) {
188625cf1a30Sjl 			/* catch this in debug kernels */
188725cf1a30Sjl 			ASSERT(0);
188825cf1a30Sjl 			return;
188925cf1a30Sjl 		}
189025cf1a30Sjl 
189125cf1a30Sjl 		PR_MEM("%s: marking %s release DONE\n",
189225cf1a30Sjl 			f, t_mp->sbm_cm.sbdev_path);
189325cf1a30Sjl 
189425cf1a30Sjl 		t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
189525cf1a30Sjl 	}
189625cf1a30Sjl }
189725cf1a30Sjl 
189825cf1a30Sjl /*ARGSUSED*/
189925cf1a30Sjl int
190025cf1a30Sjl dr_disconnect_mem(dr_mem_unit_t *mp)
190125cf1a30Sjl {
190225cf1a30Sjl 	static fn_t	f = "dr_disconnect_mem";
190325cf1a30Sjl 	update_membounds_t umb;
190425cf1a30Sjl 
190525cf1a30Sjl #ifdef DEBUG
190625cf1a30Sjl 	int state = mp->sbm_cm.sbdev_state;
190725cf1a30Sjl 	ASSERT(state == DR_STATE_CONNECTED ||
190825cf1a30Sjl 		state == DR_STATE_UNCONFIGURED);
190925cf1a30Sjl #endif
191025cf1a30Sjl 
191125cf1a30Sjl 	PR_MEM("%s...\n", f);
191225cf1a30Sjl 
191325cf1a30Sjl 	if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
191425cf1a30Sjl 		memlist_delete(mp->sbm_del_mlist);
191525cf1a30Sjl 	mp->sbm_del_mlist = NULL;
191625cf1a30Sjl 
191725cf1a30Sjl 	if (mp->sbm_mlist) {
191825cf1a30Sjl 		memlist_delete(mp->sbm_mlist);
191925cf1a30Sjl 		mp->sbm_mlist = NULL;
192025cf1a30Sjl 	}
192125cf1a30Sjl 
192225cf1a30Sjl 	/*
192325cf1a30Sjl 	 * Remove memory from lgroup
192425cf1a30Sjl 	 * For now, only board info is required.
192525cf1a30Sjl 	 */
192625cf1a30Sjl 	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
192725cf1a30Sjl 	umb.u_base = (uint64_t)-1;
192825cf1a30Sjl 	umb.u_len = (uint64_t)-1;
192925cf1a30Sjl 
193025cf1a30Sjl 	lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);
193125cf1a30Sjl 
193225cf1a30Sjl 	return (0);
193325cf1a30Sjl }
193425cf1a30Sjl 
193525cf1a30Sjl int
193625cf1a30Sjl dr_cancel_mem(dr_mem_unit_t *s_mp)
193725cf1a30Sjl {
193825cf1a30Sjl 	dr_mem_unit_t	*t_mp;
193925cf1a30Sjl 	dr_state_t	state;
194025cf1a30Sjl 	static fn_t	f = "dr_cancel_mem";
194125cf1a30Sjl 
194225cf1a30Sjl 	state = s_mp->sbm_cm.sbdev_state;
194325cf1a30Sjl 
194425cf1a30Sjl 	if (s_mp->sbm_flags & DR_MFLAG_TARGET) {
194525cf1a30Sjl 		/* must cancel source board, not target board */
194625cf1a30Sjl 		/* TODO: set error */
194725cf1a30Sjl 		return (-1);
194825cf1a30Sjl 	} else if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
194925cf1a30Sjl 		t_mp = s_mp->sbm_peer;
195025cf1a30Sjl 		ASSERT(t_mp != NULL);
195125cf1a30Sjl 		ASSERT(t_mp->sbm_peer == s_mp);
195225cf1a30Sjl 
195325cf1a30Sjl 		/* must always match the source board's state */
195425cf1a30Sjl 		/* TODO: is this assertion correct? */
195525cf1a30Sjl 		ASSERT(t_mp->sbm_cm.sbdev_state == state);
195625cf1a30Sjl 	} else {
195725cf1a30Sjl 		/* there is no target unit */
195825cf1a30Sjl 		t_mp = NULL;
195925cf1a30Sjl 	}
196025cf1a30Sjl 
196125cf1a30Sjl 	switch (state) {
196225cf1a30Sjl 	case DR_STATE_UNREFERENCED:	/* state set by dr_release_dev_done */
196325cf1a30Sjl 		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
196425cf1a30Sjl 
196525cf1a30Sjl 		if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
196625cf1a30Sjl 			PR_MEM("%s: undoing target %s memory delete\n",
196725cf1a30Sjl 				f, t_mp->sbm_cm.sbdev_path);
196825cf1a30Sjl 			dr_add_memory_spans(t_mp, t_mp->sbm_del_mlist);
196925cf1a30Sjl 
197025cf1a30Sjl 			DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
197125cf1a30Sjl 		}
197225cf1a30Sjl 
197325cf1a30Sjl 		if (s_mp->sbm_del_mlist != NULL) {
197425cf1a30Sjl 			PR_MEM("%s: undoing %s memory delete\n",
197525cf1a30Sjl 				f, s_mp->sbm_cm.sbdev_path);
197625cf1a30Sjl 
197725cf1a30Sjl 			dr_add_memory_spans(s_mp, s_mp->sbm_del_mlist);
197825cf1a30Sjl 		}
197925cf1a30Sjl 
198025cf1a30Sjl 		/*FALLTHROUGH*/
198125cf1a30Sjl 
198225cf1a30Sjl /* TODO: should no longer be possible to see the release state here */
198325cf1a30Sjl 	case DR_STATE_RELEASE:	/* state set by dr_release_mem_done */
198425cf1a30Sjl 
198525cf1a30Sjl 		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
198625cf1a30Sjl 
198725cf1a30Sjl 		if (t_mp != NULL) {
198825cf1a30Sjl 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
198925cf1a30Sjl 			t_mp->sbm_del_mlist = NULL;
199025cf1a30Sjl 
199125cf1a30Sjl 			if (t_mp->sbm_mlist != NULL) {
199225cf1a30Sjl 				memlist_delete(t_mp->sbm_mlist);
199325cf1a30Sjl 				t_mp->sbm_mlist = NULL;
199425cf1a30Sjl 			}
199525cf1a30Sjl 
199625cf1a30Sjl 			t_mp->sbm_peer = NULL;
199725cf1a30Sjl 			t_mp->sbm_flags = 0;
199825cf1a30Sjl 			t_mp->sbm_cm.sbdev_busy = 0;
199925cf1a30Sjl 			dr_init_mem_unit_data(t_mp);
200025cf1a30Sjl 
200125cf1a30Sjl 			DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
200225cf1a30Sjl 
200325cf1a30Sjl 			dr_device_transition(
200425cf1a30Sjl 				&t_mp->sbm_cm, DR_STATE_CONFIGURED);
200525cf1a30Sjl 		}
200625cf1a30Sjl 
200725cf1a30Sjl 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
200825cf1a30Sjl 			memlist_delete(s_mp->sbm_del_mlist);
200925cf1a30Sjl 		s_mp->sbm_del_mlist = NULL;
201025cf1a30Sjl 
201125cf1a30Sjl 		if (s_mp->sbm_mlist != NULL) {
201225cf1a30Sjl 			memlist_delete(s_mp->sbm_mlist);
201325cf1a30Sjl 			s_mp->sbm_mlist = NULL;
201425cf1a30Sjl 		}
201525cf1a30Sjl 
201625cf1a30Sjl 		s_mp->sbm_peer = NULL;
201725cf1a30Sjl 		s_mp->sbm_flags = 0;
201825cf1a30Sjl 		s_mp->sbm_cm.sbdev_busy = 0;
201925cf1a30Sjl 		dr_init_mem_unit_data(s_mp);
202025cf1a30Sjl 
202125cf1a30Sjl 		return (0);
202225cf1a30Sjl 
202325cf1a30Sjl 	default:
202425cf1a30Sjl 		PR_MEM("%s: WARNING unexpected state (%d) for %s\n",
202525cf1a30Sjl 			f, (int)state, s_mp->sbm_cm.sbdev_path);
202625cf1a30Sjl 
202725cf1a30Sjl 		return (-1);
202825cf1a30Sjl 	}
202925cf1a30Sjl 	/*NOTREACHED*/
203025cf1a30Sjl }
203125cf1a30Sjl 
203225cf1a30Sjl void
203325cf1a30Sjl dr_init_mem_unit(dr_mem_unit_t *mp)
203425cf1a30Sjl {
203525cf1a30Sjl 	dr_state_t	new_state;
203625cf1a30Sjl 
203725cf1a30Sjl 
203825cf1a30Sjl 	if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
203925cf1a30Sjl 		new_state = DR_STATE_CONFIGURED;
204025cf1a30Sjl 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
204125cf1a30Sjl 	} else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
204225cf1a30Sjl 		new_state = DR_STATE_CONNECTED;
204325cf1a30Sjl 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
204425cf1a30Sjl 	} else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
204525cf1a30Sjl 		new_state = DR_STATE_OCCUPIED;
204625cf1a30Sjl 	} else {
204725cf1a30Sjl 		new_state = DR_STATE_EMPTY;
204825cf1a30Sjl 	}
204925cf1a30Sjl 
205025cf1a30Sjl 	if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
205125cf1a30Sjl 		dr_init_mem_unit_data(mp);
205225cf1a30Sjl 
205325cf1a30Sjl 	/* delay transition until fully initialized */
205425cf1a30Sjl 	dr_device_transition(&mp->sbm_cm, new_state);
205525cf1a30Sjl }
205625cf1a30Sjl 
205725cf1a30Sjl static void
205825cf1a30Sjl dr_init_mem_unit_data(dr_mem_unit_t *mp)
205925cf1a30Sjl {
206025cf1a30Sjl 	drmachid_t	id = mp->sbm_cm.sbdev_id;
206125cf1a30Sjl 	drmach_mem_info_t	minfo;
206225cf1a30Sjl 	sbd_error_t	*err;
206325cf1a30Sjl 	static fn_t	f = "dr_init_mem_unit_data";
206425cf1a30Sjl 	update_membounds_t umb;
206525cf1a30Sjl 
206625cf1a30Sjl 	PR_MEM("%s...\n", f);
206725cf1a30Sjl 
206825cf1a30Sjl 	/* a little sanity checking */
206925cf1a30Sjl 	ASSERT(mp->sbm_peer == NULL);
207025cf1a30Sjl 	ASSERT(mp->sbm_flags == 0);
207125cf1a30Sjl 
207225cf1a30Sjl 	if (err = drmach_mem_get_info(id, &minfo)) {
207325cf1a30Sjl 		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
207425cf1a30Sjl 		return;
207525cf1a30Sjl 	}
207625cf1a30Sjl 	mp->sbm_basepfn = _b64top(minfo.mi_basepa);
207725cf1a30Sjl 	mp->sbm_npages = _b64top(minfo.mi_size);
207825cf1a30Sjl 	mp->sbm_alignment_mask = _b64top(minfo.mi_alignment_mask);
207925cf1a30Sjl 	mp->sbm_slice_size = minfo.mi_slice_size;
208025cf1a30Sjl 
208125cf1a30Sjl 	/*
208225cf1a30Sjl 	 * Add memory to lgroup
208325cf1a30Sjl 	 */
208425cf1a30Sjl 	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
208525cf1a30Sjl 	umb.u_base = (uint64_t)mp->sbm_basepfn << MMU_PAGESHIFT;
208625cf1a30Sjl 	umb.u_len = (uint64_t)mp->sbm_npages << MMU_PAGESHIFT;
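	/*
	 * Note (added, not in the original source): u_base and u_len are
	 * byte values derived from page numbers/counts.  On sun4u the base
	 * page size is 8K (MMU_PAGESHIFT == 13), so a hypothetical
	 * sbm_basepfn of 0x80000 would yield
	 * u_base = 0x80000 << 13 = 0x100000000 (4GB).
	 */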
208725cf1a30Sjl 
208825cf1a30Sjl 	lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
208925cf1a30Sjl 
209025cf1a30Sjl 	PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n",
209125cf1a30Sjl 		f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
209225cf1a30Sjl }
209325cf1a30Sjl 
209425cf1a30Sjl static int
209525cf1a30Sjl dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
209625cf1a30Sjl {
209725cf1a30Sjl 	int		err;
209825cf1a30Sjl 	pfn_t		base;
209925cf1a30Sjl 	pgcnt_t		npgs;
210025cf1a30Sjl 	struct memlist	*mc;
210125cf1a30Sjl 	static fn_t	f = "dr_reserve_mem_spans";
210225cf1a30Sjl 
210325cf1a30Sjl 	PR_MEM("%s...\n", f);
210425cf1a30Sjl 
210525cf1a30Sjl 	/*
210625cf1a30Sjl 	 * Walk the supplied memlist scheduling each span for removal
210725cf1a30Sjl 	 * with kphysm_del_span.  It is possible that a span may intersect
210825cf1a30Sjl 	 * an area occupied by the cage.
210925cf1a30Sjl 	 */
211025cf1a30Sjl 	for (mc = ml; mc != NULL; mc = mc->next) {
211125cf1a30Sjl 		base = _b64top(mc->address);
211225cf1a30Sjl 		npgs = _b64top(mc->size);
211325cf1a30Sjl 
211425cf1a30Sjl 		err = kphysm_del_span(*mhp, base, npgs);
211525cf1a30Sjl 		if (err != KPHYSM_OK) {
211625cf1a30Sjl 			cmn_err(CE_WARN, "%s memory reserve failed."
211725cf1a30Sjl 				" unexpected kphysm_del_span return value %d;"
211825cf1a30Sjl 				" basepfn=0x%lx npages=%ld",
211925cf1a30Sjl 				f, err, base, npgs);
212025cf1a30Sjl 
212125cf1a30Sjl 			return (-1);
212225cf1a30Sjl 		}
212325cf1a30Sjl 	}
212425cf1a30Sjl 
212525cf1a30Sjl 	return (0);
212625cf1a30Sjl }
212725cf1a30Sjl 
212825cf1a30Sjl #define	DR_SMT_NPREF_SETS	6
212925cf1a30Sjl #define	DR_SMT_NUNITS_PER_SET	MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD
213025cf1a30Sjl 
213125cf1a30Sjl /* debug counters */
213225cf1a30Sjl int dr_smt_realigned;
213325cf1a30Sjl int dr_smt_preference[DR_SMT_NPREF_SETS];
213425cf1a30Sjl 
213525cf1a30Sjl #ifdef DEBUG
213625cf1a30Sjl uint_t dr_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
213725cf1a30Sjl #endif
213825cf1a30Sjl 
213925cf1a30Sjl /*
214025cf1a30Sjl  * Find and reserve a copy/rename target board suitable for the
214125cf1a30Sjl  * given source board.
214225cf1a30Sjl  * All boards in the system are examined and categorized in relation to
214325cf1a30Sjl  * their memory size versus the source board's memory size.  Order of
214425cf1a30Sjl  * preference is:
214525cf1a30Sjl  *	1st copy all source, source/target same size
214625cf1a30Sjl  *	2nd copy all source, larger target
214725cf1a30Sjl  * 	3rd copy nonrelocatable source span
214825cf1a30Sjl  */
214925cf1a30Sjl static int
215025cf1a30Sjl dr_select_mem_target(dr_handle_t *hp,
215125cf1a30Sjl 	dr_mem_unit_t *s_mp, struct memlist *s_ml)
215225cf1a30Sjl {
215325cf1a30Sjl 	dr_target_pref_t preference; /* lower value is higher preference */
215425cf1a30Sjl 	int		idx;
215525cf1a30Sjl 	dr_mem_unit_t	**sets;
215625cf1a30Sjl 
215725cf1a30Sjl 	int		t_bd;
215825cf1a30Sjl 	int		t_unit;
215925cf1a30Sjl 	int		rv;
216025cf1a30Sjl 	dr_board_t	*s_bp, *t_bp;
216125cf1a30Sjl 	dr_mem_unit_t	*t_mp, *c_mp;
216225cf1a30Sjl 	struct memlist	*d_ml, *t_ml, *ml, *b_ml, *x_ml = NULL;
216325cf1a30Sjl 	memquery_t	s_mq = {0};
216425cf1a30Sjl 	static fn_t	f = "dr_select_mem_target";
216525cf1a30Sjl 
216625cf1a30Sjl 	PR_MEM("%s...\n", f);
216725cf1a30Sjl 
216825cf1a30Sjl 	ASSERT(s_ml != NULL);
216925cf1a30Sjl 
217025cf1a30Sjl 	sets = GETSTRUCT(dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
217125cf1a30Sjl 	    DR_SMT_NPREF_SETS);
217225cf1a30Sjl 
217325cf1a30Sjl 	s_bp = hp->h_bd;
217425cf1a30Sjl 	/* calculate the offset into the slice of the last source board pfn */
217525cf1a30Sjl 	ASSERT(s_mp->sbm_npages != 0);
217625cf1a30Sjl 
217725cf1a30Sjl 	/*
217825cf1a30Sjl 	 * Find non-relocatable span on source board.
217925cf1a30Sjl 	 */
218025cf1a30Sjl 	rv = kphysm_del_span_query(s_mp->sbm_basepfn, s_mp->sbm_npages, &s_mq);
218125cf1a30Sjl 	if (rv != KPHYSM_OK) {
218225cf1a30Sjl 		PR_MEM("%s: %s: unexpected kphysm_del_span_query"
218325cf1a30Sjl 		    " return value %d; basepfn 0x%lx, npages %ld\n",
218425cf1a30Sjl 		    f, s_mp->sbm_cm.sbdev_path, rv, s_mp->sbm_basepfn,
218525cf1a30Sjl 		    s_mp->sbm_npages);
		/* free the candidate sets array rather than leak it here */
		FREESTRUCT(sets, dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
		    DR_SMT_NPREF_SETS);
218625cf1a30Sjl 		return (-1);
218725cf1a30Sjl 	}
218825cf1a30Sjl 
218925cf1a30Sjl 	ASSERT(s_mq.phys_pages != 0);
219025cf1a30Sjl 	ASSERT(s_mq.nonrelocatable != 0);
219125cf1a30Sjl 
219225cf1a30Sjl 	PR_MEM("%s: %s: nonrelocatable span (0x%lx..0x%lx)\n", f,
219325cf1a30Sjl 	    s_mp->sbm_cm.sbdev_path, s_mq.first_nonrelocatable,
219425cf1a30Sjl 	    s_mq.last_nonrelocatable);
219525cf1a30Sjl 
219625cf1a30Sjl 	/* break down s_ml if it contains dynamic segments */
219725cf1a30Sjl 	b_ml = memlist_dup(s_ml);
219825cf1a30Sjl 
219925cf1a30Sjl 	for (ml = s_mp->sbm_dyn_segs; ml; ml = ml->next) {
220025cf1a30Sjl 		b_ml = memlist_del_span(b_ml, ml->address, ml->size);
220125cf1a30Sjl 		b_ml = memlist_cat_span(b_ml, ml->address, ml->size);
220225cf1a30Sjl 	}
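	/*
	 * Note (added): per the comment above, the apparent intent of the
	 * delete-then-add pair is not to change what b_ml covers but to
	 * force b_ml to be split at dynamic segment boundaries, so that
	 * later span arithmetic treats each dynamic segment as an
	 * indivisible chunk.
	 */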
220325cf1a30Sjl 
220425cf1a30Sjl 
220525cf1a30Sjl 	/*
220625cf1a30Sjl 	 * Make one pass through all memory units on all boards
220725cf1a30Sjl 	 * and categorize them with respect to the source board.
220825cf1a30Sjl 	 */
220925cf1a30Sjl 	for (t_bd = 0; t_bd < MAX_BOARDS; t_bd++) {
221025cf1a30Sjl 		/*
221125cf1a30Sjl 		 * The board structs are a contiguous array
221225cf1a30Sjl 		 * so we take advantage of that to find the
221325cf1a30Sjl 		 * correct board struct pointer for a given
221425cf1a30Sjl 		 * board number.
221525cf1a30Sjl 		 */
221625cf1a30Sjl 		t_bp = dr_lookup_board(t_bd);
221725cf1a30Sjl 
221825cf1a30Sjl 		/* source board can not be its own target */
221925cf1a30Sjl 		if (s_bp->b_num == t_bp->b_num)
222025cf1a30Sjl 			continue;
222125cf1a30Sjl 
222225cf1a30Sjl 		for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) {
222325cf1a30Sjl 
222425cf1a30Sjl 			t_mp = dr_get_mem_unit(t_bp, t_unit);
222525cf1a30Sjl 
222625cf1a30Sjl 			/* this memory node must be attached */
222725cf1a30Sjl 			if (!DR_DEV_IS_ATTACHED(&t_mp->sbm_cm))
222825cf1a30Sjl 				continue;
222925cf1a30Sjl 
223025cf1a30Sjl 			/* source unit can not be its own target */
223125cf1a30Sjl 			if (s_mp == t_mp) {
223225cf1a30Sjl 				/* catch this in debug kernels */
223325cf1a30Sjl 				ASSERT(0);
223425cf1a30Sjl 				continue;
223525cf1a30Sjl 			}
223625cf1a30Sjl 
223725cf1a30Sjl 			/*
223825cf1a30Sjl 			 * this memory node must not already be reserved
223925cf1a30Sjl 			 * by some other memory delete operation.
224025cf1a30Sjl 			 */
224125cf1a30Sjl 			if (t_mp->sbm_flags & DR_MFLAG_RESERVED)
224225cf1a30Sjl 				continue;
224325cf1a30Sjl 
224425cf1a30Sjl 			/* get target board memlist */
224525cf1a30Sjl 			t_ml = dr_get_memlist(t_mp);
224625cf1a30Sjl 			if (t_ml == NULL) {
224725cf1a30Sjl 				cmn_err(CE_WARN, "%s: no memlist for"
224825cf1a30Sjl 				    " mem-unit %d, board %d", f,
224925cf1a30Sjl 				    t_mp->sbm_cm.sbdev_bp->b_num,
225025cf1a30Sjl 				    t_mp->sbm_cm.sbdev_unum);
225125cf1a30Sjl 				continue;
225225cf1a30Sjl 			}
225325cf1a30Sjl 
225425cf1a30Sjl 			preference = dr_get_target_preference(hp, t_mp, s_mp,
225525cf1a30Sjl 			    t_ml, s_ml, b_ml);
225625cf1a30Sjl 
225725cf1a30Sjl 			if (preference == DR_TP_INVALID)
225825cf1a30Sjl 				continue;
225925cf1a30Sjl 
226025cf1a30Sjl 			dr_smt_preference[preference]++;
226125cf1a30Sjl 
226225cf1a30Sjl 			/* calculate index to start of preference set */
226325cf1a30Sjl 			idx  = DR_SMT_NUNITS_PER_SET * preference;
226425cf1a30Sjl 			/* calculate offset to respective element */
226525cf1a30Sjl 			idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;
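			/*
			 * Illustrative example (added; hypothetical
			 * numbers): sets[] is laid out as
			 * DR_SMT_NPREF_SETS consecutive groups of
			 * DR_SMT_NUNITS_PER_SET slots, one group per
			 * preference rank.  With, say, 16 boards and one
			 * mem-unit per board (NUNITS_PER_SET == 16), a
			 * DR_TP_LARGE (== 1) candidate on board 3, unit 0
			 * lands at idx = 16 * 1 + 3 * 1 + 0 = 19.
			 */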
226625cf1a30Sjl 
226725cf1a30Sjl 			ASSERT(idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS);
226825cf1a30Sjl 			sets[idx] = t_mp;
226925cf1a30Sjl 		}
227025cf1a30Sjl 	}
227125cf1a30Sjl 
227225cf1a30Sjl 	if (b_ml != NULL)
227325cf1a30Sjl 		memlist_delete(b_ml);
227425cf1a30Sjl 
227525cf1a30Sjl 	/*
227625cf1a30Sjl 	 * NOTE: this would be a good place to sort each candidate
227725cf1a30Sjl 	 * set into some desired order, e.g. memory size in ascending
227825cf1a30Sjl 	 * order.  Without an additional sorting step here, the order
227925cf1a30Sjl 	 * within a set is ascending board number order.
228025cf1a30Sjl 	 */
228125cf1a30Sjl 
228225cf1a30Sjl 	c_mp = NULL;
228325cf1a30Sjl 	x_ml = NULL;
228425cf1a30Sjl 	t_ml = NULL;
228525cf1a30Sjl 	for (idx = 0; idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS; idx++) {
228625cf1a30Sjl 		memquery_t mq;
228725cf1a30Sjl 
228825cf1a30Sjl 		preference = (dr_target_pref_t)(idx / DR_SMT_NUNITS_PER_SET);
228925cf1a30Sjl 
229025cf1a30Sjl 		ASSERT(preference != DR_TP_INVALID);
229125cf1a30Sjl 
229225cf1a30Sjl 		/* cleanup t_ml after previous pass */
229325cf1a30Sjl 		if (t_ml != NULL) {
229425cf1a30Sjl 			memlist_delete(t_ml);
229525cf1a30Sjl 			t_ml = NULL;
229625cf1a30Sjl 		}
229725cf1a30Sjl 
229825cf1a30Sjl 		/* get candidate target board mem unit */
229925cf1a30Sjl 		t_mp = sets[idx];
230025cf1a30Sjl 		if (t_mp == NULL)
230125cf1a30Sjl 			continue;
230225cf1a30Sjl 
230325cf1a30Sjl 		/* get target board memlist */
230425cf1a30Sjl 		t_ml = dr_get_memlist(t_mp);
230525cf1a30Sjl 		if (t_ml == NULL) {
230625cf1a30Sjl 			cmn_err(CE_WARN, "%s: no memlist for"
230725cf1a30Sjl 				" mem-unit %d, board %d",
230825cf1a30Sjl 				f,
230925cf1a30Sjl 				t_mp->sbm_cm.sbdev_bp->b_num,
231025cf1a30Sjl 				t_mp->sbm_cm.sbdev_unum);
231125cf1a30Sjl 
231225cf1a30Sjl 			continue;
231325cf1a30Sjl 		}
231425cf1a30Sjl 
231525cf1a30Sjl 		PR_MEM("%s: checking for no-reloc in %s, "
231625cf1a30Sjl 			" basepfn=0x%lx, npages=%ld\n",
231725cf1a30Sjl 			f,
231825cf1a30Sjl 			t_mp->sbm_cm.sbdev_path,
231925cf1a30Sjl 			t_mp->sbm_basepfn,
232025cf1a30Sjl 			t_mp->sbm_npages);
232125cf1a30Sjl 
232225cf1a30Sjl 		rv = dr_del_mlist_query(t_ml, &mq);
232325cf1a30Sjl 		if (rv != KPHYSM_OK) {
232425cf1a30Sjl 			PR_MEM("%s: kphysm_del_span_query:"
232525cf1a30Sjl 				" unexpected return value %d\n", f, rv);
232625cf1a30Sjl 
232725cf1a30Sjl 			continue;
232825cf1a30Sjl 		}
232925cf1a30Sjl 
233025cf1a30Sjl 		if (mq.nonrelocatable != 0) {
233125cf1a30Sjl 			PR_MEM("%s: candidate %s has"
233225cf1a30Sjl 				" nonrelocatable span [0x%lx..0x%lx]\n",
233325cf1a30Sjl 				f,
233425cf1a30Sjl 				t_mp->sbm_cm.sbdev_path,
233525cf1a30Sjl 				mq.first_nonrelocatable,
233625cf1a30Sjl 				mq.last_nonrelocatable);
233725cf1a30Sjl 
233825cf1a30Sjl 			continue;
233925cf1a30Sjl 		}
234025cf1a30Sjl 
234125cf1a30Sjl #ifdef DEBUG
234225cf1a30Sjl 		/*
234325cf1a30Sjl 		 * This is a debug tool for excluding certain boards
234425cf1a30Sjl 		 * from being selected as a target board candidate.
234525cf1a30Sjl 		 * dr_ignore_board is only tested by this driver.
234625cf1a30Sjl 		 * It must be set with adb, obp, /etc/system or your
234725cf1a30Sjl 		 * favorite debugger.
234825cf1a30Sjl 		 */
234925cf1a30Sjl 		if (dr_ignore_board &
235025cf1a30Sjl 			(1 << (t_mp->sbm_cm.sbdev_bp->b_num - 1))) {
235125cf1a30Sjl 			PR_MEM("%s: dr_ignore_board flag set,"
235225cf1a30Sjl 				" ignoring %s as candidate\n",
235325cf1a30Sjl 				f, t_mp->sbm_cm.sbdev_path);
235425cf1a30Sjl 			continue;
235525cf1a30Sjl 		}
235625cf1a30Sjl #endif
235725cf1a30Sjl 
235825cf1a30Sjl 		/*
235925cf1a30Sjl 		 * Reserve excess source board memory, if any.
236025cf1a30Sjl 		 *
236125cf1a30Sjl 		 * Only the nonrelocatable source span will be copied
236225cf1a30Sjl 		 * so schedule the rest of the source mem to be deleted.
236325cf1a30Sjl 		 */
236425cf1a30Sjl 		switch (preference) {
236525cf1a30Sjl 		case DR_TP_NONRELOC:
236625cf1a30Sjl 			/*
236725cf1a30Sjl 			 * Get source copy memlist and use it to construct
236825cf1a30Sjl 			 * delete memlist.
236925cf1a30Sjl 			 */
237025cf1a30Sjl 			d_ml = memlist_dup(s_ml);
237125cf1a30Sjl 			x_ml = dr_get_copy_mlist(s_ml, t_ml, s_mp, t_mp);
237225cf1a30Sjl 
237325cf1a30Sjl 			/* XXX */
237425cf1a30Sjl 			ASSERT(d_ml != NULL);
237525cf1a30Sjl 			ASSERT(x_ml != NULL);
237625cf1a30Sjl 
237725cf1a30Sjl 			for (ml = x_ml; ml != NULL; ml = ml->next) {
237825cf1a30Sjl 				d_ml = memlist_del_span(d_ml, ml->address,
237925cf1a30Sjl 				    ml->size);
238025cf1a30Sjl 			}
238125cf1a30Sjl 
238225cf1a30Sjl 			PR_MEM("%s: %s: reserving src brd memlist:\n", f,
238325cf1a30Sjl 			    s_mp->sbm_cm.sbdev_path);
238425cf1a30Sjl 			PR_MEMLIST_DUMP(d_ml);
238525cf1a30Sjl 
238625cf1a30Sjl 			/* reserve excess spans */
238725cf1a30Sjl 			if (dr_reserve_mem_spans(&s_mp->sbm_memhandle,
238825cf1a30Sjl 			    d_ml) != 0) {
238925cf1a30Sjl 				/* likely more non-reloc pages appeared */
239025cf1a30Sjl 				/* TODO: restart from top? */
239125cf1a30Sjl 				continue;
239225cf1a30Sjl 			}
239325cf1a30Sjl 			break;
239425cf1a30Sjl 		default:
239525cf1a30Sjl 			d_ml = NULL;
239625cf1a30Sjl 			break;
239725cf1a30Sjl 		}
239825cf1a30Sjl 
239925cf1a30Sjl 		s_mp->sbm_flags |= DR_MFLAG_RESERVED;
240025cf1a30Sjl 
240125cf1a30Sjl 		/*
240225cf1a30Sjl 		 * reserve all memory on target board.
240325cf1a30Sjl 		 * NOTE: source board's memhandle is used.
240425cf1a30Sjl 		 *
240525cf1a30Sjl 		 * If this succeeds (eq 0), then target selection is
240625cf1a30Sjl 		 * complete and all unwanted memory spans, both source and
240725cf1a30Sjl 		 * target, have been reserved.  Loop is terminated.
240825cf1a30Sjl 		 */
240925cf1a30Sjl 		if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) {
241025cf1a30Sjl 			PR_MEM("%s: %s: target board memory reserved\n",
241125cf1a30Sjl 				f, t_mp->sbm_cm.sbdev_path);
241225cf1a30Sjl 
241325cf1a30Sjl 			/* a candidate target board is now reserved */
241425cf1a30Sjl 			t_mp->sbm_flags |= DR_MFLAG_RESERVED;
241525cf1a30Sjl 			c_mp = t_mp;
241625cf1a30Sjl 
241725cf1a30Sjl 			/* *** EXITING LOOP *** */
241825cf1a30Sjl 			break;
241925cf1a30Sjl 		}
242025cf1a30Sjl 
242125cf1a30Sjl 		/* did not successfully reserve the target board. */
242225cf1a30Sjl 		PR_MEM("%s: could not reserve target %s\n",
242325cf1a30Sjl 			f, t_mp->sbm_cm.sbdev_path);
242425cf1a30Sjl 
242525cf1a30Sjl 		/*
242625cf1a30Sjl 		 * NOTE: an undo of the dr_reserve_mem_span work
242725cf1a30Sjl 		 * will happen automatically when the memhandle
242825cf1a30Sjl 		 * (s_mp->sbm_memhandle) is kphysm_del_release'd.
242925cf1a30Sjl 		 */
243025cf1a30Sjl 
243125cf1a30Sjl 		s_mp->sbm_flags &= ~DR_MFLAG_RESERVED;
243225cf1a30Sjl 	}
243325cf1a30Sjl 
243425cf1a30Sjl 	/* clean up after memlist editing logic */
243525cf1a30Sjl 	if (x_ml != NULL)
243625cf1a30Sjl 		memlist_delete(x_ml);
243725cf1a30Sjl 
243825cf1a30Sjl 	FREESTRUCT(sets, dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
243925cf1a30Sjl 	    DR_SMT_NPREF_SETS);
244025cf1a30Sjl 
244125cf1a30Sjl 	/*
244225cf1a30Sjl 	 * c_mp will be NULL when the entire sets[] array
244325cf1a30Sjl 	 * has been searched without reserving a target board.
244425cf1a30Sjl 	 */
244525cf1a30Sjl 	if (c_mp == NULL) {
244625cf1a30Sjl 		PR_MEM("%s: %s: target selection failed.\n",
244725cf1a30Sjl 			f, s_mp->sbm_cm.sbdev_path);
244825cf1a30Sjl 
244925cf1a30Sjl 		if (t_ml != NULL)
245025cf1a30Sjl 			memlist_delete(t_ml);
245125cf1a30Sjl 
245225cf1a30Sjl 		return (-1);
245325cf1a30Sjl 	}
245425cf1a30Sjl 
245525cf1a30Sjl 	PR_MEM("%s: found target %s for source %s\n",
245625cf1a30Sjl 		f,
245725cf1a30Sjl 		c_mp->sbm_cm.sbdev_path,
245825cf1a30Sjl 		s_mp->sbm_cm.sbdev_path);
245925cf1a30Sjl 
246025cf1a30Sjl 	s_mp->sbm_peer = c_mp;
246125cf1a30Sjl 	s_mp->sbm_flags |= DR_MFLAG_SOURCE;
246225cf1a30Sjl 	s_mp->sbm_del_mlist = d_ml;	/* spans to be deleted, if any */
246325cf1a30Sjl 	s_mp->sbm_mlist = s_ml;
246425cf1a30Sjl 	s_mp->sbm_cm.sbdev_busy = 1;
246525cf1a30Sjl 
246625cf1a30Sjl 	c_mp->sbm_peer = s_mp;
246725cf1a30Sjl 	c_mp->sbm_flags |= DR_MFLAG_TARGET;
246825cf1a30Sjl 	c_mp->sbm_del_mlist = t_ml;	/* spans to be deleted */
246925cf1a30Sjl 	c_mp->sbm_mlist = t_ml;
247025cf1a30Sjl 	c_mp->sbm_cm.sbdev_busy = 1;
247125cf1a30Sjl 
247225cf1a30Sjl 	return (0);
247325cf1a30Sjl }
247425cf1a30Sjl 
247525cf1a30Sjl /*
247625cf1a30Sjl  * Returns target preference rank:
247725cf1a30Sjl  *     -1 not a valid copy-rename target board
247825cf1a30Sjl  *	0 copy all source, source/target same size
247925cf1a30Sjl  *	1 copy all source, larger target
248025cf1a30Sjl  * 	2 copy nonrelocatable source span
248125cf1a30Sjl  */
248225cf1a30Sjl static dr_target_pref_t
248325cf1a30Sjl dr_get_target_preference(dr_handle_t *hp,
248425cf1a30Sjl     dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
248525cf1a30Sjl     struct memlist *t_ml, struct memlist *s_ml,
248625cf1a30Sjl     struct memlist *b_ml)
248725cf1a30Sjl {
248825cf1a30Sjl 	dr_target_pref_t preference;
248925cf1a30Sjl 	struct memlist *s_nonreloc_ml = NULL;
249025cf1a30Sjl 	drmachid_t t_id;
249125cf1a30Sjl 	static fn_t	f = "dr_get_target_preference";
249225cf1a30Sjl 
249325cf1a30Sjl 	t_id = t_mp->sbm_cm.sbdev_bp->b_id;
249425cf1a30Sjl 
249525cf1a30Sjl 	/*
249625cf1a30Sjl 	 * Can the entire source board be copied?
249725cf1a30Sjl 	 */
249825cf1a30Sjl 	if (dr_memlist_canfit(s_ml, t_ml, s_mp, t_mp)) {
249925cf1a30Sjl 		if (s_mp->sbm_npages == t_mp->sbm_npages)
250025cf1a30Sjl 			preference = DR_TP_SAME;	/* same size */
250125cf1a30Sjl 		else
250225cf1a30Sjl 			preference = DR_TP_LARGE;	/* larger target */
250325cf1a30Sjl 	} else {
250425cf1a30Sjl 		/*
250525cf1a30Sjl 		 * Entire source won't fit so try non-relocatable memory only
250625cf1a30Sjl 		 * (target aligned).
250725cf1a30Sjl 		 */
250825cf1a30Sjl 		s_nonreloc_ml = dr_get_nonreloc_mlist(b_ml, s_mp);
250925cf1a30Sjl 		if (s_nonreloc_ml == NULL) {
251025cf1a30Sjl 			PR_MEM("%s: dr_get_nonreloc_mlist failed\n", f);
251125cf1a30Sjl 			preference = DR_TP_INVALID;
251225cf1a30Sjl 		} else if (dr_memlist_canfit(s_nonreloc_ml, t_ml,
251325cf1a30Sjl 		    s_mp, t_mp))
251425cf1a30Sjl 			preference = DR_TP_NONRELOC;
251525cf1a30Sjl 		else
251625cf1a30Sjl 			preference = DR_TP_INVALID;
251725cf1a30Sjl 	}
251825cf1a30Sjl 
251925cf1a30Sjl 	if (s_nonreloc_ml != NULL)
252025cf1a30Sjl 		memlist_delete(s_nonreloc_ml);
252125cf1a30Sjl 
252225cf1a30Sjl 	/*
252325cf1a30Sjl 	 * Force floating board preference lower than all other boards
252425cf1a30Sjl 	 * if the force flag is present; otherwise disallow the board.
252525cf1a30Sjl 	 */
252625cf1a30Sjl 	if ((preference != DR_TP_INVALID) && drmach_board_is_floating(t_id)) {
252725cf1a30Sjl 		if (dr_cmd_flags(hp) & SBD_FLAG_FORCE)
252825cf1a30Sjl 			preference += DR_TP_FLOATING;
252925cf1a30Sjl 		else
253025cf1a30Sjl 			preference = DR_TP_INVALID;
253125cf1a30Sjl 	}
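	/*
	 * Note (added): this relies on DR_TP_FLOATING being the largest
	 * value in dr_target_pref_t, so adding it bumps a floating-board
	 * candidate into a strictly lower-preference group of sets[]
	 * while keeping it eligible when SBD_FLAG_FORCE is set.
	 */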
253225cf1a30Sjl 
253325cf1a30Sjl 	PR_MEM("%s: %s preference=%d\n", f, t_mp->sbm_cm.sbdev_path,
253425cf1a30Sjl 	    preference);
253525cf1a30Sjl 
253625cf1a30Sjl 	return (preference);
253725cf1a30Sjl }
253825cf1a30Sjl 
253925cf1a30Sjl /*
254025cf1a30Sjl  * Create a memlist representing the source memory that will be copied to
254125cf1a30Sjl  * the target board.  The memory to be copied is the maximum amount that
254225cf1a30Sjl  * will fit on the target board.
254325cf1a30Sjl  */
254425cf1a30Sjl static struct memlist *
254525cf1a30Sjl dr_get_copy_mlist(struct memlist *s_mlist, struct memlist *t_mlist,
254625cf1a30Sjl     dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
254725cf1a30Sjl {
254825cf1a30Sjl 	struct memlist	*t_ml, *s_copy_ml, *s_del_ml, *ml, *x_ml;
254925cf1a30Sjl 	uint64_t	s_slice_mask, s_slice_base;
255025cf1a30Sjl 	uint64_t	t_slice_mask, t_slice_base;
255125cf1a30Sjl 	static fn_t	f = "dr_get_copy_mlist";
255225cf1a30Sjl 
255325cf1a30Sjl 	ASSERT(s_mlist != NULL);
255425cf1a30Sjl 	ASSERT(t_mlist != NULL);
255525cf1a30Sjl 	ASSERT(t_mp->sbm_slice_size == s_mp->sbm_slice_size);
255625cf1a30Sjl 
255725cf1a30Sjl 	s_slice_mask = s_mp->sbm_slice_size - 1;
255825cf1a30Sjl 	s_slice_base = s_mlist->address & ~s_slice_mask;
255925cf1a30Sjl 
256025cf1a30Sjl 	t_slice_mask = t_mp->sbm_slice_size - 1;
256125cf1a30Sjl 	t_slice_base = t_mlist->address & ~t_slice_mask;
256225cf1a30Sjl 
256325cf1a30Sjl 	t_ml = memlist_dup(t_mlist);
256425cf1a30Sjl 	s_del_ml = memlist_dup(s_mlist);
256525cf1a30Sjl 	s_copy_ml = memlist_dup(s_mlist);
256625cf1a30Sjl 
256725cf1a30Sjl 	/* XXX */
256825cf1a30Sjl 	ASSERT(t_ml != NULL);
256925cf1a30Sjl 	ASSERT(s_del_ml != NULL);
257025cf1a30Sjl 	ASSERT(s_copy_ml != NULL);
257125cf1a30Sjl 
257225cf1a30Sjl 	/*
257325cf1a30Sjl 	 * To construct the source copy memlist:
257425cf1a30Sjl 	 *
257525cf1a30Sjl 	 * The target memlist is converted to the post-rename
257625cf1a30Sjl 	 * source addresses.  This is the physical address range
257725cf1a30Sjl 	 * the target will have after the copy-rename.  Overlaying
257825cf1a30Sjl 	 * and deleting this from the current source memlist will
257925cf1a30Sjl 	 * give the source delete memlist.  The copy memlist is
258025cf1a30Sjl 	 * the reciprocal of the source delete memlist.
258125cf1a30Sjl 	 */
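	/*
	 * Illustrative sketch (added; hypothetical, slice-relative sizes):
	 * if the source owns 8GB and the target owns 4GB at the same slice
	 * offset, the target list translated to source addresses covers the
	 * first 4GB of the source.  Deleting that from the source list
	 * leaves the upper 4GB as the delete list, and removing the delete
	 * list from the source list again yields the lower 4GB as the copy
	 * list.
	 */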
258225cf1a30Sjl 	for (ml = t_ml; ml != NULL; ml = ml->next) {
258325cf1a30Sjl 		/*
258425cf1a30Sjl 		 * Normalize relative to target slice base PA
258525cf1a30Sjl 		 * in order to preserve slice offsets.
258625cf1a30Sjl 		 */
258725cf1a30Sjl 		ml->address -= t_slice_base;
258825cf1a30Sjl 		/*
258925cf1a30Sjl 		 * Convert to source slice PA address.
259025cf1a30Sjl 		 */
259125cf1a30Sjl 		ml->address += s_slice_base;
259225cf1a30Sjl 	}
259325cf1a30Sjl 
259425cf1a30Sjl 	for (ml = t_ml; ml != NULL; ml = ml->next) {
259525cf1a30Sjl 		s_del_ml = memlist_del_span(s_del_ml, ml->address, ml->size);
259625cf1a30Sjl 	}
259725cf1a30Sjl 
259825cf1a30Sjl 	/*
259925cf1a30Sjl 	 * Expand the delete mlist to fully include any dynamic segments
260025cf1a30Sjl 	 * it intersects with.
260125cf1a30Sjl 	 */
260225cf1a30Sjl 	for (x_ml = NULL, ml = s_del_ml; ml != NULL; ml = ml->next) {
260325cf1a30Sjl 		uint64_t del_base = ml->address;
260425cf1a30Sjl 		uint64_t del_end = ml->address + ml->size;
260525cf1a30Sjl 		struct memlist *dyn;
260625cf1a30Sjl 
260725cf1a30Sjl 		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL; dyn = dyn->next) {
260825cf1a30Sjl 			uint64_t dyn_base = dyn->address;
260925cf1a30Sjl 			uint64_t dyn_end = dyn->address + dyn->size;
261025cf1a30Sjl 
261125cf1a30Sjl 			if (del_base > dyn_base && del_base < dyn_end)
261225cf1a30Sjl 				del_base = dyn_base;
261325cf1a30Sjl 
261425cf1a30Sjl 			if (del_end > dyn_base && del_end < dyn_end)
261525cf1a30Sjl 				del_end = dyn_end;
261625cf1a30Sjl 		}
261725cf1a30Sjl 
261825cf1a30Sjl 		x_ml = memlist_cat_span(x_ml, del_base, del_end - del_base);
261925cf1a30Sjl 	}
262025cf1a30Sjl 
262125cf1a30Sjl 	memlist_delete(s_del_ml);
262225cf1a30Sjl 	s_del_ml = x_ml;
262325cf1a30Sjl 
262425cf1a30Sjl 	for (ml = s_del_ml; ml != NULL; ml = ml->next) {
262525cf1a30Sjl 		s_copy_ml = memlist_del_span(s_copy_ml, ml->address, ml->size);
262625cf1a30Sjl 	}
262725cf1a30Sjl 
262825cf1a30Sjl 	PR_MEM("%s: source delete mlist\n", f);
262925cf1a30Sjl 	PR_MEMLIST_DUMP(s_del_ml);
263025cf1a30Sjl 
263125cf1a30Sjl 	PR_MEM("%s: source copy mlist\n", f);
263225cf1a30Sjl 	PR_MEMLIST_DUMP(s_copy_ml);
263325cf1a30Sjl 
263425cf1a30Sjl 	memlist_delete(t_ml);
263525cf1a30Sjl 	memlist_delete(s_del_ml);
263625cf1a30Sjl 
263725cf1a30Sjl 	return (s_copy_ml);
263825cf1a30Sjl }
263925cf1a30Sjl 
264025cf1a30Sjl /*
264125cf1a30Sjl  * Scan the non-relocatable spans on the source memory
264225cf1a30Sjl  * and construct a minimum mlist that includes all non-reloc
264325cf1a30Sjl  * memory subject to target alignment, and dynamic segment
264425cf1a30Sjl  * constraints where only whole dynamic segments may be deleted.
264525cf1a30Sjl  */
264625cf1a30Sjl static struct memlist *
264725cf1a30Sjl dr_get_nonreloc_mlist(struct memlist *s_ml, dr_mem_unit_t *s_mp)
264825cf1a30Sjl {
264925cf1a30Sjl 	struct memlist	*x_ml = NULL;
265025cf1a30Sjl 	struct memlist	*ml;
265125cf1a30Sjl 	static fn_t	f = "dr_get_nonreloc_mlist";
265225cf1a30Sjl 
265325cf1a30Sjl 	PR_MEM("%s: checking for split of dyn seg list:\n", f);
265425cf1a30Sjl 	PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
265525cf1a30Sjl 
265625cf1a30Sjl 	for (ml = s_ml; ml; ml = ml->next) {
265725cf1a30Sjl 		int rv;
265825cf1a30Sjl 		uint64_t nr_base, nr_end;
265925cf1a30Sjl 		memquery_t mq;
266025cf1a30Sjl 		struct memlist *dyn;
266125cf1a30Sjl 
266225cf1a30Sjl 		rv = kphysm_del_span_query(
266325cf1a30Sjl 			_b64top(ml->address), _b64top(ml->size), &mq);
266425cf1a30Sjl 		if (rv) {
266525cf1a30Sjl 			memlist_delete(x_ml);
266625cf1a30Sjl 			return (NULL);
266725cf1a30Sjl 		}
266825cf1a30Sjl 
266925cf1a30Sjl 		if (mq.nonrelocatable == 0)
267025cf1a30Sjl 			continue;
267125cf1a30Sjl 
267225cf1a30Sjl 		PR_MEM("%s: non-reloc span: 0x%lx, 0x%lx (%lx, %lx)\n", f,
267325cf1a30Sjl 			_ptob64(mq.first_nonrelocatable),
267425cf1a30Sjl 			_ptob64(mq.last_nonrelocatable),
267525cf1a30Sjl 			mq.first_nonrelocatable,
267625cf1a30Sjl 			mq.last_nonrelocatable);
267725cf1a30Sjl 
267825cf1a30Sjl 		/*
267925cf1a30Sjl 		 * Align the span at both ends to allow for possible
268025cf1a30Sjl 		 * cage expansion.
268125cf1a30Sjl 		 */
268225cf1a30Sjl 		nr_base = _ptob64(mq.first_nonrelocatable);
268325cf1a30Sjl 		nr_end = _ptob64(mq.last_nonrelocatable + 1);
268425cf1a30Sjl 
268525cf1a30Sjl 		PR_MEM("%s: adjusted non-reloc span: 0x%lx, 0x%lx\n",
268625cf1a30Sjl 			f, nr_base, nr_end);
268725cf1a30Sjl 
268825cf1a30Sjl 		/*
268925cf1a30Sjl 		 * Expand the non-reloc span to fully include any
269025cf1a30Sjl 		 * dynamic segments it intersects with.
269125cf1a30Sjl 		 */
269225cf1a30Sjl 		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL; dyn = dyn->next) {
269325cf1a30Sjl 			uint64_t dyn_base = dyn->address;
269425cf1a30Sjl 			uint64_t dyn_end = dyn->address + dyn->size;
269525cf1a30Sjl 
269625cf1a30Sjl 			if (nr_base > dyn_base && nr_base < dyn_end)
269725cf1a30Sjl 				nr_base = dyn_base;
269825cf1a30Sjl 
269925cf1a30Sjl 			if (nr_end > dyn_base && nr_end < dyn_end)
270025cf1a30Sjl 				nr_end = dyn_end;
270125cf1a30Sjl 		}
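		/*
		 * Illustrative example (added; hypothetical addresses): if
		 * the non-reloc span is [0x2000, 0x3000) and a dynamic
		 * segment covers [0x2800, 0x4000), the span is widened to
		 * [0x2000, 0x4000) so the whole dynamic segment is deleted
		 * rather than split.
		 */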
270225cf1a30Sjl 
270325cf1a30Sjl 		x_ml = memlist_cat_span(x_ml, nr_base, nr_end - nr_base);
270425cf1a30Sjl 	}
270525cf1a30Sjl 
270625cf1a30Sjl 	if (x_ml == NULL) {
270725cf1a30Sjl 		PR_MEM("%s: source didn't have any non-reloc pages!\n", f);
270825cf1a30Sjl 		return (NULL);
270925cf1a30Sjl 	}
271025cf1a30Sjl 
271125cf1a30Sjl 	PR_MEM("%s: %s: edited source memlist:\n", f, s_mp->sbm_cm.sbdev_path);
271225cf1a30Sjl 	PR_MEMLIST_DUMP(x_ml);
271325cf1a30Sjl 
271425cf1a30Sjl 	return (x_ml);
271525cf1a30Sjl }
271625cf1a30Sjl 
271725cf1a30Sjl /*
271825cf1a30Sjl  * Check if source memlist can fit in target memlist while maintaining
271925cf1a30Sjl  * relative offsets within board.
272025cf1a30Sjl  */
272125cf1a30Sjl static int
272225cf1a30Sjl dr_memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist,
272325cf1a30Sjl     dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
272425cf1a30Sjl {
272525cf1a30Sjl 	int		canfit = 0;
272625cf1a30Sjl 	struct memlist	*s_ml, *t_ml, *ml;
272725cf1a30Sjl 	uint64_t	s_slice_mask, t_slice_mask;
272825cf1a30Sjl 	static fn_t	f = "dr_memlist_canfit";
272925cf1a30Sjl 
273025cf1a30Sjl 	s_ml = memlist_dup(s_mlist);
273125cf1a30Sjl 	t_ml = memlist_dup(t_mlist);
273225cf1a30Sjl 
273325cf1a30Sjl 	if (s_ml == NULL || t_ml == NULL) {
273425cf1a30Sjl 		cmn_err(CE_WARN, "%s: memlist_dup failed\n", f);
273525cf1a30Sjl 		goto done;
273625cf1a30Sjl 	}
273725cf1a30Sjl 
273825cf1a30Sjl 	s_slice_mask = s_mp->sbm_slice_size - 1;
273925cf1a30Sjl 	t_slice_mask = t_mp->sbm_slice_size - 1;
274025cf1a30Sjl 
274125cf1a30Sjl 	/*
274225cf1a30Sjl 	 * Normalize to slice relative offsets.
274325cf1a30Sjl 	 */
274425cf1a30Sjl 	for (ml = s_ml; ml; ml = ml->next)
274525cf1a30Sjl 		ml->address &= s_slice_mask;
274625cf1a30Sjl 
274725cf1a30Sjl 	for (ml = t_ml; ml; ml = ml->next)
274825cf1a30Sjl 		ml->address &= t_slice_mask;
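	/*
	 * Note (added; hypothetical numbers): with a 4GB slice size the
	 * mask is 0xffffffff, so an address such as 0x440000000 reduces to
	 * its within-slice offset 0x40000000.  Source and target spans can
	 * then be compared position-for-position regardless of which slice
	 * each board actually occupies.
	 */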
274925cf1a30Sjl 
275025cf1a30Sjl 	canfit = memlist_canfit(s_ml, t_ml);
275125cf1a30Sjl done:
275225cf1a30Sjl 	memlist_delete(s_ml);
275325cf1a30Sjl 	memlist_delete(t_ml);
275425cf1a30Sjl 
275525cf1a30Sjl 	return (canfit);
275625cf1a30Sjl }
275725cf1a30Sjl 
275825cf1a30Sjl /*
275925cf1a30Sjl  * Memlist support.
276025cf1a30Sjl  */
276125cf1a30Sjl 
276225cf1a30Sjl /*
276325cf1a30Sjl  * Determine whether the source memlist (s_mlist) will
276425cf1a30Sjl  * fit into the target memlist (t_mlist) in terms of
276525cf1a30Sjl  * size and holes.  Assumes the caller has normalized the
276625cf1a30Sjl  * memlist physical addresses for comparison.
276725cf1a30Sjl  */
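/*
 * Added illustrative example (hypothetical, slice-relative values): a
 * source list { [0x0, 0x20000000), [0x40000000, 0x50000000) } fits in a
 * target list { [0x0, 0x60000000) } because every source span lies inside
 * some target span; it would not fit in { [0x0, 0x10000000) }, since the
 * first source span extends past the end of that target span.
 */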
276825cf1a30Sjl static int
276925cf1a30Sjl memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist)
277025cf1a30Sjl {
277125cf1a30Sjl 	int		rv = 0;
277225cf1a30Sjl 	struct memlist	*s_ml, *t_ml;
277325cf1a30Sjl 
277425cf1a30Sjl 	if ((s_mlist == NULL) || (t_mlist == NULL))
277525cf1a30Sjl 		return (0);
277625cf1a30Sjl 
277725cf1a30Sjl 	s_ml = s_mlist;
277825cf1a30Sjl 	for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->next) {
277925cf1a30Sjl 		uint64_t	s_start, s_end;
278025cf1a30Sjl 		uint64_t	t_start, t_end;
278125cf1a30Sjl 
278225cf1a30Sjl 		t_start = t_ml->address;
278325cf1a30Sjl 		t_end = t_start + t_ml->size;
278425cf1a30Sjl 
278525cf1a30Sjl 		for (; s_ml; s_ml = s_ml->next) {
278625cf1a30Sjl 			s_start = s_ml->address;
278725cf1a30Sjl 			s_end = s_start + s_ml->size;
278825cf1a30Sjl 
278925cf1a30Sjl 			if ((s_start < t_start) || (s_end > t_end))
279025cf1a30Sjl 				break;
279125cf1a30Sjl 		}
279225cf1a30Sjl 	}
279325cf1a30Sjl 
279425cf1a30Sjl 	/*
279525cf1a30Sjl 	 * If we ran out of source memlist chunks, that means
279625cf1a30Sjl 	 * we found a home for all of them.
279725cf1a30Sjl 	 */
279825cf1a30Sjl 	if (s_ml == NULL)
279925cf1a30Sjl 		rv = 1;
280025cf1a30Sjl 
280125cf1a30Sjl 	return (rv);
280225cf1a30Sjl }
2803