xref: /illumos-gate/usr/src/uts/sun4u/opl/io/dr_mem.c (revision 25cf1a30)
1*25cf1a30Sjl /*
2*25cf1a30Sjl  * CDDL HEADER START
3*25cf1a30Sjl  *
4*25cf1a30Sjl  * The contents of this file are subject to the terms of the
5*25cf1a30Sjl  * Common Development and Distribution License (the "License").
6*25cf1a30Sjl  * You may not use this file except in compliance with the License.
7*25cf1a30Sjl  *
8*25cf1a30Sjl  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25cf1a30Sjl  * or http://www.opensolaris.org/os/licensing.
10*25cf1a30Sjl  * See the License for the specific language governing permissions
11*25cf1a30Sjl  * and limitations under the License.
12*25cf1a30Sjl  *
13*25cf1a30Sjl  * When distributing Covered Code, include this CDDL HEADER in each
14*25cf1a30Sjl  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25cf1a30Sjl  * If applicable, add the following below this CDDL HEADER, with the
16*25cf1a30Sjl  * fields enclosed by brackets "[]" replaced with your own identifying
17*25cf1a30Sjl  * information: Portions Copyright [yyyy] [name of copyright owner]
18*25cf1a30Sjl  *
19*25cf1a30Sjl  * CDDL HEADER END
20*25cf1a30Sjl  */
21*25cf1a30Sjl /*
22*25cf1a30Sjl  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23*25cf1a30Sjl  * Use is subject to license terms.
24*25cf1a30Sjl  */
25*25cf1a30Sjl 
26*25cf1a30Sjl #pragma ident	"%Z%%M%	%I%	%E% SMI"
27*25cf1a30Sjl 
28*25cf1a30Sjl /*
29*25cf1a30Sjl  * DR memory support routines.
30*25cf1a30Sjl  */
31*25cf1a30Sjl 
32*25cf1a30Sjl #include <sys/note.h>
33*25cf1a30Sjl #include <sys/debug.h>
34*25cf1a30Sjl #include <sys/types.h>
35*25cf1a30Sjl #include <sys/errno.h>
36*25cf1a30Sjl #include <sys/param.h>
37*25cf1a30Sjl #include <sys/dditypes.h>
38*25cf1a30Sjl #include <sys/kmem.h>
39*25cf1a30Sjl #include <sys/conf.h>
40*25cf1a30Sjl #include <sys/ddi.h>
41*25cf1a30Sjl #include <sys/sunddi.h>
42*25cf1a30Sjl #include <sys/sunndi.h>
43*25cf1a30Sjl #include <sys/ddi_impldefs.h>
44*25cf1a30Sjl #include <sys/ndi_impldefs.h>
45*25cf1a30Sjl #include <sys/sysmacros.h>
46*25cf1a30Sjl #include <sys/machsystm.h>
47*25cf1a30Sjl #include <sys/spitregs.h>
48*25cf1a30Sjl #include <sys/cpuvar.h>
49*25cf1a30Sjl #include <sys/promif.h>
50*25cf1a30Sjl #include <vm/seg_kmem.h>
51*25cf1a30Sjl #include <sys/lgrp.h>
52*25cf1a30Sjl #include <sys/platform_module.h>
53*25cf1a30Sjl 
54*25cf1a30Sjl #include <vm/page.h>
55*25cf1a30Sjl 
56*25cf1a30Sjl #include <sys/dr.h>
57*25cf1a30Sjl #include <sys/dr_util.h>
58*25cf1a30Sjl #include <sys/drmach.h>
59*25cf1a30Sjl 
60*25cf1a30Sjl extern struct memlist	*phys_install;
61*25cf1a30Sjl extern vnode_t		retired_pages;
62*25cf1a30Sjl 
63*25cf1a30Sjl /* TODO: push this reference below drmach line */
64*25cf1a30Sjl extern int		kcage_on;
65*25cf1a30Sjl 
66*25cf1a30Sjl /* for the DR*INTERNAL_ERROR macros.  see sys/dr.h. */
67*25cf1a30Sjl static char *dr_ie_fmt = "%M% %d";
68*25cf1a30Sjl 
/*
 * Copy-rename target selection preference.  Values are ordered by
 * desirability; exact semantics are implemented by
 * dr_get_target_preference() (not visible in this chunk) — verify
 * against that routine before relying on these descriptions.
 */
typedef enum {
	DR_TP_INVALID = -1,	/* no usable target found */
	DR_TP_SAME,		/* presumably: target mirrors source layout */
	DR_TP_LARGE,		/* presumably: target larger than source */
	DR_TP_NONRELOC,		/* presumably: target can hold nonreloc span */
	DR_TP_FLOATING		/* presumably: floating-board target */
} dr_target_pref_t;
76*25cf1a30Sjl 
77*25cf1a30Sjl static int		dr_post_detach_mem_unit(dr_mem_unit_t *mp);
78*25cf1a30Sjl static int		dr_reserve_mem_spans(memhandle_t *mhp,
79*25cf1a30Sjl 				struct memlist *mlist);
80*25cf1a30Sjl static int		dr_select_mem_target(dr_handle_t *hp,
81*25cf1a30Sjl 				dr_mem_unit_t *mp, struct memlist *ml);
82*25cf1a30Sjl static void		dr_init_mem_unit_data(dr_mem_unit_t *mp);
83*25cf1a30Sjl static struct memlist	*dr_memlist_del_retired_pages(struct memlist *ml);
84*25cf1a30Sjl static dr_target_pref_t	dr_get_target_preference(dr_handle_t *hp,
85*25cf1a30Sjl 				dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
86*25cf1a30Sjl 				struct memlist *s_ml, struct memlist *x_ml,
87*25cf1a30Sjl 				struct memlist *b_ml);
88*25cf1a30Sjl 
89*25cf1a30Sjl static int		memlist_canfit(struct memlist *s_mlist,
90*25cf1a30Sjl 				struct memlist *t_mlist);
91*25cf1a30Sjl static int		dr_del_mlist_query(struct memlist *mlist,
92*25cf1a30Sjl 				memquery_t *mp);
93*25cf1a30Sjl static struct memlist	*dr_get_copy_mlist(struct memlist *s_ml,
94*25cf1a30Sjl 				struct memlist *t_ml, dr_mem_unit_t *s_mp,
95*25cf1a30Sjl 				dr_mem_unit_t *t_mp);
96*25cf1a30Sjl static struct memlist	*dr_get_nonreloc_mlist(struct memlist *s_ml,
97*25cf1a30Sjl 				dr_mem_unit_t *s_mp);
98*25cf1a30Sjl static int		dr_memlist_canfit(struct memlist *s_mlist,
99*25cf1a30Sjl 				struct memlist *t_mlist, dr_mem_unit_t *s_mp,
100*25cf1a30Sjl 				dr_mem_unit_t *t_mp);
101*25cf1a30Sjl 
102*25cf1a30Sjl extern void		page_unretire_pages(void);
103*25cf1a30Sjl 
104*25cf1a30Sjl /*
105*25cf1a30Sjl  * dr_mem_unit_t.sbm_flags
106*25cf1a30Sjl  */
107*25cf1a30Sjl #define	DR_MFLAG_RESERVED	0x01	/* mem unit reserved for delete */
108*25cf1a30Sjl #define	DR_MFLAG_SOURCE		0x02	/* source brd of copy/rename op */
109*25cf1a30Sjl #define	DR_MFLAG_TARGET		0x04	/* target brd of copy/rename op */
110*25cf1a30Sjl #define	DR_MFLAG_RELOWNER	0x20	/* memory release (delete) owner */
111*25cf1a30Sjl #define	DR_MFLAG_RELDONE	0x40	/* memory release (delete) done */
112*25cf1a30Sjl 
113*25cf1a30Sjl /* helper macros */
114*25cf1a30Sjl #define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)
115*25cf1a30Sjl #define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
116*25cf1a30Sjl 
117*25cf1a30Sjl static struct memlist *
118*25cf1a30Sjl dr_get_memlist(dr_mem_unit_t *mp)
119*25cf1a30Sjl {
120*25cf1a30Sjl 	struct memlist	*mlist = NULL;
121*25cf1a30Sjl 	sbd_error_t	*err;
122*25cf1a30Sjl 	static fn_t	f = "dr_get_memlist";
123*25cf1a30Sjl 
124*25cf1a30Sjl 	PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path);
125*25cf1a30Sjl 
126*25cf1a30Sjl 	/*
127*25cf1a30Sjl 	 * Return cached memlist, if present.
128*25cf1a30Sjl 	 * This memlist will be present following an
129*25cf1a30Sjl 	 * unconfigure (a.k.a: detach) of this memunit.
130*25cf1a30Sjl 	 * It should only be used in the case were a configure
131*25cf1a30Sjl 	 * is bringing this memunit back in without going
132*25cf1a30Sjl 	 * through the disconnect and connect states.
133*25cf1a30Sjl 	 */
134*25cf1a30Sjl 	if (mp->sbm_mlist) {
135*25cf1a30Sjl 		PR_MEM("%s: found cached memlist\n", f);
136*25cf1a30Sjl 
137*25cf1a30Sjl 		mlist = memlist_dup(mp->sbm_mlist);
138*25cf1a30Sjl 	} else {
139*25cf1a30Sjl 		uint64_t basepa = _ptob64(mp->sbm_basepfn);
140*25cf1a30Sjl 
141*25cf1a30Sjl 		/* attempt to construct a memlist using phys_install */
142*25cf1a30Sjl 
143*25cf1a30Sjl 		/* round down to slice base address */
144*25cf1a30Sjl 		basepa &= ~(mp->sbm_slice_size - 1);
145*25cf1a30Sjl 
146*25cf1a30Sjl 		/* get a copy of phys_install to edit */
147*25cf1a30Sjl 		memlist_read_lock();
148*25cf1a30Sjl 		mlist = memlist_dup(phys_install);
149*25cf1a30Sjl 		memlist_read_unlock();
150*25cf1a30Sjl 
151*25cf1a30Sjl 		/* trim lower irrelevant span */
152*25cf1a30Sjl 		if (mlist)
153*25cf1a30Sjl 			mlist = memlist_del_span(mlist, 0ull, basepa);
154*25cf1a30Sjl 
155*25cf1a30Sjl 		/* trim upper irrelevant span */
156*25cf1a30Sjl 		if (mlist) {
157*25cf1a30Sjl 			uint64_t endpa;
158*25cf1a30Sjl 
159*25cf1a30Sjl 			basepa += mp->sbm_slice_size;
160*25cf1a30Sjl 			endpa = _ptob64(physmax + 1);
161*25cf1a30Sjl 			if (endpa > basepa)
162*25cf1a30Sjl 				mlist = memlist_del_span(
163*25cf1a30Sjl 					mlist, basepa,
164*25cf1a30Sjl 					endpa - basepa);
165*25cf1a30Sjl 		}
166*25cf1a30Sjl 
167*25cf1a30Sjl 		if (mlist) {
168*25cf1a30Sjl 			/* successfully built a memlist */
169*25cf1a30Sjl 			PR_MEM("%s: derived memlist from phys_install\n", f);
170*25cf1a30Sjl 		}
171*25cf1a30Sjl 
172*25cf1a30Sjl 		/* if no mlist yet, try platform layer */
173*25cf1a30Sjl 		if (!mlist) {
174*25cf1a30Sjl 			err = drmach_mem_get_memlist(
175*25cf1a30Sjl 				mp->sbm_cm.sbdev_id, &mlist);
176*25cf1a30Sjl 			if (err) {
177*25cf1a30Sjl 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
178*25cf1a30Sjl 				mlist = NULL; /* paranoia */
179*25cf1a30Sjl 			}
180*25cf1a30Sjl 		}
181*25cf1a30Sjl 	}
182*25cf1a30Sjl 
183*25cf1a30Sjl 	PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path);
184*25cf1a30Sjl 	PR_MEMLIST_DUMP(mlist);
185*25cf1a30Sjl 
186*25cf1a30Sjl 	return (mlist);
187*25cf1a30Sjl }
188*25cf1a30Sjl 
/*
 * Synchronization state shared between dr_release_mem() and the
 * dr_mem_del_done() callback that kphysm_del_start() fires when the
 * memory delete ("drain") completes or is cancelled.
 */
typedef struct {
	kcondvar_t cond;	/* signalled by dr_mem_del_done() */
	kmutex_t lock;		/* protects 'error' and 'done' */
	int error;		/* result reported by the delete callback */
	int done;		/* nonzero once the callback has fired */
} dr_release_mem_sync_t;
195*25cf1a30Sjl 
196*25cf1a30Sjl /*
197*25cf1a30Sjl  * Memory has been logically removed by the time this routine is called.
198*25cf1a30Sjl  */
199*25cf1a30Sjl static void
200*25cf1a30Sjl dr_mem_del_done(void *arg, int error)
201*25cf1a30Sjl {
202*25cf1a30Sjl 	dr_release_mem_sync_t *ds = arg;
203*25cf1a30Sjl 
204*25cf1a30Sjl 	mutex_enter(&ds->lock);
205*25cf1a30Sjl 	ds->error = error;
206*25cf1a30Sjl 	ds->done = 1;
207*25cf1a30Sjl 	cv_signal(&ds->cond);
208*25cf1a30Sjl 	mutex_exit(&ds->lock);
209*25cf1a30Sjl }
210*25cf1a30Sjl 
211*25cf1a30Sjl /*
212*25cf1a30Sjl  * When we reach here the memory being drained should have
213*25cf1a30Sjl  * already been reserved in dr_pre_release_mem().
214*25cf1a30Sjl  * Our only task here is to kick off the "drain" and wait
215*25cf1a30Sjl  * for it to finish.
216*25cf1a30Sjl  */
217*25cf1a30Sjl void
218*25cf1a30Sjl dr_release_mem(dr_common_unit_t *cp)
219*25cf1a30Sjl {
220*25cf1a30Sjl 	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
221*25cf1a30Sjl 	int		err;
222*25cf1a30Sjl 	dr_release_mem_sync_t rms;
223*25cf1a30Sjl 	static fn_t	f = "dr_release_mem";
224*25cf1a30Sjl 
225*25cf1a30Sjl 	/* check that this memory unit has been reserved */
226*25cf1a30Sjl 	if (!(mp->sbm_flags & DR_MFLAG_RELOWNER)) {
227*25cf1a30Sjl 		DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
228*25cf1a30Sjl 		return;
229*25cf1a30Sjl 	}
230*25cf1a30Sjl 
231*25cf1a30Sjl 	bzero((void *) &rms, sizeof (rms));
232*25cf1a30Sjl 
233*25cf1a30Sjl 	mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL);
234*25cf1a30Sjl 	cv_init(&rms.cond, NULL, CV_DRIVER, NULL);
235*25cf1a30Sjl 
236*25cf1a30Sjl 	mutex_enter(&rms.lock);
237*25cf1a30Sjl 	err = kphysm_del_start(mp->sbm_memhandle,
238*25cf1a30Sjl 		dr_mem_del_done, (void *) &rms);
239*25cf1a30Sjl 	if (err == KPHYSM_OK) {
240*25cf1a30Sjl 		/* wait for completion or interrupt */
241*25cf1a30Sjl 		while (!rms.done) {
242*25cf1a30Sjl 			if (cv_wait_sig(&rms.cond, &rms.lock) == 0) {
243*25cf1a30Sjl 				/* then there is a pending UNIX signal */
244*25cf1a30Sjl 				(void) kphysm_del_cancel(mp->sbm_memhandle);
245*25cf1a30Sjl 
246*25cf1a30Sjl 				/* wait for completion */
247*25cf1a30Sjl 				while (!rms.done)
248*25cf1a30Sjl 					cv_wait(&rms.cond, &rms.lock);
249*25cf1a30Sjl 			}
250*25cf1a30Sjl 		}
251*25cf1a30Sjl 		/* get the result of the memory delete operation */
252*25cf1a30Sjl 		err = rms.error;
253*25cf1a30Sjl 	}
254*25cf1a30Sjl 	mutex_exit(&rms.lock);
255*25cf1a30Sjl 
256*25cf1a30Sjl 	cv_destroy(&rms.cond);
257*25cf1a30Sjl 	mutex_destroy(&rms.lock);
258*25cf1a30Sjl 
259*25cf1a30Sjl 	if (err != KPHYSM_OK) {
260*25cf1a30Sjl 		int e_code;
261*25cf1a30Sjl 
262*25cf1a30Sjl 		switch (err) {
263*25cf1a30Sjl 			case KPHYSM_ENOWORK:
264*25cf1a30Sjl 				e_code = ESBD_NOERROR;
265*25cf1a30Sjl 				break;
266*25cf1a30Sjl 
267*25cf1a30Sjl 			case KPHYSM_EHANDLE:
268*25cf1a30Sjl 			case KPHYSM_ESEQUENCE:
269*25cf1a30Sjl 				e_code = ESBD_INTERNAL;
270*25cf1a30Sjl 				break;
271*25cf1a30Sjl 
272*25cf1a30Sjl 			case KPHYSM_ENOTVIABLE:
273*25cf1a30Sjl 				e_code = ESBD_MEM_NOTVIABLE;
274*25cf1a30Sjl 				break;
275*25cf1a30Sjl 
276*25cf1a30Sjl 			case KPHYSM_EREFUSED:
277*25cf1a30Sjl 				e_code = ESBD_MEM_REFUSED;
278*25cf1a30Sjl 				break;
279*25cf1a30Sjl 
280*25cf1a30Sjl 			case KPHYSM_ENONRELOC:
281*25cf1a30Sjl 				e_code = ESBD_MEM_NONRELOC;
282*25cf1a30Sjl 				break;
283*25cf1a30Sjl 
284*25cf1a30Sjl 			case KPHYSM_ECANCELLED:
285*25cf1a30Sjl 				e_code = ESBD_MEM_CANCELLED;
286*25cf1a30Sjl 				break;
287*25cf1a30Sjl 
288*25cf1a30Sjl 			case KPHYSM_ERESOURCE:
289*25cf1a30Sjl 				e_code = ESBD_MEMFAIL;
290*25cf1a30Sjl 				break;
291*25cf1a30Sjl 
292*25cf1a30Sjl 			default:
293*25cf1a30Sjl 				cmn_err(CE_WARN,
294*25cf1a30Sjl 					"%s: unexpected kphysm error code %d,"
295*25cf1a30Sjl 					" id 0x%p",
296*25cf1a30Sjl 					f, err, mp->sbm_cm.sbdev_id);
297*25cf1a30Sjl 
298*25cf1a30Sjl 				e_code = ESBD_IO;
299*25cf1a30Sjl 				break;
300*25cf1a30Sjl 		}
301*25cf1a30Sjl 
302*25cf1a30Sjl 		if (e_code != ESBD_NOERROR) {
303*25cf1a30Sjl 			dr_dev_err(CE_IGNORE, &mp->sbm_cm, e_code);
304*25cf1a30Sjl 		}
305*25cf1a30Sjl 	}
306*25cf1a30Sjl }
307*25cf1a30Sjl 
308*25cf1a30Sjl void
309*25cf1a30Sjl dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
310*25cf1a30Sjl {
311*25cf1a30Sjl 	_NOTE(ARGUNUSED(hp))
312*25cf1a30Sjl 
313*25cf1a30Sjl 	dr_mem_unit_t	*mp = (dr_mem_unit_t *)cp;
314*25cf1a30Sjl 	struct memlist	*ml, *mc;
315*25cf1a30Sjl 	sbd_error_t	*err;
316*25cf1a30Sjl 	static fn_t	f = "dr_attach_mem";
317*25cf1a30Sjl 
318*25cf1a30Sjl 	PR_MEM("%s...\n", f);
319*25cf1a30Sjl 
320*25cf1a30Sjl 	dr_lock_status(hp->h_bd);
321*25cf1a30Sjl 	err = drmach_configure(cp->sbdev_id, 0);
322*25cf1a30Sjl 	dr_unlock_status(hp->h_bd);
323*25cf1a30Sjl 	if (err) {
324*25cf1a30Sjl 		DRERR_SET_C(&cp->sbdev_error, &err);
325*25cf1a30Sjl 		return;
326*25cf1a30Sjl 	}
327*25cf1a30Sjl 
328*25cf1a30Sjl 	ml = dr_get_memlist(mp);
329*25cf1a30Sjl 	for (mc = ml; mc; mc = mc->next) {
330*25cf1a30Sjl 		int		 rv;
331*25cf1a30Sjl 		sbd_error_t	*err;
332*25cf1a30Sjl 
333*25cf1a30Sjl 		rv = kphysm_add_memory_dynamic(
334*25cf1a30Sjl 			(pfn_t)(mc->address >> PAGESHIFT),
335*25cf1a30Sjl 			(pgcnt_t)(mc->size >> PAGESHIFT));
336*25cf1a30Sjl 		if (rv != KPHYSM_OK) {
337*25cf1a30Sjl 			/*
338*25cf1a30Sjl 			 * translate kphysm error and
339*25cf1a30Sjl 			 * store in devlist error
340*25cf1a30Sjl 			 */
341*25cf1a30Sjl 			switch (rv) {
342*25cf1a30Sjl 			case KPHYSM_ERESOURCE:
343*25cf1a30Sjl 				rv = ESBD_NOMEM;
344*25cf1a30Sjl 				break;
345*25cf1a30Sjl 
346*25cf1a30Sjl 			case KPHYSM_EFAULT:
347*25cf1a30Sjl 				rv = ESBD_FAULT;
348*25cf1a30Sjl 				break;
349*25cf1a30Sjl 
350*25cf1a30Sjl 			default:
351*25cf1a30Sjl 				rv = ESBD_INTERNAL;
352*25cf1a30Sjl 				break;
353*25cf1a30Sjl 			}
354*25cf1a30Sjl 
355*25cf1a30Sjl 			if (rv == ESBD_INTERNAL) {
356*25cf1a30Sjl 				DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
357*25cf1a30Sjl 			} else
358*25cf1a30Sjl 				dr_dev_err(CE_WARN, &mp->sbm_cm, rv);
359*25cf1a30Sjl 			break;
360*25cf1a30Sjl 		}
361*25cf1a30Sjl 
362*25cf1a30Sjl 		err = drmach_mem_add_span(
363*25cf1a30Sjl 			mp->sbm_cm.sbdev_id, mc->address, mc->size);
364*25cf1a30Sjl 		if (err) {
365*25cf1a30Sjl 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
366*25cf1a30Sjl 			break;
367*25cf1a30Sjl 		}
368*25cf1a30Sjl 	}
369*25cf1a30Sjl 
370*25cf1a30Sjl 	memlist_delete(ml);
371*25cf1a30Sjl 
372*25cf1a30Sjl 	/* back out if configure failed */
373*25cf1a30Sjl 	if (mp->sbm_cm.sbdev_error != NULL) {
374*25cf1a30Sjl 		dr_lock_status(hp->h_bd);
375*25cf1a30Sjl 		err = drmach_unconfigure(cp->sbdev_id, 0);
376*25cf1a30Sjl 		if (err)
377*25cf1a30Sjl 			sbd_err_clear(&err);
378*25cf1a30Sjl 		dr_unlock_status(hp->h_bd);
379*25cf1a30Sjl 	}
380*25cf1a30Sjl }
381*25cf1a30Sjl 
/*
 * Delete from 'mlist' every retired page that falls within it.
 * Retired pages hang off the global 'retired_pages' vnode; their
 * contents need not be copied during copy-rename since the pages are
 * no longer in use (see the caller, dr_move_memory()).  Returns the
 * possibly-trimmed memlist.
 */
static struct memlist *
dr_memlist_del_retired_pages(struct memlist *mlist)
{
	page_t		*pp;
	pfn_t		pfn;
	kmutex_t	*vphm;
	vnode_t		*vp = &retired_pages;
	static fn_t	f = "dr_memlist_del_retired_pages";

	/* hold the vnode page mutex while walking vp->v_pages */
	vphm = page_vnode_mutex(vp);
	mutex_enter(vphm);

	PR_MEM("%s\n", f);

	/* no retired pages at all: nothing to trim */
	if ((pp = vp->v_pages) == NULL) {
		mutex_exit(vphm);
		return (mlist);
	}

	do {
		ASSERT(pp != NULL);
		/*
		 * page_downgrade happens after page_hashin, so we
		 * can't assert PAGE_SE. Just assert locked to catch
		 * changes to the retired vnode locking scheme.
		 */
		ASSERT(PAGE_LOCKED(pp));
		ASSERT(pp->p_vnode == &retired_pages);

		/*
		 * Skip pages we can't share-lock right now.  Note the
		 * 'continue' in this do-while still advances pp: control
		 * transfers to the while() condition, which assigns
		 * pp = pp->p_vpnext.
		 */
		if (!page_trylock(pp, SE_SHARED))
			continue;

		pfn = page_pptonum(pp);

		ASSERT((pp->p_offset >> PAGESHIFT) == pfn);
		/*
		 * Page retirement currently breaks large pages into PAGESIZE
		 * pages. If this changes, need to remove the assert and deal
		 * with different page sizes.
		 */
		ASSERT(pp->p_szc == 0);

		/* remove this retired page from the caller's memlist */
		if (address_in_memlist(mlist, ptob(pfn), PAGESIZE)) {
			mlist = memlist_del_span(mlist, ptob(pfn), PAGESIZE);
			PR_MEM("deleted retired page 0x%lx (pfn 0x%lx) "
			    "from memlist\n", ptob(pfn), pfn);
		}

		page_unlock(pp);
	} while ((pp = pp->p_vpnext) != vp->v_pages);

	mutex_exit(vphm);

	return (mlist);
}
437*25cf1a30Sjl 
#ifdef	DEBUG
/*
 * Debug-only fault injection: maximum number of pages to artificially
 * retire before a copy-rename (see dr_move_memory()).  Tunable from
 * the debugger; 0 disables the injection entirely.
 */
int dbg_retirecnt = 10;

/*
 * Walk the memsegs overlapping each span of r_ml and retire up to
 * dbg_retirecnt free pages, in short sequences separated by skip
 * intervals, to exercise the retired-page handling of the copy-rename
 * path.  (Removed a stale #if 0 block that duplicated this loop using
 * the older page_settoxic()/page_retire() interface.)
 */
static void
dbg_page_retire(struct memlist *r_ml)
{
	struct memlist	*t_ml;
	page_t		*pp, *epp;
	pfn_t		pfn, epfn;
	struct memseg	*seg;

	int dbg_retired = 0;
	int dbg_skip = 10;
	int dbg_seq = 1;

	if (r_ml == NULL)
		return;

	for (t_ml = r_ml; (t_ml != NULL); t_ml = t_ml->next) {
		pfn = _b64top(t_ml->address);
		epfn = _b64top(t_ml->address + t_ml->size);

		for (seg = memsegs; seg != NULL; seg = seg->next) {
			int retire = 0;
			int skip = 0;

			/* ignore memsegs that don't overlap this span */
			if (pfn >= seg->pages_end || epfn < seg->pages_base)
				continue;

			/* clamp [pp, epp) to the overlapping pages */
			pp = seg->pages;
			if (pfn > seg->pages_base)
				pp += pfn - seg->pages_base;

			epp = seg->epages;
			if (epfn < seg->pages_end)
				epp -= seg->pages_end - epfn;

			ASSERT(pp < epp);
			while (pp < epp) {
				if (PP_ISFREE(pp)) {
					if (retire++ < dbg_seq) {
						/*
						 * NOTE(review): passes the
						 * span's base address every
						 * iteration rather than the
						 * current page's address —
						 * looks intentional only for
						 * crude injection; verify.
						 */
						page_retire(t_ml->address,
						    PR_OK);
						if (++dbg_retired >=
						    dbg_retirecnt)
							return;
					} else if (skip++ >= dbg_skip) {
						skip = 0;
						retire = 0;
						dbg_seq++;
					}
				}
				pp++;
			}
		}
	}
}
#endif
515*25cf1a30Sjl 
/*
 * Copy the contents of source memory unit s_mp to target unit t_mp and
 * "rename" the hardware so the target assumes the source's address
 * range.  The OS is quiesced (suspended) around the actual copy.
 * Returns 0 on success, -1 on failure; failure details are left in
 * s_mp->sbm_cm.sbdev_error.
 */
static int
dr_move_memory(dr_handle_t *hp, dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
{
	int		rv = -1;
	time_t		 copytime;
	drmachid_t	 cr_id;
	dr_sr_handle_t	*srhp = NULL;
	dr_board_t	*t_bp, *s_bp;
	struct memlist	*c_ml, *d_ml;
	sbd_error_t	*err;
	static fn_t	 f = "dr_move_memory";

	PR_MEM("%s: (INLINE) moving memory from %s to %s\n",
		f,
		s_mp->sbm_cm.sbdev_path,
		t_mp->sbm_cm.sbdev_path);

	/* source and target must already be paired for copy-rename */
	ASSERT(s_mp->sbm_flags & DR_MFLAG_SOURCE);
	ASSERT(s_mp->sbm_peer == t_mp);
	ASSERT(s_mp->sbm_mlist);

	ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
	ASSERT(t_mp->sbm_peer == s_mp);

#ifdef	DEBUG
	/* debug-only fault injection; see dbg_page_retire() */
	if (dbg_retirecnt)
		dbg_page_retire(s_mp->sbm_mlist);
#endif

	/*
	 * create a memlist of spans to copy by removing
	 * the spans that have been deleted, if any, from
	 * the full source board memlist.  s_mp->sbm_del_mlist
	 * will be NULL if there were no spans deleted from
	 * the source board.
	 */
	c_ml = memlist_dup(s_mp->sbm_mlist);
	d_ml = s_mp->sbm_del_mlist;
	while (d_ml != NULL) {
		c_ml = memlist_del_span(c_ml, d_ml->address, d_ml->size);
		d_ml = d_ml->next;
	}

	/*
	 * Remove retired pages from the copy list. The page content
	 * need not be copied since the pages are no longer in use.
	 */
	PR_MEM("%s: copy list before removing retired pages (if any):\n", f);
	PR_MEMLIST_DUMP(c_ml);

	c_ml = dr_memlist_del_retired_pages(c_ml);

	PR_MEM("%s: copy list after removing retired pages:\n", f);
	PR_MEMLIST_DUMP(c_ml);

	/*
	 * With parallel copy, it shouldn't make a difference which
	 * CPU is the actual master during copy-rename since all
	 * CPUs participate in the parallel copy anyway.
	 */
	affinity_set(CPU_CURRENT);

	err = drmach_copy_rename_init(
		t_mp->sbm_cm.sbdev_id, s_mp->sbm_cm.sbdev_id, c_ml, &cr_id);
	if (err) {
		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
		affinity_clear();
		memlist_delete(c_ml);
		return (-1);
	}

	srhp = dr_get_sr_handle(hp);
	ASSERT(srhp);

	/* timestamp (in ticks) for the elapsed-time report below */
	copytime = lbolt;

	/* Quiesce the OS.  */
	if (dr_suspend(srhp)) {
		cmn_err(CE_WARN, "%s: failed to quiesce OS"
			" for copy-rename", f);

		err = drmach_copy_rename_fini(cr_id);
		if (err) {
			/*
			 * no error is expected since the program has
			 * not yet run.
			 */

			/* catch this in debug kernels */
			ASSERT(0);

			sbd_err_clear(&err);
		}

		/* suspend error reached via hp */
		s_mp->sbm_cm.sbdev_error = hp->h_err;
		hp->h_err = NULL;
		goto done;
	}

	drmach_copy_rename(cr_id);

	/* Resume the OS.  */
	dr_resume(srhp);

	copytime = lbolt - copytime;

	/* assignment intended: capture any fini error and bail out */
	if (err = drmach_copy_rename_fini(cr_id))
		goto done;

	/*
	 * Rename memory for lgroup.
	 * Source and target board numbers are packaged in arg.
	 */
	s_bp = s_mp->sbm_cm.sbdev_bp;
	t_bp = t_mp->sbm_cm.sbdev_bp;

	lgrp_plat_config(LGRP_CONFIG_MEM_RENAME,
		(uintptr_t)(s_bp->b_num | (t_bp->b_num << 16)));


	PR_MEM("%s: copy-rename elapsed time = %ld ticks (%ld secs)\n",
		f, copytime, copytime / hz);

	rv = 0;
done:
	/* common exit: release the suspend/resume handle, record err */
	if (srhp)
		dr_release_sr_handle(srhp);
	if (err)
		DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
	affinity_clear();

	return (rv);
}
650*25cf1a30Sjl 
651*25cf1a30Sjl /*
652*25cf1a30Sjl  * If detaching node contains memory that is "non-permanent"
653*25cf1a30Sjl  * then the memory adr's are simply cleared.  If the memory
654*25cf1a30Sjl  * is non-relocatable, then do a copy-rename.
655*25cf1a30Sjl  */
656*25cf1a30Sjl void
657*25cf1a30Sjl dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
658*25cf1a30Sjl {
659*25cf1a30Sjl 	int			rv = 0;
660*25cf1a30Sjl 	dr_mem_unit_t		*s_mp = (dr_mem_unit_t *)cp;
661*25cf1a30Sjl 	dr_mem_unit_t		*t_mp;
662*25cf1a30Sjl 	dr_state_t		state;
663*25cf1a30Sjl 	static fn_t		f = "dr_detach_mem";
664*25cf1a30Sjl 
665*25cf1a30Sjl 	PR_MEM("%s...\n", f);
666*25cf1a30Sjl 
667*25cf1a30Sjl 	/* lookup target mem unit and target board structure, if any */
668*25cf1a30Sjl 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
669*25cf1a30Sjl 		t_mp = s_mp->sbm_peer;
670*25cf1a30Sjl 		ASSERT(t_mp != NULL);
671*25cf1a30Sjl 		ASSERT(t_mp->sbm_peer == s_mp);
672*25cf1a30Sjl 	} else {
673*25cf1a30Sjl 		t_mp = NULL;
674*25cf1a30Sjl 	}
675*25cf1a30Sjl 
676*25cf1a30Sjl 	/* verify mem unit's state is UNREFERENCED */
677*25cf1a30Sjl 	state = s_mp->sbm_cm.sbdev_state;
678*25cf1a30Sjl 	if (state != DR_STATE_UNREFERENCED) {
679*25cf1a30Sjl 		dr_dev_err(CE_IGNORE, &s_mp->sbm_cm, ESBD_STATE);
680*25cf1a30Sjl 		return;
681*25cf1a30Sjl 	}
682*25cf1a30Sjl 
683*25cf1a30Sjl 	/* verify target mem unit's state is UNREFERENCED, if any */
684*25cf1a30Sjl 	if (t_mp != NULL) {
685*25cf1a30Sjl 		state = t_mp->sbm_cm.sbdev_state;
686*25cf1a30Sjl 		if (state != DR_STATE_UNREFERENCED) {
687*25cf1a30Sjl 			dr_dev_err(CE_IGNORE, &t_mp->sbm_cm, ESBD_STATE);
688*25cf1a30Sjl 			return;
689*25cf1a30Sjl 		}
690*25cf1a30Sjl 	}
691*25cf1a30Sjl 
692*25cf1a30Sjl 	/*
693*25cf1a30Sjl 	 * If there is no target board (no copy/rename was needed), then
694*25cf1a30Sjl 	 * we're done!
695*25cf1a30Sjl 	 */
696*25cf1a30Sjl 	if (t_mp == NULL) {
697*25cf1a30Sjl 		sbd_error_t *err;
698*25cf1a30Sjl 		/*
699*25cf1a30Sjl 		 * Reprogram interconnect hardware and disable
700*25cf1a30Sjl 		 * memory controllers for memory node that's going away.
701*25cf1a30Sjl 		 */
702*25cf1a30Sjl 
703*25cf1a30Sjl 		err = drmach_mem_disable(s_mp->sbm_cm.sbdev_id);
704*25cf1a30Sjl 		if (err) {
705*25cf1a30Sjl 			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
706*25cf1a30Sjl 			rv = -1;
707*25cf1a30Sjl 		}
708*25cf1a30Sjl 	} else {
709*25cf1a30Sjl 		rv = dr_move_memory(hp, s_mp, t_mp);
710*25cf1a30Sjl 		PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n",
711*25cf1a30Sjl 			f,
712*25cf1a30Sjl 			rv ? "FAILED" : "COMPLETED",
713*25cf1a30Sjl 			s_mp->sbm_cm.sbdev_bp->b_num,
714*25cf1a30Sjl 			t_mp->sbm_cm.sbdev_bp->b_num);
715*25cf1a30Sjl 
716*25cf1a30Sjl 		if (rv != 0)
717*25cf1a30Sjl 			(void) dr_cancel_mem(s_mp);
718*25cf1a30Sjl 	}
719*25cf1a30Sjl 
720*25cf1a30Sjl 	if (rv == 0) {
721*25cf1a30Sjl 		sbd_error_t *err;
722*25cf1a30Sjl 
723*25cf1a30Sjl 		dr_lock_status(hp->h_bd);
724*25cf1a30Sjl 		err = drmach_unconfigure(s_mp->sbm_cm.sbdev_id, 0);
725*25cf1a30Sjl 		dr_unlock_status(hp->h_bd);
726*25cf1a30Sjl 		if (err)
727*25cf1a30Sjl 			sbd_err_clear(&err);
728*25cf1a30Sjl 	}
729*25cf1a30Sjl }
730*25cf1a30Sjl 
731*25cf1a30Sjl /*
732*25cf1a30Sjl  * This routine acts as a wrapper for kphysm_del_span_query in order to
733*25cf1a30Sjl  * support potential memory holes in a board's physical address space.
734*25cf1a30Sjl  * It calls kphysm_del_span_query for each node in a memlist and accumulates
735*25cf1a30Sjl  * the results in *mp.
736*25cf1a30Sjl  */
737*25cf1a30Sjl static int
738*25cf1a30Sjl dr_del_mlist_query(struct memlist *mlist, memquery_t *mp)
739*25cf1a30Sjl {
740*25cf1a30Sjl 	struct memlist	*ml;
741*25cf1a30Sjl 	int		 rv = 0;
742*25cf1a30Sjl 
743*25cf1a30Sjl 
744*25cf1a30Sjl 	if (mlist == NULL)
745*25cf1a30Sjl 		cmn_err(CE_WARN, "dr_del_mlist_query: mlist=NULL\n");
746*25cf1a30Sjl 
747*25cf1a30Sjl 	mp->phys_pages = 0;
748*25cf1a30Sjl 	mp->managed = 0;
749*25cf1a30Sjl 	mp->nonrelocatable = 0;
750*25cf1a30Sjl 	mp->first_nonrelocatable = (pfn_t)-1;	/* XXX */
751*25cf1a30Sjl 	mp->last_nonrelocatable = 0;
752*25cf1a30Sjl 
753*25cf1a30Sjl 	for (ml = mlist; ml; ml = ml->next) {
754*25cf1a30Sjl 		memquery_t mq;
755*25cf1a30Sjl 
756*25cf1a30Sjl 		rv = kphysm_del_span_query(
757*25cf1a30Sjl 			_b64top(ml->address), _b64top(ml->size), &mq);
758*25cf1a30Sjl 		if (rv)
759*25cf1a30Sjl 			break;
760*25cf1a30Sjl 
761*25cf1a30Sjl 		mp->phys_pages += mq.phys_pages;
762*25cf1a30Sjl 		mp->managed += mq.managed;
763*25cf1a30Sjl 		mp->nonrelocatable += mq.nonrelocatable;
764*25cf1a30Sjl 
765*25cf1a30Sjl 		if (mq.nonrelocatable != 0) {
766*25cf1a30Sjl 			if (mq.first_nonrelocatable < mp->first_nonrelocatable)
767*25cf1a30Sjl 				mp->first_nonrelocatable =
768*25cf1a30Sjl 					mq.first_nonrelocatable;
769*25cf1a30Sjl 			if (mq.last_nonrelocatable > mp->last_nonrelocatable)
770*25cf1a30Sjl 				mp->last_nonrelocatable =
771*25cf1a30Sjl 					mq.last_nonrelocatable;
772*25cf1a30Sjl 		}
773*25cf1a30Sjl 	}
774*25cf1a30Sjl 
775*25cf1a30Sjl 	if (mp->nonrelocatable == 0)
776*25cf1a30Sjl 		mp->first_nonrelocatable = 0;	/* XXX */
777*25cf1a30Sjl 
778*25cf1a30Sjl 	return (rv);
779*25cf1a30Sjl }
780*25cf1a30Sjl 
781*25cf1a30Sjl /*
782*25cf1a30Sjl  * NOTE: This routine is only partially smart about multiple
783*25cf1a30Sjl  *	 mem-units.  Need to make mem-status structure smart
784*25cf1a30Sjl  *	 about them also.
785*25cf1a30Sjl  */
786*25cf1a30Sjl int
787*25cf1a30Sjl dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
788*25cf1a30Sjl {
789*25cf1a30Sjl 	int		m, mix;
790*25cf1a30Sjl 	memdelstat_t	mdst;
791*25cf1a30Sjl 	memquery_t	mq;
792*25cf1a30Sjl 	dr_board_t	*bp;
793*25cf1a30Sjl 	dr_mem_unit_t	*mp;
794*25cf1a30Sjl 	sbd_mem_stat_t	*msp;
795*25cf1a30Sjl 	static fn_t	f = "dr_mem_status";
796*25cf1a30Sjl 
797*25cf1a30Sjl 	bp = hp->h_bd;
798*25cf1a30Sjl 	devset &= DR_DEVS_PRESENT(bp);
799*25cf1a30Sjl 
800*25cf1a30Sjl 	for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) {
801*25cf1a30Sjl 		int		rv;
802*25cf1a30Sjl 		sbd_error_t	*err;
803*25cf1a30Sjl 		drmach_status_t	 pstat;
804*25cf1a30Sjl 		dr_mem_unit_t	*p_mp;
805*25cf1a30Sjl 
806*25cf1a30Sjl 		if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0)
807*25cf1a30Sjl 			continue;
808*25cf1a30Sjl 
809*25cf1a30Sjl 		mp = dr_get_mem_unit(bp, m);
810*25cf1a30Sjl 
811*25cf1a30Sjl 		if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) {
812*25cf1a30Sjl 			/* present, but not fully initialized */
813*25cf1a30Sjl 			continue;
814*25cf1a30Sjl 		}
815*25cf1a30Sjl 
816*25cf1a30Sjl 		if (mp->sbm_cm.sbdev_id == (drmachid_t)0)
817*25cf1a30Sjl 			continue;
818*25cf1a30Sjl 
819*25cf1a30Sjl 		/* fetch platform status */
820*25cf1a30Sjl 		err = drmach_status(mp->sbm_cm.sbdev_id, &pstat);
821*25cf1a30Sjl 		if (err) {
822*25cf1a30Sjl 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
823*25cf1a30Sjl 			continue;
824*25cf1a30Sjl 		}
825*25cf1a30Sjl 
826*25cf1a30Sjl 		msp = &dsp->d_mem;
827*25cf1a30Sjl 		bzero((caddr_t)msp, sizeof (*msp));
828*25cf1a30Sjl 
829*25cf1a30Sjl 		strncpy(msp->ms_cm.c_id.c_name, pstat.type,
830*25cf1a30Sjl 			sizeof (msp->ms_cm.c_id.c_name));
831*25cf1a30Sjl 		msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type;
832*25cf1a30Sjl 		msp->ms_cm.c_id.c_unit = SBD_NULL_UNIT;
833*25cf1a30Sjl 		msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond;
834*25cf1a30Sjl 		msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy;
835*25cf1a30Sjl 		msp->ms_cm.c_time = mp->sbm_cm.sbdev_time;
836*25cf1a30Sjl 		msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate;
837*25cf1a30Sjl 
838*25cf1a30Sjl 		msp->ms_totpages = mp->sbm_npages;
839*25cf1a30Sjl 		msp->ms_basepfn = mp->sbm_basepfn;
840*25cf1a30Sjl 		msp->ms_pageslost = mp->sbm_pageslost;
841*25cf1a30Sjl 		msp->ms_cage_enabled = kcage_on;
842*25cf1a30Sjl 
843*25cf1a30Sjl 		if (mp->sbm_flags & DR_MFLAG_RESERVED)
844*25cf1a30Sjl 			p_mp = mp->sbm_peer;
845*25cf1a30Sjl 		else
846*25cf1a30Sjl 			p_mp = NULL;
847*25cf1a30Sjl 
848*25cf1a30Sjl 		if (p_mp == NULL) {
849*25cf1a30Sjl 			msp->ms_peer_is_target = 0;
850*25cf1a30Sjl 			msp->ms_peer_ap_id[0] = '\0';
851*25cf1a30Sjl 		} else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) {
852*25cf1a30Sjl 			char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
853*25cf1a30Sjl 			char *minor;
854*25cf1a30Sjl 
855*25cf1a30Sjl 			/*
856*25cf1a30Sjl 			 * b_dip doesn't have to be held for ddi_pathname()
857*25cf1a30Sjl 			 * because the board struct (dr_board_t) will be
858*25cf1a30Sjl 			 * destroyed before b_dip detaches.
859*25cf1a30Sjl 			 */
860*25cf1a30Sjl 			(void) ddi_pathname(bp->b_dip, path);
861*25cf1a30Sjl 			minor = strchr(p_mp->sbm_cm.sbdev_path, ':');
862*25cf1a30Sjl 
863*25cf1a30Sjl 			snprintf(msp->ms_peer_ap_id,
864*25cf1a30Sjl 			    sizeof (msp->ms_peer_ap_id), "%s%s",
865*25cf1a30Sjl 			    path, (minor == NULL) ? "" : minor);
866*25cf1a30Sjl 
867*25cf1a30Sjl 			kmem_free(path, MAXPATHLEN);
868*25cf1a30Sjl 
869*25cf1a30Sjl 			if (p_mp->sbm_flags & DR_MFLAG_TARGET)
870*25cf1a30Sjl 				msp->ms_peer_is_target = 1;
871*25cf1a30Sjl 		}
872*25cf1a30Sjl 
873*25cf1a30Sjl 		if (mp->sbm_flags & DR_MFLAG_RELOWNER)
874*25cf1a30Sjl 			rv = kphysm_del_status(mp->sbm_memhandle, &mdst);
875*25cf1a30Sjl 		else
876*25cf1a30Sjl 			rv = KPHYSM_EHANDLE;	/* force 'if' to fail */
877*25cf1a30Sjl 
878*25cf1a30Sjl 		if (rv == KPHYSM_OK) {
879*25cf1a30Sjl 			/*
880*25cf1a30Sjl 			 * Any pages above managed is "free",
881*25cf1a30Sjl 			 * i.e. it's collected.
882*25cf1a30Sjl 			 */
883*25cf1a30Sjl 			msp->ms_detpages += (uint_t)(mdst.collected +
884*25cf1a30Sjl 			    mdst.phys_pages - mdst.managed);
885*25cf1a30Sjl 		} else {
886*25cf1a30Sjl 			/*
887*25cf1a30Sjl 			 * If we're UNREFERENCED or UNCONFIGURED,
888*25cf1a30Sjl 			 * then the number of detached pages is
889*25cf1a30Sjl 			 * however many pages are on the board.
890*25cf1a30Sjl 			 * I.e. detached = not in use by OS.
891*25cf1a30Sjl 			 */
892*25cf1a30Sjl 			switch (msp->ms_cm.c_ostate) {
893*25cf1a30Sjl 			/*
894*25cf1a30Sjl 			 * changed to use cfgadm states
895*25cf1a30Sjl 			 *
896*25cf1a30Sjl 			 * was:
897*25cf1a30Sjl 			 *	case DR_STATE_UNREFERENCED:
898*25cf1a30Sjl 			 *	case DR_STATE_UNCONFIGURED:
899*25cf1a30Sjl 			 */
900*25cf1a30Sjl 			case SBD_STAT_UNCONFIGURED:
901*25cf1a30Sjl 				msp->ms_detpages = msp->ms_totpages;
902*25cf1a30Sjl 				break;
903*25cf1a30Sjl 
904*25cf1a30Sjl 			default:
905*25cf1a30Sjl 				break;
906*25cf1a30Sjl 			}
907*25cf1a30Sjl 		}
908*25cf1a30Sjl 
909*25cf1a30Sjl 		/*
910*25cf1a30Sjl 		 * kphysm_del_span_query can report non-reloc pages = total
911*25cf1a30Sjl 		 * pages for memory that is not yet configured
912*25cf1a30Sjl 		 */
913*25cf1a30Sjl 		if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) {
914*25cf1a30Sjl 			struct memlist *ml;
915*25cf1a30Sjl 
916*25cf1a30Sjl 			ml = dr_get_memlist(mp);
917*25cf1a30Sjl 			rv = ml ? dr_del_mlist_query(ml, &mq) : -1;
918*25cf1a30Sjl 			memlist_delete(ml);
919*25cf1a30Sjl 
920*25cf1a30Sjl 			if (rv == KPHYSM_OK) {
921*25cf1a30Sjl 				msp->ms_managed_pages = mq.managed;
922*25cf1a30Sjl 				msp->ms_noreloc_pages = mq.nonrelocatable;
923*25cf1a30Sjl 				msp->ms_noreloc_first =
924*25cf1a30Sjl 				    mq.first_nonrelocatable;
925*25cf1a30Sjl 				msp->ms_noreloc_last =
926*25cf1a30Sjl 				    mq.last_nonrelocatable;
927*25cf1a30Sjl 				msp->ms_cm.c_sflags = 0;
928*25cf1a30Sjl 				if (mq.nonrelocatable) {
929*25cf1a30Sjl 					SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE,
930*25cf1a30Sjl 					    msp->ms_cm.c_sflags);
931*25cf1a30Sjl 				}
932*25cf1a30Sjl 			} else {
933*25cf1a30Sjl 				PR_MEM("%s: kphysm_del_span_query() = %d\n",
934*25cf1a30Sjl 				    f, rv);
935*25cf1a30Sjl 			}
936*25cf1a30Sjl 		}
937*25cf1a30Sjl 
938*25cf1a30Sjl 		/*
939*25cf1a30Sjl 		 * Check source unit state during copy-rename
940*25cf1a30Sjl 		 */
941*25cf1a30Sjl 		if ((mp->sbm_flags & DR_MFLAG_SOURCE) &&
942*25cf1a30Sjl 		    (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED ||
943*25cf1a30Sjl 		    mp->sbm_cm.sbdev_state == DR_STATE_RELEASE))
944*25cf1a30Sjl 			msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED;
945*25cf1a30Sjl 
946*25cf1a30Sjl 		mix++;
947*25cf1a30Sjl 		dsp++;
948*25cf1a30Sjl 	}
949*25cf1a30Sjl 
950*25cf1a30Sjl 	return (mix);
951*25cf1a30Sjl }
952*25cf1a30Sjl 
953*25cf1a30Sjl int
954*25cf1a30Sjl dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
955*25cf1a30Sjl {
956*25cf1a30Sjl 	_NOTE(ARGUNUSED(hp))
957*25cf1a30Sjl 
958*25cf1a30Sjl 	int		err_flag = 0;
959*25cf1a30Sjl 	int		d;
960*25cf1a30Sjl 	sbd_error_t	*err;
961*25cf1a30Sjl 	static fn_t	f = "dr_pre_attach_mem";
962*25cf1a30Sjl 
963*25cf1a30Sjl 	PR_MEM("%s...\n", f);
964*25cf1a30Sjl 
965*25cf1a30Sjl 	for (d = 0; d < devnum; d++) {
966*25cf1a30Sjl 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
967*25cf1a30Sjl 		dr_state_t	state;
968*25cf1a30Sjl 
969*25cf1a30Sjl 		cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path);
970*25cf1a30Sjl 
971*25cf1a30Sjl 		state = mp->sbm_cm.sbdev_state;
972*25cf1a30Sjl 		switch (state) {
973*25cf1a30Sjl 		case DR_STATE_UNCONFIGURED:
974*25cf1a30Sjl 			PR_MEM("%s: recovering from UNCONFIG for %s\n",
975*25cf1a30Sjl 				f,
976*25cf1a30Sjl 				mp->sbm_cm.sbdev_path);
977*25cf1a30Sjl 
978*25cf1a30Sjl 			/* use memlist cached by dr_post_detach_mem_unit */
979*25cf1a30Sjl 			ASSERT(mp->sbm_mlist != NULL);
980*25cf1a30Sjl 			PR_MEM("%s: re-configuring cached memlist for %s:\n",
981*25cf1a30Sjl 				f, mp->sbm_cm.sbdev_path);
982*25cf1a30Sjl 			PR_MEMLIST_DUMP(mp->sbm_mlist);
983*25cf1a30Sjl 
984*25cf1a30Sjl 			/* kphysm del handle should be have been freed */
985*25cf1a30Sjl 			ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
986*25cf1a30Sjl 
987*25cf1a30Sjl 			/*FALLTHROUGH*/
988*25cf1a30Sjl 
989*25cf1a30Sjl 		case DR_STATE_CONNECTED:
990*25cf1a30Sjl 			PR_MEM("%s: reprogramming mem hardware on %s\n",
991*25cf1a30Sjl 				f, mp->sbm_cm.sbdev_bp->b_path);
992*25cf1a30Sjl 
993*25cf1a30Sjl 			PR_MEM("%s: enabling %s\n",
994*25cf1a30Sjl 				f, mp->sbm_cm.sbdev_path);
995*25cf1a30Sjl 
996*25cf1a30Sjl 			err = drmach_mem_enable(mp->sbm_cm.sbdev_id);
997*25cf1a30Sjl 			if (err) {
998*25cf1a30Sjl 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
999*25cf1a30Sjl 				err_flag = 1;
1000*25cf1a30Sjl 			}
1001*25cf1a30Sjl 			break;
1002*25cf1a30Sjl 
1003*25cf1a30Sjl 		default:
1004*25cf1a30Sjl 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE);
1005*25cf1a30Sjl 			err_flag = 1;
1006*25cf1a30Sjl 			break;
1007*25cf1a30Sjl 		}
1008*25cf1a30Sjl 
1009*25cf1a30Sjl 		/* exit for loop if error encountered */
1010*25cf1a30Sjl 		if (err_flag)
1011*25cf1a30Sjl 			break;
1012*25cf1a30Sjl 	}
1013*25cf1a30Sjl 
1014*25cf1a30Sjl 	return (err_flag ? -1 : 0);
1015*25cf1a30Sjl }
1016*25cf1a30Sjl 
1017*25cf1a30Sjl int
1018*25cf1a30Sjl dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1019*25cf1a30Sjl {
1020*25cf1a30Sjl 	_NOTE(ARGUNUSED(hp))
1021*25cf1a30Sjl 
1022*25cf1a30Sjl 	int		d;
1023*25cf1a30Sjl 	static fn_t	f = "dr_post_attach_mem";
1024*25cf1a30Sjl 
1025*25cf1a30Sjl 	PR_MEM("%s...\n", f);
1026*25cf1a30Sjl 
1027*25cf1a30Sjl 	for (d = 0; d < devnum; d++) {
1028*25cf1a30Sjl 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
1029*25cf1a30Sjl 		struct memlist	*mlist, *ml;
1030*25cf1a30Sjl 
1031*25cf1a30Sjl 		mlist = dr_get_memlist(mp);
1032*25cf1a30Sjl 		if (mlist == NULL) {
1033*25cf1a30Sjl 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_MEMFAIL);
1034*25cf1a30Sjl 			continue;
1035*25cf1a30Sjl 		}
1036*25cf1a30Sjl 
1037*25cf1a30Sjl 		/*
1038*25cf1a30Sjl 		 * Verify the memory really did successfully attach
1039*25cf1a30Sjl 		 * by checking for its existence in phys_install.
1040*25cf1a30Sjl 		 */
1041*25cf1a30Sjl 		memlist_read_lock();
1042*25cf1a30Sjl 		if (memlist_intersect(phys_install, mlist) == 0) {
1043*25cf1a30Sjl 			memlist_read_unlock();
1044*25cf1a30Sjl 
1045*25cf1a30Sjl 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1046*25cf1a30Sjl 
1047*25cf1a30Sjl 			PR_MEM("%s: %s memlist not in phys_install",
1048*25cf1a30Sjl 				f, mp->sbm_cm.sbdev_path);
1049*25cf1a30Sjl 
1050*25cf1a30Sjl 			memlist_delete(mlist);
1051*25cf1a30Sjl 			continue;
1052*25cf1a30Sjl 		}
1053*25cf1a30Sjl 		memlist_read_unlock();
1054*25cf1a30Sjl 
1055*25cf1a30Sjl 		for (ml = mlist; ml != NULL; ml = ml->next) {
1056*25cf1a30Sjl 			sbd_error_t *err;
1057*25cf1a30Sjl 
1058*25cf1a30Sjl 			err = drmach_mem_add_span(
1059*25cf1a30Sjl 				mp->sbm_cm.sbdev_id,
1060*25cf1a30Sjl 				ml->address,
1061*25cf1a30Sjl 				ml->size);
1062*25cf1a30Sjl 			if (err)
1063*25cf1a30Sjl 				DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
1064*25cf1a30Sjl 		}
1065*25cf1a30Sjl 
1066*25cf1a30Sjl 		memlist_delete(mlist);
1067*25cf1a30Sjl 
1068*25cf1a30Sjl 		/*
1069*25cf1a30Sjl 		 * Destroy cached memlist, if any.
1070*25cf1a30Sjl 		 * There will be a cached memlist in sbm_mlist if
1071*25cf1a30Sjl 		 * this board is being configured directly after
1072*25cf1a30Sjl 		 * an unconfigure.
1073*25cf1a30Sjl 		 * To support this transition, dr_post_detach_mem
1074*25cf1a30Sjl 		 * left a copy of the last known memlist in sbm_mlist.
1075*25cf1a30Sjl 		 * This memlist could differ from any derived from
1076*25cf1a30Sjl 		 * hardware if while this memunit was last configured
1077*25cf1a30Sjl 		 * the system detected and deleted bad pages from
1078*25cf1a30Sjl 		 * phys_install.  The location of those bad pages
1079*25cf1a30Sjl 		 * will be reflected in the cached memlist.
1080*25cf1a30Sjl 		 */
1081*25cf1a30Sjl 		if (mp->sbm_mlist) {
1082*25cf1a30Sjl 			memlist_delete(mp->sbm_mlist);
1083*25cf1a30Sjl 			mp->sbm_mlist = NULL;
1084*25cf1a30Sjl 		}
1085*25cf1a30Sjl 	}
1086*25cf1a30Sjl 
1087*25cf1a30Sjl 	return (0);
1088*25cf1a30Sjl }
1089*25cf1a30Sjl 
1090*25cf1a30Sjl int
1091*25cf1a30Sjl dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1092*25cf1a30Sjl {
1093*25cf1a30Sjl 	_NOTE(ARGUNUSED(hp))
1094*25cf1a30Sjl 
1095*25cf1a30Sjl 	int d;
1096*25cf1a30Sjl 
1097*25cf1a30Sjl 	for (d = 0; d < devnum; d++) {
1098*25cf1a30Sjl 		dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
1099*25cf1a30Sjl 
1100*25cf1a30Sjl 		cmn_err(CE_CONT, "OS unconfigure %s", mp->sbm_cm.sbdev_path);
1101*25cf1a30Sjl 	}
1102*25cf1a30Sjl 
1103*25cf1a30Sjl 	return (0);
1104*25cf1a30Sjl }
1105*25cf1a30Sjl 
1106*25cf1a30Sjl int
1107*25cf1a30Sjl dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1108*25cf1a30Sjl {
1109*25cf1a30Sjl 	_NOTE(ARGUNUSED(hp))
1110*25cf1a30Sjl 
1111*25cf1a30Sjl 	int		d, rv;
1112*25cf1a30Sjl 	static fn_t	f = "dr_post_detach_mem";
1113*25cf1a30Sjl 
1114*25cf1a30Sjl 	PR_MEM("%s...\n", f);
1115*25cf1a30Sjl 
1116*25cf1a30Sjl 	rv = 0;
1117*25cf1a30Sjl 	for (d = 0; d < devnum; d++) {
1118*25cf1a30Sjl 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
1119*25cf1a30Sjl 
1120*25cf1a30Sjl 		ASSERT(mp->sbm_cm.sbdev_bp == hp->h_bd);
1121*25cf1a30Sjl 
1122*25cf1a30Sjl 		if (dr_post_detach_mem_unit(mp))
1123*25cf1a30Sjl 			rv = -1;
1124*25cf1a30Sjl 	}
1125*25cf1a30Sjl 
1126*25cf1a30Sjl 	return (rv);
1127*25cf1a30Sjl }
1128*25cf1a30Sjl 
1129*25cf1a30Sjl static void
1130*25cf1a30Sjl dr_add_memory_spans(dr_mem_unit_t *mp, struct memlist *ml)
1131*25cf1a30Sjl {
1132*25cf1a30Sjl 	static fn_t	f = "dr_add_memory_spans";
1133*25cf1a30Sjl 
1134*25cf1a30Sjl 	PR_MEM("%s...", f);
1135*25cf1a30Sjl 	PR_MEMLIST_DUMP(ml);
1136*25cf1a30Sjl 
1137*25cf1a30Sjl #ifdef DEBUG
1138*25cf1a30Sjl 	memlist_read_lock();
1139*25cf1a30Sjl 	if (memlist_intersect(phys_install, ml)) {
1140*25cf1a30Sjl 		PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f);
1141*25cf1a30Sjl 	}
1142*25cf1a30Sjl 	memlist_read_unlock();
1143*25cf1a30Sjl #endif
1144*25cf1a30Sjl 
1145*25cf1a30Sjl 	for (; ml; ml = ml->next) {
1146*25cf1a30Sjl 		pfn_t		 base;
1147*25cf1a30Sjl 		pgcnt_t		 npgs;
1148*25cf1a30Sjl 		int		 rv;
1149*25cf1a30Sjl 		sbd_error_t	*err;
1150*25cf1a30Sjl 
1151*25cf1a30Sjl 		base = _b64top(ml->address);
1152*25cf1a30Sjl 		npgs = _b64top(ml->size);
1153*25cf1a30Sjl 
1154*25cf1a30Sjl 		rv = kphysm_add_memory_dynamic(base, npgs);
1155*25cf1a30Sjl 
1156*25cf1a30Sjl 		err = drmach_mem_add_span(
1157*25cf1a30Sjl 			mp->sbm_cm.sbdev_id,
1158*25cf1a30Sjl 			ml->address,
1159*25cf1a30Sjl 			ml->size);
1160*25cf1a30Sjl 
1161*25cf1a30Sjl 		if (err)
1162*25cf1a30Sjl 			DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
1163*25cf1a30Sjl 
1164*25cf1a30Sjl 		if (rv != KPHYSM_OK) {
1165*25cf1a30Sjl 			cmn_err(CE_WARN, "%s:"
1166*25cf1a30Sjl 				" unexpected kphysm_add_memory_dynamic"
1167*25cf1a30Sjl 				" return value %d;"
1168*25cf1a30Sjl 				" basepfn=0x%lx, npages=%ld\n",
1169*25cf1a30Sjl 				f, rv, base, npgs);
1170*25cf1a30Sjl 
1171*25cf1a30Sjl 			continue;
1172*25cf1a30Sjl 		}
1173*25cf1a30Sjl 	}
1174*25cf1a30Sjl }
1175*25cf1a30Sjl 
1176*25cf1a30Sjl static int
1177*25cf1a30Sjl dr_post_detach_mem_unit(dr_mem_unit_t *s_mp)
1178*25cf1a30Sjl {
1179*25cf1a30Sjl 	uint64_t	sz = s_mp->sbm_slice_size;
1180*25cf1a30Sjl 	uint64_t	sm = sz - 1;
1181*25cf1a30Sjl 	/* old and new below refer to PAs before and after copy-rename */
1182*25cf1a30Sjl 	uint64_t	s_old_basepa, s_new_basepa;
1183*25cf1a30Sjl 	uint64_t	t_old_basepa, t_new_basepa;
1184*25cf1a30Sjl 	dr_mem_unit_t	*t_mp, *x_mp;
1185*25cf1a30Sjl 	drmach_mem_info_t	minfo;
1186*25cf1a30Sjl 	struct memlist	*ml;
1187*25cf1a30Sjl 	struct memlist	*t_excess_mlist;
1188*25cf1a30Sjl 	int		rv;
1189*25cf1a30Sjl 	int		s_excess_mem_deleted = 0;
1190*25cf1a30Sjl 	sbd_error_t	*err;
1191*25cf1a30Sjl 	static fn_t	f = "dr_post_detach_mem_unit";
1192*25cf1a30Sjl 
1193*25cf1a30Sjl 	PR_MEM("%s...\n", f);
1194*25cf1a30Sjl 
1195*25cf1a30Sjl 	/* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */
1196*25cf1a30Sjl 	PR_MEM("%s: %s: deleted memlist (EMPTY maybe okay):\n",
1197*25cf1a30Sjl 		f, s_mp->sbm_cm.sbdev_path);
1198*25cf1a30Sjl 	PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
1199*25cf1a30Sjl 
1200*25cf1a30Sjl 	/* sanity check */
1201*25cf1a30Sjl 	ASSERT(s_mp->sbm_del_mlist == NULL ||
1202*25cf1a30Sjl 		(s_mp->sbm_flags & DR_MFLAG_RELDONE) != 0);
1203*25cf1a30Sjl 
1204*25cf1a30Sjl 	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1205*25cf1a30Sjl 		t_mp = s_mp->sbm_peer;
1206*25cf1a30Sjl 		ASSERT(t_mp != NULL);
1207*25cf1a30Sjl 		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
1208*25cf1a30Sjl 		ASSERT(t_mp->sbm_peer == s_mp);
1209*25cf1a30Sjl 
1210*25cf1a30Sjl 		ASSERT(t_mp->sbm_flags & DR_MFLAG_RELDONE);
1211*25cf1a30Sjl 		ASSERT(t_mp->sbm_del_mlist);
1212*25cf1a30Sjl 
1213*25cf1a30Sjl 		PR_MEM("%s: target %s: deleted memlist:\n",
1214*25cf1a30Sjl 			f, t_mp->sbm_cm.sbdev_path);
1215*25cf1a30Sjl 		PR_MEMLIST_DUMP(t_mp->sbm_del_mlist);
1216*25cf1a30Sjl 	} else {
1217*25cf1a30Sjl 		/* this is no target unit */
1218*25cf1a30Sjl 		t_mp = NULL;
1219*25cf1a30Sjl 	}
1220*25cf1a30Sjl 
1221*25cf1a30Sjl 	/*
1222*25cf1a30Sjl 	 * Verify the memory really did successfully detach
1223*25cf1a30Sjl 	 * by checking for its non-existence in phys_install.
1224*25cf1a30Sjl 	 */
1225*25cf1a30Sjl 	rv = 0;
1226*25cf1a30Sjl 	memlist_read_lock();
1227*25cf1a30Sjl 	if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
1228*25cf1a30Sjl 		x_mp = s_mp;
1229*25cf1a30Sjl 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
1230*25cf1a30Sjl 	}
1231*25cf1a30Sjl 	if (rv == 0 && t_mp && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
1232*25cf1a30Sjl 		x_mp = t_mp;
1233*25cf1a30Sjl 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
1234*25cf1a30Sjl 	}
1235*25cf1a30Sjl 	memlist_read_unlock();
1236*25cf1a30Sjl 
1237*25cf1a30Sjl 	if (rv) {
1238*25cf1a30Sjl 		/* error: memlist still in phys_install */
1239*25cf1a30Sjl 		DR_DEV_INTERNAL_ERROR(&x_mp->sbm_cm);
1240*25cf1a30Sjl 	}
1241*25cf1a30Sjl 
1242*25cf1a30Sjl 	/*
1243*25cf1a30Sjl 	 * clean mem unit state and bail out if an error has been recorded.
1244*25cf1a30Sjl 	 */
1245*25cf1a30Sjl 	rv = 0;
1246*25cf1a30Sjl 	if (s_mp->sbm_cm.sbdev_error) {
1247*25cf1a30Sjl 		PR_MEM("%s: %s flags=%x", f,
1248*25cf1a30Sjl 			s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
1249*25cf1a30Sjl 		DR_DEV_CLR_UNREFERENCED(&s_mp->sbm_cm);
1250*25cf1a30Sjl 		DR_DEV_CLR_RELEASED(&s_mp->sbm_cm);
1251*25cf1a30Sjl 		dr_device_transition(&s_mp->sbm_cm, DR_STATE_CONFIGURED);
1252*25cf1a30Sjl 		rv = -1;
1253*25cf1a30Sjl 	}
1254*25cf1a30Sjl 	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error != NULL) {
1255*25cf1a30Sjl 		PR_MEM("%s: %s flags=%x", f,
1256*25cf1a30Sjl 			s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
1257*25cf1a30Sjl 		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1258*25cf1a30Sjl 		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1259*25cf1a30Sjl 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1260*25cf1a30Sjl 		rv = -1;
1261*25cf1a30Sjl 	}
1262*25cf1a30Sjl 	if (rv)
1263*25cf1a30Sjl 		goto cleanup;
1264*25cf1a30Sjl 
1265*25cf1a30Sjl 	s_old_basepa = _ptob64(s_mp->sbm_basepfn);
1266*25cf1a30Sjl 	err = drmach_mem_get_info(s_mp->sbm_cm.sbdev_id, &minfo);
1267*25cf1a30Sjl 	ASSERT(err == NULL);
1268*25cf1a30Sjl 	s_new_basepa = minfo.mi_basepa;
1269*25cf1a30Sjl 
1270*25cf1a30Sjl 	PR_MEM("%s:s_old_basepa: 0x%lx\n", f, s_old_basepa);
1271*25cf1a30Sjl 	PR_MEM("%s:s_new_basepa: 0x%lx\n", f, s_new_basepa);
1272*25cf1a30Sjl 
1273*25cf1a30Sjl 	if (t_mp != NULL) {
1274*25cf1a30Sjl 		struct memlist *s_copy_mlist;
1275*25cf1a30Sjl 
1276*25cf1a30Sjl 		t_old_basepa = _ptob64(t_mp->sbm_basepfn);
1277*25cf1a30Sjl 		err = drmach_mem_get_info(t_mp->sbm_cm.sbdev_id, &minfo);
1278*25cf1a30Sjl 		ASSERT(err == NULL);
1279*25cf1a30Sjl 		t_new_basepa = minfo.mi_basepa;
1280*25cf1a30Sjl 
1281*25cf1a30Sjl 		PR_MEM("%s:t_old_basepa: 0x%lx\n", f, t_old_basepa);
1282*25cf1a30Sjl 		PR_MEM("%s:t_new_basepa: 0x%lx\n", f, t_new_basepa);
1283*25cf1a30Sjl 
1284*25cf1a30Sjl 		/*
1285*25cf1a30Sjl 		 * Construct copy list with original source addresses.
1286*25cf1a30Sjl 		 * Used to add back excess target mem.
1287*25cf1a30Sjl 		 */
1288*25cf1a30Sjl 		s_copy_mlist = memlist_dup(s_mp->sbm_mlist);
1289*25cf1a30Sjl 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
1290*25cf1a30Sjl 			s_copy_mlist = memlist_del_span(s_copy_mlist,
1291*25cf1a30Sjl 			    ml->address, ml->size);
1292*25cf1a30Sjl 		}
1293*25cf1a30Sjl 
1294*25cf1a30Sjl 		PR_MEM("%s: source copy list:\n:", f);
1295*25cf1a30Sjl 		PR_MEMLIST_DUMP(s_copy_mlist);
1296*25cf1a30Sjl 
1297*25cf1a30Sjl 		/*
1298*25cf1a30Sjl 		 * We had to swap mem-units, so update
1299*25cf1a30Sjl 		 * memlists accordingly with new base
1300*25cf1a30Sjl 		 * addresses.
1301*25cf1a30Sjl 		 */
1302*25cf1a30Sjl 		for (ml = t_mp->sbm_mlist; ml; ml = ml->next) {
1303*25cf1a30Sjl 			ml->address -= t_old_basepa;
1304*25cf1a30Sjl 			ml->address += t_new_basepa;
1305*25cf1a30Sjl 		}
1306*25cf1a30Sjl 
1307*25cf1a30Sjl 		/*
1308*25cf1a30Sjl 		 * There is no need to explicitly rename the target delete
1309*25cf1a30Sjl 		 * memlist, because sbm_del_mlist and sbm_mlist always
1310*25cf1a30Sjl 		 * point to the same memlist for a copy/rename operation.
1311*25cf1a30Sjl 		 */
1312*25cf1a30Sjl 		ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1313*25cf1a30Sjl 
1314*25cf1a30Sjl 		PR_MEM("%s: renamed target memlist and delete memlist:\n", f);
1315*25cf1a30Sjl 		PR_MEMLIST_DUMP(t_mp->sbm_mlist);
1316*25cf1a30Sjl 
1317*25cf1a30Sjl 		for (ml = s_mp->sbm_mlist; ml; ml = ml->next) {
1318*25cf1a30Sjl 			ml->address -= s_old_basepa;
1319*25cf1a30Sjl 			ml->address += s_new_basepa;
1320*25cf1a30Sjl 		}
1321*25cf1a30Sjl 
1322*25cf1a30Sjl 		PR_MEM("%s: renamed source memlist:\n", f);
1323*25cf1a30Sjl 		PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1324*25cf1a30Sjl 
1325*25cf1a30Sjl 		/*
1326*25cf1a30Sjl 		 * Keep track of dynamically added segments
1327*25cf1a30Sjl 		 * since they cannot be split if we need to delete
1328*25cf1a30Sjl 		 * excess source memory later for this board.
1329*25cf1a30Sjl 		 */
1330*25cf1a30Sjl 		if (t_mp->sbm_dyn_segs)
1331*25cf1a30Sjl 			memlist_delete(t_mp->sbm_dyn_segs);
1332*25cf1a30Sjl 		t_mp->sbm_dyn_segs = s_mp->sbm_dyn_segs;
1333*25cf1a30Sjl 		s_mp->sbm_dyn_segs = NULL;
1334*25cf1a30Sjl 
1335*25cf1a30Sjl 		/*
1336*25cf1a30Sjl 		 * Add back excess target memory.
1337*25cf1a30Sjl 		 * Subtract out the portion of the target memory
1338*25cf1a30Sjl 		 * node that was taken over by the source memory
1339*25cf1a30Sjl 		 * node.
1340*25cf1a30Sjl 		 */
1341*25cf1a30Sjl 		t_excess_mlist = memlist_dup(t_mp->sbm_mlist);
1342*25cf1a30Sjl 		for (ml = s_copy_mlist; ml; ml = ml->next) {
1343*25cf1a30Sjl 			t_excess_mlist =
1344*25cf1a30Sjl 			    memlist_del_span(t_excess_mlist,
1345*25cf1a30Sjl 			    ml->address, ml->size);
1346*25cf1a30Sjl 		}
1347*25cf1a30Sjl 
1348*25cf1a30Sjl 		/*
1349*25cf1a30Sjl 		 * Update dynamically added segs
1350*25cf1a30Sjl 		 */
1351*25cf1a30Sjl 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) {
1352*25cf1a30Sjl 			t_mp->sbm_dyn_segs =
1353*25cf1a30Sjl 			    memlist_del_span(t_mp->sbm_dyn_segs,
1354*25cf1a30Sjl 			    ml->address, ml->size);
1355*25cf1a30Sjl 		}
1356*25cf1a30Sjl 		for (ml = t_excess_mlist; ml; ml = ml->next) {
1357*25cf1a30Sjl 			t_mp->sbm_dyn_segs =
1358*25cf1a30Sjl 			    memlist_cat_span(t_mp->sbm_dyn_segs,
1359*25cf1a30Sjl 			    ml->address, ml->size);
1360*25cf1a30Sjl 		}
1361*25cf1a30Sjl 		PR_MEM("%s: %s: updated dynamic seg list:\n",
1362*25cf1a30Sjl 		    f, t_mp->sbm_cm.sbdev_path);
1363*25cf1a30Sjl 		PR_MEMLIST_DUMP(t_mp->sbm_dyn_segs);
1364*25cf1a30Sjl 
1365*25cf1a30Sjl 		if (t_excess_mlist != NULL) {
1366*25cf1a30Sjl 			/*
1367*25cf1a30Sjl 			 * After the small <-> big copy-rename,
1368*25cf1a30Sjl 			 * the original address space for the
1369*25cf1a30Sjl 			 * source board may have excess to be
1370*25cf1a30Sjl 			 * deleted. This is a case different
1371*25cf1a30Sjl 			 * from the big->small excess source
1372*25cf1a30Sjl 			 * memory case listed below.
1373*25cf1a30Sjl 			 * Remove s_mp->sbm_del_mlist from
1374*25cf1a30Sjl 			 * the kernel cage glist.
1375*25cf1a30Sjl 			 */
1376*25cf1a30Sjl 			for (ml = s_mp->sbm_del_mlist; ml;
1377*25cf1a30Sjl 				ml = ml->next) {
1378*25cf1a30Sjl 				PR_MEM("%s: delete small<->big copy-"
1379*25cf1a30Sjl 				    "rename source excess memory", f);
1380*25cf1a30Sjl 				PR_MEMLIST_DUMP(ml);
1381*25cf1a30Sjl 
1382*25cf1a30Sjl 				err = drmach_mem_del_span(
1383*25cf1a30Sjl 					s_mp->sbm_cm.sbdev_id,
1384*25cf1a30Sjl 					    ml->address, ml->size);
1385*25cf1a30Sjl 				if (err)
1386*25cf1a30Sjl 					DRERR_SET_C(&s_mp->
1387*25cf1a30Sjl 					    sbm_cm.sbdev_error, &err);
1388*25cf1a30Sjl 				ASSERT(err == NULL);
1389*25cf1a30Sjl 			}
1390*25cf1a30Sjl 
1391*25cf1a30Sjl 			/*
1392*25cf1a30Sjl 			 * mark sbm_del_mlist as been deleted so that
1393*25cf1a30Sjl 			 * we won't end up to delete it twice later
1394*25cf1a30Sjl 			 * from the span list
1395*25cf1a30Sjl 			 */
1396*25cf1a30Sjl 			s_excess_mem_deleted = 1;
1397*25cf1a30Sjl 
1398*25cf1a30Sjl 			PR_MEM("%s: adding back remaining portion"
1399*25cf1a30Sjl 				" of %s, memlist:\n",
1400*25cf1a30Sjl 				f, t_mp->sbm_cm.sbdev_path);
1401*25cf1a30Sjl 			PR_MEMLIST_DUMP(t_excess_mlist);
1402*25cf1a30Sjl 
1403*25cf1a30Sjl 			dr_add_memory_spans(s_mp, t_excess_mlist);
1404*25cf1a30Sjl 			memlist_delete(t_excess_mlist);
1405*25cf1a30Sjl 		}
1406*25cf1a30Sjl 		memlist_delete(s_copy_mlist);
1407*25cf1a30Sjl 
1408*25cf1a30Sjl #ifdef DEBUG
1409*25cf1a30Sjl 		/*
1410*25cf1a30Sjl 		 * s_mp->sbm_del_mlist may still needed
1411*25cf1a30Sjl 		 */
1412*25cf1a30Sjl 		PR_MEM("%s: source delete memeory flag %d",
1413*25cf1a30Sjl 		    f, s_excess_mem_deleted);
1414*25cf1a30Sjl 		PR_MEM("%s: source delete memlist", f);
1415*25cf1a30Sjl 		PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
1416*25cf1a30Sjl #endif
1417*25cf1a30Sjl 
1418*25cf1a30Sjl 	}
1419*25cf1a30Sjl 
1420*25cf1a30Sjl 	if (t_mp != NULL) {
1421*25cf1a30Sjl 		/* delete target's entire address space */
1422*25cf1a30Sjl 		err = drmach_mem_del_span(
1423*25cf1a30Sjl 			t_mp->sbm_cm.sbdev_id, t_old_basepa & ~ sm, sz);
1424*25cf1a30Sjl 		if (err)
1425*25cf1a30Sjl 			DRERR_SET_C(&t_mp->sbm_cm.sbdev_error, &err);
1426*25cf1a30Sjl 		ASSERT(err == NULL);
1427*25cf1a30Sjl 
1428*25cf1a30Sjl 		/*
1429*25cf1a30Sjl 		 * After the copy/rename, the original address space
1430*25cf1a30Sjl 		 * for the source board (which is now located on the
1431*25cf1a30Sjl 		 * target board) may now have some excess to be deleted.
1432*25cf1a30Sjl 		 * Those excess memory on the source board are kept in
1433*25cf1a30Sjl 		 * source board's sbm_del_mlist
1434*25cf1a30Sjl 		 */
1435*25cf1a30Sjl 		for (ml = s_mp->sbm_del_mlist; !s_excess_mem_deleted && ml;
1436*25cf1a30Sjl 			ml = ml->next) {
1437*25cf1a30Sjl 			PR_MEM("%s: delete source excess memory", f);
1438*25cf1a30Sjl 			PR_MEMLIST_DUMP(ml);
1439*25cf1a30Sjl 
1440*25cf1a30Sjl 			err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1441*25cf1a30Sjl 				ml->address, ml->size);
1442*25cf1a30Sjl 			if (err)
1443*25cf1a30Sjl 				DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
1444*25cf1a30Sjl 			ASSERT(err == NULL);
1445*25cf1a30Sjl 		}
1446*25cf1a30Sjl 
1447*25cf1a30Sjl 	} else {
1448*25cf1a30Sjl 		/* delete board's entire address space */
1449*25cf1a30Sjl 		err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1450*25cf1a30Sjl 						s_old_basepa & ~ sm, sz);
1451*25cf1a30Sjl 		if (err)
1452*25cf1a30Sjl 			DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
1453*25cf1a30Sjl 		ASSERT(err == NULL);
1454*25cf1a30Sjl 	}
1455*25cf1a30Sjl 
1456*25cf1a30Sjl cleanup:
1457*25cf1a30Sjl 	/* clean up target mem unit */
1458*25cf1a30Sjl 	if (t_mp != NULL) {
1459*25cf1a30Sjl 		memlist_delete(t_mp->sbm_del_mlist);
1460*25cf1a30Sjl 		/* no need to delete sbm_mlist, it shares sbm_del_mlist */
1461*25cf1a30Sjl 
1462*25cf1a30Sjl 		t_mp->sbm_del_mlist = NULL;
1463*25cf1a30Sjl 		t_mp->sbm_mlist = NULL;
1464*25cf1a30Sjl 		t_mp->sbm_peer = NULL;
1465*25cf1a30Sjl 		t_mp->sbm_flags = 0;
1466*25cf1a30Sjl 		t_mp->sbm_cm.sbdev_busy = 0;
1467*25cf1a30Sjl 		dr_init_mem_unit_data(t_mp);
1468*25cf1a30Sjl 
1469*25cf1a30Sjl 	}
1470*25cf1a30Sjl 	if (t_mp != NULL && t_mp->sbm_cm.sbdev_error == NULL) {
1471*25cf1a30Sjl 		/*
1472*25cf1a30Sjl 		 * now that copy/rename has completed, undo this
1473*25cf1a30Sjl 		 * work that was done in dr_release_mem_done.
1474*25cf1a30Sjl 		 */
1475*25cf1a30Sjl 		DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1476*25cf1a30Sjl 		DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1477*25cf1a30Sjl 		dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1478*25cf1a30Sjl 	}
1479*25cf1a30Sjl 
1480*25cf1a30Sjl 	/*
1481*25cf1a30Sjl 	 * clean up (source) board's mem unit structure.
1482*25cf1a30Sjl 	 * NOTE: sbm_mlist is retained if no error has been record (in other
1483*25cf1a30Sjl 	 * words, when s_mp->sbm_cm.sbdev_error is NULL). This memlist is
1484*25cf1a30Sjl 	 * referred to elsewhere as the cached memlist.  The cached memlist
1485*25cf1a30Sjl 	 * is used to re-attach (configure back in) this memunit from the
1486*25cf1a30Sjl 	 * unconfigured state.  The memlist is retained because it may
1487*25cf1a30Sjl 	 * represent bad pages that were detected while the memory was
1488*25cf1a30Sjl 	 * configured into the OS.  The OS deletes bad pages from phys_install.
1489*25cf1a30Sjl 	 * Those deletes, if any, will be represented in the cached mlist.
1490*25cf1a30Sjl 	 */
1491*25cf1a30Sjl 	if (s_mp->sbm_del_mlist && s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1492*25cf1a30Sjl 		memlist_delete(s_mp->sbm_del_mlist);
1493*25cf1a30Sjl 
1494*25cf1a30Sjl 	if (s_mp->sbm_cm.sbdev_error && s_mp->sbm_mlist) {
1495*25cf1a30Sjl 		memlist_delete(s_mp->sbm_mlist);
1496*25cf1a30Sjl 		s_mp->sbm_mlist = NULL;
1497*25cf1a30Sjl 	}
1498*25cf1a30Sjl 
1499*25cf1a30Sjl 	if (s_mp->sbm_dyn_segs != NULL && s_mp->sbm_cm.sbdev_error == 0) {
1500*25cf1a30Sjl 		memlist_delete(s_mp->sbm_dyn_segs);
1501*25cf1a30Sjl 		s_mp->sbm_dyn_segs = NULL;
1502*25cf1a30Sjl 	}
1503*25cf1a30Sjl 
1504*25cf1a30Sjl 	s_mp->sbm_del_mlist = NULL;
1505*25cf1a30Sjl 	s_mp->sbm_peer = NULL;
1506*25cf1a30Sjl 	s_mp->sbm_flags = 0;
1507*25cf1a30Sjl 	s_mp->sbm_cm.sbdev_busy = 0;
1508*25cf1a30Sjl 	dr_init_mem_unit_data(s_mp);
1509*25cf1a30Sjl 
1510*25cf1a30Sjl 	PR_MEM("%s: cached memlist for %s:", f, s_mp->sbm_cm.sbdev_path);
1511*25cf1a30Sjl 	PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1512*25cf1a30Sjl 
1513*25cf1a30Sjl 	return (0);
1514*25cf1a30Sjl }
1515*25cf1a30Sjl 
1516*25cf1a30Sjl /*
1517*25cf1a30Sjl  * Successful return from this function will have the memory
1518*25cf1a30Sjl  * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated
1519*25cf1a30Sjl  * and waiting.  This routine's job is to select the memory that
1520*25cf1a30Sjl  * actually has to be released (detached) which may not necessarily
1521*25cf1a30Sjl  * be the same memory node that came in in devlist[],
1522*25cf1a30Sjl  * i.e. a copy-rename is needed.
1523*25cf1a30Sjl  */
1524*25cf1a30Sjl int
1525*25cf1a30Sjl dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1526*25cf1a30Sjl {
1527*25cf1a30Sjl 	int		d;
1528*25cf1a30Sjl 	int		err_flag = 0;
1529*25cf1a30Sjl 	static fn_t	f = "dr_pre_release_mem";
1530*25cf1a30Sjl 
1531*25cf1a30Sjl 	PR_MEM("%s...\n", f);
1532*25cf1a30Sjl 
1533*25cf1a30Sjl 	for (d = 0; d < devnum; d++) {
1534*25cf1a30Sjl 		dr_mem_unit_t	*mp = (dr_mem_unit_t *)devlist[d];
1535*25cf1a30Sjl 		int		rv;
1536*25cf1a30Sjl 		memquery_t	mq;
1537*25cf1a30Sjl 		struct memlist	*ml;
1538*25cf1a30Sjl 
1539*25cf1a30Sjl 		if (mp->sbm_cm.sbdev_error) {
1540*25cf1a30Sjl 			err_flag = 1;
1541*25cf1a30Sjl 			continue;
1542*25cf1a30Sjl 		} else if (!kcage_on) {
1543*25cf1a30Sjl 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_KCAGE_OFF);
1544*25cf1a30Sjl 			err_flag = 1;
1545*25cf1a30Sjl 			continue;
1546*25cf1a30Sjl 		}
1547*25cf1a30Sjl 
1548*25cf1a30Sjl 		if (mp->sbm_flags & DR_MFLAG_RESERVED) {
1549*25cf1a30Sjl 			/*
1550*25cf1a30Sjl 			 * Board is currently involved in a delete
1551*25cf1a30Sjl 			 * memory operation. Can't detach this guy until
1552*25cf1a30Sjl 			 * that operation completes.
1553*25cf1a30Sjl 			 */
1554*25cf1a30Sjl 			dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_INVAL);
1555*25cf1a30Sjl 			err_flag = 1;
1556*25cf1a30Sjl 			break;
1557*25cf1a30Sjl 		}
1558*25cf1a30Sjl 
1559*25cf1a30Sjl 		/* flags should be clean at this time */
1560*25cf1a30Sjl 		ASSERT(mp->sbm_flags == 0);
1561*25cf1a30Sjl 
1562*25cf1a30Sjl 		ASSERT(mp->sbm_mlist == NULL);
1563*25cf1a30Sjl 		ASSERT(mp->sbm_del_mlist == NULL);
1564*25cf1a30Sjl 		if (mp->sbm_mlist != NULL) {
1565*25cf1a30Sjl 			memlist_delete(mp->sbm_mlist);
1566*25cf1a30Sjl 			mp->sbm_mlist = NULL;
1567*25cf1a30Sjl 		}
1568*25cf1a30Sjl 
1569*25cf1a30Sjl 		ml = dr_get_memlist(mp);
1570*25cf1a30Sjl 		if (ml == NULL) {
1571*25cf1a30Sjl 			err_flag = 1;
1572*25cf1a30Sjl 			PR_MEM("%s: no memlist found for %s\n",
1573*25cf1a30Sjl 			    f, mp->sbm_cm.sbdev_path);
1574*25cf1a30Sjl 			continue;
1575*25cf1a30Sjl 		}
1576*25cf1a30Sjl 
1577*25cf1a30Sjl 		/*
1578*25cf1a30Sjl 		 * Check whether the detaching memory requires a
1579*25cf1a30Sjl 		 * copy-rename.
1580*25cf1a30Sjl 		 */
1581*25cf1a30Sjl 		ASSERT(mp->sbm_npages != 0);
1582*25cf1a30Sjl 		rv = dr_del_mlist_query(ml, &mq);
1583*25cf1a30Sjl 		if (rv != KPHYSM_OK) {
1584*25cf1a30Sjl 			memlist_delete(ml);
1585*25cf1a30Sjl 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1586*25cf1a30Sjl 			err_flag = 1;
1587*25cf1a30Sjl 			break;
1588*25cf1a30Sjl 		}
1589*25cf1a30Sjl 
1590*25cf1a30Sjl 		if (mq.nonrelocatable != 0) {
1591*25cf1a30Sjl 			if (!(dr_cmd_flags(hp) &
1592*25cf1a30Sjl 				(SBD_FLAG_FORCE | SBD_FLAG_QUIESCE_OKAY))) {
1593*25cf1a30Sjl 				memlist_delete(ml);
1594*25cf1a30Sjl 				/* caller wasn't prompted for a suspend */
1595*25cf1a30Sjl 				dr_dev_err(CE_WARN, &mp->sbm_cm,
1596*25cf1a30Sjl 					ESBD_QUIESCE_REQD);
1597*25cf1a30Sjl 				err_flag = 1;
1598*25cf1a30Sjl 				break;
1599*25cf1a30Sjl 			}
1600*25cf1a30Sjl 		}
1601*25cf1a30Sjl 
1602*25cf1a30Sjl 		/* allocate a kphysm handle */
1603*25cf1a30Sjl 		rv = kphysm_del_gethandle(&mp->sbm_memhandle);
1604*25cf1a30Sjl 		if (rv != KPHYSM_OK) {
1605*25cf1a30Sjl 			memlist_delete(ml);
1606*25cf1a30Sjl 
1607*25cf1a30Sjl 			DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1608*25cf1a30Sjl 			err_flag = 1;
1609*25cf1a30Sjl 			break;
1610*25cf1a30Sjl 		}
1611*25cf1a30Sjl 		mp->sbm_flags |= DR_MFLAG_RELOWNER;
1612*25cf1a30Sjl 
1613*25cf1a30Sjl 		if ((mq.nonrelocatable != 0) ||
1614*25cf1a30Sjl 			dr_reserve_mem_spans(&mp->sbm_memhandle, ml)) {
1615*25cf1a30Sjl 			/*
1616*25cf1a30Sjl 			 * Either the detaching memory node contains
1617*25cf1a30Sjl 			 * non-reloc memory or we failed to reserve the
1618*25cf1a30Sjl 			 * detaching memory node (which did _not_ have
1619*25cf1a30Sjl 			 * any non-reloc memory, i.e. some non-reloc mem
1620*25cf1a30Sjl 			 * got onboard).
1621*25cf1a30Sjl 			 */
1622*25cf1a30Sjl 
1623*25cf1a30Sjl 			if (dr_select_mem_target(hp, mp, ml)) {
1624*25cf1a30Sjl 				int rv;
1625*25cf1a30Sjl 
1626*25cf1a30Sjl 				/*
1627*25cf1a30Sjl 				 * We had no luck locating a target
1628*25cf1a30Sjl 				 * memory node to be the recipient of
1629*25cf1a30Sjl 				 * the non-reloc memory on the node
1630*25cf1a30Sjl 				 * we're trying to detach.
				 * Clean up by disposing the mem handle
1632*25cf1a30Sjl 				 * and the mem list.
1633*25cf1a30Sjl 				 */
1634*25cf1a30Sjl 				rv = kphysm_del_release(mp->sbm_memhandle);
1635*25cf1a30Sjl 				if (rv != KPHYSM_OK) {
1636*25cf1a30Sjl 					/*
1637*25cf1a30Sjl 					 * can do nothing but complain
1638*25cf1a30Sjl 					 * and hope helpful for debug
1639*25cf1a30Sjl 					 */
1640*25cf1a30Sjl 					cmn_err(CE_WARN, "%s: unexpected"
1641*25cf1a30Sjl 						" kphysm_del_release return"
1642*25cf1a30Sjl 						" value %d",
1643*25cf1a30Sjl 						f, rv);
1644*25cf1a30Sjl 				}
1645*25cf1a30Sjl 				mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
1646*25cf1a30Sjl 
1647*25cf1a30Sjl 				memlist_delete(ml);
1648*25cf1a30Sjl 
1649*25cf1a30Sjl 				/* make sure sbm_flags is clean */
1650*25cf1a30Sjl 				ASSERT(mp->sbm_flags == 0);
1651*25cf1a30Sjl 
1652*25cf1a30Sjl 				dr_dev_err(CE_WARN,
1653*25cf1a30Sjl 					&mp->sbm_cm, ESBD_NO_TARGET);
1654*25cf1a30Sjl 
1655*25cf1a30Sjl 				err_flag = 1;
1656*25cf1a30Sjl 				break;
1657*25cf1a30Sjl 			}
1658*25cf1a30Sjl 
1659*25cf1a30Sjl 			/*
1660*25cf1a30Sjl 			 * ml is not memlist_delete'd here because
1661*25cf1a30Sjl 			 * it has been assigned to mp->sbm_mlist
1662*25cf1a30Sjl 			 * by dr_select_mem_target.
1663*25cf1a30Sjl 			 */
1664*25cf1a30Sjl 		} else {
1665*25cf1a30Sjl 			/* no target needed to detach this board */
1666*25cf1a30Sjl 			mp->sbm_flags |= DR_MFLAG_RESERVED;
1667*25cf1a30Sjl 			mp->sbm_peer = NULL;
1668*25cf1a30Sjl 			mp->sbm_del_mlist = ml;
1669*25cf1a30Sjl 			mp->sbm_mlist = ml;
1670*25cf1a30Sjl 			mp->sbm_cm.sbdev_busy = 1;
1671*25cf1a30Sjl 		}
1672*25cf1a30Sjl #ifdef DEBUG
1673*25cf1a30Sjl 		ASSERT(mp->sbm_mlist != NULL);
1674*25cf1a30Sjl 
1675*25cf1a30Sjl 		if (mp->sbm_flags & DR_MFLAG_SOURCE) {
1676*25cf1a30Sjl 			PR_MEM("%s: release of %s requires copy/rename;"
1677*25cf1a30Sjl 				" selected target board %s\n",
1678*25cf1a30Sjl 				f,
1679*25cf1a30Sjl 				mp->sbm_cm.sbdev_path,
1680*25cf1a30Sjl 				mp->sbm_peer->sbm_cm.sbdev_path);
1681*25cf1a30Sjl 		} else {
1682*25cf1a30Sjl 			PR_MEM("%s: copy/rename not required to release %s\n",
1683*25cf1a30Sjl 				f, mp->sbm_cm.sbdev_path);
1684*25cf1a30Sjl 		}
1685*25cf1a30Sjl 
1686*25cf1a30Sjl 		ASSERT(mp->sbm_flags & DR_MFLAG_RELOWNER);
1687*25cf1a30Sjl 		ASSERT(mp->sbm_flags & DR_MFLAG_RESERVED);
1688*25cf1a30Sjl #endif
1689*25cf1a30Sjl 	}
1690*25cf1a30Sjl 
1691*25cf1a30Sjl 	return (err_flag ? -1 : 0);
1692*25cf1a30Sjl }
1693*25cf1a30Sjl 
/*
 * Completion handler for the release (drain) phase of a memory unit.
 *
 * Releases the unit's kphysm delete handle, then either:
 *  - if an error was noted on the unit (sbdev_error), tears down the
 *    memlist/peer/flag bookkeeping on the source and on its copy/rename
 *    target (if one is paired) and bails out; or
 *  - on success, transitions the source (and target) to DR_STATE_RELEASE,
 *    verifies the deleted spans are really gone from phys_install,
 *    sets DR_MFLAG_RELDONE and finishes via dr_release_dev_done.
 */
void
dr_release_mem_done(dr_common_unit_t *cp)
{
	dr_mem_unit_t	*s_mp = (dr_mem_unit_t *)cp;
	dr_mem_unit_t *t_mp, *mp;
	int		rv;
	static fn_t	f = "dr_release_mem_done";

	/*
	 * This unit will be flagged with DR_MFLAG_SOURCE, if it
	 * has a target unit.
	 */
	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
		t_mp = s_mp->sbm_peer;
		ASSERT(t_mp != NULL);
		ASSERT(t_mp->sbm_peer == s_mp);
		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
		ASSERT(t_mp->sbm_flags & DR_MFLAG_RESERVED);
	} else {
		/* there is no target unit */
		t_mp = NULL;
	}

	/* free delete handle */
	ASSERT(s_mp->sbm_flags & DR_MFLAG_RELOWNER);
	ASSERT(s_mp->sbm_flags & DR_MFLAG_RESERVED);
	rv = kphysm_del_release(s_mp->sbm_memhandle);
	if (rv != KPHYSM_OK) {
		/*
		 * can do nothing but complain
		 * and hope helpful for debug
		 */
		cmn_err(CE_WARN, "%s: unexpected kphysm_del_release"
			" return value %d", f, rv);
	}
	s_mp->sbm_flags &= ~DR_MFLAG_RELOWNER;

	/*
	 * If an error was encountered during release, clean up
	 * the source (and target, if present) unit data.
	 */
/* XXX Can we know that sbdev_error was encountered during release? */
	if (s_mp->sbm_cm.sbdev_error != NULL) {
		PR_MEM("%s: %s: error %d noted\n",
			f,
			s_mp->sbm_cm.sbdev_path,
			s_mp->sbm_cm.sbdev_error->e_code);

		if (t_mp != NULL) {
			/* del_mlist aliases mlist (asserted); free once */
			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
			t_mp->sbm_del_mlist = NULL;

			if (t_mp->sbm_mlist != NULL) {
				memlist_delete(t_mp->sbm_mlist);
				t_mp->sbm_mlist = NULL;
			}

			t_mp->sbm_peer = NULL;
			t_mp->sbm_flags = 0;
			t_mp->sbm_cm.sbdev_busy = 0;
		}

		/* free del_mlist only when it is a distinct list */
		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
			memlist_delete(s_mp->sbm_del_mlist);
		s_mp->sbm_del_mlist = NULL;

		if (s_mp->sbm_mlist != NULL) {
			memlist_delete(s_mp->sbm_mlist);
			s_mp->sbm_mlist = NULL;
		}

		s_mp->sbm_peer = NULL;
		s_mp->sbm_flags = 0;
		s_mp->sbm_cm.sbdev_busy = 0;

		/* bail out */
		return;
	}

	DR_DEV_SET_RELEASED(&s_mp->sbm_cm);
	dr_device_transition(&s_mp->sbm_cm, DR_STATE_RELEASE);

	if (t_mp != NULL) {
		/*
		 * the kphysm delete operation that drained the source
		 * board also drained this target board.  Since the source
		 * board drain is now known to have succeeded, we know this
		 * target board is drained too.
		 *
		 * because DR_DEV_SET_RELEASED and dr_device_transition
		 * is done here, the dr_release_dev_done should not
		 * fail.
		 */
		DR_DEV_SET_RELEASED(&t_mp->sbm_cm);
		dr_device_transition(&t_mp->sbm_cm, DR_STATE_RELEASE);

		/*
		 * NOTE: do not transition target's board state,
		 * even if the mem-unit was the last configured
		 * unit of the board.  When copy/rename completes
		 * this mem-unit will be transitioned back to
		 * the configured state.  In the meantime, the
		 * board's state must remain as is.
		 */
	}

	/* if board(s) had deleted memory, verify it is gone */
	rv = 0;
	memlist_read_lock();
	if (s_mp->sbm_del_mlist != NULL) {
		mp = s_mp;
		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
	}
	if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
		mp = t_mp;
		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
	}
	memlist_read_unlock();
	if (rv) {
		cmn_err(CE_WARN, "%s: %smem-unit (%d.%d): "
			"deleted memory still found in phys_install",
			f,
			(mp == t_mp ? "target " : ""),
			mp->sbm_cm.sbdev_bp->b_num,
			mp->sbm_cm.sbdev_unum);

		DR_DEV_INTERNAL_ERROR(&s_mp->sbm_cm);
		return;
	}

	s_mp->sbm_flags |= DR_MFLAG_RELDONE;
	if (t_mp != NULL)
		t_mp->sbm_flags |= DR_MFLAG_RELDONE;

	/* this should not fail */
	if (dr_release_dev_done(&s_mp->sbm_cm) != 0) {
		/* catch this in debug kernels */
		ASSERT(0);
		return;
	}

	PR_MEM("%s: marking %s release DONE\n",
		f, s_mp->sbm_cm.sbdev_path);

	s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;

	if (t_mp != NULL) {
		/* should not fail */
		rv = dr_release_dev_done(&t_mp->sbm_cm);
		if (rv != 0) {
			/* catch this in debug kernels */
			ASSERT(0);
			return;
		}

		PR_MEM("%s: marking %s release DONE\n",
			f, t_mp->sbm_cm.sbdev_path);

		t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
	}
}
1855*25cf1a30Sjl 
1856*25cf1a30Sjl /*ARGSUSED*/
1857*25cf1a30Sjl int
1858*25cf1a30Sjl dr_disconnect_mem(dr_mem_unit_t *mp)
1859*25cf1a30Sjl {
1860*25cf1a30Sjl 	static fn_t	f = "dr_disconnect_mem";
1861*25cf1a30Sjl 	update_membounds_t umb;
1862*25cf1a30Sjl 
1863*25cf1a30Sjl #ifdef DEBUG
1864*25cf1a30Sjl 	int state = mp->sbm_cm.sbdev_state;
1865*25cf1a30Sjl 	ASSERT(state == DR_STATE_CONNECTED ||
1866*25cf1a30Sjl 		state == DR_STATE_UNCONFIGURED);
1867*25cf1a30Sjl #endif
1868*25cf1a30Sjl 
1869*25cf1a30Sjl 	PR_MEM("%s...\n", f);
1870*25cf1a30Sjl 
1871*25cf1a30Sjl 	if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
1872*25cf1a30Sjl 		memlist_delete(mp->sbm_del_mlist);
1873*25cf1a30Sjl 	mp->sbm_del_mlist = NULL;
1874*25cf1a30Sjl 
1875*25cf1a30Sjl 	if (mp->sbm_mlist) {
1876*25cf1a30Sjl 		memlist_delete(mp->sbm_mlist);
1877*25cf1a30Sjl 		mp->sbm_mlist = NULL;
1878*25cf1a30Sjl 	}
1879*25cf1a30Sjl 
1880*25cf1a30Sjl 	/*
1881*25cf1a30Sjl 	 * Remove memory from lgroup
1882*25cf1a30Sjl 	 * For now, only board info is required.
1883*25cf1a30Sjl 	 */
1884*25cf1a30Sjl 	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
1885*25cf1a30Sjl 	umb.u_base = (uint64_t)-1;
1886*25cf1a30Sjl 	umb.u_len = (uint64_t)-1;
1887*25cf1a30Sjl 
1888*25cf1a30Sjl 	lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);
1889*25cf1a30Sjl 
1890*25cf1a30Sjl 	return (0);
1891*25cf1a30Sjl }
1892*25cf1a30Sjl 
/*
 * Cancel (undo) a memory release that is in progress on s_mp.
 * Memory spans already scheduled for deletion are handed back via
 * dr_add_memory_spans, the memlist/peer/flag bookkeeping on the source
 * and on its copy/rename target (if one is paired) is reset, and both
 * units are re-initialized via dr_init_mem_unit_data.
 * Returns 0 on success; -1 if called on a target unit or if the unit
 * is in a state from which cancel is not possible.
 */
int
dr_cancel_mem(dr_mem_unit_t *s_mp)
{
	dr_mem_unit_t	*t_mp;
	dr_state_t	state;
	static fn_t	f = "dr_cancel_mem";

	state = s_mp->sbm_cm.sbdev_state;

	if (s_mp->sbm_flags & DR_MFLAG_TARGET) {
		/* must cancel source board, not target board */
		/* TODO: set error */
		return (-1);
	} else if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
		t_mp = s_mp->sbm_peer;
		ASSERT(t_mp != NULL);
		ASSERT(t_mp->sbm_peer == s_mp);

		/* must always match the source board's state */
		/* TODO: is this assertion correct? */
		ASSERT(t_mp->sbm_cm.sbdev_state == state);
	} else {
		/* there is no target unit */
		t_mp = NULL;
	}

	switch (state) {
	case DR_STATE_UNREFERENCED:	/* state set by dr_release_dev_done */
		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);

		/* return any already-deleted spans to the system */
		if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
			PR_MEM("%s: undoing target %s memory delete\n",
				f, t_mp->sbm_cm.sbdev_path);
			dr_add_memory_spans(t_mp, t_mp->sbm_del_mlist);

			DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
		}

		if (s_mp->sbm_del_mlist != NULL) {
			PR_MEM("%s: undoing %s memory delete\n",
				f, s_mp->sbm_cm.sbdev_path);

			dr_add_memory_spans(s_mp, s_mp->sbm_del_mlist);
		}

		/*FALLTHROUGH*/

/* TODO: should no longer be possible to see the release state here */
	case DR_STATE_RELEASE:	/* state set by dr_release_mem_done */

		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);

		/* reset target unit bookkeeping; return it to CONFIGURED */
		if (t_mp != NULL) {
			/* del_mlist aliases mlist (asserted); free once */
			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
			t_mp->sbm_del_mlist = NULL;

			if (t_mp->sbm_mlist != NULL) {
				memlist_delete(t_mp->sbm_mlist);
				t_mp->sbm_mlist = NULL;
			}

			t_mp->sbm_peer = NULL;
			t_mp->sbm_flags = 0;
			t_mp->sbm_cm.sbdev_busy = 0;
			dr_init_mem_unit_data(t_mp);

			DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);

			dr_device_transition(
				&t_mp->sbm_cm, DR_STATE_CONFIGURED);
		}

		/* free del_mlist only when it is a distinct list */
		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
			memlist_delete(s_mp->sbm_del_mlist);
		s_mp->sbm_del_mlist = NULL;

		if (s_mp->sbm_mlist != NULL) {
			memlist_delete(s_mp->sbm_mlist);
			s_mp->sbm_mlist = NULL;
		}

		s_mp->sbm_peer = NULL;
		s_mp->sbm_flags = 0;
		s_mp->sbm_cm.sbdev_busy = 0;
		dr_init_mem_unit_data(s_mp);

		return (0);

	default:
		PR_MEM("%s: WARNING unexpected state (%d) for %s\n",
			f, (int)state, s_mp->sbm_cm.sbdev_path);

		return (-1);
	}
	/*NOTREACHED*/
}
1989*25cf1a30Sjl 
1990*25cf1a30Sjl void
1991*25cf1a30Sjl dr_init_mem_unit(dr_mem_unit_t *mp)
1992*25cf1a30Sjl {
1993*25cf1a30Sjl 	dr_state_t	new_state;
1994*25cf1a30Sjl 
1995*25cf1a30Sjl 
1996*25cf1a30Sjl 	if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
1997*25cf1a30Sjl 		new_state = DR_STATE_CONFIGURED;
1998*25cf1a30Sjl 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
1999*25cf1a30Sjl 	} else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
2000*25cf1a30Sjl 		new_state = DR_STATE_CONNECTED;
2001*25cf1a30Sjl 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
2002*25cf1a30Sjl 	} else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
2003*25cf1a30Sjl 		new_state = DR_STATE_OCCUPIED;
2004*25cf1a30Sjl 	} else {
2005*25cf1a30Sjl 		new_state = DR_STATE_EMPTY;
2006*25cf1a30Sjl 	}
2007*25cf1a30Sjl 
2008*25cf1a30Sjl 	if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
2009*25cf1a30Sjl 		dr_init_mem_unit_data(mp);
2010*25cf1a30Sjl 
2011*25cf1a30Sjl 	/* delay transition until fully initialized */
2012*25cf1a30Sjl 	dr_device_transition(&mp->sbm_cm, new_state);
2013*25cf1a30Sjl }
2014*25cf1a30Sjl 
2015*25cf1a30Sjl static void
2016*25cf1a30Sjl dr_init_mem_unit_data(dr_mem_unit_t *mp)
2017*25cf1a30Sjl {
2018*25cf1a30Sjl 	drmachid_t	id = mp->sbm_cm.sbdev_id;
2019*25cf1a30Sjl 	drmach_mem_info_t	minfo;
2020*25cf1a30Sjl 	sbd_error_t	*err;
2021*25cf1a30Sjl 	static fn_t	f = "dr_init_mem_unit_data";
2022*25cf1a30Sjl 	update_membounds_t umb;
2023*25cf1a30Sjl 
2024*25cf1a30Sjl 	PR_MEM("%s...\n", f);
2025*25cf1a30Sjl 
2026*25cf1a30Sjl 	/* a little sanity checking */
2027*25cf1a30Sjl 	ASSERT(mp->sbm_peer == NULL);
2028*25cf1a30Sjl 	ASSERT(mp->sbm_flags == 0);
2029*25cf1a30Sjl 
2030*25cf1a30Sjl 	if (err = drmach_mem_get_info(id, &minfo)) {
2031*25cf1a30Sjl 		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
2032*25cf1a30Sjl 		return;
2033*25cf1a30Sjl 	}
2034*25cf1a30Sjl 	mp->sbm_basepfn = _b64top(minfo.mi_basepa);
2035*25cf1a30Sjl 	mp->sbm_npages = _b64top(minfo.mi_size);
2036*25cf1a30Sjl 	mp->sbm_alignment_mask = _b64top(minfo.mi_alignment_mask);
2037*25cf1a30Sjl 	mp->sbm_slice_size = minfo.mi_slice_size;
2038*25cf1a30Sjl 
2039*25cf1a30Sjl 	/*
2040*25cf1a30Sjl 	 * Add memory to lgroup
2041*25cf1a30Sjl 	 */
2042*25cf1a30Sjl 	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
2043*25cf1a30Sjl 	umb.u_base = (uint64_t)mp->sbm_basepfn << MMU_PAGESHIFT;
2044*25cf1a30Sjl 	umb.u_len = (uint64_t)mp->sbm_npages << MMU_PAGESHIFT;
2045*25cf1a30Sjl 
2046*25cf1a30Sjl 	lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
2047*25cf1a30Sjl 
2048*25cf1a30Sjl 	PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n",
2049*25cf1a30Sjl 		f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
2050*25cf1a30Sjl }
2051*25cf1a30Sjl 
2052*25cf1a30Sjl static int
2053*25cf1a30Sjl dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
2054*25cf1a30Sjl {
2055*25cf1a30Sjl 	int		err;
2056*25cf1a30Sjl 	pfn_t		base;
2057*25cf1a30Sjl 	pgcnt_t		npgs;
2058*25cf1a30Sjl 	struct memlist	*mc;
2059*25cf1a30Sjl 	static fn_t	f = "dr_reserve_mem_spans";
2060*25cf1a30Sjl 
2061*25cf1a30Sjl 	PR_MEM("%s...\n", f);
2062*25cf1a30Sjl 
2063*25cf1a30Sjl 	/*
2064*25cf1a30Sjl 	 * Walk the supplied memlist scheduling each span for removal
2065*25cf1a30Sjl 	 * with kphysm_del_span.  It is possible that a span may intersect
2066*25cf1a30Sjl 	 * an area occupied by the cage.
2067*25cf1a30Sjl 	 */
2068*25cf1a30Sjl 	for (mc = ml; mc != NULL; mc = mc->next) {
2069*25cf1a30Sjl 		base = _b64top(mc->address);
2070*25cf1a30Sjl 		npgs = _b64top(mc->size);
2071*25cf1a30Sjl 
2072*25cf1a30Sjl 		err = kphysm_del_span(*mhp, base, npgs);
2073*25cf1a30Sjl 		if (err != KPHYSM_OK) {
2074*25cf1a30Sjl 			cmn_err(CE_WARN, "%s memory reserve failed."
2075*25cf1a30Sjl 				" unexpected kphysm_del_span return value %d;"
2076*25cf1a30Sjl 				" basepfn=0x%lx npages=%ld",
2077*25cf1a30Sjl 				f, err, base, npgs);
2078*25cf1a30Sjl 
2079*25cf1a30Sjl 			return (-1);
2080*25cf1a30Sjl 		}
2081*25cf1a30Sjl 	}
2082*25cf1a30Sjl 
2083*25cf1a30Sjl 	return (0);
2084*25cf1a30Sjl }
2085*25cf1a30Sjl 
/*
 * Copy/rename target selection: candidates are ranked into
 * DR_SMT_NPREF_SETS preference classes, each holding one slot per
 * mem-unit in the system (DR_SMT_NUNITS_PER_SET).
 *
 * NOTE: the set-size expansion must be parenthesized because it is
 * used as a divisor (idx / DR_SMT_NUNITS_PER_SET) as well as a
 * multiplier; an unparenthesized MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD
 * would group as (idx / MAX_BOARDS) * MAX_MEM_UNITS_PER_BOARD, which
 * is only accidentally correct when MAX_MEM_UNITS_PER_BOARD == 1.
 */
#define	DR_SMT_NPREF_SETS	6
#define	DR_SMT_NUNITS_PER_SET	(MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD)
2088*25cf1a30Sjl 
/* debug counters */
int dr_smt_realigned;	/* NOTE(review): updated elsewhere; purpose unverified here */
int dr_smt_preference[DR_SMT_NPREF_SETS];	/* candidate hits per preference class */

#ifdef DEBUG
uint_t dr_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
#endif
2096*25cf1a30Sjl 
2097*25cf1a30Sjl /*
2098*25cf1a30Sjl  * Find and reserve a copy/rename target board suitable for the
2099*25cf1a30Sjl  * given source board.
2100*25cf1a30Sjl  * All boards in the system are examined and categorized in relation to
2101*25cf1a30Sjl  * their memory size versus the source board's memory size.  Order of
2102*25cf1a30Sjl  * preference is:
2103*25cf1a30Sjl  *	1st copy all source, source/target same size
2104*25cf1a30Sjl  *	2nd copy all source, larger target
2105*25cf1a30Sjl  * 	3rd copy nonrelocatable source span
2106*25cf1a30Sjl  */
2107*25cf1a30Sjl static int
2108*25cf1a30Sjl dr_select_mem_target(dr_handle_t *hp,
2109*25cf1a30Sjl 	dr_mem_unit_t *s_mp, struct memlist *s_ml)
2110*25cf1a30Sjl {
2111*25cf1a30Sjl 	dr_target_pref_t preference; /* lower value is higher preference */
2112*25cf1a30Sjl 	int		idx;
2113*25cf1a30Sjl 	dr_mem_unit_t	**sets;
2114*25cf1a30Sjl 
2115*25cf1a30Sjl 	int		t_bd;
2116*25cf1a30Sjl 	int		t_unit;
2117*25cf1a30Sjl 	int		rv;
2118*25cf1a30Sjl 	dr_board_t	*s_bp, *t_bp;
2119*25cf1a30Sjl 	dr_mem_unit_t	*t_mp, *c_mp;
2120*25cf1a30Sjl 	struct memlist	*d_ml, *t_ml, *ml, *b_ml, *x_ml = NULL;
2121*25cf1a30Sjl 	memquery_t	s_mq = {0};
2122*25cf1a30Sjl 	static fn_t	f = "dr_select_mem_target";
2123*25cf1a30Sjl 
2124*25cf1a30Sjl 	PR_MEM("%s...\n", f);
2125*25cf1a30Sjl 
2126*25cf1a30Sjl 	ASSERT(s_ml != NULL);
2127*25cf1a30Sjl 
2128*25cf1a30Sjl 	sets = GETSTRUCT(dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
2129*25cf1a30Sjl 	    DR_SMT_NPREF_SETS);
2130*25cf1a30Sjl 
2131*25cf1a30Sjl 	s_bp = hp->h_bd;
2132*25cf1a30Sjl 	/* calculate the offset into the slice of the last source board pfn */
2133*25cf1a30Sjl 	ASSERT(s_mp->sbm_npages != 0);
2134*25cf1a30Sjl 
2135*25cf1a30Sjl 	/*
2136*25cf1a30Sjl 	 * Find non-relocatable span on source board.
2137*25cf1a30Sjl 	 */
2138*25cf1a30Sjl 	rv = kphysm_del_span_query(s_mp->sbm_basepfn, s_mp->sbm_npages, &s_mq);
2139*25cf1a30Sjl 	if (rv != KPHYSM_OK) {
2140*25cf1a30Sjl 		PR_MEM("%s: %s: unexpected kphysm_del_span_query"
2141*25cf1a30Sjl 		    " return value %d; basepfn 0x%lx, npages %ld\n",
2142*25cf1a30Sjl 		    f, s_mp->sbm_cm.sbdev_path, rv, s_mp->sbm_basepfn,
2143*25cf1a30Sjl 		    s_mp->sbm_npages);
2144*25cf1a30Sjl 		return (-1);
2145*25cf1a30Sjl 	}
2146*25cf1a30Sjl 
2147*25cf1a30Sjl 	ASSERT(s_mq.phys_pages != 0);
2148*25cf1a30Sjl 	ASSERT(s_mq.nonrelocatable != 0);
2149*25cf1a30Sjl 
2150*25cf1a30Sjl 	PR_MEM("%s: %s: nonrelocatable span (0x%lx..0x%lx)\n", f,
2151*25cf1a30Sjl 	    s_mp->sbm_cm.sbdev_path, s_mq.first_nonrelocatable,
2152*25cf1a30Sjl 	    s_mq.last_nonrelocatable);
2153*25cf1a30Sjl 
2154*25cf1a30Sjl 	/* break down s_ml if it contains dynamic segments */
2155*25cf1a30Sjl 	b_ml = memlist_dup(s_ml);
2156*25cf1a30Sjl 
2157*25cf1a30Sjl 	for (ml = s_mp->sbm_dyn_segs; ml; ml = ml->next) {
2158*25cf1a30Sjl 		b_ml = memlist_del_span(b_ml, ml->address, ml->size);
2159*25cf1a30Sjl 		b_ml = memlist_cat_span(b_ml, ml->address, ml->size);
2160*25cf1a30Sjl 	}
2161*25cf1a30Sjl 
2162*25cf1a30Sjl 
2163*25cf1a30Sjl 	/*
2164*25cf1a30Sjl 	 * Make one pass through all memory units on all boards
2165*25cf1a30Sjl 	 * and categorize them with respect to the source board.
2166*25cf1a30Sjl 	 */
2167*25cf1a30Sjl 	for (t_bd = 0; t_bd < MAX_BOARDS; t_bd++) {
2168*25cf1a30Sjl 		/*
2169*25cf1a30Sjl 		 * The board structs are a contiguous array
2170*25cf1a30Sjl 		 * so we take advantage of that to find the
2171*25cf1a30Sjl 		 * correct board struct pointer for a given
2172*25cf1a30Sjl 		 * board number.
2173*25cf1a30Sjl 		 */
2174*25cf1a30Sjl 		t_bp = dr_lookup_board(t_bd);
2175*25cf1a30Sjl 
2176*25cf1a30Sjl 		/* source board can not be its own target */
2177*25cf1a30Sjl 		if (s_bp->b_num == t_bp->b_num)
2178*25cf1a30Sjl 			continue;
2179*25cf1a30Sjl 
2180*25cf1a30Sjl 		for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) {
2181*25cf1a30Sjl 
2182*25cf1a30Sjl 			t_mp = dr_get_mem_unit(t_bp, t_unit);
2183*25cf1a30Sjl 
2184*25cf1a30Sjl 			/* this memory node must be attached */
2185*25cf1a30Sjl 			if (!DR_DEV_IS_ATTACHED(&t_mp->sbm_cm))
2186*25cf1a30Sjl 				continue;
2187*25cf1a30Sjl 
2188*25cf1a30Sjl 			/* source unit can not be its own target */
2189*25cf1a30Sjl 			if (s_mp == t_mp) {
				/* catch this in debug kernels */
2191*25cf1a30Sjl 				ASSERT(0);
2192*25cf1a30Sjl 				continue;
2193*25cf1a30Sjl 			}
2194*25cf1a30Sjl 
2195*25cf1a30Sjl 			/*
2196*25cf1a30Sjl 			 * this memory node must not already be reserved
2197*25cf1a30Sjl 			 * by some other memory delete operation.
2198*25cf1a30Sjl 			 */
2199*25cf1a30Sjl 			if (t_mp->sbm_flags & DR_MFLAG_RESERVED)
2200*25cf1a30Sjl 				continue;
2201*25cf1a30Sjl 
2202*25cf1a30Sjl 			/* get target board memlist */
2203*25cf1a30Sjl 			t_ml = dr_get_memlist(t_mp);
2204*25cf1a30Sjl 			if (t_ml == NULL) {
2205*25cf1a30Sjl 				cmn_err(CE_WARN, "%s: no memlist for"
2206*25cf1a30Sjl 				    " mem-unit %d, board %d", f,
2207*25cf1a30Sjl 				    t_mp->sbm_cm.sbdev_bp->b_num,
2208*25cf1a30Sjl 				    t_mp->sbm_cm.sbdev_unum);
2209*25cf1a30Sjl 				continue;
2210*25cf1a30Sjl 			}
2211*25cf1a30Sjl 
2212*25cf1a30Sjl 			preference = dr_get_target_preference(hp, t_mp, s_mp,
2213*25cf1a30Sjl 			    t_ml, s_ml, b_ml);
2214*25cf1a30Sjl 
2215*25cf1a30Sjl 			if (preference == DR_TP_INVALID)
2216*25cf1a30Sjl 				continue;
2217*25cf1a30Sjl 
2218*25cf1a30Sjl 			dr_smt_preference[preference]++;
2219*25cf1a30Sjl 
2220*25cf1a30Sjl 			/* calculate index to start of preference set */
2221*25cf1a30Sjl 			idx  = DR_SMT_NUNITS_PER_SET * preference;
2222*25cf1a30Sjl 			/* calculate offset to respective element */
2223*25cf1a30Sjl 			idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;
2224*25cf1a30Sjl 
2225*25cf1a30Sjl 			ASSERT(idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS);
2226*25cf1a30Sjl 			sets[idx] = t_mp;
2227*25cf1a30Sjl 		}
2228*25cf1a30Sjl 	}
2229*25cf1a30Sjl 
2230*25cf1a30Sjl 	if (b_ml != NULL)
2231*25cf1a30Sjl 		memlist_delete(b_ml);
2232*25cf1a30Sjl 
2233*25cf1a30Sjl 	/*
2234*25cf1a30Sjl 	 * NOTE: this would be a good place to sort each candidate
2235*25cf1a30Sjl 	 * set in to some desired order, e.g. memory size in ascending
2236*25cf1a30Sjl 	 * order.  Without an additional sorting step here, the order
2237*25cf1a30Sjl 	 * within a set is ascending board number order.
2238*25cf1a30Sjl 	 */
2239*25cf1a30Sjl 
2240*25cf1a30Sjl 	c_mp = NULL;
2241*25cf1a30Sjl 	x_ml = NULL;
2242*25cf1a30Sjl 	t_ml = NULL;
2243*25cf1a30Sjl 	for (idx = 0; idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS; idx++) {
2244*25cf1a30Sjl 		memquery_t mq;
2245*25cf1a30Sjl 
2246*25cf1a30Sjl 		preference = (dr_target_pref_t)(idx / DR_SMT_NUNITS_PER_SET);
2247*25cf1a30Sjl 
2248*25cf1a30Sjl 		ASSERT(preference != DR_TP_INVALID);
2249*25cf1a30Sjl 
2250*25cf1a30Sjl 		/* cleanup t_ml after previous pass */
2251*25cf1a30Sjl 		if (t_ml != NULL) {
2252*25cf1a30Sjl 			memlist_delete(t_ml);
2253*25cf1a30Sjl 			t_ml = NULL;
2254*25cf1a30Sjl 		}
2255*25cf1a30Sjl 
2256*25cf1a30Sjl 		/* get candidate target board mem unit */
2257*25cf1a30Sjl 		t_mp = sets[idx];
2258*25cf1a30Sjl 		if (t_mp == NULL)
2259*25cf1a30Sjl 			continue;
2260*25cf1a30Sjl 
2261*25cf1a30Sjl 		/* get target board memlist */
2262*25cf1a30Sjl 		t_ml = dr_get_memlist(t_mp);
2263*25cf1a30Sjl 		if (t_ml == NULL) {
2264*25cf1a30Sjl 			cmn_err(CE_WARN, "%s: no memlist for"
2265*25cf1a30Sjl 				" mem-unit %d, board %d",
2266*25cf1a30Sjl 				f,
2267*25cf1a30Sjl 				t_mp->sbm_cm.sbdev_bp->b_num,
2268*25cf1a30Sjl 				t_mp->sbm_cm.sbdev_unum);
2269*25cf1a30Sjl 
2270*25cf1a30Sjl 			continue;
2271*25cf1a30Sjl 		}
2272*25cf1a30Sjl 
2273*25cf1a30Sjl 		PR_MEM("%s: checking for no-reloc in %s, "
2274*25cf1a30Sjl 			" basepfn=0x%lx, npages=%ld\n",
2275*25cf1a30Sjl 			f,
2276*25cf1a30Sjl 			t_mp->sbm_cm.sbdev_path,
2277*25cf1a30Sjl 			t_mp->sbm_basepfn,
2278*25cf1a30Sjl 			t_mp->sbm_npages);
2279*25cf1a30Sjl 
2280*25cf1a30Sjl 		rv = dr_del_mlist_query(t_ml, &mq);
2281*25cf1a30Sjl 		if (rv != KPHYSM_OK) {
2282*25cf1a30Sjl 			PR_MEM("%s: kphysm_del_span_query:"
2283*25cf1a30Sjl 				" unexpected return value %d\n", f, rv);
2284*25cf1a30Sjl 
2285*25cf1a30Sjl 			continue;
2286*25cf1a30Sjl 		}
2287*25cf1a30Sjl 
2288*25cf1a30Sjl 		if (mq.nonrelocatable != 0) {
2289*25cf1a30Sjl 			PR_MEM("%s: candidate %s has"
2290*25cf1a30Sjl 				" nonrelocatable span [0x%lx..0x%lx]\n",
2291*25cf1a30Sjl 				f,
2292*25cf1a30Sjl 				t_mp->sbm_cm.sbdev_path,
2293*25cf1a30Sjl 				mq.first_nonrelocatable,
2294*25cf1a30Sjl 				mq.last_nonrelocatable);
2295*25cf1a30Sjl 
2296*25cf1a30Sjl 			continue;
2297*25cf1a30Sjl 		}
2298*25cf1a30Sjl 
2299*25cf1a30Sjl #ifdef DEBUG
2300*25cf1a30Sjl 		/*
2301*25cf1a30Sjl 		 * This is a debug tool for excluding certain boards
2302*25cf1a30Sjl 		 * from being selected as a target board candidate.
2303*25cf1a30Sjl 		 * dr_ignore_board is only tested by this driver.
2304*25cf1a30Sjl 		 * It must be set with adb, obp, /etc/system or your
2305*25cf1a30Sjl 		 * favorite debugger.
2306*25cf1a30Sjl 		 */
2307*25cf1a30Sjl 		if (dr_ignore_board &
2308*25cf1a30Sjl 			(1 << (t_mp->sbm_cm.sbdev_bp->b_num - 1))) {
2309*25cf1a30Sjl 			PR_MEM("%s: dr_ignore_board flag set,"
2310*25cf1a30Sjl 				" ignoring %s as candidate\n",
2311*25cf1a30Sjl 				f, t_mp->sbm_cm.sbdev_path);
2312*25cf1a30Sjl 			continue;
2313*25cf1a30Sjl 		}
2314*25cf1a30Sjl #endif
2315*25cf1a30Sjl 
2316*25cf1a30Sjl 		/*
2317*25cf1a30Sjl 		 * Reserve excess source board memory, if any.
2318*25cf1a30Sjl 		 *
2319*25cf1a30Sjl 		 * Only the nonrelocatable source span will be copied
2320*25cf1a30Sjl 		 * so schedule the rest of the source mem to be deleted.
2321*25cf1a30Sjl 		 */
2322*25cf1a30Sjl 		switch (preference) {
2323*25cf1a30Sjl 		case DR_TP_NONRELOC:
2324*25cf1a30Sjl 			/*
2325*25cf1a30Sjl 			 * Get source copy memlist and use it to construct
2326*25cf1a30Sjl 			 * delete memlist.
2327*25cf1a30Sjl 			 */
2328*25cf1a30Sjl 			d_ml = memlist_dup(s_ml);
2329*25cf1a30Sjl 			x_ml = dr_get_copy_mlist(s_ml, t_ml, s_mp, t_mp);
2330*25cf1a30Sjl 
2331*25cf1a30Sjl 			/* XXX */
2332*25cf1a30Sjl 			ASSERT(d_ml != NULL);
2333*25cf1a30Sjl 			ASSERT(x_ml != NULL);
2334*25cf1a30Sjl 
2335*25cf1a30Sjl 			for (ml = x_ml; ml != NULL; ml = ml->next) {
2336*25cf1a30Sjl 				d_ml = memlist_del_span(d_ml, ml->address,
2337*25cf1a30Sjl 				    ml->size);
2338*25cf1a30Sjl 			}
2339*25cf1a30Sjl 
2340*25cf1a30Sjl 			PR_MEM("%s: %s: reserving src brd memlist:\n", f,
2341*25cf1a30Sjl 			    s_mp->sbm_cm.sbdev_path);
2342*25cf1a30Sjl 			PR_MEMLIST_DUMP(d_ml);
2343*25cf1a30Sjl 
2344*25cf1a30Sjl 			/* reserve excess spans */
2345*25cf1a30Sjl 			if (dr_reserve_mem_spans(&s_mp->sbm_memhandle,
2346*25cf1a30Sjl 			    d_ml) != 0) {
2347*25cf1a30Sjl 				/* likely more non-reloc pages appeared */
2348*25cf1a30Sjl 				/* TODO: restart from top? */
2349*25cf1a30Sjl 				continue;
2350*25cf1a30Sjl 			}
2351*25cf1a30Sjl 			break;
2352*25cf1a30Sjl 		default:
2353*25cf1a30Sjl 			d_ml = NULL;
2354*25cf1a30Sjl 			break;
2355*25cf1a30Sjl 		}
2356*25cf1a30Sjl 
2357*25cf1a30Sjl 		s_mp->sbm_flags |= DR_MFLAG_RESERVED;
2358*25cf1a30Sjl 
2359*25cf1a30Sjl 		/*
2360*25cf1a30Sjl 		 * reserve all memory on target board.
2361*25cf1a30Sjl 		 * NOTE: source board's memhandle is used.
2362*25cf1a30Sjl 		 *
2363*25cf1a30Sjl 		 * If this succeeds (eq 0), then target selection is
2364*25cf1a30Sjl 		 * complete and all unwanted memory spans, both source and
2365*25cf1a30Sjl 		 * target, have been reserved.  Loop is terminated.
2366*25cf1a30Sjl 		 */
2367*25cf1a30Sjl 		if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) {
2368*25cf1a30Sjl 			PR_MEM("%s: %s: target board memory reserved\n",
2369*25cf1a30Sjl 				f, t_mp->sbm_cm.sbdev_path);
2370*25cf1a30Sjl 
2371*25cf1a30Sjl 			/* a candidate target board is now reserved */
2372*25cf1a30Sjl 			t_mp->sbm_flags |= DR_MFLAG_RESERVED;
2373*25cf1a30Sjl 			c_mp = t_mp;
2374*25cf1a30Sjl 
2375*25cf1a30Sjl 			/* *** EXITING LOOP *** */
2376*25cf1a30Sjl 			break;
2377*25cf1a30Sjl 		}
2378*25cf1a30Sjl 
2379*25cf1a30Sjl 		/* did not successfully reserve the target board. */
2380*25cf1a30Sjl 		PR_MEM("%s: could not reserve target %s\n",
2381*25cf1a30Sjl 			f, t_mp->sbm_cm.sbdev_path);
2382*25cf1a30Sjl 
2383*25cf1a30Sjl 		/*
2384*25cf1a30Sjl 		 * NOTE: an undo of the dr_reserve_mem_span work
2385*25cf1a30Sjl 		 * will happen automatically when the memhandle
2386*25cf1a30Sjl 		 * (s_mp->sbm_memhandle) is kphysm_del_release'd.
2387*25cf1a30Sjl 		 */
2388*25cf1a30Sjl 
2389*25cf1a30Sjl 		s_mp->sbm_flags &= ~DR_MFLAG_RESERVED;
2390*25cf1a30Sjl 	}
2391*25cf1a30Sjl 
2392*25cf1a30Sjl 	/* clean up after memlist editing logic */
2393*25cf1a30Sjl 	if (x_ml != NULL)
2394*25cf1a30Sjl 		memlist_delete(x_ml);
2395*25cf1a30Sjl 
2396*25cf1a30Sjl 	FREESTRUCT(sets, dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET *
2397*25cf1a30Sjl 	    DR_SMT_NPREF_SETS);
2398*25cf1a30Sjl 
2399*25cf1a30Sjl 	/*
2400*25cf1a30Sjl 	 * c_mp will be NULL when the entire sets[] array
2401*25cf1a30Sjl 	 * has been searched without reserving a target board.
2402*25cf1a30Sjl 	 */
2403*25cf1a30Sjl 	if (c_mp == NULL) {
2404*25cf1a30Sjl 		PR_MEM("%s: %s: target selection failed.\n",
2405*25cf1a30Sjl 			f, s_mp->sbm_cm.sbdev_path);
2406*25cf1a30Sjl 
2407*25cf1a30Sjl 		if (t_ml != NULL)
2408*25cf1a30Sjl 			memlist_delete(t_ml);
2409*25cf1a30Sjl 
2410*25cf1a30Sjl 		return (-1);
2411*25cf1a30Sjl 	}
2412*25cf1a30Sjl 
2413*25cf1a30Sjl 	PR_MEM("%s: found target %s for source %s\n",
2414*25cf1a30Sjl 		f,
2415*25cf1a30Sjl 		c_mp->sbm_cm.sbdev_path,
2416*25cf1a30Sjl 		s_mp->sbm_cm.sbdev_path);
2417*25cf1a30Sjl 
2418*25cf1a30Sjl 	s_mp->sbm_peer = c_mp;
2419*25cf1a30Sjl 	s_mp->sbm_flags |= DR_MFLAG_SOURCE;
2420*25cf1a30Sjl 	s_mp->sbm_del_mlist = d_ml;	/* spans to be deleted, if any */
2421*25cf1a30Sjl 	s_mp->sbm_mlist = s_ml;
2422*25cf1a30Sjl 	s_mp->sbm_cm.sbdev_busy = 1;
2423*25cf1a30Sjl 
2424*25cf1a30Sjl 	c_mp->sbm_peer = s_mp;
2425*25cf1a30Sjl 	c_mp->sbm_flags |= DR_MFLAG_TARGET;
2426*25cf1a30Sjl 	c_mp->sbm_del_mlist = t_ml;	/* spans to be deleted */
2427*25cf1a30Sjl 	c_mp->sbm_mlist = t_ml;
2428*25cf1a30Sjl 	c_mp->sbm_cm.sbdev_busy = 1;
2429*25cf1a30Sjl 
2430*25cf1a30Sjl 	return (0);
2431*25cf1a30Sjl }
2432*25cf1a30Sjl 
2433*25cf1a30Sjl /*
2434*25cf1a30Sjl  * Returns target preference rank:
2435*25cf1a30Sjl  *     -1 not a valid copy-rename target board
2436*25cf1a30Sjl  *	0 copy all source, source/target same size
2437*25cf1a30Sjl  *	1 copy all source, larger target
2438*25cf1a30Sjl  * 	2 copy nonrelocatable source span
2439*25cf1a30Sjl  */
2440*25cf1a30Sjl static dr_target_pref_t
2441*25cf1a30Sjl dr_get_target_preference(dr_handle_t *hp,
2442*25cf1a30Sjl     dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
2443*25cf1a30Sjl     struct memlist *t_ml, struct memlist *s_ml,
2444*25cf1a30Sjl     struct memlist *b_ml)
2445*25cf1a30Sjl {
2446*25cf1a30Sjl 	dr_target_pref_t preference;
2447*25cf1a30Sjl 	struct memlist *s_nonreloc_ml = NULL;
2448*25cf1a30Sjl 	drmachid_t t_id;
2449*25cf1a30Sjl 	static fn_t	f = "dr_get_target_preference";
2450*25cf1a30Sjl 
2451*25cf1a30Sjl 	t_id = t_mp->sbm_cm.sbdev_bp->b_id;
2452*25cf1a30Sjl 
2453*25cf1a30Sjl 	/*
2454*25cf1a30Sjl 	 * Can the entire source board be copied?
2455*25cf1a30Sjl 	 */
2456*25cf1a30Sjl 	if (dr_memlist_canfit(s_ml, t_ml, s_mp, t_mp)) {
2457*25cf1a30Sjl 		if (s_mp->sbm_npages == t_mp->sbm_npages)
2458*25cf1a30Sjl 			preference = DR_TP_SAME;	/* same size */
2459*25cf1a30Sjl 		else
2460*25cf1a30Sjl 			preference = DR_TP_LARGE;	/* larger target */
2461*25cf1a30Sjl 	} else {
2462*25cf1a30Sjl 		/*
2463*25cf1a30Sjl 		 * Entire source won't fit so try non-relocatable memory only
2464*25cf1a30Sjl 		 * (target aligned).
2465*25cf1a30Sjl 		 */
2466*25cf1a30Sjl 		s_nonreloc_ml = dr_get_nonreloc_mlist(b_ml, s_mp);
2467*25cf1a30Sjl 		if (s_nonreloc_ml == NULL) {
2468*25cf1a30Sjl 			PR_MEM("%s: dr_get_nonreloc_mlist failed\n", f);
2469*25cf1a30Sjl 			preference = DR_TP_INVALID;
2470*25cf1a30Sjl 		}
2471*25cf1a30Sjl 		if (dr_memlist_canfit(s_nonreloc_ml, t_ml, s_mp, t_mp))
2472*25cf1a30Sjl 			preference = DR_TP_NONRELOC;
2473*25cf1a30Sjl 		else
2474*25cf1a30Sjl 			preference = DR_TP_INVALID;
2475*25cf1a30Sjl 	}
2476*25cf1a30Sjl 
2477*25cf1a30Sjl 	if (s_nonreloc_ml != NULL)
2478*25cf1a30Sjl 		memlist_delete(s_nonreloc_ml);
2479*25cf1a30Sjl 
2480*25cf1a30Sjl 	/*
2481*25cf1a30Sjl 	 * Force floating board preference lower than all other boards
2482*25cf1a30Sjl 	 * if the force flag is present; otherwise disallow the board.
2483*25cf1a30Sjl 	 */
2484*25cf1a30Sjl 	if ((preference != DR_TP_INVALID) && drmach_board_is_floating(t_id)) {
2485*25cf1a30Sjl 		if (dr_cmd_flags(hp) & SBD_FLAG_FORCE)
2486*25cf1a30Sjl 			preference += DR_TP_FLOATING;
2487*25cf1a30Sjl 		else
2488*25cf1a30Sjl 			preference = DR_TP_INVALID;
2489*25cf1a30Sjl 	}
2490*25cf1a30Sjl 
2491*25cf1a30Sjl 	PR_MEM("%s: %s preference=%d\n", f, t_mp->sbm_cm.sbdev_path,
2492*25cf1a30Sjl 	    preference);
2493*25cf1a30Sjl 
2494*25cf1a30Sjl 	return (preference);
2495*25cf1a30Sjl }
2496*25cf1a30Sjl 
2497*25cf1a30Sjl /*
2498*25cf1a30Sjl  * Create a memlist representing the source memory that will be copied to
2499*25cf1a30Sjl  * the target board.  The memory to be copied is the maximum amount that
2500*25cf1a30Sjl  * will fit on the target board.
2501*25cf1a30Sjl  */
2502*25cf1a30Sjl static struct memlist *
2503*25cf1a30Sjl dr_get_copy_mlist(struct memlist *s_mlist, struct memlist *t_mlist,
2504*25cf1a30Sjl     dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
2505*25cf1a30Sjl {
2506*25cf1a30Sjl 	struct memlist	*t_ml, *s_copy_ml, *s_del_ml, *ml, *x_ml;
2507*25cf1a30Sjl 	uint64_t	s_slice_mask, s_slice_base;
2508*25cf1a30Sjl 	uint64_t	t_slice_mask, t_slice_base;
2509*25cf1a30Sjl 	static fn_t	f = "dr_get_copy_mlist";
2510*25cf1a30Sjl 
2511*25cf1a30Sjl 	ASSERT(s_mlist != NULL);
2512*25cf1a30Sjl 	ASSERT(t_mlist != NULL);
2513*25cf1a30Sjl 	ASSERT(t_mp->sbm_slice_size == s_mp->sbm_slice_size);
2514*25cf1a30Sjl 
2515*25cf1a30Sjl 	s_slice_mask = s_mp->sbm_slice_size - 1;
2516*25cf1a30Sjl 	s_slice_base = s_mlist->address & ~s_slice_mask;
2517*25cf1a30Sjl 
2518*25cf1a30Sjl 	t_slice_mask = t_mp->sbm_slice_size - 1;
2519*25cf1a30Sjl 	t_slice_base = t_mlist->address & ~t_slice_mask;
2520*25cf1a30Sjl 
2521*25cf1a30Sjl 	t_ml = memlist_dup(t_mlist);
2522*25cf1a30Sjl 	s_del_ml = memlist_dup(s_mlist);
2523*25cf1a30Sjl 	s_copy_ml = memlist_dup(s_mlist);
2524*25cf1a30Sjl 
2525*25cf1a30Sjl 	/* XXX */
2526*25cf1a30Sjl 	ASSERT(t_ml != NULL);
2527*25cf1a30Sjl 	ASSERT(s_del_ml != NULL);
2528*25cf1a30Sjl 	ASSERT(s_copy_ml != NULL);
2529*25cf1a30Sjl 
2530*25cf1a30Sjl 	/*
2531*25cf1a30Sjl 	 * To construct the source copy memlist:
2532*25cf1a30Sjl 	 *
2533*25cf1a30Sjl 	 * The target memlist is converted to the post-rename
2534*25cf1a30Sjl 	 * source addresses.  This is the physical address range
2535*25cf1a30Sjl 	 * the target will have after the copy-rename.  Overlaying
2536*25cf1a30Sjl 	 * and deleting this from the current source memlist will
2537*25cf1a30Sjl 	 * give the source delete memlist.  The copy memlist is
2538*25cf1a30Sjl 	 * the reciprocal of the source delete memlist.
2539*25cf1a30Sjl 	 */
2540*25cf1a30Sjl 	for (ml = t_ml; ml != NULL; ml = ml->next) {
2541*25cf1a30Sjl 		/*
2542*25cf1a30Sjl 		 * Normalize relative to target slice base PA
2543*25cf1a30Sjl 		 * in order to preseve slice offsets.
2544*25cf1a30Sjl 		 */
2545*25cf1a30Sjl 		ml->address -= t_slice_base;
2546*25cf1a30Sjl 		/*
2547*25cf1a30Sjl 		 * Convert to source slice PA address.
2548*25cf1a30Sjl 		 */
2549*25cf1a30Sjl 		ml->address += s_slice_base;
2550*25cf1a30Sjl 	}
2551*25cf1a30Sjl 
2552*25cf1a30Sjl 	for (ml = t_ml; ml != NULL; ml = ml->next) {
2553*25cf1a30Sjl 		s_del_ml = memlist_del_span(s_del_ml, ml->address, ml->size);
2554*25cf1a30Sjl 	}
2555*25cf1a30Sjl 
2556*25cf1a30Sjl 	/*
2557*25cf1a30Sjl 	 * Expand the delete mlist to fully include any dynamic segments
2558*25cf1a30Sjl 	 * it intersects with.
2559*25cf1a30Sjl 	 */
2560*25cf1a30Sjl 	for (x_ml = NULL, ml = s_del_ml; ml != NULL; ml = ml->next) {
2561*25cf1a30Sjl 		uint64_t del_base = ml->address;
2562*25cf1a30Sjl 		uint64_t del_end = ml->address + ml->size;
2563*25cf1a30Sjl 		struct memlist *dyn;
2564*25cf1a30Sjl 
2565*25cf1a30Sjl 		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL; dyn = dyn->next) {
2566*25cf1a30Sjl 			uint64_t dyn_base = dyn->address;
2567*25cf1a30Sjl 			uint64_t dyn_end = dyn->address + dyn->size;
2568*25cf1a30Sjl 
2569*25cf1a30Sjl 			if (del_base > dyn_base && del_base < dyn_end)
2570*25cf1a30Sjl 				del_base = dyn_base;
2571*25cf1a30Sjl 
2572*25cf1a30Sjl 			if (del_end > dyn_base && del_end < dyn_end)
2573*25cf1a30Sjl 				del_end = dyn_end;
2574*25cf1a30Sjl 		}
2575*25cf1a30Sjl 
2576*25cf1a30Sjl 		x_ml = memlist_cat_span(x_ml, del_base, del_end - del_base);
2577*25cf1a30Sjl 	}
2578*25cf1a30Sjl 
2579*25cf1a30Sjl 	memlist_delete(s_del_ml);
2580*25cf1a30Sjl 	s_del_ml = x_ml;
2581*25cf1a30Sjl 
2582*25cf1a30Sjl 	for (ml = s_del_ml; ml != NULL; ml = ml->next) {
2583*25cf1a30Sjl 		s_copy_ml = memlist_del_span(s_copy_ml, ml->address, ml->size);
2584*25cf1a30Sjl 	}
2585*25cf1a30Sjl 
2586*25cf1a30Sjl 	PR_MEM("%s: source delete mlist\n", f);
2587*25cf1a30Sjl 	PR_MEMLIST_DUMP(s_del_ml);
2588*25cf1a30Sjl 
2589*25cf1a30Sjl 	PR_MEM("%s: source copy mlist\n", f);
2590*25cf1a30Sjl 	PR_MEMLIST_DUMP(s_copy_ml);
2591*25cf1a30Sjl 
2592*25cf1a30Sjl 	memlist_delete(t_ml);
2593*25cf1a30Sjl 	memlist_delete(s_del_ml);
2594*25cf1a30Sjl 
2595*25cf1a30Sjl 	return (s_copy_ml);
2596*25cf1a30Sjl }
2597*25cf1a30Sjl 
2598*25cf1a30Sjl /*
2599*25cf1a30Sjl  * Scan the non-relocatable spans on the source memory
2600*25cf1a30Sjl  * and construct a minimum mlist that includes all non-reloc
2601*25cf1a30Sjl  * memory subject to target alignment, and dynamic segment
2602*25cf1a30Sjl  * constraints where only whole dynamic segments may be deleted.
2603*25cf1a30Sjl  */
2604*25cf1a30Sjl static struct memlist *
2605*25cf1a30Sjl dr_get_nonreloc_mlist(struct memlist *s_ml, dr_mem_unit_t *s_mp)
2606*25cf1a30Sjl {
2607*25cf1a30Sjl 	struct memlist	*x_ml = NULL;
2608*25cf1a30Sjl 	struct memlist	*ml;
2609*25cf1a30Sjl 	static fn_t	f = "dr_get_nonreloc_mlist";
2610*25cf1a30Sjl 
2611*25cf1a30Sjl 	PR_MEM("%s: checking for split of dyn seg list:\n", f);
2612*25cf1a30Sjl 	PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
2613*25cf1a30Sjl 
2614*25cf1a30Sjl 	for (ml = s_ml; ml; ml = ml->next) {
2615*25cf1a30Sjl 		int rv;
2616*25cf1a30Sjl 		uint64_t nr_base, nr_end;
2617*25cf1a30Sjl 		memquery_t mq;
2618*25cf1a30Sjl 		struct memlist *dyn;
2619*25cf1a30Sjl 
2620*25cf1a30Sjl 		rv = kphysm_del_span_query(
2621*25cf1a30Sjl 			_b64top(ml->address), _b64top(ml->size), &mq);
2622*25cf1a30Sjl 		if (rv) {
2623*25cf1a30Sjl 			memlist_delete(x_ml);
2624*25cf1a30Sjl 			return (NULL);
2625*25cf1a30Sjl 		}
2626*25cf1a30Sjl 
2627*25cf1a30Sjl 		if (mq.nonrelocatable == 0)
2628*25cf1a30Sjl 			continue;
2629*25cf1a30Sjl 
2630*25cf1a30Sjl 		PR_MEM("%s: non-reloc span: 0x%lx, 0x%lx (%lx, %lx)\n", f,
2631*25cf1a30Sjl 			_ptob64(mq.first_nonrelocatable),
2632*25cf1a30Sjl 			_ptob64(mq.last_nonrelocatable),
2633*25cf1a30Sjl 			mq.first_nonrelocatable,
2634*25cf1a30Sjl 			mq.last_nonrelocatable);
2635*25cf1a30Sjl 
2636*25cf1a30Sjl 		/*
2637*25cf1a30Sjl 		 * Align the span at both ends to allow for possible
2638*25cf1a30Sjl 		 * cage expansion.
2639*25cf1a30Sjl 		 */
2640*25cf1a30Sjl 		nr_base = _ptob64(mq.first_nonrelocatable);
2641*25cf1a30Sjl 		nr_end = _ptob64(mq.last_nonrelocatable + 1);
2642*25cf1a30Sjl 
2643*25cf1a30Sjl 		PR_MEM("%s: adjusted non-reloc span: 0x%lx, 0x%lx\n",
2644*25cf1a30Sjl 			f, nr_base, nr_end);
2645*25cf1a30Sjl 
2646*25cf1a30Sjl 		/*
2647*25cf1a30Sjl 		 * Expand the non-reloc span to fully include any
2648*25cf1a30Sjl 		 * dynamic segments it intersects with.
2649*25cf1a30Sjl 		 */
2650*25cf1a30Sjl 		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL; dyn = dyn->next) {
2651*25cf1a30Sjl 			uint64_t dyn_base = dyn->address;
2652*25cf1a30Sjl 			uint64_t dyn_end = dyn->address + dyn->size;
2653*25cf1a30Sjl 
2654*25cf1a30Sjl 			if (nr_base > dyn_base && nr_base < dyn_end)
2655*25cf1a30Sjl 				nr_base = dyn_base;
2656*25cf1a30Sjl 
2657*25cf1a30Sjl 			if (nr_end > dyn_base && nr_end < dyn_end)
2658*25cf1a30Sjl 				nr_end = dyn_end;
2659*25cf1a30Sjl 		}
2660*25cf1a30Sjl 
2661*25cf1a30Sjl 		x_ml = memlist_cat_span(x_ml, nr_base, nr_end - nr_base);
2662*25cf1a30Sjl 	}
2663*25cf1a30Sjl 
2664*25cf1a30Sjl 	if (x_ml == NULL) {
2665*25cf1a30Sjl 		PR_MEM("%s: source didn't have any non-reloc pages!\n", f);
2666*25cf1a30Sjl 		return (NULL);
2667*25cf1a30Sjl 	}
2668*25cf1a30Sjl 
2669*25cf1a30Sjl 	PR_MEM("%s: %s: edited source memlist:\n", f, s_mp->sbm_cm.sbdev_path);
2670*25cf1a30Sjl 	PR_MEMLIST_DUMP(x_ml);
2671*25cf1a30Sjl 
2672*25cf1a30Sjl 	return (x_ml);
2673*25cf1a30Sjl }
2674*25cf1a30Sjl 
2675*25cf1a30Sjl /*
2676*25cf1a30Sjl  * Check if source memlist can fit in target memlist while maintaining
2677*25cf1a30Sjl  * relative offsets within board.
2678*25cf1a30Sjl  */
2679*25cf1a30Sjl static int
2680*25cf1a30Sjl dr_memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist,
2681*25cf1a30Sjl     dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
2682*25cf1a30Sjl {
2683*25cf1a30Sjl 	int		canfit = 0;
2684*25cf1a30Sjl 	struct memlist	*s_ml, *t_ml, *ml;
2685*25cf1a30Sjl 	uint64_t	s_slice_mask, t_slice_mask;
2686*25cf1a30Sjl 	static fn_t	f = "dr_mlist_canfit";
2687*25cf1a30Sjl 
2688*25cf1a30Sjl 	s_ml = memlist_dup(s_mlist);
2689*25cf1a30Sjl 	t_ml = memlist_dup(t_mlist);
2690*25cf1a30Sjl 
2691*25cf1a30Sjl 	if (s_ml == NULL || t_ml == NULL) {
2692*25cf1a30Sjl 		cmn_err(CE_WARN, "%s: memlist_dup failed\n", f);
2693*25cf1a30Sjl 		goto done;
2694*25cf1a30Sjl 	}
2695*25cf1a30Sjl 
2696*25cf1a30Sjl 	s_slice_mask = s_mp->sbm_slice_size - 1;
2697*25cf1a30Sjl 	t_slice_mask = t_mp->sbm_slice_size - 1;
2698*25cf1a30Sjl 
2699*25cf1a30Sjl 	/*
2700*25cf1a30Sjl 	 * Normalize to slice relative offsets.
2701*25cf1a30Sjl 	 */
2702*25cf1a30Sjl 	for (ml = s_ml; ml; ml = ml->next)
2703*25cf1a30Sjl 		ml->address &= s_slice_mask;
2704*25cf1a30Sjl 
2705*25cf1a30Sjl 	for (ml = t_ml; ml; ml = ml->next)
2706*25cf1a30Sjl 		ml->address &= t_slice_mask;
2707*25cf1a30Sjl 
2708*25cf1a30Sjl 	canfit = memlist_canfit(s_ml, t_ml);
2709*25cf1a30Sjl done:
2710*25cf1a30Sjl 	memlist_delete(s_ml);
2711*25cf1a30Sjl 	memlist_delete(t_ml);
2712*25cf1a30Sjl 
2713*25cf1a30Sjl 	return (canfit);
2714*25cf1a30Sjl }
2715*25cf1a30Sjl 
2716*25cf1a30Sjl /*
2717*25cf1a30Sjl  * Memlist support.
2718*25cf1a30Sjl  */
2719*25cf1a30Sjl 
2720*25cf1a30Sjl /*
2721*25cf1a30Sjl  * Determine whether the source memlist (s_mlist) will
2722*25cf1a30Sjl  * fit into the target memlist (t_mlist) in terms of
2723*25cf1a30Sjl  * size and holes.  Assumes the caller has normalized the
2724*25cf1a30Sjl  * memlist physical addresses for comparison.
2725*25cf1a30Sjl  */
2726*25cf1a30Sjl static int
2727*25cf1a30Sjl memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist)
2728*25cf1a30Sjl {
2729*25cf1a30Sjl 	int		rv = 0;
2730*25cf1a30Sjl 	struct memlist	*s_ml, *t_ml;
2731*25cf1a30Sjl 
2732*25cf1a30Sjl 	if ((s_mlist == NULL) || (t_mlist == NULL))
2733*25cf1a30Sjl 		return (0);
2734*25cf1a30Sjl 
2735*25cf1a30Sjl 	s_ml = s_mlist;
2736*25cf1a30Sjl 	for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->next) {
2737*25cf1a30Sjl 		uint64_t	s_start, s_end;
2738*25cf1a30Sjl 		uint64_t	t_start, t_end;
2739*25cf1a30Sjl 
2740*25cf1a30Sjl 		t_start = t_ml->address;
2741*25cf1a30Sjl 		t_end = t_start + t_ml->size;
2742*25cf1a30Sjl 
2743*25cf1a30Sjl 		for (; s_ml; s_ml = s_ml->next) {
2744*25cf1a30Sjl 			s_start = s_ml->address;
2745*25cf1a30Sjl 			s_end = s_start + s_ml->size;
2746*25cf1a30Sjl 
2747*25cf1a30Sjl 			if ((s_start < t_start) || (s_end > t_end))
2748*25cf1a30Sjl 				break;
2749*25cf1a30Sjl 		}
2750*25cf1a30Sjl 	}
2751*25cf1a30Sjl 
2752*25cf1a30Sjl 	/*
2753*25cf1a30Sjl 	 * If we ran out of source memlist chunks that mean
2754*25cf1a30Sjl 	 * we found a home for all of them.
2755*25cf1a30Sjl 	 */
2756*25cf1a30Sjl 	if (s_ml == NULL)
2757*25cf1a30Sjl 		rv = 1;
2758*25cf1a30Sjl 
2759*25cf1a30Sjl 	return (rv);
2760*25cf1a30Sjl }
2761