xref: /illumos-gate/usr/src/uts/sun4u/io/sbd_mem.c (revision 56f33205)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * memory support routines for sbd.
29  */
30 
31 #include <sys/debug.h>
32 #include <sys/types.h>
33 #include <sys/errno.h>
34 #include <sys/param.h>
35 #include <sys/dditypes.h>
36 #include <sys/kmem.h>
37 #include <sys/conf.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/sunndi.h>
41 #include <sys/ddi_impldefs.h>
42 #include <sys/sysmacros.h>
43 #include <sys/machsystm.h>
44 #include <sys/spitregs.h>
45 #include <sys/cpuvar.h>
46 #include <sys/cpu_module.h>
47 #include <sys/promif.h>
48 #include <sys/memlist_impl.h>
49 #include <sys/mem_cage.h>
50 #include <sys/lgrp.h>
51 #include <sys/platform_module.h>
52 #include <vm/seg_kmem.h>
53 
54 #include <sys/sbdpriv.h>
55 
56 #define	_ptob64(p) ((uint64_t)(p) << PAGESHIFT)
57 #define	_b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
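/*
 * Illustrative sketch only (not part of the driver): these macros convert
 * between page frame numbers and 64-bit byte addresses via PAGESHIFT.
 * Assuming 8K pages (PAGESHIFT == 13), a mem-unit whose memory begins at
 * PFN 0x200000 works out as:
 *
 *	uint64_t base_pa = _ptob64(0x200000);	 yields 0x400000000 (16 GB)
 *	pgcnt_t  npgs    = _b64top(0x400000000); yields 0x200000 pages
 */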
58 
59 static int		sbd_post_detach_mem_unit(sbd_mem_unit_t *mp,
60 				sbderror_t *ep);
61 static int		sbd_reserve_mem_spans(memhandle_t *mhp,
62 					struct memlist *mlist);
63 static int		sbd_check_boundaries(struct memlist *orig_memlist,
64 					sbd_mem_unit_t *s_mp,
65 					sbd_mem_unit_t *t_mp);
66 static int		sbd_select_mem_target(sbd_handle_t *hp,
67 				sbd_mem_unit_t *mp, struct memlist *ml);
68 static void		sbd_init_mem_unit_data(sbd_mem_unit_t *mp, sbderror_t
69 					*ep);
70 static int		memlist_canfit(struct memlist *s_mlist,
71 					struct memlist *t_mlist);
72 static void		sbd_mem_cleanup(sbd_mem_unit_t *s_mp,
73 				sbd_mem_unit_t *t_mp, sbderror_t *ep);
74 static void		sbd_flush_ecache(uint64_t a, uint64_t b);
75 
76 struct memlist *
77 sbd_get_memlist(sbd_mem_unit_t *mp, sbderror_t *ep)
78 {
79 	struct memlist	*mlist;
80 	static fn_t	f = "sbd_get_memlist";
81 	sbd_board_t 	*sbp = (sbd_board_t *)mp->sbm_cm.sbdev_sbp;
82 	sbdp_handle_t	*hdp;
83 	sbd_handle_t	*hp = MACHBD2HD(sbp);
84 
85 	PR_MEM("%s...\n", f);
86 
87 	/*
88 	 * Return cached memlist, if present.
89 	 * This memlist will be present following an
90 	 * unconfigure (a.k.a. detach) of this memunit.
91 	 * It should only be used in the case where a configure
92 	 * is bringing this memunit back in without going
93 	 * through the disconnect and connect states.
94 	 */
95 	if (mp->sbm_mlist) {
96 		PR_MEM("%s: found cached memlist\n", f);
97 
98 		mlist = memlist_dup(mp->sbm_mlist);
99 	} else {
100 		/* attempt to construct a memlist using phys_install */
101 
102 		/*
103 		 * NOTE: this code block assumes only one memunit per
104 		 * board.  This is currently safe because the function
105 		 * sbd_init_mem_devlist() forces this assumption to be
106 		 * valid.
107 		 */
108 
109 		/* round down to slice base address */
110 		/* build mlist from the lower layer */
111 		hdp = sbd_get_sbdp_handle(sbp, hp);
112 		mlist = sbdp_get_memlist(hdp, mp->sbm_cm.sbdev_dip);
113 		if (mlist == NULL) {
114 			SBD_GET_PERR(hdp->h_err, ep);
115 			PR_MEM("sbd:%s: failed to get memlist for "
116 				"dip (0x%p) ecode %d errno %d", f,
117 				(void *)mp->sbm_cm.sbdev_dip,
118 				ep->e_code, ep->e_errno);
119 			sbd_release_sbdp_handle(hdp);
120 			return (NULL);
121 		}
122 		sbd_release_sbdp_handle(hdp);
123 	}
124 
125 	PR_MEM("%s: memlist for mem-unit (%d.%d), dip 0x%p:\n",
126 		f, sbp->sb_num,
127 		mp->sbm_cm.sbdev_unum,
128 		(void *)mp->sbm_cm.sbdev_dip);
129 	SBD_MEMLIST_DUMP(mlist);
130 
131 	return (mlist);
132 }
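/*
 * Hedged usage sketch (mirroring the callers in this file, e.g.
 * sbd_init_mem_unit_data): the caller owns the list returned by
 * sbd_get_memlist() and must free it with memlist_delete() when done.
 *
 *	struct memlist	*ml, *mlist;
 *	pgcnt_t		npages = 0;
 *
 *	mlist = sbd_get_memlist(mp, ep);
 *	if (mlist == NULL)
 *		return;
 *	for (ml = mlist; ml != NULL; ml = ml->ml_next)
 *		npages += btop(ml->ml_size);
 *	memlist_delete(mlist);
 */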
133 
134 int
135 sbd_pre_attach_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum)
136 {
137 	int		err_flag = 0;
138 	sbderror_t	*ep = SBD_HD2ERR(hp);
139 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
140 	int		d, i;
141 	sbdp_handle_t	*hdp;
142 	static fn_t	f = "sbd_pre_attach_mem";
143 
144 	PR_MEM("%s...\n", f);
145 
146 	SBD_SET_ERR(ep, 0);
147 	hdp = sbd_get_sbdp_handle(sbp, hp);
148 
149 	for (d = 0; d < devnum; d++) {
150 		sbd_mem_unit_t	*mp;
151 		int		unit;
152 		dev_info_t	*dip;
153 		sbd_istate_t	state;
154 		int		rv;
155 
156 		/* sbd_get_devlist will not return a devlist element w/ dip of 0 */
157 		ASSERT(devlist[d].dv_dip != NULL);
158 
159 		dip = devlist[d].dv_dip;
160 		unit = sbdp_get_unit_num(hdp, dip);
161 		if (unit == -1) {
162 			if (hp->h_flags & SBD_IOCTL_FLAG_FORCE)
163 				continue;
164 			else {
165 				SBD_GET_PERR(hdp->h_err, ep);
166 				err_flag = 1;
167 				break;
168 			}
169 		}
170 
171 		mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
172 
173 		ASSERT(mp->sbm_cm.sbdev_sbp == sbp);
174 		ASSERT(unit == mp->sbm_cm.sbdev_unum);
175 
176 		PR_MEM("sbd: OS attach mem-unit (%d.%d)\n",
177 			sbp->sb_num,
178 			mp->sbm_cm.sbdev_unum);
179 
180 		state = mp->sbm_cm.sbdev_state;
181 		switch (state) {
182 		case SBD_STATE_UNCONFIGURED:
183 			/* use memlist cached by sbd_post_detach_mem_unit */
184 			if (mp->sbm_mlist != NULL) {
185 				PR_MEM("%s: recovering from UNCONFIG"
186 					" mem-unit (%d.%d)\n",
187 					f, sbp->sb_num,
188 					mp->sbm_cm.sbdev_unum);
189 
190 				PR_MEM("%s: re-configure cached memlist:\n", f);
191 				SBD_MEMLIST_DUMP(mp->sbm_mlist);
192 
193 				/*
194 				 * kphysm del handle should have been freed
195 				 */
196 				ASSERT((mp->sbm_flags & SBD_MFLAG_RELOWNER)
197 					== 0);
198 			} else {
199 				if (hp->h_flags & SBD_IOCTL_FLAG_FORCE)
200 					continue;
201 				else {
202 					SBD_GET_PERR(hdp->h_err, ep);
203 					err_flag = 1;
204 					PR_MEM("%s: mem-unit (%d.%d)"
205 						" unusable\n",
206 						f, sbp->sb_num,
207 						mp->sbm_cm.sbdev_unum);
208 					break;
209 				}
210 			}
211 
212 			/*FALLTHROUGH*/
213 
214 		case SBD_STATE_CONNECTED:
215 			PR_MEM("%s: reprogramming mem hardware (board %d)\n",
216 				f, sbp->sb_num);
217 
218 			for (i = 0; i < SBD_NUM_MC_PER_BOARD; i++) {
219 				if (mp->sbm_dip[i] == NULL)
220 					continue;
221 				dip = mp->sbm_dip[i];
222 
223 				PR_MEM("%s: enabling mc 0x%p on board %d\n",
224 					f, (void *)dip, sbp->sb_num);
225 
226 				rv = sbdphw_enable_memctrl(hdp, dip);
227 				if (rv < 0) {
228 					SBD_GET_PERR(hdp->h_err, ep);
229 					cmn_err(CE_WARN,
230 					"%s: failed to program mem ctrlr %p on "
231 					"board %d", f, (void *)mp->sbm_dip[i],
232 					sbp->sb_num);
233 					err_flag = 1;
234 				}
235 			}
236 			break;
237 
238 		default:
239 			cmn_err(CE_WARN,
240 				"%s: unexpected state (%d) for mem-unit "
241 				"(%d.%d)", f, state, sbp->sb_num,
242 				mp->sbm_cm.sbdev_unum);
243 			if (SBD_GET_ERR(ep) == 0) {
244 				SBD_SET_ERR(ep, ESBD_STATE);
245 				err_flag = 1;
246 			}
247 			break;
248 		}
249 
250 		/* exit for loop if error encountered */
251 		if (err_flag) {
252 			SBD_SET_ERRSTR(ep,
253 			    sbp->sb_mempath[mp->sbm_cm.sbdev_unum]);
254 			break;
255 		}
256 	}
257 	sbd_release_sbdp_handle(hdp);
258 
259 	return (err_flag ? -1 : 0);
260 }
261 
262 int
263 sbd_post_attach_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum)
264 {
265 	int		d;
266 	sbdp_handle_t	*hdp;
267 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
268 	sbderror_t	*ep = SBD_HD2ERR(hp);
269 	static fn_t	f = "sbd_post_attach_mem";
270 
271 	PR_MEM("%s...\n", f);
272 	hdp = sbd_get_sbdp_handle(sbp, hp);
273 
274 	for (d = 0; d < devnum; d++) {
275 		sbd_mem_unit_t	*mp;
276 		dev_info_t	*dip;
277 		int		unit;
278 		struct memlist	*mlist, *ml;
279 
280 		/* sbd_get_devlist will not return a devlist element w/ dip of 0 */
281 		ASSERT(devlist[d].dv_dip != NULL);
282 
283 		dip = devlist[d].dv_dip;
284 		unit = sbdp_get_unit_num(hdp, dip);
285 		if (unit == -1) {
286 			if (hp->h_flags & SBD_IOCTL_FLAG_FORCE)
287 				continue;
288 			else {
289 				SBD_GET_PERR(hdp->h_err, ep);
290 				break;
291 			}
292 		}
293 
294 		mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
295 
296 		mlist = sbd_get_memlist(mp, ep);
297 		if (mlist == NULL) {
298 			cmn_err(CE_WARN,
299 				"%s: no memlist for mem-unit (%d.%d)",
300 				f,
301 				sbp->sb_num,
302 				mp->sbm_cm.sbdev_unum);
303 
304 			if (SBD_GET_ERR(ep) == 0) {
305 				SBD_SET_ERR(ep, ESBD_MEMFAIL);
306 				SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
307 			}
308 
309 			continue;
310 		}
311 
312 		/*
313 		 * Verify the memory really did successfully attach
314 		 * by checking for its existence in phys_install.
315 		 */
316 
317 		memlist_read_lock();
318 		if (memlist_intersect(phys_install, mlist) == 0) {
319 			memlist_read_unlock();
320 
321 			cmn_err(CE_WARN,
322 				"%s: mem-unit (%d.%d) memlist not in"
323 				" phys_install", f, sbp->sb_num,
324 				mp->sbm_cm.sbdev_unum);
325 
326 			if (SBD_GET_ERR(ep) == 0) {
327 				SBD_SET_ERR(ep, ESBD_INTERNAL);
328 				SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
329 			}
330 
331 			memlist_delete(mlist);
332 			continue;
333 		}
334 		memlist_read_unlock();
335 
336 		for (ml = mlist; ml != NULL; ml = ml->ml_next) {
337 			(void) sbdp_mem_add_span(hdp, ml->ml_address,
338 			    ml->ml_size);
339 		}
340 
341 		memlist_delete(mlist);
342 
343 		/*
344 		 * Destroy cached memlist, if any.
345 		 * There will be a cached memlist in sbm_mlist if
346 		 * this board is being configured directly after
347 		 * an unconfigure.
348 		 * To support this transition, sbd_post_detach_mem
349 		 * left a copy of the last known memlist in sbm_mlist.
350 		 * This memlist could differ from any derived from
351 		 * hardware if while this memunit was last configured
352 		 * the system detected and deleted bad pages from
353 		 * phys_install.  The location of those bad pages
354 		 * will be reflected in the cached memlist.
355 		 */
356 		if (mp->sbm_mlist) {
357 			memlist_delete(mp->sbm_mlist);
358 			mp->sbm_mlist = NULL;
359 		}
360 		sbd_init_mem_unit_data(mp, ep);
361 	}
362 
363 	sbd_release_sbdp_handle(hdp);
364 	return (0);
365 }
366 
367 int
368 sbd_pre_detach_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum)
369 {
370 	int		d;
371 	int		unit;
372 	sbdp_handle_t	*hdp;
373 	sbderror_t	*ep = SBD_HD2ERR(hp);
374 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
375 	dev_info_t	*dip;
376 
377 	hdp = sbd_get_sbdp_handle(sbp, hp);
378 
379 	for (d = 0; d < devnum; d++) {
380 		sbd_mem_unit_t *mp;
381 
382 		/* sbd_get_devlist will not return a devlist element w/ dip of 0 */
383 		ASSERT(devlist[d].dv_dip != NULL);
384 
385 		dip = devlist[d].dv_dip;
386 		unit = sbdp_get_unit_num(hdp, dip);
387 		if (unit == -1) {
388 			if (hp->h_flags & SBD_IOCTL_FLAG_FORCE)
389 				continue;
390 			else {
391 				SBD_GET_PERR(hdp->h_err, ep);
392 				sbd_release_sbdp_handle(hdp);
393 				return (-1);
394 			}
395 		}
396 
397 		mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
398 
399 		/* sanity check */
400 		ASSERT(mp->sbm_cm.sbdev_sbp == sbp);
401 		ASSERT(unit == mp->sbm_cm.sbdev_unum);
402 
403 		PR_MEM("sbd: OS detach mem-unit (%d.%d)\n",
404 			sbp->sb_num, mp->sbm_cm.sbdev_unum);
405 	}
406 
407 	sbd_release_sbdp_handle(hdp);
408 	return (0);
409 }
410 
411 int
412 sbd_post_detach_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum)
413 {
414 	int		d, rv;
415 	sbdp_handle_t	*hdp;
416 	sbd_board_t	*sbp;
417 	sbd_mem_unit_t	*s_mp, *t_mp;
418 	static fn_t	f = "sbd_post_detach_mem";
419 
420 	PR_MEM("%s...\n", f);
421 
422 	sbp = SBDH2BD(hp->h_sbd);
423 
424 	hdp = sbd_get_sbdp_handle(sbp, hp);
425 
426 
427 	rv = 0;
428 	for (d = 0; d < devnum; d++) {
429 		sbderror_t	*ep;
430 		dev_info_t	*dip;
431 		int		unit;
432 
433 		/* sbd_get_devlist will not return a devlist element w/ dip of 0 */
434 		ASSERT(devlist[d].dv_dip != NULL);
435 
436 		ep = &devlist[d].dv_error;
437 		if ((SBD_GET_ERR(SBD_HD2ERR(hp)) != 0) ||
438 		    (sbd_set_err_in_hdl(hp, ep) == 0)) {
439 			rv = -1;
440 		}
441 
442 		dip = devlist[d].dv_dip;
443 		unit = sbdp_get_unit_num(hdp, dip);
444 		if (unit == -1) {
445 			if (hp->h_flags & SBD_IOCTL_FLAG_FORCE)
446 				continue;
447 			else {
448 				if (rv != -1)
449 					SBD_GET_PERR(hdp->h_err, ep);
450 				break;
451 			}
452 		}
453 
454 		s_mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
455 
456 		ASSERT(s_mp->sbm_cm.sbdev_sbp == sbp);
457 
458 		if (rv == -1) {
459 			if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) {
460 				t_mp = s_mp->sbm_peer;
461 			} else {
462 				/* there is no target unit */
463 				t_mp = NULL;
464 			}
465 
466 			sbd_mem_cleanup(s_mp, t_mp, ep);
467 		} else if (sbd_post_detach_mem_unit(s_mp, ep))
468 			rv = -1;
469 	}
470 
471 	sbd_release_sbdp_handle(hdp);
472 	return (rv);
473 }
474 
475 static void
476 sbd_add_memory_spans(sbd_board_t *sbp, struct memlist *ml)
477 {
478 	sbdp_handle_t	*hdp;
479 	static fn_t	f = "sbd_add_memory_spans";
480 
481 	PR_MEM("%s...", f);
482 	SBD_MEMLIST_DUMP(ml);
483 
484 #ifdef DEBUG
485 	memlist_read_lock();
486 	if (memlist_intersect(phys_install, ml)) {
487 		PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f);
488 	}
489 	memlist_read_unlock();
490 #endif
491 	hdp = sbd_get_sbdp_handle(NULL, NULL);
492 
493 	for (; ml; ml = ml->ml_next) {
494 		update_membounds_t umb;
495 		pfn_t	base;
496 		pgcnt_t	npgs;
497 		int	rv;
498 
499 		base = _b64top(ml->ml_address);
500 		npgs = _b64top(ml->ml_size);
501 
502 		umb.u_board = sbp->sb_num;
503 		umb.u_base = (uint64_t)base << MMU_PAGESHIFT;
504 		umb.u_len = (uint64_t)npgs << MMU_PAGESHIFT;
505 
506 		lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
507 		rv = kphysm_add_memory_dynamic(base, npgs);
508 
509 		(void) sbdp_mem_add_span(hdp, ml->ml_address, ml->ml_size);
510 
511 		if (rv != KPHYSM_OK) {
512 			cmn_err(CE_WARN, "sbd:%s:"
513 				" unexpected kphysm_add_memory_dynamic"
514 				" return value %d;"
515 				" basepfn=0x%lx, npages=%ld\n",
516 				f, rv, base, npgs);
517 
518 			continue;
519 		}
520 		rv = kcage_range_add(base, npgs, KCAGE_DOWN);
521 		if (rv != 0)
522 			continue;
523 	}
524 	sbd_release_sbdp_handle(hdp);
525 }
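/*
 * Usage note (summarizing the callers in this file): sbd_add_memory_spans()
 * re-introduces physical memory to the system one span at a time, e.g.
 *
 *	sbd_add_memory_spans(sbp, s_mp->sbm_del_mlist);
 *
 * as done by sbd_cancel_mem() to undo a partial memory delete, and by
 * sbd_post_detach_mem_unit() to restore the portion of a larger target
 * board that the source board's memory did not cover.
 */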
526 
527 /* hack for test scripts.  *** remove before code finalized *** */
528 int sbd_last_target;
529 
530 static int
531 sbd_post_detach_mem_unit(sbd_mem_unit_t *s_mp, sbderror_t *ep)
532 {
533 	uint64_t	sz;
534 	uint64_t	sm;
535 	uint64_t	t_basepa;
536 	uint64_t	tmp_basepa;
537 	uint64_t	s_basepa;
538 	sbd_board_t 	*sbp;
539 	sbdp_handle_t	*hdp;
540 	uint64_t	s_nbytes;
541 	uint64_t	s_new_basepa;
542 	sbd_mem_unit_t	*t_mp, *x_mp;
543 	struct memlist	*ml;
544 	int		rv;
545 	static fn_t	f = "sbd_post_detach_mem_unit";
546 	sbd_handle_t	*hp;
547 
548 	PR_MEM("%s...\n", f);
549 
550 	sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
551 	hp = MACHBD2HD(sbp);
552 	hdp = sbd_get_sbdp_handle(sbp, hp);
553 
554 	if (sbdp_get_mem_alignment(hdp, s_mp->sbm_cm.sbdev_dip, &sz)) {
555 		cmn_err(CE_WARN,
556 			"sbd:%s: no alignment for mem-unit (%d.%d)",
557 			f, sbp->sb_num, s_mp->sbm_cm.sbdev_unum);
558 		SBD_GET_PERR(hdp->h_err, ep);
559 		sbd_release_sbdp_handle(hdp);
560 		return (-1);
561 	}
562 	sm = sz - 1;
563 
564 	/* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */
565 	PR_MEM("%s: brd %d: deleted memlist (EMPTY maybe okay):\n",
566 		f, sbp->sb_num);
567 	SBD_MEMLIST_DUMP(s_mp->sbm_del_mlist);
568 
569 	/* sanity check */
570 	ASSERT(s_mp->sbm_del_mlist == NULL ||
571 		(s_mp->sbm_flags & SBD_MFLAG_RELDONE) != 0);
572 
573 	if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) {
574 		t_mp = s_mp->sbm_peer;
575 
576 		ASSERT(t_mp != NULL);
577 		ASSERT(t_mp->sbm_flags & SBD_MFLAG_TARGET);
578 		ASSERT(t_mp->sbm_peer == s_mp);
579 
580 		ASSERT(t_mp->sbm_flags & SBD_MFLAG_RELDONE);
581 		ASSERT(t_mp->sbm_del_mlist);
582 
583 		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
584 		PR_MEM("%s: target brd %d: deleted memlist:\n",
585 			f, sbp->sb_num);
586 		SBD_MEMLIST_DUMP(t_mp->sbm_del_mlist);
587 	} else {
588 		/* there is no target unit */
589 		t_mp = NULL;
590 	}
591 
592 	/*
593 	 * Verify the memory really did successfully detach
594 	 * by checking for its non-existence in phys_install.
595 	 */
596 	rv = 0;
597 	memlist_read_lock();
598 	if (s_mp->sbm_flags & SBD_MFLAG_RELDONE) {
599 		x_mp = s_mp;
600 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
601 	}
602 	if (rv == 0 && t_mp && (t_mp->sbm_flags & SBD_MFLAG_RELDONE)) {
603 		x_mp = t_mp;
604 		rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
605 	}
606 	memlist_read_unlock();
607 
608 	if (rv) {
609 		sbp = (sbd_board_t *)x_mp->sbm_cm.sbdev_sbp;
610 
611 		cmn_err(CE_WARN,
612 			"%s: %smem-unit (%d.%d) memlist still in phys_install",
613 			f,
614 			x_mp == t_mp ? "target " : "",
615 			sbp->sb_num,
616 			x_mp->sbm_cm.sbdev_unum);
617 		SBD_SET_ERR(ep, ESBD_INTERNAL);
618 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[x_mp->sbm_cm.sbdev_unum]);
619 		sbd_release_sbdp_handle(hdp);
620 		return (-1);
621 	}
622 
623 	s_basepa	= _ptob64(s_mp->sbm_basepfn);
624 	s_nbytes	= _ptob64(s_mp->sbm_npages);
625 
626 	if (t_mp != NULL) {
627 		t_basepa	= _ptob64(t_mp->sbm_basepfn);
628 		s_new_basepa	= (s_basepa & ~ sm) +
629 					_ptob64(t_mp->sbm_slice_offset);
630 
631 		/*
632 		 * We had to swap mem-units, so update
633 		 * memlists accordingly with new base
634 		 * addresses.
635 		 */
636 		for (ml = t_mp->sbm_mlist; ml; ml = ml->ml_next) {
637 			ml->ml_address -= t_basepa;
638 			ml->ml_address += s_new_basepa;
639 		}
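		/*
		 * Worked example with illustrative values only: for a
		 * 16 GB slice (sz == 0x400000000, sm == 0x3ffffffff),
		 * s_basepa == 0x400000000, t_basepa == 0x800000000 and a
		 * zero sbm_slice_offset give s_new_basepa == 0x400000000.
		 * A target span that began at 0x800000000 is rebased to
		 * 0x800000000 - t_basepa + s_new_basepa == 0x400000000,
		 * i.e. the target memory now answers at the source
		 * board's old addresses.
		 */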
640 
641 		/*
642 		 * There is no need to explicitly rename the target delete
643 		 * memlist, because sbm_del_mlist and sbm_mlist always
644 		 * point to the same memlist for a copy/rename operation.
645 		 */
646 		ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
647 
648 		PR_MEM("%s: renamed target memlist and delete memlist", f);
649 		SBD_MEMLIST_DUMP(t_mp->sbm_mlist);
650 
651 		for (ml = s_mp->sbm_mlist; ml; ml = ml->ml_next) {
652 			ml->ml_address -= s_basepa;
653 			ml->ml_address += t_basepa;
654 		}
655 
656 		PR_MEM("%s: renamed source memlist", f);
657 		SBD_MEMLIST_DUMP(s_mp->sbm_mlist);
658 
659 #ifdef DEBUG
660 		ASSERT(s_mp->sbm_mlist != s_mp->sbm_del_mlist);
661 		/*
662 		 * Renaming s_mp->sbm_del_mlist is not necessary.  This
663 		 * list is not used beyond this point, and in fact, is
664 		 *  disposed of at the end of this function.
665 		 */
666 		for (ml = s_mp->sbm_del_mlist; ml; ml = ml->ml_next) {
667 			ml->ml_address -= s_basepa;
668 			ml->ml_address += t_basepa;
669 		}
670 
671 		PR_MEM("%s: renamed source delete memlist", f);
672 		SBD_MEMLIST_DUMP(s_mp->sbm_del_mlist);
673 #endif
674 
675 		if (s_mp->sbm_flags & SBD_MFLAG_MEMUPSIZE) {
676 			struct memlist	*nl;
677 			int mlret;
678 
679 			/*
680 			 * We had to perform a copy-rename from a
681 			 * small memory node to a big memory node.
682 			 * Need to add back the remaining memory on
683 			 * the big board that wasn't used by the memory
684 			 * copied from the small board during the copy.
685 			 * Subtract out the portion of the target memory
686 			 * node that was taken over by the source memory
687 			 * node.
688 			 */
689 			nl = memlist_dup(t_mp->sbm_mlist);
690 			mlret = memlist_delete_span(s_basepa, s_nbytes, &nl);
691 			PR_MEM("%s: mlret = %d\n", f, mlret);
692 
693 			sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
694 			PR_MEM("%s: adding back remaining portion"
695 				" of mem-unit (%d.%d), memlist:\n",
696 				f, sbp->sb_num,
697 				t_mp->sbm_cm.sbdev_unum);
698 
699 			SBD_MEMLIST_DUMP(nl);
700 
701 			sbd_add_memory_spans(sbp, nl);
702 
703 			memlist_delete(nl);
704 		}
705 	}
706 
707 
708 	if (t_mp != NULL) {
709 		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
710 		hdp->h_board = sbp->sb_num;
711 		/* delete target's entire address space */
712 		tmp_basepa = t_basepa & ~ sm;
713 		rv = sbdp_mem_del_span(hdp, tmp_basepa, sz);
714 		ASSERT(rv == 0);
715 
716 		sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
717 		hdp->h_board = sbp->sb_num;
718 		tmp_basepa = s_basepa & ~ sm;
719 		sz = s_new_basepa & sm;
720 		/* delete source board's vacant address space */
721 		rv = sbdp_mem_del_span(hdp, tmp_basepa, sz);
722 		ASSERT(rv == 0);
723 	} else {
724 		sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
725 		hdp->h_board = sbp->sb_num;
726 		tmp_basepa = s_basepa & ~ sm;
727 		/* delete board's entire address space */
728 		rv = sbdp_mem_del_span(hdp, tmp_basepa, sz);
729 		ASSERT(rv == 0);
730 	}
731 
732 #ifdef LINT
733 	rv = rv;
734 #endif
735 
736 	sbd_mem_cleanup(s_mp, t_mp, ep);
737 
738 	sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
739 	PR_MEM("%s: board %d's memlist:", f, sbp->sb_num);
740 	SBD_MEMLIST_DUMP(s_mp->sbm_mlist);
741 
742 	sbd_release_sbdp_handle(hdp);
743 	return (0);
744 }
745 
746 static void
747 sbd_mem_cleanup(sbd_mem_unit_t *s_mp, sbd_mem_unit_t *t_mp, sbderror_t *ep)
748 {
749 	sbd_board_t *sbp;
750 
751 	/* clean up target mem unit */
752 	if (t_mp != NULL) {
753 		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
754 
755 		ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
756 		/*
757 		 * sbm_del_mlist and sbm_mlist point at the same list
758 		 * We only need to delete one and then set both pointers
759 		 * to NULL
760 		 */
761 		memlist_delete(t_mp->sbm_del_mlist);
762 
763 		t_mp->sbm_del_mlist = NULL;
764 		t_mp->sbm_mlist = NULL;
765 		t_mp->sbm_peer = NULL;
766 		t_mp->sbm_flags = 0;
767 		t_mp->sbm_cm.sbdev_busy = 0;
768 		sbd_init_mem_unit_data(t_mp, ep);
769 
770 		/*
771 		 * now that copy/rename has completed, undo this
772 		 * work that was done in sbd_release_mem_done.
773 		 */
774 		/*
775 		 * If there was an error, don't set the target to configured
776 		 */
777 		if (SBD_GET_ERR(ep) == 0) {
778 			SBD_DEV_CLR_UNREFERENCED(sbp, SBD_COMP_MEM, 0);
779 			SBD_DEV_CLR_RELEASED(sbp, SBD_COMP_MEM, 0);
780 			SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, 0,
781 				SBD_STATE_CONFIGURED);
782 		}
783 
784 /* hack for test scripts.  *** remove before code finalized *** */
785 sbd_last_target = sbp->sb_num;
786 	}
787 
788 	/*
789 	 * clean up (source) board's mem unit structure.
790 	 * NOTE: sbm_mlist is retained.  It is referred to as the
791 	 * cached memlist.  The cached memlist is used to re-attach
792 	 * (configure back in) this memunit from the unconfigured
793 	 * state.
794 	 */
795 	if (s_mp != NULL) {
796 		sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
797 
798 		/*
799 		 * Don't want to call memlist_delete for sbm_del_mlist,
800 		 * since that list points to the sbm_list
801 		 */
802 		s_mp->sbm_del_mlist = NULL;
803 		s_mp->sbm_peer = NULL;
804 		s_mp->sbm_flags = 0;
805 		s_mp->sbm_cm.sbdev_busy = 0;
806 		sbd_init_mem_unit_data(s_mp, ep);
807 	}
808 }
809 
810 /*
811  * Successful return from this function will have the memory
812  * handle in sbp->sb_dev[..mem-unit...].sbm_memhandle allocated
813  * and waiting.  This routine's job is to select the memory that
814  * actually has to be released (detached) which may not necessarily
815  * be the same memory node that came in via devlist[],
816  * i.e. a copy-rename is needed.
817  */
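/*
 * Hedged sketch (not a verbatim code path) of the kphysm delete-handle
 * lifecycle as this file uses it:
 *
 *	memhandle_t mh;
 *
 *	if (kphysm_del_gethandle(&mh) != KPHYSM_OK)	(allocate handle)
 *		return (-1);
 *	if (kphysm_del_span(mh, basepfn, npages) != KPHYSM_OK)	(reserve span)
 *		(void) kphysm_del_release(mh);		(undo on failure)
 *
 * sbd_pre_release_mem() below performs the allocate and reserve steps (the
 * latter via sbd_reserve_mem_spans()); the handle is released either here
 * on error or later by sbd_release_mem_done().
 */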
818 int
819 sbd_pre_release_mem(sbd_handle_t *hp, sbd_devlist_t devlist[], int devnum)
820 {
821 	extern int	kcage_on;
822 	int		d;
823 	int		err_flag = 0;
824 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
825 	sbderror_t	*ep = SBD_HD2ERR(hp);
826 	sbderror_t	*lep;
827 	static fn_t	f = "sbd_pre_release_mem";
828 
829 	PR_MEM("%s...\n", f);
830 
831 	if (kcage_on == 0) {
832 		/*
833 		 * Can't Detach memory if Cage is OFF.
834 		 */
835 		cmn_err(CE_WARN, "%s: kernel cage is disabled", f);
836 		SBD_SET_ERR(ep, ESBD_KCAGE_OFF);
837 		return (-1);
838 	}
839 
840 	for (d = 0; d < devnum; d++) {
841 		int		rv;
842 		memquery_t	mq;
843 		sbd_mem_unit_t	*mp;
844 		struct memlist	*ml;
845 
846 		/* sbd_get_devlist will not return a devlist element w/ dip of 0 */
847 		ASSERT(devlist[d].dv_dip != NULL);
848 
849 		mp = SBD_GET_BOARD_MEMUNIT(sbp, d);
850 
851 		/*
852 		 * If the mem unit is marked as failed then don't allow the
853 		 * operation
854 		 */
855 		if (mp->sbm_cm.sbdev_cond == SBD_COND_FAILED) {
856 			SBD_SET_ERR(ep, ESBD_STATE);
857 			SBD_SET_ERRSTR(ep, sbp->sb_mempath[d]);
858 			err_flag = -1;
859 			break;
860 		}
861 
862 		ASSERT(d == mp->sbm_cm.sbdev_unum);
863 
864 		/*
865 		 * if memory is interleaved across boards, fail the op
866 		 */
867 		if (mp->sbm_interleave) {
868 			SBD_SET_ERR(ep, ESBD_MEMINTLV);
869 			SBD_SET_ERRSTR(ep, sbp->sb_mempath[d]);
870 			err_flag = -1;
871 			break;
872 		}
873 
874 		lep = &devlist[d].dv_error;
875 		if (SBD_GET_ERR(lep) != 0) {
876 			err_flag = -1;
877 			(void) sbd_set_err_in_hdl(hp, lep);
878 			break;
879 		}
880 
881 		if (mp->sbm_flags & SBD_MFLAG_RESERVED) {
882 			/*
883 			 * Board is currently involved in a delete
884 			 * memory operation. Can't detach this guy until
885 			 * that operation completes.
886 			 */
887 			cmn_err(CE_WARN,
888 				"%s: ineligible mem-unit (%d.%d) for detach",
889 				f, sbp->sb_num,
890 				mp->sbm_cm.sbdev_unum);
891 
892 			SBD_SET_ERR(lep, ESBD_INVAL);
893 			SBD_SET_ERRSTR(lep, sbp->sb_mempath[d]);
894 			(void) sbd_set_err_in_hdl(hp, lep);
895 			err_flag = -1;
896 			break;
897 		}
898 
899 		/*
900 		 * Check whether the detaching memory requires a
901 		 * copy-rename.
902 		 */
903 		ASSERT(mp->sbm_npages != 0);
904 		rv = kphysm_del_span_query(
905 			mp->sbm_basepfn, mp->sbm_npages, &mq);
906 		if (rv != KPHYSM_OK) {
907 			cmn_err(CE_WARN,
908 				"%s: unexpected kphysm_del_span_query"
909 				" return value %d;"
910 				" basepfn 0x%lx, npages 0x%lx,"
911 				" mem-unit (%d.%d), dip 0x%p",
912 				f,
913 				rv,
914 				mp->sbm_basepfn,
915 				mp->sbm_npages,
916 				sbp->sb_num,
917 				mp->sbm_cm.sbdev_unum,
918 				(void *)mp->sbm_cm.sbdev_dip);
919 
920 			SBD_SET_ERR(lep, ESBD_INTERNAL);
921 			SBD_SET_ERRSTR(lep, sbp->sb_mempath[d]);
922 			(void) sbd_set_err_in_hdl(hp, lep);
923 			err_flag = -1;
924 			break;
925 		}
926 
927 		if (mq.nonrelocatable != 0) {
928 			if (!(hp->h_iap->i_flags & SBD_FLAG_QUIESCE_OKAY)) {
929 				/* caller wasn't prompted for a suspend */
930 					SBD_SET_ERR(lep, ESBD_QUIESCE_REQD);
931 					SBD_SET_ERRSTR(lep, sbp->sb_mempath[d]);
932 					(void) sbd_set_err_in_hdl(hp, lep);
933 					err_flag = 1;
934 					break;
935 			}
936 		}
937 
938 		/* flags should be clean at this time */
939 		ASSERT(mp->sbm_flags == 0);
940 
941 		ASSERT(mp->sbm_del_mlist == NULL);	/* should be null */
942 
943 		if (mp->sbm_mlist != NULL) {
944 			memlist_delete(mp->sbm_mlist);
945 			mp->sbm_mlist = NULL;
946 		}
947 
948 		ml = sbd_get_memlist(mp, lep);
949 		(void) sbd_set_err_in_hdl(hp, lep);
950 		if (ml == NULL) {
951 			PR_MEM("%s: no memlist found for board %d\n",
952 				f, sbp->sb_num);
953 			err_flag = -1;
954 			break;
955 		}
956 
957 		/* allocate a kphysm handle */
958 		rv = kphysm_del_gethandle(&mp->sbm_memhandle);
959 		if (rv != KPHYSM_OK) {
960 			memlist_delete(ml);
961 
962 			cmn_err(CE_WARN,
963 				"%s: unexpected kphysm_del_gethandle"
964 				" return value %d", f, rv);
965 
966 			SBD_SET_ERR(lep, ESBD_INTERNAL);
967 			SBD_SET_ERRSTR(lep, sbp->sb_mempath[d]);
968 			(void) sbd_set_err_in_hdl(hp, lep);
969 			err_flag = -1;
970 			break;
971 		}
972 		mp->sbm_flags |= SBD_MFLAG_RELOWNER;
973 
974 		if ((mq.nonrelocatable != 0) ||
975 			sbd_reserve_mem_spans(&mp->sbm_memhandle, ml)) {
976 			/*
977 			 * Either the detaching memory node contains
978 			 * non-reloc memory or we failed to reserve the
979 			 * detaching memory node (which did _not_ have
980 			 * any non-reloc memory at query time, i.e. some
981 			 * non-reloc mem has since appeared on the board).
982 			 */
983 
984 			if (sbd_select_mem_target(hp, mp, ml)) {
985 				int rv;
986 
987 				/*
988 				 * We had no luck locating a target
989 				 * memory node to be the recipient of
990 				 * the non-reloc memory on the node
991 				 * we're trying to detach.
992 				 * Clean up by disposing of the mem handle
993 				 * and the mem list.
994 				 */
995 				rv = kphysm_del_release(mp->sbm_memhandle);
996 				if (rv != KPHYSM_OK) {
997 					/*
998 					 * can do nothing but complain
999 					 * and hope it is helpful for debug
1000 					 */
1001 					cmn_err(CE_WARN, "sbd:%s: unexpected"
1002 						" kphysm_del_release return"
1003 						" value %d",
1004 						f, rv);
1005 				}
1006 				mp->sbm_flags &= ~SBD_MFLAG_RELOWNER;
1007 
1008 				memlist_delete(ml);
1009 
1010 				/* make sure sbm_flags is clean */
1011 				ASSERT(mp->sbm_flags == 0);
1012 
1013 				cmn_err(CE_WARN,
1014 					"%s: no available target for "
1015 					"mem-unit (%d.%d)",
1016 					f, sbp->sb_num,
1017 					mp->sbm_cm.sbdev_unum);
1018 
1019 				SBD_SET_ERR(lep, ESBD_NO_TARGET);
1020 				SBD_SET_ERRSTR(lep,
1021 					sbp->sb_mempath[mp->sbm_cm.sbdev_unum]);
1022 				(void) sbd_set_err_in_hdl(hp, lep);
1023 
1024 				err_flag = -1;
1025 				break;
1026 			}
1027 
1028 			/*
1029 			 * ml is not memlist_deleted here because
1030 			 * it has been assigned to mp->sbm_mlist
1031 			 * by sbd_select_mem_target.
1032 			 */
1033 		} else {
1034 			/* no target needed to detach this board */
1035 			mp->sbm_flags |= SBD_MFLAG_RESERVED;
1036 			mp->sbm_peer = NULL;
1037 			mp->sbm_del_mlist = ml;
1038 			mp->sbm_mlist = ml;
1039 			mp->sbm_cm.sbdev_busy = 1;
1040 		}
1041 #ifdef DEBUG
1042 		ASSERT(mp->sbm_mlist != NULL);
1043 
1044 		if (mp->sbm_flags & SBD_MFLAG_SOURCE) {
1045 			int src, targ;
1046 
1047 			sbp = (sbd_board_t *)
1048 				mp->sbm_peer->sbm_cm.sbdev_sbp;
1049 			targ = sbp->sb_num;
1050 			sbp = (sbd_board_t *)mp->sbm_cm.sbdev_sbp;
1051 			src = sbp->sb_num;
1052 			PR_MEM("%s: release of board %d requires copy/rename;"
1053 				" selected target board %d\n",
1054 				f, src, targ);
1055 		} else {
1056 			sbp = (sbd_board_t *)mp->sbm_cm.sbdev_sbp;
1057 			PR_MEM("%s: copy/rename not required to release"
1058 				" board %d\n", f, sbp->sb_num);
1059 		}
1060 
1061 		ASSERT(mp->sbm_flags & SBD_MFLAG_RELOWNER);
1062 		ASSERT(mp->sbm_flags & SBD_MFLAG_RESERVED);
1063 #endif
1064 	}
1065 
1066 	return (err_flag);
1067 }
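/*
 * Hedged summary of the sbm_flags lifecycle as used in this file:
 *
 *	SBD_MFLAG_RELOWNER	unit currently owns a kphysm delete handle
 *	SBD_MFLAG_RESERVED	unit's spans are reserved for a pending delete
 *	SBD_MFLAG_SOURCE	unit is the source of a copy-rename
 *	SBD_MFLAG_TARGET	unit is the target (peer) of a copy-rename
 *	SBD_MFLAG_RELDONE	release (drain) has completed for this unit
 *
 * sbd_pre_release_mem() sets RELOWNER and RESERVED (plus SOURCE/TARGET via
 * sbd_select_mem_target()), sbd_release_mem_done() below clears RELOWNER
 * and sets RELDONE, and sbd_mem_cleanup() clears the flags entirely.
 */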
1068 
1069 void
1070 sbd_release_mem_done(sbd_handle_t *hp, int unit)
1071 {
1072 	sbd_mem_unit_t	*s_mp, *t_mp, *mp;
1073 	sbderror_t	*ep = SBD_HD2ERR(hp);
1074 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
1075 	int		rv;
1076 	static fn_t	f = "sbd_release_mem_done";
1077 
1078 	s_mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
1079 	sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
1080 
1081 	/*
1082 	 * This unit will be flagged with SBD_MFLAG_SOURCE, if it
1083 	 * has a target unit.
1084 	 */
1085 	if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) {
1086 		t_mp = s_mp->sbm_peer;
1087 		ASSERT(t_mp != NULL);
1088 		ASSERT(t_mp->sbm_peer == s_mp);
1089 		ASSERT(t_mp->sbm_flags & SBD_MFLAG_TARGET);
1090 		ASSERT(t_mp->sbm_flags & SBD_MFLAG_RESERVED);
1091 	} else {
1092 		/* this is no target unit */
1093 		t_mp = NULL;
1094 	}
1095 
1096 	/* free delete handle */
1097 	ASSERT(s_mp->sbm_flags & SBD_MFLAG_RELOWNER);
1098 	ASSERT(s_mp->sbm_flags & SBD_MFLAG_RESERVED);
1099 
1100 	rv = kphysm_del_release(s_mp->sbm_memhandle);
1101 	if (rv != KPHYSM_OK) {
1102 		/*
1103 		 * can do nothing but complain
1104 		 * and hope it is helpful for debug
1105 		 */
1106 		cmn_err(CE_WARN, "sbd:%s: unexpected kphysm_del_release"
1107 			" return value %d", f, rv);
1108 	}
1109 	s_mp->sbm_flags &= ~SBD_MFLAG_RELOWNER;
1110 
1111 	/*
1112 	 * If an error was encountered during release, clean up
1113 	 * the source (and target, if present) unit data.
1114 	 */
1115 	if (SBD_GET_ERR(ep) != 0) {
1116 
1117 		PR_MEM("%s: unit %d.%d: error %d noted\n",
1118 			f, sbp->sb_num,
1119 			s_mp->sbm_cm.sbdev_unum,
1120 			SBD_GET_ERR(ep));
1121 
1122 		sbd_mem_cleanup(s_mp, t_mp, ep);
1123 
1124 		/* bail out */
1125 		return;
1126 	}
1127 
1128 	SBD_DEV_SET_RELEASED(sbp, SBD_COMP_MEM, unit);
1129 	SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, unit, SBD_STATE_RELEASE);
1130 
1131 	if (t_mp != NULL) {
1132 		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
1133 		/*
1134 		 * the kphysm delete operation that drained the source
1135 		 * board also drained this target board.  Since the source
1136 		 * board drain is now known to have succeeded, we know this
1137 		 * target board is drained too.
1138 		 */
1139 		SBD_DEV_SET_RELEASED(sbp, SBD_COMP_MEM,
1140 			t_mp->sbm_cm.sbdev_unum);
1141 		SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM,
1142 			t_mp->sbm_cm.sbdev_unum,
1143 			SBD_STATE_RELEASE);
1144 
1145 		/*
1146 		 * NOTE: do not transition target's board state,
1147 		 * even if the mem-unit was the last configured
1148 		 * unit of the board.  When copy/rename completes
1149 		 * this mem-unit will be transitioned back to
1150 		 * the configured state.  In the meantime, the
1151 		 * board's state must remain as is.
1152 		 */
1153 	}
1154 
1155 	/* if board(s) had deleted memory, verify it is gone */
1156 	rv = 0;
1157 	memlist_read_lock();
1158 	if (s_mp->sbm_del_mlist != NULL) {
1159 		sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
1160 		mp = s_mp;
1161 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1162 	}
1163 	if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
1164 		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
1165 		mp = t_mp;
1166 		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1167 	}
1168 	memlist_read_unlock();
1169 	if (rv) {
1170 		cmn_err(CE_WARN, "sbd:%s: %smem-unit (%d.%d): "
1171 			"deleted memory still found in phys_install",
1172 			f,
1173 			(mp == t_mp ? "target " : ""),
1174 			sbp->sb_num,
1175 			mp->sbm_cm.sbdev_unum);
1176 
1177 		SBD_SET_ERR(ep, ESBD_INTERNAL);
1178 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[mp->sbm_cm.sbdev_unum]);
1179 		return;
1180 	}
1181 
1182 	s_mp->sbm_flags |= SBD_MFLAG_RELDONE;
1183 	if (t_mp != NULL) {
1184 		t_mp->sbm_flags &= ~SBD_MFLAG_RESERVED;
1185 		t_mp->sbm_flags |= SBD_MFLAG_RELDONE;
1186 	}
1187 
1188 	sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
1189 
1190 	SBD_DEV_SET_UNREFERENCED(sbp, SBD_COMP_MEM, unit);
1191 	SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, unit, SBD_STATE_UNREFERENCED);
1192 
1193 	PR_MEM("%s: marking mem-unit (%d.%d) release DONE\n",
1194 		f, sbp->sb_num,
1195 		s_mp->sbm_cm.sbdev_unum);
1196 
1197 	s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1198 
1199 	if (t_mp != NULL) {
1200 		sbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
1201 
1202 		SBD_DEV_SET_UNREFERENCED(sbp, SBD_COMP_MEM,
1203 			t_mp->sbm_cm.sbdev_unum);
1204 		SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM,
1205 			t_mp->sbm_cm.sbdev_unum,
1206 			SBD_STATE_UNREFERENCED);
1207 
1208 		sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
1209 
1210 		PR_MEM("%s: marking mem-unit (%d.%d) release DONE\n",
1211 			f, sbp->sb_num,
1212 			t_mp->sbm_cm.sbdev_unum);
1213 
1214 		t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1215 	}
1216 }
1217 
1218 int
1219 sbd_disconnect_mem(sbd_handle_t *hp, int unit)
1220 {
1221 	static fn_t	f = "sbd_disconnect_mem";
1222 	sbd_mem_unit_t	*mp;
1223 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
1224 
1225 	mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
1226 
1227 	ASSERT(mp->sbm_cm.sbdev_state == SBD_STATE_CONNECTED ||
1228 	    mp->sbm_cm.sbdev_state == SBD_STATE_UNCONFIGURED);
1229 
1230 	PR_MEM("%s...\n", f);
1231 
1232 	if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
1233 		memlist_delete(mp->sbm_del_mlist);
1234 	mp->sbm_del_mlist = NULL;
1235 
1236 	if (mp->sbm_mlist) {
1237 		memlist_delete(mp->sbm_mlist);
1238 		mp->sbm_mlist = NULL;
1239 	}
1240 
1241 	return (0);
1242 }
1243 
1244 int
1245 sbd_cancel_mem(sbd_handle_t *hp, int unit)
1246 {
1247 	sbd_mem_unit_t	*s_mp, *t_mp;
1248 	sbd_istate_t	state;
1249 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
1250 	sbd_board_t	*tsbp;
1251 	static fn_t	f = "sbd_cancel_mem";
1252 	sbderror_t	*ep = SBD_HD2ERR(hp);
1253 
1254 	s_mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
1255 
1256 	state = s_mp->sbm_cm.sbdev_state;
1257 
1258 	if (s_mp->sbm_flags & SBD_MFLAG_TARGET) {
1259 		/* must cancel source board, not target board */
1260 		SBD_SET_ERR(ep, ESBD_INTERNAL);
1261 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
1262 		return (-1);
1263 	} else if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) {
1264 		t_mp = s_mp->sbm_peer;
1265 		tsbp = t_mp->sbm_cm.sbdev_sbp;
1266 		ASSERT(t_mp != NULL);
1267 		ASSERT(t_mp->sbm_peer == s_mp);
1268 
1269 		/* must always match the source board's state */
1270 		ASSERT(t_mp->sbm_cm.sbdev_state == state);
1271 	} else {
1272 		/* there is no target unit */
1273 		t_mp = NULL;
1274 	}
1275 
1276 	switch (state) {
1277 	case SBD_STATE_UNREFERENCED:	/* state set by sbd_release_mem_done */
1278 		ASSERT((s_mp->sbm_flags & SBD_MFLAG_RELOWNER) == 0);
1279 
1280 		if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
1281 			PR_MEM("%s: undoing target board %d memory delete\n",
1282 				f, tsbp->sb_num);
1283 			sbd_add_memory_spans(tsbp, t_mp->sbm_del_mlist);
1284 			SBD_DEV_CLR_UNREFERENCED(tsbp, SBD_COMP_MEM,
1285 				t_mp->sbm_cm.sbdev_unum);
1286 		}
1287 
1288 		if (s_mp->sbm_del_mlist != NULL) {
1289 			PR_MEM("%s: undoing board %d memory delete\n",
1290 				f, sbp->sb_num);
1291 			sbd_add_memory_spans(sbp, s_mp->sbm_del_mlist);
1292 		}
1293 
1294 		/*FALLTHROUGH*/
1295 
1296 	case SBD_STATE_CONFIGURED:
1297 		/*
1298 		 * we got here because of an error early in the release process.
1299 		 * Just leave the memory as is and report the error
1300 		 */
1301 
1302 		ASSERT((s_mp->sbm_flags & SBD_MFLAG_RELOWNER) == 0);
1303 
1304 		if (t_mp != NULL) {
1305 			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1306 			t_mp->sbm_del_mlist = NULL;
1307 
1308 			if (t_mp->sbm_mlist != NULL) {
1309 				memlist_delete(t_mp->sbm_mlist);
1310 				t_mp->sbm_mlist = NULL;
1311 			}
1312 
1313 			t_mp->sbm_peer = NULL;
1314 			t_mp->sbm_flags = 0;
1315 			t_mp->sbm_cm.sbdev_busy = 0;
1316 			sbd_init_mem_unit_data(t_mp, ep);
1317 
1318 			SBD_DEV_CLR_RELEASED(tsbp, SBD_COMP_MEM,
1319 				t_mp->sbm_cm.sbdev_unum);
1320 
1321 			SBD_DEVICE_TRANSITION(tsbp, SBD_COMP_MEM,
1322 				t_mp->sbm_cm.sbdev_unum,
1323 				SBD_STATE_CONFIGURED);
1324 		}
1325 
1326 		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1327 			memlist_delete(s_mp->sbm_del_mlist);
1328 		s_mp->sbm_del_mlist = NULL;
1329 
1330 		if (s_mp->sbm_mlist != NULL) {
1331 			memlist_delete(s_mp->sbm_mlist);
1332 			s_mp->sbm_mlist = NULL;
1333 		}
1334 
1335 		s_mp->sbm_peer = NULL;
1336 		s_mp->sbm_flags = 0;
1337 		s_mp->sbm_cm.sbdev_busy = 0;
1338 		sbd_init_mem_unit_data(s_mp, ep);
1339 
1340 		return (0);
1341 	default:
1342 		PR_MEM("%s: WARNING unexpected state (%d) for "
1343 			"mem-unit %d.%d\n",
1344 			f,
1345 			(int)state,
1346 			sbp->sb_num,
1347 			s_mp->sbm_cm.sbdev_unum);
1348 
1349 		return (-1);
1350 	}
1351 	/*NOTREACHED*/
1352 }
1353 
1354 void
1355 sbd_init_mem_unit(sbd_board_t *sbp, int unit, sbderror_t *ep)
1356 {
1357 	sbd_istate_t	new_state;
1358 	sbd_mem_unit_t	*mp;
1359 	dev_info_t	*cur_mc_dip;
1360 	int		failed_mcs = 0, present_mcs = 0;
1361 	sbd_cond_t	mc_cond;
1362 	int		i;
1363 
1364 	mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
1365 
1366 	if (SBD_DEV_IS_ATTACHED(sbp, SBD_COMP_MEM, unit)) {
1367 		new_state = SBD_STATE_CONFIGURED;
1368 	} else if (SBD_DEV_IS_PRESENT(sbp, SBD_COMP_MEM, unit)) {
1369 		new_state = SBD_STATE_CONNECTED;
1370 	} else if (mp->sbm_cm.sbdev_dip != NULL) {
1371 		new_state = SBD_STATE_OCCUPIED;
1372 	} else {
1373 		new_state = SBD_STATE_EMPTY;
1374 	}
1375 
1376 	/*
1377 	 * Check all the possible memory nodes on the board.  If all of them
1378 	 * have a failed status, mark the memory as failed. Otherwise mem is ok.
1379 	 */
1380 	if (!sbp->sb_memaccess_ok) {
1381 		mp->sbm_cm.sbdev_cond = SBD_COND_UNKNOWN;
1382 		return;
1383 	}
1384 
1385 	for (i = 0; i < SBD_NUM_MC_PER_BOARD; i++) {
1386 		cur_mc_dip = mp->sbm_dip[i];
1387 
1388 		if (cur_mc_dip == NULL)
1389 			continue;
1390 
1391 		present_mcs |= (1 << i);
1392 
1393 		mc_cond = sbd_get_comp_cond(cur_mc_dip);
1394 		if (mc_cond == SBD_COND_FAILED) {
1395 			failed_mcs |= (1 << i);
1396 		}
1397 	}
1398 
1399 	if (failed_mcs == present_mcs) {
1400 		/*
1401 		 * All mem nodes failed, therefore mark all mem
1402 		 * as failed
1403 		 */
1404 		mp->sbm_cm.sbdev_cond = SBD_COND_FAILED;
1405 	} else {
1406 		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
1407 	}
1408 
1409 	sbd_init_mem_unit_data(mp, ep);
1410 
1411 	/*
1412 	 * Any changes to this memory unit should be performed above
1413 	 * this call to ensure the unit is fully initialized
1414 	 * before transitioning to the new state.
1415 	 */
1416 	SBD_DEVICE_TRANSITION(sbp, SBD_COMP_MEM, unit, new_state);
1417 
1418 }
1419 
1420 static void
1421 sbd_init_mem_unit_data(sbd_mem_unit_t *mp, sbderror_t *ep)
1422 {
1423 	uint64_t	basepa;
1424 	uint64_t	sz;
1425 	sbd_board_t	*sbp = mp->sbm_cm.sbdev_sbp;
1426 	sbdp_handle_t	*hdp;
1427 	static fn_t	f = "sbd_init_mem_unit_data";
1428 	sbd_handle_t	*hp = MACHBD2HD(sbp);
1429 
1430 	PR_MEM("%s...\n", f);
1431 
1432 	/* a little sanity checking */
1433 	ASSERT(mp->sbm_peer == NULL);
1434 	ASSERT(mp->sbm_flags == 0);
1435 
1436 	hdp = sbd_get_sbdp_handle(sbp, hp);
1437 
1438 	/* get basepfn of mem unit */
1439 	if (sbdphw_get_base_physaddr(hdp, mp->sbm_cm.sbdev_dip, &basepa)) {
1440 		cmn_err(CE_WARN, "sbd:%s: failed to get physaddr"
1441 			" for mem-unit (%d.%d)",
1442 			f,
1443 			sbp->sb_num,
1444 			mp->sbm_cm.sbdev_unum);
1445 		SBD_GET_PERR(hdp->h_err, ep);
1446 		sbd_release_sbdp_handle(hdp);
1447 		return;
1448 	}
1449 	mp->sbm_basepfn = _b64top(basepa);
1450 
1451 	/* attempt to get number of pages from PDA */
1452 	mp->sbm_npages = sbdp_get_mem_size(hdp);
1453 
1454 	/* if didn't work, calculate using memlist */
1455 	if (mp->sbm_npages == 0) {
1456 		struct memlist	*ml, *mlist;
1457 		mlist = sbd_get_memlist(mp, ep);
1458 		for (ml = mlist; ml; ml = ml->ml_next)
1459 			mp->sbm_npages += btop(ml->ml_size);
1460 		memlist_delete(mlist);
1461 	}
1462 
1463 
1464 	if (sbdp_get_mem_alignment(hdp, mp->sbm_cm.sbdev_dip, &sz)) {
1465 		cmn_err(CE_WARN,
1466 			"sbd:%s: no alignment for mem-unit (%d.%d)",
1467 			f, sbp->sb_num, mp->sbm_cm.sbdev_unum);
1468 		SBD_GET_PERR(hdp->h_err, ep);
1469 		sbd_release_sbdp_handle(hdp);
1470 		return;
1471 	}
1472 	mp->sbm_alignment_mask = _b64top(sz);
1473 
1474 
1475 	mp->sbm_interleave = sbdp_isinterleaved(hdp,
1476 	    mp->sbm_cm.sbdev_dip);
1477 
1478 	PR_MEM("%s: board %d (basepfn = 0x%lx, npgs = 0x%lx interleave %d)\n",
1479 		f, sbp->sb_num,
1480 		mp->sbm_basepfn,
1481 		mp->sbm_npages,
1482 		mp->sbm_interleave);
1483 
1484 	sbd_release_sbdp_handle(hdp);
1485 }
1486 
1487 static int
1488 sbd_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
1489 {
1490 	int		err;
1491 	pfn_t		base;
1492 	pgcnt_t		npgs;
1493 	struct memlist	*mc;
1494 	static fn_t	f = "sbd_reserve_mem_spans";
1495 
1496 	PR_MEM("%s...\n", f);
1497 
1498 	/*
1499 	 * Walk the supplied memlist scheduling each span for removal
1500 	 * with kphysm_del_span.  It is possible that a span may intersect
1501 	 * an area occupied by the cage.
1502 	 */
1503 	for (mc = ml; mc != NULL; mc = mc->ml_next) {
1504 		base = _b64top(mc->ml_address);
1505 		npgs = _b64top(mc->ml_size);
1506 
1507 		err = kphysm_del_span(*mhp, base, npgs);
1508 		if (err != KPHYSM_OK) {
1509 			cmn_err(CE_WARN, "sbd:%s memory reserve failed."
1510 				" unexpected kphysm_del_span return value %d;"
1511 				" basepfn=0x%lx npages=%ld",
1512 				f, err, base, npgs);
1513 			return (-1);
1514 		}
1515 	}
1516 	return (0);
1517 }
1518 
1519 /* debug counters */
1520 int sbd_smt_realigned;
1521 int sbd_smt_preference[4];
1522 
1523 #ifdef DEBUG
1524 uint_t sbd_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
1525 #endif
1526 
1527 /*
1528  * Verify that there is no memory overlapping if copy-rename is
1529  * done with the selected target board.
1530  *
1531  * Returns 0 if OK, -1 otherwise.
1532  */
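/*
 * Illustrative example with hypothetical values: if the source unit owns
 * [0x400000000, +4 GB) and the target unit owns [0x800000000, +8 GB), the
 * routine deletes both spans from a copy of phys_install and then re-adds
 * [0x800000000, +4 GB) and [0x400000000, +8 GB), i.e. each base paired
 * with the other unit's size.  Any span operation that fails to return
 * MEML_SPANOP_OK indicates the post-rename layout would overlap existing
 * memory, and the candidate target is rejected.
 */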
1533 static int
1534 sbd_check_boundaries(struct memlist *orig_memlist, sbd_mem_unit_t *s_mp,
1535 	sbd_mem_unit_t *t_mp)
1536 {
1537 	struct memlist	*new_memlist;
1538 	int mlret;
1539 	static fn_t	f = "sbd_check_boundaries";
1540 
1541 	new_memlist = memlist_dup(orig_memlist);
1542 	if (new_memlist == NULL) {
1543 		PR_MEM("%s: can't dup original memlist\n", f);
1544 		return (-1);
1545 	}
1546 
1547 	mlret = memlist_delete_span(
1548 		_ptob64(s_mp->sbm_basepfn),
1549 		_ptob64(s_mp->sbm_npages),
1550 		&new_memlist);
1551 	if (mlret != MEML_SPANOP_OK) {
1552 		PR_MEM("%s: del s/s mlret = %d\n", f, mlret);
1553 		goto check_done;
1554 	}
1555 
1556 	mlret = memlist_delete_span(
1557 		_ptob64(t_mp->sbm_basepfn),
1558 		_ptob64(t_mp->sbm_npages),
1559 		&new_memlist);
1560 	if (mlret != MEML_SPANOP_OK) {
1561 		PR_MEM("%s: del t/t mlret = %d\n", f, mlret);
1562 		goto check_done;
1563 	}
1564 
1565 	mlret = memlist_add_span(
1566 		_ptob64(t_mp->sbm_basepfn),
1567 		_ptob64(s_mp->sbm_npages),
1568 		&new_memlist);
1569 	if (mlret != MEML_SPANOP_OK) {
1570 		PR_MEM("%s: add t/s mlret = %d\n", f, mlret);
1571 		goto check_done;
1572 	}
1573 
1574 	mlret = memlist_add_span(
1575 		_ptob64(s_mp->sbm_basepfn),
1576 		_ptob64(t_mp->sbm_npages),
1577 		&new_memlist);
1578 	if (mlret != MEML_SPANOP_OK) {
1579 		PR_MEM("%s: add s/t mlret = %d\n", f, mlret);
1580 	}
1581 
1582 check_done:
1583 	memlist_delete(new_memlist);
1584 
1585 	if (mlret == MEML_SPANOP_OK)
1586 		return (0);
1587 	else
1588 		return (-1);
1589 }
1590 
1591 /*
1592  * Find and reserve a copy/rename target board suitable for the
1593  * given source board.
1594  * All boards in the system are examined and categorized in relation to
1595  * their memory size versus the source board's memory size.  Order of
1596  * preference is:
1597  *	1st: board has same memory size
1598  * 	2nd: board has larger memory size
1599  *	3rd: board has smaller memory size
1600  *	4th: board has smaller memory size, available memory will be reduced.
1601  * Boards in categories 3 and 4 will have their MCs reprogrammed so that the
1602  * address span to which each MC responds appropriately covers
1603  * the nonrelocatable span of the source board.
1604  */
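/*
 * Hedged sketch of the candidate bookkeeping below (board and unit counts
 * are illustrative assumptions): candidates are stored in a flat sets[]
 * array of n_sets * n_units_per_set pointers, indexed as
 *
 *	idx = n_units_per_set * preference +
 *	    t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;
 *
 * For example, with 2 mem-units per board and 16 boards
 * (n_units_per_set == 32), a same-size candidate (preference 0) that is
 * unit 1 on board 3 lands at idx 7, while a larger candidate
 * (preference 1) that is unit 0 on board 3 lands at idx 32 + 6 == 38.
 */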
1605 static int
1606 sbd_select_mem_target(sbd_handle_t *hp,
1607 	sbd_mem_unit_t *s_mp, struct memlist *s_ml)
1608 {
1609 	uint64_t	sz;
1610 	pgcnt_t		sm;
1611 	int		n_sets = 4; /* same, larger, smaller, clipped */
1612 	int		preference; /* lower value is higher preference */
1613 	int		n_units_per_set;
1614 	int		idx;
1615 	sbd_mem_unit_t	**sets;
1616 	sbdp_handle_t	*hdp;
1617 	int		t_bd;
1618 	sbd_softstate_t	*softsp;
1619 	int		t_unit;
1620 	int		max_boards;
1621 	int		rv;
1622 	sbd_board_t	*s_sbp, *t_sbp;
1623 	sbd_mem_unit_t	*t_mp, *c_mp;
1624 	struct memlist	*d_ml, *t_ml, *x_ml;
1625 	memquery_t	s_mq = {0};
1626 	static fn_t	f = "sbd_select_mem_target";
1627 
1628 	PR_MEM("%s...\n", f);
1629 
1630 	ASSERT(s_ml != NULL);
1631 
1632 	s_sbp = s_mp->sbm_cm.sbdev_sbp;
1633 
1634 	hdp = sbd_get_sbdp_handle(s_sbp, hp);
1635 
1636 	if (sbdp_get_mem_alignment(hdp, s_mp->sbm_cm.sbdev_dip, &sz)) {
1637 		sbderror_t	*ep = SBD_HD2ERR(hp);
1638 		cmn_err(CE_WARN,
1639 			"sbd:%s: no alignment for mem-unit (%d.%d)",
1640 			f, s_sbp->sb_num, s_mp->sbm_cm.sbdev_unum);
1641 		SBD_GET_PERR(hdp->h_err, ep);
1642 		sbd_release_sbdp_handle(hdp);
1643 		return (-1);
1644 	}
1645 	sm = sz - 1;
1646 	sbd_release_sbdp_handle(hdp);
1647 
1648 	softsp = (sbd_softstate_t *)s_sbp->sb_softsp;
1649 
1650 	max_boards = softsp->max_boards;
1651 	n_units_per_set = max_boards * MAX_MEM_UNITS_PER_BOARD;
1652 	sets = GETSTRUCT(sbd_mem_unit_t *, n_units_per_set * n_sets);
1653 
1654 	/*
1655 	 * Make one pass through all memory units on all boards
1656 	 * and categorize them with respect to the source board.
1657 	 */
1658 	for (t_bd = 0; t_bd < max_boards; t_bd++) {
1659 		/*
1660 		 * The board structs are a contiguous array
1661 		 * so we take advantage of that to find the
1662 		 * correct board struct pointer for a given
1663 		 * board number.
1664 		 */
1665 		t_sbp = (sbd_board_t *)softsp->sbd_boardlist;
1666 		t_sbp += t_bd;
1667 
1668 		/* source board can not be its own target */
1669 		if (s_sbp->sb_num == t_sbp->sb_num)
1670 			continue;
1671 
1672 		for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) {
1673 
1674 			t_mp = SBD_GET_BOARD_MEMUNIT(t_sbp, t_unit);
1675 
1676 			/* this memory node must be attached */
1677 			if (!SBD_DEV_IS_ATTACHED(t_sbp, SBD_COMP_MEM, t_unit))
1678 				continue;
1679 
1680 			/* source unit can not be its own target */
1681 			if (s_mp == t_mp) {
1682 				/* catch this in debug kernels */
1683 				ASSERT(0);
1684 				continue;
1685 			}
1686 
1687 			/*
1688 			 * this memory node must not already be reserved
1689 			 * by some other memory delete operation.
1690 			 */
1691 			if (t_mp->sbm_flags & SBD_MFLAG_RESERVED)
1692 				continue;
1693 
1694 			/*
1695 			 * categorize the memory node
1696 			 * If this is a smaller memory node, create a
1697 			 * temporary, edited copy of the source board's
1698 			 * memlist containing only the span of the non-
1699 			 * relocatable pages.
1700 			 */
1701 			if (t_mp->sbm_npages == s_mp->sbm_npages) {
1702 				preference = 0;
1703 				t_mp->sbm_slice_offset = 0;
1704 			} else if (t_mp->sbm_npages > s_mp->sbm_npages) {
1705 				preference = 1;
1706 				t_mp->sbm_slice_offset = 0;
1707 			} else {
1708 				/*
1709 				 * We do not allow other options right now
1710 				 */
1711 				continue;
1712 			}
1713 
1714 			sbd_smt_preference[preference]++;
1715 
1716 			/* calculate index to start of preference set */
1717 			idx  = n_units_per_set * preference;
1718 			/* calculate offset to respective element */
1719 			idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;
1720 
1721 			ASSERT(idx < n_units_per_set * n_sets);
1722 			sets[idx] = t_mp;
1723 		}
1724 	}
1725 
1726 	/*
1727 	 * NOTE: this would be a good place to sort each candidate
1728 	 * set in to some desired order, e.g. memory size in ascending
1729 	 * order.  Without an additional sorting step here, the order
1730 	 * within a set is ascending board number order.
1731 	 */
1732 
1733 	c_mp = NULL;
1734 	x_ml = NULL;
1735 	t_ml = NULL;
1736 	for (idx = 0; idx < n_units_per_set * n_sets; idx++) {
1737 		memquery_t mq;
1738 
1739 		/* cleanup t_ml after previous pass */
1740 		if (t_ml != NULL) {
1741 			memlist_delete(t_ml);
1742 			t_ml = NULL;
1743 		}
1744 
1745 		/* get candidate target board mem unit */
1746 		t_mp = sets[idx];
1747 		if (t_mp == NULL)
1748 			continue;
1749 
1750 		t_sbp = t_mp->sbm_cm.sbdev_sbp;
1751 
1752 		/* get target board memlist */
1753 		t_ml = sbd_get_memlist(t_mp, SBD_HD2ERR(hp));
1754 		if (t_ml == NULL) {
1755 			cmn_err(CE_WARN, "sbd:%s: no memlist for"
1756 				" mem-unit %d, board %d",
1757 				f,
1758 				t_sbp->sb_num,
1759 				t_mp->sbm_cm.sbdev_unum);
1760 
1761 			continue;
1762 		}
1763 
1764 		/* get appropriate source board memlist */
1765 		if (t_mp->sbm_npages < s_mp->sbm_npages) {
1766 			spgcnt_t excess;
1767 
1768 			/*
1769 			 * make a copy of the source board memlist
1770 			 * then edit it to remove the spans that
1771 			 * are outside the calculated span of
1772 			 * [pfn..s_mq.last_nonrelocatable].
1773 			 */
1774 			if (x_ml != NULL)
1775 				memlist_delete(x_ml);
1776 
1777 			x_ml = memlist_dup(s_ml);
1778 			if (x_ml == NULL) {
1779 				PR_MEM("%s: memlist_dup failed\n", f);
1780 				/* TODO: should abort */
1781 				continue;
1782 			}
1783 
1784 			/* trim off lower portion */
1785 			excess = t_mp->sbm_slice_offset;
1786 			if (excess > 0) {
1787 				int mlret;
1788 
1789 				mlret = memlist_delete_span(
1790 					_ptob64(s_mp->sbm_basepfn),
1791 					_ptob64(excess),
1792 					&x_ml);
1793 				PR_MEM("%s: mlret = %d\n", f, mlret);
1794 			}
1795 
1796 			/*
1797 			 * Since this candidate target board is smaller
1798 			 * than the source board, s_mq must have been
1799 			 * initialized in previous loop while processing
1800 			 * this or some other candidate board.
1801 			 * FIXME: this is weak.
1802 			 */
1803 			ASSERT(s_mq.phys_pages != 0);
1804 
1805 			/* trim off upper portion */
1806 			excess = (s_mp->sbm_basepfn + s_mp->sbm_npages)
1807 				- (s_mq.last_nonrelocatable + 1);
1808 			if (excess > 0) {
1809 				pfn_t p;
1810 				int mlret;
1811 
1812 				p  = s_mq.last_nonrelocatable + 1;
1813 				p -= excess;
1814 
1815 				mlret = memlist_delete_span(
1816 					_ptob64(p),
1817 					_ptob64(excess),
1818 					&x_ml);
1819 				PR_MEM("%s: mlret = %d\n", f, mlret);
1820 			}
1821 
1822 			PR_MEM("%s: brd %d: edited source memlist:\n",
1823 				f, s_sbp->sb_num);
1824 			SBD_MEMLIST_DUMP(x_ml);
1825 
1826 #ifdef DEBUG
1827 			/* sanity check memlist */
1828 			d_ml = x_ml;
1829 			while (d_ml->ml_next != NULL)
1830 				d_ml = d_ml->ml_next;
1831 			ASSERT(x_ml->ml_address == _ptob64(s_mp->sbm_basepfn) +
1832 				_ptob64(t_mp->sbm_slice_offset));
1833 			ASSERT(d_ml->ml_address + d_ml->ml_size ==
1834 				_ptob64(s_mq.last_nonrelocatable + 1));
1835 #endif
1836 
1837 			/*
1838 			 * x_ml now describes only the portion of the
1839 			 * source board that will be moved during the
1840 			 * copy/rename operation.
1841 			 */
1842 			d_ml = x_ml;
1843 		} else {
1844 			/* use original memlist; all spans will be moved */
1845 			d_ml = s_ml;
1846 		}
1847 
1848 		/* verify target can support source memory spans. */
1849 		if (memlist_canfit(d_ml, t_ml) == 0) {
1850 			PR_MEM("%s: source memlist won't"
1851 				" fit in target memlist\n", f);
1852 			PR_MEM("%s: source memlist:\n", f);
1853 			SBD_MEMLIST_DUMP(d_ml);
1854 			PR_MEM("%s: target memlist:\n", f);
1855 			SBD_MEMLIST_DUMP(t_ml);
1856 
1857 			continue;
1858 		}
1859 
1860 		/* NOTE: the value of d_ml is not used beyond this point */
1861 
1862 		PR_MEM("%s: checking for no-reloc on board %d, "
1863 			" basepfn=0x%lx, npages=%ld\n",
1864 			f,
1865 			t_sbp->sb_num,
1866 			t_mp->sbm_basepfn,
1867 			t_mp->sbm_npages);
1868 
1869 		rv = kphysm_del_span_query(
1870 			t_mp->sbm_basepfn, t_mp->sbm_npages, &mq);
1871 		if (rv != KPHYSM_OK) {
1872 			PR_MEM("%s: kphysm_del_span_query:"
1873 				" unexpected return value %d\n", f, rv);
1874 
1875 			continue;
1876 		}
1877 
1878 		if (mq.nonrelocatable != 0) {
1879 			PR_MEM("%s: candidate board %d has"
1880 				" nonrelocatable span [0x%lx..0x%lx]\n",
1881 				f,
1882 				t_sbp->sb_num,
1883 				mq.first_nonrelocatable,
1884 				mq.last_nonrelocatable);
1885 
1886 			continue;
1887 		}
1888 
1889 #ifdef DEBUG
1890 		/*
1891 		 * This is a debug tool for excluding certain boards
1892 		 * from being selected as a target board candidate.
1893 		 * sbd_ignore_board is only tested by this driver.
1894 		 * It must be set with adb, obp, /etc/system or your
1895 		 * favorite debugger.
1896 		 */
1897 		if (sbd_ignore_board &
1898 			(1 << (t_sbp->sb_num - 1))) {
1899 			PR_MEM("%s: sbd_ignore_board flag set,"
1900 				" ignoring board %d as candidate\n",
1901 				f, t_sbp->sb_num);
1902 			continue;
1903 		}
1904 #endif
1905 
1906 		/*
1907 		 * Make sure there is no memory overlap if this
1908 		 * target board is used for copy-rename.
1909 		 */
1910 		if (sbd_check_boundaries(phys_install, s_mp, t_mp) != 0)
1911 			continue;
1912 
1913 		/*
1914 		 * Reserve excess source board memory, if any.
1915 		 *
1916 		 * When the number of pages on the candidate target
1917 		 * board is less than the number of pages on the source,
1918 		 * then clearly some spans of the source board's address
1919 		 * space will not be covered by physical memory after the
1920 		 * copy/rename completes.  The following code block
1921 		 * schedules those spans to be deleted.
1922 		 */
1923 		if (t_mp->sbm_npages < s_mp->sbm_npages) {
1924 			pfn_t pfn;
1925 			int mlret;
1926 
1927 			d_ml = memlist_dup(s_ml);
1928 			if (d_ml == NULL) {
1929 				PR_MEM("%s: can't dup src brd memlist\n", f);
1930 				/* TODO: should abort */
1931 				continue;
1932 			}
1933 
1934 			/* calculate base pfn relative to target board */
1935 			pfn  = s_mp->sbm_basepfn & ~sm;
1936 			pfn += t_mp->sbm_slice_offset;
1937 
1938 			/* remove span that will reside on candidate board */
1939 			mlret = memlist_delete_span(
1940 				_ptob64(pfn),
1941 				_ptob64(t_mp->sbm_npages),
1942 				&d_ml);
1943 			PR_MEM("%s: mlret = %d\n", f, mlret);
1944 
1945 			PR_MEM("%s: brd %d: reserving src brd memlist:\n",
1946 				f, s_sbp->sb_num);
1947 			SBD_MEMLIST_DUMP(d_ml);
1948 
1949 			/* reserve excess spans */
1950 			if (sbd_reserve_mem_spans(
1951 				&s_mp->sbm_memhandle, d_ml) != 0) {
1952 
1953 				/* likely more non-reloc pages appeared */
1954 				/* TODO: restart from top? */
1955 				continue;
1956 			}
1957 		} else {
1958 			/* no excess source board memory */
1959 			d_ml = NULL;
1960 		}
1961 
1962 		s_mp->sbm_flags |= SBD_MFLAG_RESERVED;
1963 
1964 		/*
1965 		 * reserve all memory on target board.
1966 		 * NOTE: source board's memhandle is used.
1967 		 *
1968 		 * If this succeeds (eq 0), then target selection is
1969 		 * complete and all unwanted memory spans, both source and
1970 		 * target, have been reserved.  Loop is terminated.
1971 		 */
1972 		if (sbd_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) {
1973 			PR_MEM("%s: brd %d: target board memory reserved\n",
1974 				f, t_sbp->sb_num);
1975 
1976 			/* a candidate target board is now reserved */
1977 			t_mp->sbm_flags |= SBD_MFLAG_RESERVED;
1978 			c_mp = t_mp;
1979 
1980 			/* *** EXITING LOOP *** */
1981 			break;
1982 		}
1983 
1984 		/* did not successfully reserve the target board. */
1985 		PR_MEM("%s: could not reserve target board %d\n",
1986 			f, t_sbp->sb_num);
1987 
1988 		/*
1989 		 * NOTE: an undo of the sbd_reserve_mem_span work
1990 		 * will happen automatically when the memhandle
1991 		 * (s_mp->sbm_memhandle) is kphysm_del_release'd.
1992 		 */
1993 
1994 		s_mp->sbm_flags &= ~SBD_MFLAG_RESERVED;
1995 	}
1996 
1997 	/* clean up after memlist editing logic */
1998 	if (x_ml != NULL)
1999 		memlist_delete(x_ml);
2000 
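	/* Free the array of candidate mem-units that was built earlier. */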
2001 	FREESTRUCT(sets, sbd_mem_unit_t *, n_units_per_set * n_sets);
2002 
2003 	/*
2004 	 * c_mp will be NULL when the entire sets[] array
2005 	 * has been searched without reserving a target board.
2006 	 */
2007 	if (c_mp == NULL) {
2008 		PR_MEM("%s: brd %d: target selection failed.\n",
2009 			f, s_sbp->sb_num);
2010 
2011 		if (t_ml != NULL)
2012 			memlist_delete(t_ml);
2013 
2014 		return (-1);
2015 	}
2016 
2017 	PR_MEM("%s: found target board %d for source board %d\n",
2018 		f,
2019 		t_sbp->sb_num,
2020 		s_sbp->sb_num);
2021 
2022 	s_mp->sbm_peer = c_mp;
2023 	s_mp->sbm_flags |= SBD_MFLAG_SOURCE;
2024 	s_mp->sbm_del_mlist = d_ml;	/* spans to be deleted, if any */
2025 	s_mp->sbm_mlist = s_ml;
2026 	s_mp->sbm_cm.sbdev_busy = 1;
2027 
2028 	c_mp->sbm_peer = s_mp;
2029 	c_mp->sbm_flags |= SBD_MFLAG_TARGET;
2030 	c_mp->sbm_del_mlist = t_ml;	/* spans to be deleted */
2031 	c_mp->sbm_mlist = t_ml;
2032 	c_mp->sbm_cm.sbdev_busy = 1;
2033 
2034 	s_mp->sbm_flags &= ~SBD_MFLAG_MEMRESIZE;
2035 	if (c_mp->sbm_npages > s_mp->sbm_npages) {
2036 		s_mp->sbm_flags |= SBD_MFLAG_MEMUPSIZE;
2037 		PR_MEM("%s: upsize (source pgs 0x%lx < target pgs 0x%lx)\n",
2038 			f, s_mp->sbm_npages, c_mp->sbm_npages);
2039 	} else if (c_mp->sbm_npages < s_mp->sbm_npages) {
2040 		s_mp->sbm_flags |= SBD_MFLAG_MEMDOWNSIZE;
2041 		PR_MEM("%s: downsize (source pgs 0x%lx > target pgs 0x%lx)\n",
2042 			f, s_mp->sbm_npages, c_mp->sbm_npages);
2043 	}
2044 
2045 	return (0);
2046 }
2047 
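/*
 * Carry out the copy-rename for a memory detach: the platform sbdp layer
 * moves the source board's memory onto the target board.  Any sbdp error
 * is copied back into the handle's error structure.
 */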
2048 int
2049 sbd_move_memory(sbd_handle_t *hp, sbd_board_t *s_bp, sbd_board_t *t_bp)
2050 {
2051 	int	ret;
2052 	sbdp_handle_t	*hdp;
2053 	sbderror_t	*ep = SBD_HD2ERR(hp);
2054 
2055 	hdp = sbd_get_sbdp_handle(s_bp, hp);
2056 
2057 	ret = sbdp_move_memory(hdp, t_bp->sb_num);
2058 	if (ret != 0)
2059 		SBD_GET_PERR(hdp->h_err, ep);
2060 
2061 	sbd_release_sbdp_handle(hdp);
2062 
2063 	return (ret);
2064 }
2065 
2066 /*
2067  * Memlist support.
2068  */
2069 void
2070 memlist_delete(struct memlist *mlist)
2071 {
2072 	sbdp_handle_t	*hdp;
2073 
2074 	hdp = sbd_get_sbdp_handle(NULL, NULL);
2075 
2076 	(void) sbdp_del_memlist(hdp, mlist);
2077 
2078 	sbd_release_sbdp_handle(hdp);
2079 }
2080 
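/*
 * Duplicate a memlist using the generic memlist allocator.  If any
 * allocation fails, the partially built copy is freed and NULL is
 * returned.
 */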
2081 struct memlist *
2082 memlist_dup(struct memlist *mlist)
2083 {
2084 	struct memlist *hl, *prev;
2085 
2086 	if (mlist == NULL)
2087 		return (NULL);
2088 
2089 	prev = NULL;
2090 	hl = NULL;
2091 	for (; mlist; mlist = mlist->ml_next) {
2092 		struct memlist *mp;
2093 
2094 		mp = memlist_get_one();
2095 		if (mp == NULL) {
2096 			if (hl != NULL)
2097 				memlist_free_list(hl);
2098 			hl = NULL;
2099 			break;
2100 		}
2101 		mp->ml_address = mlist->ml_address;
2102 		mp->ml_size = mlist->ml_size;
2103 		mp->ml_next = NULL;
2104 		mp->ml_prev = prev;
2105 
2106 		if (prev == NULL)
2107 			hl = mp;
2108 		else
2109 			prev->ml_next = mp;
2110 		prev = mp;
2111 	}
2112 
2113 	return (hl);
2114 }
2115 
2116 void
2117 memlist_dump(struct memlist *mlist)
2118 {
2119 	register struct memlist *ml;
2120 
2121 	if (mlist == NULL) {
2122 		PR_MEM("memlist> EMPTY\n");
2123 	} else {
2124 		for (ml = mlist; ml; ml = ml->ml_next)
2125 			PR_MEM("memlist> 0x%" PRIx64 " "
2126 				"0x%" PRIx64 " \n",
2127 				ml->ml_address, ml->ml_size);
2128 	}
2129 }
2130 
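/*
 * Return 1 if any span in 'al' overlaps any span in 'bl', 0 otherwise.
 * Both lists are assumed to be sorted by ascending address.
 */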
2131 int
2132 memlist_intersect(struct memlist *al, struct memlist *bl)
2133 {
2134 	uint64_t	astart, aend, bstart, bend;
2135 
2136 	if ((al == NULL) || (bl == NULL))
2137 		return (0);
2138 
2139 	aend = al->ml_address + al->ml_size;
2140 	bstart = bl->ml_address;
2141 	bend = bl->ml_address + bl->ml_size;
2142 
2143 	while (al && bl) {
2144 		while (al && (aend <= bstart))
2145 			if ((al = al->ml_next) != NULL)
2146 				aend = al->ml_address + al->ml_size;
2147 		if (al == NULL)
2148 			return (0);
2149 
2150 		if ((astart = al->ml_address) <= bstart)
2151 			return (1);
2152 
2153 		while (bl && (bend <= astart))
2154 			if ((bl = bl->ml_next) != NULL)
2155 				bend = bl->ml_address + bl->ml_size;
2156 		if (bl == NULL)
2157 			return (0);
2158 
2159 		if ((bstart = bl->ml_address) <= astart)
2160 			return (1);
2161 	}
2162 
2163 	return (0);
2164 }
2165 
2166 /*
2167  * Determine whether the source memlist (s_mlist) will
2168  * fit into the target memlist (t_mlist) in terms of
2169  * size and holes (i.e. based on same relative base address).
2170  */
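/*
 * For example (hypothetical spans, rebased to 0): a source of
 * { [0x0, 0x100), [0x200, 0x280) } fits a target of { [0x0, 0x300) },
 * but not a target of { [0x0, 0x240), [0x280, 0x400) }, because the
 * second source span straddles the hole in the target.
 */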
2171 static int
2172 memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist)
2173 {
2174 	int		rv = 0;
2175 	uint64_t	s_basepa, t_basepa;
2176 	struct memlist	*s_ml, *t_ml;
2177 
2178 	if ((s_mlist == NULL) || (t_mlist == NULL))
2179 		return (0);
2180 
2181 	/*
2182 	 * Rebase both memlists to a common base address (0).
2183 	 */
2184 	s_basepa = s_mlist->ml_address;
2185 	t_basepa = t_mlist->ml_address;
2186 
2187 	for (s_ml = s_mlist; s_ml; s_ml = s_ml->ml_next)
2188 		s_ml->ml_address -= s_basepa;
2189 
2190 	for (t_ml = t_mlist; t_ml; t_ml = t_ml->ml_next)
2191 		t_ml->ml_address -= t_basepa;
2192 
2193 	s_ml = s_mlist;
2194 	for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->ml_next) {
2195 		uint64_t	s_start, s_end;
2196 		uint64_t	t_start, t_end;
2197 
2198 		t_start = t_ml->ml_address;
2199 		t_end = t_start + t_ml->ml_size;
2200 
2201 		for (; s_ml; s_ml = s_ml->ml_next) {
2202 			s_start = s_ml->ml_address;
2203 			s_end = s_start + s_ml->ml_size;
2204 
2205 			if ((s_start < t_start) || (s_end > t_end))
2206 				break;
2207 		}
2208 	}
2209 	/*
2210 	 * If we ran out of source memlist chunks, that means
2211 	 * we found a home for all of them.
2212 	 */
2213 	if (s_ml == NULL)
2214 		rv = 1;
2215 
2216 	/*
2217 	 * Need to add the base addresses back since the memlists
2218 	 * are probably still in use by the caller.
2219 	 */
2220 	for (s_ml = s_mlist; s_ml; s_ml = s_ml->ml_next)
2221 		s_ml->ml_address += s_basepa;
2222 
2223 	for (t_ml = t_mlist; t_ml; t_ml = t_ml->ml_next)
2224 		t_ml->ml_address += t_basepa;
2225 
2226 	return (rv);
2227 }
2228 
2229 void
2230 sbd_attach_mem(sbd_handle_t *hp, sbderror_t *ep)
2231 {
2232 	sbd_mem_unit_t	*mp;
2233 	dev_info_t	*dip;
2234 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
2235 	sbdp_handle_t	*hdp;
2236 	int		err, unit;
2237 	struct memlist	*ml, *mc;
2238 	static fn_t	f = "sbd_attach_mem";
2239 	int		i;
2240 
2241 	PR_MEM("%s...\n", f);
2242 
2243 	/*
2244 	 * All four CPUs have to be attached before
2245 	 * configuring memory.
2246 	 */
2247 	for (i = 0; i < MAX_CPU_UNITS_PER_BOARD; i++) {
2248 		sbd_cpu_unit_t	*cpup;
2249 		struct cpu	*cp;
2250 
2251 		if (!SBD_DEV_IS_PRESENT(sbp, SBD_COMP_CPU, i))
2252 			continue;
2253 
2254 		if (!SBD_DEV_IS_ATTACHED(sbp, SBD_COMP_CPU, i))
2255 			goto error;
2256 
2257 		cpup = SBD_GET_BOARD_CPUUNIT(sbp, i);
2258 
2259 		if (cpup == NULL)
2260 			goto error;
2261 
2262 		mutex_enter(&cpu_lock);
2263 		cp = cpu_get(cpup->sbc_cpu_id);
2264 		if (cp == NULL) {
2265 			mutex_exit(&cpu_lock);
2266 			cmn_err(CE_WARN,
2267 			    "sbd:%s: cpu_get failed for cpu %d",
2268 			    f, cpup->sbc_cpu_id);
2269 			goto error;
2270 		}
2271 		if (cpu_is_poweredoff(cp)) {
2272 			mutex_exit(&cpu_lock);
2273 			goto error;
2274 		}
2275 		mutex_exit(&cpu_lock);
2276 		continue;
2277 
2278 error:
2279 		SBD_SET_ERR(ep, ESBD_CPUONLINE);
2280 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[i]);
2281 		(void) sbd_set_err_in_hdl(hp, ep);
2282 		return;
2283 	}
2284 
2285 	dip = *(sbp->sb_devlist[NIX(SBD_COMP_MEM)]);
2286 
2287 	hdp = sbd_get_sbdp_handle(sbp, hp);
2288 	unit = sbdp_get_unit_num(hdp, dip);
2289 	if (unit < 0) {
2290 		SBD_GET_PERR(hdp->h_err, ep);
2291 		sbd_release_sbdp_handle(hdp);
2292 		return;
2293 	}
2294 
2295 	ASSERT(sbp->sb_mempath[unit] != NULL);
2296 	ASSERT(e_ddi_branch_held(dip));
2297 
2298 	(void) ddi_pathname(dip, sbp->sb_mempath[unit]);
2299 
2300 	mp = SBD_GET_BOARD_MEMUNIT(sbp, unit);
2301 
2302 	ml = sbd_get_memlist(mp, ep);
2303 	if (ml == NULL) {
2304 		cmn_err(CE_WARN,
2305 			"sbd:%s: failed to get memlist for "
2306 			"board %d", f, sbp->sb_num);
2307 		/*
2308 		 * Need to record an error and return.
2309 		 */
2310 		SBD_SET_ERR(ep, ESBD_MEMFAIL);
2311 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2312 		sbd_release_sbdp_handle(hdp);
2313 		return;
2314 	}
2315 
2316 	SBD_MEMLIST_DUMP(ml);
2317 	err = 0;
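	/*
	 * For each span in the memlist: notify the lgroup framework of the
	 * new memory, add the pages to the system, extend the kernel cage
	 * range, and record the span with the sbdp layer.
	 */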
2318 	for (mc = ml; mc; mc = mc->ml_next) {
2319 		update_membounds_t umb;
2320 		pfn_t	base;
2321 		pgcnt_t npgs;
2322 
2323 		base = (pfn_t)(mc->ml_address >> PAGESHIFT);
2324 		npgs = (pgcnt_t)(mc->ml_size >> PAGESHIFT);
2325 
2326 		umb.u_board = sbp->sb_num;
2327 		umb.u_base = (uint64_t)base << MMU_PAGESHIFT;
2328 		umb.u_len = (uint64_t)npgs << MMU_PAGESHIFT;
2329 
2330 		lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
2331 		err = kphysm_add_memory_dynamic(base, npgs);
2332 
2333 		if (err != KPHYSM_OK) {
2334 			cmn_err(CE_WARN,
2335 			    "%s: kphysm_add_memory_dynamic fail %d", f, err);
2336 
2337 			/* translate kphysm error */
2338 			switch (err) {
2339 			case KPHYSM_ERESOURCE:
2340 				err = ESBD_NOMEM;
2341 				break;
2342 
2343 			case KPHYSM_EFAULT:
2344 				err = ESBD_FAULT;
2345 				break;
2346 
2347 			default:
2348 				err = ESBD_INVAL;
2349 				break;
2350 			}
2351 			break;
2352 		}
2353 
2354 		err = kcage_range_add(base, npgs, KCAGE_DOWN);
2355 		if (err != 0) {
2356 			cmn_err(CE_WARN,
2357 			    "%s: kcage_range_add fail %d", f, err);
2358 
2359 			/* Translate kcage error. */
2360 			switch (err) {
2361 			case ENOMEM:
2362 				err = ESBD_NOMEM;
2363 				break;
2364 			default:
2365 				err = ESBD_INVAL;
2366 				break;
2367 			}
2368 			break;
2369 		}
2370 		(void) sbdp_mem_add_span(hdp, mc->ml_address, mc->ml_size);
2371 	}
2372 
2373 	if (err != 0) {
2374 		SBD_SET_ERR(ep, err);
2375 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2376 	}
2377 
2378 	memlist_delete(ml);
2379 	sbd_release_sbdp_handle(hdp);
2380 
2381 	/*
2382 	 * Now attach all mem devinfo nodes to the device tree.
2383 	 */
2384 	for (i = 0; i < SBD_NUM_MC_PER_BOARD; i++) {
2385 		if (mp->sbm_dip[i] == NULL)
2386 			continue;
2387 		ASSERT(e_ddi_branch_held(mp->sbm_dip[i]));
2388 		if (e_ddi_branch_configure(mp->sbm_dip[i], NULL, 0) &&
2389 		    SBD_GET_ERR(ep) == 0) {
2390 			SBD_SET_ERR(ep, ESBD_INVAL);
2391 			SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2392 		}
2393 	}
2394 }
2395 
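/*
 * Completion state shared between sbd_release_mem() and the
 * kphysm_del_start() callback sbd_release_memory_done(): the callback
 * records 'error', sets 'done' and signals 'cond' under 'lock'.
 */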
2396 typedef struct {
2397 	kcondvar_t cond;
2398 	kmutex_t lock;
2399 	int error;
2400 	int done;
2401 } sbd_release_mem_sync_t;
2402 
2403 /*
2404  * When we reach here the memory being drained should have
2405  * already been reserved in sbd_pre_release_mem().
2406  * Our only task here is to kick off the "drain".
2407  * Returns -1 when an error is encountered, or zero on success.
2408  */
2409 int
2410 sbd_release_mem(sbd_handle_t *hp, dev_info_t *dip, int unit)
2411 {
2412 	memhandle_t	mh;
2413 	int		err;
2414 	int		cancel_flag = 0;
2415 	int		e_code = 0;
2416 	sbd_board_t	*sbp = SBDH2BD(hp->h_sbd);
2417 	sbd_release_mem_sync_t rms;
2418 	static fn_t	f = "sbd_release_mem";
2419 
2420 	/*
2421 	 * If this node has a scheduled memory delete operation,
2422 	 * it will have a memhandle (in which case sbd_get_memhandle()
2423 	 * returns zero).  If a memhandle is present, then perform
2424 	 * the delete.
2425 	 */
2426 
2427 	if ((cancel_flag = sbd_get_memhandle(hp, dip, &mh)) != 0) {
2428 		cmn_err(CE_WARN, "%s: couldn't get the memhandle\n", f);
2429 		return (cancel_flag);
2430 	}
2431 
2432 	bzero((void *) &rms, sizeof (rms));
2433 
2434 	mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL);
2435 	cv_init(&rms.cond, NULL, CV_DRIVER, NULL);
2436 
2437 	mutex_enter(&rms.lock);
2438 	err = kphysm_del_start(mh, sbd_release_memory_done, (void *) &rms);
2439 	if (err == KPHYSM_OK) {
2440 		/* wait for completion */
2441 		while (!rms.done) {
2442 			if (cancel_flag) {
2443 				/* previously canceled */
2444 				cv_wait(&rms.cond, &rms.lock);
2445 			} else if (cv_wait_sig(&rms.cond, &rms.lock) == 0) {
2446 				/* interrupted: cancel and wait */
2447 				cancel_flag = -1;
2448 				(void) kphysm_del_cancel(mh);
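				/*
				 * The cancel completes asynchronously;
				 * keep waiting (uninterruptibly from now
				 * on) for the done callback.
				 */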
2449 			}
2450 		}
2451 		/* get the result of the memory delete operation */
2452 		err = rms.error;
2453 	} else {
2454 		(void) kphysm_del_release(mh);
2455 	}
2456 
2457 	mutex_exit(&rms.lock);
2458 
2459 	cv_destroy(&rms.cond);
2460 	mutex_destroy(&rms.lock);
2461 
2462 	if (err != KPHYSM_OK) {
2463 		switch (err) {
2464 			case KPHYSM_ENOWORK:
2465 				e_code = ESBD_NOERROR;
2466 				break;
2467 
2468 			case KPHYSM_EHANDLE:
2469 			case KPHYSM_ESEQUENCE:
2470 				e_code = ESBD_INTERNAL;
2471 				break;
2472 
2473 			case KPHYSM_ENOTVIABLE:
2474 				e_code = ESBD_MEM_NOTVIABLE;
2475 				break;
2476 
2477 			case KPHYSM_EREFUSED:
2478 				e_code = ESBD_MEM_REFUSED;
2479 				break;
2480 
2481 			case KPHYSM_ENONRELOC:
2482 				e_code = ESBD_MEM_NONRELOC;
2483 				break;
2484 
2485 			case KPHYSM_ECANCELLED:
2486 				e_code = ESBD_MEM_CANCELLED;
2487 				break;
2488 
2489 			case KPHYSM_ERESOURCE:
2490 				e_code = ESBD_MEMFAIL;
2491 				break;
2492 
2493 			default:
2494 				cmn_err(CE_WARN, "sbd:%s:"
2495 					" unexpected kphysm error code %d,"
2496 					" dip 0x%p",
2497 					f, err, (void *)dip);
2498 
2499 				e_code = ESBD_IO;
2500 				break;
2501 		}
2502 
2503 		if (e_code != 0) {
2504 			cancel_flag = -1;
2505 			SBD_SET_ERR(SBD_HD2ERR(hp), e_code);
2506 			SBD_SET_ERRSTR(SBD_HD2ERR(hp), sbp->sb_mempath[unit]);
2507 		}
2508 	}
2509 
2510 	return (cancel_flag);
2511 }
2512 
2513 /*
2514  * Memory has been logically removed by the time this routine is called.
2515  */
2516 void
2517 sbd_release_memory_done(void *arg, int error)
2518 {
2519 	sbd_release_mem_sync_t *ds = arg;
2520 
2521 	mutex_enter(&ds->lock);
2522 	ds->error = error;
2523 	ds->done = 1;
2524 	cv_signal(&ds->cond);
2525 	mutex_exit(&ds->lock);
2526 }
2527 
2528 /*
2529  * If the detaching node contains memory that is "non-permanent",
2530  * then the memory address decoders are simply cleared.  If the memory
2531  * is non-relocatable, then do a copy-rename.
2532  */
2533 int
2534 sbd_detach_memory(sbd_handle_t *hp, sbderror_t *ep, sbd_mem_unit_t *s_mp,
2535 	int unit)
2536 {
2537 	int			rv;
2538 	sbd_mem_unit_t		*t_mp;
2539 	sbd_istate_t		state;
2540 	sbdp_handle_t		*hdp;
2541 	sbd_board_t 		*sbp = (sbd_board_t *)s_mp->sbm_cm.sbdev_sbp;
2542 	sbd_board_t		*tbp;
2543 	static fn_t		f = "sbd_detach_memory";
2544 
2545 	PR_MEM("%s...\n", f);
2546 
2547 	/* lookup target mem unit and target board structure, if any */
2548 	if (s_mp->sbm_flags & SBD_MFLAG_SOURCE) {
2549 		t_mp = s_mp->sbm_peer;
2550 		ASSERT(t_mp != NULL);
2551 		ASSERT(t_mp->sbm_peer == s_mp);
2552 		tbp = (sbd_board_t *)t_mp->sbm_cm.sbdev_sbp;
2553 	} else {
2554 		t_mp = NULL;
2555 	}
2556 
2557 	/* verify mem unit's state is UNREFERENCED */
2558 	state = s_mp->sbm_cm.sbdev_state;
2559 	if (state != SBD_STATE_UNREFERENCED) {
2560 		cmn_err(CE_WARN, "%s: invalid state transition for"
2561 			" mem-unit (%d.%d)",
2562 			f,
2563 			sbp->sb_num,
2564 			s_mp->sbm_cm.sbdev_unum);
2565 		SBD_SET_ERR(ep, ESBD_STATE);
2566 		SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2567 		return (-1);
2568 	}
2569 
2570 	/* verify target mem unit's state is UNREFERENCED, if any */
2571 	if (t_mp != NULL) {
2572 		state = t_mp->sbm_cm.sbdev_state;
2573 		if (state != SBD_STATE_UNREFERENCED) {
2574 			cmn_err(CE_WARN, "%s: invalid state transition for"
2575 				" target mem-unit (%d.%d)",
2576 				f,
2577 				tbp->sb_num,
2578 				t_mp->sbm_cm.sbdev_unum);
2579 			SBD_SET_ERR(ep, ESBD_STATE);
2580 			SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2581 			return (-1);
2582 		}
2583 	}
2584 
2585 	/*
2586 	 * Displacement flush all ecaches in the system.
2587 	 * That's the fastest way to remove all cache references
2588 	 * to the detaching memory.
2589 	 */
2590 	xc_all(sbd_flush_ecache, 0, 0);
2591 
2592 	hdp = sbd_get_sbdp_handle(sbp, hp);
2593 
2594 	/*
2595 	 * If there is no target board (no copy/rename was needed), then
2596 	 * we're done!
2597 	 */
2598 	if (t_mp == NULL) {
2599 		/*
2600 		 * Reprogram interconnect hardware and disable
2601 				 * memory controllers for the memory node that's going away.
2602 		 */
2603 
2604 		rv = sbdphw_disable_memctrl(hdp, s_mp->sbm_cm.sbdev_dip);
2605 		if (rv) {
2606 			cmn_err(CE_WARN,
2607 				"%s: failed to deprogram mem-unit (%d.%d),"
2608 				" dip 0x%p",
2609 				f,
2610 				sbp->sb_num,
2611 				s_mp->sbm_cm.sbdev_unum,
2612 				(void *)s_mp->sbm_cm.sbdev_dip);
2613 			/*
2614 			 * Make sure we don't overwrite an sbdp error
2615 			 */
2616 			if (SBD_GET_ERR(ep) != 0) {
2617 				SBD_SET_ERR(ep, ESBD_HW_PROGRAM);
2618 				SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2619 			}
2620 		}
2621 	} else {
2622 		rv = sbd_move_memory(hp, sbp, tbp);
2623 		if (rv) {
2624 			int i;
2625 
2626 			cmn_err(CE_WARN, "%s: failed to move memory"
2627 				" from board %d to board %d",
2628 				f,
2629 				sbp->sb_num,
2630 				tbp->sb_num);
2631 			/*
2632 			 * Make sure we don't overwrite an sbdp error
2633 			 */
2634 			if (SBD_GET_ERR(ep) != 0) {
2635 				SBD_SET_ERR(ep, ESBD_INTERNAL);
2636 				SBD_SET_ERRSTR(ep, sbp->sb_mempath[unit]);
2637 			}
2638 			/*
2639 			 * If we failed here, it means that the target board's
2640 			 * memory has been unconfigured.  We need to configure
2641 			 * it back.
2642 			 */
2643 			for (i = 0; i < MAX_MEM_UNITS_PER_BOARD; i++) {
2644 				int		unit;
2645 				dev_info_t	*dip;
2646 				dev_info_t	**devlist;
2647 
2648 
2649 				devlist = tbp->sb_devlist[NIX(SBD_COMP_MEM)];
2650 				dip = devlist[i];
2651 				sbd_reset_error_sbdph(hdp);
2652 				unit = sbdp_get_unit_num(hdp, dip);
2653 
2654 				/*
2655 				 * We already saved the error that created
2656 				 * this mess.  If we fail, make sure not
2657 				 * to overwrite the original error
2658 				 */
2659 				if (unit == -1) {
2660 					continue;
2661 				}
2662 				if (sbd_cancel_mem(hp, unit) != 0)
2663 					continue;
2664 
2665 				t_mp->sbm_flags = 0;
2666 				/*
2667 				 * clean up
2668 				 */
2669 				sbd_mem_cleanup(s_mp, t_mp, ep);
2670 				if (s_mp->sbm_mlist) {
2671 					memlist_delete(s_mp->sbm_mlist);
2672 					s_mp->sbm_mlist = NULL;
2673 				}
2674 
2675 				SBD_DEVICE_TRANSITION(tbp, SBD_COMP_MEM,
2676 				    unit, SBD_STATE_CONFIGURED);
2677 			}
2678 		}
2679 
2680 		PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n",
2681 			f,
2682 			rv ? "FAILED" : "COMPLETED",
2683 			sbp->sb_num,
2684 			tbp->sb_num);
2685 	}
2686 
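	/*
	 * On success, tell the lgroup framework that memory was deleted
	 * from this board; no specific span is passed (base and length
	 * are set to -1).
	 */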
2687 	if (rv == 0) {
2688 		update_membounds_t umb;
2689 
2690 		umb.u_board = sbp->sb_num;
2691 		umb.u_base = (uint64_t)-1;
2692 		umb.u_len = (uint64_t)-1;
2693 
2694 		lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);
2695 	}
2696 
2697 	sbd_release_sbdp_handle(hdp);
2698 	return (rv);
2699 }
2700 
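/*
 * Cross-call handler used by sbd_detach_memory() to displacement-flush
 * the ecache on each CPU before the memory is detached.
 */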
2701 /*ARGSUSED*/
2702 static void
2703 sbd_flush_ecache(uint64_t a, uint64_t b)
2704 {
2705 	cpu_flush_ecache();
2706 }
2707