125cf1a30Sjl /* 225cf1a30Sjl * CDDL HEADER START 325cf1a30Sjl * 425cf1a30Sjl * The contents of this file are subject to the terms of the 525cf1a30Sjl * Common Development and Distribution License (the "License"). 625cf1a30Sjl * You may not use this file except in compliance with the License. 725cf1a30Sjl * 825cf1a30Sjl * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 925cf1a30Sjl * or http://www.opensolaris.org/os/licensing. 1025cf1a30Sjl * See the License for the specific language governing permissions 1125cf1a30Sjl * and limitations under the License. 1225cf1a30Sjl * 1325cf1a30Sjl * When distributing Covered Code, include this CDDL HEADER in each 1425cf1a30Sjl * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 1525cf1a30Sjl * If applicable, add the following below this CDDL HEADER, with the 1625cf1a30Sjl * fields enclosed by brackets "[]" replaced with your own identifying 1725cf1a30Sjl * information: Portions Copyright [yyyy] [name of copyright owner] 1825cf1a30Sjl * 1925cf1a30Sjl * CDDL HEADER END 2025cf1a30Sjl */ 2125cf1a30Sjl /* 22*68ac2337Sjl * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 2325cf1a30Sjl * Use is subject to license terms. 2425cf1a30Sjl */ 2525cf1a30Sjl 2625cf1a30Sjl #pragma ident "%Z%%M% %I% %E% SMI" 2725cf1a30Sjl 2825cf1a30Sjl /* 2925cf1a30Sjl * DR memory support routines. 
3025cf1a30Sjl */ 3125cf1a30Sjl 3225cf1a30Sjl #include <sys/note.h> 3325cf1a30Sjl #include <sys/debug.h> 3425cf1a30Sjl #include <sys/types.h> 3525cf1a30Sjl #include <sys/errno.h> 3625cf1a30Sjl #include <sys/param.h> 3725cf1a30Sjl #include <sys/dditypes.h> 3825cf1a30Sjl #include <sys/kmem.h> 3925cf1a30Sjl #include <sys/conf.h> 4025cf1a30Sjl #include <sys/ddi.h> 4125cf1a30Sjl #include <sys/sunddi.h> 4225cf1a30Sjl #include <sys/sunndi.h> 4325cf1a30Sjl #include <sys/ddi_impldefs.h> 4425cf1a30Sjl #include <sys/ndi_impldefs.h> 4525cf1a30Sjl #include <sys/sysmacros.h> 4625cf1a30Sjl #include <sys/machsystm.h> 4725cf1a30Sjl #include <sys/spitregs.h> 4825cf1a30Sjl #include <sys/cpuvar.h> 4925cf1a30Sjl #include <sys/promif.h> 5025cf1a30Sjl #include <vm/seg_kmem.h> 5125cf1a30Sjl #include <sys/lgrp.h> 5225cf1a30Sjl #include <sys/platform_module.h> 5325cf1a30Sjl 5425cf1a30Sjl #include <vm/page.h> 5525cf1a30Sjl 5625cf1a30Sjl #include <sys/dr.h> 5725cf1a30Sjl #include <sys/dr_util.h> 5825cf1a30Sjl #include <sys/drmach.h> 59*68ac2337Sjl #include <sys/kobj.h> 6025cf1a30Sjl 6125cf1a30Sjl extern struct memlist *phys_install; 62*68ac2337Sjl extern vnode_t *retired_pages; 6325cf1a30Sjl 6425cf1a30Sjl /* TODO: push this reference below drmach line */ 6525cf1a30Sjl extern int kcage_on; 6625cf1a30Sjl 6725cf1a30Sjl /* for the DR*INTERNAL_ERROR macros. see sys/dr.h. 
*/ 68*68ac2337Sjl static char *dr_ie_fmt = "dr_mem.c %d"; 6925cf1a30Sjl 7025cf1a30Sjl typedef enum { 7125cf1a30Sjl DR_TP_INVALID = -1, 7225cf1a30Sjl DR_TP_SAME, 7325cf1a30Sjl DR_TP_LARGE, 7425cf1a30Sjl DR_TP_NONRELOC, 7525cf1a30Sjl DR_TP_FLOATING 7625cf1a30Sjl } dr_target_pref_t; 7725cf1a30Sjl 7825cf1a30Sjl static int dr_post_detach_mem_unit(dr_mem_unit_t *mp); 7925cf1a30Sjl static int dr_reserve_mem_spans(memhandle_t *mhp, 8025cf1a30Sjl struct memlist *mlist); 8125cf1a30Sjl static int dr_select_mem_target(dr_handle_t *hp, 8225cf1a30Sjl dr_mem_unit_t *mp, struct memlist *ml); 8325cf1a30Sjl static void dr_init_mem_unit_data(dr_mem_unit_t *mp); 8425cf1a30Sjl static struct memlist *dr_memlist_del_retired_pages(struct memlist *ml); 8525cf1a30Sjl static dr_target_pref_t dr_get_target_preference(dr_handle_t *hp, 8625cf1a30Sjl dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp, 8725cf1a30Sjl struct memlist *s_ml, struct memlist *x_ml, 8825cf1a30Sjl struct memlist *b_ml); 8925cf1a30Sjl 9025cf1a30Sjl static int memlist_canfit(struct memlist *s_mlist, 9125cf1a30Sjl struct memlist *t_mlist); 9225cf1a30Sjl static int dr_del_mlist_query(struct memlist *mlist, 9325cf1a30Sjl memquery_t *mp); 9425cf1a30Sjl static struct memlist *dr_get_copy_mlist(struct memlist *s_ml, 9525cf1a30Sjl struct memlist *t_ml, dr_mem_unit_t *s_mp, 9625cf1a30Sjl dr_mem_unit_t *t_mp); 9725cf1a30Sjl static struct memlist *dr_get_nonreloc_mlist(struct memlist *s_ml, 9825cf1a30Sjl dr_mem_unit_t *s_mp); 9925cf1a30Sjl static int dr_memlist_canfit(struct memlist *s_mlist, 10025cf1a30Sjl struct memlist *t_mlist, dr_mem_unit_t *s_mp, 10125cf1a30Sjl dr_mem_unit_t *t_mp); 10225cf1a30Sjl 10325cf1a30Sjl /* 10425cf1a30Sjl * dr_mem_unit_t.sbm_flags 10525cf1a30Sjl */ 10625cf1a30Sjl #define DR_MFLAG_RESERVED 0x01 /* mem unit reserved for delete */ 10725cf1a30Sjl #define DR_MFLAG_SOURCE 0x02 /* source brd of copy/rename op */ 10825cf1a30Sjl #define DR_MFLAG_TARGET 0x04 /* target brd of copy/rename op */ 10925cf1a30Sjl #define 
DR_MFLAG_RELOWNER 0x20 /* memory release (delete) owner */ 11025cf1a30Sjl #define DR_MFLAG_RELDONE 0x40 /* memory release (delete) done */ 11125cf1a30Sjl 11225cf1a30Sjl /* helper macros */ 11325cf1a30Sjl #define _ptob64(p) ((uint64_t)(p) << PAGESHIFT) 11425cf1a30Sjl #define _b64top(b) ((pgcnt_t)((b) >> PAGESHIFT)) 11525cf1a30Sjl 11625cf1a30Sjl static struct memlist * 11725cf1a30Sjl dr_get_memlist(dr_mem_unit_t *mp) 11825cf1a30Sjl { 11925cf1a30Sjl struct memlist *mlist = NULL; 12025cf1a30Sjl sbd_error_t *err; 12125cf1a30Sjl static fn_t f = "dr_get_memlist"; 12225cf1a30Sjl 12325cf1a30Sjl PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path); 12425cf1a30Sjl 12525cf1a30Sjl /* 12625cf1a30Sjl * Return cached memlist, if present. 12725cf1a30Sjl * This memlist will be present following an 12825cf1a30Sjl * unconfigure (a.k.a: detach) of this memunit. 12925cf1a30Sjl * It should only be used in the case were a configure 13025cf1a30Sjl * is bringing this memunit back in without going 13125cf1a30Sjl * through the disconnect and connect states. 
13225cf1a30Sjl */ 13325cf1a30Sjl if (mp->sbm_mlist) { 13425cf1a30Sjl PR_MEM("%s: found cached memlist\n", f); 13525cf1a30Sjl 13625cf1a30Sjl mlist = memlist_dup(mp->sbm_mlist); 13725cf1a30Sjl } else { 13825cf1a30Sjl uint64_t basepa = _ptob64(mp->sbm_basepfn); 13925cf1a30Sjl 14025cf1a30Sjl /* attempt to construct a memlist using phys_install */ 14125cf1a30Sjl 14225cf1a30Sjl /* round down to slice base address */ 14325cf1a30Sjl basepa &= ~(mp->sbm_slice_size - 1); 14425cf1a30Sjl 14525cf1a30Sjl /* get a copy of phys_install to edit */ 14625cf1a30Sjl memlist_read_lock(); 14725cf1a30Sjl mlist = memlist_dup(phys_install); 14825cf1a30Sjl memlist_read_unlock(); 14925cf1a30Sjl 15025cf1a30Sjl /* trim lower irrelevant span */ 15125cf1a30Sjl if (mlist) 15225cf1a30Sjl mlist = memlist_del_span(mlist, 0ull, basepa); 15325cf1a30Sjl 15425cf1a30Sjl /* trim upper irrelevant span */ 15525cf1a30Sjl if (mlist) { 15625cf1a30Sjl uint64_t endpa; 15725cf1a30Sjl 15825cf1a30Sjl basepa += mp->sbm_slice_size; 15925cf1a30Sjl endpa = _ptob64(physmax + 1); 16025cf1a30Sjl if (endpa > basepa) 16125cf1a30Sjl mlist = memlist_del_span( 16225cf1a30Sjl mlist, basepa, 16325cf1a30Sjl endpa - basepa); 16425cf1a30Sjl } 16525cf1a30Sjl 16625cf1a30Sjl if (mlist) { 16725cf1a30Sjl /* successfully built a memlist */ 16825cf1a30Sjl PR_MEM("%s: derived memlist from phys_install\n", f); 16925cf1a30Sjl } 17025cf1a30Sjl 17125cf1a30Sjl /* if no mlist yet, try platform layer */ 17225cf1a30Sjl if (!mlist) { 17325cf1a30Sjl err = drmach_mem_get_memlist( 17425cf1a30Sjl mp->sbm_cm.sbdev_id, &mlist); 17525cf1a30Sjl if (err) { 17625cf1a30Sjl DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); 17725cf1a30Sjl mlist = NULL; /* paranoia */ 17825cf1a30Sjl } 17925cf1a30Sjl } 18025cf1a30Sjl } 18125cf1a30Sjl 18225cf1a30Sjl PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path); 18325cf1a30Sjl PR_MEMLIST_DUMP(mlist); 18425cf1a30Sjl 18525cf1a30Sjl return (mlist); 18625cf1a30Sjl } 18725cf1a30Sjl 18825cf1a30Sjl typedef struct { 18925cf1a30Sjl 
kcondvar_t cond; 19025cf1a30Sjl kmutex_t lock; 19125cf1a30Sjl int error; 19225cf1a30Sjl int done; 19325cf1a30Sjl } dr_release_mem_sync_t; 19425cf1a30Sjl 19525cf1a30Sjl /* 19625cf1a30Sjl * Memory has been logically removed by the time this routine is called. 19725cf1a30Sjl */ 19825cf1a30Sjl static void 19925cf1a30Sjl dr_mem_del_done(void *arg, int error) 20025cf1a30Sjl { 20125cf1a30Sjl dr_release_mem_sync_t *ds = arg; 20225cf1a30Sjl 20325cf1a30Sjl mutex_enter(&ds->lock); 20425cf1a30Sjl ds->error = error; 20525cf1a30Sjl ds->done = 1; 20625cf1a30Sjl cv_signal(&ds->cond); 20725cf1a30Sjl mutex_exit(&ds->lock); 20825cf1a30Sjl } 20925cf1a30Sjl 21025cf1a30Sjl /* 21125cf1a30Sjl * When we reach here the memory being drained should have 21225cf1a30Sjl * already been reserved in dr_pre_release_mem(). 21325cf1a30Sjl * Our only task here is to kick off the "drain" and wait 21425cf1a30Sjl * for it to finish. 21525cf1a30Sjl */ 21625cf1a30Sjl void 21725cf1a30Sjl dr_release_mem(dr_common_unit_t *cp) 21825cf1a30Sjl { 21925cf1a30Sjl dr_mem_unit_t *mp = (dr_mem_unit_t *)cp; 22025cf1a30Sjl int err; 22125cf1a30Sjl dr_release_mem_sync_t rms; 22225cf1a30Sjl static fn_t f = "dr_release_mem"; 22325cf1a30Sjl 22425cf1a30Sjl /* check that this memory unit has been reserved */ 22525cf1a30Sjl if (!(mp->sbm_flags & DR_MFLAG_RELOWNER)) { 22625cf1a30Sjl DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); 22725cf1a30Sjl return; 22825cf1a30Sjl } 22925cf1a30Sjl 23025cf1a30Sjl bzero((void *) &rms, sizeof (rms)); 23125cf1a30Sjl 23225cf1a30Sjl mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL); 23325cf1a30Sjl cv_init(&rms.cond, NULL, CV_DRIVER, NULL); 23425cf1a30Sjl 23525cf1a30Sjl mutex_enter(&rms.lock); 23625cf1a30Sjl err = kphysm_del_start(mp->sbm_memhandle, 23725cf1a30Sjl dr_mem_del_done, (void *) &rms); 23825cf1a30Sjl if (err == KPHYSM_OK) { 23925cf1a30Sjl /* wait for completion or interrupt */ 24025cf1a30Sjl while (!rms.done) { 24125cf1a30Sjl if (cv_wait_sig(&rms.cond, &rms.lock) == 0) { 24225cf1a30Sjl /* then there is 
a pending UNIX signal */ 24325cf1a30Sjl (void) kphysm_del_cancel(mp->sbm_memhandle); 24425cf1a30Sjl 24525cf1a30Sjl /* wait for completion */ 24625cf1a30Sjl while (!rms.done) 24725cf1a30Sjl cv_wait(&rms.cond, &rms.lock); 24825cf1a30Sjl } 24925cf1a30Sjl } 25025cf1a30Sjl /* get the result of the memory delete operation */ 25125cf1a30Sjl err = rms.error; 25225cf1a30Sjl } 25325cf1a30Sjl mutex_exit(&rms.lock); 25425cf1a30Sjl 25525cf1a30Sjl cv_destroy(&rms.cond); 25625cf1a30Sjl mutex_destroy(&rms.lock); 25725cf1a30Sjl 25825cf1a30Sjl if (err != KPHYSM_OK) { 25925cf1a30Sjl int e_code; 26025cf1a30Sjl 26125cf1a30Sjl switch (err) { 26225cf1a30Sjl case KPHYSM_ENOWORK: 26325cf1a30Sjl e_code = ESBD_NOERROR; 26425cf1a30Sjl break; 26525cf1a30Sjl 26625cf1a30Sjl case KPHYSM_EHANDLE: 26725cf1a30Sjl case KPHYSM_ESEQUENCE: 26825cf1a30Sjl e_code = ESBD_INTERNAL; 26925cf1a30Sjl break; 27025cf1a30Sjl 27125cf1a30Sjl case KPHYSM_ENOTVIABLE: 27225cf1a30Sjl e_code = ESBD_MEM_NOTVIABLE; 27325cf1a30Sjl break; 27425cf1a30Sjl 27525cf1a30Sjl case KPHYSM_EREFUSED: 27625cf1a30Sjl e_code = ESBD_MEM_REFUSED; 27725cf1a30Sjl break; 27825cf1a30Sjl 27925cf1a30Sjl case KPHYSM_ENONRELOC: 28025cf1a30Sjl e_code = ESBD_MEM_NONRELOC; 28125cf1a30Sjl break; 28225cf1a30Sjl 28325cf1a30Sjl case KPHYSM_ECANCELLED: 28425cf1a30Sjl e_code = ESBD_MEM_CANCELLED; 28525cf1a30Sjl break; 28625cf1a30Sjl 28725cf1a30Sjl case KPHYSM_ERESOURCE: 28825cf1a30Sjl e_code = ESBD_MEMFAIL; 28925cf1a30Sjl break; 29025cf1a30Sjl 29125cf1a30Sjl default: 29225cf1a30Sjl cmn_err(CE_WARN, 29325cf1a30Sjl "%s: unexpected kphysm error code %d," 29425cf1a30Sjl " id 0x%p", 29525cf1a30Sjl f, err, mp->sbm_cm.sbdev_id); 29625cf1a30Sjl 29725cf1a30Sjl e_code = ESBD_IO; 29825cf1a30Sjl break; 29925cf1a30Sjl } 30025cf1a30Sjl 30125cf1a30Sjl if (e_code != ESBD_NOERROR) { 30225cf1a30Sjl dr_dev_err(CE_IGNORE, &mp->sbm_cm, e_code); 30325cf1a30Sjl } 30425cf1a30Sjl } 30525cf1a30Sjl } 30625cf1a30Sjl 30725cf1a30Sjl void 30825cf1a30Sjl dr_attach_mem(dr_handle_t *hp, 
dr_common_unit_t *cp) 30925cf1a30Sjl { 31025cf1a30Sjl _NOTE(ARGUNUSED(hp)) 31125cf1a30Sjl 31225cf1a30Sjl dr_mem_unit_t *mp = (dr_mem_unit_t *)cp; 31325cf1a30Sjl struct memlist *ml, *mc; 31425cf1a30Sjl sbd_error_t *err; 31525cf1a30Sjl static fn_t f = "dr_attach_mem"; 31625cf1a30Sjl 31725cf1a30Sjl PR_MEM("%s...\n", f); 31825cf1a30Sjl 31925cf1a30Sjl dr_lock_status(hp->h_bd); 32025cf1a30Sjl err = drmach_configure(cp->sbdev_id, 0); 32125cf1a30Sjl dr_unlock_status(hp->h_bd); 32225cf1a30Sjl if (err) { 32325cf1a30Sjl DRERR_SET_C(&cp->sbdev_error, &err); 32425cf1a30Sjl return; 32525cf1a30Sjl } 32625cf1a30Sjl 32725cf1a30Sjl ml = dr_get_memlist(mp); 32825cf1a30Sjl for (mc = ml; mc; mc = mc->next) { 32925cf1a30Sjl int rv; 33025cf1a30Sjl sbd_error_t *err; 33125cf1a30Sjl 33225cf1a30Sjl rv = kphysm_add_memory_dynamic( 33325cf1a30Sjl (pfn_t)(mc->address >> PAGESHIFT), 33425cf1a30Sjl (pgcnt_t)(mc->size >> PAGESHIFT)); 33525cf1a30Sjl if (rv != KPHYSM_OK) { 33625cf1a30Sjl /* 33725cf1a30Sjl * translate kphysm error and 33825cf1a30Sjl * store in devlist error 33925cf1a30Sjl */ 34025cf1a30Sjl switch (rv) { 34125cf1a30Sjl case KPHYSM_ERESOURCE: 34225cf1a30Sjl rv = ESBD_NOMEM; 34325cf1a30Sjl break; 34425cf1a30Sjl 34525cf1a30Sjl case KPHYSM_EFAULT: 34625cf1a30Sjl rv = ESBD_FAULT; 34725cf1a30Sjl break; 34825cf1a30Sjl 34925cf1a30Sjl default: 35025cf1a30Sjl rv = ESBD_INTERNAL; 35125cf1a30Sjl break; 35225cf1a30Sjl } 35325cf1a30Sjl 35425cf1a30Sjl if (rv == ESBD_INTERNAL) { 35525cf1a30Sjl DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); 35625cf1a30Sjl } else 35725cf1a30Sjl dr_dev_err(CE_WARN, &mp->sbm_cm, rv); 35825cf1a30Sjl break; 35925cf1a30Sjl } 36025cf1a30Sjl 36125cf1a30Sjl err = drmach_mem_add_span( 36225cf1a30Sjl mp->sbm_cm.sbdev_id, mc->address, mc->size); 36325cf1a30Sjl if (err) { 36425cf1a30Sjl DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); 36525cf1a30Sjl break; 36625cf1a30Sjl } 36725cf1a30Sjl } 36825cf1a30Sjl 36925cf1a30Sjl memlist_delete(ml); 37025cf1a30Sjl 37125cf1a30Sjl /* back out if configure 
failed */ 37225cf1a30Sjl if (mp->sbm_cm.sbdev_error != NULL) { 37325cf1a30Sjl dr_lock_status(hp->h_bd); 37425cf1a30Sjl err = drmach_unconfigure(cp->sbdev_id, 0); 37525cf1a30Sjl if (err) 37625cf1a30Sjl sbd_err_clear(&err); 37725cf1a30Sjl dr_unlock_status(hp->h_bd); 37825cf1a30Sjl } 37925cf1a30Sjl } 38025cf1a30Sjl 38125cf1a30Sjl static struct memlist * 38225cf1a30Sjl dr_memlist_del_retired_pages(struct memlist *mlist) 38325cf1a30Sjl { 38425cf1a30Sjl page_t *pp; 38525cf1a30Sjl pfn_t pfn; 38625cf1a30Sjl kmutex_t *vphm; 387*68ac2337Sjl vnode_t *vp = retired_pages; 38825cf1a30Sjl static fn_t f = "dr_memlist_del_retired_pages"; 38925cf1a30Sjl 39025cf1a30Sjl vphm = page_vnode_mutex(vp); 39125cf1a30Sjl mutex_enter(vphm); 39225cf1a30Sjl 39325cf1a30Sjl PR_MEM("%s\n", f); 39425cf1a30Sjl 39525cf1a30Sjl if ((pp = vp->v_pages) == NULL) { 39625cf1a30Sjl mutex_exit(vphm); 39725cf1a30Sjl return (mlist); 39825cf1a30Sjl } 39925cf1a30Sjl 40025cf1a30Sjl do { 40125cf1a30Sjl ASSERT(pp != NULL); 40225cf1a30Sjl /* 40325cf1a30Sjl * page_downgrade happens after page_hashin, so we 40425cf1a30Sjl * can't assert PAGE_SE. Just assert locked to catch 40525cf1a30Sjl * changes to the retired vnode locking scheme. 40625cf1a30Sjl */ 40725cf1a30Sjl ASSERT(PAGE_LOCKED(pp)); 408*68ac2337Sjl ASSERT(pp->p_vnode == retired_pages); 40925cf1a30Sjl 41025cf1a30Sjl if (!page_trylock(pp, SE_SHARED)) 41125cf1a30Sjl continue; 41225cf1a30Sjl 41325cf1a30Sjl pfn = page_pptonum(pp); 41425cf1a30Sjl 41525cf1a30Sjl ASSERT((pp->p_offset >> PAGESHIFT) == pfn); 41625cf1a30Sjl /* 41725cf1a30Sjl * Page retirement currently breaks large pages into PAGESIZE 41825cf1a30Sjl * pages. If this changes, need to remove the assert and deal 41925cf1a30Sjl * with different page sizes. 
42025cf1a30Sjl */ 42125cf1a30Sjl ASSERT(pp->p_szc == 0); 42225cf1a30Sjl 42325cf1a30Sjl if (address_in_memlist(mlist, ptob(pfn), PAGESIZE)) { 42425cf1a30Sjl mlist = memlist_del_span(mlist, ptob(pfn), PAGESIZE); 42525cf1a30Sjl PR_MEM("deleted retired page 0x%lx (pfn 0x%lx) " 42625cf1a30Sjl "from memlist\n", ptob(pfn), pfn); 42725cf1a30Sjl } 42825cf1a30Sjl 42925cf1a30Sjl page_unlock(pp); 43025cf1a30Sjl } while ((pp = pp->p_vpnext) != vp->v_pages); 43125cf1a30Sjl 43225cf1a30Sjl mutex_exit(vphm); 43325cf1a30Sjl 43425cf1a30Sjl return (mlist); 43525cf1a30Sjl } 43625cf1a30Sjl 43725cf1a30Sjl static int 43825cf1a30Sjl dr_move_memory(dr_handle_t *hp, dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp) 43925cf1a30Sjl { 44025cf1a30Sjl int rv = -1; 44125cf1a30Sjl time_t copytime; 44225cf1a30Sjl drmachid_t cr_id; 44325cf1a30Sjl dr_sr_handle_t *srhp = NULL; 44425cf1a30Sjl dr_board_t *t_bp, *s_bp; 44525cf1a30Sjl struct memlist *c_ml, *d_ml; 44625cf1a30Sjl sbd_error_t *err; 44725cf1a30Sjl static fn_t f = "dr_move_memory"; 44825cf1a30Sjl 44925cf1a30Sjl PR_MEM("%s: (INLINE) moving memory from %s to %s\n", 45025cf1a30Sjl f, 45125cf1a30Sjl s_mp->sbm_cm.sbdev_path, 45225cf1a30Sjl t_mp->sbm_cm.sbdev_path); 45325cf1a30Sjl 45425cf1a30Sjl ASSERT(s_mp->sbm_flags & DR_MFLAG_SOURCE); 45525cf1a30Sjl ASSERT(s_mp->sbm_peer == t_mp); 45625cf1a30Sjl ASSERT(s_mp->sbm_mlist); 45725cf1a30Sjl 45825cf1a30Sjl ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET); 45925cf1a30Sjl ASSERT(t_mp->sbm_peer == s_mp); 46025cf1a30Sjl 46125cf1a30Sjl /* 46225cf1a30Sjl * create a memlist of spans to copy by removing 46325cf1a30Sjl * the spans that have been deleted, if any, from 46425cf1a30Sjl * the full source board memlist. s_mp->sbm_del_mlist 46525cf1a30Sjl * will be NULL if there were no spans deleted from 46625cf1a30Sjl * the source board. 
46725cf1a30Sjl */ 46825cf1a30Sjl c_ml = memlist_dup(s_mp->sbm_mlist); 46925cf1a30Sjl d_ml = s_mp->sbm_del_mlist; 47025cf1a30Sjl while (d_ml != NULL) { 47125cf1a30Sjl c_ml = memlist_del_span(c_ml, d_ml->address, d_ml->size); 47225cf1a30Sjl d_ml = d_ml->next; 47325cf1a30Sjl } 47425cf1a30Sjl 47525cf1a30Sjl /* 47625cf1a30Sjl * Remove retired pages from the copy list. The page content 47725cf1a30Sjl * need not be copied since the pages are no longer in use. 47825cf1a30Sjl */ 47925cf1a30Sjl PR_MEM("%s: copy list before removing retired pages (if any):\n", f); 48025cf1a30Sjl PR_MEMLIST_DUMP(c_ml); 48125cf1a30Sjl 48225cf1a30Sjl c_ml = dr_memlist_del_retired_pages(c_ml); 48325cf1a30Sjl 48425cf1a30Sjl PR_MEM("%s: copy list after removing retired pages:\n", f); 48525cf1a30Sjl PR_MEMLIST_DUMP(c_ml); 48625cf1a30Sjl 48725cf1a30Sjl /* 48825cf1a30Sjl * With parallel copy, it shouldn't make a difference which 48925cf1a30Sjl * CPU is the actual master during copy-rename since all 49025cf1a30Sjl * CPUs participate in the parallel copy anyway. 49125cf1a30Sjl */ 49225cf1a30Sjl affinity_set(CPU_CURRENT); 49325cf1a30Sjl 49425cf1a30Sjl err = drmach_copy_rename_init( 49525cf1a30Sjl t_mp->sbm_cm.sbdev_id, s_mp->sbm_cm.sbdev_id, c_ml, &cr_id); 49625cf1a30Sjl if (err) { 49725cf1a30Sjl DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err); 49825cf1a30Sjl affinity_clear(); 49925cf1a30Sjl memlist_delete(c_ml); 50025cf1a30Sjl return (-1); 50125cf1a30Sjl } 50225cf1a30Sjl 50325cf1a30Sjl srhp = dr_get_sr_handle(hp); 50425cf1a30Sjl ASSERT(srhp); 50525cf1a30Sjl 50625cf1a30Sjl copytime = lbolt; 50725cf1a30Sjl 50825cf1a30Sjl /* Quiesce the OS. */ 50925cf1a30Sjl if (dr_suspend(srhp)) { 51025cf1a30Sjl cmn_err(CE_WARN, "%s: failed to quiesce OS" 51125cf1a30Sjl " for copy-rename", f); 51225cf1a30Sjl 51325cf1a30Sjl err = drmach_copy_rename_fini(cr_id); 51425cf1a30Sjl if (err) { 51525cf1a30Sjl /* 51625cf1a30Sjl * no error is expected since the program has 51725cf1a30Sjl * not yet run. 
51825cf1a30Sjl */ 51925cf1a30Sjl 52025cf1a30Sjl /* catch this in debug kernels */ 52125cf1a30Sjl ASSERT(0); 52225cf1a30Sjl 52325cf1a30Sjl sbd_err_clear(&err); 52425cf1a30Sjl } 52525cf1a30Sjl 52625cf1a30Sjl /* suspend error reached via hp */ 52725cf1a30Sjl s_mp->sbm_cm.sbdev_error = hp->h_err; 52825cf1a30Sjl hp->h_err = NULL; 52925cf1a30Sjl goto done; 53025cf1a30Sjl } 53125cf1a30Sjl 53225cf1a30Sjl drmach_copy_rename(cr_id); 53325cf1a30Sjl 53425cf1a30Sjl /* Resume the OS. */ 53525cf1a30Sjl dr_resume(srhp); 53625cf1a30Sjl 53725cf1a30Sjl copytime = lbolt - copytime; 53825cf1a30Sjl 53925cf1a30Sjl if (err = drmach_copy_rename_fini(cr_id)) 54025cf1a30Sjl goto done; 54125cf1a30Sjl 54225cf1a30Sjl /* 54325cf1a30Sjl * Rename memory for lgroup. 54425cf1a30Sjl * Source and target board numbers are packaged in arg. 54525cf1a30Sjl */ 54625cf1a30Sjl s_bp = s_mp->sbm_cm.sbdev_bp; 54725cf1a30Sjl t_bp = t_mp->sbm_cm.sbdev_bp; 54825cf1a30Sjl 54925cf1a30Sjl lgrp_plat_config(LGRP_CONFIG_MEM_RENAME, 55025cf1a30Sjl (uintptr_t)(s_bp->b_num | (t_bp->b_num << 16))); 55125cf1a30Sjl 55225cf1a30Sjl 55325cf1a30Sjl PR_MEM("%s: copy-rename elapsed time = %ld ticks (%ld secs)\n", 55425cf1a30Sjl f, copytime, copytime / hz); 55525cf1a30Sjl 55625cf1a30Sjl rv = 0; 55725cf1a30Sjl done: 55825cf1a30Sjl if (srhp) 55925cf1a30Sjl dr_release_sr_handle(srhp); 56025cf1a30Sjl if (err) 56125cf1a30Sjl DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err); 56225cf1a30Sjl affinity_clear(); 56325cf1a30Sjl 56425cf1a30Sjl return (rv); 56525cf1a30Sjl } 56625cf1a30Sjl 56725cf1a30Sjl /* 56825cf1a30Sjl * If detaching node contains memory that is "non-permanent" 56925cf1a30Sjl * then the memory adr's are simply cleared. If the memory 57025cf1a30Sjl * is non-relocatable, then do a copy-rename. 
57125cf1a30Sjl */ 57225cf1a30Sjl void 57325cf1a30Sjl dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp) 57425cf1a30Sjl { 57525cf1a30Sjl int rv = 0; 57625cf1a30Sjl dr_mem_unit_t *s_mp = (dr_mem_unit_t *)cp; 57725cf1a30Sjl dr_mem_unit_t *t_mp; 57825cf1a30Sjl dr_state_t state; 57925cf1a30Sjl static fn_t f = "dr_detach_mem"; 58025cf1a30Sjl 58125cf1a30Sjl PR_MEM("%s...\n", f); 58225cf1a30Sjl 58325cf1a30Sjl /* lookup target mem unit and target board structure, if any */ 58425cf1a30Sjl if (s_mp->sbm_flags & DR_MFLAG_SOURCE) { 58525cf1a30Sjl t_mp = s_mp->sbm_peer; 58625cf1a30Sjl ASSERT(t_mp != NULL); 58725cf1a30Sjl ASSERT(t_mp->sbm_peer == s_mp); 58825cf1a30Sjl } else { 58925cf1a30Sjl t_mp = NULL; 59025cf1a30Sjl } 59125cf1a30Sjl 59225cf1a30Sjl /* verify mem unit's state is UNREFERENCED */ 59325cf1a30Sjl state = s_mp->sbm_cm.sbdev_state; 59425cf1a30Sjl if (state != DR_STATE_UNREFERENCED) { 59525cf1a30Sjl dr_dev_err(CE_IGNORE, &s_mp->sbm_cm, ESBD_STATE); 59625cf1a30Sjl return; 59725cf1a30Sjl } 59825cf1a30Sjl 59925cf1a30Sjl /* verify target mem unit's state is UNREFERENCED, if any */ 60025cf1a30Sjl if (t_mp != NULL) { 60125cf1a30Sjl state = t_mp->sbm_cm.sbdev_state; 60225cf1a30Sjl if (state != DR_STATE_UNREFERENCED) { 60325cf1a30Sjl dr_dev_err(CE_IGNORE, &t_mp->sbm_cm, ESBD_STATE); 60425cf1a30Sjl return; 60525cf1a30Sjl } 60625cf1a30Sjl } 60725cf1a30Sjl 60825cf1a30Sjl /* 60925cf1a30Sjl * If there is no target board (no copy/rename was needed), then 61025cf1a30Sjl * we're done! 61125cf1a30Sjl */ 61225cf1a30Sjl if (t_mp == NULL) { 61325cf1a30Sjl sbd_error_t *err; 61425cf1a30Sjl /* 61525cf1a30Sjl * Reprogram interconnect hardware and disable 61625cf1a30Sjl * memory controllers for memory node that's going away. 
61725cf1a30Sjl */ 61825cf1a30Sjl 61925cf1a30Sjl err = drmach_mem_disable(s_mp->sbm_cm.sbdev_id); 62025cf1a30Sjl if (err) { 62125cf1a30Sjl DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err); 62225cf1a30Sjl rv = -1; 62325cf1a30Sjl } 62425cf1a30Sjl } else { 62525cf1a30Sjl rv = dr_move_memory(hp, s_mp, t_mp); 62625cf1a30Sjl PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n", 62725cf1a30Sjl f, 62825cf1a30Sjl rv ? "FAILED" : "COMPLETED", 62925cf1a30Sjl s_mp->sbm_cm.sbdev_bp->b_num, 63025cf1a30Sjl t_mp->sbm_cm.sbdev_bp->b_num); 63125cf1a30Sjl 63225cf1a30Sjl if (rv != 0) 63325cf1a30Sjl (void) dr_cancel_mem(s_mp); 63425cf1a30Sjl } 63525cf1a30Sjl 63625cf1a30Sjl if (rv == 0) { 63725cf1a30Sjl sbd_error_t *err; 63825cf1a30Sjl 63925cf1a30Sjl dr_lock_status(hp->h_bd); 64025cf1a30Sjl err = drmach_unconfigure(s_mp->sbm_cm.sbdev_id, 0); 64125cf1a30Sjl dr_unlock_status(hp->h_bd); 64225cf1a30Sjl if (err) 64325cf1a30Sjl sbd_err_clear(&err); 64425cf1a30Sjl } 64525cf1a30Sjl } 64625cf1a30Sjl 64725cf1a30Sjl /* 64825cf1a30Sjl * This routine acts as a wrapper for kphysm_del_span_query in order to 64925cf1a30Sjl * support potential memory holes in a board's physical address space. 65025cf1a30Sjl * It calls kphysm_del_span_query for each node in a memlist and accumulates 65125cf1a30Sjl * the results in *mp. 
65225cf1a30Sjl */ 65325cf1a30Sjl static int 65425cf1a30Sjl dr_del_mlist_query(struct memlist *mlist, memquery_t *mp) 65525cf1a30Sjl { 65625cf1a30Sjl struct memlist *ml; 65725cf1a30Sjl int rv = 0; 65825cf1a30Sjl 65925cf1a30Sjl 66025cf1a30Sjl if (mlist == NULL) 66125cf1a30Sjl cmn_err(CE_WARN, "dr_del_mlist_query: mlist=NULL\n"); 66225cf1a30Sjl 66325cf1a30Sjl mp->phys_pages = 0; 66425cf1a30Sjl mp->managed = 0; 66525cf1a30Sjl mp->nonrelocatable = 0; 66625cf1a30Sjl mp->first_nonrelocatable = (pfn_t)-1; /* XXX */ 66725cf1a30Sjl mp->last_nonrelocatable = 0; 66825cf1a30Sjl 66925cf1a30Sjl for (ml = mlist; ml; ml = ml->next) { 67025cf1a30Sjl memquery_t mq; 67125cf1a30Sjl 67225cf1a30Sjl rv = kphysm_del_span_query( 67325cf1a30Sjl _b64top(ml->address), _b64top(ml->size), &mq); 67425cf1a30Sjl if (rv) 67525cf1a30Sjl break; 67625cf1a30Sjl 67725cf1a30Sjl mp->phys_pages += mq.phys_pages; 67825cf1a30Sjl mp->managed += mq.managed; 67925cf1a30Sjl mp->nonrelocatable += mq.nonrelocatable; 68025cf1a30Sjl 68125cf1a30Sjl if (mq.nonrelocatable != 0) { 68225cf1a30Sjl if (mq.first_nonrelocatable < mp->first_nonrelocatable) 68325cf1a30Sjl mp->first_nonrelocatable = 68425cf1a30Sjl mq.first_nonrelocatable; 68525cf1a30Sjl if (mq.last_nonrelocatable > mp->last_nonrelocatable) 68625cf1a30Sjl mp->last_nonrelocatable = 68725cf1a30Sjl mq.last_nonrelocatable; 68825cf1a30Sjl } 68925cf1a30Sjl } 69025cf1a30Sjl 69125cf1a30Sjl if (mp->nonrelocatable == 0) 69225cf1a30Sjl mp->first_nonrelocatable = 0; /* XXX */ 69325cf1a30Sjl 69425cf1a30Sjl return (rv); 69525cf1a30Sjl } 69625cf1a30Sjl 69725cf1a30Sjl /* 69825cf1a30Sjl * NOTE: This routine is only partially smart about multiple 69925cf1a30Sjl * mem-units. Need to make mem-status structure smart 70025cf1a30Sjl * about them also. 
70125cf1a30Sjl */ 70225cf1a30Sjl int 70325cf1a30Sjl dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp) 70425cf1a30Sjl { 70525cf1a30Sjl int m, mix; 70625cf1a30Sjl memdelstat_t mdst; 70725cf1a30Sjl memquery_t mq; 70825cf1a30Sjl dr_board_t *bp; 70925cf1a30Sjl dr_mem_unit_t *mp; 71025cf1a30Sjl sbd_mem_stat_t *msp; 71125cf1a30Sjl static fn_t f = "dr_mem_status"; 71225cf1a30Sjl 71325cf1a30Sjl bp = hp->h_bd; 71425cf1a30Sjl devset &= DR_DEVS_PRESENT(bp); 71525cf1a30Sjl 71625cf1a30Sjl for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) { 71725cf1a30Sjl int rv; 71825cf1a30Sjl sbd_error_t *err; 71925cf1a30Sjl drmach_status_t pstat; 72025cf1a30Sjl dr_mem_unit_t *p_mp; 72125cf1a30Sjl 72225cf1a30Sjl if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0) 72325cf1a30Sjl continue; 72425cf1a30Sjl 72525cf1a30Sjl mp = dr_get_mem_unit(bp, m); 72625cf1a30Sjl 72725cf1a30Sjl if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) { 72825cf1a30Sjl /* present, but not fully initialized */ 72925cf1a30Sjl continue; 73025cf1a30Sjl } 73125cf1a30Sjl 73225cf1a30Sjl if (mp->sbm_cm.sbdev_id == (drmachid_t)0) 73325cf1a30Sjl continue; 73425cf1a30Sjl 73525cf1a30Sjl /* fetch platform status */ 73625cf1a30Sjl err = drmach_status(mp->sbm_cm.sbdev_id, &pstat); 73725cf1a30Sjl if (err) { 73825cf1a30Sjl DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); 73925cf1a30Sjl continue; 74025cf1a30Sjl } 74125cf1a30Sjl 74225cf1a30Sjl msp = &dsp->d_mem; 74325cf1a30Sjl bzero((caddr_t)msp, sizeof (*msp)); 74425cf1a30Sjl 74525cf1a30Sjl strncpy(msp->ms_cm.c_id.c_name, pstat.type, 74625cf1a30Sjl sizeof (msp->ms_cm.c_id.c_name)); 74725cf1a30Sjl msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type; 74825cf1a30Sjl msp->ms_cm.c_id.c_unit = SBD_NULL_UNIT; 74925cf1a30Sjl msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond; 75025cf1a30Sjl msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy; 75125cf1a30Sjl msp->ms_cm.c_time = mp->sbm_cm.sbdev_time; 75225cf1a30Sjl msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate; 75325cf1a30Sjl 75425cf1a30Sjl 
msp->ms_totpages = mp->sbm_npages; 75525cf1a30Sjl msp->ms_basepfn = mp->sbm_basepfn; 75625cf1a30Sjl msp->ms_pageslost = mp->sbm_pageslost; 75725cf1a30Sjl msp->ms_cage_enabled = kcage_on; 75825cf1a30Sjl 75925cf1a30Sjl if (mp->sbm_flags & DR_MFLAG_RESERVED) 76025cf1a30Sjl p_mp = mp->sbm_peer; 76125cf1a30Sjl else 76225cf1a30Sjl p_mp = NULL; 76325cf1a30Sjl 76425cf1a30Sjl if (p_mp == NULL) { 76525cf1a30Sjl msp->ms_peer_is_target = 0; 76625cf1a30Sjl msp->ms_peer_ap_id[0] = '\0'; 76725cf1a30Sjl } else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) { 76825cf1a30Sjl char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 76925cf1a30Sjl char *minor; 77025cf1a30Sjl 77125cf1a30Sjl /* 77225cf1a30Sjl * b_dip doesn't have to be held for ddi_pathname() 77325cf1a30Sjl * because the board struct (dr_board_t) will be 77425cf1a30Sjl * destroyed before b_dip detaches. 77525cf1a30Sjl */ 77625cf1a30Sjl (void) ddi_pathname(bp->b_dip, path); 77725cf1a30Sjl minor = strchr(p_mp->sbm_cm.sbdev_path, ':'); 77825cf1a30Sjl 77925cf1a30Sjl snprintf(msp->ms_peer_ap_id, 78025cf1a30Sjl sizeof (msp->ms_peer_ap_id), "%s%s", 78125cf1a30Sjl path, (minor == NULL) ? "" : minor); 78225cf1a30Sjl 78325cf1a30Sjl kmem_free(path, MAXPATHLEN); 78425cf1a30Sjl 78525cf1a30Sjl if (p_mp->sbm_flags & DR_MFLAG_TARGET) 78625cf1a30Sjl msp->ms_peer_is_target = 1; 78725cf1a30Sjl } 78825cf1a30Sjl 78925cf1a30Sjl if (mp->sbm_flags & DR_MFLAG_RELOWNER) 79025cf1a30Sjl rv = kphysm_del_status(mp->sbm_memhandle, &mdst); 79125cf1a30Sjl else 79225cf1a30Sjl rv = KPHYSM_EHANDLE; /* force 'if' to fail */ 79325cf1a30Sjl 79425cf1a30Sjl if (rv == KPHYSM_OK) { 79525cf1a30Sjl /* 79625cf1a30Sjl * Any pages above managed is "free", 79725cf1a30Sjl * i.e. it's collected. 
79825cf1a30Sjl */ 79925cf1a30Sjl msp->ms_detpages += (uint_t)(mdst.collected + 80025cf1a30Sjl mdst.phys_pages - mdst.managed); 80125cf1a30Sjl } else { 80225cf1a30Sjl /* 80325cf1a30Sjl * If we're UNREFERENCED or UNCONFIGURED, 80425cf1a30Sjl * then the number of detached pages is 80525cf1a30Sjl * however many pages are on the board. 80625cf1a30Sjl * I.e. detached = not in use by OS. 80725cf1a30Sjl */ 80825cf1a30Sjl switch (msp->ms_cm.c_ostate) { 80925cf1a30Sjl /* 81025cf1a30Sjl * changed to use cfgadm states 81125cf1a30Sjl * 81225cf1a30Sjl * was: 81325cf1a30Sjl * case DR_STATE_UNREFERENCED: 81425cf1a30Sjl * case DR_STATE_UNCONFIGURED: 81525cf1a30Sjl */ 81625cf1a30Sjl case SBD_STAT_UNCONFIGURED: 81725cf1a30Sjl msp->ms_detpages = msp->ms_totpages; 81825cf1a30Sjl break; 81925cf1a30Sjl 82025cf1a30Sjl default: 82125cf1a30Sjl break; 82225cf1a30Sjl } 82325cf1a30Sjl } 82425cf1a30Sjl 82525cf1a30Sjl /* 82625cf1a30Sjl * kphysm_del_span_query can report non-reloc pages = total 82725cf1a30Sjl * pages for memory that is not yet configured 82825cf1a30Sjl */ 82925cf1a30Sjl if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) { 83025cf1a30Sjl struct memlist *ml; 83125cf1a30Sjl 83225cf1a30Sjl ml = dr_get_memlist(mp); 83325cf1a30Sjl rv = ml ? 
dr_del_mlist_query(ml, &mq) : -1; 83425cf1a30Sjl memlist_delete(ml); 83525cf1a30Sjl 83625cf1a30Sjl if (rv == KPHYSM_OK) { 83725cf1a30Sjl msp->ms_managed_pages = mq.managed; 83825cf1a30Sjl msp->ms_noreloc_pages = mq.nonrelocatable; 83925cf1a30Sjl msp->ms_noreloc_first = 84025cf1a30Sjl mq.first_nonrelocatable; 84125cf1a30Sjl msp->ms_noreloc_last = 84225cf1a30Sjl mq.last_nonrelocatable; 84325cf1a30Sjl msp->ms_cm.c_sflags = 0; 84425cf1a30Sjl if (mq.nonrelocatable) { 84525cf1a30Sjl SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE, 84625cf1a30Sjl msp->ms_cm.c_sflags); 84725cf1a30Sjl } 84825cf1a30Sjl } else { 84925cf1a30Sjl PR_MEM("%s: kphysm_del_span_query() = %d\n", 85025cf1a30Sjl f, rv); 85125cf1a30Sjl } 85225cf1a30Sjl } 85325cf1a30Sjl 85425cf1a30Sjl /* 85525cf1a30Sjl * Check source unit state during copy-rename 85625cf1a30Sjl */ 85725cf1a30Sjl if ((mp->sbm_flags & DR_MFLAG_SOURCE) && 85825cf1a30Sjl (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED || 85925cf1a30Sjl mp->sbm_cm.sbdev_state == DR_STATE_RELEASE)) 86025cf1a30Sjl msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED; 86125cf1a30Sjl 86225cf1a30Sjl mix++; 86325cf1a30Sjl dsp++; 86425cf1a30Sjl } 86525cf1a30Sjl 86625cf1a30Sjl return (mix); 86725cf1a30Sjl } 86825cf1a30Sjl 86925cf1a30Sjl int 87025cf1a30Sjl dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) 87125cf1a30Sjl { 87225cf1a30Sjl _NOTE(ARGUNUSED(hp)) 87325cf1a30Sjl 87425cf1a30Sjl int err_flag = 0; 87525cf1a30Sjl int d; 87625cf1a30Sjl sbd_error_t *err; 87725cf1a30Sjl static fn_t f = "dr_pre_attach_mem"; 87825cf1a30Sjl 87925cf1a30Sjl PR_MEM("%s...\n", f); 88025cf1a30Sjl 88125cf1a30Sjl for (d = 0; d < devnum; d++) { 88225cf1a30Sjl dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d]; 88325cf1a30Sjl dr_state_t state; 88425cf1a30Sjl 88525cf1a30Sjl cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path); 88625cf1a30Sjl 88725cf1a30Sjl state = mp->sbm_cm.sbdev_state; 88825cf1a30Sjl switch (state) { 88925cf1a30Sjl case DR_STATE_UNCONFIGURED: 89025cf1a30Sjl PR_MEM("%s: 
recovering from UNCONFIG for %s\n", 89125cf1a30Sjl f, 89225cf1a30Sjl mp->sbm_cm.sbdev_path); 89325cf1a30Sjl 89425cf1a30Sjl /* use memlist cached by dr_post_detach_mem_unit */ 89525cf1a30Sjl ASSERT(mp->sbm_mlist != NULL); 89625cf1a30Sjl PR_MEM("%s: re-configuring cached memlist for %s:\n", 89725cf1a30Sjl f, mp->sbm_cm.sbdev_path); 89825cf1a30Sjl PR_MEMLIST_DUMP(mp->sbm_mlist); 89925cf1a30Sjl 90025cf1a30Sjl /* kphysm del handle should be have been freed */ 90125cf1a30Sjl ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0); 90225cf1a30Sjl 90325cf1a30Sjl /*FALLTHROUGH*/ 90425cf1a30Sjl 90525cf1a30Sjl case DR_STATE_CONNECTED: 90625cf1a30Sjl PR_MEM("%s: reprogramming mem hardware on %s\n", 90725cf1a30Sjl f, mp->sbm_cm.sbdev_bp->b_path); 90825cf1a30Sjl 90925cf1a30Sjl PR_MEM("%s: enabling %s\n", 91025cf1a30Sjl f, mp->sbm_cm.sbdev_path); 91125cf1a30Sjl 91225cf1a30Sjl err = drmach_mem_enable(mp->sbm_cm.sbdev_id); 91325cf1a30Sjl if (err) { 91425cf1a30Sjl DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); 91525cf1a30Sjl err_flag = 1; 91625cf1a30Sjl } 91725cf1a30Sjl break; 91825cf1a30Sjl 91925cf1a30Sjl default: 92025cf1a30Sjl dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE); 92125cf1a30Sjl err_flag = 1; 92225cf1a30Sjl break; 92325cf1a30Sjl } 92425cf1a30Sjl 92525cf1a30Sjl /* exit for loop if error encountered */ 92625cf1a30Sjl if (err_flag) 92725cf1a30Sjl break; 92825cf1a30Sjl } 92925cf1a30Sjl 93025cf1a30Sjl return (err_flag ? -1 : 0); 93125cf1a30Sjl } 93225cf1a30Sjl 933*68ac2337Sjl static void 934*68ac2337Sjl dr_update_mc_memory() 935*68ac2337Sjl { 936*68ac2337Sjl void (*mc_update_mlist)(void); 937*68ac2337Sjl 938*68ac2337Sjl /* 939*68ac2337Sjl * mc-opl is configured during drmach_mem_new but the memory 940*68ac2337Sjl * has not been added to phys_install at that time. 941*68ac2337Sjl * we must inform mc-opl to update the mlist after we 942*68ac2337Sjl * attach or detach a system board. 
/*
 * Tell mc-opl to refresh its memory list.
 *
 * mc-opl is configured during drmach_mem_new, but the memory has not
 * been added to phys_install at that time, so mc-opl must be informed
 * to update its mlist after a system board is attached or detached.
 *
 * The hook "opl_mc_update_mlist" is looked up by name at run time with
 * modgetsymvalue(); when the lookup yields NULL nothing is called.
 *
 * The parameter list is spelled (void) so that the definition is a real
 * prototype rather than an old-style declarator with unspecified
 * arguments.
 */
static void
dr_update_mc_memory(void)
{
	void	(*mc_update_mlist)(void);

	mc_update_mlist = (void (*)(void))
	    modgetsymvalue("opl_mc_update_mlist", 0);

	if (mc_update_mlist != NULL) {
		(*mc_update_mlist)();
	}
}
97625cf1a30Sjl */ 97725cf1a30Sjl memlist_read_lock(); 97825cf1a30Sjl if (memlist_intersect(phys_install, mlist) == 0) { 97925cf1a30Sjl memlist_read_unlock(); 98025cf1a30Sjl 98125cf1a30Sjl DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); 98225cf1a30Sjl 98325cf1a30Sjl PR_MEM("%s: %s memlist not in phys_install", 98425cf1a30Sjl f, mp->sbm_cm.sbdev_path); 98525cf1a30Sjl 98625cf1a30Sjl memlist_delete(mlist); 98725cf1a30Sjl continue; 98825cf1a30Sjl } 98925cf1a30Sjl memlist_read_unlock(); 99025cf1a30Sjl 99125cf1a30Sjl for (ml = mlist; ml != NULL; ml = ml->next) { 99225cf1a30Sjl sbd_error_t *err; 99325cf1a30Sjl 99425cf1a30Sjl err = drmach_mem_add_span( 99525cf1a30Sjl mp->sbm_cm.sbdev_id, 99625cf1a30Sjl ml->address, 99725cf1a30Sjl ml->size); 99825cf1a30Sjl if (err) 99925cf1a30Sjl DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); 100025cf1a30Sjl } 100125cf1a30Sjl 100225cf1a30Sjl memlist_delete(mlist); 100325cf1a30Sjl 100425cf1a30Sjl /* 100525cf1a30Sjl * Destroy cached memlist, if any. 100625cf1a30Sjl * There will be a cached memlist in sbm_mlist if 100725cf1a30Sjl * this board is being configured directly after 100825cf1a30Sjl * an unconfigure. 100925cf1a30Sjl * To support this transition, dr_post_detach_mem 101025cf1a30Sjl * left a copy of the last known memlist in sbm_mlist. 101125cf1a30Sjl * This memlist could differ from any derived from 101225cf1a30Sjl * hardware if while this memunit was last configured 101325cf1a30Sjl * the system detected and deleted bad pages from 101425cf1a30Sjl * phys_install. The location of those bad pages 101525cf1a30Sjl * will be reflected in the cached memlist. 
101625cf1a30Sjl */ 101725cf1a30Sjl if (mp->sbm_mlist) { 101825cf1a30Sjl memlist_delete(mp->sbm_mlist); 101925cf1a30Sjl mp->sbm_mlist = NULL; 102025cf1a30Sjl } 102125cf1a30Sjl } 102225cf1a30Sjl 1023*68ac2337Sjl dr_update_mc_memory(); 1024*68ac2337Sjl 102525cf1a30Sjl return (0); 102625cf1a30Sjl } 102725cf1a30Sjl 102825cf1a30Sjl int 102925cf1a30Sjl dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) 103025cf1a30Sjl { 103125cf1a30Sjl _NOTE(ARGUNUSED(hp)) 103225cf1a30Sjl 103325cf1a30Sjl int d; 103425cf1a30Sjl 103525cf1a30Sjl for (d = 0; d < devnum; d++) { 103625cf1a30Sjl dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d]; 103725cf1a30Sjl 103825cf1a30Sjl cmn_err(CE_CONT, "OS unconfigure %s", mp->sbm_cm.sbdev_path); 103925cf1a30Sjl } 104025cf1a30Sjl 104125cf1a30Sjl return (0); 104225cf1a30Sjl } 104325cf1a30Sjl 104425cf1a30Sjl int 104525cf1a30Sjl dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) 104625cf1a30Sjl { 104725cf1a30Sjl _NOTE(ARGUNUSED(hp)) 104825cf1a30Sjl 104925cf1a30Sjl int d, rv; 105025cf1a30Sjl static fn_t f = "dr_post_detach_mem"; 105125cf1a30Sjl 105225cf1a30Sjl PR_MEM("%s...\n", f); 105325cf1a30Sjl 105425cf1a30Sjl rv = 0; 105525cf1a30Sjl for (d = 0; d < devnum; d++) { 105625cf1a30Sjl dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d]; 105725cf1a30Sjl 105825cf1a30Sjl ASSERT(mp->sbm_cm.sbdev_bp == hp->h_bd); 105925cf1a30Sjl 106025cf1a30Sjl if (dr_post_detach_mem_unit(mp)) 106125cf1a30Sjl rv = -1; 106225cf1a30Sjl } 1063*68ac2337Sjl dr_update_mc_memory(); 106425cf1a30Sjl 106525cf1a30Sjl return (rv); 106625cf1a30Sjl } 106725cf1a30Sjl 106825cf1a30Sjl static void 106925cf1a30Sjl dr_add_memory_spans(dr_mem_unit_t *mp, struct memlist *ml) 107025cf1a30Sjl { 107125cf1a30Sjl static fn_t f = "dr_add_memory_spans"; 107225cf1a30Sjl 107325cf1a30Sjl PR_MEM("%s...", f); 107425cf1a30Sjl PR_MEMLIST_DUMP(ml); 107525cf1a30Sjl 107625cf1a30Sjl #ifdef DEBUG 107725cf1a30Sjl memlist_read_lock(); 107825cf1a30Sjl if 
(memlist_intersect(phys_install, ml)) { 107925cf1a30Sjl PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f); 108025cf1a30Sjl } 108125cf1a30Sjl memlist_read_unlock(); 108225cf1a30Sjl #endif 108325cf1a30Sjl 108425cf1a30Sjl for (; ml; ml = ml->next) { 108525cf1a30Sjl pfn_t base; 108625cf1a30Sjl pgcnt_t npgs; 108725cf1a30Sjl int rv; 108825cf1a30Sjl sbd_error_t *err; 108925cf1a30Sjl 109025cf1a30Sjl base = _b64top(ml->address); 109125cf1a30Sjl npgs = _b64top(ml->size); 109225cf1a30Sjl 109325cf1a30Sjl rv = kphysm_add_memory_dynamic(base, npgs); 109425cf1a30Sjl 109525cf1a30Sjl err = drmach_mem_add_span( 109625cf1a30Sjl mp->sbm_cm.sbdev_id, 109725cf1a30Sjl ml->address, 109825cf1a30Sjl ml->size); 109925cf1a30Sjl 110025cf1a30Sjl if (err) 110125cf1a30Sjl DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); 110225cf1a30Sjl 110325cf1a30Sjl if (rv != KPHYSM_OK) { 110425cf1a30Sjl cmn_err(CE_WARN, "%s:" 110525cf1a30Sjl " unexpected kphysm_add_memory_dynamic" 110625cf1a30Sjl " return value %d;" 110725cf1a30Sjl " basepfn=0x%lx, npages=%ld\n", 110825cf1a30Sjl f, rv, base, npgs); 110925cf1a30Sjl 111025cf1a30Sjl continue; 111125cf1a30Sjl } 111225cf1a30Sjl } 111325cf1a30Sjl } 111425cf1a30Sjl 1115*68ac2337Sjl static int 1116*68ac2337Sjl memlist_touch(struct memlist *ml, uint64_t add) 1117*68ac2337Sjl { 1118*68ac2337Sjl while (ml != NULL) { 1119*68ac2337Sjl if ((add == ml->address) || 1120*68ac2337Sjl (add == (ml->address + ml->size))) 1121*68ac2337Sjl return (1); 1122*68ac2337Sjl ml = ml->next; 1123*68ac2337Sjl } 1124*68ac2337Sjl return (0); 1125*68ac2337Sjl } 1126*68ac2337Sjl 1127*68ac2337Sjl static sbd_error_t * 1128*68ac2337Sjl dr_process_excess_mlist(dr_mem_unit_t *s_mp, 1129*68ac2337Sjl dr_mem_unit_t *t_mp, struct memlist *t_excess_mlist) 1130*68ac2337Sjl { 1131*68ac2337Sjl struct memlist *ml; 1132*68ac2337Sjl sbd_error_t *err; 1133*68ac2337Sjl static fn_t f = "dr_process_excess_mlist"; 1134*68ac2337Sjl uint64_t new_pa, nbytes; 1135*68ac2337Sjl int rv; 1136*68ac2337Sjl 
/*
 * Handle the memory left over after a copy-rename between units of
 * different size.
 *
 *  s_mp		source unit of the copy-rename
 *  t_mp		target unit of the copy-rename
 *  t_excess_mlist	spans of the target that were not taken over by the
 *			source; this list is freed here with
 *			memlist_delete() before returning
 *
 * Two things happen, in this order:
 *  1. every span in s_mp->sbm_del_mlist is removed through
 *     drmach_mem_del_span();
 *  2. every span in t_excess_mlist is added back to the system with
 *     dr_add_memory_spans(), after being trimmed where necessary (see
 *     the comment in the loop).  Whatever is trimmed off is also removed
 *     from t_mp->sbm_dyn_segs.
 *
 * Returns the value left in err by the last drmach_mem_del_span() /
 * DRERR_SET_C() pair of step 1, or NULL when sbm_del_mlist is empty.
 * NOTE(review): whether a non-NULL return is still owned by the caller
 * depends on DRERR_SET_C(), which is defined outside this file - confirm
 * before freeing or dereferencing it.
 */
static sbd_error_t *
dr_process_excess_mlist(dr_mem_unit_t *s_mp,
	dr_mem_unit_t *t_mp, struct memlist *t_excess_mlist)
{
	struct memlist	*ml;
	sbd_error_t	*err;
	static fn_t	f = "dr_process_excess_mlist";
	uint64_t	new_pa, nbytes;
	int rv;

	err = NULL;

	/*
	 * After the small <-> big copy-rename,
	 * the original address space for the
	 * source board may have excess to be
	 * deleted. This is a case different
	 * from the big->small excess source
	 * memory case listed below.
	 * Remove s_mp->sbm_del_mlist from
	 * the kernel cage glist.
	 */
	for (ml = s_mp->sbm_del_mlist; ml;
		ml = ml->next) {
		PR_MEM("%s: delete small<->big copy-"
		    "rename source excess memory", f);
		PR_MEMLIST_DUMP(ml);

		err = drmach_mem_del_span(
			s_mp->sbm_cm.sbdev_id,
			    ml->address, ml->size);
		if (err)
			DRERR_SET_C(&s_mp->
			    sbm_cm.sbdev_error, &err);
		ASSERT(err == NULL);
	}

	PR_MEM("%s: adding back remaining portion"
		" of %s, memlist:\n",
		f, t_mp->sbm_cm.sbdev_path);
	PR_MEMLIST_DUMP(t_excess_mlist);

	for (ml = t_excess_mlist; ml; ml = ml->next) {
		/*
		 * ml0 is a private, single-entry copy of the current span;
		 * it is the one that gets trimmed and added, so the entries
		 * of t_excess_mlist themselves are never modified.
		 */
		struct memlist ml0;

		ml0.address = ml->address;
		ml0.size = ml->size;
		ml0.next = ml0.prev = NULL;

		/*
		 * If the memory object is 256 MB aligned (max page size
		 * on OPL), it will not be coalesced to the adjacent memory
		 * chunks.  The coalesce logic assumes contiguous page
		 * structures for contiguous memory and we hit panic.
		 * For anything less than 256 MB alignment, we have
		 * to make sure that it is not adjacent to anything.
		 * If the new chunk is adjacent to phys_install, we
		 * truncate it to 4MB boundary.  4 MB is somewhat
		 * arbitrary.  However we do not want to create
		 * very small segments because they can cause problem.
		 * The extreme case of 8K segment will fail
		 * kphysm_add_memory_dynamic(), e.g.
		 */
		if ((ml->address & (MH_MPSS_ALIGNMENT - 1)) ||
			(ml->size & (MH_MPSS_ALIGNMENT - 1))) {

			/* does the start of the span touch phys_install? */
			memlist_read_lock();
			rv = memlist_touch(phys_install, ml0.address);
			memlist_read_unlock();

			if (rv) {
				/*
				 * Move the start up to the next
				 * MH_MIN_ALIGNMENT boundary above it; the
				 * "+ 1" forces a move even when the start
				 * is already on a boundary.
				 */
				new_pa = roundup(ml0.address + 1, MH_MIN_ALIGNMENT);
				nbytes = (new_pa - ml0.address);
				if (nbytes >= ml0.size) {
					/* nothing would be left: drop span */
					t_mp->sbm_dyn_segs =
					    memlist_del_span(t_mp->sbm_dyn_segs,
						ml0.address, ml0.size);
					continue;
				}
				t_mp->sbm_dyn_segs =
				    memlist_del_span(t_mp->sbm_dyn_segs,
					ml0.address, nbytes);
				ml0.size -= nbytes;
				ml0.address = new_pa;
			}

			if (ml0.size == 0) {
				continue;
			}

			/* does the end of the span touch phys_install? */
			memlist_read_lock();
			rv = memlist_touch(phys_install, ml0.address + ml0.size);
			memlist_read_unlock();

			if (rv) {
				/*
				 * Pull the end down to the MH_MIN_ALIGNMENT
				 * boundary below it; the "- 1" forces a move
				 * even when the end is already on a boundary.
				 */
				new_pa = rounddown(ml0.address + ml0.size - 1,
					MH_MIN_ALIGNMENT);
				nbytes = (ml0.address + ml0.size - new_pa);
				if (nbytes >= ml0.size) {
					/* nothing would be left: drop span */
					t_mp->sbm_dyn_segs =
					    memlist_del_span(t_mp->sbm_dyn_segs,
						ml0.address, ml0.size);
					continue;
				}
				t_mp->sbm_dyn_segs =
				    memlist_del_span(t_mp->sbm_dyn_segs,
					new_pa, nbytes);
				ml0.size -= nbytes;
			}

			if (ml0.size > 0) {
				dr_add_memory_spans(s_mp, &ml0);
			}
		} else if (ml0.size > 0) {
			/* fully aligned span: add it back untouched */
			dr_add_memory_spans(s_mp, &ml0);
		}
	}
	/* the excess list is consumed by this function */
	memlist_delete(t_excess_mlist);
	return (err);
}
ASSERT(t_mp->sbm_peer == s_mp); 128125cf1a30Sjl 128225cf1a30Sjl ASSERT(t_mp->sbm_flags & DR_MFLAG_RELDONE); 128325cf1a30Sjl ASSERT(t_mp->sbm_del_mlist); 128425cf1a30Sjl 128525cf1a30Sjl PR_MEM("%s: target %s: deleted memlist:\n", 128625cf1a30Sjl f, t_mp->sbm_cm.sbdev_path); 128725cf1a30Sjl PR_MEMLIST_DUMP(t_mp->sbm_del_mlist); 128825cf1a30Sjl } else { 128925cf1a30Sjl /* this is no target unit */ 129025cf1a30Sjl t_mp = NULL; 129125cf1a30Sjl } 129225cf1a30Sjl 129325cf1a30Sjl /* 129425cf1a30Sjl * Verify the memory really did successfully detach 129525cf1a30Sjl * by checking for its non-existence in phys_install. 129625cf1a30Sjl */ 129725cf1a30Sjl rv = 0; 129825cf1a30Sjl memlist_read_lock(); 129925cf1a30Sjl if (s_mp->sbm_flags & DR_MFLAG_RELDONE) { 130025cf1a30Sjl x_mp = s_mp; 130125cf1a30Sjl rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist); 130225cf1a30Sjl } 130325cf1a30Sjl if (rv == 0 && t_mp && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) { 130425cf1a30Sjl x_mp = t_mp; 130525cf1a30Sjl rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist); 130625cf1a30Sjl } 130725cf1a30Sjl memlist_read_unlock(); 130825cf1a30Sjl 130925cf1a30Sjl if (rv) { 131025cf1a30Sjl /* error: memlist still in phys_install */ 131125cf1a30Sjl DR_DEV_INTERNAL_ERROR(&x_mp->sbm_cm); 131225cf1a30Sjl } 131325cf1a30Sjl 131425cf1a30Sjl /* 131525cf1a30Sjl * clean mem unit state and bail out if an error has been recorded. 
131625cf1a30Sjl */ 131725cf1a30Sjl rv = 0; 131825cf1a30Sjl if (s_mp->sbm_cm.sbdev_error) { 131925cf1a30Sjl PR_MEM("%s: %s flags=%x", f, 132025cf1a30Sjl s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags); 132125cf1a30Sjl DR_DEV_CLR_UNREFERENCED(&s_mp->sbm_cm); 132225cf1a30Sjl DR_DEV_CLR_RELEASED(&s_mp->sbm_cm); 132325cf1a30Sjl dr_device_transition(&s_mp->sbm_cm, DR_STATE_CONFIGURED); 132425cf1a30Sjl rv = -1; 132525cf1a30Sjl } 132625cf1a30Sjl if (t_mp != NULL && t_mp->sbm_cm.sbdev_error != NULL) { 132725cf1a30Sjl PR_MEM("%s: %s flags=%x", f, 132825cf1a30Sjl s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags); 132925cf1a30Sjl DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm); 133025cf1a30Sjl DR_DEV_CLR_RELEASED(&t_mp->sbm_cm); 133125cf1a30Sjl dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED); 133225cf1a30Sjl rv = -1; 133325cf1a30Sjl } 133425cf1a30Sjl if (rv) 133525cf1a30Sjl goto cleanup; 133625cf1a30Sjl 133725cf1a30Sjl s_old_basepa = _ptob64(s_mp->sbm_basepfn); 133825cf1a30Sjl err = drmach_mem_get_info(s_mp->sbm_cm.sbdev_id, &minfo); 133925cf1a30Sjl ASSERT(err == NULL); 134025cf1a30Sjl s_new_basepa = minfo.mi_basepa; 134125cf1a30Sjl 134225cf1a30Sjl PR_MEM("%s:s_old_basepa: 0x%lx\n", f, s_old_basepa); 134325cf1a30Sjl PR_MEM("%s:s_new_basepa: 0x%lx\n", f, s_new_basepa); 134425cf1a30Sjl 134525cf1a30Sjl if (t_mp != NULL) { 134625cf1a30Sjl struct memlist *s_copy_mlist; 134725cf1a30Sjl 134825cf1a30Sjl t_old_basepa = _ptob64(t_mp->sbm_basepfn); 134925cf1a30Sjl err = drmach_mem_get_info(t_mp->sbm_cm.sbdev_id, &minfo); 135025cf1a30Sjl ASSERT(err == NULL); 135125cf1a30Sjl t_new_basepa = minfo.mi_basepa; 135225cf1a30Sjl 135325cf1a30Sjl PR_MEM("%s:t_old_basepa: 0x%lx\n", f, t_old_basepa); 135425cf1a30Sjl PR_MEM("%s:t_new_basepa: 0x%lx\n", f, t_new_basepa); 135525cf1a30Sjl 135625cf1a30Sjl /* 135725cf1a30Sjl * Construct copy list with original source addresses. 135825cf1a30Sjl * Used to add back excess target mem. 
135925cf1a30Sjl */ 136025cf1a30Sjl s_copy_mlist = memlist_dup(s_mp->sbm_mlist); 136125cf1a30Sjl for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) { 136225cf1a30Sjl s_copy_mlist = memlist_del_span(s_copy_mlist, 136325cf1a30Sjl ml->address, ml->size); 136425cf1a30Sjl } 136525cf1a30Sjl 136625cf1a30Sjl PR_MEM("%s: source copy list:\n:", f); 136725cf1a30Sjl PR_MEMLIST_DUMP(s_copy_mlist); 136825cf1a30Sjl 136925cf1a30Sjl /* 137025cf1a30Sjl * We had to swap mem-units, so update 137125cf1a30Sjl * memlists accordingly with new base 137225cf1a30Sjl * addresses. 137325cf1a30Sjl */ 137425cf1a30Sjl for (ml = t_mp->sbm_mlist; ml; ml = ml->next) { 137525cf1a30Sjl ml->address -= t_old_basepa; 137625cf1a30Sjl ml->address += t_new_basepa; 137725cf1a30Sjl } 137825cf1a30Sjl 137925cf1a30Sjl /* 138025cf1a30Sjl * There is no need to explicitly rename the target delete 138125cf1a30Sjl * memlist, because sbm_del_mlist and sbm_mlist always 138225cf1a30Sjl * point to the same memlist for a copy/rename operation. 138325cf1a30Sjl */ 138425cf1a30Sjl ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist); 138525cf1a30Sjl 138625cf1a30Sjl PR_MEM("%s: renamed target memlist and delete memlist:\n", f); 138725cf1a30Sjl PR_MEMLIST_DUMP(t_mp->sbm_mlist); 138825cf1a30Sjl 138925cf1a30Sjl for (ml = s_mp->sbm_mlist; ml; ml = ml->next) { 139025cf1a30Sjl ml->address -= s_old_basepa; 139125cf1a30Sjl ml->address += s_new_basepa; 139225cf1a30Sjl } 139325cf1a30Sjl 139425cf1a30Sjl PR_MEM("%s: renamed source memlist:\n", f); 139525cf1a30Sjl PR_MEMLIST_DUMP(s_mp->sbm_mlist); 1396*68ac2337Sjl PR_MEM("%s: source dyn seg memlist:\n", f); 1397*68ac2337Sjl PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs); 139825cf1a30Sjl 139925cf1a30Sjl /* 140025cf1a30Sjl * Keep track of dynamically added segments 140125cf1a30Sjl * since they cannot be split if we need to delete 140225cf1a30Sjl * excess source memory later for this board. 
140325cf1a30Sjl */ 140425cf1a30Sjl if (t_mp->sbm_dyn_segs) 140525cf1a30Sjl memlist_delete(t_mp->sbm_dyn_segs); 140625cf1a30Sjl t_mp->sbm_dyn_segs = s_mp->sbm_dyn_segs; 140725cf1a30Sjl s_mp->sbm_dyn_segs = NULL; 140825cf1a30Sjl 140925cf1a30Sjl /* 141025cf1a30Sjl * Add back excess target memory. 141125cf1a30Sjl * Subtract out the portion of the target memory 141225cf1a30Sjl * node that was taken over by the source memory 141325cf1a30Sjl * node. 141425cf1a30Sjl */ 141525cf1a30Sjl t_excess_mlist = memlist_dup(t_mp->sbm_mlist); 141625cf1a30Sjl for (ml = s_copy_mlist; ml; ml = ml->next) { 141725cf1a30Sjl t_excess_mlist = 141825cf1a30Sjl memlist_del_span(t_excess_mlist, 141925cf1a30Sjl ml->address, ml->size); 142025cf1a30Sjl } 1421*68ac2337Sjl PR_MEM("%s: excess memlist:\n", f); 1422*68ac2337Sjl PR_MEMLIST_DUMP(t_excess_mlist); 142325cf1a30Sjl 142425cf1a30Sjl /* 142525cf1a30Sjl * Update dynamically added segs 142625cf1a30Sjl */ 142725cf1a30Sjl for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) { 142825cf1a30Sjl t_mp->sbm_dyn_segs = 142925cf1a30Sjl memlist_del_span(t_mp->sbm_dyn_segs, 143025cf1a30Sjl ml->address, ml->size); 143125cf1a30Sjl } 143225cf1a30Sjl for (ml = t_excess_mlist; ml; ml = ml->next) { 143325cf1a30Sjl t_mp->sbm_dyn_segs = 143425cf1a30Sjl memlist_cat_span(t_mp->sbm_dyn_segs, 143525cf1a30Sjl ml->address, ml->size); 143625cf1a30Sjl } 143725cf1a30Sjl PR_MEM("%s: %s: updated dynamic seg list:\n", 143825cf1a30Sjl f, t_mp->sbm_cm.sbdev_path); 143925cf1a30Sjl PR_MEMLIST_DUMP(t_mp->sbm_dyn_segs); 144025cf1a30Sjl 144125cf1a30Sjl if (t_excess_mlist != NULL) { 1442*68ac2337Sjl err = dr_process_excess_mlist(s_mp, t_mp, 1443*68ac2337Sjl t_excess_mlist); 144425cf1a30Sjl s_excess_mem_deleted = 1; 144525cf1a30Sjl } 1446*68ac2337Sjl 144725cf1a30Sjl memlist_delete(s_copy_mlist); 144825cf1a30Sjl 144925cf1a30Sjl #ifdef DEBUG 145025cf1a30Sjl /* 145125cf1a30Sjl * s_mp->sbm_del_mlist may still needed 145225cf1a30Sjl */ 145325cf1a30Sjl PR_MEM("%s: source delete memeory flag %d", 
145425cf1a30Sjl f, s_excess_mem_deleted); 145525cf1a30Sjl PR_MEM("%s: source delete memlist", f); 145625cf1a30Sjl PR_MEMLIST_DUMP(s_mp->sbm_del_mlist); 145725cf1a30Sjl #endif 145825cf1a30Sjl 145925cf1a30Sjl } 146025cf1a30Sjl 146125cf1a30Sjl if (t_mp != NULL) { 146225cf1a30Sjl /* delete target's entire address space */ 146325cf1a30Sjl err = drmach_mem_del_span( 146425cf1a30Sjl t_mp->sbm_cm.sbdev_id, t_old_basepa & ~ sm, sz); 146525cf1a30Sjl if (err) 146625cf1a30Sjl DRERR_SET_C(&t_mp->sbm_cm.sbdev_error, &err); 146725cf1a30Sjl ASSERT(err == NULL); 146825cf1a30Sjl 146925cf1a30Sjl /* 147025cf1a30Sjl * After the copy/rename, the original address space 147125cf1a30Sjl * for the source board (which is now located on the 147225cf1a30Sjl * target board) may now have some excess to be deleted. 147325cf1a30Sjl * Those excess memory on the source board are kept in 147425cf1a30Sjl * source board's sbm_del_mlist 147525cf1a30Sjl */ 147625cf1a30Sjl for (ml = s_mp->sbm_del_mlist; !s_excess_mem_deleted && ml; 147725cf1a30Sjl ml = ml->next) { 147825cf1a30Sjl PR_MEM("%s: delete source excess memory", f); 147925cf1a30Sjl PR_MEMLIST_DUMP(ml); 148025cf1a30Sjl 148125cf1a30Sjl err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id, 148225cf1a30Sjl ml->address, ml->size); 148325cf1a30Sjl if (err) 148425cf1a30Sjl DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err); 148525cf1a30Sjl ASSERT(err == NULL); 148625cf1a30Sjl } 148725cf1a30Sjl 148825cf1a30Sjl } else { 148925cf1a30Sjl /* delete board's entire address space */ 149025cf1a30Sjl err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id, 149125cf1a30Sjl s_old_basepa & ~ sm, sz); 149225cf1a30Sjl if (err) 149325cf1a30Sjl DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err); 149425cf1a30Sjl ASSERT(err == NULL); 149525cf1a30Sjl } 149625cf1a30Sjl 149725cf1a30Sjl cleanup: 149825cf1a30Sjl /* clean up target mem unit */ 149925cf1a30Sjl if (t_mp != NULL) { 150025cf1a30Sjl memlist_delete(t_mp->sbm_del_mlist); 150125cf1a30Sjl /* no need to delete sbm_mlist, it shares sbm_del_mlist 
*/ 150225cf1a30Sjl 150325cf1a30Sjl t_mp->sbm_del_mlist = NULL; 150425cf1a30Sjl t_mp->sbm_mlist = NULL; 150525cf1a30Sjl t_mp->sbm_peer = NULL; 150625cf1a30Sjl t_mp->sbm_flags = 0; 150725cf1a30Sjl t_mp->sbm_cm.sbdev_busy = 0; 150825cf1a30Sjl dr_init_mem_unit_data(t_mp); 150925cf1a30Sjl 151025cf1a30Sjl } 151125cf1a30Sjl if (t_mp != NULL && t_mp->sbm_cm.sbdev_error == NULL) { 151225cf1a30Sjl /* 151325cf1a30Sjl * now that copy/rename has completed, undo this 151425cf1a30Sjl * work that was done in dr_release_mem_done. 151525cf1a30Sjl */ 151625cf1a30Sjl DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm); 151725cf1a30Sjl DR_DEV_CLR_RELEASED(&t_mp->sbm_cm); 151825cf1a30Sjl dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED); 151925cf1a30Sjl } 152025cf1a30Sjl 152125cf1a30Sjl /* 152225cf1a30Sjl * clean up (source) board's mem unit structure. 152325cf1a30Sjl * NOTE: sbm_mlist is retained if no error has been record (in other 152425cf1a30Sjl * words, when s_mp->sbm_cm.sbdev_error is NULL). This memlist is 152525cf1a30Sjl * referred to elsewhere as the cached memlist. The cached memlist 152625cf1a30Sjl * is used to re-attach (configure back in) this memunit from the 152725cf1a30Sjl * unconfigured state. The memlist is retained because it may 152825cf1a30Sjl * represent bad pages that were detected while the memory was 152925cf1a30Sjl * configured into the OS. The OS deletes bad pages from phys_install. 153025cf1a30Sjl * Those deletes, if any, will be represented in the cached mlist. 
153125cf1a30Sjl */ 153225cf1a30Sjl if (s_mp->sbm_del_mlist && s_mp->sbm_del_mlist != s_mp->sbm_mlist) 153325cf1a30Sjl memlist_delete(s_mp->sbm_del_mlist); 153425cf1a30Sjl 153525cf1a30Sjl if (s_mp->sbm_cm.sbdev_error && s_mp->sbm_mlist) { 153625cf1a30Sjl memlist_delete(s_mp->sbm_mlist); 153725cf1a30Sjl s_mp->sbm_mlist = NULL; 153825cf1a30Sjl } 153925cf1a30Sjl 154025cf1a30Sjl if (s_mp->sbm_dyn_segs != NULL && s_mp->sbm_cm.sbdev_error == 0) { 154125cf1a30Sjl memlist_delete(s_mp->sbm_dyn_segs); 154225cf1a30Sjl s_mp->sbm_dyn_segs = NULL; 154325cf1a30Sjl } 154425cf1a30Sjl 154525cf1a30Sjl s_mp->sbm_del_mlist = NULL; 154625cf1a30Sjl s_mp->sbm_peer = NULL; 154725cf1a30Sjl s_mp->sbm_flags = 0; 154825cf1a30Sjl s_mp->sbm_cm.sbdev_busy = 0; 154925cf1a30Sjl dr_init_mem_unit_data(s_mp); 155025cf1a30Sjl 155125cf1a30Sjl PR_MEM("%s: cached memlist for %s:", f, s_mp->sbm_cm.sbdev_path); 155225cf1a30Sjl PR_MEMLIST_DUMP(s_mp->sbm_mlist); 155325cf1a30Sjl 155425cf1a30Sjl return (0); 155525cf1a30Sjl } 155625cf1a30Sjl 155725cf1a30Sjl /* 155825cf1a30Sjl * Successful return from this function will have the memory 155925cf1a30Sjl * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated 156025cf1a30Sjl * and waiting. This routine's job is to select the memory that 156125cf1a30Sjl * actually has to be released (detached) which may not necessarily 156225cf1a30Sjl * be the same memory node that came in in devlist[], 156325cf1a30Sjl * i.e. a copy-rename is needed. 
156425cf1a30Sjl */ 156525cf1a30Sjl int 156625cf1a30Sjl dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) 156725cf1a30Sjl { 156825cf1a30Sjl int d; 156925cf1a30Sjl int err_flag = 0; 157025cf1a30Sjl static fn_t f = "dr_pre_release_mem"; 157125cf1a30Sjl 157225cf1a30Sjl PR_MEM("%s...\n", f); 157325cf1a30Sjl 157425cf1a30Sjl for (d = 0; d < devnum; d++) { 157525cf1a30Sjl dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d]; 157625cf1a30Sjl int rv; 157725cf1a30Sjl memquery_t mq; 157825cf1a30Sjl struct memlist *ml; 157925cf1a30Sjl 158025cf1a30Sjl if (mp->sbm_cm.sbdev_error) { 158125cf1a30Sjl err_flag = 1; 158225cf1a30Sjl continue; 158325cf1a30Sjl } else if (!kcage_on) { 158425cf1a30Sjl dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_KCAGE_OFF); 158525cf1a30Sjl err_flag = 1; 158625cf1a30Sjl continue; 158725cf1a30Sjl } 158825cf1a30Sjl 158925cf1a30Sjl if (mp->sbm_flags & DR_MFLAG_RESERVED) { 159025cf1a30Sjl /* 159125cf1a30Sjl * Board is currently involved in a delete 159225cf1a30Sjl * memory operation. Can't detach this guy until 159325cf1a30Sjl * that operation completes. 
159425cf1a30Sjl */ 159525cf1a30Sjl dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_INVAL); 159625cf1a30Sjl err_flag = 1; 159725cf1a30Sjl break; 159825cf1a30Sjl } 159925cf1a30Sjl 160025cf1a30Sjl /* flags should be clean at this time */ 160125cf1a30Sjl ASSERT(mp->sbm_flags == 0); 160225cf1a30Sjl 160325cf1a30Sjl ASSERT(mp->sbm_mlist == NULL); 160425cf1a30Sjl ASSERT(mp->sbm_del_mlist == NULL); 160525cf1a30Sjl if (mp->sbm_mlist != NULL) { 160625cf1a30Sjl memlist_delete(mp->sbm_mlist); 160725cf1a30Sjl mp->sbm_mlist = NULL; 160825cf1a30Sjl } 160925cf1a30Sjl 161025cf1a30Sjl ml = dr_get_memlist(mp); 161125cf1a30Sjl if (ml == NULL) { 161225cf1a30Sjl err_flag = 1; 161325cf1a30Sjl PR_MEM("%s: no memlist found for %s\n", 161425cf1a30Sjl f, mp->sbm_cm.sbdev_path); 161525cf1a30Sjl continue; 161625cf1a30Sjl } 161725cf1a30Sjl 161825cf1a30Sjl /* 161925cf1a30Sjl * Check whether the detaching memory requires a 162025cf1a30Sjl * copy-rename. 162125cf1a30Sjl */ 162225cf1a30Sjl ASSERT(mp->sbm_npages != 0); 1623*68ac2337Sjl 162425cf1a30Sjl rv = dr_del_mlist_query(ml, &mq); 162525cf1a30Sjl if (rv != KPHYSM_OK) { 162625cf1a30Sjl memlist_delete(ml); 162725cf1a30Sjl DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); 162825cf1a30Sjl err_flag = 1; 162925cf1a30Sjl break; 163025cf1a30Sjl } 163125cf1a30Sjl 163225cf1a30Sjl if (mq.nonrelocatable != 0) { 163325cf1a30Sjl if (!(dr_cmd_flags(hp) & 163425cf1a30Sjl (SBD_FLAG_FORCE | SBD_FLAG_QUIESCE_OKAY))) { 163525cf1a30Sjl memlist_delete(ml); 163625cf1a30Sjl /* caller wasn't prompted for a suspend */ 163725cf1a30Sjl dr_dev_err(CE_WARN, &mp->sbm_cm, 163825cf1a30Sjl ESBD_QUIESCE_REQD); 163925cf1a30Sjl err_flag = 1; 164025cf1a30Sjl break; 164125cf1a30Sjl } 164225cf1a30Sjl } 164325cf1a30Sjl 164425cf1a30Sjl /* allocate a kphysm handle */ 164525cf1a30Sjl rv = kphysm_del_gethandle(&mp->sbm_memhandle); 164625cf1a30Sjl if (rv != KPHYSM_OK) { 164725cf1a30Sjl memlist_delete(ml); 164825cf1a30Sjl 164925cf1a30Sjl DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); 165025cf1a30Sjl err_flag = 1; 
165125cf1a30Sjl break; 165225cf1a30Sjl } 165325cf1a30Sjl mp->sbm_flags |= DR_MFLAG_RELOWNER; 165425cf1a30Sjl 165525cf1a30Sjl if ((mq.nonrelocatable != 0) || 165625cf1a30Sjl dr_reserve_mem_spans(&mp->sbm_memhandle, ml)) { 165725cf1a30Sjl /* 165825cf1a30Sjl * Either the detaching memory node contains 165925cf1a30Sjl * non-reloc memory or we failed to reserve the 166025cf1a30Sjl * detaching memory node (which did _not_ have 166125cf1a30Sjl * any non-reloc memory, i.e. some non-reloc mem 166225cf1a30Sjl * got onboard). 166325cf1a30Sjl */ 166425cf1a30Sjl 166525cf1a30Sjl if (dr_select_mem_target(hp, mp, ml)) { 166625cf1a30Sjl int rv; 166725cf1a30Sjl 166825cf1a30Sjl /* 166925cf1a30Sjl * We had no luck locating a target 167025cf1a30Sjl * memory node to be the recipient of 167125cf1a30Sjl * the non-reloc memory on the node 167225cf1a30Sjl * we're trying to detach. 167325cf1a30Sjl * Clean up be disposing the mem handle 167425cf1a30Sjl * and the mem list. 167525cf1a30Sjl */ 167625cf1a30Sjl rv = kphysm_del_release(mp->sbm_memhandle); 167725cf1a30Sjl if (rv != KPHYSM_OK) { 167825cf1a30Sjl /* 167925cf1a30Sjl * can do nothing but complain 168025cf1a30Sjl * and hope helpful for debug 168125cf1a30Sjl */ 168225cf1a30Sjl cmn_err(CE_WARN, "%s: unexpected" 168325cf1a30Sjl " kphysm_del_release return" 168425cf1a30Sjl " value %d", 168525cf1a30Sjl f, rv); 168625cf1a30Sjl } 168725cf1a30Sjl mp->sbm_flags &= ~DR_MFLAG_RELOWNER; 168825cf1a30Sjl 168925cf1a30Sjl memlist_delete(ml); 169025cf1a30Sjl 169125cf1a30Sjl /* make sure sbm_flags is clean */ 169225cf1a30Sjl ASSERT(mp->sbm_flags == 0); 169325cf1a30Sjl 169425cf1a30Sjl dr_dev_err(CE_WARN, 169525cf1a30Sjl &mp->sbm_cm, ESBD_NO_TARGET); 169625cf1a30Sjl 169725cf1a30Sjl err_flag = 1; 169825cf1a30Sjl break; 169925cf1a30Sjl } 170025cf1a30Sjl 170125cf1a30Sjl /* 170225cf1a30Sjl * ml is not memlist_delete'd here because 170325cf1a30Sjl * it has been assigned to mp->sbm_mlist 170425cf1a30Sjl * by dr_select_mem_target. 
170525cf1a30Sjl */ 170625cf1a30Sjl } else { 170725cf1a30Sjl /* no target needed to detach this board */ 170825cf1a30Sjl mp->sbm_flags |= DR_MFLAG_RESERVED; 170925cf1a30Sjl mp->sbm_peer = NULL; 171025cf1a30Sjl mp->sbm_del_mlist = ml; 171125cf1a30Sjl mp->sbm_mlist = ml; 171225cf1a30Sjl mp->sbm_cm.sbdev_busy = 1; 171325cf1a30Sjl } 171425cf1a30Sjl #ifdef DEBUG 171525cf1a30Sjl ASSERT(mp->sbm_mlist != NULL); 171625cf1a30Sjl 171725cf1a30Sjl if (mp->sbm_flags & DR_MFLAG_SOURCE) { 171825cf1a30Sjl PR_MEM("%s: release of %s requires copy/rename;" 171925cf1a30Sjl " selected target board %s\n", 172025cf1a30Sjl f, 172125cf1a30Sjl mp->sbm_cm.sbdev_path, 172225cf1a30Sjl mp->sbm_peer->sbm_cm.sbdev_path); 172325cf1a30Sjl } else { 172425cf1a30Sjl PR_MEM("%s: copy/rename not required to release %s\n", 172525cf1a30Sjl f, mp->sbm_cm.sbdev_path); 172625cf1a30Sjl } 172725cf1a30Sjl 172825cf1a30Sjl ASSERT(mp->sbm_flags & DR_MFLAG_RELOWNER); 172925cf1a30Sjl ASSERT(mp->sbm_flags & DR_MFLAG_RESERVED); 173025cf1a30Sjl #endif 173125cf1a30Sjl } 173225cf1a30Sjl 173325cf1a30Sjl return (err_flag ? -1 : 0); 173425cf1a30Sjl } 173525cf1a30Sjl 173625cf1a30Sjl void 173725cf1a30Sjl dr_release_mem_done(dr_common_unit_t *cp) 173825cf1a30Sjl { 173925cf1a30Sjl dr_mem_unit_t *s_mp = (dr_mem_unit_t *)cp; 174025cf1a30Sjl dr_mem_unit_t *t_mp, *mp; 174125cf1a30Sjl int rv; 174225cf1a30Sjl static fn_t f = "dr_release_mem_done"; 174325cf1a30Sjl 174425cf1a30Sjl /* 174525cf1a30Sjl * This unit will be flagged with DR_MFLAG_SOURCE, if it 174625cf1a30Sjl * has a target unit. 
174725cf1a30Sjl */ 174825cf1a30Sjl if (s_mp->sbm_flags & DR_MFLAG_SOURCE) { 174925cf1a30Sjl t_mp = s_mp->sbm_peer; 175025cf1a30Sjl ASSERT(t_mp != NULL); 175125cf1a30Sjl ASSERT(t_mp->sbm_peer == s_mp); 175225cf1a30Sjl ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET); 175325cf1a30Sjl ASSERT(t_mp->sbm_flags & DR_MFLAG_RESERVED); 175425cf1a30Sjl } else { 175525cf1a30Sjl /* this is no target unit */ 175625cf1a30Sjl t_mp = NULL; 175725cf1a30Sjl } 175825cf1a30Sjl 175925cf1a30Sjl /* free delete handle */ 176025cf1a30Sjl ASSERT(s_mp->sbm_flags & DR_MFLAG_RELOWNER); 176125cf1a30Sjl ASSERT(s_mp->sbm_flags & DR_MFLAG_RESERVED); 176225cf1a30Sjl rv = kphysm_del_release(s_mp->sbm_memhandle); 176325cf1a30Sjl if (rv != KPHYSM_OK) { 176425cf1a30Sjl /* 176525cf1a30Sjl * can do nothing but complain 176625cf1a30Sjl * and hope helpful for debug 176725cf1a30Sjl */ 176825cf1a30Sjl cmn_err(CE_WARN, "%s: unexpected kphysm_del_release" 176925cf1a30Sjl " return value %d", f, rv); 177025cf1a30Sjl } 177125cf1a30Sjl s_mp->sbm_flags &= ~DR_MFLAG_RELOWNER; 177225cf1a30Sjl 177325cf1a30Sjl /* 177425cf1a30Sjl * If an error was encountered during release, clean up 177525cf1a30Sjl * the source (and target, if present) unit data. 177625cf1a30Sjl */ 177725cf1a30Sjl /* XXX Can we know that sbdev_error was encountered during release? 
*/ 177825cf1a30Sjl if (s_mp->sbm_cm.sbdev_error != NULL) { 177925cf1a30Sjl PR_MEM("%s: %s: error %d noted\n", 178025cf1a30Sjl f, 178125cf1a30Sjl s_mp->sbm_cm.sbdev_path, 178225cf1a30Sjl s_mp->sbm_cm.sbdev_error->e_code); 178325cf1a30Sjl 178425cf1a30Sjl if (t_mp != NULL) { 178525cf1a30Sjl ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist); 178625cf1a30Sjl t_mp->sbm_del_mlist = NULL; 178725cf1a30Sjl 178825cf1a30Sjl if (t_mp->sbm_mlist != NULL) { 178925cf1a30Sjl memlist_delete(t_mp->sbm_mlist); 179025cf1a30Sjl t_mp->sbm_mlist = NULL; 179125cf1a30Sjl } 179225cf1a30Sjl 179325cf1a30Sjl t_mp->sbm_peer = NULL; 179425cf1a30Sjl t_mp->sbm_flags = 0; 179525cf1a30Sjl t_mp->sbm_cm.sbdev_busy = 0; 179625cf1a30Sjl } 179725cf1a30Sjl 179825cf1a30Sjl if (s_mp->sbm_del_mlist != s_mp->sbm_mlist) 179925cf1a30Sjl memlist_delete(s_mp->sbm_del_mlist); 180025cf1a30Sjl s_mp->sbm_del_mlist = NULL; 180125cf1a30Sjl 180225cf1a30Sjl if (s_mp->sbm_mlist != NULL) { 180325cf1a30Sjl memlist_delete(s_mp->sbm_mlist); 180425cf1a30Sjl s_mp->sbm_mlist = NULL; 180525cf1a30Sjl } 180625cf1a30Sjl 180725cf1a30Sjl s_mp->sbm_peer = NULL; 180825cf1a30Sjl s_mp->sbm_flags = 0; 180925cf1a30Sjl s_mp->sbm_cm.sbdev_busy = 0; 181025cf1a30Sjl 181125cf1a30Sjl /* bail out */ 181225cf1a30Sjl return; 181325cf1a30Sjl } 181425cf1a30Sjl 181525cf1a30Sjl DR_DEV_SET_RELEASED(&s_mp->sbm_cm); 181625cf1a30Sjl dr_device_transition(&s_mp->sbm_cm, DR_STATE_RELEASE); 181725cf1a30Sjl 181825cf1a30Sjl if (t_mp != NULL) { 181925cf1a30Sjl /* 182025cf1a30Sjl * the kphysm delete operation that drained the source 182125cf1a30Sjl * board also drained this target board. Since the source 182225cf1a30Sjl * board drain is now known to have succeeded, we know this 182325cf1a30Sjl * target board is drained too. 182425cf1a30Sjl * 182525cf1a30Sjl * because DR_DEV_SET_RELEASED and dr_device_transition 182625cf1a30Sjl * is done here, the dr_release_dev_done should not 182725cf1a30Sjl * fail. 
182825cf1a30Sjl */ 182925cf1a30Sjl DR_DEV_SET_RELEASED(&t_mp->sbm_cm); 183025cf1a30Sjl dr_device_transition(&t_mp->sbm_cm, DR_STATE_RELEASE); 183125cf1a30Sjl 183225cf1a30Sjl /* 183325cf1a30Sjl * NOTE: do not transition target's board state, 183425cf1a30Sjl * even if the mem-unit was the last configure 183525cf1a30Sjl * unit of the board. When copy/rename completes 183625cf1a30Sjl * this mem-unit will transitioned back to 183725cf1a30Sjl * the configured state. In the meantime, the 183825cf1a30Sjl * board's must remain as is. 183925cf1a30Sjl */ 184025cf1a30Sjl } 184125cf1a30Sjl 184225cf1a30Sjl /* if board(s) had deleted memory, verify it is gone */ 184325cf1a30Sjl rv = 0; 184425cf1a30Sjl memlist_read_lock(); 184525cf1a30Sjl if (s_mp->sbm_del_mlist != NULL) { 184625cf1a30Sjl mp = s_mp; 184725cf1a30Sjl rv = memlist_intersect(phys_install, mp->sbm_del_mlist); 184825cf1a30Sjl } 184925cf1a30Sjl if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) { 185025cf1a30Sjl mp = t_mp; 185125cf1a30Sjl rv = memlist_intersect(phys_install, mp->sbm_del_mlist); 185225cf1a30Sjl } 185325cf1a30Sjl memlist_read_unlock(); 185425cf1a30Sjl if (rv) { 185525cf1a30Sjl cmn_err(CE_WARN, "%s: %smem-unit (%d.%d): " 185625cf1a30Sjl "deleted memory still found in phys_install", 185725cf1a30Sjl f, 185825cf1a30Sjl (mp == t_mp ? 
"target " : ""), 185925cf1a30Sjl mp->sbm_cm.sbdev_bp->b_num, 186025cf1a30Sjl mp->sbm_cm.sbdev_unum); 186125cf1a30Sjl 186225cf1a30Sjl DR_DEV_INTERNAL_ERROR(&s_mp->sbm_cm); 186325cf1a30Sjl return; 186425cf1a30Sjl } 186525cf1a30Sjl 186625cf1a30Sjl s_mp->sbm_flags |= DR_MFLAG_RELDONE; 186725cf1a30Sjl if (t_mp != NULL) 186825cf1a30Sjl t_mp->sbm_flags |= DR_MFLAG_RELDONE; 186925cf1a30Sjl 187025cf1a30Sjl /* this should not fail */ 187125cf1a30Sjl if (dr_release_dev_done(&s_mp->sbm_cm) != 0) { 187225cf1a30Sjl /* catch this in debug kernels */ 187325cf1a30Sjl ASSERT(0); 187425cf1a30Sjl return; 187525cf1a30Sjl } 187625cf1a30Sjl 187725cf1a30Sjl PR_MEM("%s: marking %s release DONE\n", 187825cf1a30Sjl f, s_mp->sbm_cm.sbdev_path); 187925cf1a30Sjl 188025cf1a30Sjl s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED; 188125cf1a30Sjl 188225cf1a30Sjl if (t_mp != NULL) { 188325cf1a30Sjl /* should not fail */ 188425cf1a30Sjl rv = dr_release_dev_done(&t_mp->sbm_cm); 188525cf1a30Sjl if (rv != 0) { 188625cf1a30Sjl /* catch this in debug kernels */ 188725cf1a30Sjl ASSERT(0); 188825cf1a30Sjl return; 188925cf1a30Sjl } 189025cf1a30Sjl 189125cf1a30Sjl PR_MEM("%s: marking %s release DONE\n", 189225cf1a30Sjl f, t_mp->sbm_cm.sbdev_path); 189325cf1a30Sjl 189425cf1a30Sjl t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED; 189525cf1a30Sjl } 189625cf1a30Sjl } 189725cf1a30Sjl 189825cf1a30Sjl /*ARGSUSED*/ 189925cf1a30Sjl int 190025cf1a30Sjl dr_disconnect_mem(dr_mem_unit_t *mp) 190125cf1a30Sjl { 190225cf1a30Sjl static fn_t f = "dr_disconnect_mem"; 190325cf1a30Sjl update_membounds_t umb; 190425cf1a30Sjl 190525cf1a30Sjl #ifdef DEBUG 190625cf1a30Sjl int state = mp->sbm_cm.sbdev_state; 190725cf1a30Sjl ASSERT(state == DR_STATE_CONNECTED || 190825cf1a30Sjl state == DR_STATE_UNCONFIGURED); 190925cf1a30Sjl #endif 191025cf1a30Sjl 191125cf1a30Sjl PR_MEM("%s...\n", f); 191225cf1a30Sjl 191325cf1a30Sjl if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist) 191425cf1a30Sjl memlist_delete(mp->sbm_del_mlist); 
191525cf1a30Sjl mp->sbm_del_mlist = NULL; 191625cf1a30Sjl 191725cf1a30Sjl if (mp->sbm_mlist) { 191825cf1a30Sjl memlist_delete(mp->sbm_mlist); 191925cf1a30Sjl mp->sbm_mlist = NULL; 192025cf1a30Sjl } 192125cf1a30Sjl 192225cf1a30Sjl /* 192325cf1a30Sjl * Remove memory from lgroup 192425cf1a30Sjl * For now, only board info is required. 192525cf1a30Sjl */ 192625cf1a30Sjl umb.u_board = mp->sbm_cm.sbdev_bp->b_num; 192725cf1a30Sjl umb.u_base = (uint64_t)-1; 192825cf1a30Sjl umb.u_len = (uint64_t)-1; 192925cf1a30Sjl 193025cf1a30Sjl lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb); 193125cf1a30Sjl 193225cf1a30Sjl return (0); 193325cf1a30Sjl } 193425cf1a30Sjl 193525cf1a30Sjl int 193625cf1a30Sjl dr_cancel_mem(dr_mem_unit_t *s_mp) 193725cf1a30Sjl { 193825cf1a30Sjl dr_mem_unit_t *t_mp; 193925cf1a30Sjl dr_state_t state; 194025cf1a30Sjl static fn_t f = "dr_cancel_mem"; 194125cf1a30Sjl 194225cf1a30Sjl state = s_mp->sbm_cm.sbdev_state; 194325cf1a30Sjl 194425cf1a30Sjl if (s_mp->sbm_flags & DR_MFLAG_TARGET) { 194525cf1a30Sjl /* must cancel source board, not target board */ 194625cf1a30Sjl /* TODO: set error */ 194725cf1a30Sjl return (-1); 194825cf1a30Sjl } else if (s_mp->sbm_flags & DR_MFLAG_SOURCE) { 194925cf1a30Sjl t_mp = s_mp->sbm_peer; 195025cf1a30Sjl ASSERT(t_mp != NULL); 195125cf1a30Sjl ASSERT(t_mp->sbm_peer == s_mp); 195225cf1a30Sjl 195325cf1a30Sjl /* must always match the source board's state */ 195425cf1a30Sjl /* TODO: is this assertion correct? 
*/ 195525cf1a30Sjl ASSERT(t_mp->sbm_cm.sbdev_state == state); 195625cf1a30Sjl } else { 195725cf1a30Sjl /* this is no target unit */ 195825cf1a30Sjl t_mp = NULL; 195925cf1a30Sjl } 196025cf1a30Sjl 196125cf1a30Sjl switch (state) { 196225cf1a30Sjl case DR_STATE_UNREFERENCED: /* state set by dr_release_dev_done */ 196325cf1a30Sjl ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0); 196425cf1a30Sjl 196525cf1a30Sjl if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) { 196625cf1a30Sjl PR_MEM("%s: undoing target %s memory delete\n", 196725cf1a30Sjl f, t_mp->sbm_cm.sbdev_path); 196825cf1a30Sjl dr_add_memory_spans(t_mp, t_mp->sbm_del_mlist); 196925cf1a30Sjl 197025cf1a30Sjl DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm); 197125cf1a30Sjl } 197225cf1a30Sjl 197325cf1a30Sjl if (s_mp->sbm_del_mlist != NULL) { 197425cf1a30Sjl PR_MEM("%s: undoing %s memory delete\n", 197525cf1a30Sjl f, s_mp->sbm_cm.sbdev_path); 197625cf1a30Sjl 197725cf1a30Sjl dr_add_memory_spans(s_mp, s_mp->sbm_del_mlist); 197825cf1a30Sjl } 197925cf1a30Sjl 198025cf1a30Sjl /*FALLTHROUGH*/ 198125cf1a30Sjl 198225cf1a30Sjl /* TODO: should no longer be possible to see the release state here */ 198325cf1a30Sjl case DR_STATE_RELEASE: /* state set by dr_release_mem_done */ 198425cf1a30Sjl 198525cf1a30Sjl ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0); 198625cf1a30Sjl 198725cf1a30Sjl if (t_mp != NULL) { 198825cf1a30Sjl ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist); 198925cf1a30Sjl t_mp->sbm_del_mlist = NULL; 199025cf1a30Sjl 199125cf1a30Sjl if (t_mp->sbm_mlist != NULL) { 199225cf1a30Sjl memlist_delete(t_mp->sbm_mlist); 199325cf1a30Sjl t_mp->sbm_mlist = NULL; 199425cf1a30Sjl } 199525cf1a30Sjl 199625cf1a30Sjl t_mp->sbm_peer = NULL; 199725cf1a30Sjl t_mp->sbm_flags = 0; 199825cf1a30Sjl t_mp->sbm_cm.sbdev_busy = 0; 199925cf1a30Sjl dr_init_mem_unit_data(t_mp); 200025cf1a30Sjl 200125cf1a30Sjl DR_DEV_CLR_RELEASED(&t_mp->sbm_cm); 200225cf1a30Sjl 200325cf1a30Sjl dr_device_transition( 200425cf1a30Sjl &t_mp->sbm_cm, DR_STATE_CONFIGURED); 
200525cf1a30Sjl } 200625cf1a30Sjl 200725cf1a30Sjl if (s_mp->sbm_del_mlist != s_mp->sbm_mlist) 200825cf1a30Sjl memlist_delete(s_mp->sbm_del_mlist); 200925cf1a30Sjl s_mp->sbm_del_mlist = NULL; 201025cf1a30Sjl 201125cf1a30Sjl if (s_mp->sbm_mlist != NULL) { 201225cf1a30Sjl memlist_delete(s_mp->sbm_mlist); 201325cf1a30Sjl s_mp->sbm_mlist = NULL; 201425cf1a30Sjl } 201525cf1a30Sjl 201625cf1a30Sjl s_mp->sbm_peer = NULL; 201725cf1a30Sjl s_mp->sbm_flags = 0; 201825cf1a30Sjl s_mp->sbm_cm.sbdev_busy = 0; 201925cf1a30Sjl dr_init_mem_unit_data(s_mp); 202025cf1a30Sjl 202125cf1a30Sjl return (0); 202225cf1a30Sjl 202325cf1a30Sjl default: 202425cf1a30Sjl PR_MEM("%s: WARNING unexpected state (%d) for %s\n", 202525cf1a30Sjl f, (int)state, s_mp->sbm_cm.sbdev_path); 202625cf1a30Sjl 202725cf1a30Sjl return (-1); 202825cf1a30Sjl } 202925cf1a30Sjl /*NOTREACHED*/ 203025cf1a30Sjl } 203125cf1a30Sjl 203225cf1a30Sjl void 203325cf1a30Sjl dr_init_mem_unit(dr_mem_unit_t *mp) 203425cf1a30Sjl { 203525cf1a30Sjl dr_state_t new_state; 203625cf1a30Sjl 203725cf1a30Sjl 203825cf1a30Sjl if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) { 203925cf1a30Sjl new_state = DR_STATE_CONFIGURED; 204025cf1a30Sjl mp->sbm_cm.sbdev_cond = SBD_COND_OK; 204125cf1a30Sjl } else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) { 204225cf1a30Sjl new_state = DR_STATE_CONNECTED; 204325cf1a30Sjl mp->sbm_cm.sbdev_cond = SBD_COND_OK; 204425cf1a30Sjl } else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) { 204525cf1a30Sjl new_state = DR_STATE_OCCUPIED; 204625cf1a30Sjl } else { 204725cf1a30Sjl new_state = DR_STATE_EMPTY; 204825cf1a30Sjl } 204925cf1a30Sjl 205025cf1a30Sjl if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) 205125cf1a30Sjl dr_init_mem_unit_data(mp); 205225cf1a30Sjl 205325cf1a30Sjl /* delay transition until fully initialized */ 205425cf1a30Sjl dr_device_transition(&mp->sbm_cm, new_state); 205525cf1a30Sjl } 205625cf1a30Sjl 205725cf1a30Sjl static void 205825cf1a30Sjl dr_init_mem_unit_data(dr_mem_unit_t *mp) 205925cf1a30Sjl { 206025cf1a30Sjl drmachid_t id = 
mp->sbm_cm.sbdev_id; 206125cf1a30Sjl drmach_mem_info_t minfo; 206225cf1a30Sjl sbd_error_t *err; 206325cf1a30Sjl static fn_t f = "dr_init_mem_unit_data"; 206425cf1a30Sjl update_membounds_t umb; 206525cf1a30Sjl 206625cf1a30Sjl PR_MEM("%s...\n", f); 206725cf1a30Sjl 206825cf1a30Sjl /* a little sanity checking */ 206925cf1a30Sjl ASSERT(mp->sbm_peer == NULL); 207025cf1a30Sjl ASSERT(mp->sbm_flags == 0); 207125cf1a30Sjl 207225cf1a30Sjl if (err = drmach_mem_get_info(id, &minfo)) { 207325cf1a30Sjl DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); 207425cf1a30Sjl return; 207525cf1a30Sjl } 207625cf1a30Sjl mp->sbm_basepfn = _b64top(minfo.mi_basepa); 207725cf1a30Sjl mp->sbm_npages = _b64top(minfo.mi_size); 207825cf1a30Sjl mp->sbm_alignment_mask = _b64top(minfo.mi_alignment_mask); 207925cf1a30Sjl mp->sbm_slice_size = minfo.mi_slice_size; 208025cf1a30Sjl 208125cf1a30Sjl /* 208225cf1a30Sjl * Add memory to lgroup 208325cf1a30Sjl */ 208425cf1a30Sjl umb.u_board = mp->sbm_cm.sbdev_bp->b_num; 208525cf1a30Sjl umb.u_base = (uint64_t)mp->sbm_basepfn << MMU_PAGESHIFT; 208625cf1a30Sjl umb.u_len = (uint64_t)mp->sbm_npages << MMU_PAGESHIFT; 208725cf1a30Sjl 208825cf1a30Sjl lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb); 208925cf1a30Sjl 209025cf1a30Sjl PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n", 209125cf1a30Sjl f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages); 209225cf1a30Sjl } 209325cf1a30Sjl 209425cf1a30Sjl static int 209525cf1a30Sjl dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml) 209625cf1a30Sjl { 209725cf1a30Sjl int err; 209825cf1a30Sjl pfn_t base; 209925cf1a30Sjl pgcnt_t npgs; 210025cf1a30Sjl struct memlist *mc; 210125cf1a30Sjl static fn_t f = "dr_reserve_mem_spans"; 210225cf1a30Sjl 210325cf1a30Sjl PR_MEM("%s...\n", f); 210425cf1a30Sjl 210525cf1a30Sjl /* 210625cf1a30Sjl * Walk the supplied memlist scheduling each span for removal 210725cf1a30Sjl * with kphysm_del_span. It is possible that a span may intersect 210825cf1a30Sjl * an area occupied by the cage. 
210925cf1a30Sjl */ 211025cf1a30Sjl for (mc = ml; mc != NULL; mc = mc->next) { 211125cf1a30Sjl base = _b64top(mc->address); 211225cf1a30Sjl npgs = _b64top(mc->size); 211325cf1a30Sjl 211425cf1a30Sjl err = kphysm_del_span(*mhp, base, npgs); 211525cf1a30Sjl if (err != KPHYSM_OK) { 211625cf1a30Sjl cmn_err(CE_WARN, "%s memory reserve failed." 211725cf1a30Sjl " unexpected kphysm_del_span return value %d;" 211825cf1a30Sjl " basepfn=0x%lx npages=%ld", 211925cf1a30Sjl f, err, base, npgs); 212025cf1a30Sjl 212125cf1a30Sjl return (-1); 212225cf1a30Sjl } 212325cf1a30Sjl } 212425cf1a30Sjl 212525cf1a30Sjl return (0); 212625cf1a30Sjl } 212725cf1a30Sjl 212825cf1a30Sjl #define DR_SMT_NPREF_SETS 6 212925cf1a30Sjl #define DR_SMT_NUNITS_PER_SET MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD 213025cf1a30Sjl 213125cf1a30Sjl /* debug counters */ 213225cf1a30Sjl int dr_smt_realigned; 213325cf1a30Sjl int dr_smt_preference[DR_SMT_NPREF_SETS]; 213425cf1a30Sjl 213525cf1a30Sjl #ifdef DEBUG 213625cf1a30Sjl uint_t dr_ignore_board; /* if bit[bnum-1] set, board won't be candidate */ 213725cf1a30Sjl #endif 213825cf1a30Sjl 213925cf1a30Sjl /* 214025cf1a30Sjl * Find and reserve a copy/rename target board suitable for the 214125cf1a30Sjl * given source board. 214225cf1a30Sjl * All boards in the system are examined and categorized in relation to 214325cf1a30Sjl * their memory size versus the source board's memory size. 
Order of 214425cf1a30Sjl * preference is: 214525cf1a30Sjl * 1st copy all source, source/target same size 214625cf1a30Sjl * 2nd copy all source, larger target 214725cf1a30Sjl * 3rd copy nonrelocatable source span 214825cf1a30Sjl */ 214925cf1a30Sjl static int 215025cf1a30Sjl dr_select_mem_target(dr_handle_t *hp, 215125cf1a30Sjl dr_mem_unit_t *s_mp, struct memlist *s_ml) 215225cf1a30Sjl { 215325cf1a30Sjl dr_target_pref_t preference; /* lower value is higher preference */ 215425cf1a30Sjl int idx; 215525cf1a30Sjl dr_mem_unit_t **sets; 215625cf1a30Sjl 215725cf1a30Sjl int t_bd; 215825cf1a30Sjl int t_unit; 215925cf1a30Sjl int rv; 216025cf1a30Sjl dr_board_t *s_bp, *t_bp; 216125cf1a30Sjl dr_mem_unit_t *t_mp, *c_mp; 216225cf1a30Sjl struct memlist *d_ml, *t_ml, *ml, *b_ml, *x_ml = NULL; 216325cf1a30Sjl memquery_t s_mq = {0}; 216425cf1a30Sjl static fn_t f = "dr_select_mem_target"; 216525cf1a30Sjl 216625cf1a30Sjl PR_MEM("%s...\n", f); 216725cf1a30Sjl 216825cf1a30Sjl ASSERT(s_ml != NULL); 216925cf1a30Sjl 217025cf1a30Sjl sets = GETSTRUCT(dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET * 217125cf1a30Sjl DR_SMT_NPREF_SETS); 217225cf1a30Sjl 217325cf1a30Sjl s_bp = hp->h_bd; 217425cf1a30Sjl /* calculate the offset into the slice of the last source board pfn */ 217525cf1a30Sjl ASSERT(s_mp->sbm_npages != 0); 217625cf1a30Sjl 217725cf1a30Sjl /* 217825cf1a30Sjl * Find non-relocatable span on source board. 
217925cf1a30Sjl */ 218025cf1a30Sjl rv = kphysm_del_span_query(s_mp->sbm_basepfn, s_mp->sbm_npages, &s_mq); 218125cf1a30Sjl if (rv != KPHYSM_OK) { 218225cf1a30Sjl PR_MEM("%s: %s: unexpected kphysm_del_span_query" 218325cf1a30Sjl " return value %d; basepfn 0x%lx, npages %ld\n", 218425cf1a30Sjl f, s_mp->sbm_cm.sbdev_path, rv, s_mp->sbm_basepfn, 218525cf1a30Sjl s_mp->sbm_npages); 218625cf1a30Sjl return (-1); 218725cf1a30Sjl } 218825cf1a30Sjl 218925cf1a30Sjl ASSERT(s_mq.phys_pages != 0); 219025cf1a30Sjl ASSERT(s_mq.nonrelocatable != 0); 219125cf1a30Sjl 219225cf1a30Sjl PR_MEM("%s: %s: nonrelocatable span (0x%lx..0x%lx)\n", f, 219325cf1a30Sjl s_mp->sbm_cm.sbdev_path, s_mq.first_nonrelocatable, 219425cf1a30Sjl s_mq.last_nonrelocatable); 219525cf1a30Sjl 219625cf1a30Sjl /* break down s_ml if it contains dynamic segments */ 219725cf1a30Sjl b_ml = memlist_dup(s_ml); 219825cf1a30Sjl 219925cf1a30Sjl for (ml = s_mp->sbm_dyn_segs; ml; ml = ml->next) { 220025cf1a30Sjl b_ml = memlist_del_span(b_ml, ml->address, ml->size); 220125cf1a30Sjl b_ml = memlist_cat_span(b_ml, ml->address, ml->size); 220225cf1a30Sjl } 220325cf1a30Sjl 220425cf1a30Sjl 220525cf1a30Sjl /* 220625cf1a30Sjl * Make one pass through all memory units on all boards 220725cf1a30Sjl * and categorize them with respect to the source board. 220825cf1a30Sjl */ 220925cf1a30Sjl for (t_bd = 0; t_bd < MAX_BOARDS; t_bd++) { 221025cf1a30Sjl /* 221125cf1a30Sjl * The board structs are a contiguous array 221225cf1a30Sjl * so we take advantage of that to find the 221325cf1a30Sjl * correct board struct pointer for a given 221425cf1a30Sjl * board number. 
221525cf1a30Sjl */ 221625cf1a30Sjl t_bp = dr_lookup_board(t_bd); 221725cf1a30Sjl 221825cf1a30Sjl /* source board can not be its own target */ 221925cf1a30Sjl if (s_bp->b_num == t_bp->b_num) 222025cf1a30Sjl continue; 222125cf1a30Sjl 222225cf1a30Sjl for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) { 222325cf1a30Sjl 222425cf1a30Sjl t_mp = dr_get_mem_unit(t_bp, t_unit); 222525cf1a30Sjl 222625cf1a30Sjl /* this memory node must be attached */ 222725cf1a30Sjl if (!DR_DEV_IS_ATTACHED(&t_mp->sbm_cm)) 222825cf1a30Sjl continue; 222925cf1a30Sjl 223025cf1a30Sjl /* source unit can not be its own target */ 223125cf1a30Sjl if (s_mp == t_mp) { 223225cf1a30Sjl /* catch this is debug kernels */ 223325cf1a30Sjl ASSERT(0); 223425cf1a30Sjl continue; 223525cf1a30Sjl } 223625cf1a30Sjl 223725cf1a30Sjl /* 223825cf1a30Sjl * this memory node must not already be reserved 223925cf1a30Sjl * by some other memory delete operation. 224025cf1a30Sjl */ 224125cf1a30Sjl if (t_mp->sbm_flags & DR_MFLAG_RESERVED) 224225cf1a30Sjl continue; 224325cf1a30Sjl 224425cf1a30Sjl /* get target board memlist */ 224525cf1a30Sjl t_ml = dr_get_memlist(t_mp); 224625cf1a30Sjl if (t_ml == NULL) { 224725cf1a30Sjl cmn_err(CE_WARN, "%s: no memlist for" 224825cf1a30Sjl " mem-unit %d, board %d", f, 224925cf1a30Sjl t_mp->sbm_cm.sbdev_bp->b_num, 225025cf1a30Sjl t_mp->sbm_cm.sbdev_unum); 225125cf1a30Sjl continue; 225225cf1a30Sjl } 225325cf1a30Sjl 225425cf1a30Sjl preference = dr_get_target_preference(hp, t_mp, s_mp, 225525cf1a30Sjl t_ml, s_ml, b_ml); 225625cf1a30Sjl 225725cf1a30Sjl if (preference == DR_TP_INVALID) 225825cf1a30Sjl continue; 225925cf1a30Sjl 226025cf1a30Sjl dr_smt_preference[preference]++; 226125cf1a30Sjl 226225cf1a30Sjl /* calculate index to start of preference set */ 226325cf1a30Sjl idx = DR_SMT_NUNITS_PER_SET * preference; 226425cf1a30Sjl /* calculate offset to respective element */ 226525cf1a30Sjl idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit; 226625cf1a30Sjl 226725cf1a30Sjl ASSERT(idx < 
DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS); 226825cf1a30Sjl sets[idx] = t_mp; 226925cf1a30Sjl } 227025cf1a30Sjl } 227125cf1a30Sjl 227225cf1a30Sjl if (b_ml != NULL) 227325cf1a30Sjl memlist_delete(b_ml); 227425cf1a30Sjl 227525cf1a30Sjl /* 227625cf1a30Sjl * NOTE: this would be a good place to sort each candidate 227725cf1a30Sjl * set in to some desired order, e.g. memory size in ascending 227825cf1a30Sjl * order. Without an additional sorting step here, the order 227925cf1a30Sjl * within a set is ascending board number order. 228025cf1a30Sjl */ 228125cf1a30Sjl 228225cf1a30Sjl c_mp = NULL; 228325cf1a30Sjl x_ml = NULL; 228425cf1a30Sjl t_ml = NULL; 228525cf1a30Sjl for (idx = 0; idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS; idx++) { 228625cf1a30Sjl memquery_t mq; 228725cf1a30Sjl 228825cf1a30Sjl preference = (dr_target_pref_t)(idx / DR_SMT_NUNITS_PER_SET); 228925cf1a30Sjl 229025cf1a30Sjl ASSERT(preference != DR_TP_INVALID); 229125cf1a30Sjl 229225cf1a30Sjl /* cleanup t_ml after previous pass */ 229325cf1a30Sjl if (t_ml != NULL) { 229425cf1a30Sjl memlist_delete(t_ml); 229525cf1a30Sjl t_ml = NULL; 229625cf1a30Sjl } 229725cf1a30Sjl 229825cf1a30Sjl /* get candidate target board mem unit */ 229925cf1a30Sjl t_mp = sets[idx]; 230025cf1a30Sjl if (t_mp == NULL) 230125cf1a30Sjl continue; 230225cf1a30Sjl 230325cf1a30Sjl /* get target board memlist */ 230425cf1a30Sjl t_ml = dr_get_memlist(t_mp); 230525cf1a30Sjl if (t_ml == NULL) { 230625cf1a30Sjl cmn_err(CE_WARN, "%s: no memlist for" 230725cf1a30Sjl " mem-unit %d, board %d", 230825cf1a30Sjl f, 230925cf1a30Sjl t_mp->sbm_cm.sbdev_bp->b_num, 231025cf1a30Sjl t_mp->sbm_cm.sbdev_unum); 231125cf1a30Sjl 231225cf1a30Sjl continue; 231325cf1a30Sjl } 231425cf1a30Sjl 231525cf1a30Sjl PR_MEM("%s: checking for no-reloc in %s, " 231625cf1a30Sjl " basepfn=0x%lx, npages=%ld\n", 231725cf1a30Sjl f, 231825cf1a30Sjl t_mp->sbm_cm.sbdev_path, 231925cf1a30Sjl t_mp->sbm_basepfn, 232025cf1a30Sjl t_mp->sbm_npages); 232125cf1a30Sjl 232225cf1a30Sjl rv = 
dr_del_mlist_query(t_ml, &mq); 232325cf1a30Sjl if (rv != KPHYSM_OK) { 232425cf1a30Sjl PR_MEM("%s: kphysm_del_span_query:" 232525cf1a30Sjl " unexpected return value %d\n", f, rv); 232625cf1a30Sjl 232725cf1a30Sjl continue; 232825cf1a30Sjl } 232925cf1a30Sjl 233025cf1a30Sjl if (mq.nonrelocatable != 0) { 233125cf1a30Sjl PR_MEM("%s: candidate %s has" 233225cf1a30Sjl " nonrelocatable span [0x%lx..0x%lx]\n", 233325cf1a30Sjl f, 233425cf1a30Sjl t_mp->sbm_cm.sbdev_path, 233525cf1a30Sjl mq.first_nonrelocatable, 233625cf1a30Sjl mq.last_nonrelocatable); 233725cf1a30Sjl 233825cf1a30Sjl continue; 233925cf1a30Sjl } 234025cf1a30Sjl 234125cf1a30Sjl #ifdef DEBUG 234225cf1a30Sjl /* 234325cf1a30Sjl * This is a debug tool for excluding certain boards 234425cf1a30Sjl * from being selected as a target board candidate. 234525cf1a30Sjl * dr_ignore_board is only tested by this driver. 234625cf1a30Sjl * It must be set with adb, obp, /etc/system or your 234725cf1a30Sjl * favorite debugger. 234825cf1a30Sjl */ 234925cf1a30Sjl if (dr_ignore_board & 235025cf1a30Sjl (1 << (t_mp->sbm_cm.sbdev_bp->b_num - 1))) { 235125cf1a30Sjl PR_MEM("%s: dr_ignore_board flag set," 235225cf1a30Sjl " ignoring %s as candidate\n", 235325cf1a30Sjl f, t_mp->sbm_cm.sbdev_path); 235425cf1a30Sjl continue; 235525cf1a30Sjl } 235625cf1a30Sjl #endif 235725cf1a30Sjl 235825cf1a30Sjl /* 235925cf1a30Sjl * Reserve excess source board memory, if any. 236025cf1a30Sjl * 236125cf1a30Sjl * Only the nonrelocatable source span will be copied 236225cf1a30Sjl * so schedule the rest of the source mem to be deleted. 236325cf1a30Sjl */ 236425cf1a30Sjl switch (preference) { 236525cf1a30Sjl case DR_TP_NONRELOC: 236625cf1a30Sjl /* 236725cf1a30Sjl * Get source copy memlist and use it to construct 236825cf1a30Sjl * delete memlist. 
236925cf1a30Sjl */ 237025cf1a30Sjl d_ml = memlist_dup(s_ml); 237125cf1a30Sjl x_ml = dr_get_copy_mlist(s_ml, t_ml, s_mp, t_mp); 237225cf1a30Sjl 237325cf1a30Sjl /* XXX */ 237425cf1a30Sjl ASSERT(d_ml != NULL); 237525cf1a30Sjl ASSERT(x_ml != NULL); 237625cf1a30Sjl 237725cf1a30Sjl for (ml = x_ml; ml != NULL; ml = ml->next) { 237825cf1a30Sjl d_ml = memlist_del_span(d_ml, ml->address, 237925cf1a30Sjl ml->size); 238025cf1a30Sjl } 238125cf1a30Sjl 238225cf1a30Sjl PR_MEM("%s: %s: reserving src brd memlist:\n", f, 238325cf1a30Sjl s_mp->sbm_cm.sbdev_path); 238425cf1a30Sjl PR_MEMLIST_DUMP(d_ml); 238525cf1a30Sjl 238625cf1a30Sjl /* reserve excess spans */ 238725cf1a30Sjl if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, 238825cf1a30Sjl d_ml) != 0) { 238925cf1a30Sjl /* likely more non-reloc pages appeared */ 239025cf1a30Sjl /* TODO: restart from top? */ 239125cf1a30Sjl continue; 239225cf1a30Sjl } 239325cf1a30Sjl break; 239425cf1a30Sjl default: 239525cf1a30Sjl d_ml = NULL; 239625cf1a30Sjl break; 239725cf1a30Sjl } 239825cf1a30Sjl 239925cf1a30Sjl s_mp->sbm_flags |= DR_MFLAG_RESERVED; 240025cf1a30Sjl 240125cf1a30Sjl /* 240225cf1a30Sjl * reserve all memory on target board. 240325cf1a30Sjl * NOTE: source board's memhandle is used. 240425cf1a30Sjl * 240525cf1a30Sjl * If this succeeds (eq 0), then target selection is 240625cf1a30Sjl * complete and all unwanted memory spans, both source and 240725cf1a30Sjl * target, have been reserved. Loop is terminated. 
240825cf1a30Sjl */ 240925cf1a30Sjl if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) { 241025cf1a30Sjl PR_MEM("%s: %s: target board memory reserved\n", 241125cf1a30Sjl f, t_mp->sbm_cm.sbdev_path); 241225cf1a30Sjl 241325cf1a30Sjl /* a candidate target board is now reserved */ 241425cf1a30Sjl t_mp->sbm_flags |= DR_MFLAG_RESERVED; 241525cf1a30Sjl c_mp = t_mp; 241625cf1a30Sjl 241725cf1a30Sjl /* *** EXITING LOOP *** */ 241825cf1a30Sjl break; 241925cf1a30Sjl } 242025cf1a30Sjl 242125cf1a30Sjl /* did not successfully reserve the target board. */ 242225cf1a30Sjl PR_MEM("%s: could not reserve target %s\n", 242325cf1a30Sjl f, t_mp->sbm_cm.sbdev_path); 242425cf1a30Sjl 242525cf1a30Sjl /* 242625cf1a30Sjl * NOTE: an undo of the dr_reserve_mem_span work 242725cf1a30Sjl * will happen automatically when the memhandle 242825cf1a30Sjl * (s_mp->sbm_memhandle) is kphysm_del_release'd. 242925cf1a30Sjl */ 243025cf1a30Sjl 243125cf1a30Sjl s_mp->sbm_flags &= ~DR_MFLAG_RESERVED; 243225cf1a30Sjl } 243325cf1a30Sjl 243425cf1a30Sjl /* clean up after memlist editing logic */ 243525cf1a30Sjl if (x_ml != NULL) 243625cf1a30Sjl memlist_delete(x_ml); 243725cf1a30Sjl 243825cf1a30Sjl FREESTRUCT(sets, dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET * 243925cf1a30Sjl DR_SMT_NPREF_SETS); 244025cf1a30Sjl 244125cf1a30Sjl /* 244225cf1a30Sjl * c_mp will be NULL when the entire sets[] array 244325cf1a30Sjl * has been searched without reserving a target board. 
244425cf1a30Sjl */ 244525cf1a30Sjl if (c_mp == NULL) { 244625cf1a30Sjl PR_MEM("%s: %s: target selection failed.\n", 244725cf1a30Sjl f, s_mp->sbm_cm.sbdev_path); 244825cf1a30Sjl 244925cf1a30Sjl if (t_ml != NULL) 245025cf1a30Sjl memlist_delete(t_ml); 245125cf1a30Sjl 245225cf1a30Sjl return (-1); 245325cf1a30Sjl } 245425cf1a30Sjl 245525cf1a30Sjl PR_MEM("%s: found target %s for source %s\n", 245625cf1a30Sjl f, 245725cf1a30Sjl c_mp->sbm_cm.sbdev_path, 245825cf1a30Sjl s_mp->sbm_cm.sbdev_path); 245925cf1a30Sjl 246025cf1a30Sjl s_mp->sbm_peer = c_mp; 246125cf1a30Sjl s_mp->sbm_flags |= DR_MFLAG_SOURCE; 246225cf1a30Sjl s_mp->sbm_del_mlist = d_ml; /* spans to be deleted, if any */ 246325cf1a30Sjl s_mp->sbm_mlist = s_ml; 246425cf1a30Sjl s_mp->sbm_cm.sbdev_busy = 1; 246525cf1a30Sjl 246625cf1a30Sjl c_mp->sbm_peer = s_mp; 246725cf1a30Sjl c_mp->sbm_flags |= DR_MFLAG_TARGET; 246825cf1a30Sjl c_mp->sbm_del_mlist = t_ml; /* spans to be deleted */ 246925cf1a30Sjl c_mp->sbm_mlist = t_ml; 247025cf1a30Sjl c_mp->sbm_cm.sbdev_busy = 1; 247125cf1a30Sjl 247225cf1a30Sjl return (0); 247325cf1a30Sjl } 247425cf1a30Sjl 247525cf1a30Sjl /* 247625cf1a30Sjl * Returns target preference rank: 247725cf1a30Sjl * -1 not a valid copy-rename target board 247825cf1a30Sjl * 0 copy all source, source/target same size 247925cf1a30Sjl * 1 copy all source, larger target 248025cf1a30Sjl * 2 copy nonrelocatable source span 248125cf1a30Sjl */ 248225cf1a30Sjl static dr_target_pref_t 248325cf1a30Sjl dr_get_target_preference(dr_handle_t *hp, 248425cf1a30Sjl dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp, 248525cf1a30Sjl struct memlist *t_ml, struct memlist *s_ml, 248625cf1a30Sjl struct memlist *b_ml) 248725cf1a30Sjl { 248825cf1a30Sjl dr_target_pref_t preference; 248925cf1a30Sjl struct memlist *s_nonreloc_ml = NULL; 249025cf1a30Sjl drmachid_t t_id; 249125cf1a30Sjl static fn_t f = "dr_get_target_preference"; 249225cf1a30Sjl 249325cf1a30Sjl t_id = t_mp->sbm_cm.sbdev_bp->b_id; 249425cf1a30Sjl 249525cf1a30Sjl /* 249625cf1a30Sjl * 
Can the entire source board be copied? 249725cf1a30Sjl */ 249825cf1a30Sjl if (dr_memlist_canfit(s_ml, t_ml, s_mp, t_mp)) { 249925cf1a30Sjl if (s_mp->sbm_npages == t_mp->sbm_npages) 250025cf1a30Sjl preference = DR_TP_SAME; /* same size */ 250125cf1a30Sjl else 250225cf1a30Sjl preference = DR_TP_LARGE; /* larger target */ 250325cf1a30Sjl } else { 250425cf1a30Sjl /* 250525cf1a30Sjl * Entire source won't fit so try non-relocatable memory only 250625cf1a30Sjl * (target aligned). 250725cf1a30Sjl */ 250825cf1a30Sjl s_nonreloc_ml = dr_get_nonreloc_mlist(b_ml, s_mp); 250925cf1a30Sjl if (s_nonreloc_ml == NULL) { 251025cf1a30Sjl PR_MEM("%s: dr_get_nonreloc_mlist failed\n", f); 251125cf1a30Sjl preference = DR_TP_INVALID; 251225cf1a30Sjl } 251325cf1a30Sjl if (dr_memlist_canfit(s_nonreloc_ml, t_ml, s_mp, t_mp)) 251425cf1a30Sjl preference = DR_TP_NONRELOC; 251525cf1a30Sjl else 251625cf1a30Sjl preference = DR_TP_INVALID; 251725cf1a30Sjl } 251825cf1a30Sjl 251925cf1a30Sjl if (s_nonreloc_ml != NULL) 252025cf1a30Sjl memlist_delete(s_nonreloc_ml); 252125cf1a30Sjl 252225cf1a30Sjl /* 252325cf1a30Sjl * Force floating board preference lower than all other boards 252425cf1a30Sjl * if the force flag is present; otherwise disallow the board. 252525cf1a30Sjl */ 252625cf1a30Sjl if ((preference != DR_TP_INVALID) && drmach_board_is_floating(t_id)) { 252725cf1a30Sjl if (dr_cmd_flags(hp) & SBD_FLAG_FORCE) 252825cf1a30Sjl preference += DR_TP_FLOATING; 252925cf1a30Sjl else 253025cf1a30Sjl preference = DR_TP_INVALID; 253125cf1a30Sjl } 253225cf1a30Sjl 253325cf1a30Sjl PR_MEM("%s: %s preference=%d\n", f, t_mp->sbm_cm.sbdev_path, 253425cf1a30Sjl preference); 253525cf1a30Sjl 253625cf1a30Sjl return (preference); 253725cf1a30Sjl } 253825cf1a30Sjl 253925cf1a30Sjl /* 254025cf1a30Sjl * Create a memlist representing the source memory that will be copied to 254125cf1a30Sjl * the target board. The memory to be copied is the maximum amount that 254225cf1a30Sjl * will fit on the target board. 
254325cf1a30Sjl */ 254425cf1a30Sjl static struct memlist * 254525cf1a30Sjl dr_get_copy_mlist(struct memlist *s_mlist, struct memlist *t_mlist, 254625cf1a30Sjl dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp) 254725cf1a30Sjl { 254825cf1a30Sjl struct memlist *t_ml, *s_copy_ml, *s_del_ml, *ml, *x_ml; 254925cf1a30Sjl uint64_t s_slice_mask, s_slice_base; 255025cf1a30Sjl uint64_t t_slice_mask, t_slice_base; 255125cf1a30Sjl static fn_t f = "dr_get_copy_mlist"; 255225cf1a30Sjl 255325cf1a30Sjl ASSERT(s_mlist != NULL); 255425cf1a30Sjl ASSERT(t_mlist != NULL); 255525cf1a30Sjl ASSERT(t_mp->sbm_slice_size == s_mp->sbm_slice_size); 255625cf1a30Sjl 255725cf1a30Sjl s_slice_mask = s_mp->sbm_slice_size - 1; 255825cf1a30Sjl s_slice_base = s_mlist->address & ~s_slice_mask; 255925cf1a30Sjl 256025cf1a30Sjl t_slice_mask = t_mp->sbm_slice_size - 1; 256125cf1a30Sjl t_slice_base = t_mlist->address & ~t_slice_mask; 256225cf1a30Sjl 256325cf1a30Sjl t_ml = memlist_dup(t_mlist); 256425cf1a30Sjl s_del_ml = memlist_dup(s_mlist); 256525cf1a30Sjl s_copy_ml = memlist_dup(s_mlist); 256625cf1a30Sjl 256725cf1a30Sjl /* XXX */ 256825cf1a30Sjl ASSERT(t_ml != NULL); 256925cf1a30Sjl ASSERT(s_del_ml != NULL); 257025cf1a30Sjl ASSERT(s_copy_ml != NULL); 257125cf1a30Sjl 257225cf1a30Sjl /* 257325cf1a30Sjl * To construct the source copy memlist: 257425cf1a30Sjl * 257525cf1a30Sjl * The target memlist is converted to the post-rename 257625cf1a30Sjl * source addresses. This is the physical address range 257725cf1a30Sjl * the target will have after the copy-rename. Overlaying 257825cf1a30Sjl * and deleting this from the current source memlist will 257925cf1a30Sjl * give the source delete memlist. The copy memlist is 258025cf1a30Sjl * the reciprocal of the source delete memlist. 258125cf1a30Sjl */ 258225cf1a30Sjl for (ml = t_ml; ml != NULL; ml = ml->next) { 258325cf1a30Sjl /* 258425cf1a30Sjl * Normalize relative to target slice base PA 258525cf1a30Sjl * in order to preseve slice offsets. 
258625cf1a30Sjl */ 258725cf1a30Sjl ml->address -= t_slice_base; 258825cf1a30Sjl /* 258925cf1a30Sjl * Convert to source slice PA address. 259025cf1a30Sjl */ 259125cf1a30Sjl ml->address += s_slice_base; 259225cf1a30Sjl } 259325cf1a30Sjl 259425cf1a30Sjl for (ml = t_ml; ml != NULL; ml = ml->next) { 259525cf1a30Sjl s_del_ml = memlist_del_span(s_del_ml, ml->address, ml->size); 259625cf1a30Sjl } 259725cf1a30Sjl 259825cf1a30Sjl /* 259925cf1a30Sjl * Expand the delete mlist to fully include any dynamic segments 260025cf1a30Sjl * it intersects with. 260125cf1a30Sjl */ 260225cf1a30Sjl for (x_ml = NULL, ml = s_del_ml; ml != NULL; ml = ml->next) { 260325cf1a30Sjl uint64_t del_base = ml->address; 260425cf1a30Sjl uint64_t del_end = ml->address + ml->size; 260525cf1a30Sjl struct memlist *dyn; 260625cf1a30Sjl 260725cf1a30Sjl for (dyn = s_mp->sbm_dyn_segs; dyn != NULL; dyn = dyn->next) { 260825cf1a30Sjl uint64_t dyn_base = dyn->address; 260925cf1a30Sjl uint64_t dyn_end = dyn->address + dyn->size; 261025cf1a30Sjl 261125cf1a30Sjl if (del_base > dyn_base && del_base < dyn_end) 261225cf1a30Sjl del_base = dyn_base; 261325cf1a30Sjl 261425cf1a30Sjl if (del_end > dyn_base && del_end < dyn_end) 261525cf1a30Sjl del_end = dyn_end; 261625cf1a30Sjl } 261725cf1a30Sjl 261825cf1a30Sjl x_ml = memlist_cat_span(x_ml, del_base, del_end - del_base); 261925cf1a30Sjl } 262025cf1a30Sjl 262125cf1a30Sjl memlist_delete(s_del_ml); 262225cf1a30Sjl s_del_ml = x_ml; 262325cf1a30Sjl 262425cf1a30Sjl for (ml = s_del_ml; ml != NULL; ml = ml->next) { 262525cf1a30Sjl s_copy_ml = memlist_del_span(s_copy_ml, ml->address, ml->size); 262625cf1a30Sjl } 262725cf1a30Sjl 262825cf1a30Sjl PR_MEM("%s: source delete mlist\n", f); 262925cf1a30Sjl PR_MEMLIST_DUMP(s_del_ml); 263025cf1a30Sjl 263125cf1a30Sjl PR_MEM("%s: source copy mlist\n", f); 263225cf1a30Sjl PR_MEMLIST_DUMP(s_copy_ml); 263325cf1a30Sjl 263425cf1a30Sjl memlist_delete(t_ml); 263525cf1a30Sjl memlist_delete(s_del_ml); 263625cf1a30Sjl 263725cf1a30Sjl return (s_copy_ml); 
263825cf1a30Sjl } 263925cf1a30Sjl 264025cf1a30Sjl /* 264125cf1a30Sjl * Scan the non-relocatable spans on the source memory 264225cf1a30Sjl * and construct a minimum mlist that includes all non-reloc 264325cf1a30Sjl * memory subject to target alignment, and dynamic segment 264425cf1a30Sjl * constraints where only whole dynamic segments may be deleted. 264525cf1a30Sjl */ 264625cf1a30Sjl static struct memlist * 264725cf1a30Sjl dr_get_nonreloc_mlist(struct memlist *s_ml, dr_mem_unit_t *s_mp) 264825cf1a30Sjl { 264925cf1a30Sjl struct memlist *x_ml = NULL; 265025cf1a30Sjl struct memlist *ml; 265125cf1a30Sjl static fn_t f = "dr_get_nonreloc_mlist"; 265225cf1a30Sjl 265325cf1a30Sjl PR_MEM("%s: checking for split of dyn seg list:\n", f); 265425cf1a30Sjl PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs); 265525cf1a30Sjl 265625cf1a30Sjl for (ml = s_ml; ml; ml = ml->next) { 265725cf1a30Sjl int rv; 265825cf1a30Sjl uint64_t nr_base, nr_end; 265925cf1a30Sjl memquery_t mq; 266025cf1a30Sjl struct memlist *dyn; 266125cf1a30Sjl 266225cf1a30Sjl rv = kphysm_del_span_query( 266325cf1a30Sjl _b64top(ml->address), _b64top(ml->size), &mq); 266425cf1a30Sjl if (rv) { 266525cf1a30Sjl memlist_delete(x_ml); 266625cf1a30Sjl return (NULL); 266725cf1a30Sjl } 266825cf1a30Sjl 266925cf1a30Sjl if (mq.nonrelocatable == 0) 267025cf1a30Sjl continue; 267125cf1a30Sjl 267225cf1a30Sjl PR_MEM("%s: non-reloc span: 0x%lx, 0x%lx (%lx, %lx)\n", f, 267325cf1a30Sjl _ptob64(mq.first_nonrelocatable), 267425cf1a30Sjl _ptob64(mq.last_nonrelocatable), 267525cf1a30Sjl mq.first_nonrelocatable, 267625cf1a30Sjl mq.last_nonrelocatable); 267725cf1a30Sjl 267825cf1a30Sjl /* 267925cf1a30Sjl * Align the span at both ends to allow for possible 268025cf1a30Sjl * cage expansion. 
268125cf1a30Sjl */ 268225cf1a30Sjl nr_base = _ptob64(mq.first_nonrelocatable); 268325cf1a30Sjl nr_end = _ptob64(mq.last_nonrelocatable + 1); 268425cf1a30Sjl 268525cf1a30Sjl PR_MEM("%s: adjusted non-reloc span: 0x%lx, 0x%lx\n", 268625cf1a30Sjl f, nr_base, nr_end); 268725cf1a30Sjl 268825cf1a30Sjl /* 268925cf1a30Sjl * Expand the non-reloc span to fully include any 269025cf1a30Sjl * dynamic segments it intersects with. 269125cf1a30Sjl */ 269225cf1a30Sjl for (dyn = s_mp->sbm_dyn_segs; dyn != NULL; dyn = dyn->next) { 269325cf1a30Sjl uint64_t dyn_base = dyn->address; 269425cf1a30Sjl uint64_t dyn_end = dyn->address + dyn->size; 269525cf1a30Sjl 269625cf1a30Sjl if (nr_base > dyn_base && nr_base < dyn_end) 269725cf1a30Sjl nr_base = dyn_base; 269825cf1a30Sjl 269925cf1a30Sjl if (nr_end > dyn_base && nr_end < dyn_end) 270025cf1a30Sjl nr_end = dyn_end; 270125cf1a30Sjl } 270225cf1a30Sjl 270325cf1a30Sjl x_ml = memlist_cat_span(x_ml, nr_base, nr_end - nr_base); 270425cf1a30Sjl } 270525cf1a30Sjl 270625cf1a30Sjl if (x_ml == NULL) { 270725cf1a30Sjl PR_MEM("%s: source didn't have any non-reloc pages!\n", f); 270825cf1a30Sjl return (NULL); 270925cf1a30Sjl } 271025cf1a30Sjl 271125cf1a30Sjl PR_MEM("%s: %s: edited source memlist:\n", f, s_mp->sbm_cm.sbdev_path); 271225cf1a30Sjl PR_MEMLIST_DUMP(x_ml); 271325cf1a30Sjl 271425cf1a30Sjl return (x_ml); 271525cf1a30Sjl } 271625cf1a30Sjl 271725cf1a30Sjl /* 271825cf1a30Sjl * Check if source memlist can fit in target memlist while maintaining 271925cf1a30Sjl * relative offsets within board. 
272025cf1a30Sjl */ 272125cf1a30Sjl static int 272225cf1a30Sjl dr_memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist, 272325cf1a30Sjl dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp) 272425cf1a30Sjl { 272525cf1a30Sjl int canfit = 0; 272625cf1a30Sjl struct memlist *s_ml, *t_ml, *ml; 272725cf1a30Sjl uint64_t s_slice_mask, t_slice_mask; 272825cf1a30Sjl static fn_t f = "dr_mlist_canfit"; 272925cf1a30Sjl 273025cf1a30Sjl s_ml = memlist_dup(s_mlist); 273125cf1a30Sjl t_ml = memlist_dup(t_mlist); 273225cf1a30Sjl 273325cf1a30Sjl if (s_ml == NULL || t_ml == NULL) { 273425cf1a30Sjl cmn_err(CE_WARN, "%s: memlist_dup failed\n", f); 273525cf1a30Sjl goto done; 273625cf1a30Sjl } 273725cf1a30Sjl 273825cf1a30Sjl s_slice_mask = s_mp->sbm_slice_size - 1; 273925cf1a30Sjl t_slice_mask = t_mp->sbm_slice_size - 1; 274025cf1a30Sjl 274125cf1a30Sjl /* 274225cf1a30Sjl * Normalize to slice relative offsets. 274325cf1a30Sjl */ 274425cf1a30Sjl for (ml = s_ml; ml; ml = ml->next) 274525cf1a30Sjl ml->address &= s_slice_mask; 274625cf1a30Sjl 274725cf1a30Sjl for (ml = t_ml; ml; ml = ml->next) 274825cf1a30Sjl ml->address &= t_slice_mask; 274925cf1a30Sjl 275025cf1a30Sjl canfit = memlist_canfit(s_ml, t_ml); 275125cf1a30Sjl done: 275225cf1a30Sjl memlist_delete(s_ml); 275325cf1a30Sjl memlist_delete(t_ml); 275425cf1a30Sjl 275525cf1a30Sjl return (canfit); 275625cf1a30Sjl } 275725cf1a30Sjl 275825cf1a30Sjl /* 275925cf1a30Sjl * Memlist support. 276025cf1a30Sjl */ 276125cf1a30Sjl 276225cf1a30Sjl /* 276325cf1a30Sjl * Determine whether the source memlist (s_mlist) will 276425cf1a30Sjl * fit into the target memlist (t_mlist) in terms of 276525cf1a30Sjl * size and holes. Assumes the caller has normalized the 276625cf1a30Sjl * memlist physical addresses for comparison. 
276725cf1a30Sjl */ 276825cf1a30Sjl static int 276925cf1a30Sjl memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist) 277025cf1a30Sjl { 277125cf1a30Sjl int rv = 0; 277225cf1a30Sjl struct memlist *s_ml, *t_ml; 277325cf1a30Sjl 277425cf1a30Sjl if ((s_mlist == NULL) || (t_mlist == NULL)) 277525cf1a30Sjl return (0); 277625cf1a30Sjl 277725cf1a30Sjl s_ml = s_mlist; 277825cf1a30Sjl for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->next) { 277925cf1a30Sjl uint64_t s_start, s_end; 278025cf1a30Sjl uint64_t t_start, t_end; 278125cf1a30Sjl 278225cf1a30Sjl t_start = t_ml->address; 278325cf1a30Sjl t_end = t_start + t_ml->size; 278425cf1a30Sjl 278525cf1a30Sjl for (; s_ml; s_ml = s_ml->next) { 278625cf1a30Sjl s_start = s_ml->address; 278725cf1a30Sjl s_end = s_start + s_ml->size; 278825cf1a30Sjl 278925cf1a30Sjl if ((s_start < t_start) || (s_end > t_end)) 279025cf1a30Sjl break; 279125cf1a30Sjl } 279225cf1a30Sjl } 279325cf1a30Sjl 279425cf1a30Sjl /* 279525cf1a30Sjl * If we ran out of source memlist chunks that mean 279625cf1a30Sjl * we found a home for all of them. 279725cf1a30Sjl */ 279825cf1a30Sjl if (s_ml == NULL) 279925cf1a30Sjl rv = 1; 280025cf1a30Sjl 280125cf1a30Sjl return (rv); 280225cf1a30Sjl } 2803