/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * DR memory support routines.
 */

#include <sys/note.h>
#include <sys/debug.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/dditypes.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/ddi_impldefs.h>
#include <sys/ndi_impldefs.h>
#include <sys/sysmacros.h>
#include <sys/machsystm.h>
#include <sys/spitregs.h>
#include <sys/cpuvar.h>
#include <sys/promif.h>
#include <vm/seg_kmem.h>
#include <sys/lgrp.h>
#include <sys/platform_module.h>

#include <vm/page.h>

#include <sys/dr.h>
#include <sys/dr_util.h>
#include <sys/drmach.h>

extern struct memlist	*phys_install;
extern vnode_t		retired_pages;

/* TODO: push this reference below drmach line */
extern int		kcage_on;

/* for the DR*INTERNAL_ERROR macros.  see sys/dr.h. */
static char *dr_ie_fmt = "%M% %d";

typedef enum {
	DR_TP_INVALID = -1,
	DR_TP_SAME,
	DR_TP_LARGE,
	DR_TP_NONRELOC,
	DR_TP_FLOATING
} dr_target_pref_t;

static int		dr_post_detach_mem_unit(dr_mem_unit_t *mp);
static int		dr_reserve_mem_spans(memhandle_t *mhp,
			    struct memlist *mlist);
static int		dr_select_mem_target(dr_handle_t *hp,
			    dr_mem_unit_t *mp, struct memlist *ml);
static void		dr_init_mem_unit_data(dr_mem_unit_t *mp);
static struct memlist	*dr_memlist_del_retired_pages(struct memlist *ml);
static dr_target_pref_t	dr_get_target_preference(dr_handle_t *hp,
			    dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp,
			    struct memlist *s_ml, struct memlist *x_ml,
			    struct memlist *b_ml);

static int		memlist_canfit(struct memlist *s_mlist,
			    struct memlist *t_mlist);
static int		dr_del_mlist_query(struct memlist *mlist,
			    memquery_t *mp);
static struct memlist	*dr_get_copy_mlist(struct memlist *s_ml,
			    struct memlist *t_ml, dr_mem_unit_t *s_mp,
			    dr_mem_unit_t *t_mp);
static struct memlist	*dr_get_nonreloc_mlist(struct memlist *s_ml,
			    dr_mem_unit_t *s_mp);
static int		dr_memlist_canfit(struct memlist *s_mlist,
			    struct memlist *t_mlist, dr_mem_unit_t *s_mp,
			    dr_mem_unit_t *t_mp);

extern void		page_unretire_pages(void);

/*
 * dr_mem_unit_t.sbm_flags
 */
#define	DR_MFLAG_RESERVED	0x01	/* mem unit reserved for delete */
#define	DR_MFLAG_SOURCE		0x02	/* source brd of copy/rename op */
109*25cf1a30Sjl #define DR_MFLAG_TARGET 0x04 /* target brd of copy/rename op */ 110*25cf1a30Sjl #define DR_MFLAG_RELOWNER 0x20 /* memory release (delete) owner */ 111*25cf1a30Sjl #define DR_MFLAG_RELDONE 0x40 /* memory release (delete) done */ 112*25cf1a30Sjl 113*25cf1a30Sjl /* helper macros */ 114*25cf1a30Sjl #define _ptob64(p) ((uint64_t)(p) << PAGESHIFT) 115*25cf1a30Sjl #define _b64top(b) ((pgcnt_t)((b) >> PAGESHIFT)) 116*25cf1a30Sjl 117*25cf1a30Sjl static struct memlist * 118*25cf1a30Sjl dr_get_memlist(dr_mem_unit_t *mp) 119*25cf1a30Sjl { 120*25cf1a30Sjl struct memlist *mlist = NULL; 121*25cf1a30Sjl sbd_error_t *err; 122*25cf1a30Sjl static fn_t f = "dr_get_memlist"; 123*25cf1a30Sjl 124*25cf1a30Sjl PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path); 125*25cf1a30Sjl 126*25cf1a30Sjl /* 127*25cf1a30Sjl * Return cached memlist, if present. 128*25cf1a30Sjl * This memlist will be present following an 129*25cf1a30Sjl * unconfigure (a.k.a: detach) of this memunit. 130*25cf1a30Sjl * It should only be used in the case were a configure 131*25cf1a30Sjl * is bringing this memunit back in without going 132*25cf1a30Sjl * through the disconnect and connect states. 
133*25cf1a30Sjl */ 134*25cf1a30Sjl if (mp->sbm_mlist) { 135*25cf1a30Sjl PR_MEM("%s: found cached memlist\n", f); 136*25cf1a30Sjl 137*25cf1a30Sjl mlist = memlist_dup(mp->sbm_mlist); 138*25cf1a30Sjl } else { 139*25cf1a30Sjl uint64_t basepa = _ptob64(mp->sbm_basepfn); 140*25cf1a30Sjl 141*25cf1a30Sjl /* attempt to construct a memlist using phys_install */ 142*25cf1a30Sjl 143*25cf1a30Sjl /* round down to slice base address */ 144*25cf1a30Sjl basepa &= ~(mp->sbm_slice_size - 1); 145*25cf1a30Sjl 146*25cf1a30Sjl /* get a copy of phys_install to edit */ 147*25cf1a30Sjl memlist_read_lock(); 148*25cf1a30Sjl mlist = memlist_dup(phys_install); 149*25cf1a30Sjl memlist_read_unlock(); 150*25cf1a30Sjl 151*25cf1a30Sjl /* trim lower irrelevant span */ 152*25cf1a30Sjl if (mlist) 153*25cf1a30Sjl mlist = memlist_del_span(mlist, 0ull, basepa); 154*25cf1a30Sjl 155*25cf1a30Sjl /* trim upper irrelevant span */ 156*25cf1a30Sjl if (mlist) { 157*25cf1a30Sjl uint64_t endpa; 158*25cf1a30Sjl 159*25cf1a30Sjl basepa += mp->sbm_slice_size; 160*25cf1a30Sjl endpa = _ptob64(physmax + 1); 161*25cf1a30Sjl if (endpa > basepa) 162*25cf1a30Sjl mlist = memlist_del_span( 163*25cf1a30Sjl mlist, basepa, 164*25cf1a30Sjl endpa - basepa); 165*25cf1a30Sjl } 166*25cf1a30Sjl 167*25cf1a30Sjl if (mlist) { 168*25cf1a30Sjl /* successfully built a memlist */ 169*25cf1a30Sjl PR_MEM("%s: derived memlist from phys_install\n", f); 170*25cf1a30Sjl } 171*25cf1a30Sjl 172*25cf1a30Sjl /* if no mlist yet, try platform layer */ 173*25cf1a30Sjl if (!mlist) { 174*25cf1a30Sjl err = drmach_mem_get_memlist( 175*25cf1a30Sjl mp->sbm_cm.sbdev_id, &mlist); 176*25cf1a30Sjl if (err) { 177*25cf1a30Sjl DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); 178*25cf1a30Sjl mlist = NULL; /* paranoia */ 179*25cf1a30Sjl } 180*25cf1a30Sjl } 181*25cf1a30Sjl } 182*25cf1a30Sjl 183*25cf1a30Sjl PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path); 184*25cf1a30Sjl PR_MEMLIST_DUMP(mlist); 185*25cf1a30Sjl 186*25cf1a30Sjl return (mlist); 187*25cf1a30Sjl } 
188*25cf1a30Sjl 189*25cf1a30Sjl typedef struct { 190*25cf1a30Sjl kcondvar_t cond; 191*25cf1a30Sjl kmutex_t lock; 192*25cf1a30Sjl int error; 193*25cf1a30Sjl int done; 194*25cf1a30Sjl } dr_release_mem_sync_t; 195*25cf1a30Sjl 196*25cf1a30Sjl /* 197*25cf1a30Sjl * Memory has been logically removed by the time this routine is called. 198*25cf1a30Sjl */ 199*25cf1a30Sjl static void 200*25cf1a30Sjl dr_mem_del_done(void *arg, int error) 201*25cf1a30Sjl { 202*25cf1a30Sjl dr_release_mem_sync_t *ds = arg; 203*25cf1a30Sjl 204*25cf1a30Sjl mutex_enter(&ds->lock); 205*25cf1a30Sjl ds->error = error; 206*25cf1a30Sjl ds->done = 1; 207*25cf1a30Sjl cv_signal(&ds->cond); 208*25cf1a30Sjl mutex_exit(&ds->lock); 209*25cf1a30Sjl } 210*25cf1a30Sjl 211*25cf1a30Sjl /* 212*25cf1a30Sjl * When we reach here the memory being drained should have 213*25cf1a30Sjl * already been reserved in dr_pre_release_mem(). 214*25cf1a30Sjl * Our only task here is to kick off the "drain" and wait 215*25cf1a30Sjl * for it to finish. 216*25cf1a30Sjl */ 217*25cf1a30Sjl void 218*25cf1a30Sjl dr_release_mem(dr_common_unit_t *cp) 219*25cf1a30Sjl { 220*25cf1a30Sjl dr_mem_unit_t *mp = (dr_mem_unit_t *)cp; 221*25cf1a30Sjl int err; 222*25cf1a30Sjl dr_release_mem_sync_t rms; 223*25cf1a30Sjl static fn_t f = "dr_release_mem"; 224*25cf1a30Sjl 225*25cf1a30Sjl /* check that this memory unit has been reserved */ 226*25cf1a30Sjl if (!(mp->sbm_flags & DR_MFLAG_RELOWNER)) { 227*25cf1a30Sjl DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); 228*25cf1a30Sjl return; 229*25cf1a30Sjl } 230*25cf1a30Sjl 231*25cf1a30Sjl bzero((void *) &rms, sizeof (rms)); 232*25cf1a30Sjl 233*25cf1a30Sjl mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL); 234*25cf1a30Sjl cv_init(&rms.cond, NULL, CV_DRIVER, NULL); 235*25cf1a30Sjl 236*25cf1a30Sjl mutex_enter(&rms.lock); 237*25cf1a30Sjl err = kphysm_del_start(mp->sbm_memhandle, 238*25cf1a30Sjl dr_mem_del_done, (void *) &rms); 239*25cf1a30Sjl if (err == KPHYSM_OK) { 240*25cf1a30Sjl /* wait for completion or interrupt */ 
241*25cf1a30Sjl while (!rms.done) { 242*25cf1a30Sjl if (cv_wait_sig(&rms.cond, &rms.lock) == 0) { 243*25cf1a30Sjl /* then there is a pending UNIX signal */ 244*25cf1a30Sjl (void) kphysm_del_cancel(mp->sbm_memhandle); 245*25cf1a30Sjl 246*25cf1a30Sjl /* wait for completion */ 247*25cf1a30Sjl while (!rms.done) 248*25cf1a30Sjl cv_wait(&rms.cond, &rms.lock); 249*25cf1a30Sjl } 250*25cf1a30Sjl } 251*25cf1a30Sjl /* get the result of the memory delete operation */ 252*25cf1a30Sjl err = rms.error; 253*25cf1a30Sjl } 254*25cf1a30Sjl mutex_exit(&rms.lock); 255*25cf1a30Sjl 256*25cf1a30Sjl cv_destroy(&rms.cond); 257*25cf1a30Sjl mutex_destroy(&rms.lock); 258*25cf1a30Sjl 259*25cf1a30Sjl if (err != KPHYSM_OK) { 260*25cf1a30Sjl int e_code; 261*25cf1a30Sjl 262*25cf1a30Sjl switch (err) { 263*25cf1a30Sjl case KPHYSM_ENOWORK: 264*25cf1a30Sjl e_code = ESBD_NOERROR; 265*25cf1a30Sjl break; 266*25cf1a30Sjl 267*25cf1a30Sjl case KPHYSM_EHANDLE: 268*25cf1a30Sjl case KPHYSM_ESEQUENCE: 269*25cf1a30Sjl e_code = ESBD_INTERNAL; 270*25cf1a30Sjl break; 271*25cf1a30Sjl 272*25cf1a30Sjl case KPHYSM_ENOTVIABLE: 273*25cf1a30Sjl e_code = ESBD_MEM_NOTVIABLE; 274*25cf1a30Sjl break; 275*25cf1a30Sjl 276*25cf1a30Sjl case KPHYSM_EREFUSED: 277*25cf1a30Sjl e_code = ESBD_MEM_REFUSED; 278*25cf1a30Sjl break; 279*25cf1a30Sjl 280*25cf1a30Sjl case KPHYSM_ENONRELOC: 281*25cf1a30Sjl e_code = ESBD_MEM_NONRELOC; 282*25cf1a30Sjl break; 283*25cf1a30Sjl 284*25cf1a30Sjl case KPHYSM_ECANCELLED: 285*25cf1a30Sjl e_code = ESBD_MEM_CANCELLED; 286*25cf1a30Sjl break; 287*25cf1a30Sjl 288*25cf1a30Sjl case KPHYSM_ERESOURCE: 289*25cf1a30Sjl e_code = ESBD_MEMFAIL; 290*25cf1a30Sjl break; 291*25cf1a30Sjl 292*25cf1a30Sjl default: 293*25cf1a30Sjl cmn_err(CE_WARN, 294*25cf1a30Sjl "%s: unexpected kphysm error code %d," 295*25cf1a30Sjl " id 0x%p", 296*25cf1a30Sjl f, err, mp->sbm_cm.sbdev_id); 297*25cf1a30Sjl 298*25cf1a30Sjl e_code = ESBD_IO; 299*25cf1a30Sjl break; 300*25cf1a30Sjl } 301*25cf1a30Sjl 302*25cf1a30Sjl if (e_code != ESBD_NOERROR) { 
303*25cf1a30Sjl dr_dev_err(CE_IGNORE, &mp->sbm_cm, e_code); 304*25cf1a30Sjl } 305*25cf1a30Sjl } 306*25cf1a30Sjl } 307*25cf1a30Sjl 308*25cf1a30Sjl void 309*25cf1a30Sjl dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp) 310*25cf1a30Sjl { 311*25cf1a30Sjl _NOTE(ARGUNUSED(hp)) 312*25cf1a30Sjl 313*25cf1a30Sjl dr_mem_unit_t *mp = (dr_mem_unit_t *)cp; 314*25cf1a30Sjl struct memlist *ml, *mc; 315*25cf1a30Sjl sbd_error_t *err; 316*25cf1a30Sjl static fn_t f = "dr_attach_mem"; 317*25cf1a30Sjl 318*25cf1a30Sjl PR_MEM("%s...\n", f); 319*25cf1a30Sjl 320*25cf1a30Sjl dr_lock_status(hp->h_bd); 321*25cf1a30Sjl err = drmach_configure(cp->sbdev_id, 0); 322*25cf1a30Sjl dr_unlock_status(hp->h_bd); 323*25cf1a30Sjl if (err) { 324*25cf1a30Sjl DRERR_SET_C(&cp->sbdev_error, &err); 325*25cf1a30Sjl return; 326*25cf1a30Sjl } 327*25cf1a30Sjl 328*25cf1a30Sjl ml = dr_get_memlist(mp); 329*25cf1a30Sjl for (mc = ml; mc; mc = mc->next) { 330*25cf1a30Sjl int rv; 331*25cf1a30Sjl sbd_error_t *err; 332*25cf1a30Sjl 333*25cf1a30Sjl rv = kphysm_add_memory_dynamic( 334*25cf1a30Sjl (pfn_t)(mc->address >> PAGESHIFT), 335*25cf1a30Sjl (pgcnt_t)(mc->size >> PAGESHIFT)); 336*25cf1a30Sjl if (rv != KPHYSM_OK) { 337*25cf1a30Sjl /* 338*25cf1a30Sjl * translate kphysm error and 339*25cf1a30Sjl * store in devlist error 340*25cf1a30Sjl */ 341*25cf1a30Sjl switch (rv) { 342*25cf1a30Sjl case KPHYSM_ERESOURCE: 343*25cf1a30Sjl rv = ESBD_NOMEM; 344*25cf1a30Sjl break; 345*25cf1a30Sjl 346*25cf1a30Sjl case KPHYSM_EFAULT: 347*25cf1a30Sjl rv = ESBD_FAULT; 348*25cf1a30Sjl break; 349*25cf1a30Sjl 350*25cf1a30Sjl default: 351*25cf1a30Sjl rv = ESBD_INTERNAL; 352*25cf1a30Sjl break; 353*25cf1a30Sjl } 354*25cf1a30Sjl 355*25cf1a30Sjl if (rv == ESBD_INTERNAL) { 356*25cf1a30Sjl DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); 357*25cf1a30Sjl } else 358*25cf1a30Sjl dr_dev_err(CE_WARN, &mp->sbm_cm, rv); 359*25cf1a30Sjl break; 360*25cf1a30Sjl } 361*25cf1a30Sjl 362*25cf1a30Sjl err = drmach_mem_add_span( 363*25cf1a30Sjl mp->sbm_cm.sbdev_id, mc->address, 
mc->size); 364*25cf1a30Sjl if (err) { 365*25cf1a30Sjl DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); 366*25cf1a30Sjl break; 367*25cf1a30Sjl } 368*25cf1a30Sjl } 369*25cf1a30Sjl 370*25cf1a30Sjl memlist_delete(ml); 371*25cf1a30Sjl 372*25cf1a30Sjl /* back out if configure failed */ 373*25cf1a30Sjl if (mp->sbm_cm.sbdev_error != NULL) { 374*25cf1a30Sjl dr_lock_status(hp->h_bd); 375*25cf1a30Sjl err = drmach_unconfigure(cp->sbdev_id, 0); 376*25cf1a30Sjl if (err) 377*25cf1a30Sjl sbd_err_clear(&err); 378*25cf1a30Sjl dr_unlock_status(hp->h_bd); 379*25cf1a30Sjl } 380*25cf1a30Sjl } 381*25cf1a30Sjl 382*25cf1a30Sjl static struct memlist * 383*25cf1a30Sjl dr_memlist_del_retired_pages(struct memlist *mlist) 384*25cf1a30Sjl { 385*25cf1a30Sjl page_t *pp; 386*25cf1a30Sjl pfn_t pfn; 387*25cf1a30Sjl kmutex_t *vphm; 388*25cf1a30Sjl vnode_t *vp = &retired_pages; 389*25cf1a30Sjl static fn_t f = "dr_memlist_del_retired_pages"; 390*25cf1a30Sjl 391*25cf1a30Sjl vphm = page_vnode_mutex(vp); 392*25cf1a30Sjl mutex_enter(vphm); 393*25cf1a30Sjl 394*25cf1a30Sjl PR_MEM("%s\n", f); 395*25cf1a30Sjl 396*25cf1a30Sjl if ((pp = vp->v_pages) == NULL) { 397*25cf1a30Sjl mutex_exit(vphm); 398*25cf1a30Sjl return (mlist); 399*25cf1a30Sjl } 400*25cf1a30Sjl 401*25cf1a30Sjl do { 402*25cf1a30Sjl ASSERT(pp != NULL); 403*25cf1a30Sjl /* 404*25cf1a30Sjl * page_downgrade happens after page_hashin, so we 405*25cf1a30Sjl * can't assert PAGE_SE. Just assert locked to catch 406*25cf1a30Sjl * changes to the retired vnode locking scheme. 407*25cf1a30Sjl */ 408*25cf1a30Sjl ASSERT(PAGE_LOCKED(pp)); 409*25cf1a30Sjl ASSERT(pp->p_vnode == &retired_pages); 410*25cf1a30Sjl 411*25cf1a30Sjl if (!page_trylock(pp, SE_SHARED)) 412*25cf1a30Sjl continue; 413*25cf1a30Sjl 414*25cf1a30Sjl pfn = page_pptonum(pp); 415*25cf1a30Sjl 416*25cf1a30Sjl ASSERT((pp->p_offset >> PAGESHIFT) == pfn); 417*25cf1a30Sjl /* 418*25cf1a30Sjl * Page retirement currently breaks large pages into PAGESIZE 419*25cf1a30Sjl * pages. 
If this changes, need to remove the assert and deal 420*25cf1a30Sjl * with different page sizes. 421*25cf1a30Sjl */ 422*25cf1a30Sjl ASSERT(pp->p_szc == 0); 423*25cf1a30Sjl 424*25cf1a30Sjl if (address_in_memlist(mlist, ptob(pfn), PAGESIZE)) { 425*25cf1a30Sjl mlist = memlist_del_span(mlist, ptob(pfn), PAGESIZE); 426*25cf1a30Sjl PR_MEM("deleted retired page 0x%lx (pfn 0x%lx) " 427*25cf1a30Sjl "from memlist\n", ptob(pfn), pfn); 428*25cf1a30Sjl } 429*25cf1a30Sjl 430*25cf1a30Sjl page_unlock(pp); 431*25cf1a30Sjl } while ((pp = pp->p_vpnext) != vp->v_pages); 432*25cf1a30Sjl 433*25cf1a30Sjl mutex_exit(vphm); 434*25cf1a30Sjl 435*25cf1a30Sjl return (mlist); 436*25cf1a30Sjl } 437*25cf1a30Sjl 438*25cf1a30Sjl #ifdef DEBUG 439*25cf1a30Sjl int dbg_retirecnt = 10; 440*25cf1a30Sjl 441*25cf1a30Sjl static void 442*25cf1a30Sjl dbg_page_retire(struct memlist *r_ml) 443*25cf1a30Sjl { 444*25cf1a30Sjl struct memlist *t_ml; 445*25cf1a30Sjl page_t *pp, *epp; 446*25cf1a30Sjl pfn_t pfn, epfn; 447*25cf1a30Sjl struct memseg *seg; 448*25cf1a30Sjl 449*25cf1a30Sjl int dbg_retired = 0; 450*25cf1a30Sjl int dbg_skip = 10; 451*25cf1a30Sjl int dbg_seq = 1; 452*25cf1a30Sjl 453*25cf1a30Sjl if (r_ml == NULL) 454*25cf1a30Sjl return; 455*25cf1a30Sjl 456*25cf1a30Sjl for (t_ml = r_ml; (t_ml != NULL); t_ml = t_ml->next) { 457*25cf1a30Sjl pfn = _b64top(t_ml->address); 458*25cf1a30Sjl epfn = _b64top(t_ml->address + t_ml->size); 459*25cf1a30Sjl 460*25cf1a30Sjl for (seg = memsegs; seg != NULL; seg = seg->next) { 461*25cf1a30Sjl int retire = 0; 462*25cf1a30Sjl int skip = 0; 463*25cf1a30Sjl if (pfn >= seg->pages_end || epfn < seg->pages_base) 464*25cf1a30Sjl continue; 465*25cf1a30Sjl 466*25cf1a30Sjl pp = seg->pages; 467*25cf1a30Sjl if (pfn > seg->pages_base) 468*25cf1a30Sjl pp += pfn - seg->pages_base; 469*25cf1a30Sjl 470*25cf1a30Sjl epp = seg->epages; 471*25cf1a30Sjl if (epfn < seg->pages_end) 472*25cf1a30Sjl epp -= seg->pages_end - epfn; 473*25cf1a30Sjl 474*25cf1a30Sjl ASSERT(pp < epp); 475*25cf1a30Sjl #if 0 
476*25cf1a30Sjl while (pp < epp) { 477*25cf1a30Sjl if (PP_ISFREE(pp) && !page_isfaulty(pp)) { 478*25cf1a30Sjl if (retire++ < dbg_seq) { 479*25cf1a30Sjl page_settoxic(pp, 480*25cf1a30Sjl PAGE_IS_FAULTY); 481*25cf1a30Sjl page_retire(pp, 482*25cf1a30Sjl PAGE_IS_FAILING); 483*25cf1a30Sjl if (++dbg_retired >= 484*25cf1a30Sjl dbg_retirecnt) 485*25cf1a30Sjl return; 486*25cf1a30Sjl } else if (skip++ >= dbg_skip) { 487*25cf1a30Sjl skip = 0; 488*25cf1a30Sjl retire = 0; 489*25cf1a30Sjl dbg_seq++; 490*25cf1a30Sjl } 491*25cf1a30Sjl } 492*25cf1a30Sjl pp++; 493*25cf1a30Sjl } 494*25cf1a30Sjl #endif /* 0 */ 495*25cf1a30Sjl while (pp < epp) { 496*25cf1a30Sjl if (PP_ISFREE(pp)) { 497*25cf1a30Sjl if (retire++ < dbg_seq) { 498*25cf1a30Sjl page_retire(t_ml->address, 499*25cf1a30Sjl PR_OK); 500*25cf1a30Sjl if (++dbg_retired >= 501*25cf1a30Sjl dbg_retirecnt) 502*25cf1a30Sjl return; 503*25cf1a30Sjl } else if (skip++ >= dbg_skip) { 504*25cf1a30Sjl skip = 0; 505*25cf1a30Sjl retire = 0; 506*25cf1a30Sjl dbg_seq++; 507*25cf1a30Sjl } 508*25cf1a30Sjl } 509*25cf1a30Sjl pp++; 510*25cf1a30Sjl } 511*25cf1a30Sjl } 512*25cf1a30Sjl } 513*25cf1a30Sjl } 514*25cf1a30Sjl #endif 515*25cf1a30Sjl 516*25cf1a30Sjl static int 517*25cf1a30Sjl dr_move_memory(dr_handle_t *hp, dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp) 518*25cf1a30Sjl { 519*25cf1a30Sjl int rv = -1; 520*25cf1a30Sjl time_t copytime; 521*25cf1a30Sjl drmachid_t cr_id; 522*25cf1a30Sjl dr_sr_handle_t *srhp = NULL; 523*25cf1a30Sjl dr_board_t *t_bp, *s_bp; 524*25cf1a30Sjl struct memlist *c_ml, *d_ml; 525*25cf1a30Sjl sbd_error_t *err; 526*25cf1a30Sjl static fn_t f = "dr_move_memory"; 527*25cf1a30Sjl 528*25cf1a30Sjl PR_MEM("%s: (INLINE) moving memory from %s to %s\n", 529*25cf1a30Sjl f, 530*25cf1a30Sjl s_mp->sbm_cm.sbdev_path, 531*25cf1a30Sjl t_mp->sbm_cm.sbdev_path); 532*25cf1a30Sjl 533*25cf1a30Sjl ASSERT(s_mp->sbm_flags & DR_MFLAG_SOURCE); 534*25cf1a30Sjl ASSERT(s_mp->sbm_peer == t_mp); 535*25cf1a30Sjl ASSERT(s_mp->sbm_mlist); 536*25cf1a30Sjl 537*25cf1a30Sjl 
ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET); 538*25cf1a30Sjl ASSERT(t_mp->sbm_peer == s_mp); 539*25cf1a30Sjl 540*25cf1a30Sjl #ifdef DEBUG 541*25cf1a30Sjl if (dbg_retirecnt) 542*25cf1a30Sjl dbg_page_retire(s_mp->sbm_mlist); 543*25cf1a30Sjl #endif 544*25cf1a30Sjl 545*25cf1a30Sjl /* 546*25cf1a30Sjl * create a memlist of spans to copy by removing 547*25cf1a30Sjl * the spans that have been deleted, if any, from 548*25cf1a30Sjl * the full source board memlist. s_mp->sbm_del_mlist 549*25cf1a30Sjl * will be NULL if there were no spans deleted from 550*25cf1a30Sjl * the source board. 551*25cf1a30Sjl */ 552*25cf1a30Sjl c_ml = memlist_dup(s_mp->sbm_mlist); 553*25cf1a30Sjl d_ml = s_mp->sbm_del_mlist; 554*25cf1a30Sjl while (d_ml != NULL) { 555*25cf1a30Sjl c_ml = memlist_del_span(c_ml, d_ml->address, d_ml->size); 556*25cf1a30Sjl d_ml = d_ml->next; 557*25cf1a30Sjl } 558*25cf1a30Sjl 559*25cf1a30Sjl /* 560*25cf1a30Sjl * Remove retired pages from the copy list. The page content 561*25cf1a30Sjl * need not be copied since the pages are no longer in use. 562*25cf1a30Sjl */ 563*25cf1a30Sjl PR_MEM("%s: copy list before removing retired pages (if any):\n", f); 564*25cf1a30Sjl PR_MEMLIST_DUMP(c_ml); 565*25cf1a30Sjl 566*25cf1a30Sjl c_ml = dr_memlist_del_retired_pages(c_ml); 567*25cf1a30Sjl 568*25cf1a30Sjl PR_MEM("%s: copy list after removing retired pages:\n", f); 569*25cf1a30Sjl PR_MEMLIST_DUMP(c_ml); 570*25cf1a30Sjl 571*25cf1a30Sjl /* 572*25cf1a30Sjl * With parallel copy, it shouldn't make a difference which 573*25cf1a30Sjl * CPU is the actual master during copy-rename since all 574*25cf1a30Sjl * CPUs participate in the parallel copy anyway. 
575*25cf1a30Sjl */ 576*25cf1a30Sjl affinity_set(CPU_CURRENT); 577*25cf1a30Sjl 578*25cf1a30Sjl err = drmach_copy_rename_init( 579*25cf1a30Sjl t_mp->sbm_cm.sbdev_id, s_mp->sbm_cm.sbdev_id, c_ml, &cr_id); 580*25cf1a30Sjl if (err) { 581*25cf1a30Sjl DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err); 582*25cf1a30Sjl affinity_clear(); 583*25cf1a30Sjl memlist_delete(c_ml); 584*25cf1a30Sjl return (-1); 585*25cf1a30Sjl } 586*25cf1a30Sjl 587*25cf1a30Sjl srhp = dr_get_sr_handle(hp); 588*25cf1a30Sjl ASSERT(srhp); 589*25cf1a30Sjl 590*25cf1a30Sjl copytime = lbolt; 591*25cf1a30Sjl 592*25cf1a30Sjl /* Quiesce the OS. */ 593*25cf1a30Sjl if (dr_suspend(srhp)) { 594*25cf1a30Sjl cmn_err(CE_WARN, "%s: failed to quiesce OS" 595*25cf1a30Sjl " for copy-rename", f); 596*25cf1a30Sjl 597*25cf1a30Sjl err = drmach_copy_rename_fini(cr_id); 598*25cf1a30Sjl if (err) { 599*25cf1a30Sjl /* 600*25cf1a30Sjl * no error is expected since the program has 601*25cf1a30Sjl * not yet run. 602*25cf1a30Sjl */ 603*25cf1a30Sjl 604*25cf1a30Sjl /* catch this in debug kernels */ 605*25cf1a30Sjl ASSERT(0); 606*25cf1a30Sjl 607*25cf1a30Sjl sbd_err_clear(&err); 608*25cf1a30Sjl } 609*25cf1a30Sjl 610*25cf1a30Sjl /* suspend error reached via hp */ 611*25cf1a30Sjl s_mp->sbm_cm.sbdev_error = hp->h_err; 612*25cf1a30Sjl hp->h_err = NULL; 613*25cf1a30Sjl goto done; 614*25cf1a30Sjl } 615*25cf1a30Sjl 616*25cf1a30Sjl drmach_copy_rename(cr_id); 617*25cf1a30Sjl 618*25cf1a30Sjl /* Resume the OS. */ 619*25cf1a30Sjl dr_resume(srhp); 620*25cf1a30Sjl 621*25cf1a30Sjl copytime = lbolt - copytime; 622*25cf1a30Sjl 623*25cf1a30Sjl if (err = drmach_copy_rename_fini(cr_id)) 624*25cf1a30Sjl goto done; 625*25cf1a30Sjl 626*25cf1a30Sjl /* 627*25cf1a30Sjl * Rename memory for lgroup. 628*25cf1a30Sjl * Source and target board numbers are packaged in arg. 
629*25cf1a30Sjl */ 630*25cf1a30Sjl s_bp = s_mp->sbm_cm.sbdev_bp; 631*25cf1a30Sjl t_bp = t_mp->sbm_cm.sbdev_bp; 632*25cf1a30Sjl 633*25cf1a30Sjl lgrp_plat_config(LGRP_CONFIG_MEM_RENAME, 634*25cf1a30Sjl (uintptr_t)(s_bp->b_num | (t_bp->b_num << 16))); 635*25cf1a30Sjl 636*25cf1a30Sjl 637*25cf1a30Sjl PR_MEM("%s: copy-rename elapsed time = %ld ticks (%ld secs)\n", 638*25cf1a30Sjl f, copytime, copytime / hz); 639*25cf1a30Sjl 640*25cf1a30Sjl rv = 0; 641*25cf1a30Sjl done: 642*25cf1a30Sjl if (srhp) 643*25cf1a30Sjl dr_release_sr_handle(srhp); 644*25cf1a30Sjl if (err) 645*25cf1a30Sjl DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err); 646*25cf1a30Sjl affinity_clear(); 647*25cf1a30Sjl 648*25cf1a30Sjl return (rv); 649*25cf1a30Sjl } 650*25cf1a30Sjl 651*25cf1a30Sjl /* 652*25cf1a30Sjl * If detaching node contains memory that is "non-permanent" 653*25cf1a30Sjl * then the memory adr's are simply cleared. If the memory 654*25cf1a30Sjl * is non-relocatable, then do a copy-rename. 655*25cf1a30Sjl */ 656*25cf1a30Sjl void 657*25cf1a30Sjl dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp) 658*25cf1a30Sjl { 659*25cf1a30Sjl int rv = 0; 660*25cf1a30Sjl dr_mem_unit_t *s_mp = (dr_mem_unit_t *)cp; 661*25cf1a30Sjl dr_mem_unit_t *t_mp; 662*25cf1a30Sjl dr_state_t state; 663*25cf1a30Sjl static fn_t f = "dr_detach_mem"; 664*25cf1a30Sjl 665*25cf1a30Sjl PR_MEM("%s...\n", f); 666*25cf1a30Sjl 667*25cf1a30Sjl /* lookup target mem unit and target board structure, if any */ 668*25cf1a30Sjl if (s_mp->sbm_flags & DR_MFLAG_SOURCE) { 669*25cf1a30Sjl t_mp = s_mp->sbm_peer; 670*25cf1a30Sjl ASSERT(t_mp != NULL); 671*25cf1a30Sjl ASSERT(t_mp->sbm_peer == s_mp); 672*25cf1a30Sjl } else { 673*25cf1a30Sjl t_mp = NULL; 674*25cf1a30Sjl } 675*25cf1a30Sjl 676*25cf1a30Sjl /* verify mem unit's state is UNREFERENCED */ 677*25cf1a30Sjl state = s_mp->sbm_cm.sbdev_state; 678*25cf1a30Sjl if (state != DR_STATE_UNREFERENCED) { 679*25cf1a30Sjl dr_dev_err(CE_IGNORE, &s_mp->sbm_cm, ESBD_STATE); 680*25cf1a30Sjl return; 681*25cf1a30Sjl } 
682*25cf1a30Sjl 683*25cf1a30Sjl /* verify target mem unit's state is UNREFERENCED, if any */ 684*25cf1a30Sjl if (t_mp != NULL) { 685*25cf1a30Sjl state = t_mp->sbm_cm.sbdev_state; 686*25cf1a30Sjl if (state != DR_STATE_UNREFERENCED) { 687*25cf1a30Sjl dr_dev_err(CE_IGNORE, &t_mp->sbm_cm, ESBD_STATE); 688*25cf1a30Sjl return; 689*25cf1a30Sjl } 690*25cf1a30Sjl } 691*25cf1a30Sjl 692*25cf1a30Sjl /* 693*25cf1a30Sjl * If there is no target board (no copy/rename was needed), then 694*25cf1a30Sjl * we're done! 695*25cf1a30Sjl */ 696*25cf1a30Sjl if (t_mp == NULL) { 697*25cf1a30Sjl sbd_error_t *err; 698*25cf1a30Sjl /* 699*25cf1a30Sjl * Reprogram interconnect hardware and disable 700*25cf1a30Sjl * memory controllers for memory node that's going away. 701*25cf1a30Sjl */ 702*25cf1a30Sjl 703*25cf1a30Sjl err = drmach_mem_disable(s_mp->sbm_cm.sbdev_id); 704*25cf1a30Sjl if (err) { 705*25cf1a30Sjl DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err); 706*25cf1a30Sjl rv = -1; 707*25cf1a30Sjl } 708*25cf1a30Sjl } else { 709*25cf1a30Sjl rv = dr_move_memory(hp, s_mp, t_mp); 710*25cf1a30Sjl PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n", 711*25cf1a30Sjl f, 712*25cf1a30Sjl rv ? "FAILED" : "COMPLETED", 713*25cf1a30Sjl s_mp->sbm_cm.sbdev_bp->b_num, 714*25cf1a30Sjl t_mp->sbm_cm.sbdev_bp->b_num); 715*25cf1a30Sjl 716*25cf1a30Sjl if (rv != 0) 717*25cf1a30Sjl (void) dr_cancel_mem(s_mp); 718*25cf1a30Sjl } 719*25cf1a30Sjl 720*25cf1a30Sjl if (rv == 0) { 721*25cf1a30Sjl sbd_error_t *err; 722*25cf1a30Sjl 723*25cf1a30Sjl dr_lock_status(hp->h_bd); 724*25cf1a30Sjl err = drmach_unconfigure(s_mp->sbm_cm.sbdev_id, 0); 725*25cf1a30Sjl dr_unlock_status(hp->h_bd); 726*25cf1a30Sjl if (err) 727*25cf1a30Sjl sbd_err_clear(&err); 728*25cf1a30Sjl } 729*25cf1a30Sjl } 730*25cf1a30Sjl 731*25cf1a30Sjl /* 732*25cf1a30Sjl * This routine acts as a wrapper for kphysm_del_span_query in order to 733*25cf1a30Sjl * support potential memory holes in a board's physical address space. 
734*25cf1a30Sjl * It calls kphysm_del_span_query for each node in a memlist and accumulates 735*25cf1a30Sjl * the results in *mp. 736*25cf1a30Sjl */ 737*25cf1a30Sjl static int 738*25cf1a30Sjl dr_del_mlist_query(struct memlist *mlist, memquery_t *mp) 739*25cf1a30Sjl { 740*25cf1a30Sjl struct memlist *ml; 741*25cf1a30Sjl int rv = 0; 742*25cf1a30Sjl 743*25cf1a30Sjl 744*25cf1a30Sjl if (mlist == NULL) 745*25cf1a30Sjl cmn_err(CE_WARN, "dr_del_mlist_query: mlist=NULL\n"); 746*25cf1a30Sjl 747*25cf1a30Sjl mp->phys_pages = 0; 748*25cf1a30Sjl mp->managed = 0; 749*25cf1a30Sjl mp->nonrelocatable = 0; 750*25cf1a30Sjl mp->first_nonrelocatable = (pfn_t)-1; /* XXX */ 751*25cf1a30Sjl mp->last_nonrelocatable = 0; 752*25cf1a30Sjl 753*25cf1a30Sjl for (ml = mlist; ml; ml = ml->next) { 754*25cf1a30Sjl memquery_t mq; 755*25cf1a30Sjl 756*25cf1a30Sjl rv = kphysm_del_span_query( 757*25cf1a30Sjl _b64top(ml->address), _b64top(ml->size), &mq); 758*25cf1a30Sjl if (rv) 759*25cf1a30Sjl break; 760*25cf1a30Sjl 761*25cf1a30Sjl mp->phys_pages += mq.phys_pages; 762*25cf1a30Sjl mp->managed += mq.managed; 763*25cf1a30Sjl mp->nonrelocatable += mq.nonrelocatable; 764*25cf1a30Sjl 765*25cf1a30Sjl if (mq.nonrelocatable != 0) { 766*25cf1a30Sjl if (mq.first_nonrelocatable < mp->first_nonrelocatable) 767*25cf1a30Sjl mp->first_nonrelocatable = 768*25cf1a30Sjl mq.first_nonrelocatable; 769*25cf1a30Sjl if (mq.last_nonrelocatable > mp->last_nonrelocatable) 770*25cf1a30Sjl mp->last_nonrelocatable = 771*25cf1a30Sjl mq.last_nonrelocatable; 772*25cf1a30Sjl } 773*25cf1a30Sjl } 774*25cf1a30Sjl 775*25cf1a30Sjl if (mp->nonrelocatable == 0) 776*25cf1a30Sjl mp->first_nonrelocatable = 0; /* XXX */ 777*25cf1a30Sjl 778*25cf1a30Sjl return (rv); 779*25cf1a30Sjl } 780*25cf1a30Sjl 781*25cf1a30Sjl /* 782*25cf1a30Sjl * NOTE: This routine is only partially smart about multiple 783*25cf1a30Sjl * mem-units. Need to make mem-status structure smart 784*25cf1a30Sjl * about them also. 
785*25cf1a30Sjl */ 786*25cf1a30Sjl int 787*25cf1a30Sjl dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp) 788*25cf1a30Sjl { 789*25cf1a30Sjl int m, mix; 790*25cf1a30Sjl memdelstat_t mdst; 791*25cf1a30Sjl memquery_t mq; 792*25cf1a30Sjl dr_board_t *bp; 793*25cf1a30Sjl dr_mem_unit_t *mp; 794*25cf1a30Sjl sbd_mem_stat_t *msp; 795*25cf1a30Sjl static fn_t f = "dr_mem_status"; 796*25cf1a30Sjl 797*25cf1a30Sjl bp = hp->h_bd; 798*25cf1a30Sjl devset &= DR_DEVS_PRESENT(bp); 799*25cf1a30Sjl 800*25cf1a30Sjl for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) { 801*25cf1a30Sjl int rv; 802*25cf1a30Sjl sbd_error_t *err; 803*25cf1a30Sjl drmach_status_t pstat; 804*25cf1a30Sjl dr_mem_unit_t *p_mp; 805*25cf1a30Sjl 806*25cf1a30Sjl if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0) 807*25cf1a30Sjl continue; 808*25cf1a30Sjl 809*25cf1a30Sjl mp = dr_get_mem_unit(bp, m); 810*25cf1a30Sjl 811*25cf1a30Sjl if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) { 812*25cf1a30Sjl /* present, but not fully initialized */ 813*25cf1a30Sjl continue; 814*25cf1a30Sjl } 815*25cf1a30Sjl 816*25cf1a30Sjl if (mp->sbm_cm.sbdev_id == (drmachid_t)0) 817*25cf1a30Sjl continue; 818*25cf1a30Sjl 819*25cf1a30Sjl /* fetch platform status */ 820*25cf1a30Sjl err = drmach_status(mp->sbm_cm.sbdev_id, &pstat); 821*25cf1a30Sjl if (err) { 822*25cf1a30Sjl DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); 823*25cf1a30Sjl continue; 824*25cf1a30Sjl } 825*25cf1a30Sjl 826*25cf1a30Sjl msp = &dsp->d_mem; 827*25cf1a30Sjl bzero((caddr_t)msp, sizeof (*msp)); 828*25cf1a30Sjl 829*25cf1a30Sjl strncpy(msp->ms_cm.c_id.c_name, pstat.type, 830*25cf1a30Sjl sizeof (msp->ms_cm.c_id.c_name)); 831*25cf1a30Sjl msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type; 832*25cf1a30Sjl msp->ms_cm.c_id.c_unit = SBD_NULL_UNIT; 833*25cf1a30Sjl msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond; 834*25cf1a30Sjl msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy; 835*25cf1a30Sjl msp->ms_cm.c_time = mp->sbm_cm.sbdev_time; 836*25cf1a30Sjl msp->ms_cm.c_ostate = 
mp->sbm_cm.sbdev_ostate; 837*25cf1a30Sjl 838*25cf1a30Sjl msp->ms_totpages = mp->sbm_npages; 839*25cf1a30Sjl msp->ms_basepfn = mp->sbm_basepfn; 840*25cf1a30Sjl msp->ms_pageslost = mp->sbm_pageslost; 841*25cf1a30Sjl msp->ms_cage_enabled = kcage_on; 842*25cf1a30Sjl 843*25cf1a30Sjl if (mp->sbm_flags & DR_MFLAG_RESERVED) 844*25cf1a30Sjl p_mp = mp->sbm_peer; 845*25cf1a30Sjl else 846*25cf1a30Sjl p_mp = NULL; 847*25cf1a30Sjl 848*25cf1a30Sjl if (p_mp == NULL) { 849*25cf1a30Sjl msp->ms_peer_is_target = 0; 850*25cf1a30Sjl msp->ms_peer_ap_id[0] = '\0'; 851*25cf1a30Sjl } else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) { 852*25cf1a30Sjl char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 853*25cf1a30Sjl char *minor; 854*25cf1a30Sjl 855*25cf1a30Sjl /* 856*25cf1a30Sjl * b_dip doesn't have to be held for ddi_pathname() 857*25cf1a30Sjl * because the board struct (dr_board_t) will be 858*25cf1a30Sjl * destroyed before b_dip detaches. 859*25cf1a30Sjl */ 860*25cf1a30Sjl (void) ddi_pathname(bp->b_dip, path); 861*25cf1a30Sjl minor = strchr(p_mp->sbm_cm.sbdev_path, ':'); 862*25cf1a30Sjl 863*25cf1a30Sjl snprintf(msp->ms_peer_ap_id, 864*25cf1a30Sjl sizeof (msp->ms_peer_ap_id), "%s%s", 865*25cf1a30Sjl path, (minor == NULL) ? "" : minor); 866*25cf1a30Sjl 867*25cf1a30Sjl kmem_free(path, MAXPATHLEN); 868*25cf1a30Sjl 869*25cf1a30Sjl if (p_mp->sbm_flags & DR_MFLAG_TARGET) 870*25cf1a30Sjl msp->ms_peer_is_target = 1; 871*25cf1a30Sjl } 872*25cf1a30Sjl 873*25cf1a30Sjl if (mp->sbm_flags & DR_MFLAG_RELOWNER) 874*25cf1a30Sjl rv = kphysm_del_status(mp->sbm_memhandle, &mdst); 875*25cf1a30Sjl else 876*25cf1a30Sjl rv = KPHYSM_EHANDLE; /* force 'if' to fail */ 877*25cf1a30Sjl 878*25cf1a30Sjl if (rv == KPHYSM_OK) { 879*25cf1a30Sjl /* 880*25cf1a30Sjl * Any pages above managed is "free", 881*25cf1a30Sjl * i.e. it's collected. 
882*25cf1a30Sjl */ 883*25cf1a30Sjl msp->ms_detpages += (uint_t)(mdst.collected + 884*25cf1a30Sjl mdst.phys_pages - mdst.managed); 885*25cf1a30Sjl } else { 886*25cf1a30Sjl /* 887*25cf1a30Sjl * If we're UNREFERENCED or UNCONFIGURED, 888*25cf1a30Sjl * then the number of detached pages is 889*25cf1a30Sjl * however many pages are on the board. 890*25cf1a30Sjl * I.e. detached = not in use by OS. 891*25cf1a30Sjl */ 892*25cf1a30Sjl switch (msp->ms_cm.c_ostate) { 893*25cf1a30Sjl /* 894*25cf1a30Sjl * changed to use cfgadm states 895*25cf1a30Sjl * 896*25cf1a30Sjl * was: 897*25cf1a30Sjl * case DR_STATE_UNREFERENCED: 898*25cf1a30Sjl * case DR_STATE_UNCONFIGURED: 899*25cf1a30Sjl */ 900*25cf1a30Sjl case SBD_STAT_UNCONFIGURED: 901*25cf1a30Sjl msp->ms_detpages = msp->ms_totpages; 902*25cf1a30Sjl break; 903*25cf1a30Sjl 904*25cf1a30Sjl default: 905*25cf1a30Sjl break; 906*25cf1a30Sjl } 907*25cf1a30Sjl } 908*25cf1a30Sjl 909*25cf1a30Sjl /* 910*25cf1a30Sjl * kphysm_del_span_query can report non-reloc pages = total 911*25cf1a30Sjl * pages for memory that is not yet configured 912*25cf1a30Sjl */ 913*25cf1a30Sjl if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) { 914*25cf1a30Sjl struct memlist *ml; 915*25cf1a30Sjl 916*25cf1a30Sjl ml = dr_get_memlist(mp); 917*25cf1a30Sjl rv = ml ? 
dr_del_mlist_query(ml, &mq) : -1; 918*25cf1a30Sjl memlist_delete(ml); 919*25cf1a30Sjl 920*25cf1a30Sjl if (rv == KPHYSM_OK) { 921*25cf1a30Sjl msp->ms_managed_pages = mq.managed; 922*25cf1a30Sjl msp->ms_noreloc_pages = mq.nonrelocatable; 923*25cf1a30Sjl msp->ms_noreloc_first = 924*25cf1a30Sjl mq.first_nonrelocatable; 925*25cf1a30Sjl msp->ms_noreloc_last = 926*25cf1a30Sjl mq.last_nonrelocatable; 927*25cf1a30Sjl msp->ms_cm.c_sflags = 0; 928*25cf1a30Sjl if (mq.nonrelocatable) { 929*25cf1a30Sjl SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE, 930*25cf1a30Sjl msp->ms_cm.c_sflags); 931*25cf1a30Sjl } 932*25cf1a30Sjl } else { 933*25cf1a30Sjl PR_MEM("%s: kphysm_del_span_query() = %d\n", 934*25cf1a30Sjl f, rv); 935*25cf1a30Sjl } 936*25cf1a30Sjl } 937*25cf1a30Sjl 938*25cf1a30Sjl /* 939*25cf1a30Sjl * Check source unit state during copy-rename 940*25cf1a30Sjl */ 941*25cf1a30Sjl if ((mp->sbm_flags & DR_MFLAG_SOURCE) && 942*25cf1a30Sjl (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED || 943*25cf1a30Sjl mp->sbm_cm.sbdev_state == DR_STATE_RELEASE)) 944*25cf1a30Sjl msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED; 945*25cf1a30Sjl 946*25cf1a30Sjl mix++; 947*25cf1a30Sjl dsp++; 948*25cf1a30Sjl } 949*25cf1a30Sjl 950*25cf1a30Sjl return (mix); 951*25cf1a30Sjl } 952*25cf1a30Sjl 953*25cf1a30Sjl int 954*25cf1a30Sjl dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) 955*25cf1a30Sjl { 956*25cf1a30Sjl _NOTE(ARGUNUSED(hp)) 957*25cf1a30Sjl 958*25cf1a30Sjl int err_flag = 0; 959*25cf1a30Sjl int d; 960*25cf1a30Sjl sbd_error_t *err; 961*25cf1a30Sjl static fn_t f = "dr_pre_attach_mem"; 962*25cf1a30Sjl 963*25cf1a30Sjl PR_MEM("%s...\n", f); 964*25cf1a30Sjl 965*25cf1a30Sjl for (d = 0; d < devnum; d++) { 966*25cf1a30Sjl dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d]; 967*25cf1a30Sjl dr_state_t state; 968*25cf1a30Sjl 969*25cf1a30Sjl cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path); 970*25cf1a30Sjl 971*25cf1a30Sjl state = mp->sbm_cm.sbdev_state; 972*25cf1a30Sjl switch (state) { 973*25cf1a30Sjl 
case DR_STATE_UNCONFIGURED: 974*25cf1a30Sjl PR_MEM("%s: recovering from UNCONFIG for %s\n", 975*25cf1a30Sjl f, 976*25cf1a30Sjl mp->sbm_cm.sbdev_path); 977*25cf1a30Sjl 978*25cf1a30Sjl /* use memlist cached by dr_post_detach_mem_unit */ 979*25cf1a30Sjl ASSERT(mp->sbm_mlist != NULL); 980*25cf1a30Sjl PR_MEM("%s: re-configuring cached memlist for %s:\n", 981*25cf1a30Sjl f, mp->sbm_cm.sbdev_path); 982*25cf1a30Sjl PR_MEMLIST_DUMP(mp->sbm_mlist); 983*25cf1a30Sjl 984*25cf1a30Sjl /* kphysm del handle should be have been freed */ 985*25cf1a30Sjl ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0); 986*25cf1a30Sjl 987*25cf1a30Sjl /*FALLTHROUGH*/ 988*25cf1a30Sjl 989*25cf1a30Sjl case DR_STATE_CONNECTED: 990*25cf1a30Sjl PR_MEM("%s: reprogramming mem hardware on %s\n", 991*25cf1a30Sjl f, mp->sbm_cm.sbdev_bp->b_path); 992*25cf1a30Sjl 993*25cf1a30Sjl PR_MEM("%s: enabling %s\n", 994*25cf1a30Sjl f, mp->sbm_cm.sbdev_path); 995*25cf1a30Sjl 996*25cf1a30Sjl err = drmach_mem_enable(mp->sbm_cm.sbdev_id); 997*25cf1a30Sjl if (err) { 998*25cf1a30Sjl DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); 999*25cf1a30Sjl err_flag = 1; 1000*25cf1a30Sjl } 1001*25cf1a30Sjl break; 1002*25cf1a30Sjl 1003*25cf1a30Sjl default: 1004*25cf1a30Sjl dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE); 1005*25cf1a30Sjl err_flag = 1; 1006*25cf1a30Sjl break; 1007*25cf1a30Sjl } 1008*25cf1a30Sjl 1009*25cf1a30Sjl /* exit for loop if error encountered */ 1010*25cf1a30Sjl if (err_flag) 1011*25cf1a30Sjl break; 1012*25cf1a30Sjl } 1013*25cf1a30Sjl 1014*25cf1a30Sjl return (err_flag ? 
-1 : 0); 1015*25cf1a30Sjl } 1016*25cf1a30Sjl 1017*25cf1a30Sjl int 1018*25cf1a30Sjl dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) 1019*25cf1a30Sjl { 1020*25cf1a30Sjl _NOTE(ARGUNUSED(hp)) 1021*25cf1a30Sjl 1022*25cf1a30Sjl int d; 1023*25cf1a30Sjl static fn_t f = "dr_post_attach_mem"; 1024*25cf1a30Sjl 1025*25cf1a30Sjl PR_MEM("%s...\n", f); 1026*25cf1a30Sjl 1027*25cf1a30Sjl for (d = 0; d < devnum; d++) { 1028*25cf1a30Sjl dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d]; 1029*25cf1a30Sjl struct memlist *mlist, *ml; 1030*25cf1a30Sjl 1031*25cf1a30Sjl mlist = dr_get_memlist(mp); 1032*25cf1a30Sjl if (mlist == NULL) { 1033*25cf1a30Sjl dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_MEMFAIL); 1034*25cf1a30Sjl continue; 1035*25cf1a30Sjl } 1036*25cf1a30Sjl 1037*25cf1a30Sjl /* 1038*25cf1a30Sjl * Verify the memory really did successfully attach 1039*25cf1a30Sjl * by checking for its existence in phys_install. 1040*25cf1a30Sjl */ 1041*25cf1a30Sjl memlist_read_lock(); 1042*25cf1a30Sjl if (memlist_intersect(phys_install, mlist) == 0) { 1043*25cf1a30Sjl memlist_read_unlock(); 1044*25cf1a30Sjl 1045*25cf1a30Sjl DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); 1046*25cf1a30Sjl 1047*25cf1a30Sjl PR_MEM("%s: %s memlist not in phys_install", 1048*25cf1a30Sjl f, mp->sbm_cm.sbdev_path); 1049*25cf1a30Sjl 1050*25cf1a30Sjl memlist_delete(mlist); 1051*25cf1a30Sjl continue; 1052*25cf1a30Sjl } 1053*25cf1a30Sjl memlist_read_unlock(); 1054*25cf1a30Sjl 1055*25cf1a30Sjl for (ml = mlist; ml != NULL; ml = ml->next) { 1056*25cf1a30Sjl sbd_error_t *err; 1057*25cf1a30Sjl 1058*25cf1a30Sjl err = drmach_mem_add_span( 1059*25cf1a30Sjl mp->sbm_cm.sbdev_id, 1060*25cf1a30Sjl ml->address, 1061*25cf1a30Sjl ml->size); 1062*25cf1a30Sjl if (err) 1063*25cf1a30Sjl DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); 1064*25cf1a30Sjl } 1065*25cf1a30Sjl 1066*25cf1a30Sjl memlist_delete(mlist); 1067*25cf1a30Sjl 1068*25cf1a30Sjl /* 1069*25cf1a30Sjl * Destroy cached memlist, if any. 
1070*25cf1a30Sjl * There will be a cached memlist in sbm_mlist if 1071*25cf1a30Sjl * this board is being configured directly after 1072*25cf1a30Sjl * an unconfigure. 1073*25cf1a30Sjl * To support this transition, dr_post_detach_mem 1074*25cf1a30Sjl * left a copy of the last known memlist in sbm_mlist. 1075*25cf1a30Sjl * This memlist could differ from any derived from 1076*25cf1a30Sjl * hardware if while this memunit was last configured 1077*25cf1a30Sjl * the system detected and deleted bad pages from 1078*25cf1a30Sjl * phys_install. The location of those bad pages 1079*25cf1a30Sjl * will be reflected in the cached memlist. 1080*25cf1a30Sjl */ 1081*25cf1a30Sjl if (mp->sbm_mlist) { 1082*25cf1a30Sjl memlist_delete(mp->sbm_mlist); 1083*25cf1a30Sjl mp->sbm_mlist = NULL; 1084*25cf1a30Sjl } 1085*25cf1a30Sjl } 1086*25cf1a30Sjl 1087*25cf1a30Sjl return (0); 1088*25cf1a30Sjl } 1089*25cf1a30Sjl 1090*25cf1a30Sjl int 1091*25cf1a30Sjl dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) 1092*25cf1a30Sjl { 1093*25cf1a30Sjl _NOTE(ARGUNUSED(hp)) 1094*25cf1a30Sjl 1095*25cf1a30Sjl int d; 1096*25cf1a30Sjl 1097*25cf1a30Sjl for (d = 0; d < devnum; d++) { 1098*25cf1a30Sjl dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d]; 1099*25cf1a30Sjl 1100*25cf1a30Sjl cmn_err(CE_CONT, "OS unconfigure %s", mp->sbm_cm.sbdev_path); 1101*25cf1a30Sjl } 1102*25cf1a30Sjl 1103*25cf1a30Sjl return (0); 1104*25cf1a30Sjl } 1105*25cf1a30Sjl 1106*25cf1a30Sjl int 1107*25cf1a30Sjl dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) 1108*25cf1a30Sjl { 1109*25cf1a30Sjl _NOTE(ARGUNUSED(hp)) 1110*25cf1a30Sjl 1111*25cf1a30Sjl int d, rv; 1112*25cf1a30Sjl static fn_t f = "dr_post_detach_mem"; 1113*25cf1a30Sjl 1114*25cf1a30Sjl PR_MEM("%s...\n", f); 1115*25cf1a30Sjl 1116*25cf1a30Sjl rv = 0; 1117*25cf1a30Sjl for (d = 0; d < devnum; d++) { 1118*25cf1a30Sjl dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d]; 1119*25cf1a30Sjl 1120*25cf1a30Sjl ASSERT(mp->sbm_cm.sbdev_bp == hp->h_bd); 
1121*25cf1a30Sjl 1122*25cf1a30Sjl if (dr_post_detach_mem_unit(mp)) 1123*25cf1a30Sjl rv = -1; 1124*25cf1a30Sjl } 1125*25cf1a30Sjl 1126*25cf1a30Sjl return (rv); 1127*25cf1a30Sjl } 1128*25cf1a30Sjl 1129*25cf1a30Sjl static void 1130*25cf1a30Sjl dr_add_memory_spans(dr_mem_unit_t *mp, struct memlist *ml) 1131*25cf1a30Sjl { 1132*25cf1a30Sjl static fn_t f = "dr_add_memory_spans"; 1133*25cf1a30Sjl 1134*25cf1a30Sjl PR_MEM("%s...", f); 1135*25cf1a30Sjl PR_MEMLIST_DUMP(ml); 1136*25cf1a30Sjl 1137*25cf1a30Sjl #ifdef DEBUG 1138*25cf1a30Sjl memlist_read_lock(); 1139*25cf1a30Sjl if (memlist_intersect(phys_install, ml)) { 1140*25cf1a30Sjl PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f); 1141*25cf1a30Sjl } 1142*25cf1a30Sjl memlist_read_unlock(); 1143*25cf1a30Sjl #endif 1144*25cf1a30Sjl 1145*25cf1a30Sjl for (; ml; ml = ml->next) { 1146*25cf1a30Sjl pfn_t base; 1147*25cf1a30Sjl pgcnt_t npgs; 1148*25cf1a30Sjl int rv; 1149*25cf1a30Sjl sbd_error_t *err; 1150*25cf1a30Sjl 1151*25cf1a30Sjl base = _b64top(ml->address); 1152*25cf1a30Sjl npgs = _b64top(ml->size); 1153*25cf1a30Sjl 1154*25cf1a30Sjl rv = kphysm_add_memory_dynamic(base, npgs); 1155*25cf1a30Sjl 1156*25cf1a30Sjl err = drmach_mem_add_span( 1157*25cf1a30Sjl mp->sbm_cm.sbdev_id, 1158*25cf1a30Sjl ml->address, 1159*25cf1a30Sjl ml->size); 1160*25cf1a30Sjl 1161*25cf1a30Sjl if (err) 1162*25cf1a30Sjl DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err); 1163*25cf1a30Sjl 1164*25cf1a30Sjl if (rv != KPHYSM_OK) { 1165*25cf1a30Sjl cmn_err(CE_WARN, "%s:" 1166*25cf1a30Sjl " unexpected kphysm_add_memory_dynamic" 1167*25cf1a30Sjl " return value %d;" 1168*25cf1a30Sjl " basepfn=0x%lx, npages=%ld\n", 1169*25cf1a30Sjl f, rv, base, npgs); 1170*25cf1a30Sjl 1171*25cf1a30Sjl continue; 1172*25cf1a30Sjl } 1173*25cf1a30Sjl } 1174*25cf1a30Sjl } 1175*25cf1a30Sjl 1176*25cf1a30Sjl static int 1177*25cf1a30Sjl dr_post_detach_mem_unit(dr_mem_unit_t *s_mp) 1178*25cf1a30Sjl { 1179*25cf1a30Sjl uint64_t sz = s_mp->sbm_slice_size; 1180*25cf1a30Sjl uint64_t sm = sz - 1; 
1181*25cf1a30Sjl /* old and new below refer to PAs before and after copy-rename */ 1182*25cf1a30Sjl uint64_t s_old_basepa, s_new_basepa; 1183*25cf1a30Sjl uint64_t t_old_basepa, t_new_basepa; 1184*25cf1a30Sjl dr_mem_unit_t *t_mp, *x_mp; 1185*25cf1a30Sjl drmach_mem_info_t minfo; 1186*25cf1a30Sjl struct memlist *ml; 1187*25cf1a30Sjl struct memlist *t_excess_mlist; 1188*25cf1a30Sjl int rv; 1189*25cf1a30Sjl int s_excess_mem_deleted = 0; 1190*25cf1a30Sjl sbd_error_t *err; 1191*25cf1a30Sjl static fn_t f = "dr_post_detach_mem_unit"; 1192*25cf1a30Sjl 1193*25cf1a30Sjl PR_MEM("%s...\n", f); 1194*25cf1a30Sjl 1195*25cf1a30Sjl /* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */ 1196*25cf1a30Sjl PR_MEM("%s: %s: deleted memlist (EMPTY maybe okay):\n", 1197*25cf1a30Sjl f, s_mp->sbm_cm.sbdev_path); 1198*25cf1a30Sjl PR_MEMLIST_DUMP(s_mp->sbm_del_mlist); 1199*25cf1a30Sjl 1200*25cf1a30Sjl /* sanity check */ 1201*25cf1a30Sjl ASSERT(s_mp->sbm_del_mlist == NULL || 1202*25cf1a30Sjl (s_mp->sbm_flags & DR_MFLAG_RELDONE) != 0); 1203*25cf1a30Sjl 1204*25cf1a30Sjl if (s_mp->sbm_flags & DR_MFLAG_SOURCE) { 1205*25cf1a30Sjl t_mp = s_mp->sbm_peer; 1206*25cf1a30Sjl ASSERT(t_mp != NULL); 1207*25cf1a30Sjl ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET); 1208*25cf1a30Sjl ASSERT(t_mp->sbm_peer == s_mp); 1209*25cf1a30Sjl 1210*25cf1a30Sjl ASSERT(t_mp->sbm_flags & DR_MFLAG_RELDONE); 1211*25cf1a30Sjl ASSERT(t_mp->sbm_del_mlist); 1212*25cf1a30Sjl 1213*25cf1a30Sjl PR_MEM("%s: target %s: deleted memlist:\n", 1214*25cf1a30Sjl f, t_mp->sbm_cm.sbdev_path); 1215*25cf1a30Sjl PR_MEMLIST_DUMP(t_mp->sbm_del_mlist); 1216*25cf1a30Sjl } else { 1217*25cf1a30Sjl /* this is no target unit */ 1218*25cf1a30Sjl t_mp = NULL; 1219*25cf1a30Sjl } 1220*25cf1a30Sjl 1221*25cf1a30Sjl /* 1222*25cf1a30Sjl * Verify the memory really did successfully detach 1223*25cf1a30Sjl * by checking for its non-existence in phys_install. 
1224*25cf1a30Sjl */ 1225*25cf1a30Sjl rv = 0; 1226*25cf1a30Sjl memlist_read_lock(); 1227*25cf1a30Sjl if (s_mp->sbm_flags & DR_MFLAG_RELDONE) { 1228*25cf1a30Sjl x_mp = s_mp; 1229*25cf1a30Sjl rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist); 1230*25cf1a30Sjl } 1231*25cf1a30Sjl if (rv == 0 && t_mp && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) { 1232*25cf1a30Sjl x_mp = t_mp; 1233*25cf1a30Sjl rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist); 1234*25cf1a30Sjl } 1235*25cf1a30Sjl memlist_read_unlock(); 1236*25cf1a30Sjl 1237*25cf1a30Sjl if (rv) { 1238*25cf1a30Sjl /* error: memlist still in phys_install */ 1239*25cf1a30Sjl DR_DEV_INTERNAL_ERROR(&x_mp->sbm_cm); 1240*25cf1a30Sjl } 1241*25cf1a30Sjl 1242*25cf1a30Sjl /* 1243*25cf1a30Sjl * clean mem unit state and bail out if an error has been recorded. 1244*25cf1a30Sjl */ 1245*25cf1a30Sjl rv = 0; 1246*25cf1a30Sjl if (s_mp->sbm_cm.sbdev_error) { 1247*25cf1a30Sjl PR_MEM("%s: %s flags=%x", f, 1248*25cf1a30Sjl s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags); 1249*25cf1a30Sjl DR_DEV_CLR_UNREFERENCED(&s_mp->sbm_cm); 1250*25cf1a30Sjl DR_DEV_CLR_RELEASED(&s_mp->sbm_cm); 1251*25cf1a30Sjl dr_device_transition(&s_mp->sbm_cm, DR_STATE_CONFIGURED); 1252*25cf1a30Sjl rv = -1; 1253*25cf1a30Sjl } 1254*25cf1a30Sjl if (t_mp != NULL && t_mp->sbm_cm.sbdev_error != NULL) { 1255*25cf1a30Sjl PR_MEM("%s: %s flags=%x", f, 1256*25cf1a30Sjl s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags); 1257*25cf1a30Sjl DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm); 1258*25cf1a30Sjl DR_DEV_CLR_RELEASED(&t_mp->sbm_cm); 1259*25cf1a30Sjl dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED); 1260*25cf1a30Sjl rv = -1; 1261*25cf1a30Sjl } 1262*25cf1a30Sjl if (rv) 1263*25cf1a30Sjl goto cleanup; 1264*25cf1a30Sjl 1265*25cf1a30Sjl s_old_basepa = _ptob64(s_mp->sbm_basepfn); 1266*25cf1a30Sjl err = drmach_mem_get_info(s_mp->sbm_cm.sbdev_id, &minfo); 1267*25cf1a30Sjl ASSERT(err == NULL); 1268*25cf1a30Sjl s_new_basepa = minfo.mi_basepa; 1269*25cf1a30Sjl 1270*25cf1a30Sjl 
PR_MEM("%s:s_old_basepa: 0x%lx\n", f, s_old_basepa); 1271*25cf1a30Sjl PR_MEM("%s:s_new_basepa: 0x%lx\n", f, s_new_basepa); 1272*25cf1a30Sjl 1273*25cf1a30Sjl if (t_mp != NULL) { 1274*25cf1a30Sjl struct memlist *s_copy_mlist; 1275*25cf1a30Sjl 1276*25cf1a30Sjl t_old_basepa = _ptob64(t_mp->sbm_basepfn); 1277*25cf1a30Sjl err = drmach_mem_get_info(t_mp->sbm_cm.sbdev_id, &minfo); 1278*25cf1a30Sjl ASSERT(err == NULL); 1279*25cf1a30Sjl t_new_basepa = minfo.mi_basepa; 1280*25cf1a30Sjl 1281*25cf1a30Sjl PR_MEM("%s:t_old_basepa: 0x%lx\n", f, t_old_basepa); 1282*25cf1a30Sjl PR_MEM("%s:t_new_basepa: 0x%lx\n", f, t_new_basepa); 1283*25cf1a30Sjl 1284*25cf1a30Sjl /* 1285*25cf1a30Sjl * Construct copy list with original source addresses. 1286*25cf1a30Sjl * Used to add back excess target mem. 1287*25cf1a30Sjl */ 1288*25cf1a30Sjl s_copy_mlist = memlist_dup(s_mp->sbm_mlist); 1289*25cf1a30Sjl for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) { 1290*25cf1a30Sjl s_copy_mlist = memlist_del_span(s_copy_mlist, 1291*25cf1a30Sjl ml->address, ml->size); 1292*25cf1a30Sjl } 1293*25cf1a30Sjl 1294*25cf1a30Sjl PR_MEM("%s: source copy list:\n:", f); 1295*25cf1a30Sjl PR_MEMLIST_DUMP(s_copy_mlist); 1296*25cf1a30Sjl 1297*25cf1a30Sjl /* 1298*25cf1a30Sjl * We had to swap mem-units, so update 1299*25cf1a30Sjl * memlists accordingly with new base 1300*25cf1a30Sjl * addresses. 1301*25cf1a30Sjl */ 1302*25cf1a30Sjl for (ml = t_mp->sbm_mlist; ml; ml = ml->next) { 1303*25cf1a30Sjl ml->address -= t_old_basepa; 1304*25cf1a30Sjl ml->address += t_new_basepa; 1305*25cf1a30Sjl } 1306*25cf1a30Sjl 1307*25cf1a30Sjl /* 1308*25cf1a30Sjl * There is no need to explicitly rename the target delete 1309*25cf1a30Sjl * memlist, because sbm_del_mlist and sbm_mlist always 1310*25cf1a30Sjl * point to the same memlist for a copy/rename operation. 
1311*25cf1a30Sjl */ 1312*25cf1a30Sjl ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist); 1313*25cf1a30Sjl 1314*25cf1a30Sjl PR_MEM("%s: renamed target memlist and delete memlist:\n", f); 1315*25cf1a30Sjl PR_MEMLIST_DUMP(t_mp->sbm_mlist); 1316*25cf1a30Sjl 1317*25cf1a30Sjl for (ml = s_mp->sbm_mlist; ml; ml = ml->next) { 1318*25cf1a30Sjl ml->address -= s_old_basepa; 1319*25cf1a30Sjl ml->address += s_new_basepa; 1320*25cf1a30Sjl } 1321*25cf1a30Sjl 1322*25cf1a30Sjl PR_MEM("%s: renamed source memlist:\n", f); 1323*25cf1a30Sjl PR_MEMLIST_DUMP(s_mp->sbm_mlist); 1324*25cf1a30Sjl 1325*25cf1a30Sjl /* 1326*25cf1a30Sjl * Keep track of dynamically added segments 1327*25cf1a30Sjl * since they cannot be split if we need to delete 1328*25cf1a30Sjl * excess source memory later for this board. 1329*25cf1a30Sjl */ 1330*25cf1a30Sjl if (t_mp->sbm_dyn_segs) 1331*25cf1a30Sjl memlist_delete(t_mp->sbm_dyn_segs); 1332*25cf1a30Sjl t_mp->sbm_dyn_segs = s_mp->sbm_dyn_segs; 1333*25cf1a30Sjl s_mp->sbm_dyn_segs = NULL; 1334*25cf1a30Sjl 1335*25cf1a30Sjl /* 1336*25cf1a30Sjl * Add back excess target memory. 1337*25cf1a30Sjl * Subtract out the portion of the target memory 1338*25cf1a30Sjl * node that was taken over by the source memory 1339*25cf1a30Sjl * node. 
1340*25cf1a30Sjl */ 1341*25cf1a30Sjl t_excess_mlist = memlist_dup(t_mp->sbm_mlist); 1342*25cf1a30Sjl for (ml = s_copy_mlist; ml; ml = ml->next) { 1343*25cf1a30Sjl t_excess_mlist = 1344*25cf1a30Sjl memlist_del_span(t_excess_mlist, 1345*25cf1a30Sjl ml->address, ml->size); 1346*25cf1a30Sjl } 1347*25cf1a30Sjl 1348*25cf1a30Sjl /* 1349*25cf1a30Sjl * Update dynamically added segs 1350*25cf1a30Sjl */ 1351*25cf1a30Sjl for (ml = s_mp->sbm_del_mlist; ml; ml = ml->next) { 1352*25cf1a30Sjl t_mp->sbm_dyn_segs = 1353*25cf1a30Sjl memlist_del_span(t_mp->sbm_dyn_segs, 1354*25cf1a30Sjl ml->address, ml->size); 1355*25cf1a30Sjl } 1356*25cf1a30Sjl for (ml = t_excess_mlist; ml; ml = ml->next) { 1357*25cf1a30Sjl t_mp->sbm_dyn_segs = 1358*25cf1a30Sjl memlist_cat_span(t_mp->sbm_dyn_segs, 1359*25cf1a30Sjl ml->address, ml->size); 1360*25cf1a30Sjl } 1361*25cf1a30Sjl PR_MEM("%s: %s: updated dynamic seg list:\n", 1362*25cf1a30Sjl f, t_mp->sbm_cm.sbdev_path); 1363*25cf1a30Sjl PR_MEMLIST_DUMP(t_mp->sbm_dyn_segs); 1364*25cf1a30Sjl 1365*25cf1a30Sjl if (t_excess_mlist != NULL) { 1366*25cf1a30Sjl /* 1367*25cf1a30Sjl * After the small <-> big copy-rename, 1368*25cf1a30Sjl * the original address space for the 1369*25cf1a30Sjl * source board may have excess to be 1370*25cf1a30Sjl * deleted. This is a case different 1371*25cf1a30Sjl * from the big->small excess source 1372*25cf1a30Sjl * memory case listed below. 1373*25cf1a30Sjl * Remove s_mp->sbm_del_mlist from 1374*25cf1a30Sjl * the kernel cage glist. 
1375*25cf1a30Sjl */ 1376*25cf1a30Sjl for (ml = s_mp->sbm_del_mlist; ml; 1377*25cf1a30Sjl ml = ml->next) { 1378*25cf1a30Sjl PR_MEM("%s: delete small<->big copy-" 1379*25cf1a30Sjl "rename source excess memory", f); 1380*25cf1a30Sjl PR_MEMLIST_DUMP(ml); 1381*25cf1a30Sjl 1382*25cf1a30Sjl err = drmach_mem_del_span( 1383*25cf1a30Sjl s_mp->sbm_cm.sbdev_id, 1384*25cf1a30Sjl ml->address, ml->size); 1385*25cf1a30Sjl if (err) 1386*25cf1a30Sjl DRERR_SET_C(&s_mp-> 1387*25cf1a30Sjl sbm_cm.sbdev_error, &err); 1388*25cf1a30Sjl ASSERT(err == NULL); 1389*25cf1a30Sjl } 1390*25cf1a30Sjl 1391*25cf1a30Sjl /* 1392*25cf1a30Sjl * mark sbm_del_mlist as been deleted so that 1393*25cf1a30Sjl * we won't end up to delete it twice later 1394*25cf1a30Sjl * from the span list 1395*25cf1a30Sjl */ 1396*25cf1a30Sjl s_excess_mem_deleted = 1; 1397*25cf1a30Sjl 1398*25cf1a30Sjl PR_MEM("%s: adding back remaining portion" 1399*25cf1a30Sjl " of %s, memlist:\n", 1400*25cf1a30Sjl f, t_mp->sbm_cm.sbdev_path); 1401*25cf1a30Sjl PR_MEMLIST_DUMP(t_excess_mlist); 1402*25cf1a30Sjl 1403*25cf1a30Sjl dr_add_memory_spans(s_mp, t_excess_mlist); 1404*25cf1a30Sjl memlist_delete(t_excess_mlist); 1405*25cf1a30Sjl } 1406*25cf1a30Sjl memlist_delete(s_copy_mlist); 1407*25cf1a30Sjl 1408*25cf1a30Sjl #ifdef DEBUG 1409*25cf1a30Sjl /* 1410*25cf1a30Sjl * s_mp->sbm_del_mlist may still needed 1411*25cf1a30Sjl */ 1412*25cf1a30Sjl PR_MEM("%s: source delete memeory flag %d", 1413*25cf1a30Sjl f, s_excess_mem_deleted); 1414*25cf1a30Sjl PR_MEM("%s: source delete memlist", f); 1415*25cf1a30Sjl PR_MEMLIST_DUMP(s_mp->sbm_del_mlist); 1416*25cf1a30Sjl #endif 1417*25cf1a30Sjl 1418*25cf1a30Sjl } 1419*25cf1a30Sjl 1420*25cf1a30Sjl if (t_mp != NULL) { 1421*25cf1a30Sjl /* delete target's entire address space */ 1422*25cf1a30Sjl err = drmach_mem_del_span( 1423*25cf1a30Sjl t_mp->sbm_cm.sbdev_id, t_old_basepa & ~ sm, sz); 1424*25cf1a30Sjl if (err) 1425*25cf1a30Sjl DRERR_SET_C(&t_mp->sbm_cm.sbdev_error, &err); 1426*25cf1a30Sjl ASSERT(err == NULL); 
1427*25cf1a30Sjl 1428*25cf1a30Sjl /* 1429*25cf1a30Sjl * After the copy/rename, the original address space 1430*25cf1a30Sjl * for the source board (which is now located on the 1431*25cf1a30Sjl * target board) may now have some excess to be deleted. 1432*25cf1a30Sjl * Those excess memory on the source board are kept in 1433*25cf1a30Sjl * source board's sbm_del_mlist 1434*25cf1a30Sjl */ 1435*25cf1a30Sjl for (ml = s_mp->sbm_del_mlist; !s_excess_mem_deleted && ml; 1436*25cf1a30Sjl ml = ml->next) { 1437*25cf1a30Sjl PR_MEM("%s: delete source excess memory", f); 1438*25cf1a30Sjl PR_MEMLIST_DUMP(ml); 1439*25cf1a30Sjl 1440*25cf1a30Sjl err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id, 1441*25cf1a30Sjl ml->address, ml->size); 1442*25cf1a30Sjl if (err) 1443*25cf1a30Sjl DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err); 1444*25cf1a30Sjl ASSERT(err == NULL); 1445*25cf1a30Sjl } 1446*25cf1a30Sjl 1447*25cf1a30Sjl } else { 1448*25cf1a30Sjl /* delete board's entire address space */ 1449*25cf1a30Sjl err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id, 1450*25cf1a30Sjl s_old_basepa & ~ sm, sz); 1451*25cf1a30Sjl if (err) 1452*25cf1a30Sjl DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err); 1453*25cf1a30Sjl ASSERT(err == NULL); 1454*25cf1a30Sjl } 1455*25cf1a30Sjl 1456*25cf1a30Sjl cleanup: 1457*25cf1a30Sjl /* clean up target mem unit */ 1458*25cf1a30Sjl if (t_mp != NULL) { 1459*25cf1a30Sjl memlist_delete(t_mp->sbm_del_mlist); 1460*25cf1a30Sjl /* no need to delete sbm_mlist, it shares sbm_del_mlist */ 1461*25cf1a30Sjl 1462*25cf1a30Sjl t_mp->sbm_del_mlist = NULL; 1463*25cf1a30Sjl t_mp->sbm_mlist = NULL; 1464*25cf1a30Sjl t_mp->sbm_peer = NULL; 1465*25cf1a30Sjl t_mp->sbm_flags = 0; 1466*25cf1a30Sjl t_mp->sbm_cm.sbdev_busy = 0; 1467*25cf1a30Sjl dr_init_mem_unit_data(t_mp); 1468*25cf1a30Sjl 1469*25cf1a30Sjl } 1470*25cf1a30Sjl if (t_mp != NULL && t_mp->sbm_cm.sbdev_error == NULL) { 1471*25cf1a30Sjl /* 1472*25cf1a30Sjl * now that copy/rename has completed, undo this 1473*25cf1a30Sjl * work that was done in 
dr_release_mem_done. 1474*25cf1a30Sjl */ 1475*25cf1a30Sjl DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm); 1476*25cf1a30Sjl DR_DEV_CLR_RELEASED(&t_mp->sbm_cm); 1477*25cf1a30Sjl dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED); 1478*25cf1a30Sjl } 1479*25cf1a30Sjl 1480*25cf1a30Sjl /* 1481*25cf1a30Sjl * clean up (source) board's mem unit structure. 1482*25cf1a30Sjl * NOTE: sbm_mlist is retained if no error has been record (in other 1483*25cf1a30Sjl * words, when s_mp->sbm_cm.sbdev_error is NULL). This memlist is 1484*25cf1a30Sjl * referred to elsewhere as the cached memlist. The cached memlist 1485*25cf1a30Sjl * is used to re-attach (configure back in) this memunit from the 1486*25cf1a30Sjl * unconfigured state. The memlist is retained because it may 1487*25cf1a30Sjl * represent bad pages that were detected while the memory was 1488*25cf1a30Sjl * configured into the OS. The OS deletes bad pages from phys_install. 1489*25cf1a30Sjl * Those deletes, if any, will be represented in the cached mlist. 
1490*25cf1a30Sjl */ 1491*25cf1a30Sjl if (s_mp->sbm_del_mlist && s_mp->sbm_del_mlist != s_mp->sbm_mlist) 1492*25cf1a30Sjl memlist_delete(s_mp->sbm_del_mlist); 1493*25cf1a30Sjl 1494*25cf1a30Sjl if (s_mp->sbm_cm.sbdev_error && s_mp->sbm_mlist) { 1495*25cf1a30Sjl memlist_delete(s_mp->sbm_mlist); 1496*25cf1a30Sjl s_mp->sbm_mlist = NULL; 1497*25cf1a30Sjl } 1498*25cf1a30Sjl 1499*25cf1a30Sjl if (s_mp->sbm_dyn_segs != NULL && s_mp->sbm_cm.sbdev_error == 0) { 1500*25cf1a30Sjl memlist_delete(s_mp->sbm_dyn_segs); 1501*25cf1a30Sjl s_mp->sbm_dyn_segs = NULL; 1502*25cf1a30Sjl } 1503*25cf1a30Sjl 1504*25cf1a30Sjl s_mp->sbm_del_mlist = NULL; 1505*25cf1a30Sjl s_mp->sbm_peer = NULL; 1506*25cf1a30Sjl s_mp->sbm_flags = 0; 1507*25cf1a30Sjl s_mp->sbm_cm.sbdev_busy = 0; 1508*25cf1a30Sjl dr_init_mem_unit_data(s_mp); 1509*25cf1a30Sjl 1510*25cf1a30Sjl PR_MEM("%s: cached memlist for %s:", f, s_mp->sbm_cm.sbdev_path); 1511*25cf1a30Sjl PR_MEMLIST_DUMP(s_mp->sbm_mlist); 1512*25cf1a30Sjl 1513*25cf1a30Sjl return (0); 1514*25cf1a30Sjl } 1515*25cf1a30Sjl 1516*25cf1a30Sjl /* 1517*25cf1a30Sjl * Successful return from this function will have the memory 1518*25cf1a30Sjl * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated 1519*25cf1a30Sjl * and waiting. This routine's job is to select the memory that 1520*25cf1a30Sjl * actually has to be released (detached) which may not necessarily 1521*25cf1a30Sjl * be the same memory node that came in in devlist[], 1522*25cf1a30Sjl * i.e. a copy-rename is needed. 
1523*25cf1a30Sjl */ 1524*25cf1a30Sjl int 1525*25cf1a30Sjl dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum) 1526*25cf1a30Sjl { 1527*25cf1a30Sjl int d; 1528*25cf1a30Sjl int err_flag = 0; 1529*25cf1a30Sjl static fn_t f = "dr_pre_release_mem"; 1530*25cf1a30Sjl 1531*25cf1a30Sjl PR_MEM("%s...\n", f); 1532*25cf1a30Sjl 1533*25cf1a30Sjl for (d = 0; d < devnum; d++) { 1534*25cf1a30Sjl dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d]; 1535*25cf1a30Sjl int rv; 1536*25cf1a30Sjl memquery_t mq; 1537*25cf1a30Sjl struct memlist *ml; 1538*25cf1a30Sjl 1539*25cf1a30Sjl if (mp->sbm_cm.sbdev_error) { 1540*25cf1a30Sjl err_flag = 1; 1541*25cf1a30Sjl continue; 1542*25cf1a30Sjl } else if (!kcage_on) { 1543*25cf1a30Sjl dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_KCAGE_OFF); 1544*25cf1a30Sjl err_flag = 1; 1545*25cf1a30Sjl continue; 1546*25cf1a30Sjl } 1547*25cf1a30Sjl 1548*25cf1a30Sjl if (mp->sbm_flags & DR_MFLAG_RESERVED) { 1549*25cf1a30Sjl /* 1550*25cf1a30Sjl * Board is currently involved in a delete 1551*25cf1a30Sjl * memory operation. Can't detach this guy until 1552*25cf1a30Sjl * that operation completes. 
1553*25cf1a30Sjl */ 1554*25cf1a30Sjl dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_INVAL); 1555*25cf1a30Sjl err_flag = 1; 1556*25cf1a30Sjl break; 1557*25cf1a30Sjl } 1558*25cf1a30Sjl 1559*25cf1a30Sjl /* flags should be clean at this time */ 1560*25cf1a30Sjl ASSERT(mp->sbm_flags == 0); 1561*25cf1a30Sjl 1562*25cf1a30Sjl ASSERT(mp->sbm_mlist == NULL); 1563*25cf1a30Sjl ASSERT(mp->sbm_del_mlist == NULL); 1564*25cf1a30Sjl if (mp->sbm_mlist != NULL) { 1565*25cf1a30Sjl memlist_delete(mp->sbm_mlist); 1566*25cf1a30Sjl mp->sbm_mlist = NULL; 1567*25cf1a30Sjl } 1568*25cf1a30Sjl 1569*25cf1a30Sjl ml = dr_get_memlist(mp); 1570*25cf1a30Sjl if (ml == NULL) { 1571*25cf1a30Sjl err_flag = 1; 1572*25cf1a30Sjl PR_MEM("%s: no memlist found for %s\n", 1573*25cf1a30Sjl f, mp->sbm_cm.sbdev_path); 1574*25cf1a30Sjl continue; 1575*25cf1a30Sjl } 1576*25cf1a30Sjl 1577*25cf1a30Sjl /* 1578*25cf1a30Sjl * Check whether the detaching memory requires a 1579*25cf1a30Sjl * copy-rename. 1580*25cf1a30Sjl */ 1581*25cf1a30Sjl ASSERT(mp->sbm_npages != 0); 1582*25cf1a30Sjl rv = dr_del_mlist_query(ml, &mq); 1583*25cf1a30Sjl if (rv != KPHYSM_OK) { 1584*25cf1a30Sjl memlist_delete(ml); 1585*25cf1a30Sjl DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); 1586*25cf1a30Sjl err_flag = 1; 1587*25cf1a30Sjl break; 1588*25cf1a30Sjl } 1589*25cf1a30Sjl 1590*25cf1a30Sjl if (mq.nonrelocatable != 0) { 1591*25cf1a30Sjl if (!(dr_cmd_flags(hp) & 1592*25cf1a30Sjl (SBD_FLAG_FORCE | SBD_FLAG_QUIESCE_OKAY))) { 1593*25cf1a30Sjl memlist_delete(ml); 1594*25cf1a30Sjl /* caller wasn't prompted for a suspend */ 1595*25cf1a30Sjl dr_dev_err(CE_WARN, &mp->sbm_cm, 1596*25cf1a30Sjl ESBD_QUIESCE_REQD); 1597*25cf1a30Sjl err_flag = 1; 1598*25cf1a30Sjl break; 1599*25cf1a30Sjl } 1600*25cf1a30Sjl } 1601*25cf1a30Sjl 1602*25cf1a30Sjl /* allocate a kphysm handle */ 1603*25cf1a30Sjl rv = kphysm_del_gethandle(&mp->sbm_memhandle); 1604*25cf1a30Sjl if (rv != KPHYSM_OK) { 1605*25cf1a30Sjl memlist_delete(ml); 1606*25cf1a30Sjl 1607*25cf1a30Sjl DR_DEV_INTERNAL_ERROR(&mp->sbm_cm); 
1608*25cf1a30Sjl err_flag = 1; 1609*25cf1a30Sjl break; 1610*25cf1a30Sjl } 1611*25cf1a30Sjl mp->sbm_flags |= DR_MFLAG_RELOWNER; 1612*25cf1a30Sjl 1613*25cf1a30Sjl if ((mq.nonrelocatable != 0) || 1614*25cf1a30Sjl dr_reserve_mem_spans(&mp->sbm_memhandle, ml)) { 1615*25cf1a30Sjl /* 1616*25cf1a30Sjl * Either the detaching memory node contains 1617*25cf1a30Sjl * non-reloc memory or we failed to reserve the 1618*25cf1a30Sjl * detaching memory node (which did _not_ have 1619*25cf1a30Sjl * any non-reloc memory, i.e. some non-reloc mem 1620*25cf1a30Sjl * got onboard). 1621*25cf1a30Sjl */ 1622*25cf1a30Sjl 1623*25cf1a30Sjl if (dr_select_mem_target(hp, mp, ml)) { 1624*25cf1a30Sjl int rv; 1625*25cf1a30Sjl 1626*25cf1a30Sjl /* 1627*25cf1a30Sjl * We had no luck locating a target 1628*25cf1a30Sjl * memory node to be the recipient of 1629*25cf1a30Sjl * the non-reloc memory on the node 1630*25cf1a30Sjl * we're trying to detach. 1631*25cf1a30Sjl * Clean up be disposing the mem handle 1632*25cf1a30Sjl * and the mem list. 
1633*25cf1a30Sjl */ 1634*25cf1a30Sjl rv = kphysm_del_release(mp->sbm_memhandle); 1635*25cf1a30Sjl if (rv != KPHYSM_OK) { 1636*25cf1a30Sjl /* 1637*25cf1a30Sjl * can do nothing but complain 1638*25cf1a30Sjl * and hope helpful for debug 1639*25cf1a30Sjl */ 1640*25cf1a30Sjl cmn_err(CE_WARN, "%s: unexpected" 1641*25cf1a30Sjl " kphysm_del_release return" 1642*25cf1a30Sjl " value %d", 1643*25cf1a30Sjl f, rv); 1644*25cf1a30Sjl } 1645*25cf1a30Sjl mp->sbm_flags &= ~DR_MFLAG_RELOWNER; 1646*25cf1a30Sjl 1647*25cf1a30Sjl memlist_delete(ml); 1648*25cf1a30Sjl 1649*25cf1a30Sjl /* make sure sbm_flags is clean */ 1650*25cf1a30Sjl ASSERT(mp->sbm_flags == 0); 1651*25cf1a30Sjl 1652*25cf1a30Sjl dr_dev_err(CE_WARN, 1653*25cf1a30Sjl &mp->sbm_cm, ESBD_NO_TARGET); 1654*25cf1a30Sjl 1655*25cf1a30Sjl err_flag = 1; 1656*25cf1a30Sjl break; 1657*25cf1a30Sjl } 1658*25cf1a30Sjl 1659*25cf1a30Sjl /* 1660*25cf1a30Sjl * ml is not memlist_delete'd here because 1661*25cf1a30Sjl * it has been assigned to mp->sbm_mlist 1662*25cf1a30Sjl * by dr_select_mem_target. 
 */
		} else {
			/* no target needed to detach this board */
			mp->sbm_flags |= DR_MFLAG_RESERVED;
			mp->sbm_peer = NULL;
			mp->sbm_del_mlist = ml;
			mp->sbm_mlist = ml;
			mp->sbm_cm.sbdev_busy = 1;
		}
#ifdef DEBUG
		ASSERT(mp->sbm_mlist != NULL);

		if (mp->sbm_flags & DR_MFLAG_SOURCE) {
			PR_MEM("%s: release of %s requires copy/rename;"
			    " selected target board %s\n",
			    f,
			    mp->sbm_cm.sbdev_path,
			    mp->sbm_peer->sbm_cm.sbdev_path);
		} else {
			PR_MEM("%s: copy/rename not required to release %s\n",
			    f, mp->sbm_cm.sbdev_path);
		}

		ASSERT(mp->sbm_flags & DR_MFLAG_RELOWNER);
		ASSERT(mp->sbm_flags & DR_MFLAG_RESERVED);
#endif
	}

	return (err_flag ? -1 : 0);
}

/*
 * Final stage of a memory release: give back the kphysm delete handle,
 * unwind all per-unit state if an error was recorded during the release,
 * and otherwise transition the source mem-unit (and its copy/rename
 * target, if one was selected) to the RELEASE state and mark it RELDONE.
 *
 * cp points at the common header of the *source* mem-unit.
 */
void
dr_release_mem_done(dr_common_unit_t *cp)
{
	dr_mem_unit_t	*s_mp = (dr_mem_unit_t *)cp;
	dr_mem_unit_t	*t_mp, *mp;
	int		rv;
	static fn_t	f = "dr_release_mem_done";

	/*
	 * This unit will be flagged with DR_MFLAG_SOURCE, if it
	 * has a target unit.
	 */
	if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
		t_mp = s_mp->sbm_peer;
		ASSERT(t_mp != NULL);
		ASSERT(t_mp->sbm_peer == s_mp);
		ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
		ASSERT(t_mp->sbm_flags & DR_MFLAG_RESERVED);
	} else {
		/* this is no target unit */
		t_mp = NULL;
	}

	/* free delete handle */
	ASSERT(s_mp->sbm_flags & DR_MFLAG_RELOWNER);
	ASSERT(s_mp->sbm_flags & DR_MFLAG_RESERVED);
	rv = kphysm_del_release(s_mp->sbm_memhandle);
	if (rv != KPHYSM_OK) {
		/*
		 * can do nothing but complain
		 * and hope helpful for debug
		 */
		cmn_err(CE_WARN, "%s: unexpected kphysm_del_release"
		    " return value %d", f, rv);
	}
	s_mp->sbm_flags &= ~DR_MFLAG_RELOWNER;

	/*
	 * If an error was encountered during release, clean up
	 * the source (and target, if present) unit data.
	 */
	/* XXX Can we know that sbdev_error was encountered during release? */
	if (s_mp->sbm_cm.sbdev_error != NULL) {
		PR_MEM("%s: %s: error %d noted\n",
		    f,
		    s_mp->sbm_cm.sbdev_path,
		    s_mp->sbm_cm.sbdev_error->e_code);

		if (t_mp != NULL) {
			/*
			 * On the target the delete list always aliases the
			 * full mlist, so clear the alias before freeing.
			 */
			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
			t_mp->sbm_del_mlist = NULL;

			if (t_mp->sbm_mlist != NULL) {
				memlist_delete(t_mp->sbm_mlist);
				t_mp->sbm_mlist = NULL;
			}

			t_mp->sbm_peer = NULL;
			t_mp->sbm_flags = 0;
			t_mp->sbm_cm.sbdev_busy = 0;
		}

		/* the source delete list may be separate; free it first */
		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
			memlist_delete(s_mp->sbm_del_mlist);
		s_mp->sbm_del_mlist = NULL;

		if (s_mp->sbm_mlist != NULL) {
			memlist_delete(s_mp->sbm_mlist);
			s_mp->sbm_mlist = NULL;
		}

		s_mp->sbm_peer = NULL;
		s_mp->sbm_flags = 0;
		s_mp->sbm_cm.sbdev_busy = 0;

		/* bail out */
		return;
	}

	DR_DEV_SET_RELEASED(&s_mp->sbm_cm);
	dr_device_transition(&s_mp->sbm_cm, DR_STATE_RELEASE);

	if (t_mp != NULL) {
		/*
		 * the kphysm delete operation that drained the source
		 * board also drained this target board.  Since the source
		 * board drain is now known to have succeeded, we know this
		 * target board is drained too.
		 *
		 * because DR_DEV_SET_RELEASED and dr_device_transition
		 * is done here, the dr_release_dev_done should not
		 * fail.
		 */
		DR_DEV_SET_RELEASED(&t_mp->sbm_cm);
		dr_device_transition(&t_mp->sbm_cm, DR_STATE_RELEASE);

		/*
		 * NOTE: do not transition target's board state,
		 * even if the mem-unit was the last configure
		 * unit of the board.  When copy/rename completes
		 * this mem-unit will transitioned back to
		 * the configured state.  In the meantime, the
		 * board's must remain as is.
		 */
	}

	/* if board(s) had deleted memory, verify it is gone */
	rv = 0;
	memlist_read_lock();
	if (s_mp->sbm_del_mlist != NULL) {
		mp = s_mp;
		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
	}
	if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
		mp = t_mp;
		rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
	}
	memlist_read_unlock();
	if (rv) {
		/* deleted spans still present: internal inconsistency */
		cmn_err(CE_WARN, "%s: %smem-unit (%d.%d): "
		    "deleted memory still found in phys_install",
		    f,
		    (mp == t_mp ? "target " : ""),
		    mp->sbm_cm.sbdev_bp->b_num,
		    mp->sbm_cm.sbdev_unum);

		DR_DEV_INTERNAL_ERROR(&s_mp->sbm_cm);
		return;
	}

	s_mp->sbm_flags |= DR_MFLAG_RELDONE;
	if (t_mp != NULL)
		t_mp->sbm_flags |= DR_MFLAG_RELDONE;

	/* this should not fail */
	if (dr_release_dev_done(&s_mp->sbm_cm) != 0) {
		/* catch this in debug kernels */
		ASSERT(0);
		return;
	}

	PR_MEM("%s: marking %s release DONE\n",
	    f, s_mp->sbm_cm.sbdev_path);

	s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;

	if (t_mp != NULL) {
		/* should not fail */
		rv = dr_release_dev_done(&t_mp->sbm_cm);
		if (rv != 0) {
			/* catch this in debug kernels */
			ASSERT(0);
			return;
		}

		PR_MEM("%s: marking %s release DONE\n",
		    f, t_mp->sbm_cm.sbdev_path);

		t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
	}
}

/*
 * Disconnect a memory unit: discard its cached memlists and withdraw
 * the board's memory from the lgroup configuration.
 */
/*ARGSUSED*/
int
dr_disconnect_mem(dr_mem_unit_t *mp)
{
	static fn_t	f = "dr_disconnect_mem";
	update_membounds_t umb;

#ifdef DEBUG
	int state = mp->sbm_cm.sbdev_state;
	ASSERT(state == DR_STATE_CONNECTED ||
	    state == DR_STATE_UNCONFIGURED);
#endif

	PR_MEM("%s...\n", f);

	/* only free the delete list when it is not an alias of sbm_mlist */
	if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
		memlist_delete(mp->sbm_del_mlist);
	mp->sbm_del_mlist = NULL;

	if (mp->sbm_mlist) {
		memlist_delete(mp->sbm_mlist);
		mp->sbm_mlist = NULL;
	}

	/*
	 * Remove memory from lgroup
	 * For now, only board info is required.
	 */
	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
	umb.u_base = (uint64_t)-1;
	umb.u_len = (uint64_t)-1;

	lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);

	return (0);
}

/*
 * Abort an in-progress memory release on the given *source* unit:
 * re-add any spans that had been scheduled for deletion, free cached
 * memlists, dissolve the copy/rename pairing, and return both units to
 * the CONFIGURED state with their per-unit data re-initialized.
 *
 * Returns 0 on success, -1 if called on a target unit or from a state
 * this routine does not handle.
 */
int
dr_cancel_mem(dr_mem_unit_t *s_mp)
{
	dr_mem_unit_t	*t_mp;
	dr_state_t	state;
	static fn_t	f = "dr_cancel_mem";

	state = s_mp->sbm_cm.sbdev_state;

	if (s_mp->sbm_flags & DR_MFLAG_TARGET) {
		/* must cancel source board, not target board */
		/* TODO: set error */
		return (-1);
	} else if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
		t_mp = s_mp->sbm_peer;
		ASSERT(t_mp != NULL);
		ASSERT(t_mp->sbm_peer == s_mp);

		/* must always match the source board's state */
		/* TODO: is this assertion correct? */
		ASSERT(t_mp->sbm_cm.sbdev_state == state);
	} else {
		/* this is no target unit */
		t_mp = NULL;
	}

	switch (state) {
	case DR_STATE_UNREFERENCED:	/* state set by dr_release_dev_done */
		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);

		if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
			PR_MEM("%s: undoing target %s memory delete\n",
			    f, t_mp->sbm_cm.sbdev_path);
			dr_add_memory_spans(t_mp, t_mp->sbm_del_mlist);

			DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
		}

		if (s_mp->sbm_del_mlist != NULL) {
			PR_MEM("%s: undoing %s memory delete\n",
			    f, s_mp->sbm_cm.sbdev_path);

			dr_add_memory_spans(s_mp, s_mp->sbm_del_mlist);
		}

		/*FALLTHROUGH*/

	/* TODO: should no longer be possible to see the release state here */
	case DR_STATE_RELEASE:	/* state set by dr_release_mem_done */

		ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);

		if (t_mp != NULL) {
			/* target's delete list always aliases its mlist */
			ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
			t_mp->sbm_del_mlist = NULL;

			if (t_mp->sbm_mlist != NULL) {
				memlist_delete(t_mp->sbm_mlist);
				t_mp->sbm_mlist = NULL;
			}

			t_mp->sbm_peer = NULL;
			t_mp->sbm_flags = 0;
			t_mp->sbm_cm.sbdev_busy = 0;
			dr_init_mem_unit_data(t_mp);

			DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);

			dr_device_transition(
			    &t_mp->sbm_cm, DR_STATE_CONFIGURED);
		}

		/* source delete list may be distinct from sbm_mlist */
		if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
			memlist_delete(s_mp->sbm_del_mlist);
		s_mp->sbm_del_mlist = NULL;

		if (s_mp->sbm_mlist != NULL) {
			memlist_delete(s_mp->sbm_mlist);
			s_mp->sbm_mlist = NULL;
		}

		s_mp->sbm_peer = NULL;
		s_mp->sbm_flags = 0;
		s_mp->sbm_cm.sbdev_busy = 0;
		dr_init_mem_unit_data(s_mp);

		return (0);

	default:
		PR_MEM("%s: WARNING unexpected state (%d) for %s\n",
		    f, (int)state, s_mp->sbm_cm.sbdev_path);

		return (-1);
	}
	/*NOTREACHED*/
}

/*
 * Derive the initial DR state for a mem-unit from its attached/present
 * status, (re)load its per-unit data, and then perform the state
 * transition.
 */
void
dr_init_mem_unit(dr_mem_unit_t *mp)
{
	dr_state_t	new_state;


	if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
		new_state = DR_STATE_CONFIGURED;
		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
	} else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
		new_state = DR_STATE_CONNECTED;
		mp->sbm_cm.sbdev_cond = SBD_COND_OK;
	} else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
		new_state = DR_STATE_OCCUPIED;
	} else {
		new_state = DR_STATE_EMPTY;
	}

	if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
		dr_init_mem_unit_data(mp);

	/* delay transition until fully initialized */
	dr_device_transition(&mp->sbm_cm, new_state);
}

static void
dr_init_mem_unit_data(dr_mem_unit_t *mp)
{
	/*
	 * Populate the mem-unit's base pfn, page count, alignment mask
	 * and slice size from drmach, and register the span with the
	 * lgroup platform layer.  On drmach failure, records the error
	 * in sbdev_error and leaves the unit data untouched.
	 */
	drmachid_t	id = mp->sbm_cm.sbdev_id;
	drmach_mem_info_t	minfo;
	sbd_error_t	*err;
	static fn_t	f = "dr_init_mem_unit_data";
	update_membounds_t umb;

	PR_MEM("%s...\n", f);

	/* a little sanity checking */
	ASSERT(mp->sbm_peer == NULL);
	ASSERT(mp->sbm_flags == 0);

	if (err = drmach_mem_get_info(id, &minfo)) {
		DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
		return;
	}
	mp->sbm_basepfn = _b64top(minfo.mi_basepa);
	mp->sbm_npages = _b64top(minfo.mi_size);
	mp->sbm_alignment_mask = _b64top(minfo.mi_alignment_mask);
	mp->sbm_slice_size = minfo.mi_slice_size;

	/*
	 * Add memory to lgroup
	 */
	umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
	umb.u_base = (uint64_t)mp->sbm_basepfn << MMU_PAGESHIFT;
	umb.u_len = (uint64_t)mp->sbm_npages << MMU_PAGESHIFT;

	lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);

	PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n",
	    f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
}

/*
 * Schedule every span of the supplied memlist for deletion on the
 * given kphysm handle.  Returns 0 if every span was accepted, -1 on
 * the first kphysm_del_span failure (earlier spans remain scheduled;
 * they are undone when the handle is released).
 */
static int
dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
{
	int		err;
	pfn_t		base;
	pgcnt_t		npgs;
	struct memlist	*mc;
	static fn_t	f = "dr_reserve_mem_spans";

	PR_MEM("%s...\n", f);

	/*
	 * Walk the supplied memlist scheduling each span for removal
	 * with kphysm_del_span.  It is possible that a span may intersect
	 * an area occupied by the cage.
	 */
	for (mc = ml; mc != NULL; mc = mc->next) {
		base = _b64top(mc->address);
		npgs = _b64top(mc->size);

		err = kphysm_del_span(*mhp, base, npgs);
		if (err != KPHYSM_OK) {
			cmn_err(CE_WARN, "%s memory reserve failed."
			    " unexpected kphysm_del_span return value %d;"
			    " basepfn=0x%lx npages=%ld",
			    f, err, base, npgs);

			return (-1);
		}
	}

	return (0);
}

#define	DR_SMT_NPREF_SETS	6
#define	DR_SMT_NUNITS_PER_SET	MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD

/* debug counters */
int dr_smt_realigned;
int dr_smt_preference[DR_SMT_NPREF_SETS];

#ifdef DEBUG
uint_t dr_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
#endif

/*
 * Find and reserve a copy/rename target board suitable for the
 * given source board.
 * All boards in the system are examined and categorized in relation to
 * their memory size versus the source board's memory size.
Order of 2102*25cf1a30Sjl * preference is: 2103*25cf1a30Sjl * 1st copy all source, source/target same size 2104*25cf1a30Sjl * 2nd copy all source, larger target 2105*25cf1a30Sjl * 3rd copy nonrelocatable source span 2106*25cf1a30Sjl */ 2107*25cf1a30Sjl static int 2108*25cf1a30Sjl dr_select_mem_target(dr_handle_t *hp, 2109*25cf1a30Sjl dr_mem_unit_t *s_mp, struct memlist *s_ml) 2110*25cf1a30Sjl { 2111*25cf1a30Sjl dr_target_pref_t preference; /* lower value is higher preference */ 2112*25cf1a30Sjl int idx; 2113*25cf1a30Sjl dr_mem_unit_t **sets; 2114*25cf1a30Sjl 2115*25cf1a30Sjl int t_bd; 2116*25cf1a30Sjl int t_unit; 2117*25cf1a30Sjl int rv; 2118*25cf1a30Sjl dr_board_t *s_bp, *t_bp; 2119*25cf1a30Sjl dr_mem_unit_t *t_mp, *c_mp; 2120*25cf1a30Sjl struct memlist *d_ml, *t_ml, *ml, *b_ml, *x_ml = NULL; 2121*25cf1a30Sjl memquery_t s_mq = {0}; 2122*25cf1a30Sjl static fn_t f = "dr_select_mem_target"; 2123*25cf1a30Sjl 2124*25cf1a30Sjl PR_MEM("%s...\n", f); 2125*25cf1a30Sjl 2126*25cf1a30Sjl ASSERT(s_ml != NULL); 2127*25cf1a30Sjl 2128*25cf1a30Sjl sets = GETSTRUCT(dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET * 2129*25cf1a30Sjl DR_SMT_NPREF_SETS); 2130*25cf1a30Sjl 2131*25cf1a30Sjl s_bp = hp->h_bd; 2132*25cf1a30Sjl /* calculate the offset into the slice of the last source board pfn */ 2133*25cf1a30Sjl ASSERT(s_mp->sbm_npages != 0); 2134*25cf1a30Sjl 2135*25cf1a30Sjl /* 2136*25cf1a30Sjl * Find non-relocatable span on source board. 
2137*25cf1a30Sjl */ 2138*25cf1a30Sjl rv = kphysm_del_span_query(s_mp->sbm_basepfn, s_mp->sbm_npages, &s_mq); 2139*25cf1a30Sjl if (rv != KPHYSM_OK) { 2140*25cf1a30Sjl PR_MEM("%s: %s: unexpected kphysm_del_span_query" 2141*25cf1a30Sjl " return value %d; basepfn 0x%lx, npages %ld\n", 2142*25cf1a30Sjl f, s_mp->sbm_cm.sbdev_path, rv, s_mp->sbm_basepfn, 2143*25cf1a30Sjl s_mp->sbm_npages); 2144*25cf1a30Sjl return (-1); 2145*25cf1a30Sjl } 2146*25cf1a30Sjl 2147*25cf1a30Sjl ASSERT(s_mq.phys_pages != 0); 2148*25cf1a30Sjl ASSERT(s_mq.nonrelocatable != 0); 2149*25cf1a30Sjl 2150*25cf1a30Sjl PR_MEM("%s: %s: nonrelocatable span (0x%lx..0x%lx)\n", f, 2151*25cf1a30Sjl s_mp->sbm_cm.sbdev_path, s_mq.first_nonrelocatable, 2152*25cf1a30Sjl s_mq.last_nonrelocatable); 2153*25cf1a30Sjl 2154*25cf1a30Sjl /* break down s_ml if it contains dynamic segments */ 2155*25cf1a30Sjl b_ml = memlist_dup(s_ml); 2156*25cf1a30Sjl 2157*25cf1a30Sjl for (ml = s_mp->sbm_dyn_segs; ml; ml = ml->next) { 2158*25cf1a30Sjl b_ml = memlist_del_span(b_ml, ml->address, ml->size); 2159*25cf1a30Sjl b_ml = memlist_cat_span(b_ml, ml->address, ml->size); 2160*25cf1a30Sjl } 2161*25cf1a30Sjl 2162*25cf1a30Sjl 2163*25cf1a30Sjl /* 2164*25cf1a30Sjl * Make one pass through all memory units on all boards 2165*25cf1a30Sjl * and categorize them with respect to the source board. 2166*25cf1a30Sjl */ 2167*25cf1a30Sjl for (t_bd = 0; t_bd < MAX_BOARDS; t_bd++) { 2168*25cf1a30Sjl /* 2169*25cf1a30Sjl * The board structs are a contiguous array 2170*25cf1a30Sjl * so we take advantage of that to find the 2171*25cf1a30Sjl * correct board struct pointer for a given 2172*25cf1a30Sjl * board number. 
2173*25cf1a30Sjl */ 2174*25cf1a30Sjl t_bp = dr_lookup_board(t_bd); 2175*25cf1a30Sjl 2176*25cf1a30Sjl /* source board can not be its own target */ 2177*25cf1a30Sjl if (s_bp->b_num == t_bp->b_num) 2178*25cf1a30Sjl continue; 2179*25cf1a30Sjl 2180*25cf1a30Sjl for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) { 2181*25cf1a30Sjl 2182*25cf1a30Sjl t_mp = dr_get_mem_unit(t_bp, t_unit); 2183*25cf1a30Sjl 2184*25cf1a30Sjl /* this memory node must be attached */ 2185*25cf1a30Sjl if (!DR_DEV_IS_ATTACHED(&t_mp->sbm_cm)) 2186*25cf1a30Sjl continue; 2187*25cf1a30Sjl 2188*25cf1a30Sjl /* source unit can not be its own target */ 2189*25cf1a30Sjl if (s_mp == t_mp) { 2190*25cf1a30Sjl /* catch this is debug kernels */ 2191*25cf1a30Sjl ASSERT(0); 2192*25cf1a30Sjl continue; 2193*25cf1a30Sjl } 2194*25cf1a30Sjl 2195*25cf1a30Sjl /* 2196*25cf1a30Sjl * this memory node must not already be reserved 2197*25cf1a30Sjl * by some other memory delete operation. 2198*25cf1a30Sjl */ 2199*25cf1a30Sjl if (t_mp->sbm_flags & DR_MFLAG_RESERVED) 2200*25cf1a30Sjl continue; 2201*25cf1a30Sjl 2202*25cf1a30Sjl /* get target board memlist */ 2203*25cf1a30Sjl t_ml = dr_get_memlist(t_mp); 2204*25cf1a30Sjl if (t_ml == NULL) { 2205*25cf1a30Sjl cmn_err(CE_WARN, "%s: no memlist for" 2206*25cf1a30Sjl " mem-unit %d, board %d", f, 2207*25cf1a30Sjl t_mp->sbm_cm.sbdev_bp->b_num, 2208*25cf1a30Sjl t_mp->sbm_cm.sbdev_unum); 2209*25cf1a30Sjl continue; 2210*25cf1a30Sjl } 2211*25cf1a30Sjl 2212*25cf1a30Sjl preference = dr_get_target_preference(hp, t_mp, s_mp, 2213*25cf1a30Sjl t_ml, s_ml, b_ml); 2214*25cf1a30Sjl 2215*25cf1a30Sjl if (preference == DR_TP_INVALID) 2216*25cf1a30Sjl continue; 2217*25cf1a30Sjl 2218*25cf1a30Sjl dr_smt_preference[preference]++; 2219*25cf1a30Sjl 2220*25cf1a30Sjl /* calculate index to start of preference set */ 2221*25cf1a30Sjl idx = DR_SMT_NUNITS_PER_SET * preference; 2222*25cf1a30Sjl /* calculate offset to respective element */ 2223*25cf1a30Sjl idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit; 
2224*25cf1a30Sjl 2225*25cf1a30Sjl ASSERT(idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS); 2226*25cf1a30Sjl sets[idx] = t_mp; 2227*25cf1a30Sjl } 2228*25cf1a30Sjl } 2229*25cf1a30Sjl 2230*25cf1a30Sjl if (b_ml != NULL) 2231*25cf1a30Sjl memlist_delete(b_ml); 2232*25cf1a30Sjl 2233*25cf1a30Sjl /* 2234*25cf1a30Sjl * NOTE: this would be a good place to sort each candidate 2235*25cf1a30Sjl * set in to some desired order, e.g. memory size in ascending 2236*25cf1a30Sjl * order. Without an additional sorting step here, the order 2237*25cf1a30Sjl * within a set is ascending board number order. 2238*25cf1a30Sjl */ 2239*25cf1a30Sjl 2240*25cf1a30Sjl c_mp = NULL; 2241*25cf1a30Sjl x_ml = NULL; 2242*25cf1a30Sjl t_ml = NULL; 2243*25cf1a30Sjl for (idx = 0; idx < DR_SMT_NUNITS_PER_SET * DR_SMT_NPREF_SETS; idx++) { 2244*25cf1a30Sjl memquery_t mq; 2245*25cf1a30Sjl 2246*25cf1a30Sjl preference = (dr_target_pref_t)(idx / DR_SMT_NUNITS_PER_SET); 2247*25cf1a30Sjl 2248*25cf1a30Sjl ASSERT(preference != DR_TP_INVALID); 2249*25cf1a30Sjl 2250*25cf1a30Sjl /* cleanup t_ml after previous pass */ 2251*25cf1a30Sjl if (t_ml != NULL) { 2252*25cf1a30Sjl memlist_delete(t_ml); 2253*25cf1a30Sjl t_ml = NULL; 2254*25cf1a30Sjl } 2255*25cf1a30Sjl 2256*25cf1a30Sjl /* get candidate target board mem unit */ 2257*25cf1a30Sjl t_mp = sets[idx]; 2258*25cf1a30Sjl if (t_mp == NULL) 2259*25cf1a30Sjl continue; 2260*25cf1a30Sjl 2261*25cf1a30Sjl /* get target board memlist */ 2262*25cf1a30Sjl t_ml = dr_get_memlist(t_mp); 2263*25cf1a30Sjl if (t_ml == NULL) { 2264*25cf1a30Sjl cmn_err(CE_WARN, "%s: no memlist for" 2265*25cf1a30Sjl " mem-unit %d, board %d", 2266*25cf1a30Sjl f, 2267*25cf1a30Sjl t_mp->sbm_cm.sbdev_bp->b_num, 2268*25cf1a30Sjl t_mp->sbm_cm.sbdev_unum); 2269*25cf1a30Sjl 2270*25cf1a30Sjl continue; 2271*25cf1a30Sjl } 2272*25cf1a30Sjl 2273*25cf1a30Sjl PR_MEM("%s: checking for no-reloc in %s, " 2274*25cf1a30Sjl " basepfn=0x%lx, npages=%ld\n", 2275*25cf1a30Sjl f, 2276*25cf1a30Sjl t_mp->sbm_cm.sbdev_path, 2277*25cf1a30Sjl 
t_mp->sbm_basepfn, 2278*25cf1a30Sjl t_mp->sbm_npages); 2279*25cf1a30Sjl 2280*25cf1a30Sjl rv = dr_del_mlist_query(t_ml, &mq); 2281*25cf1a30Sjl if (rv != KPHYSM_OK) { 2282*25cf1a30Sjl PR_MEM("%s: kphysm_del_span_query:" 2283*25cf1a30Sjl " unexpected return value %d\n", f, rv); 2284*25cf1a30Sjl 2285*25cf1a30Sjl continue; 2286*25cf1a30Sjl } 2287*25cf1a30Sjl 2288*25cf1a30Sjl if (mq.nonrelocatable != 0) { 2289*25cf1a30Sjl PR_MEM("%s: candidate %s has" 2290*25cf1a30Sjl " nonrelocatable span [0x%lx..0x%lx]\n", 2291*25cf1a30Sjl f, 2292*25cf1a30Sjl t_mp->sbm_cm.sbdev_path, 2293*25cf1a30Sjl mq.first_nonrelocatable, 2294*25cf1a30Sjl mq.last_nonrelocatable); 2295*25cf1a30Sjl 2296*25cf1a30Sjl continue; 2297*25cf1a30Sjl } 2298*25cf1a30Sjl 2299*25cf1a30Sjl #ifdef DEBUG 2300*25cf1a30Sjl /* 2301*25cf1a30Sjl * This is a debug tool for excluding certain boards 2302*25cf1a30Sjl * from being selected as a target board candidate. 2303*25cf1a30Sjl * dr_ignore_board is only tested by this driver. 2304*25cf1a30Sjl * It must be set with adb, obp, /etc/system or your 2305*25cf1a30Sjl * favorite debugger. 2306*25cf1a30Sjl */ 2307*25cf1a30Sjl if (dr_ignore_board & 2308*25cf1a30Sjl (1 << (t_mp->sbm_cm.sbdev_bp->b_num - 1))) { 2309*25cf1a30Sjl PR_MEM("%s: dr_ignore_board flag set," 2310*25cf1a30Sjl " ignoring %s as candidate\n", 2311*25cf1a30Sjl f, t_mp->sbm_cm.sbdev_path); 2312*25cf1a30Sjl continue; 2313*25cf1a30Sjl } 2314*25cf1a30Sjl #endif 2315*25cf1a30Sjl 2316*25cf1a30Sjl /* 2317*25cf1a30Sjl * Reserve excess source board memory, if any. 2318*25cf1a30Sjl * 2319*25cf1a30Sjl * Only the nonrelocatable source span will be copied 2320*25cf1a30Sjl * so schedule the rest of the source mem to be deleted. 2321*25cf1a30Sjl */ 2322*25cf1a30Sjl switch (preference) { 2323*25cf1a30Sjl case DR_TP_NONRELOC: 2324*25cf1a30Sjl /* 2325*25cf1a30Sjl * Get source copy memlist and use it to construct 2326*25cf1a30Sjl * delete memlist. 
2327*25cf1a30Sjl */ 2328*25cf1a30Sjl d_ml = memlist_dup(s_ml); 2329*25cf1a30Sjl x_ml = dr_get_copy_mlist(s_ml, t_ml, s_mp, t_mp); 2330*25cf1a30Sjl 2331*25cf1a30Sjl /* XXX */ 2332*25cf1a30Sjl ASSERT(d_ml != NULL); 2333*25cf1a30Sjl ASSERT(x_ml != NULL); 2334*25cf1a30Sjl 2335*25cf1a30Sjl for (ml = x_ml; ml != NULL; ml = ml->next) { 2336*25cf1a30Sjl d_ml = memlist_del_span(d_ml, ml->address, 2337*25cf1a30Sjl ml->size); 2338*25cf1a30Sjl } 2339*25cf1a30Sjl 2340*25cf1a30Sjl PR_MEM("%s: %s: reserving src brd memlist:\n", f, 2341*25cf1a30Sjl s_mp->sbm_cm.sbdev_path); 2342*25cf1a30Sjl PR_MEMLIST_DUMP(d_ml); 2343*25cf1a30Sjl 2344*25cf1a30Sjl /* reserve excess spans */ 2345*25cf1a30Sjl if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, 2346*25cf1a30Sjl d_ml) != 0) { 2347*25cf1a30Sjl /* likely more non-reloc pages appeared */ 2348*25cf1a30Sjl /* TODO: restart from top? */ 2349*25cf1a30Sjl continue; 2350*25cf1a30Sjl } 2351*25cf1a30Sjl break; 2352*25cf1a30Sjl default: 2353*25cf1a30Sjl d_ml = NULL; 2354*25cf1a30Sjl break; 2355*25cf1a30Sjl } 2356*25cf1a30Sjl 2357*25cf1a30Sjl s_mp->sbm_flags |= DR_MFLAG_RESERVED; 2358*25cf1a30Sjl 2359*25cf1a30Sjl /* 2360*25cf1a30Sjl * reserve all memory on target board. 2361*25cf1a30Sjl * NOTE: source board's memhandle is used. 2362*25cf1a30Sjl * 2363*25cf1a30Sjl * If this succeeds (eq 0), then target selection is 2364*25cf1a30Sjl * complete and all unwanted memory spans, both source and 2365*25cf1a30Sjl * target, have been reserved. Loop is terminated. 
2366*25cf1a30Sjl */ 2367*25cf1a30Sjl if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) { 2368*25cf1a30Sjl PR_MEM("%s: %s: target board memory reserved\n", 2369*25cf1a30Sjl f, t_mp->sbm_cm.sbdev_path); 2370*25cf1a30Sjl 2371*25cf1a30Sjl /* a candidate target board is now reserved */ 2372*25cf1a30Sjl t_mp->sbm_flags |= DR_MFLAG_RESERVED; 2373*25cf1a30Sjl c_mp = t_mp; 2374*25cf1a30Sjl 2375*25cf1a30Sjl /* *** EXITING LOOP *** */ 2376*25cf1a30Sjl break; 2377*25cf1a30Sjl } 2378*25cf1a30Sjl 2379*25cf1a30Sjl /* did not successfully reserve the target board. */ 2380*25cf1a30Sjl PR_MEM("%s: could not reserve target %s\n", 2381*25cf1a30Sjl f, t_mp->sbm_cm.sbdev_path); 2382*25cf1a30Sjl 2383*25cf1a30Sjl /* 2384*25cf1a30Sjl * NOTE: an undo of the dr_reserve_mem_span work 2385*25cf1a30Sjl * will happen automatically when the memhandle 2386*25cf1a30Sjl * (s_mp->sbm_memhandle) is kphysm_del_release'd. 2387*25cf1a30Sjl */ 2388*25cf1a30Sjl 2389*25cf1a30Sjl s_mp->sbm_flags &= ~DR_MFLAG_RESERVED; 2390*25cf1a30Sjl } 2391*25cf1a30Sjl 2392*25cf1a30Sjl /* clean up after memlist editing logic */ 2393*25cf1a30Sjl if (x_ml != NULL) 2394*25cf1a30Sjl memlist_delete(x_ml); 2395*25cf1a30Sjl 2396*25cf1a30Sjl FREESTRUCT(sets, dr_mem_unit_t *, DR_SMT_NUNITS_PER_SET * 2397*25cf1a30Sjl DR_SMT_NPREF_SETS); 2398*25cf1a30Sjl 2399*25cf1a30Sjl /* 2400*25cf1a30Sjl * c_mp will be NULL when the entire sets[] array 2401*25cf1a30Sjl * has been searched without reserving a target board. 
2402*25cf1a30Sjl */ 2403*25cf1a30Sjl if (c_mp == NULL) { 2404*25cf1a30Sjl PR_MEM("%s: %s: target selection failed.\n", 2405*25cf1a30Sjl f, s_mp->sbm_cm.sbdev_path); 2406*25cf1a30Sjl 2407*25cf1a30Sjl if (t_ml != NULL) 2408*25cf1a30Sjl memlist_delete(t_ml); 2409*25cf1a30Sjl 2410*25cf1a30Sjl return (-1); 2411*25cf1a30Sjl } 2412*25cf1a30Sjl 2413*25cf1a30Sjl PR_MEM("%s: found target %s for source %s\n", 2414*25cf1a30Sjl f, 2415*25cf1a30Sjl c_mp->sbm_cm.sbdev_path, 2416*25cf1a30Sjl s_mp->sbm_cm.sbdev_path); 2417*25cf1a30Sjl 2418*25cf1a30Sjl s_mp->sbm_peer = c_mp; 2419*25cf1a30Sjl s_mp->sbm_flags |= DR_MFLAG_SOURCE; 2420*25cf1a30Sjl s_mp->sbm_del_mlist = d_ml; /* spans to be deleted, if any */ 2421*25cf1a30Sjl s_mp->sbm_mlist = s_ml; 2422*25cf1a30Sjl s_mp->sbm_cm.sbdev_busy = 1; 2423*25cf1a30Sjl 2424*25cf1a30Sjl c_mp->sbm_peer = s_mp; 2425*25cf1a30Sjl c_mp->sbm_flags |= DR_MFLAG_TARGET; 2426*25cf1a30Sjl c_mp->sbm_del_mlist = t_ml; /* spans to be deleted */ 2427*25cf1a30Sjl c_mp->sbm_mlist = t_ml; 2428*25cf1a30Sjl c_mp->sbm_cm.sbdev_busy = 1; 2429*25cf1a30Sjl 2430*25cf1a30Sjl return (0); 2431*25cf1a30Sjl } 2432*25cf1a30Sjl 2433*25cf1a30Sjl /* 2434*25cf1a30Sjl * Returns target preference rank: 2435*25cf1a30Sjl * -1 not a valid copy-rename target board 2436*25cf1a30Sjl * 0 copy all source, source/target same size 2437*25cf1a30Sjl * 1 copy all source, larger target 2438*25cf1a30Sjl * 2 copy nonrelocatable source span 2439*25cf1a30Sjl */ 2440*25cf1a30Sjl static dr_target_pref_t 2441*25cf1a30Sjl dr_get_target_preference(dr_handle_t *hp, 2442*25cf1a30Sjl dr_mem_unit_t *t_mp, dr_mem_unit_t *s_mp, 2443*25cf1a30Sjl struct memlist *t_ml, struct memlist *s_ml, 2444*25cf1a30Sjl struct memlist *b_ml) 2445*25cf1a30Sjl { 2446*25cf1a30Sjl dr_target_pref_t preference; 2447*25cf1a30Sjl struct memlist *s_nonreloc_ml = NULL; 2448*25cf1a30Sjl drmachid_t t_id; 2449*25cf1a30Sjl static fn_t f = "dr_get_target_preference"; 2450*25cf1a30Sjl 2451*25cf1a30Sjl t_id = t_mp->sbm_cm.sbdev_bp->b_id; 
2452*25cf1a30Sjl 2453*25cf1a30Sjl /* 2454*25cf1a30Sjl * Can the entire source board be copied? 2455*25cf1a30Sjl */ 2456*25cf1a30Sjl if (dr_memlist_canfit(s_ml, t_ml, s_mp, t_mp)) { 2457*25cf1a30Sjl if (s_mp->sbm_npages == t_mp->sbm_npages) 2458*25cf1a30Sjl preference = DR_TP_SAME; /* same size */ 2459*25cf1a30Sjl else 2460*25cf1a30Sjl preference = DR_TP_LARGE; /* larger target */ 2461*25cf1a30Sjl } else { 2462*25cf1a30Sjl /* 2463*25cf1a30Sjl * Entire source won't fit so try non-relocatable memory only 2464*25cf1a30Sjl * (target aligned). 2465*25cf1a30Sjl */ 2466*25cf1a30Sjl s_nonreloc_ml = dr_get_nonreloc_mlist(b_ml, s_mp); 2467*25cf1a30Sjl if (s_nonreloc_ml == NULL) { 2468*25cf1a30Sjl PR_MEM("%s: dr_get_nonreloc_mlist failed\n", f); 2469*25cf1a30Sjl preference = DR_TP_INVALID; 2470*25cf1a30Sjl } 2471*25cf1a30Sjl if (dr_memlist_canfit(s_nonreloc_ml, t_ml, s_mp, t_mp)) 2472*25cf1a30Sjl preference = DR_TP_NONRELOC; 2473*25cf1a30Sjl else 2474*25cf1a30Sjl preference = DR_TP_INVALID; 2475*25cf1a30Sjl } 2476*25cf1a30Sjl 2477*25cf1a30Sjl if (s_nonreloc_ml != NULL) 2478*25cf1a30Sjl memlist_delete(s_nonreloc_ml); 2479*25cf1a30Sjl 2480*25cf1a30Sjl /* 2481*25cf1a30Sjl * Force floating board preference lower than all other boards 2482*25cf1a30Sjl * if the force flag is present; otherwise disallow the board. 2483*25cf1a30Sjl */ 2484*25cf1a30Sjl if ((preference != DR_TP_INVALID) && drmach_board_is_floating(t_id)) { 2485*25cf1a30Sjl if (dr_cmd_flags(hp) & SBD_FLAG_FORCE) 2486*25cf1a30Sjl preference += DR_TP_FLOATING; 2487*25cf1a30Sjl else 2488*25cf1a30Sjl preference = DR_TP_INVALID; 2489*25cf1a30Sjl } 2490*25cf1a30Sjl 2491*25cf1a30Sjl PR_MEM("%s: %s preference=%d\n", f, t_mp->sbm_cm.sbdev_path, 2492*25cf1a30Sjl preference); 2493*25cf1a30Sjl 2494*25cf1a30Sjl return (preference); 2495*25cf1a30Sjl } 2496*25cf1a30Sjl 2497*25cf1a30Sjl /* 2498*25cf1a30Sjl * Create a memlist representing the source memory that will be copied to 2499*25cf1a30Sjl * the target board. 
 * The memory to be copied is the maximum amount that
 * will fit on the target board.
 *
 * The returned list is freshly allocated; the caller is responsible
 * for releasing it with memlist_delete().
 */
static struct memlist *
dr_get_copy_mlist(struct memlist *s_mlist, struct memlist *t_mlist,
    dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
{
	struct memlist	*t_ml, *s_copy_ml, *s_del_ml, *ml, *x_ml;
	uint64_t	s_slice_mask, s_slice_base;
	uint64_t	t_slice_mask, t_slice_base;
	static fn_t	f = "dr_get_copy_mlist";

	ASSERT(s_mlist != NULL);
	ASSERT(t_mlist != NULL);
	ASSERT(t_mp->sbm_slice_size == s_mp->sbm_slice_size);

	s_slice_mask = s_mp->sbm_slice_size - 1;
	s_slice_base = s_mlist->address & ~s_slice_mask;

	t_slice_mask = t_mp->sbm_slice_size - 1;
	t_slice_base = t_mlist->address & ~t_slice_mask;

	t_ml = memlist_dup(t_mlist);
	s_del_ml = memlist_dup(s_mlist);
	s_copy_ml = memlist_dup(s_mlist);

	/* XXX */
	ASSERT(t_ml != NULL);
	ASSERT(s_del_ml != NULL);
	ASSERT(s_copy_ml != NULL);

	/*
	 * To construct the source copy memlist:
	 *
	 * The target memlist is converted to the post-rename
	 * source addresses.  This is the physical address range
	 * the target will have after the copy-rename.  Overlaying
	 * and deleting this from the current source memlist will
	 * give the source delete memlist.  The copy memlist is
	 * the reciprocal of the source delete memlist.
	 */
	for (ml = t_ml; ml != NULL; ml = ml->next) {
		/*
		 * Normalize relative to target slice base PA
		 * in order to preserve slice offsets.
		 */
		ml->address -= t_slice_base;
		/*
		 * Convert to source slice PA address.
		 */
		ml->address += s_slice_base;
	}

	for (ml = t_ml; ml != NULL; ml = ml->next) {
		s_del_ml = memlist_del_span(s_del_ml, ml->address, ml->size);
	}

	/*
	 * Expand the delete mlist to fully include any dynamic segments
	 * it intersects with.
	 */
	for (x_ml = NULL, ml = s_del_ml; ml != NULL; ml = ml->next) {
		uint64_t del_base = ml->address;
		uint64_t del_end = ml->address + ml->size;
		struct memlist *dyn;

		for (dyn = s_mp->sbm_dyn_segs; dyn != NULL; dyn = dyn->next) {
			uint64_t dyn_base = dyn->address;
			uint64_t dyn_end = dyn->address + dyn->size;

			/* grow the span to whole-dyn-seg boundaries */
			if (del_base > dyn_base && del_base < dyn_end)
				del_base = dyn_base;

			if (del_end > dyn_base && del_end < dyn_end)
				del_end = dyn_end;
		}

		x_ml = memlist_cat_span(x_ml, del_base, del_end - del_base);
	}

	memlist_delete(s_del_ml);
	s_del_ml = x_ml;

	for (ml = s_del_ml; ml != NULL; ml = ml->next) {
		s_copy_ml = memlist_del_span(s_copy_ml, ml->address, ml->size);
	}

	PR_MEM("%s: source delete mlist\n", f);
	PR_MEMLIST_DUMP(s_del_ml);

	PR_MEM("%s: source copy mlist\n", f);
	PR_MEMLIST_DUMP(s_copy_ml);

	/* scratch lists are no longer needed */
	memlist_delete(t_ml);
	memlist_delete(s_del_ml);

	return (s_copy_ml);
}

/*
 * Scan the non-relocatable spans on the source memory
 * and construct a minimum mlist that includes all non-reloc
 * memory subject to target alignment, and dynamic segment
 * constraints where only whole dynamic segments may be deleted.
 */
static struct memlist *
dr_get_nonreloc_mlist(struct memlist *s_ml, dr_mem_unit_t *s_mp)
{
	struct memlist	*x_ml = NULL;
	struct memlist	*ml;
	static fn_t	f = "dr_get_nonreloc_mlist";

	PR_MEM("%s: checking for split of dyn seg list:\n", f);
	PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);

	for (ml = s_ml; ml; ml = ml->next) {
		int rv;
		uint64_t nr_base, nr_end;
		memquery_t mq;
		struct memlist *dyn;

		rv = kphysm_del_span_query(
		    _b64top(ml->address), _b64top(ml->size), &mq);
		if (rv) {
			memlist_delete(x_ml);
			return (NULL);
		}

		if (mq.nonrelocatable == 0)
			continue;

		PR_MEM("%s: non-reloc span: 0x%lx, 0x%lx (%lx, %lx)\n", f,
		    _ptob64(mq.first_nonrelocatable),
		    _ptob64(mq.last_nonrelocatable),
		    mq.first_nonrelocatable,
		    mq.last_nonrelocatable);

		/*
		 * Align the span at both ends to allow for
possible 2638*25cf1a30Sjl * cage expansion. 2639*25cf1a30Sjl */ 2640*25cf1a30Sjl nr_base = _ptob64(mq.first_nonrelocatable); 2641*25cf1a30Sjl nr_end = _ptob64(mq.last_nonrelocatable + 1); 2642*25cf1a30Sjl 2643*25cf1a30Sjl PR_MEM("%s: adjusted non-reloc span: 0x%lx, 0x%lx\n", 2644*25cf1a30Sjl f, nr_base, nr_end); 2645*25cf1a30Sjl 2646*25cf1a30Sjl /* 2647*25cf1a30Sjl * Expand the non-reloc span to fully include any 2648*25cf1a30Sjl * dynamic segments it intersects with. 2649*25cf1a30Sjl */ 2650*25cf1a30Sjl for (dyn = s_mp->sbm_dyn_segs; dyn != NULL; dyn = dyn->next) { 2651*25cf1a30Sjl uint64_t dyn_base = dyn->address; 2652*25cf1a30Sjl uint64_t dyn_end = dyn->address + dyn->size; 2653*25cf1a30Sjl 2654*25cf1a30Sjl if (nr_base > dyn_base && nr_base < dyn_end) 2655*25cf1a30Sjl nr_base = dyn_base; 2656*25cf1a30Sjl 2657*25cf1a30Sjl if (nr_end > dyn_base && nr_end < dyn_end) 2658*25cf1a30Sjl nr_end = dyn_end; 2659*25cf1a30Sjl } 2660*25cf1a30Sjl 2661*25cf1a30Sjl x_ml = memlist_cat_span(x_ml, nr_base, nr_end - nr_base); 2662*25cf1a30Sjl } 2663*25cf1a30Sjl 2664*25cf1a30Sjl if (x_ml == NULL) { 2665*25cf1a30Sjl PR_MEM("%s: source didn't have any non-reloc pages!\n", f); 2666*25cf1a30Sjl return (NULL); 2667*25cf1a30Sjl } 2668*25cf1a30Sjl 2669*25cf1a30Sjl PR_MEM("%s: %s: edited source memlist:\n", f, s_mp->sbm_cm.sbdev_path); 2670*25cf1a30Sjl PR_MEMLIST_DUMP(x_ml); 2671*25cf1a30Sjl 2672*25cf1a30Sjl return (x_ml); 2673*25cf1a30Sjl } 2674*25cf1a30Sjl 2675*25cf1a30Sjl /* 2676*25cf1a30Sjl * Check if source memlist can fit in target memlist while maintaining 2677*25cf1a30Sjl * relative offsets within board. 
2678*25cf1a30Sjl */ 2679*25cf1a30Sjl static int 2680*25cf1a30Sjl dr_memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist, 2681*25cf1a30Sjl dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp) 2682*25cf1a30Sjl { 2683*25cf1a30Sjl int canfit = 0; 2684*25cf1a30Sjl struct memlist *s_ml, *t_ml, *ml; 2685*25cf1a30Sjl uint64_t s_slice_mask, t_slice_mask; 2686*25cf1a30Sjl static fn_t f = "dr_mlist_canfit"; 2687*25cf1a30Sjl 2688*25cf1a30Sjl s_ml = memlist_dup(s_mlist); 2689*25cf1a30Sjl t_ml = memlist_dup(t_mlist); 2690*25cf1a30Sjl 2691*25cf1a30Sjl if (s_ml == NULL || t_ml == NULL) { 2692*25cf1a30Sjl cmn_err(CE_WARN, "%s: memlist_dup failed\n", f); 2693*25cf1a30Sjl goto done; 2694*25cf1a30Sjl } 2695*25cf1a30Sjl 2696*25cf1a30Sjl s_slice_mask = s_mp->sbm_slice_size - 1; 2697*25cf1a30Sjl t_slice_mask = t_mp->sbm_slice_size - 1; 2698*25cf1a30Sjl 2699*25cf1a30Sjl /* 2700*25cf1a30Sjl * Normalize to slice relative offsets. 2701*25cf1a30Sjl */ 2702*25cf1a30Sjl for (ml = s_ml; ml; ml = ml->next) 2703*25cf1a30Sjl ml->address &= s_slice_mask; 2704*25cf1a30Sjl 2705*25cf1a30Sjl for (ml = t_ml; ml; ml = ml->next) 2706*25cf1a30Sjl ml->address &= t_slice_mask; 2707*25cf1a30Sjl 2708*25cf1a30Sjl canfit = memlist_canfit(s_ml, t_ml); 2709*25cf1a30Sjl done: 2710*25cf1a30Sjl memlist_delete(s_ml); 2711*25cf1a30Sjl memlist_delete(t_ml); 2712*25cf1a30Sjl 2713*25cf1a30Sjl return (canfit); 2714*25cf1a30Sjl } 2715*25cf1a30Sjl 2716*25cf1a30Sjl /* 2717*25cf1a30Sjl * Memlist support. 2718*25cf1a30Sjl */ 2719*25cf1a30Sjl 2720*25cf1a30Sjl /* 2721*25cf1a30Sjl * Determine whether the source memlist (s_mlist) will 2722*25cf1a30Sjl * fit into the target memlist (t_mlist) in terms of 2723*25cf1a30Sjl * size and holes. Assumes the caller has normalized the 2724*25cf1a30Sjl * memlist physical addresses for comparison. 
2725*25cf1a30Sjl */ 2726*25cf1a30Sjl static int 2727*25cf1a30Sjl memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist) 2728*25cf1a30Sjl { 2729*25cf1a30Sjl int rv = 0; 2730*25cf1a30Sjl struct memlist *s_ml, *t_ml; 2731*25cf1a30Sjl 2732*25cf1a30Sjl if ((s_mlist == NULL) || (t_mlist == NULL)) 2733*25cf1a30Sjl return (0); 2734*25cf1a30Sjl 2735*25cf1a30Sjl s_ml = s_mlist; 2736*25cf1a30Sjl for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->next) { 2737*25cf1a30Sjl uint64_t s_start, s_end; 2738*25cf1a30Sjl uint64_t t_start, t_end; 2739*25cf1a30Sjl 2740*25cf1a30Sjl t_start = t_ml->address; 2741*25cf1a30Sjl t_end = t_start + t_ml->size; 2742*25cf1a30Sjl 2743*25cf1a30Sjl for (; s_ml; s_ml = s_ml->next) { 2744*25cf1a30Sjl s_start = s_ml->address; 2745*25cf1a30Sjl s_end = s_start + s_ml->size; 2746*25cf1a30Sjl 2747*25cf1a30Sjl if ((s_start < t_start) || (s_end > t_end)) 2748*25cf1a30Sjl break; 2749*25cf1a30Sjl } 2750*25cf1a30Sjl } 2751*25cf1a30Sjl 2752*25cf1a30Sjl /* 2753*25cf1a30Sjl * If we ran out of source memlist chunks that mean 2754*25cf1a30Sjl * we found a home for all of them. 2755*25cf1a30Sjl */ 2756*25cf1a30Sjl if (s_ml == NULL) 2757*25cf1a30Sjl rv = 1; 2758*25cf1a30Sjl 2759*25cf1a30Sjl return (rv); 2760*25cf1a30Sjl } 2761