1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* 23*7c478bd9Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
25*7c478bd9Sstevel@tonic-gate */ 26*7c478bd9Sstevel@tonic-gate 27*7c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28*7c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 29*7c478bd9Sstevel@tonic-gate 30*7c478bd9Sstevel@tonic-gate /* 31*7c478bd9Sstevel@tonic-gate * University Copyright- Copyright (c) 1982, 1986, 1988 32*7c478bd9Sstevel@tonic-gate * The Regents of the University of California 33*7c478bd9Sstevel@tonic-gate * All Rights Reserved 34*7c478bd9Sstevel@tonic-gate * 35*7c478bd9Sstevel@tonic-gate * University Acknowledgment- Portions of this document are derived from 36*7c478bd9Sstevel@tonic-gate * software developed by the University of California, Berkeley, and its 37*7c478bd9Sstevel@tonic-gate * contributors. 38*7c478bd9Sstevel@tonic-gate */ 39*7c478bd9Sstevel@tonic-gate 40*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 41*7c478bd9Sstevel@tonic-gate 42*7c478bd9Sstevel@tonic-gate /* 43*7c478bd9Sstevel@tonic-gate * VM - address spaces. 
44*7c478bd9Sstevel@tonic-gate */ 45*7c478bd9Sstevel@tonic-gate 46*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 47*7c478bd9Sstevel@tonic-gate #include <sys/t_lock.h> 48*7c478bd9Sstevel@tonic-gate #include <sys/param.h> 49*7c478bd9Sstevel@tonic-gate #include <sys/errno.h> 50*7c478bd9Sstevel@tonic-gate #include <sys/systm.h> 51*7c478bd9Sstevel@tonic-gate #include <sys/mman.h> 52*7c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 53*7c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 54*7c478bd9Sstevel@tonic-gate #include <sys/sysinfo.h> 55*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 56*7c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 57*7c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h> 58*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 59*7c478bd9Sstevel@tonic-gate #include <sys/debug.h> 60*7c478bd9Sstevel@tonic-gate #include <sys/tnf_probe.h> 61*7c478bd9Sstevel@tonic-gate #include <sys/vtrace.h> 62*7c478bd9Sstevel@tonic-gate 63*7c478bd9Sstevel@tonic-gate #include <vm/hat.h> 64*7c478bd9Sstevel@tonic-gate #include <vm/xhat.h> 65*7c478bd9Sstevel@tonic-gate #include <vm/as.h> 66*7c478bd9Sstevel@tonic-gate #include <vm/seg.h> 67*7c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h> 68*7c478bd9Sstevel@tonic-gate #include <vm/seg_dev.h> 69*7c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 70*7c478bd9Sstevel@tonic-gate #include <vm/seg_map.h> 71*7c478bd9Sstevel@tonic-gate #include <vm/seg_spt.h> 72*7c478bd9Sstevel@tonic-gate #include <vm/page.h> 73*7c478bd9Sstevel@tonic-gate 74*7c478bd9Sstevel@tonic-gate clock_t deadlk_wait = 1; /* number of ticks to wait before retrying */ 75*7c478bd9Sstevel@tonic-gate 76*7c478bd9Sstevel@tonic-gate static struct kmem_cache *as_cache; 77*7c478bd9Sstevel@tonic-gate 78*7c478bd9Sstevel@tonic-gate static void as_setwatchprot(struct as *, caddr_t, size_t, uint_t); 79*7c478bd9Sstevel@tonic-gate static void as_clearwatchprot(struct as *, caddr_t, size_t); 80*7c478bd9Sstevel@tonic-gate 81*7c478bd9Sstevel@tonic-gate 
/*
 * Verifying the segment lists is very time-consuming; it may not be
 * desirable always to define VERIFY_SEGLIST when DEBUG is set.
 */
#ifdef DEBUG
#define VERIFY_SEGLIST
int do_as_verify = 0;   /* runtime switch; as_verify() is a no-op when 0 */
#endif

/*
 * Allocate a new callback data structure entry and fill in the events of
 * interest, the address range of interest, and the callback argument.
 * Link the entry on the as->a_callbacks list.  A callback entry for the
 * entire address space may be specified with vaddr = 0 and size = -1.
 *
 * Returns 0 on success; EINVAL for a missing function/event or a bad
 * whole-as specification; ENOMEM for the dying kernel as or an address
 * range that wraps; EAGAIN if the entry cannot be allocated under
 * sleepflag.
 *
 * CALLERS RESPONSIBILITY: If not calling from within the process context for
 * the specified as, the caller must guarantee persistence of the specified as
 * for the duration of this function (eg. pages being locked within the as
 * will guarantee persistence).
 */
int
as_add_callback(struct as *as, void (*cb_func)(), void *arg, uint_t events,
    caddr_t vaddr, size_t size, int sleepflag)
{
    struct as_callback *current_head, *cb;
    caddr_t saddr;
    size_t rsize;

    /* callback function and an event are mandatory */
    if ((cb_func == NULL) || ((events & AS_ALL_EVENT) == 0))
        return (EINVAL);

    /* Adding a callback after as_free has been called is not allowed */
    if (as == &kas)
        return (ENOMEM);

    /*
     * vaddr = 0 and size = -1 is used to indicate that the callback range
     * is the entire address space so no rounding is done in that case.
     */
    if (size != -1) {
        /* round the range outward to page boundaries */
        saddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
        rsize = (((size_t)(vaddr + size) + PAGEOFFSET) & PAGEMASK) -
            (size_t)saddr;
        /* check for wraparound */
        if (saddr + rsize < saddr)
            return (ENOMEM);
    } else {
        if (vaddr != 0)
            return (EINVAL);
        saddr = vaddr;
        rsize = size;
    }

    /* Allocate and initialize a callback entry */
    cb = kmem_zalloc(sizeof (struct as_callback), sleepflag);
    if (cb == NULL)
        return (EAGAIN);

    cb->ascb_func = cb_func;
    cb->ascb_arg = arg;
    cb->ascb_events = events;
    cb->ascb_saddr = saddr;
    cb->ascb_len = rsize;

    /* Add the entry to the head of the list under a_contents */
    mutex_enter(&as->a_contents);
    current_head = as->a_callbacks;
    as->a_callbacks = cb;
    cb->ascb_next = current_head;

    /*
     * The call to this function may lose in a race with
     * a pertinent event - eg. a thread does long term memory locking
     * but before the callback is added another thread executes as_unmap.
     * A broadcast here resolves that.
     */
    if ((cb->ascb_events & AS_UNMAPWAIT_EVENT) && AS_ISUNMAPWAIT(as)) {
        AS_CLRUNMAPWAIT(as);
        cv_broadcast(&as->a_cv);
    }

    mutex_exit(&as->a_contents);
    return (0);
}

/*
 * Search the callback list for an entry which pertains to arg.
 *
 * This is called from within the client upon completion of the callback.
 * RETURN VALUES:
 *	AS_CALLBACK_DELETED  (callback entry found and deleted)
 *	AS_CALLBACK_NOTFOUND (no callback entry found - this is ok)
 *	AS_CALLBACK_DELETE_DEFERRED (callback is in process, delete of this
 *			entry will be made in as_do_callbacks)
 *
 * If as_delete_callback encounters a matching entry with AS_CALLBACK_CALLED
 * set, it indicates that as_do_callbacks is processing this entry.  The
 * AS_ALL_EVENT events are cleared in the entry, and a broadcast is made
 * to unblock as_do_callbacks, in case it is blocked.
 */
/*
 * CALLERS RESPONSIBILITY: If not calling from within the process context for
 * the specified as, the caller must guarantee persistence of the specified as
 * for the duration of this function (eg. pages being locked within the as
 * will guarantee persistence).
 */
uint_t
as_delete_callback(struct as *as, void *arg)
{
    struct as_callback **prevcb = &as->a_callbacks;
    struct as_callback *cb;
    uint_t rc = AS_CALLBACK_NOTFOUND;

    mutex_enter(&as->a_contents);
    /* walk the singly-linked callback list keeping a link to the prev ptr */
    for (cb = as->a_callbacks; cb; prevcb = &cb->ascb_next, cb = *prevcb) {
        if (cb->ascb_arg != arg)
            continue;

        /*
         * If the events indicate AS_CALLBACK_CALLED, just clear
         * AS_ALL_EVENT in the events field and wakeup the thread
         * that may be waiting in as_do_callbacks.  as_do_callbacks
         * will take care of removing this entry from the list.  In
         * that case, return AS_CALLBACK_DELETE_DEFERRED.  Otherwise
         * (AS_CALLBACK_CALLED not set), just remove it from the
         * list, return the memory and return AS_CALLBACK_DELETED.
         */
        if ((cb->ascb_events & AS_CALLBACK_CALLED) != 0) {
            /* leave AS_CALLBACK_CALLED */
            cb->ascb_events &= ~AS_ALL_EVENT;
            rc = AS_CALLBACK_DELETE_DEFERRED;
            cv_broadcast(&as->a_cv);
        } else {
            *prevcb = cb->ascb_next;
            kmem_free(cb, sizeof (struct as_callback));
            rc = AS_CALLBACK_DELETED;
        }
        break;          /* only the first matching entry is handled */
    }
    mutex_exit(&as->a_contents);
    return (rc);
}

/*
 * Searches the as callback list for a matching entry.
 * Returns a pointer to the first matching callback, or NULL if
 * nothing is found.
 * This function never sleeps so it is ok to call it with more
 * locks held but the (required) a_contents mutex.
 *
 * See also comment on as_do_callbacks below.
 */
/*
 * Return the first not-yet-called callback entry whose events and address
 * range pertain to (events, event_addr, event_len), or NULL.  Never sleeps;
 * the a_contents mutex must be held by the caller.
 */
static struct as_callback *
as_find_callback(struct as *as, uint_t events, caddr_t event_addr,
    size_t event_len)
{
    struct as_callback *cb;

    ASSERT(MUTEX_HELD(&as->a_contents));
    for (cb = as->a_callbacks; cb != NULL; cb = cb->ascb_next) {
        /*
         * If the callback has not already been called, then
         * check if events or address range pertains.  An event_len
         * of zero means do an unconditional callback.
         */
        if (((cb->ascb_events & AS_CALLBACK_CALLED) != 0) ||
            ((event_len != 0) && (((cb->ascb_events & events) == 0) ||
            (event_addr + event_len < cb->ascb_saddr) ||
            (event_addr > (cb->ascb_saddr + cb->ascb_len))))) {
            continue;
        }
        break;
    }
    return (cb);
}

/*
 * Executes a given callback and removes it from the callback list for
 * this address space.
 * This function may sleep so the caller must drop all locks except
 * a_contents before calling this func.
 *
 * See also comments on as_do_callbacks below.
 */
static void
as_execute_callback(struct as *as, struct as_callback *cb,
    uint_t events)
{
    struct as_callback **prevcb;
    void *cb_arg;

    ASSERT(MUTEX_HELD(&as->a_contents) && (cb->ascb_events & events));
    /* mark in-progress so as_delete_callback defers instead of freeing */
    cb->ascb_events |= AS_CALLBACK_CALLED;
    mutex_exit(&as->a_contents);
    (*cb->ascb_func)(as, cb->ascb_arg, events);
    mutex_enter(&as->a_contents);
    /*
     * the callback function is required to delete the callback
     * when the callback function determines it is OK for
     * this thread to continue.  as_delete_callback will clear
     * the AS_ALL_EVENT in the events field when it is deleted.
     * If the callback function called as_delete_callback,
     * events will already be cleared and there will be no blocking.
     */
    while ((cb->ascb_events & events) != 0) {
        cv_wait(&as->a_cv, &as->a_contents);
    }
    /*
     * This entry needs to be taken off the list.  Normally, the
     * callback func itself does that, but unfortunately the list
     * may have changed while the callback was running because the
     * a_contents mutex was dropped and someone else other than the
     * callback func itself could have called as_delete_callback,
     * so we have to search to find this entry again.  The entry
     * must have AS_CALLBACK_CALLED, and have the same 'arg'.
     */
    cb_arg = cb->ascb_arg;
    prevcb = &as->a_callbacks;
    for (cb = as->a_callbacks; cb != NULL;
        prevcb = &cb->ascb_next, cb = *prevcb) {
        if (((cb->ascb_events & AS_CALLBACK_CALLED) == 0) ||
            (cb_arg != cb->ascb_arg)) {
            continue;
        }
        *prevcb = cb->ascb_next;
        kmem_free(cb, sizeof (struct as_callback));
        break;
    }
}

/*
 * Check the callback list for a matching event and intersection of
 * address range.  If there is a match invoke the callback.
 */
Skip an entry if: 315*7c478bd9Sstevel@tonic-gate * - a callback is already in progress for this entry (AS_CALLBACK_CALLED) 316*7c478bd9Sstevel@tonic-gate * - not event of interest 317*7c478bd9Sstevel@tonic-gate * - not address range of interest 318*7c478bd9Sstevel@tonic-gate * 319*7c478bd9Sstevel@tonic-gate * An event_len of zero indicates a request for an unconditional callback 320*7c478bd9Sstevel@tonic-gate * (regardless of event), only the AS_CALLBACK_CALLED is checked. The 321*7c478bd9Sstevel@tonic-gate * a_contents lock must be dropped before a callback, so only one callback 322*7c478bd9Sstevel@tonic-gate * can be done before returning. Return -1 (true) if a callback was 323*7c478bd9Sstevel@tonic-gate * executed and removed from the list, else return 0 (false). 324*7c478bd9Sstevel@tonic-gate * 325*7c478bd9Sstevel@tonic-gate * The logically separate parts, i.e. finding a matching callback and 326*7c478bd9Sstevel@tonic-gate * executing a given callback have been separated into two functions 327*7c478bd9Sstevel@tonic-gate * so that they can be called with different sets of locks held beyond 328*7c478bd9Sstevel@tonic-gate * the always-required a_contents. as_find_callback does not sleep so 329*7c478bd9Sstevel@tonic-gate * it is ok to call it if more locks than a_contents (i.e. the a_lock 330*7c478bd9Sstevel@tonic-gate * rwlock) are held. as_execute_callback on the other hand may sleep 331*7c478bd9Sstevel@tonic-gate * so all locks beyond a_contents must be dropped by the caller if one 332*7c478bd9Sstevel@tonic-gate * does not want to end comatose. 
333*7c478bd9Sstevel@tonic-gate */ 334*7c478bd9Sstevel@tonic-gate static int 335*7c478bd9Sstevel@tonic-gate as_do_callbacks(struct as *as, uint_t events, caddr_t event_addr, 336*7c478bd9Sstevel@tonic-gate size_t event_len) 337*7c478bd9Sstevel@tonic-gate { 338*7c478bd9Sstevel@tonic-gate struct as_callback *cb; 339*7c478bd9Sstevel@tonic-gate 340*7c478bd9Sstevel@tonic-gate if ((cb = as_find_callback(as, events, event_addr, event_len))) { 341*7c478bd9Sstevel@tonic-gate as_execute_callback(as, cb, events); 342*7c478bd9Sstevel@tonic-gate return (-1); 343*7c478bd9Sstevel@tonic-gate } 344*7c478bd9Sstevel@tonic-gate return (0); 345*7c478bd9Sstevel@tonic-gate } 346*7c478bd9Sstevel@tonic-gate 347*7c478bd9Sstevel@tonic-gate /* 348*7c478bd9Sstevel@tonic-gate * Search for the segment containing addr. If a segment containing addr 349*7c478bd9Sstevel@tonic-gate * exists, that segment is returned. If no such segment exists, and 350*7c478bd9Sstevel@tonic-gate * the list spans addresses greater than addr, then the first segment 351*7c478bd9Sstevel@tonic-gate * whose base is greater than addr is returned; otherwise, NULL is 352*7c478bd9Sstevel@tonic-gate * returned unless tail is true, in which case the last element of the 353*7c478bd9Sstevel@tonic-gate * list is returned. 354*7c478bd9Sstevel@tonic-gate * 355*7c478bd9Sstevel@tonic-gate * a_seglast is used to cache the last found segment for repeated 356*7c478bd9Sstevel@tonic-gate * searches to the same addr (which happens frequently). 
/*
 * Return the segment containing addr, or the first segment whose base is
 * above addr, or (when tail is set and no such segment exists) the last
 * segment; NULL otherwise.  a_seglast caches the last hit since repeated
 * lookups of the same address are common.
 */
struct seg *
as_findseg(struct as *as, caddr_t addr, int tail)
{
    struct seg *seg = as->a_seglast;
    avl_index_t where;

    ASSERT(AS_LOCK_HELD(as, &as->a_lock));

    /* fast path: the cached segment still covers addr */
    if (seg != NULL &&
        seg->s_base <= addr &&
        addr < seg->s_base + seg->s_size)
        return (seg);

    seg = avl_find(&as->a_segtree, &addr, &where);
    if (seg != NULL)
        return (as->a_seglast = seg);

    /* no exact cover: fall forward to the next segment, or the tail */
    seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);
    if (seg == NULL && tail)
        seg = avl_last(&as->a_segtree);
    return (as->a_seglast = seg);
}

#ifdef VERIFY_SEGLIST
/*
 * verify that the linked list is coherent
 */
static void
as_verify(struct as *as)
{
    struct seg *seg, *seglast, *p, *n;
    uint_t nsegs = 0;

    if (do_as_verify == 0)
        return;

    seglast = as->a_seglast;

    for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
        ASSERT(seg->s_as == as);
        p = AS_SEGPREV(as, seg);
        n = AS_SEGNEXT(as, seg);
        /* neighbors belong to this as and bases strictly increase */
        ASSERT(p == NULL || p->s_as == as);
        ASSERT(p == NULL || p->s_base < seg->s_base);
        ASSERT(n == NULL || n->s_base > seg->s_base);
        ASSERT(n != NULL || seg == avl_last(&as->a_segtree));
        if (seg == seglast)
            seglast = NULL;     /* cached segment was seen on the walk */
        nsegs++;
    }
    ASSERT(seglast == NULL);
    ASSERT(avl_numnodes(&as->a_segtree) == nsegs);
}
#endif /* VERIFY_SEGLIST */

/*
 * Add a new segment to the address space.  The avl_find()
 * may be expensive so we attempt to use last segment accessed
 * in as_gap() as an insertion point.
 */
/*
 * Insert newseg into the address space's segment tree.  Returns 0 on
 * success, -1 if newseg overlaps an existing segment.  The a_lastgaphl
 * hint (last gap found by as_gap) is tried first to avoid an avl_find.
 * Caller must hold a_lock as writer.
 */
int
as_addseg(struct as *as, struct seg *newseg)
{
    struct seg *seg;
    caddr_t addr;
    caddr_t eaddr;
    avl_index_t where;

    ASSERT(AS_WRITE_HELD(as, &as->a_lock));

    as->a_updatedir = 1;    /* inform /proc */
    gethrestime(&as->a_updatetime);

    if (as->a_lastgaphl != NULL) {
        struct seg *hseg = NULL;
        struct seg *lseg = NULL;

        /* orient the hint as the neighbors below (lseg) and above (hseg) */
        if (as->a_lastgaphl->s_base > newseg->s_base) {
            hseg = as->a_lastgaphl;
            lseg = AVL_PREV(&as->a_segtree, hseg);
        } else {
            lseg = as->a_lastgaphl;
            hseg = AVL_NEXT(&as->a_segtree, lseg);
        }

        /* newseg's base falls strictly between the neighbors: insert here */
        if (hseg && lseg && lseg->s_base < newseg->s_base &&
            hseg->s_base > newseg->s_base) {
            avl_insert_here(&as->a_segtree, newseg, lseg,
                AVL_AFTER);
            as->a_lastgaphl = NULL;
            as->a_seglast = newseg;
            return (0);
        }
        as->a_lastgaphl = NULL;
    }

    addr = newseg->s_base;
    eaddr = addr + newseg->s_size;
again:

    seg = avl_find(&as->a_segtree, &addr, &where);

    if (seg == NULL)
        seg = avl_nearest(&as->a_segtree, where, AVL_AFTER);

    if (seg == NULL)
        seg = avl_last(&as->a_segtree);

    if (seg != NULL) {
        caddr_t base = seg->s_base;

        /*
         * If top of seg is below the requested address, then
         * the insertion point is at the end of the linked list,
         * and seg points to the tail of the list.  Otherwise,
         * the insertion point is immediately before seg.
         */
        if (base + seg->s_size > addr) {
            if (addr >= base || eaddr > base) {
#ifdef __sparc
                extern struct seg_ops segnf_ops;

                /*
                 * no-fault segs must disappear if overlaid.
                 * XXX need new segment type so
                 * we don't have to check s_ops
                 */
                if (seg->s_ops == &segnf_ops) {
                    seg_unmap(seg);
                    goto again;
                }
#endif
                return (-1);    /* overlapping segment */
            }
        }
    }
    as->a_seglast = newseg;
    avl_insert(&as->a_segtree, newseg, where);

#ifdef VERIFY_SEGLIST
    as_verify(as);
#endif
    return (0);
}

/*
 * Remove seg from the address space's segment tree, invalidating the
 * lookup caches that might reference it.  Returns seg, or NULL if seg
 * was NULL.  Caller must hold a_lock as writer.
 */
struct seg *
as_removeseg(struct as *as, struct seg *seg)
{
    avl_tree_t *t;

    ASSERT(AS_WRITE_HELD(as, &as->a_lock));

    as->a_updatedir = 1;    /* inform /proc */
    gethrestime(&as->a_updatetime);

    if (seg == NULL)
        return (NULL);

    t = &as->a_segtree;
    if (as->a_seglast == seg)
        as->a_seglast = NULL;
    as->a_lastgaphl = NULL;

    /*
     * if this segment is at an address higher than
     * a_lastgap, set a_lastgap to the next segment (NULL if last segment)
     */
    if (as->a_lastgap &&
        (seg == as->a_lastgap || seg->s_base > as->a_lastgap->s_base))
        as->a_lastgap = AVL_NEXT(t, seg);

    /*
     * remove the segment from the seg tree
     */
    avl_remove(t, seg);

#ifdef VERIFY_SEGLIST
    as_verify(as);
#endif
    return (seg);
}

/*
 * Find a segment containing addr.
 */
struct seg *
as_segat(struct as *as, caddr_t addr)
{
    struct seg *seg = as->a_seglast;

    ASSERT(AS_LOCK_HELD(as, &as->a_lock));

    /* fast path: cached segment still covers addr */
    if (seg != NULL && seg->s_base <= addr &&
        addr < seg->s_base + seg->s_size)
        return (seg);

    seg = avl_find(&as->a_segtree, &addr, NULL);
    return (seg);
}

/*
 * Serialize all searches for holes in an address space to
 * prevent two or more threads from allocating the same virtual
 * address range.
 */
The address space must not be "read/write" 562*7c478bd9Sstevel@tonic-gate * locked by the caller since we may block. 563*7c478bd9Sstevel@tonic-gate */ 564*7c478bd9Sstevel@tonic-gate void 565*7c478bd9Sstevel@tonic-gate as_rangelock(struct as *as) 566*7c478bd9Sstevel@tonic-gate { 567*7c478bd9Sstevel@tonic-gate mutex_enter(&as->a_contents); 568*7c478bd9Sstevel@tonic-gate while (AS_ISCLAIMGAP(as)) 569*7c478bd9Sstevel@tonic-gate cv_wait(&as->a_cv, &as->a_contents); 570*7c478bd9Sstevel@tonic-gate AS_SETCLAIMGAP(as); 571*7c478bd9Sstevel@tonic-gate mutex_exit(&as->a_contents); 572*7c478bd9Sstevel@tonic-gate } 573*7c478bd9Sstevel@tonic-gate 574*7c478bd9Sstevel@tonic-gate /* 575*7c478bd9Sstevel@tonic-gate * Release hold on a_state & AS_CLAIMGAP and signal any other blocked threads. 576*7c478bd9Sstevel@tonic-gate */ 577*7c478bd9Sstevel@tonic-gate void 578*7c478bd9Sstevel@tonic-gate as_rangeunlock(struct as *as) 579*7c478bd9Sstevel@tonic-gate { 580*7c478bd9Sstevel@tonic-gate mutex_enter(&as->a_contents); 581*7c478bd9Sstevel@tonic-gate AS_CLRCLAIMGAP(as); 582*7c478bd9Sstevel@tonic-gate cv_signal(&as->a_cv); 583*7c478bd9Sstevel@tonic-gate mutex_exit(&as->a_contents); 584*7c478bd9Sstevel@tonic-gate } 585*7c478bd9Sstevel@tonic-gate 586*7c478bd9Sstevel@tonic-gate /* 587*7c478bd9Sstevel@tonic-gate * compar segments (or just an address) by segment address range 588*7c478bd9Sstevel@tonic-gate */ 589*7c478bd9Sstevel@tonic-gate static int 590*7c478bd9Sstevel@tonic-gate as_segcompar(const void *x, const void *y) 591*7c478bd9Sstevel@tonic-gate { 592*7c478bd9Sstevel@tonic-gate struct seg *a = (struct seg *)x; 593*7c478bd9Sstevel@tonic-gate struct seg *b = (struct seg *)y; 594*7c478bd9Sstevel@tonic-gate 595*7c478bd9Sstevel@tonic-gate if (a->s_base < b->s_base) 596*7c478bd9Sstevel@tonic-gate return (-1); 597*7c478bd9Sstevel@tonic-gate if (a->s_base >= b->s_base + b->s_size) 598*7c478bd9Sstevel@tonic-gate return (1); 599*7c478bd9Sstevel@tonic-gate return (0); 600*7c478bd9Sstevel@tonic-gate } 
601*7c478bd9Sstevel@tonic-gate 602*7c478bd9Sstevel@tonic-gate 603*7c478bd9Sstevel@tonic-gate void 604*7c478bd9Sstevel@tonic-gate as_avlinit(struct as *as) 605*7c478bd9Sstevel@tonic-gate { 606*7c478bd9Sstevel@tonic-gate avl_create(&as->a_segtree, as_segcompar, sizeof (struct seg), 607*7c478bd9Sstevel@tonic-gate offsetof(struct seg, s_tree)); 608*7c478bd9Sstevel@tonic-gate avl_create(&as->a_wpage, wp_compare, sizeof (struct watched_page), 609*7c478bd9Sstevel@tonic-gate offsetof(struct watched_page, wp_link)); 610*7c478bd9Sstevel@tonic-gate } 611*7c478bd9Sstevel@tonic-gate 612*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 613*7c478bd9Sstevel@tonic-gate static int 614*7c478bd9Sstevel@tonic-gate as_constructor(void *buf, void *cdrarg, int kmflags) 615*7c478bd9Sstevel@tonic-gate { 616*7c478bd9Sstevel@tonic-gate struct as *as = buf; 617*7c478bd9Sstevel@tonic-gate 618*7c478bd9Sstevel@tonic-gate mutex_init(&as->a_contents, NULL, MUTEX_DEFAULT, NULL); 619*7c478bd9Sstevel@tonic-gate cv_init(&as->a_cv, NULL, CV_DEFAULT, NULL); 620*7c478bd9Sstevel@tonic-gate rw_init(&as->a_lock, NULL, RW_DEFAULT, NULL); 621*7c478bd9Sstevel@tonic-gate as_avlinit(as); 622*7c478bd9Sstevel@tonic-gate return (0); 623*7c478bd9Sstevel@tonic-gate } 624*7c478bd9Sstevel@tonic-gate 625*7c478bd9Sstevel@tonic-gate /*ARGSUSED1*/ 626*7c478bd9Sstevel@tonic-gate static void 627*7c478bd9Sstevel@tonic-gate as_destructor(void *buf, void *cdrarg) 628*7c478bd9Sstevel@tonic-gate { 629*7c478bd9Sstevel@tonic-gate struct as *as = buf; 630*7c478bd9Sstevel@tonic-gate 631*7c478bd9Sstevel@tonic-gate avl_destroy(&as->a_segtree); 632*7c478bd9Sstevel@tonic-gate mutex_destroy(&as->a_contents); 633*7c478bd9Sstevel@tonic-gate cv_destroy(&as->a_cv); 634*7c478bd9Sstevel@tonic-gate rw_destroy(&as->a_lock); 635*7c478bd9Sstevel@tonic-gate } 636*7c478bd9Sstevel@tonic-gate 637*7c478bd9Sstevel@tonic-gate void 638*7c478bd9Sstevel@tonic-gate as_init(void) 639*7c478bd9Sstevel@tonic-gate { 640*7c478bd9Sstevel@tonic-gate as_cache = 
kmem_cache_create("as_cache", sizeof (struct as), 0, 641*7c478bd9Sstevel@tonic-gate as_constructor, as_destructor, NULL, NULL, NULL, 0); 642*7c478bd9Sstevel@tonic-gate } 643*7c478bd9Sstevel@tonic-gate 644*7c478bd9Sstevel@tonic-gate /* 645*7c478bd9Sstevel@tonic-gate * Allocate and initialize an address space data structure. 646*7c478bd9Sstevel@tonic-gate * We call hat_alloc to allow any machine dependent 647*7c478bd9Sstevel@tonic-gate * information in the hat structure to be initialized. 648*7c478bd9Sstevel@tonic-gate */ 649*7c478bd9Sstevel@tonic-gate struct as * 650*7c478bd9Sstevel@tonic-gate as_alloc(void) 651*7c478bd9Sstevel@tonic-gate { 652*7c478bd9Sstevel@tonic-gate struct as *as; 653*7c478bd9Sstevel@tonic-gate 654*7c478bd9Sstevel@tonic-gate as = kmem_cache_alloc(as_cache, KM_SLEEP); 655*7c478bd9Sstevel@tonic-gate 656*7c478bd9Sstevel@tonic-gate as->a_flags = 0; 657*7c478bd9Sstevel@tonic-gate as->a_vbits = 0; 658*7c478bd9Sstevel@tonic-gate as->a_hrm = NULL; 659*7c478bd9Sstevel@tonic-gate as->a_seglast = NULL; 660*7c478bd9Sstevel@tonic-gate as->a_size = 0; 661*7c478bd9Sstevel@tonic-gate as->a_updatedir = 0; 662*7c478bd9Sstevel@tonic-gate gethrestime(&as->a_updatetime); 663*7c478bd9Sstevel@tonic-gate as->a_objectdir = NULL; 664*7c478bd9Sstevel@tonic-gate as->a_sizedir = 0; 665*7c478bd9Sstevel@tonic-gate as->a_userlimit = (caddr_t)USERLIMIT; 666*7c478bd9Sstevel@tonic-gate as->a_lastgap = NULL; 667*7c478bd9Sstevel@tonic-gate as->a_lastgaphl = NULL; 668*7c478bd9Sstevel@tonic-gate as->a_callbacks = NULL; 669*7c478bd9Sstevel@tonic-gate 670*7c478bd9Sstevel@tonic-gate AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 671*7c478bd9Sstevel@tonic-gate as->a_hat = hat_alloc(as); /* create hat for default system mmu */ 672*7c478bd9Sstevel@tonic-gate AS_LOCK_EXIT(as, &as->a_lock); 673*7c478bd9Sstevel@tonic-gate 674*7c478bd9Sstevel@tonic-gate as->a_xhat = NULL; 675*7c478bd9Sstevel@tonic-gate 676*7c478bd9Sstevel@tonic-gate return (as); 677*7c478bd9Sstevel@tonic-gate } 

/*
 * Free an address space data structure.
 * Need to free the hat first and then
 * all the segments on this as and finally
 * the space for the as struct itself.
 *
 * May block: SEGOP_UNMAP can return EAGAIN for locked memory, in which
 * case we wait on a_cv and restart the whole teardown from `top'.
 */
void
as_free(struct as *as)
{
	struct hat *hat = as->a_hat;
	struct seg *seg, *next;
	int called = 0;		/* hat_free_start() done exactly once */

top:
	/*
	 * Invoke ALL callbacks. as_do_callbacks will do one callback
	 * per call, and not return (-1) until the callback has completed.
	 * When as_do_callbacks returns zero, all callbacks have completed.
	 */
	mutex_enter(&as->a_contents);
	while (as->a_callbacks && as_do_callbacks(as, AS_ALL_EVENT, 0, 0));

	/* This will prevent new XHATs from attaching to as */
	if (!called)
		AS_SETBUSY(as);
	mutex_exit(&as->a_contents);
	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	/* Only begin hat teardown on the first pass through `top'. */
	if (!called) {
		called = 1;
		hat_free_start(hat);
		if (as->a_xhat != NULL)
			xhat_free_start_all(as);
	}
	for (seg = AS_SEGFIRST(as); seg != NULL; seg = next) {
		int err;

		/* Fetch next first: SEGOP_UNMAP destroys seg on success. */
		next = AS_SEGNEXT(as, seg);
		err = SEGOP_UNMAP(seg, seg->s_base, seg->s_size);
		if (err == EAGAIN) {
			mutex_enter(&as->a_contents);
			if (as->a_callbacks) {
				/* Let pending callbacks run, then retry. */
				AS_LOCK_EXIT(as, &as->a_lock);
			} else {
				/*
				 * Memory is currently locked. Wait for a
				 * cv_signal that it has been unlocked, then
				 * try the operation again.
				 */
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as, &as->a_lock);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			}
			mutex_exit(&as->a_contents);
			goto top;
		} else {
			/*
			 * We do not expect any other error return at this
			 * time. This is similar to an ASSERT in seg_unmap()
			 */
			ASSERT(err == 0);
		}
	}
	hat_free_end(hat);
	if (as->a_xhat != NULL)
		xhat_free_end_all(as);
	AS_LOCK_EXIT(as, &as->a_lock);

	/* /proc stuff: watchpoints must already be gone, free objectdir. */
	ASSERT(avl_numnodes(&as->a_wpage) == 0);
	if (as->a_objectdir) {
		kmem_free(as->a_objectdir, as->a_sizedir * sizeof (vnode_t *));
		as->a_objectdir = NULL;
		as->a_sizedir = 0;
	}

	/*
	 * Free the struct as back to kmem.  Assert it has no segments.
	 */
	ASSERT(avl_numnodes(&as->a_segtree) == 0);
	kmem_cache_free(as_cache, as);
}

/*
 * Duplicate address space `as' into a newly allocated one, returned
 * through *outas.  Segments flagged S_PURGE are skipped.  Returns 0 on
 * success, -1 if a new segment could not be allocated, or the error
 * from SEGOP_DUP/hat_dup.  On failure the partially built address
 * space is freed and *outas is left untouched.
 */
int
as_dup(struct as *as, struct as **outas)
{
	struct as *newas;
	struct seg *seg, *newseg;
	int error;

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	/* Drop watchpoint protections so the copy sees real permissions. */
	as_clearwatch(as);
	newas = as_alloc();
	newas->a_userlimit = as->a_userlimit;
	AS_LOCK_ENTER(newas, &newas->a_lock, RW_WRITER);

	/* This will prevent new XHATs from attaching */
	mutex_enter(&as->a_contents);
	AS_SETBUSY(as);
	mutex_exit(&as->a_contents);
	mutex_enter(&newas->a_contents);
	AS_SETBUSY(newas);
	mutex_exit(&newas->a_contents);


	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {

		if (seg->s_flags & S_PURGE)
			continue;

		newseg = seg_alloc(newas, seg->s_base, seg->s_size);
		if (newseg == NULL) {
			AS_LOCK_EXIT(newas, &newas->a_lock);
			/* Restore watchpoints before unwinding. */
			as_setwatch(as);
			mutex_enter(&as->a_contents);
			AS_CLRBUSY(as);
			mutex_exit(&as->a_contents);
			AS_LOCK_EXIT(as, &as->a_lock);
			as_free(newas);
			return (-1);
		}
		if ((error = SEGOP_DUP(seg, newseg)) != 0) {
			/*
			 * We call seg_free() on the new seg
			 * because the segment is not set up
			 * completely; i.e. it has no ops.
			 */
			as_setwatch(as);
			mutex_enter(&as->a_contents);
			AS_CLRBUSY(as);
			mutex_exit(&as->a_contents);
			AS_LOCK_EXIT(as, &as->a_lock);
			seg_free(newseg);
			AS_LOCK_EXIT(newas, &newas->a_lock);
			as_free(newas);
			return (error);
		}
		newas->a_size += seg->s_size;
	}

	error = hat_dup(as->a_hat, newas->a_hat, NULL, 0, HAT_DUP_ALL);
	if (as->a_xhat != NULL)
		error |= xhat_dup_all(as, newas, NULL, 0, HAT_DUP_ALL);

	mutex_enter(&newas->a_contents);
	AS_CLRBUSY(newas);
	mutex_exit(&newas->a_contents);
	AS_LOCK_EXIT(newas, &newas->a_lock);

	as_setwatch(as);
	mutex_enter(&as->a_contents);
	AS_CLRBUSY(as);
	mutex_exit(&as->a_contents);
	AS_LOCK_EXIT(as, &as->a_lock);
	if (error != 0) {
		as_free(newas);
		return (error);
	}
	*outas = newas;
	return (0);
}

/*
 * Handle a ``fault'' at addr for size bytes.
 *
 * The range is page-rounded and walked segment by segment, calling
 * SEGOP_FAULT on each piece.  When hat != as->a_hat this is an XHAT
 * fault, which only supports F_INVAL on non-kas address spaces and
 * never seg_dev segments.  If SOFTLOCKing fails partway, the pages
 * already locked are SOFTUNLOCKed before returning.  EDEADLK from the
 * lower layers causes a delay-and-retry (ufs/sds logging workaround,
 * bug 4125102).
 */
faultcode_t
as_fault(struct hat *hat, struct as *as, caddr_t addr, size_t size,
	enum fault_type type, enum seg_rw rw)
{
	struct seg *seg;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	size_t ssize;			/* bytes handled in current seg */
	faultcode_t res = 0;
	caddr_t addrsav;		/* start of range, for SOFTUNLOCK undo */
	struct seg *segsav;		/* first seg, for SOFTUNLOCK undo */
	int as_lock_held;
	klwp_t *lwp = ttolwp(curthread);
	int is_xhat = 0;
	int holding_wpage = 0;
	extern struct seg_ops segdev_ops;



	if (as->a_hat != hat) {
		/* This must be an XHAT then */
		is_xhat = 1;

		if ((type != F_INVAL) || (as == &kas))
			return (FC_NOSUPPORT);
	}

retry:
	if (!is_xhat) {
		/*
		 * Indicate that the lwp is not to be stopped while waiting
		 * for a pagefault.  This is to avoid deadlock while debugging
		 * a process via /proc over NFS (in particular).
		 */
		if (lwp != NULL)
			lwp->lwp_nostop++;

		/*
		 * same length must be used when we softlock and softunlock.
		 * We don't support softunlocking lengths less than
		 * the original length when there is largepage support.
		 * See seg_dev.c for more comments.
		 */
		switch (type) {

		case F_SOFTLOCK:
			CPU_STATS_ADD_K(vm, softlock, 1);
			break;

		case F_SOFTUNLOCK:
			break;

		case F_PROT:
			CPU_STATS_ADD_K(vm, prot_fault, 1);
			break;

		case F_INVAL:
			CPU_STATS_ENTER_K();
			CPU_STATS_ADDQ(CPU, vm, as_fault, 1);
			if (as == &kas)
				CPU_STATS_ADDQ(CPU, vm, kernel_asflt, 1);
			CPU_STATS_EXIT_K();
			break;
		}
	}

	/* Kernel probe */
	TNF_PROBE_3(address_fault, "vm pagefault", /* CSTYLED */,
		tnf_opaque,	address,	addr,
		tnf_fault_type,	fault_type,	type,
		tnf_seg_access,	access,		rw);

	/* Round the request to whole pages. */
	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
		(size_t)raddr;

	/*
	 * XXX -- Don't grab the as lock for segkmap. We should grab it for
	 * correctness, but then we could be stuck holding this lock for
	 * a LONG time if the fault needs to be resolved on a slow
	 * filesystem, and then no-one will be able to exec new commands,
	 * as exec'ing requires the write lock on the as.
	 */
	if (as == &kas && segkmap && segkmap->s_base <= raddr &&
	    raddr + size < segkmap->s_base + segkmap->s_size) {
		/*
		 * if (as==&kas), this can't be XHAT: we've already returned
		 * FC_NOSUPPORT.
		 */
		seg = segkmap;
		as_lock_held = 0;
	} else {
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
		if (is_xhat && avl_numnodes(&as->a_wpage) != 0) {
			/*
			 * Grab and hold the writers' lock on the as
			 * if the fault is to a watched page.
			 * This will keep CPUs from "peeking" at the
			 * address range while we're temporarily boosting
			 * the permissions for the XHAT device to
			 * resolve the fault in the segment layer.
			 *
			 * We could check whether faulted address
			 * is within a watched page and only then grab
			 * the writer lock, but this is simpler.
			 */
			AS_LOCK_EXIT(as, &as->a_lock);
			AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
		}

		seg = as_segat(as, raddr);
		if (seg == NULL) {
			AS_LOCK_EXIT(as, &as->a_lock);
			if ((lwp != NULL) && (!is_xhat))
				lwp->lwp_nostop--;
			return (FC_NOMAP);
		}

		as_lock_held = 1;
	}

	/* Remember the start so a failed SOFTLOCK can be unwound below. */
	addrsav = raddr;
	segsav = seg;

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			/* Segments must be contiguous across the range. */
			if (seg == NULL || raddr != seg->s_base) {
				res = FC_NOMAP;
				break;
			}
		}
		if (raddr + rsize > seg->s_base + seg->s_size)
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;

		if (!is_xhat || (seg->s_ops != &segdev_ops)) {

			if (is_xhat && avl_numnodes(&as->a_wpage) != 0 &&
			    pr_is_watchpage_as(raddr, rw, as)) {
				/*
				 * Handle watch pages.  If we're faulting on a
				 * watched page from an X-hat, we have to
				 * restore the original permissions while we
				 * handle the fault.
				 */
				as_clearwatch(as);
				holding_wpage = 1;
			}

			res = SEGOP_FAULT(hat, seg, raddr, ssize, type, rw);

			/* Restore watchpoints */
			if (holding_wpage) {
				as_setwatch(as);
				holding_wpage = 0;
			}

			if (res != 0)
				break;
		} else {
			/* XHAT does not support seg_dev */
			res = FC_NOSUPPORT;
			break;
		}
	}

	/*
	 * If we were SOFTLOCKing and encountered a failure,
	 * we must SOFTUNLOCK the range we already did. (Maybe we
	 * should just panic if we are SOFTLOCKing or even SOFTUNLOCKing
	 * right here...)
	 */
	if (res != 0 && type == F_SOFTLOCK) {
		for (seg = segsav; addrsav < raddr; addrsav += ssize) {
			if (addrsav >= seg->s_base + seg->s_size)
				seg = AS_SEGNEXT(as, seg);
			ASSERT(seg != NULL);
			/*
			 * Now call the fault routine again to perform the
			 * unlock using S_OTHER instead of the rw variable
			 * since we never got a chance to touch the pages.
			 */
			if (raddr > seg->s_base + seg->s_size)
				ssize = seg->s_base + seg->s_size - addrsav;
			else
				ssize = raddr - addrsav;
			(void) SEGOP_FAULT(hat, seg, addrsav, ssize,
			    F_SOFTUNLOCK, S_OTHER);
		}
	}
	if (as_lock_held)
		AS_LOCK_EXIT(as, &as->a_lock);
	if ((lwp != NULL) && (!is_xhat))
		lwp->lwp_nostop--;
	/*
	 * If the lower levels returned EDEADLK for a fault,
	 * It means that we should retry the fault.  Let's wait
	 * a bit also to let the deadlock causing condition clear.
	 * This is part of a gross hack to work around a design flaw
	 * in the ufs/sds logging code and should go away when the
	 * logging code is re-designed to fix the problem. See bug
	 * 4125102 for details of the problem.
	 */
	if (FC_ERRNO(res) == EDEADLK) {
		/* deadlk_wait is defined elsewhere; presumably a tunable. */
		delay(deadlk_wait);
		res = 0;
		goto retry;
	}
	return (res);
}



/*
 * Asynchronous ``fault'' at addr for size bytes.
 * Walks the page-rounded range a page at a time, issuing
 * SEGOP_FAULTA (fault-ahead) on each page.  Same EDEADLK
 * delay-and-retry workaround as as_fault().
 */
faultcode_t
as_faulta(struct as *as, caddr_t addr, size_t size)
{
	struct seg *seg;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	faultcode_t res = 0;
	klwp_t *lwp = ttolwp(curthread);

retry:
	/*
	 * Indicate that the lwp is not to be stopped while waiting
	 * for a pagefault.  This is to avoid deadlock while debugging
	 * a process via /proc over NFS (in particular).
	 */
	if (lwp != NULL)
		lwp->lwp_nostop++;

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
		(size_t)raddr;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		if (lwp != NULL)
			lwp->lwp_nostop--;
		return (FC_NOMAP);
	}

	for (; rsize != 0; rsize -= PAGESIZE, raddr += PAGESIZE) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				res = FC_NOMAP;
				break;
			}
		}
		res = SEGOP_FAULTA(seg, raddr);
		if (res != 0)
			break;
	}
	AS_LOCK_EXIT(as, &as->a_lock);
	if (lwp != NULL)
		lwp->lwp_nostop--;
	/*
	 * If the lower levels returned EDEADLK for a fault,
	 * It means that we should retry the fault.  Let's wait
	 * a bit also to let the deadlock causing condition clear.
	 * This is part of a gross hack to work around a design flaw
	 * in the ufs/sds logging code and should go away when the
	 * logging code is re-designed to fix the problem. See bug
	 * 4125102 for details of the problem.
	 */
	if (FC_ERRNO(res) == EDEADLK) {
		delay(deadlk_wait);
		res = 0;
		goto retry;
	}
	return (res);
}

/*
 * Set the virtual mapping for the interval from [addr : addr + size)
 * in address space `as' to have the specified protection.
 * It is ok for the range to cross over several segments,
 * as long as they are contiguous.
 */
int
as_setprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
{
	struct seg *seg;
	struct as_callback *cb;
	size_t ssize;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error = 0, writer = 0;	/* writer: retry holding a_lock as writer */
	caddr_t saveraddr;
	size_t saversize;

setprot_top:
	/* Page-round the request: round addr down, round the end up. */
	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
		(size_t)raddr;

	if (raddr + rsize < raddr)		/* check for wraparound */
		return (ENOMEM);

	/*
	 * Remember the full rounded range; raddr/rsize are consumed by the
	 * walk below and the saved copies are needed for as_setwatchprot().
	 */
	saveraddr = raddr;
	saversize = rsize;

	/*
	 * Normally we only lock the as as a reader. But
	 * if due to setprot the segment driver needs to split
	 * a segment it will return IE_RETRY. Therefore we re-acquire
	 * the as lock as a writer so the segment driver can change
	 * the seg list. Also the segment driver will return IE_RETRY
	 * after it has changed the segment list so we therefore keep
	 * locking as a writer. Since these operations should be rare
	 * we want to only lock as a writer when necessary.
	 */
	if (writer || avl_numnodes(&as->a_wpage) != 0) {
		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	} else {
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	}

	as_clearwatchprot(as, raddr, rsize);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		/* No segment at the start address; restore watchpoints. */
		as_setwatch(as);
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENOMEM);
	}

	/*
	 * Walk the range segment by segment; the segments must be
	 * contiguous or we fail with ENOMEM.
	 */
	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				error = ENOMEM;
				break;
			}
		}
		if ((raddr + rsize) > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;
		error = SEGOP_SETPROT(seg, raddr, ssize, prot);

		if (error == IE_NOMEM) {
			error = EAGAIN;
			break;
		}

		if (error == IE_RETRY) {
			/* Segment driver changed (or must change) the seg
			 * list; retry from the top holding a_lock as writer. */
			AS_LOCK_EXIT(as, &as->a_lock);
			writer = 1;
			goto setprot_top;
		}

		if (error == EAGAIN) {
			/*
			 * Make sure we have a_lock as writer.
			 */
			if (writer == 0) {
				AS_LOCK_EXIT(as, &as->a_lock);
				writer = 1;
				goto setprot_top;
			}

			/*
			 * Memory is currently locked.  It must be unlocked
			 * before this operation can succeed through a retry.
			 * The possible reasons for locked memory and
			 * corresponding strategies for unlocking are:
			 * (1) Normal I/O
			 *	wait for a signal that the I/O operation
			 *	has completed and the memory is unlocked.
			 * (2) Asynchronous I/O
			 *	The aio subsystem does not unlock pages when
			 *	the I/O is completed. Those pages are unlocked
			 *	when the application calls aiowait/aioerror.
			 *	So, to prevent blocking forever, cv_broadcast()
			 *	is done to wake up aio_cleanup_thread.
			 *	Subsequently, segvn_reclaim will be called, and
			 *	that will do AS_CLRUNMAPWAIT() and wake us up.
			 * (3) Long term page locking:
			 *	Drivers intending to have pages locked for a
			 *	period considerably longer than for normal I/O
			 *	(essentially forever) may have registered for a
			 *	callback so they may unlock these pages on
			 *	request. This is needed to allow this operation
			 *	to succeed. Each entry on the callback list is
			 *	examined. If the event or address range pertains,
			 *	the callback is invoked (unless it already is in
			 *	progress). The a_contents lock must be dropped
			 *	before the callback, so only one callback can
			 *	be done at a time. Go to the top and do more
			 *	until zero is returned. If zero is returned,
			 *	either there were no callbacks for this event
			 *	or they were already in progress.
			 */
			mutex_enter(&as->a_contents);
			if (as->a_callbacks &&
			    (cb = as_find_callback(as, AS_SETPROT_EVENT,
			    seg->s_base, seg->s_size))) {
				AS_LOCK_EXIT(as, &as->a_lock);
				as_execute_callback(as, cb, AS_SETPROT_EVENT);
			} else {
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as, &as->a_lock);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			}
			mutex_exit(&as->a_contents);
			goto setprot_top;
		} else if (error != 0)
			break;
	}
	if (error != 0) {
		/* Failure: just re-establish watchpoints. */
		as_setwatch(as);
	} else {
		/* Success: record the new protections for watched pages. */
		as_setwatchprot(as, saveraddr, saversize, prot);
	}
	AS_LOCK_EXIT(as, &as->a_lock);
	return (error);
}

/*
 * Check to make sure that the interval [addr, addr + size)
 * in address space `as' has at least the specified protection.
 * It is ok for the range to cross over several segments, as long
 * as they are contiguous.
 */
int
as_checkprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
{
	struct seg *seg;
	size_t ssize;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error = 0;

	/* Page-round the request: round addr down, round the end up. */
	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
		(size_t)raddr;

	if (raddr + rsize < raddr)		/* check for wraparound */
		return (ENOMEM);

	/*
	 * This is ugly as sin...
	 * Normally, we only acquire the address space readers lock.
	 * However, if the address space has watchpoints present,
	 * we must acquire the writer lock on the address space for
	 * the benefit of as_clearwatchprot() and as_setwatchprot().
	 */
	if (avl_numnodes(&as->a_wpage) != 0)
		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	else
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	as_clearwatchprot(as, raddr, rsize);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		as_setwatch(as);
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENOMEM);
	}

	/*
	 * Walk the contiguous segments covering the range, asking each
	 * driver whether it grants at least `prot'.
	 */
	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				error = ENOMEM;
				break;
			}
		}
		if ((raddr + rsize) > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;

		error = SEGOP_CHECKPROT(seg, raddr, ssize, prot);
		if (error != 0)
			break;
	}
	/* Watchpoint protections were cleared above; restore them. */
	as_setwatch(as);
	AS_LOCK_EXIT(as, &as->a_lock);
	return (error);
}

/*
 * Unmap the page-rounded range [addr, addr + size) from address space `as'.
 * The range may span several segments and may contain gaps.  Returns 0 on
 * success and -1 if a segment driver reports a hard error.
 */
int
as_unmap(struct as *as, caddr_t addr, size_t size)
{
	struct seg *seg, *seg_next;
	struct as_callback *cb;
	caddr_t raddr, eaddr;
	size_t ssize;
	int err;

top:
	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	eaddr = (caddr_t)(((uintptr_t)(addr + size) + PAGEOFFSET) &
	    (uintptr_t)PAGEMASK);

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	/*
	 * Use as_findseg to find the first segment in the range, then
	 * step through the segments in order, following s_next.
	 */
	as_clearwatchprot(as, raddr, eaddr - raddr);

	for (seg = as_findseg(as, raddr, 0); seg != NULL; seg = seg_next) {
		if (eaddr <= seg->s_base)
			break;		/* eaddr was in a gap; all done */

		/* this is implied by the test above */
		ASSERT(raddr < eaddr);

		if (raddr < seg->s_base)
			raddr = seg->s_base;	/* raddr was in a gap */

		if (eaddr > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = eaddr - raddr;

		/*
		 * Save next segment pointer since seg can be
		 * destroyed during the segment unmap operation.
		 */
		seg_next = AS_SEGNEXT(as, seg);

		err = SEGOP_UNMAP(seg, raddr, ssize);
		if (err == EAGAIN) {
			/*
			 * Memory is currently locked.  It must be unlocked
			 * before this operation can succeed through a retry.
			 * The possible reasons for locked memory and
			 * corresponding strategies for unlocking are:
			 * (1) Normal I/O
			 *	wait for a signal that the I/O operation
			 *	has completed and the memory is unlocked.
			 * (2) Asynchronous I/O
			 *	The aio subsystem does not unlock pages when
			 *	the I/O is completed. Those pages are unlocked
			 *	when the application calls aiowait/aioerror.
			 *	So, to prevent blocking forever, cv_broadcast()
			 *	is done to wake up aio_cleanup_thread.
			 *	Subsequently, segvn_reclaim will be called, and
			 *	that will do AS_CLRUNMAPWAIT() and wake us up.
			 * (3) Long term page locking:
			 *	Drivers intending to have pages locked for a
			 *	period considerably longer than for normal I/O
			 *	(essentially forever) may have registered for a
			 *	callback so they may unlock these pages on
			 *	request. This is needed to allow this operation
			 *	to succeed. Each entry on the callback list is
			 *	examined. If the event or address range pertains,
			 *	the callback is invoked (unless it already is in
			 *	progress). The a_contents lock must be dropped
			 *	before the callback, so only one callback can
			 *	be done at a time. Go to the top and do more
			 *	until zero is returned. If zero is returned,
			 *	either there were no callbacks for this event
			 *	or they were already in progress.
			 */
			as_setwatch(as);
			mutex_enter(&as->a_contents);
			if (as->a_callbacks &&
			    (cb = as_find_callback(as, AS_UNMAP_EVENT,
			    seg->s_base, seg->s_size))) {
				AS_LOCK_EXIT(as, &as->a_lock);
				as_execute_callback(as, cb, AS_UNMAP_EVENT);
			} else {
				if (AS_ISUNMAPWAIT(as) == 0)
					cv_broadcast(&as->a_cv);
				AS_SETUNMAPWAIT(as);
				AS_LOCK_EXIT(as, &as->a_lock);
				while (AS_ISUNMAPWAIT(as))
					cv_wait(&as->a_cv, &as->a_contents);
			}
			mutex_exit(&as->a_contents);
			goto top;
		} else if (err == IE_RETRY) {
			/* Driver changed the seg list; start over. */
			as_setwatch(as);
			AS_LOCK_EXIT(as, &as->a_lock);
			goto top;
		} else if (err) {
			as_setwatch(as);
			AS_LOCK_EXIT(as, &as->a_lock);
			return (-1);
		}

		as->a_size -= ssize;
		raddr += ssize;
	}
	AS_LOCK_EXIT(as, &as->a_lock);
	return (0);
}

/*
 * Map the vnode-backed range [addr, addr + size) into `as', carving the
 * range into multiple segments so that larger page sizes can be used for
 * suitably aligned subranges.  szcvec is a bit vector of candidate page
 * size codes from map_execseg_pgszcvec(); bit 0 (base pages) is always
 * usable.  *segcreated is set to 1 once any segment has been created so
 * the caller knows it must unmap the range on failure.
 */
static int
as_map_vnsegs(struct as *as, caddr_t addr, size_t size,
    int (*crfp)(), struct segvn_crargs *vn_a, int *segcreated)
{
	int text = vn_a->flags & MAP_TEXT;
	uint_t szcvec = map_execseg_pgszcvec(text, addr, size);
	uint_t szc;
	uint_t nszc;
	int error;
	caddr_t a;
	caddr_t eaddr;
	size_t segsize;
	struct seg *seg;
	uint_t save_szcvec;
	size_t pgsz;
	struct vattr va;
	u_offset_t eoff;
	size_t save_size = 0;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));
	ASSERT(IS_P2ALIGNED(addr, PAGESIZE));
	ASSERT(IS_P2ALIGNED(size, PAGESIZE));
	ASSERT(vn_a->vp != NULL);
	ASSERT(vn_a->amp == NULL);

again:
	/*
	 * Only base pages are usable (or large pages were ruled out below):
	 * create a single segment covering the whole remaining range.
	 */
	if (szcvec <= 1) {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			return (ENOMEM);
		}
		vn_a->szc = 0;
		error = (*crfp)(seg, vn_a);
		if (error != 0) {
			seg_free(seg);
		}
		return (error);
	}

	/*
	 * Limit large-page mapping to the portion actually backed by the
	 * file; if we cannot get the file size, fall back to base pages.
	 */
	va.va_mask = AT_SIZE;
	if (VOP_GETATTR(vn_a->vp, &va, ATTR_HINT, vn_a->cred) != 0) {
		szcvec = 0;
		goto again;
	}
	eoff = vn_a->offset & PAGEMASK;
	if (eoff >= va.va_size) {
		/* Mapping starts entirely beyond EOF: base pages only. */
		szcvec = 0;
		goto again;
	}
	eoff += size;
	if (btopr(va.va_size) < btopr(eoff)) {
		/*
		 * Mapping extends past EOF.  Map only the file-backed
		 * prefix with large pages now; the tail is handled by the
		 * save_size fixup at the bottom.
		 */
		save_size = size;
		size = va.va_size - (vn_a->offset & PAGEMASK);
		size = P2ROUNDUP_TYPED(size, PAGESIZE, size_t);
		szcvec = map_execseg_pgszcvec(text, addr, size);
		if (szcvec <= 1) {
			size = save_size;
			goto again;
		}
	}

	/*
	 * First pass, from the low end: for each candidate page size (in
	 * increasing order), create a segment of the previous size up to
	 * the first boundary aligned for the next size.
	 */
	eaddr = addr + size;
	save_szcvec = szcvec;
	szcvec >>= 1;
	szc = 0;
	nszc = 0;
	while (szcvec) {
		if ((szcvec & 0x1) == 0) {
			nszc++;
			szcvec >>= 1;
			continue;
		}
		nszc++;
		pgsz = page_get_pagesize(nszc);
		a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
		if (a != addr) {
			ASSERT(a < eaddr);
			segsize = a - addr;
			seg = seg_alloc(as, addr, segsize);
			if (seg == NULL) {
				return (ENOMEM);
			}
			vn_a->szc = szc;
			error = (*crfp)(seg, vn_a);
			if (error != 0) {
				seg_free(seg);
				return (error);
			}
			*segcreated = 1;
			vn_a->offset += segsize;
			addr = a;
		}
		szc = nszc;
		szcvec >>= 1;
	}

	/*
	 * Second pass, from the high end: szc/pgsz still refer to the
	 * largest page size selected above; peel off segments down to
	 * base pages until the whole range is covered.
	 */
	ASSERT(addr < eaddr);
	szcvec = save_szcvec | 1; /* add 8K pages */
	while (szcvec) {
		a = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
		ASSERT(a >= addr);
		if (a != addr) {
			segsize = a - addr;
			seg = seg_alloc(as, addr, segsize);
			if (seg == NULL) {
				return (ENOMEM);
			}
			vn_a->szc = szc;
			error = (*crfp)(seg, vn_a);
			if (error != 0) {
				seg_free(seg);
				return (error);
			}
			*segcreated = 1;
			vn_a->offset += segsize;
			addr = a;
		}
		szcvec &= ~(1 << szc);
		if (szcvec) {
			szc = highbit(szcvec) - 1;
			pgsz = page_get_pagesize(szc);
		}
	}
	ASSERT(addr == eaddr);

	if (save_size) {
		/* Map the remaining (not file-backed) tail of the range. */
		size = save_size - size;
		goto again;
	}

	return (0);
}

/*
 * Create a new segment mapping [addr, addr + size) in address space `as'
 * via the segment-creation function crfp (with creation args argsp).
 * Enforces the process VM resource control and, when the segvn large-page
 * path applies, delegates to as_map_vnsegs().
 */
int
as_map(struct as *as, caddr_t addr, size_t size, int (*crfp)(), void *argsp)
{
	struct seg *seg = NULL;
	caddr_t raddr;			/* rounded down addr */
	size_t rsize;			/* rounded up size */
	int error;
	struct proc *p = curproc;

	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
		(size_t)raddr;

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);

	/*
	 * check for wrap around
	 */
	if ((raddr + rsize < raddr) || (as->a_size > (ULONG_MAX - size))) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENOMEM);
	}

	as->a_updatedir = 1;	/* inform /proc */
	gethrestime(&as->a_updatetime);

	/*
	 * Enforce the per-process virtual memory resource control
	 * (RLIMIT_VMEM) for all address spaces but the kernel's.
	 */
	if (as != &kas && as->a_size + rsize > (size_t)p->p_vmem_ctl) {
		AS_LOCK_EXIT(as, &as->a_lock);

		(void) rctl_action(rctlproc_legacy[RLIMIT_VMEM], p->p_rctls, p,
		    RCA_UNSAFE_ALL);

		return (ENOMEM);
	}

	if (AS_MAP_VNSEGS_USELPGS(crfp, argsp)) {
		/* unmap is set if as_map_vnsegs created any segments. */
		int unmap = 0;
		error = as_map_vnsegs(as, raddr, rsize, crfp,
		    (struct segvn_crargs *)argsp, &unmap);
		if (error != 0) {
			AS_LOCK_EXIT(as, &as->a_lock);
			if (unmap) {
				(void) as_unmap(as, addr, size);
			}
			return (error);
		}
	} else {
		seg = seg_alloc(as, addr, size);
		if (seg == NULL) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (ENOMEM);
		}

		error = (*crfp)(seg, argsp);
		if (error != 0) {
			seg_free(seg);
			AS_LOCK_EXIT(as, &as->a_lock);
			return (error);
		}
	}

	/*
	 * Add size now so as_unmap will work if as_ctl fails.
	 */
	as->a_size += rsize;

	as_setwatch(as);

	/*
	 * If the address space is locked,
	 * establish memory locks for the new segment.
	 */
	mutex_enter(&as->a_contents);
	if (AS_ISPGLCK(as)) {
		mutex_exit(&as->a_contents);
		AS_LOCK_EXIT(as, &as->a_lock);
		error = as_ctl(as, addr, size, MC_LOCK, 0, 0, NULL, 0);
		if (error != 0)
			(void) as_unmap(as, addr, size);
	} else {
		mutex_exit(&as->a_contents);
		AS_LOCK_EXIT(as, &as->a_lock);
	}
	return (error);
}


/*
 * Delete all segments in the address space marked with S_PURGE.
 * This is currently used for Sparc V9 nofault ASI segments (seg_nf.c).
 * These segments are deleted as a first step before calls to as_gap(), so
 * that they don't affect mmap() or shmat().
1678*7c478bd9Sstevel@tonic-gate */ 1679*7c478bd9Sstevel@tonic-gate void 1680*7c478bd9Sstevel@tonic-gate as_purge(struct as *as) 1681*7c478bd9Sstevel@tonic-gate { 1682*7c478bd9Sstevel@tonic-gate struct seg *seg; 1683*7c478bd9Sstevel@tonic-gate struct seg *next_seg; 1684*7c478bd9Sstevel@tonic-gate 1685*7c478bd9Sstevel@tonic-gate /* 1686*7c478bd9Sstevel@tonic-gate * the setting of NEEDSPURGE is protect by as_rangelock(), so 1687*7c478bd9Sstevel@tonic-gate * no need to grab a_contents mutex for this check 1688*7c478bd9Sstevel@tonic-gate */ 1689*7c478bd9Sstevel@tonic-gate if ((as->a_flags & AS_NEEDSPURGE) == 0) 1690*7c478bd9Sstevel@tonic-gate return; 1691*7c478bd9Sstevel@tonic-gate 1692*7c478bd9Sstevel@tonic-gate AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER); 1693*7c478bd9Sstevel@tonic-gate next_seg = NULL; 1694*7c478bd9Sstevel@tonic-gate seg = AS_SEGFIRST(as); 1695*7c478bd9Sstevel@tonic-gate while (seg != NULL) { 1696*7c478bd9Sstevel@tonic-gate next_seg = AS_SEGNEXT(as, seg); 1697*7c478bd9Sstevel@tonic-gate if (seg->s_flags & S_PURGE) 1698*7c478bd9Sstevel@tonic-gate SEGOP_UNMAP(seg, seg->s_base, seg->s_size); 1699*7c478bd9Sstevel@tonic-gate seg = next_seg; 1700*7c478bd9Sstevel@tonic-gate } 1701*7c478bd9Sstevel@tonic-gate AS_LOCK_EXIT(as, &as->a_lock); 1702*7c478bd9Sstevel@tonic-gate 1703*7c478bd9Sstevel@tonic-gate mutex_enter(&as->a_contents); 1704*7c478bd9Sstevel@tonic-gate as->a_flags &= ~AS_NEEDSPURGE; 1705*7c478bd9Sstevel@tonic-gate mutex_exit(&as->a_contents); 1706*7c478bd9Sstevel@tonic-gate } 1707*7c478bd9Sstevel@tonic-gate 1708*7c478bd9Sstevel@tonic-gate /* 1709*7c478bd9Sstevel@tonic-gate * Find a hole of at least size minlen within [base, base + len). 1710*7c478bd9Sstevel@tonic-gate * 1711*7c478bd9Sstevel@tonic-gate * If flags specifies AH_HI, the hole will have the highest possible address 1712*7c478bd9Sstevel@tonic-gate * in the range. We use the as->a_lastgap field to figure out where to 1713*7c478bd9Sstevel@tonic-gate * start looking for a gap. 
 *
 * Otherwise, the gap will have the lowest possible address.
 *
 * If flags specifies AH_CONTAIN, the hole will contain the address addr.
 *
 * If an adequate hole is found, base and len are set to reflect the part of
 * the hole that is within range, and 0 is returned, otherwise,
 * -1 is returned.
 *
 * NOTE: This routine is not correct when base+len overflows caddr_t.
 */
int
as_gap(struct as *as, size_t minlen, caddr_t *basep, size_t *lenp, uint_t flags,
    caddr_t addr)
{
	caddr_t lobound = *basep;
	caddr_t hibound = lobound + *lenp;
	struct seg *lseg, *hseg;
	caddr_t lo, hi;
	int forward;
	caddr_t save_base;
	size_t save_len;

	/* Preserve the caller's range so it can be restored on failure. */
	save_base = *basep;
	save_len = *lenp;
	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	if (AS_SEGFIRST(as) == NULL) {
		/* Empty address space: the entire range is one hole. */
		if (valid_va_range(basep, lenp, minlen, flags & AH_DIR)) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (0);
		} else {
			AS_LOCK_EXIT(as, &as->a_lock);
			*basep = save_base;
			*lenp = save_len;
			return (-1);
		}
	}

	/*
	 * Set up to iterate over all the inter-segment holes in the given
	 * direction.  lseg is NULL for the lowest-addressed hole and hseg is
	 * NULL for the highest-addressed hole.  If moving backwards, we reset
	 * hseg to denote the highest-addressed segment.
	 */
	forward = (flags & AH_DIR) == AH_LO;
	if (forward) {
		hseg = as_findseg(as, lobound, 1);
		lseg = AS_SEGPREV(as, hseg);
	} else {

		/*
		 * If allocating at least as much as the last allocation,
		 * use a_lastgap's base as a better estimate of hibound.
		 */
		if (as->a_lastgap &&
		    minlen >= as->a_lastgap->s_size &&
		    hibound >= as->a_lastgap->s_base)
			hibound = as->a_lastgap->s_base;

		hseg = as_findseg(as, hibound, 1);
		if (hseg->s_base + hseg->s_size < hibound) {
			/* hibound lies above all segments */
			lseg = hseg;
			hseg = NULL;
		} else {
			lseg = AS_SEGPREV(as, hseg);
		}
	}

	for (;;) {
		/*
		 * Set lo and hi to the hole's boundaries.  (We should really
		 * use MAXADDR in place of hibound in the expression below,
		 * but can't express it easily; using hibound in its place is
		 * harmless.)
		 */
		lo = (lseg == NULL) ? 0 : lseg->s_base + lseg->s_size;
		hi = (hseg == NULL) ? hibound : hseg->s_base;
		/*
		 * If the iteration has moved past the interval from lobound
		 * to hibound it's pointless to continue.
		 */
		if ((forward && lo > hibound) || (!forward && hi < lobound))
			break;
		else if (lo > hibound || hi < lobound)
			goto cont;
		/*
		 * Candidate hole lies at least partially within the allowable
		 * range.  Restrict it to fall completely within that range,
		 * i.e., to [max(lo, lobound), min(hi, hibound)].
		 */
		if (lo < lobound)
			lo = lobound;
		if (hi > hibound)
			hi = hibound;
		/*
		 * Verify that the candidate hole is big enough and meets
		 * hardware constraints.
		 */
		*basep = lo;
		*lenp = hi - lo;
		if (valid_va_range(basep, lenp, minlen,
		    forward ? AH_LO : AH_HI) &&
		    ((flags & AH_CONTAIN) == 0 ||
		    (*basep <= addr && *basep + *lenp > addr))) {
			/* Cache the bounding segment to seed future calls. */
			if (!forward)
				as->a_lastgap = hseg;
			if (hseg != NULL)
				as->a_lastgaphl = hseg;
			else
				as->a_lastgaphl = lseg;
			AS_LOCK_EXIT(as, &as->a_lock);
			return (0);
		}
	cont:
		/*
		 * Move to the next hole.
		 */
		if (forward) {
			lseg = hseg;
			if (lseg == NULL)
				break;
			hseg = AS_SEGNEXT(as, hseg);
		} else {
			hseg = lseg;
			if (hseg == NULL)
				break;
			lseg = AS_SEGPREV(as, lseg);
		}
	}
	/* No suitable hole: restore the caller's range and fail. */
	*basep = save_base;
	*lenp = save_len;
	AS_LOCK_EXIT(as, &as->a_lock);
	return (-1);
}

/*
 * Return the next range within [base, base + len) that is backed
 * with "real memory".  Skip holes and non-seg_vn segments.
 * We're lazy and only return one segment at a time.
1853*7c478bd9Sstevel@tonic-gate */ 1854*7c478bd9Sstevel@tonic-gate int 1855*7c478bd9Sstevel@tonic-gate as_memory(struct as *as, caddr_t *basep, size_t *lenp) 1856*7c478bd9Sstevel@tonic-gate { 1857*7c478bd9Sstevel@tonic-gate extern struct seg_ops segspt_shmops; /* needs a header file */ 1858*7c478bd9Sstevel@tonic-gate struct seg *seg; 1859*7c478bd9Sstevel@tonic-gate caddr_t addr, eaddr; 1860*7c478bd9Sstevel@tonic-gate caddr_t segend; 1861*7c478bd9Sstevel@tonic-gate 1862*7c478bd9Sstevel@tonic-gate AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 1863*7c478bd9Sstevel@tonic-gate 1864*7c478bd9Sstevel@tonic-gate addr = *basep; 1865*7c478bd9Sstevel@tonic-gate eaddr = addr + *lenp; 1866*7c478bd9Sstevel@tonic-gate 1867*7c478bd9Sstevel@tonic-gate seg = as_findseg(as, addr, 0); 1868*7c478bd9Sstevel@tonic-gate if (seg != NULL) 1869*7c478bd9Sstevel@tonic-gate addr = MAX(seg->s_base, addr); 1870*7c478bd9Sstevel@tonic-gate 1871*7c478bd9Sstevel@tonic-gate for (;;) { 1872*7c478bd9Sstevel@tonic-gate if (seg == NULL || addr >= eaddr || eaddr <= seg->s_base) { 1873*7c478bd9Sstevel@tonic-gate AS_LOCK_EXIT(as, &as->a_lock); 1874*7c478bd9Sstevel@tonic-gate return (EINVAL); 1875*7c478bd9Sstevel@tonic-gate } 1876*7c478bd9Sstevel@tonic-gate 1877*7c478bd9Sstevel@tonic-gate if (seg->s_ops == &segvn_ops) { 1878*7c478bd9Sstevel@tonic-gate segend = seg->s_base + seg->s_size; 1879*7c478bd9Sstevel@tonic-gate break; 1880*7c478bd9Sstevel@tonic-gate } 1881*7c478bd9Sstevel@tonic-gate 1882*7c478bd9Sstevel@tonic-gate /* 1883*7c478bd9Sstevel@tonic-gate * We do ISM by looking into the private data 1884*7c478bd9Sstevel@tonic-gate * to determine the real size of the segment. 
1885*7c478bd9Sstevel@tonic-gate */ 1886*7c478bd9Sstevel@tonic-gate if (seg->s_ops == &segspt_shmops) { 1887*7c478bd9Sstevel@tonic-gate segend = seg->s_base + spt_realsize(seg); 1888*7c478bd9Sstevel@tonic-gate if (addr < segend) 1889*7c478bd9Sstevel@tonic-gate break; 1890*7c478bd9Sstevel@tonic-gate } 1891*7c478bd9Sstevel@tonic-gate 1892*7c478bd9Sstevel@tonic-gate seg = AS_SEGNEXT(as, seg); 1893*7c478bd9Sstevel@tonic-gate 1894*7c478bd9Sstevel@tonic-gate if (seg != NULL) 1895*7c478bd9Sstevel@tonic-gate addr = seg->s_base; 1896*7c478bd9Sstevel@tonic-gate } 1897*7c478bd9Sstevel@tonic-gate 1898*7c478bd9Sstevel@tonic-gate *basep = addr; 1899*7c478bd9Sstevel@tonic-gate 1900*7c478bd9Sstevel@tonic-gate if (segend > eaddr) 1901*7c478bd9Sstevel@tonic-gate *lenp = eaddr - addr; 1902*7c478bd9Sstevel@tonic-gate else 1903*7c478bd9Sstevel@tonic-gate *lenp = segend - addr; 1904*7c478bd9Sstevel@tonic-gate 1905*7c478bd9Sstevel@tonic-gate AS_LOCK_EXIT(as, &as->a_lock); 1906*7c478bd9Sstevel@tonic-gate return (0); 1907*7c478bd9Sstevel@tonic-gate } 1908*7c478bd9Sstevel@tonic-gate 1909*7c478bd9Sstevel@tonic-gate /* 1910*7c478bd9Sstevel@tonic-gate * Swap the pages associated with the address space as out to 1911*7c478bd9Sstevel@tonic-gate * secondary storage, returning the number of bytes actually 1912*7c478bd9Sstevel@tonic-gate * swapped. 1913*7c478bd9Sstevel@tonic-gate * 1914*7c478bd9Sstevel@tonic-gate * The value returned is intended to correlate well with the process's 1915*7c478bd9Sstevel@tonic-gate * memory requirements. Its usefulness for this purpose depends on 1916*7c478bd9Sstevel@tonic-gate * how well the segment-level routines do at returning accurate 1917*7c478bd9Sstevel@tonic-gate * information. 
1918*7c478bd9Sstevel@tonic-gate */ 1919*7c478bd9Sstevel@tonic-gate size_t 1920*7c478bd9Sstevel@tonic-gate as_swapout(struct as *as) 1921*7c478bd9Sstevel@tonic-gate { 1922*7c478bd9Sstevel@tonic-gate struct seg *seg; 1923*7c478bd9Sstevel@tonic-gate size_t swpcnt = 0; 1924*7c478bd9Sstevel@tonic-gate 1925*7c478bd9Sstevel@tonic-gate /* 1926*7c478bd9Sstevel@tonic-gate * Kernel-only processes have given up their address 1927*7c478bd9Sstevel@tonic-gate * spaces. Of course, we shouldn't be attempting to 1928*7c478bd9Sstevel@tonic-gate * swap out such processes in the first place... 1929*7c478bd9Sstevel@tonic-gate */ 1930*7c478bd9Sstevel@tonic-gate if (as == NULL) 1931*7c478bd9Sstevel@tonic-gate return (0); 1932*7c478bd9Sstevel@tonic-gate 1933*7c478bd9Sstevel@tonic-gate AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 1934*7c478bd9Sstevel@tonic-gate 1935*7c478bd9Sstevel@tonic-gate /* Prevent XHATs from attaching */ 1936*7c478bd9Sstevel@tonic-gate mutex_enter(&as->a_contents); 1937*7c478bd9Sstevel@tonic-gate AS_SETBUSY(as); 1938*7c478bd9Sstevel@tonic-gate mutex_exit(&as->a_contents); 1939*7c478bd9Sstevel@tonic-gate 1940*7c478bd9Sstevel@tonic-gate 1941*7c478bd9Sstevel@tonic-gate /* 1942*7c478bd9Sstevel@tonic-gate * Free all mapping resources associated with the address 1943*7c478bd9Sstevel@tonic-gate * space. The segment-level swapout routines capitalize 1944*7c478bd9Sstevel@tonic-gate * on this unmapping by scavanging pages that have become 1945*7c478bd9Sstevel@tonic-gate * unmapped here. 
1946*7c478bd9Sstevel@tonic-gate */ 1947*7c478bd9Sstevel@tonic-gate hat_swapout(as->a_hat); 1948*7c478bd9Sstevel@tonic-gate if (as->a_xhat != NULL) 1949*7c478bd9Sstevel@tonic-gate xhat_swapout_all(as); 1950*7c478bd9Sstevel@tonic-gate 1951*7c478bd9Sstevel@tonic-gate mutex_enter(&as->a_contents); 1952*7c478bd9Sstevel@tonic-gate AS_CLRBUSY(as); 1953*7c478bd9Sstevel@tonic-gate mutex_exit(&as->a_contents); 1954*7c478bd9Sstevel@tonic-gate 1955*7c478bd9Sstevel@tonic-gate /* 1956*7c478bd9Sstevel@tonic-gate * Call the swapout routines of all segments in the address 1957*7c478bd9Sstevel@tonic-gate * space to do the actual work, accumulating the amount of 1958*7c478bd9Sstevel@tonic-gate * space reclaimed. 1959*7c478bd9Sstevel@tonic-gate */ 1960*7c478bd9Sstevel@tonic-gate for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) { 1961*7c478bd9Sstevel@tonic-gate struct seg_ops *ov = seg->s_ops; 1962*7c478bd9Sstevel@tonic-gate 1963*7c478bd9Sstevel@tonic-gate /* 1964*7c478bd9Sstevel@tonic-gate * We have to check to see if the seg has 1965*7c478bd9Sstevel@tonic-gate * an ops vector because the seg may have 1966*7c478bd9Sstevel@tonic-gate * been in the middle of being set up when 1967*7c478bd9Sstevel@tonic-gate * the process was picked for swapout. 1968*7c478bd9Sstevel@tonic-gate */ 1969*7c478bd9Sstevel@tonic-gate if ((ov != NULL) && (ov->swapout != NULL)) 1970*7c478bd9Sstevel@tonic-gate swpcnt += SEGOP_SWAPOUT(seg); 1971*7c478bd9Sstevel@tonic-gate } 1972*7c478bd9Sstevel@tonic-gate AS_LOCK_EXIT(as, &as->a_lock); 1973*7c478bd9Sstevel@tonic-gate return (swpcnt); 1974*7c478bd9Sstevel@tonic-gate } 1975*7c478bd9Sstevel@tonic-gate 1976*7c478bd9Sstevel@tonic-gate /* 1977*7c478bd9Sstevel@tonic-gate * Determine whether data from the mappings in interval [addr, addr + size) 1978*7c478bd9Sstevel@tonic-gate * are in the primary memory (core) cache. 
 */
int
as_incore(struct as *as, caddr_t addr,
    size_t size, char *vec, size_t *sizep)
{
	struct seg *seg;
	size_t ssize;
	caddr_t raddr;		/* rounded down addr */
	size_t rsize;		/* rounded up size */
	size_t isize;		/* iteration size */
	int error = 0;		/* result, assume success */

	/*
	 * *sizep accumulates the number of bytes actually examined; one
	 * byte of "vec" is filled in per page by SEGOP_INCORE.
	 *
	 * NOTE(review): failure is reported as -1 (hole in the range or a
	 * short SEGOP_INCORE) but wraparound returns ENOMEM -- inconsistent,
	 * yet long-standing; callers appear to depend on it.
	 */
	*sizep = 0;
	raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	rsize = ((((size_t)addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	if (raddr + rsize < raddr)		/* check for wraparound */
		return (ENOMEM);

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (-1);
	}

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			/* a gap between segments is an error */
			if (seg == NULL || raddr != seg->s_base) {
				error = -1;
				break;
			}
		}
		/* clip this iteration to the current segment */
		if ((raddr + rsize) > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;
		*sizep += isize = SEGOP_INCORE(seg, raddr, ssize, vec);
		if (isize != ssize) {
			error = -1;
			break;
		}
		vec += btopr(ssize);
	}
	AS_LOCK_EXIT(as, &as->a_lock);
	return (error);
}

/*
 * Unlock the runs of pages recorded in "bitmap" within the range starting
 * at "addr".  "position" is the bitmap index corresponding to addr's first
 * page; bt_range() yields each run [pos1, pos2) of set bits below end_pos.
 */
static void
as_segunlock(struct seg *seg, caddr_t addr, int attr,
	ulong_t *bitmap, size_t position, size_t npages)
{
	caddr_t	range_start;
	size_t	pos1 = position;
	size_t	pos2;
	size_t	size;
	size_t	end_pos = npages + position;

	while (bt_range(bitmap, &pos1, &pos2, end_pos)) {
		size = ptob((pos2 - pos1));
		range_start = (caddr_t)((uintptr_t)addr +
		    ptob(pos1 - position));

		(void) SEGOP_LOCKOP(seg, range_start, size, attr, MC_UNLOCK,
		    (ulong_t *)NULL, (size_t)NULL);
		pos1 = pos2;
	}
}

/*
 * Back out a partially-completed MC_LOCK: walk [raddr, raddr + rsize)
 * segment by segment, unlocking the pages marked in mlock_map.
 *
 * NOTE(review): there is no NULL check after AS_SEGNEXT() here --
 * presumably callers only pass ranges they just (partially) locked, so
 * the range is fully backed by segments; verify against callers.
 */
static void
as_unlockerr(struct as *as, int attr, ulong_t *mlock_map,
	caddr_t raddr, size_t rsize)
{
	struct seg *seg = as_segat(as, raddr);
	size_t ssize;

	while (rsize != 0) {
		if (raddr >= seg->s_base + seg->s_size)
			seg = AS_SEGNEXT(as, seg);

		/* clip this iteration to the current segment */
		if ((raddr + rsize) > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;

		as_segunlock(seg, raddr, attr, mlock_map, 0, btopr(ssize));

		rsize -= ssize;
		raddr += ssize;
	}
}

/*
 * Cache control operations over the interval [addr, addr + size) in
 * address space "as".
 */
/*ARGSUSED*/
int
as_ctl(struct as *as, caddr_t addr, size_t size, int func, int attr,
    uintptr_t arg, ulong_t *lock_map, size_t pos)
{
	struct seg *seg;	/* working segment */
	caddr_t raddr;		/* rounded down addr */
	caddr_t initraddr;	/* saved initial rounded down addr */
	size_t rsize;		/* rounded up size */
	size_t initrsize;	/* saved initial rounded up size */
	size_t ssize;		/* size of seg */
	int error = 0;		/* result */
	size_t mlock_size;	/* size of bitmap */
	ulong_t *mlock_map;	/* pointer to bitmap used */
				/* to represent the locked */
				/* pages. */
retry:
	/*
	 * On re-entry with IE_RETRY we need the writer lock, since the
	 * segment list may be restructured underneath us otherwise.
	 */
	if (error == IE_RETRY)
		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	else
		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);

	/*
	 * If these are address space lock/unlock operations, loop over
	 * all segments in the address space, as appropriate.
	 */
	if (func == MC_LOCKAS) {
		size_t npages, idx;
		size_t rlen = 0;	/* rounded as length */

		idx = pos;

		if (arg & MCL_FUTURE) {
			mutex_enter(&as->a_contents);
			AS_SETPGLCK(as);
			mutex_exit(&as->a_contents);
		}
		if ((arg & MCL_CURRENT) == 0) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (0);
		}

		seg = AS_SEGFIRST(as);
		if (seg == NULL) {
			AS_LOCK_EXIT(as, &as->a_lock);
			return (0);
		}

		/*
		 * Total up the page-rounded length of every segment so
		 * the lock bitmap can be sized to cover them all.
		 */
		do {
			raddr = (caddr_t)((uintptr_t)seg->s_base &
			    (uintptr_t)PAGEMASK);
			rlen += (((uintptr_t)(seg->s_base + seg->s_size) +
				PAGEOFFSET) & PAGEMASK) - (uintptr_t)raddr;
		} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

		mlock_size = BT_BITOUL(btopr(rlen));
		if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
			sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
				AS_LOCK_EXIT(as, &as->a_lock);
				return (EAGAIN);
		}

		for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
			error = SEGOP_LOCKOP(seg, seg->s_base,
			    seg->s_size, attr, MC_LOCK, mlock_map, pos);
			if (error != 0)
				break;
			pos += seg_pages(seg);
		}

		/*
		 * On failure, undo whatever got locked using the bitmap
		 * the segment drivers filled in above.
		 */
		if (error) {
			for (seg = AS_SEGFIRST(as); seg != NULL;
				seg = AS_SEGNEXT(as, seg)) {

				raddr = (caddr_t)((uintptr_t)seg->s_base &
					(uintptr_t)PAGEMASK);
				npages = seg_pages(seg);
				as_segunlock(seg, raddr, attr, mlock_map,
					idx, npages);
				idx += npages;
			}
		}

		kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
		AS_LOCK_EXIT(as, &as->a_lock);
		goto lockerr;
	} else if (func == MC_UNLOCKAS) {
		mutex_enter(&as->a_contents);
		AS_CLRPGLCK(as);
		mutex_exit(&as->a_contents);

		for (seg = AS_SEGFIRST(as); seg; seg = AS_SEGNEXT(as, seg)) {
			error = SEGOP_LOCKOP(seg, seg->s_base,
			    seg->s_size, attr, MC_UNLOCK, NULL, 0);
			if (error != 0)
				break;
		}

		AS_LOCK_EXIT(as, &as->a_lock);
		goto lockerr;
	}

	/*
	 * Normalize addresses and sizes.
	 */
	initraddr = raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	initrsize = rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) -
	    (size_t)raddr;

	if (raddr + rsize < raddr) {		/* check for wraparound */
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENOMEM);
	}

	/*
	 * Get initial segment.
	 */
	if ((seg = as_segat(as, raddr)) == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENOMEM);
	}

	/* MC_LOCK tracks progress in a bitmap so failures can be undone. */
	if (func == MC_LOCK) {
		mlock_size = BT_BITOUL(btopr(rsize));
		if ((mlock_map = (ulong_t *)kmem_zalloc(mlock_size *
			sizeof (ulong_t), KM_NOSLEEP)) == NULL) {
				AS_LOCK_EXIT(as, &as->a_lock);
				return (EAGAIN);
		}
	}

	/*
	 * Loop over all segments.  If a hole in the address range is
	 * discovered, then fail.  For each segment, perform the appropriate
	 * control operation.
	 */
	while (rsize != 0) {

		/*
		 * Make sure there's no hole, calculate the portion
		 * of the next segment to be operated over.
		 */
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			if (seg == NULL || raddr != seg->s_base) {
				if (func == MC_LOCK) {
					as_unlockerr(as, attr, mlock_map,
					    initraddr, initrsize - rsize);
					kmem_free(mlock_map,
					    mlock_size * sizeof (ulong_t));
				}
				AS_LOCK_EXIT(as, &as->a_lock);
				return (ENOMEM);
			}
		}
		if ((raddr + rsize) > (seg->s_base + seg->s_size))
			ssize = seg->s_base + seg->s_size - raddr;
		else
			ssize = rsize;

		/*
		 * Dispatch on specific function.
		 */
		switch (func) {

		/*
		 * Synchronize cached data from mappings with backing
		 * objects.
		 */
		case MC_SYNC:
			if (error = SEGOP_SYNC(seg, raddr, ssize,
			    attr, (uint_t)arg)) {
				AS_LOCK_EXIT(as, &as->a_lock);
				return (error);
			}
			break;

		/*
		 * Lock pages in memory.
		 */
		case MC_LOCK:
			if (error = SEGOP_LOCKOP(seg, raddr, ssize,
			    attr, func, mlock_map, pos)) {
				/* undo the pages locked so far */
				as_unlockerr(as, attr, mlock_map, initraddr,
				    initrsize - rsize + ssize);
				kmem_free(mlock_map, mlock_size *
				    sizeof (ulong_t));
				AS_LOCK_EXIT(as, &as->a_lock);
				goto lockerr;
			}
			break;

		/*
		 * Unlock mapped pages.
		 */
		case MC_UNLOCK:
			(void) SEGOP_LOCKOP(seg, raddr, ssize, attr, func,
			    (ulong_t *)NULL, (size_t)NULL);
			break;

		/*
		 * Store VM advise for mapped pages in segment layer.
		 */
		case MC_ADVISE:
			error = SEGOP_ADVISE(seg, raddr, ssize, (uint_t)arg);

			/*
			 * Check for regular errors and special retry error
			 */
			if (error) {
				if (error == IE_RETRY) {
					/*
					 * Need to acquire writers lock, so
					 * have to drop readers lock and start
					 * all over again
					 */
					AS_LOCK_EXIT(as, &as->a_lock);
					goto retry;
				} else if (error == IE_REATTACH) {
					/*
					 * Find segment for current address
					 * because current segment just got
					 * split or concatenated
					 */
					seg = as_segat(as, raddr);
					if (seg == NULL) {
						AS_LOCK_EXIT(as, &as->a_lock);
						return (ENOMEM);
					}
				} else {
					/*
					 * Regular error
					 */
					AS_LOCK_EXIT(as, &as->a_lock);
					return (error);
				}
			}
			break;

		/*
		 * Can't happen.
		 */
		default:
			panic("as_ctl: bad operation %d", func);
			/*NOTREACHED*/
		}

		rsize -= ssize;
		raddr += ssize;
	}

	if (func == MC_LOCK)
		kmem_free(mlock_map, mlock_size * sizeof (ulong_t));
	AS_LOCK_EXIT(as, &as->a_lock);
	return (0);
lockerr:

	/*
	 * If the lower levels returned EDEADLK for a segment lockop,
	 * it means that we should retry the operation.  Let's wait
	 * a bit also to let the deadlock causing condition clear.
	 * This is part of a gross hack to work around a design flaw
	 * in the ufs/sds logging code and should go away when the
	 * logging code is re-designed to fix the problem.  See bug
	 * 4125102 for details of the problem.
	 */
	if (error == EDEADLK) {
		delay(deadlk_wait);
		error = 0;
		goto retry;
	}
	return (error);
}

/*
 * Special code for exec to move the stack segment from its interim
 * place in the old address to the right place in the new address space.
 */
/*ARGSUSED*/
int
as_exec(struct as *oas, caddr_t ostka, size_t stksz,
    struct as *nas, caddr_t nstka, uint_t hatflag)
{
	struct seg *stkseg;

	/* NOTE(review): hatflag is unused here (hence /\*ARGSUSED*\/). */
	AS_LOCK_ENTER(oas, &oas->a_lock, RW_WRITER);
	stkseg = as_segat(oas, ostka);
	stkseg = as_removeseg(oas, stkseg);
	ASSERT(stkseg != NULL);
	ASSERT(stkseg->s_base == ostka && stkseg->s_size == stksz);
	stkseg->s_as = nas;
	stkseg->s_base = nstka;

	/*
	 * It's ok to lock the address space we are about to exec to.
	 */
	AS_LOCK_ENTER(nas, &nas->a_lock, RW_WRITER);
	ASSERT(avl_numnodes(&nas->a_wpage) == 0);
	/* keep the per-as size accounting in step with the move */
	nas->a_size += stkseg->s_size;
	oas->a_size -= stkseg->s_size;
	(void) as_addseg(nas, stkseg);
	AS_LOCK_EXIT(nas, &nas->a_lock);
	AS_LOCK_EXIT(oas, &oas->a_lock);
	return (0);
}

/*
 * Translate a fault code from the fault/lock paths into an errno value:
 * object errors carry their own errno, protection faults become EACCES,
 * and everything else maps to EFAULT.
 */
static int
f_decode(faultcode_t fault_err)
{
	int error = 0;

	switch (FC_CODE(fault_err)) {
	case FC_OBJERR:
		error = FC_ERRNO(fault_err);
		break;
	case FC_PROT:
		error = EACCES;
		break;
	default:
		error = EFAULT;
		break;
	}
	return (error);
}

/*
 * lock pages in a given address space. Return shadow list. If
 * the list is NULL, the MMU mapping is also locked.
2405*7c478bd9Sstevel@tonic-gate */ 2406*7c478bd9Sstevel@tonic-gate int 2407*7c478bd9Sstevel@tonic-gate as_pagelock(struct as *as, struct page ***ppp, caddr_t addr, 2408*7c478bd9Sstevel@tonic-gate size_t size, enum seg_rw rw) 2409*7c478bd9Sstevel@tonic-gate { 2410*7c478bd9Sstevel@tonic-gate size_t rsize; 2411*7c478bd9Sstevel@tonic-gate caddr_t base; 2412*7c478bd9Sstevel@tonic-gate caddr_t raddr; 2413*7c478bd9Sstevel@tonic-gate faultcode_t fault_err; 2414*7c478bd9Sstevel@tonic-gate struct seg *seg; 2415*7c478bd9Sstevel@tonic-gate int res; 2416*7c478bd9Sstevel@tonic-gate int prefaulted = 0; 2417*7c478bd9Sstevel@tonic-gate 2418*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_START, 2419*7c478bd9Sstevel@tonic-gate "as_pagelock_start: addr %p size %ld", addr, size); 2420*7c478bd9Sstevel@tonic-gate 2421*7c478bd9Sstevel@tonic-gate raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2422*7c478bd9Sstevel@tonic-gate rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2423*7c478bd9Sstevel@tonic-gate (size_t)raddr; 2424*7c478bd9Sstevel@tonic-gate top: 2425*7c478bd9Sstevel@tonic-gate /* 2426*7c478bd9Sstevel@tonic-gate * if the request crosses two segments let 2427*7c478bd9Sstevel@tonic-gate * as_fault handle it. 
2428*7c478bd9Sstevel@tonic-gate */ 2429*7c478bd9Sstevel@tonic-gate AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2430*7c478bd9Sstevel@tonic-gate seg = as_findseg(as, addr, 0); 2431*7c478bd9Sstevel@tonic-gate if ((seg == NULL) || ((base = seg->s_base) > addr) || 2432*7c478bd9Sstevel@tonic-gate (addr + size) > base + seg->s_size) { 2433*7c478bd9Sstevel@tonic-gate AS_LOCK_EXIT(as, &as->a_lock); 2434*7c478bd9Sstevel@tonic-gate goto slow; 2435*7c478bd9Sstevel@tonic-gate } 2436*7c478bd9Sstevel@tonic-gate 2437*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_START, 2438*7c478bd9Sstevel@tonic-gate "seg_lock_1_start: raddr %p rsize %ld", raddr, rsize); 2439*7c478bd9Sstevel@tonic-gate 2440*7c478bd9Sstevel@tonic-gate /* 2441*7c478bd9Sstevel@tonic-gate * try to lock pages and pass back shadow list 2442*7c478bd9Sstevel@tonic-gate */ 2443*7c478bd9Sstevel@tonic-gate res = SEGOP_PAGELOCK(seg, raddr, rsize, ppp, L_PAGELOCK, rw); 2444*7c478bd9Sstevel@tonic-gate 2445*7c478bd9Sstevel@tonic-gate TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_SEG_LOCK_END, "seg_lock_1_end"); 2446*7c478bd9Sstevel@tonic-gate AS_LOCK_EXIT(as, &as->a_lock); 2447*7c478bd9Sstevel@tonic-gate if (res == 0) { 2448*7c478bd9Sstevel@tonic-gate return (0); 2449*7c478bd9Sstevel@tonic-gate } else if (res == ENOTSUP || prefaulted) { 2450*7c478bd9Sstevel@tonic-gate /* 2451*7c478bd9Sstevel@tonic-gate * (1) segment driver doesn't support PAGELOCK fastpath, or 2452*7c478bd9Sstevel@tonic-gate * (2) we've already tried fast path unsuccessfully after 2453*7c478bd9Sstevel@tonic-gate * faulting in the addr range below; system might be 2454*7c478bd9Sstevel@tonic-gate * thrashing or there may not be enough availrmem. 
2455*7c478bd9Sstevel@tonic-gate */ 2456*7c478bd9Sstevel@tonic-gate goto slow; 2457*7c478bd9Sstevel@tonic-gate } 2458*7c478bd9Sstevel@tonic-gate 2459*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_FAULT_START, 2460*7c478bd9Sstevel@tonic-gate "as_fault_start: addr %p size %ld", addr, size); 2461*7c478bd9Sstevel@tonic-gate 2462*7c478bd9Sstevel@tonic-gate /* 2463*7c478bd9Sstevel@tonic-gate * we might get here because of some COW fault or non 2464*7c478bd9Sstevel@tonic-gate * existing page. Let as_fault deal with it. Just load 2465*7c478bd9Sstevel@tonic-gate * the page, don't lock the MMU mapping. 2466*7c478bd9Sstevel@tonic-gate */ 2467*7c478bd9Sstevel@tonic-gate fault_err = as_fault(as->a_hat, as, addr, size, F_INVAL, rw); 2468*7c478bd9Sstevel@tonic-gate if (fault_err != 0) { 2469*7c478bd9Sstevel@tonic-gate return (f_decode(fault_err)); 2470*7c478bd9Sstevel@tonic-gate } 2471*7c478bd9Sstevel@tonic-gate 2472*7c478bd9Sstevel@tonic-gate prefaulted = 1; 2473*7c478bd9Sstevel@tonic-gate 2474*7c478bd9Sstevel@tonic-gate /* 2475*7c478bd9Sstevel@tonic-gate * try fast path again; since we've dropped a_lock, 2476*7c478bd9Sstevel@tonic-gate * we need to try the dance from the start to see if 2477*7c478bd9Sstevel@tonic-gate * the addr range is still valid. 2478*7c478bd9Sstevel@tonic-gate */ 2479*7c478bd9Sstevel@tonic-gate goto top; 2480*7c478bd9Sstevel@tonic-gate slow: 2481*7c478bd9Sstevel@tonic-gate /* 2482*7c478bd9Sstevel@tonic-gate * load the page and lock the MMU mapping. 
2483*7c478bd9Sstevel@tonic-gate */ 2484*7c478bd9Sstevel@tonic-gate fault_err = as_fault(as->a_hat, as, addr, size, F_SOFTLOCK, rw); 2485*7c478bd9Sstevel@tonic-gate if (fault_err != 0) { 2486*7c478bd9Sstevel@tonic-gate return (f_decode(fault_err)); 2487*7c478bd9Sstevel@tonic-gate } 2488*7c478bd9Sstevel@tonic-gate *ppp = NULL; 2489*7c478bd9Sstevel@tonic-gate 2490*7c478bd9Sstevel@tonic-gate TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_LOCK_END, "as_pagelock_end"); 2491*7c478bd9Sstevel@tonic-gate return (0); 2492*7c478bd9Sstevel@tonic-gate } 2493*7c478bd9Sstevel@tonic-gate 2494*7c478bd9Sstevel@tonic-gate /* 2495*7c478bd9Sstevel@tonic-gate * unlock pages in a given address range 2496*7c478bd9Sstevel@tonic-gate */ 2497*7c478bd9Sstevel@tonic-gate void 2498*7c478bd9Sstevel@tonic-gate as_pageunlock(struct as *as, struct page **pp, caddr_t addr, size_t size, 2499*7c478bd9Sstevel@tonic-gate enum seg_rw rw) 2500*7c478bd9Sstevel@tonic-gate { 2501*7c478bd9Sstevel@tonic-gate struct seg *seg; 2502*7c478bd9Sstevel@tonic-gate size_t rsize; 2503*7c478bd9Sstevel@tonic-gate caddr_t raddr; 2504*7c478bd9Sstevel@tonic-gate 2505*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_START, 2506*7c478bd9Sstevel@tonic-gate "as_pageunlock_start: addr %p size %ld", addr, size); 2507*7c478bd9Sstevel@tonic-gate 2508*7c478bd9Sstevel@tonic-gate /* 2509*7c478bd9Sstevel@tonic-gate * if the shadow list is NULL, as_pagelock was 2510*7c478bd9Sstevel@tonic-gate * falling back to as_fault 2511*7c478bd9Sstevel@tonic-gate */ 2512*7c478bd9Sstevel@tonic-gate if (pp == NULL) { 2513*7c478bd9Sstevel@tonic-gate (void) as_fault(as->a_hat, as, addr, size, F_SOFTUNLOCK, rw); 2514*7c478bd9Sstevel@tonic-gate return; 2515*7c478bd9Sstevel@tonic-gate } 2516*7c478bd9Sstevel@tonic-gate raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2517*7c478bd9Sstevel@tonic-gate rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2518*7c478bd9Sstevel@tonic-gate (size_t)raddr; 2519*7c478bd9Sstevel@tonic-gate 
AS_LOCK_ENTER(as, &as->a_lock, RW_READER); 2520*7c478bd9Sstevel@tonic-gate seg = as_findseg(as, addr, 0); 2521*7c478bd9Sstevel@tonic-gate ASSERT(seg); 2522*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_PHYSIO, TR_PHYSIO_SEG_UNLOCK_START, 2523*7c478bd9Sstevel@tonic-gate "seg_unlock_start: raddr %p rsize %ld", raddr, rsize); 2524*7c478bd9Sstevel@tonic-gate SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGEUNLOCK, rw); 2525*7c478bd9Sstevel@tonic-gate AS_LOCK_EXIT(as, &as->a_lock); 2526*7c478bd9Sstevel@tonic-gate TRACE_0(TR_FAC_PHYSIO, TR_PHYSIO_AS_UNLOCK_END, "as_pageunlock_end"); 2527*7c478bd9Sstevel@tonic-gate } 2528*7c478bd9Sstevel@tonic-gate 2529*7c478bd9Sstevel@tonic-gate /* 2530*7c478bd9Sstevel@tonic-gate * reclaim cached pages in a given address range 2531*7c478bd9Sstevel@tonic-gate */ 2532*7c478bd9Sstevel@tonic-gate void 2533*7c478bd9Sstevel@tonic-gate as_pagereclaim(struct as *as, struct page **pp, caddr_t addr, 2534*7c478bd9Sstevel@tonic-gate size_t size, enum seg_rw rw) 2535*7c478bd9Sstevel@tonic-gate { 2536*7c478bd9Sstevel@tonic-gate struct seg *seg; 2537*7c478bd9Sstevel@tonic-gate size_t rsize; 2538*7c478bd9Sstevel@tonic-gate caddr_t raddr; 2539*7c478bd9Sstevel@tonic-gate 2540*7c478bd9Sstevel@tonic-gate ASSERT(AS_READ_HELD(as, &as->a_lock)); 2541*7c478bd9Sstevel@tonic-gate ASSERT(pp != NULL); 2542*7c478bd9Sstevel@tonic-gate 2543*7c478bd9Sstevel@tonic-gate raddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); 2544*7c478bd9Sstevel@tonic-gate rsize = (((size_t)(addr + size) + PAGEOFFSET) & PAGEMASK) - 2545*7c478bd9Sstevel@tonic-gate (size_t)raddr; 2546*7c478bd9Sstevel@tonic-gate seg = as_findseg(as, addr, 0); 2547*7c478bd9Sstevel@tonic-gate ASSERT(seg); 2548*7c478bd9Sstevel@tonic-gate SEGOP_PAGELOCK(seg, raddr, rsize, &pp, L_PAGERECLAIM, rw); 2549*7c478bd9Sstevel@tonic-gate } 2550*7c478bd9Sstevel@tonic-gate 2551*7c478bd9Sstevel@tonic-gate #define MAXPAGEFLIP 4 2552*7c478bd9Sstevel@tonic-gate #define MAXPAGEFLIPSIZ MAXPAGEFLIP*PAGESIZE 
/*
 * Change the preferred page size (szc) for the range [addr, addr + size).
 *
 * Both addr and size must be aligned to the page size implied by szc,
 * otherwise EINVAL.  Walks the segments spanning the range under the
 * address space writer lock, invoking SEGOP_SETPAGESIZE on each chunk.
 * Watchpoint protections are cleared over the range first and restored
 * via as_setwatch() before returning.  Errors are translated:
 * IE_NOMEM -> EAGAIN, ENOTSUP -> EINVAL, IE_RETRY -> drop the lock and
 * restart from the top.  With wait == B_TRUE, EAGAIN (locked memory)
 * blocks on the a_contents/a_cv handshake and retries.
 */
int
as_setpagesize(struct as *as, caddr_t addr, size_t size, uint_t szc,
    boolean_t wait)
{
	struct seg *seg;
	size_t ssize;		/* size of the chunk handled this iteration */
	caddr_t raddr;		/* rounded down addr */
	size_t rsize;		/* rounded up size */
	int error = 0;
	size_t pgsz = page_get_pagesize(szc);

setpgsz_top:
	if (!IS_P2ALIGNED(addr, pgsz) || !IS_P2ALIGNED(size, pgsz)) {
		return (EINVAL);
	}

	raddr = addr;
	rsize = size;

	if (raddr + rsize < raddr)		/* check for wraparound */
		return (ENOMEM);

	AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
	as_clearwatchprot(as, raddr, rsize);
	seg = as_segat(as, raddr);
	if (seg == NULL) {
		as_setwatch(as);
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENOMEM);
	}

	for (; rsize != 0; rsize -= ssize, raddr += ssize) {
		if (raddr >= seg->s_base + seg->s_size) {
			seg = AS_SEGNEXT(as, seg);
			/* the range must be covered by contiguous segments */
			if (seg == NULL || raddr != seg->s_base) {
				error = ENOMEM;
				break;
			}
		}
		/* clamp this iteration's chunk to the current segment */
		if ((raddr + rsize) > (seg->s_base + seg->s_size)) {
			ssize = seg->s_base + seg->s_size - raddr;
		} else {
			ssize = rsize;
		}

		error = SEGOP_SETPAGESIZE(seg, raddr, ssize, szc);

		if (error == IE_NOMEM) {
			error = EAGAIN;
			break;
		}

		if (error == IE_RETRY) {
			/* segment list changed underneath us; start over */
			AS_LOCK_EXIT(as, &as->a_lock);
			goto setpgsz_top;
		}

		if (error == ENOTSUP) {
			error = EINVAL;
			break;
		}

		if (wait && (error == EAGAIN)) {
			/*
			 * Memory is currently locked.  It must be unlocked
			 * before this operation can succeed through a retry.
			 * The possible reasons for locked memory and
			 * corresponding strategies for unlocking are:
			 * (1) Normal I/O
			 *	wait for a signal that the I/O operation
			 *	has completed and the memory is unlocked.
			 * (2) Asynchronous I/O
			 *	The aio subsystem does not unlock pages when
			 *	the I/O is completed. Those pages are unlocked
			 *	when the application calls aiowait/aioerror.
			 *	So, to prevent blocking forever, cv_broadcast()
			 *	is done to wake up aio_cleanup_thread.
			 *	Subsequently, segvn_reclaim will be called, and
			 *	that will do AS_CLRUNMAPWAIT() and wake us up.
			 * (3) Long term page locking:
			 *	This is not relevant for as_setpagesize()
			 *	because we cannot change the page size for
			 *	driver memory. The attempt to do so will
			 *	fail with a different error than EAGAIN so
			 *	there's no need to trigger as callbacks like
			 *	as_unmap, as_setprot or as_free would do.
			 */
			mutex_enter(&as->a_contents);
			if (AS_ISUNMAPWAIT(as) == 0) {
				cv_broadcast(&as->a_cv);
			}
			AS_SETUNMAPWAIT(as);
			/* drop a_lock before sleeping, then restart fresh */
			AS_LOCK_EXIT(as, &as->a_lock);
			while (AS_ISUNMAPWAIT(as)) {
				cv_wait(&as->a_cv, &as->a_contents);
			}
			mutex_exit(&as->a_contents);
			goto setpgsz_top;
		} else if (error != 0) {
			break;
		}
	}
	as_setwatch(as);
	AS_LOCK_EXIT(as, &as->a_lock);
	return (error);
}

/*
 * Setup all of the uninitialized watched pages that we can.
 *
 * For every watched page not yet set up (wp_oprot == 0), save the
 * segment's current protections in wp_oprot, compute the reduced
 * protections implied by the watch flags (read/exec watches remove all
 * access, write watches remove PROT_WRITE), and apply them via
 * SEGOP_SETPROT.  IE_RETRY from the driver is retried exactly once
 * (asserted).  Caller must hold a_lock as writer.
 */
void
as_setwatch(struct as *as)
{
	struct watched_page *pwp;
	struct seg *seg;
	caddr_t vaddr;
	uint_t prot;
	int err, retrycnt;

	if (avl_numnodes(&as->a_wpage) == 0)
		return;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));

	for (pwp = avl_first(&as->a_wpage); pwp != NULL;
	    pwp = AVL_NEXT(&as->a_wpage, pwp)) {
		retrycnt = 0;
	retry:
		vaddr = pwp->wp_vaddr;
		if (pwp->wp_oprot != 0 ||	/* already set up */
		    (seg = as_segat(as, vaddr)) == NULL ||
		    SEGOP_GETPROT(seg, vaddr, 0, &prot) != 0)
			continue;

		pwp->wp_oprot = prot;
		if (pwp->wp_read)
			prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
		if (pwp->wp_write)
			prot &= ~PROT_WRITE;
		if (pwp->wp_exec)
			prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
		if (!(pwp->wp_flags & WP_NOWATCH) && prot != pwp->wp_oprot) {
			err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
			if (err == IE_RETRY) {
				/* undo setup so the retry re-reads prot */
				pwp->wp_oprot = 0;
				ASSERT(retrycnt == 0);
				retrycnt++;
				goto retry;
			}
		}
		pwp->wp_prot = prot;
	}
}

/*
 * Clear all of the watched pages in the address space.
 *
 * Inverse of as_setwatch(): for each set-up watched page, restore the
 * saved protections (wp_oprot) through the segment driver, then mark
 * the entry torn down (wp_oprot = wp_prot = 0).  IE_RETRY is retried
 * exactly once.  Caller must hold a_lock as writer.
 */
void
as_clearwatch(struct as *as)
{
	struct watched_page *pwp;
	struct seg *seg;
	caddr_t vaddr;
	uint_t prot;
	int err, retrycnt;

	if (avl_numnodes(&as->a_wpage) == 0)
		return;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));

	for (pwp = avl_first(&as->a_wpage); pwp != NULL;
	    pwp = AVL_NEXT(&as->a_wpage, pwp)) {
		retrycnt = 0;
	retry:
		vaddr = pwp->wp_vaddr;
		if (pwp->wp_oprot == 0 ||	/* not set up */
		    (seg = as_segat(as, vaddr)) == NULL)
			continue;

		if ((prot = pwp->wp_oprot) != pwp->wp_prot) {
			err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, prot);
			if (err == IE_RETRY) {
				ASSERT(retrycnt == 0);
				retrycnt++;
				goto retry;
			}
		}
		pwp->wp_oprot = 0;
		pwp->wp_prot = 0;
	}
}

/*
 * Force a new setup for all the watched pages in the range.
 *
 * Used when the underlying protections of [addr, addr + size) are being
 * changed to 'prot': each watched page in the range records prot as its
 * new original protection (wp_oprot) and has the watch-reduced variant
 * (wprot) applied, unless the page is marked WP_NOWATCH.  IE_RETRY is
 * retried exactly once.  Caller must hold a_lock as writer.
 */
static void
as_setwatchprot(struct as *as, caddr_t addr, size_t size, uint_t prot)
{
	struct watched_page *pwp;
	struct watched_page tpw;	/* probe key for the AVL lookup */
	caddr_t eaddr = addr + size;
	caddr_t vaddr;
	struct seg *seg;
	int err, retrycnt;
	uint_t wprot;
	avl_index_t where;

	if (avl_numnodes(&as->a_wpage) == 0)
		return;

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));

	/* find the first watched page at or after the (rounded) start */
	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
		pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);

	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
		retrycnt = 0;
		vaddr = pwp->wp_vaddr;

		wprot = prot;
		if (pwp->wp_read)
			wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
		if (pwp->wp_write)
			wprot &= ~PROT_WRITE;
		if (pwp->wp_exec)
			wprot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
		if (!(pwp->wp_flags & WP_NOWATCH) && wprot != pwp->wp_oprot) {
		retry:
			seg = as_segat(as, vaddr);
			if (seg == NULL) {
				panic("as_setwatchprot: no seg");
				/*NOTREACHED*/
			}
			err = SEGOP_SETPROT(seg, vaddr, PAGESIZE, wprot);
			if (err == IE_RETRY) {
				ASSERT(retrycnt == 0);
				retrycnt++;
				goto retry;
			}
		}
		pwp->wp_oprot = prot;
		pwp->wp_prot = wprot;

		pwp = AVL_NEXT(&as->a_wpage, pwp);
	}
}

/*
 * Clear all of the watched pages in the range.
 *
 * For each set-up watched page in [addr, addr + size), restore the
 * saved protections and mark the entry torn down, as in as_clearwatch()
 * but restricted to the given range.
 */
static void
as_clearwatchprot(struct as *as, caddr_t addr, size_t size)
{
	caddr_t eaddr = addr + size;
	struct watched_page *pwp;
	struct watched_page tpw;	/* probe key for the AVL lookup */
	uint_t prot;
	struct seg *seg;
	int err, retrycnt;
	avl_index_t where;

	if (avl_numnodes(&as->a_wpage) == 0)
		return;

	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
	if ((pwp = avl_find(&as->a_wpage, &tpw, &where)) == NULL)
		pwp = avl_nearest(&as->a_wpage, where, AVL_AFTER);

	ASSERT(AS_WRITE_HELD(as, &as->a_lock));

	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
		ASSERT(addr >= pwp->wp_vaddr);

		if ((prot = pwp->wp_oprot) != 0) {
			retrycnt = 0;

			if (prot != pwp->wp_prot) {
			retry:
				seg = as_segat(as, pwp->wp_vaddr);
				/*
				 * NOTE(review): this continue does not
				 * advance pwp, so if as_segat() ever
				 * returns NULL here the loop re-examines
				 * the same entry forever.  Presumably a
				 * set-up watched page always has a
				 * segment -- confirm before changing.
				 */
				if (seg == NULL)
					continue;
				err = SEGOP_SETPROT(seg, pwp->wp_vaddr,
				    PAGESIZE, prot);
				if (err == IE_RETRY) {
					ASSERT(retrycnt == 0);
					retrycnt++;
					goto retry;

				}
			}
			pwp->wp_oprot = 0;
			pwp->wp_prot = 0;
		}

		pwp = AVL_NEXT(&as->a_wpage, pwp);
	}
}

/*
 * Deliver siginfo to every process whose p_as is the given address
 * space.  The p_as test is re-checked under p_lock after the racy
 * unlocked check, since a process may switch address spaces (exec)
 * between the two.  sigaddq() is best-effort (KM_NOSLEEP).
 */
void
as_signal_proc(struct as *as, k_siginfo_t *siginfo)
{
	struct proc *p;

	mutex_enter(&pidlock);
	for (p = practive; p; p = p->p_next) {
		if (p->p_as == as) {
			mutex_enter(&p->p_lock);
			if (p->p_as == as)
				sigaddq(p, NULL, siginfo, KM_NOSLEEP);
			mutex_exit(&p->p_lock);
		}
	}
	mutex_exit(&pidlock);
}

/*
 * return memory object ID
 *
 * Looks up the segment containing addr and delegates to its getmemid
 * operation.  EFAULT if no segment maps addr; ENODEV if the segment
 * driver predates the getmemid entry point.
 */
int
as_getmemid(struct as *as, caddr_t addr, memid_t *memidp)
{
	struct seg *seg;
	int sts;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	seg = as_segat(as, addr);
	if (seg == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (EFAULT);
	}
	/*
	 * catch old drivers which may not support getmemid
	 */
	if (seg->s_ops->getmemid == NULL) {
		AS_LOCK_EXIT(as, &as->a_lock);
		return (ENODEV);
	}

	sts = SEGOP_GETMEMID(seg, addr, memidp);

	AS_LOCK_EXIT(as, &as->a_lock);
	return (sts);
}