1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* 23*7c478bd9Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 25*7c478bd9Sstevel@tonic-gate */ 26*7c478bd9Sstevel@tonic-gate 27*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 28*7c478bd9Sstevel@tonic-gate 29*7c478bd9Sstevel@tonic-gate #include <sys/mman.h> 30*7c478bd9Sstevel@tonic-gate #include <sys/param.h> 31*7c478bd9Sstevel@tonic-gate #include <sys/stat.h> 32*7c478bd9Sstevel@tonic-gate #include <sys/types.h> 33*7c478bd9Sstevel@tonic-gate #include <assert.h> 34*7c478bd9Sstevel@tonic-gate #include <errno.h> 35*7c478bd9Sstevel@tonic-gate #include <fcntl.h> 36*7c478bd9Sstevel@tonic-gate #include <libproc.h> 37*7c478bd9Sstevel@tonic-gate #include <limits.h> 38*7c478bd9Sstevel@tonic-gate #include <procfs.h> 39*7c478bd9Sstevel@tonic-gate #include <stdio.h> 40*7c478bd9Sstevel@tonic-gate #include <stdlib.h> 41*7c478bd9Sstevel@tonic-gate #include <strings.h> 42*7c478bd9Sstevel@tonic-gate #include <time.h> 43*7c478bd9Sstevel@tonic-gate #include <unistd.h> 44*7c478bd9Sstevel@tonic-gate #include "rcapd.h" 45*7c478bd9Sstevel@tonic-gate #include "rcapd_rfd.h" 46*7c478bd9Sstevel@tonic-gate #include "rcapd_mapping.h" 47*7c478bd9Sstevel@tonic-gate #include "utils.h" 48*7c478bd9Sstevel@tonic-gate 49*7c478bd9Sstevel@tonic-gate static int lpc_xmap_update(lprocess_t *); 50*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 51*7c478bd9Sstevel@tonic-gate extern int lmapping_dump_diff(lmapping_t *lm1, lmapping_t *lm2); 52*7c478bd9Sstevel@tonic-gate #endif /* DEBUG */ 53*7c478bd9Sstevel@tonic-gate 54*7c478bd9Sstevel@tonic-gate /* 55*7c478bd9Sstevel@tonic-gate * The number of file descriptors required to grab a process and create an 56*7c478bd9Sstevel@tonic-gate * agent in it. 57*7c478bd9Sstevel@tonic-gate */ 58*7c478bd9Sstevel@tonic-gate #define PGRAB_FD_COUNT 10 59*7c478bd9Sstevel@tonic-gate 60*7c478bd9Sstevel@tonic-gate /* 61*7c478bd9Sstevel@tonic-gate * Record a position in an address space as it corresponds to a prpageheader_t 62*7c478bd9Sstevel@tonic-gate * and affiliated structures. 63*7c478bd9Sstevel@tonic-gate */ 64*7c478bd9Sstevel@tonic-gate typedef struct prpageheader_cur { 65*7c478bd9Sstevel@tonic-gate int pr_nmap; /* number of mappings in address space */ 66*7c478bd9Sstevel@tonic-gate int pr_map; /* number of this mapping */ 67*7c478bd9Sstevel@tonic-gate uint64_t pr_pgoff; /* page offset into mapping */ 68*7c478bd9Sstevel@tonic-gate uint64_t pr_npage; /* number of pages in mapping */ 69*7c478bd9Sstevel@tonic-gate uint64_t pr_pagesize; /* page size of mapping */ 70*7c478bd9Sstevel@tonic-gate uintptr_t pr_addr; /* base of mapping */ 71*7c478bd9Sstevel@tonic-gate prpageheader_t *pr_prpageheader; /* associated page header */ 72*7c478bd9Sstevel@tonic-gate void *pr_pdaddr; /* address of page's byte in pagedata */ 73*7c478bd9Sstevel@tonic-gate prxmap_t *pr_xmap; /* array containing per-segment information */ 74*7c478bd9Sstevel@tonic-gate int pr_nxmap; /* number of xmaps in array */ 75*7c478bd9Sstevel@tonic-gate int64_t pr_rss; /* number of resident pages in mapping, */ 76*7c478bd9Sstevel@tonic-gate /* or -1 if xmap is out of sync */ 77*7c478bd9Sstevel@tonic-gate int64_t pr_pg_rss; /* number of pageable pages in mapping, or -1 */ 78*7c478bd9Sstevel@tonic-gate } prpageheader_cur_t; 79*7c478bd9Sstevel@tonic-gate 80*7c478bd9Sstevel@tonic-gate static struct ps_prochandle *scan_pr; /* currently-scanned process's handle */ 81*7c478bd9Sstevel@tonic-gate 82*7c478bd9Sstevel@tonic-gate typedef enum { 83*7c478bd9Sstevel@tonic-gate STDL_NORMAL, 84*7c478bd9Sstevel@tonic-gate STDL_HIGH 85*7c478bd9Sstevel@tonic-gate } st_debug_level_t; 86*7c478bd9Sstevel@tonic-gate 87*7c478bd9Sstevel@tonic-gate /* 88*7c478bd9Sstevel@tonic-gate * Output a scanning-related debug message. 89*7c478bd9Sstevel@tonic-gate */ 90*7c478bd9Sstevel@tonic-gate /*PRINTFLIKE3*/ /*ARGSUSED*/ 91*7c478bd9Sstevel@tonic-gate static void 92*7c478bd9Sstevel@tonic-gate st_debug(st_debug_level_t level, lcollection_t *lcol, char *msg, ...) 93*7c478bd9Sstevel@tonic-gate { 94*7c478bd9Sstevel@tonic-gate #ifdef DEBUG_MSG 95*7c478bd9Sstevel@tonic-gate va_list alist; 96*7c478bd9Sstevel@tonic-gate char *buf; 97*7c478bd9Sstevel@tonic-gate size_t len; 98*7c478bd9Sstevel@tonic-gate 99*7c478bd9Sstevel@tonic-gate if (get_message_priority() < ((level == STDL_HIGH) ? RCM_DEBUG_HIGH 100*7c478bd9Sstevel@tonic-gate : RCM_DEBUG)) 101*7c478bd9Sstevel@tonic-gate return; 102*7c478bd9Sstevel@tonic-gate 103*7c478bd9Sstevel@tonic-gate len = strlen(msg) + LINELEN; 104*7c478bd9Sstevel@tonic-gate buf = malloc(len); 105*7c478bd9Sstevel@tonic-gate if (buf == NULL) 106*7c478bd9Sstevel@tonic-gate return; 107*7c478bd9Sstevel@tonic-gate (void) snprintf(buf, len, "%s %s scanner %s", rcfg.rcfg_mode_name, 108*7c478bd9Sstevel@tonic-gate lcol->lcol_name, msg); 109*7c478bd9Sstevel@tonic-gate 110*7c478bd9Sstevel@tonic-gate va_start(alist, msg); 111*7c478bd9Sstevel@tonic-gate vdprintfe(RCM_DEBUG, buf, alist); 112*7c478bd9Sstevel@tonic-gate va_end(alist); 113*7c478bd9Sstevel@tonic-gate 114*7c478bd9Sstevel@tonic-gate free(buf); 115*7c478bd9Sstevel@tonic-gate #endif /* DEBUG_MSG */ 116*7c478bd9Sstevel@tonic-gate } 117*7c478bd9Sstevel@tonic-gate 118*7c478bd9Sstevel@tonic-gate /* 119*7c478bd9Sstevel@tonic-gate * Determine the collection's current victim, based on its last. The last will 120*7c478bd9Sstevel@tonic-gate * be returned, or, if invalid, any other valid process, if the collection has 121*7c478bd9Sstevel@tonic-gate * any. 122*7c478bd9Sstevel@tonic-gate */ 123*7c478bd9Sstevel@tonic-gate static lprocess_t * 124*7c478bd9Sstevel@tonic-gate get_valid_victim(lcollection_t *lcol, lprocess_t *lpc) 125*7c478bd9Sstevel@tonic-gate { 126*7c478bd9Sstevel@tonic-gate if (lpc == NULL || !lcollection_member(lcol, lpc)) 127*7c478bd9Sstevel@tonic-gate lpc = lcol->lcol_lprocess; 128*7c478bd9Sstevel@tonic-gate 129*7c478bd9Sstevel@tonic-gate /* 130*7c478bd9Sstevel@tonic-gate * Find the next scannable process, and make it the victim. 131*7c478bd9Sstevel@tonic-gate */ 132*7c478bd9Sstevel@tonic-gate while (lpc != NULL && lpc->lpc_unscannable != 0) 133*7c478bd9Sstevel@tonic-gate lpc = lpc->lpc_next; 134*7c478bd9Sstevel@tonic-gate 135*7c478bd9Sstevel@tonic-gate return (lpc); 136*7c478bd9Sstevel@tonic-gate } 137*7c478bd9Sstevel@tonic-gate 138*7c478bd9Sstevel@tonic-gate /* 139*7c478bd9Sstevel@tonic-gate * Get a process's combined current pagedata (per-page referenced and modified 140*7c478bd9Sstevel@tonic-gate * bits) and set the supplied pointer to it. The caller is responsible for 141*7c478bd9Sstevel@tonic-gate * freeing the data. If the pagedata is unreadable, a nonzero value is 142*7c478bd9Sstevel@tonic-gate * returned, and errno is set. Otherwise, 0 is returned. 143*7c478bd9Sstevel@tonic-gate */ 144*7c478bd9Sstevel@tonic-gate static int 145*7c478bd9Sstevel@tonic-gate get_pagedata(prpageheader_t **pghpp, int fd) 146*7c478bd9Sstevel@tonic-gate { 147*7c478bd9Sstevel@tonic-gate int res; 148*7c478bd9Sstevel@tonic-gate struct stat st; 149*7c478bd9Sstevel@tonic-gate 150*7c478bd9Sstevel@tonic-gate redo: 151*7c478bd9Sstevel@tonic-gate errno = 0; 152*7c478bd9Sstevel@tonic-gate if (fstat(fd, &st) != 0) { 153*7c478bd9Sstevel@tonic-gate debug("cannot stat pagedata\n"); 154*7c478bd9Sstevel@tonic-gate return (-1); 155*7c478bd9Sstevel@tonic-gate } 156*7c478bd9Sstevel@tonic-gate 157*7c478bd9Sstevel@tonic-gate errno = 0; 158*7c478bd9Sstevel@tonic-gate *pghpp = malloc(st.st_size); 159*7c478bd9Sstevel@tonic-gate if (*pghpp == NULL) { 160*7c478bd9Sstevel@tonic-gate debug("cannot malloc() %ld bytes for pagedata", st.st_size); 161*7c478bd9Sstevel@tonic-gate return (-1); 162*7c478bd9Sstevel@tonic-gate } 163*7c478bd9Sstevel@tonic-gate (void) bzero(*pghpp, st.st_size); 164*7c478bd9Sstevel@tonic-gate 165*7c478bd9Sstevel@tonic-gate errno = 0; 166*7c478bd9Sstevel@tonic-gate if ((res = read(fd, *pghpp, st.st_size)) != st.st_size) { 167*7c478bd9Sstevel@tonic-gate free(*pghpp); 168*7c478bd9Sstevel@tonic-gate *pghpp = NULL; 169*7c478bd9Sstevel@tonic-gate if (res > 0 || errno == E2BIG) { 170*7c478bd9Sstevel@tonic-gate debug("pagedata changed size, retrying\n"); 171*7c478bd9Sstevel@tonic-gate goto redo; 172*7c478bd9Sstevel@tonic-gate } else { 173*7c478bd9Sstevel@tonic-gate debug("cannot read pagedata"); 174*7c478bd9Sstevel@tonic-gate return (-1); 175*7c478bd9Sstevel@tonic-gate } 176*7c478bd9Sstevel@tonic-gate } 177*7c478bd9Sstevel@tonic-gate 178*7c478bd9Sstevel@tonic-gate return (0); 179*7c478bd9Sstevel@tonic-gate } 180*7c478bd9Sstevel@tonic-gate 181*7c478bd9Sstevel@tonic-gate /* 182*7c478bd9Sstevel@tonic-gate * Return the count of kilobytes of pages represented by the given pagedata 183*7c478bd9Sstevel@tonic-gate * which meet the given criteria, having pages which are in all of the states 184*7c478bd9Sstevel@tonic-gate * specified by the mask, and in none of the states in the notmask. If the 185*7c478bd9Sstevel@tonic-gate * CP_CLEAR flag is set, the pagedata will also be cleared. 186*7c478bd9Sstevel@tonic-gate */ 187*7c478bd9Sstevel@tonic-gate #define CP_CLEAR 1 188*7c478bd9Sstevel@tonic-gate static uint64_t 189*7c478bd9Sstevel@tonic-gate count_pages(prpageheader_t *pghp, int flags, int mask, int notmask) 190*7c478bd9Sstevel@tonic-gate { 191*7c478bd9Sstevel@tonic-gate int map; 192*7c478bd9Sstevel@tonic-gate caddr_t cur, end; 193*7c478bd9Sstevel@tonic-gate prpageheader_t pgh = *pghp; 194*7c478bd9Sstevel@tonic-gate prasmap_t *asmapp; 195*7c478bd9Sstevel@tonic-gate uint64_t count = 0; 196*7c478bd9Sstevel@tonic-gate 197*7c478bd9Sstevel@tonic-gate cur = (caddr_t)pghp + sizeof (*pghp); 198*7c478bd9Sstevel@tonic-gate for (map = 0; map < pgh.pr_nmap; map++) { 199*7c478bd9Sstevel@tonic-gate asmapp = (prasmap_t *)(uintptr_t)cur; 200*7c478bd9Sstevel@tonic-gate cur += sizeof (*asmapp); 201*7c478bd9Sstevel@tonic-gate end = cur + asmapp->pr_npage; 202*7c478bd9Sstevel@tonic-gate while (cur < end) { 203*7c478bd9Sstevel@tonic-gate if ((*cur & mask) == mask && (*cur & notmask) == 0) 204*7c478bd9Sstevel@tonic-gate count += asmapp->pr_pagesize / 1024; 205*7c478bd9Sstevel@tonic-gate if ((flags & CP_CLEAR) != 0) 206*7c478bd9Sstevel@tonic-gate *cur = 0; 207*7c478bd9Sstevel@tonic-gate cur++; 208*7c478bd9Sstevel@tonic-gate } 209*7c478bd9Sstevel@tonic-gate 210*7c478bd9Sstevel@tonic-gate /* 211*7c478bd9Sstevel@tonic-gate * Skip to next 64-bit-aligned address to get the next 212*7c478bd9Sstevel@tonic-gate * prasmap_t. 213*7c478bd9Sstevel@tonic-gate */ 214*7c478bd9Sstevel@tonic-gate cur = (caddr_t)((intptr_t)(cur + 7) & ~7); 215*7c478bd9Sstevel@tonic-gate } 216*7c478bd9Sstevel@tonic-gate 217*7c478bd9Sstevel@tonic-gate return (count); 218*7c478bd9Sstevel@tonic-gate } 219*7c478bd9Sstevel@tonic-gate 220*7c478bd9Sstevel@tonic-gate /* 221*7c478bd9Sstevel@tonic-gate * Return the amount of memory (in kilobytes) that hasn't been referenced or 222*7c478bd9Sstevel@tonic-gate * modified, which memory which will be paged out first. Should be written to 223*7c478bd9Sstevel@tonic-gate * exclude nonresident pages when sufficient interfaces exist. 224*7c478bd9Sstevel@tonic-gate */ 225*7c478bd9Sstevel@tonic-gate static uint64_t 226*7c478bd9Sstevel@tonic-gate unrm_size(lprocess_t *lpc) 227*7c478bd9Sstevel@tonic-gate { 228*7c478bd9Sstevel@tonic-gate return (count_pages(lpc->lpc_prpageheader, CP_CLEAR, 229*7c478bd9Sstevel@tonic-gate 0, PG_MODIFIED | PG_REFERENCED)); 230*7c478bd9Sstevel@tonic-gate } 231*7c478bd9Sstevel@tonic-gate 232*7c478bd9Sstevel@tonic-gate /* 233*7c478bd9Sstevel@tonic-gate * Advance a prpageheader_cur_t to the address space's next mapping, returning 234*7c478bd9Sstevel@tonic-gate * its address, or NULL if there is none. Any known nonpageable or nonresident 235*7c478bd9Sstevel@tonic-gate * mappings will be skipped over. 236*7c478bd9Sstevel@tonic-gate */ 237*7c478bd9Sstevel@tonic-gate static uintptr_t 238*7c478bd9Sstevel@tonic-gate advance_prpageheader_cur_nextmapping(prpageheader_cur_t *pcp) 239*7c478bd9Sstevel@tonic-gate { 240*7c478bd9Sstevel@tonic-gate prasmap_t *pap; 241*7c478bd9Sstevel@tonic-gate int i; 242*7c478bd9Sstevel@tonic-gate 243*7c478bd9Sstevel@tonic-gate next: 244*7c478bd9Sstevel@tonic-gate ASSERT(pcp->pr_map < pcp->pr_nmap); 245*7c478bd9Sstevel@tonic-gate if ((pcp->pr_map + 1) == pcp->pr_nmap) 246*7c478bd9Sstevel@tonic-gate return (NULL); 247*7c478bd9Sstevel@tonic-gate pcp->pr_map++; 248*7c478bd9Sstevel@tonic-gate if (pcp->pr_pgoff < pcp->pr_npage) { 249*7c478bd9Sstevel@tonic-gate pcp->pr_pdaddr = (caddr_t)((uintptr_t)pcp->pr_pdaddr + 250*7c478bd9Sstevel@tonic-gate (pcp->pr_npage - pcp->pr_pgoff)); 251*7c478bd9Sstevel@tonic-gate pcp->pr_pgoff = pcp->pr_npage; 252*7c478bd9Sstevel@tonic-gate } 253*7c478bd9Sstevel@tonic-gate /* 254*7c478bd9Sstevel@tonic-gate * Skip to next 64-bit-aligned address to get the next prasmap_t. 255*7c478bd9Sstevel@tonic-gate */ 256*7c478bd9Sstevel@tonic-gate pcp->pr_pdaddr = (caddr_t)(((uintptr_t)pcp->pr_pdaddr + 7) & ~7); 257*7c478bd9Sstevel@tonic-gate pap = (prasmap_t *)pcp->pr_pdaddr; 258*7c478bd9Sstevel@tonic-gate pcp->pr_pgoff = 0; 259*7c478bd9Sstevel@tonic-gate pcp->pr_npage = pap->pr_npage; 260*7c478bd9Sstevel@tonic-gate pcp->pr_pagesize = pap->pr_pagesize; 261*7c478bd9Sstevel@tonic-gate pcp->pr_addr = pap->pr_vaddr; 262*7c478bd9Sstevel@tonic-gate pcp->pr_pdaddr = pap + 1; 263*7c478bd9Sstevel@tonic-gate 264*7c478bd9Sstevel@tonic-gate /* 265*7c478bd9Sstevel@tonic-gate * Skip any known nonpageable mappings. Currently, the only one 266*7c478bd9Sstevel@tonic-gate * detected is the schedctl page. 267*7c478bd9Sstevel@tonic-gate */ 268*7c478bd9Sstevel@tonic-gate if ((pap->pr_mflags ^ (MA_SHARED | MA_READ | MA_WRITE | MA_EXEC | 269*7c478bd9Sstevel@tonic-gate MA_ANON)) == 0 && pap->pr_npage == 1) { 270*7c478bd9Sstevel@tonic-gate debug("identified nonpageable schedctl mapping at %p\n", 271*7c478bd9Sstevel@tonic-gate (void *)pcp->pr_addr); 272*7c478bd9Sstevel@tonic-gate goto next; 273*7c478bd9Sstevel@tonic-gate } 274*7c478bd9Sstevel@tonic-gate 275*7c478bd9Sstevel@tonic-gate /* 276*7c478bd9Sstevel@tonic-gate * Skip mappings with no resident pages. If the xmap does not 277*7c478bd9Sstevel@tonic-gate * correspond to the pagedata for any reason, it will be ignored. 278*7c478bd9Sstevel@tonic-gate */ 279*7c478bd9Sstevel@tonic-gate pcp->pr_rss = -1; 280*7c478bd9Sstevel@tonic-gate pcp->pr_pg_rss = -1; 281*7c478bd9Sstevel@tonic-gate for (i = 0; i < pcp->pr_nxmap; i++) { 282*7c478bd9Sstevel@tonic-gate prxmap_t *xmap = &pcp->pr_xmap[i]; 283*7c478bd9Sstevel@tonic-gate 284*7c478bd9Sstevel@tonic-gate if (pcp->pr_addr == xmap->pr_vaddr && xmap->pr_size == 285*7c478bd9Sstevel@tonic-gate (pcp->pr_npage * pcp->pr_pagesize)) { 286*7c478bd9Sstevel@tonic-gate pcp->pr_rss = xmap->pr_rss; 287*7c478bd9Sstevel@tonic-gate /* 288*7c478bd9Sstevel@tonic-gate * Remove COW pages from the pageable RSS count. 289*7c478bd9Sstevel@tonic-gate */ 290*7c478bd9Sstevel@tonic-gate if ((xmap->pr_mflags & MA_SHARED) == 0) 291*7c478bd9Sstevel@tonic-gate pcp->pr_pg_rss = xmap->pr_anon; 292*7c478bd9Sstevel@tonic-gate break; 293*7c478bd9Sstevel@tonic-gate } 294*7c478bd9Sstevel@tonic-gate } 295*7c478bd9Sstevel@tonic-gate if (pcp->pr_rss == 0) { 296*7c478bd9Sstevel@tonic-gate debug("identified nonresident mapping at 0x%p\n", 297*7c478bd9Sstevel@tonic-gate (void *)pcp->pr_addr); 298*7c478bd9Sstevel@tonic-gate goto next; 299*7c478bd9Sstevel@tonic-gate } else if (pcp->pr_pg_rss == 0) { 300*7c478bd9Sstevel@tonic-gate debug("identified unpageable mapping at 0x%p\n", 301*7c478bd9Sstevel@tonic-gate (void *)pcp->pr_addr); 302*7c478bd9Sstevel@tonic-gate goto next; 303*7c478bd9Sstevel@tonic-gate } 304*7c478bd9Sstevel@tonic-gate 305*7c478bd9Sstevel@tonic-gate return (pcp->pr_addr); 306*7c478bd9Sstevel@tonic-gate } 307*7c478bd9Sstevel@tonic-gate 308*7c478bd9Sstevel@tonic-gate /* 309*7c478bd9Sstevel@tonic-gate * Advance a prpageheader_cur_t to the mapping's next page, returning its 310*7c478bd9Sstevel@tonic-gate * address, or NULL if there is none. 311*7c478bd9Sstevel@tonic-gate */ 312*7c478bd9Sstevel@tonic-gate static void * 313*7c478bd9Sstevel@tonic-gate advance_prpageheader_cur(prpageheader_cur_t *pcp) 314*7c478bd9Sstevel@tonic-gate { 315*7c478bd9Sstevel@tonic-gate ASSERT(pcp->pr_pgoff < pcp->pr_npage); 316*7c478bd9Sstevel@tonic-gate if ((pcp->pr_pgoff + 1) == pcp->pr_npage) 317*7c478bd9Sstevel@tonic-gate return (NULL); 318*7c478bd9Sstevel@tonic-gate pcp->pr_pdaddr = (caddr_t)pcp->pr_pdaddr + 1; 319*7c478bd9Sstevel@tonic-gate pcp->pr_pgoff++; 320*7c478bd9Sstevel@tonic-gate 321*7c478bd9Sstevel@tonic-gate ASSERT((*(char *)pcp->pr_pdaddr & ~(PG_MODIFIED | PG_REFERENCED)) == 0); 322*7c478bd9Sstevel@tonic-gate return ((caddr_t)pcp->pr_addr + pcp->pr_pgoff * pcp->pr_pagesize); 323*7c478bd9Sstevel@tonic-gate } 324*7c478bd9Sstevel@tonic-gate 325*7c478bd9Sstevel@tonic-gate /* 326*7c478bd9Sstevel@tonic-gate * Initialize a prpageheader_cur_t, positioned at the first page of the mapping 327*7c478bd9Sstevel@tonic-gate * of an address space. 328*7c478bd9Sstevel@tonic-gate */ 329*7c478bd9Sstevel@tonic-gate static void * 330*7c478bd9Sstevel@tonic-gate set_prpageheader_cur(prpageheader_cur_t *pcp, prpageheader_t *php, 331*7c478bd9Sstevel@tonic-gate prxmap_t *xmap, int nxmap) 332*7c478bd9Sstevel@tonic-gate { 333*7c478bd9Sstevel@tonic-gate bzero(pcp, sizeof (*pcp)); 334*7c478bd9Sstevel@tonic-gate pcp->pr_nmap = php->pr_nmap; 335*7c478bd9Sstevel@tonic-gate pcp->pr_map = -1; 336*7c478bd9Sstevel@tonic-gate pcp->pr_prpageheader = php; 337*7c478bd9Sstevel@tonic-gate pcp->pr_xmap = xmap; 338*7c478bd9Sstevel@tonic-gate pcp->pr_nxmap = nxmap; 339*7c478bd9Sstevel@tonic-gate pcp->pr_pdaddr = (prpageheader_t *)php + 1; 340*7c478bd9Sstevel@tonic-gate 341*7c478bd9Sstevel@tonic-gate return ((void *)advance_prpageheader_cur_nextmapping(pcp)); 342*7c478bd9Sstevel@tonic-gate } 343*7c478bd9Sstevel@tonic-gate 344*7c478bd9Sstevel@tonic-gate /* 345*7c478bd9Sstevel@tonic-gate * Position a prpageheader_cur_t to the mapped address greater or equal to the 346*7c478bd9Sstevel@tonic-gate * given value. 347*7c478bd9Sstevel@tonic-gate */ 348*7c478bd9Sstevel@tonic-gate static void * 349*7c478bd9Sstevel@tonic-gate set_prpageheader_cur_addr(prpageheader_cur_t *pcp, prpageheader_t *php, 350*7c478bd9Sstevel@tonic-gate prxmap_t *xmap, int nxmap, void *naddr) 351*7c478bd9Sstevel@tonic-gate { 352*7c478bd9Sstevel@tonic-gate void *addr = set_prpageheader_cur(pcp, php, xmap, nxmap); 353*7c478bd9Sstevel@tonic-gate 354*7c478bd9Sstevel@tonic-gate while (addr != NULL && addr <= naddr) 355*7c478bd9Sstevel@tonic-gate if (naddr < (void *)((caddr_t)pcp->pr_addr + 356*7c478bd9Sstevel@tonic-gate pcp->pr_pagesize * pcp->pr_npage)) { 357*7c478bd9Sstevel@tonic-gate uint64_t pgdiff = ((uintptr_t)naddr - 358*7c478bd9Sstevel@tonic-gate (uintptr_t)pcp->pr_addr) / pcp->pr_pagesize; 359*7c478bd9Sstevel@tonic-gate pcp->pr_pgoff += pgdiff; 360*7c478bd9Sstevel@tonic-gate pcp->pr_pdaddr = (caddr_t)pcp->pr_pdaddr + pgdiff; 361*7c478bd9Sstevel@tonic-gate addr = (caddr_t)pcp->pr_addr + pcp->pr_pagesize * 362*7c478bd9Sstevel@tonic-gate pcp->pr_pgoff; 363*7c478bd9Sstevel@tonic-gate break; 364*7c478bd9Sstevel@tonic-gate } else 365*7c478bd9Sstevel@tonic-gate addr = 366*7c478bd9Sstevel@tonic-gate (void *)advance_prpageheader_cur_nextmapping(pcp); 367*7c478bd9Sstevel@tonic-gate 368*7c478bd9Sstevel@tonic-gate return (addr); 369*7c478bd9Sstevel@tonic-gate } 370*7c478bd9Sstevel@tonic-gate 371*7c478bd9Sstevel@tonic-gate static void 372*7c478bd9Sstevel@tonic-gate revoke_pagedata(rfd_t *rfd) 373*7c478bd9Sstevel@tonic-gate { 374*7c478bd9Sstevel@tonic-gate lprocess_t *lpc = rfd->rfd_data; 375*7c478bd9Sstevel@tonic-gate 376*7c478bd9Sstevel@tonic-gate st_debug(STDL_NORMAL, lpc->lpc_collection, "revoking pagedata for" 377*7c478bd9Sstevel@tonic-gate " process %d\n", (int)lpc->lpc_pid); 378*7c478bd9Sstevel@tonic-gate ASSERT(lpc->lpc_pgdata_fd != -1); 379*7c478bd9Sstevel@tonic-gate lpc->lpc_pgdata_fd = -1; 380*7c478bd9Sstevel@tonic-gate } 381*7c478bd9Sstevel@tonic-gate 382*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 383*7c478bd9Sstevel@tonic-gate static void 384*7c478bd9Sstevel@tonic-gate mklmapping(lmapping_t **lm, prpageheader_t *pgh) 385*7c478bd9Sstevel@tonic-gate { 386*7c478bd9Sstevel@tonic-gate prpageheader_cur_t cur; 387*7c478bd9Sstevel@tonic-gate void *addr; 388*7c478bd9Sstevel@tonic-gate 389*7c478bd9Sstevel@tonic-gate addr = set_prpageheader_cur(&cur, pgh, NULL, -1); 390*7c478bd9Sstevel@tonic-gate ASSERT(*lm == NULL); 391*7c478bd9Sstevel@tonic-gate while (addr != NULL) { 392*7c478bd9Sstevel@tonic-gate (void) lmapping_insert(lm, cur.pr_addr, cur.pr_npage * 393*7c478bd9Sstevel@tonic-gate cur.pr_pagesize); 394*7c478bd9Sstevel@tonic-gate addr = (void *)advance_prpageheader_cur_nextmapping(&cur); 395*7c478bd9Sstevel@tonic-gate } 396*7c478bd9Sstevel@tonic-gate } 397*7c478bd9Sstevel@tonic-gate 398*7c478bd9Sstevel@tonic-gate static void 399*7c478bd9Sstevel@tonic-gate lmapping_dump(lmapping_t *lm) 400*7c478bd9Sstevel@tonic-gate { 401*7c478bd9Sstevel@tonic-gate debug("lm: %p\n", (void *)lm); 402*7c478bd9Sstevel@tonic-gate while (lm != NULL) { 403*7c478bd9Sstevel@tonic-gate debug("\t(%p, %llx\n", (void *)lm->lm_addr, 404*7c478bd9Sstevel@tonic-gate (unsigned long long)lm->lm_size); 405*7c478bd9Sstevel@tonic-gate lm = lm->lm_next; 406*7c478bd9Sstevel@tonic-gate } 407*7c478bd9Sstevel@tonic-gate } 408*7c478bd9Sstevel@tonic-gate #endif /* DEBUG */ 409*7c478bd9Sstevel@tonic-gate 410*7c478bd9Sstevel@tonic-gate /* 411*7c478bd9Sstevel@tonic-gate * OR two prpagedata_t which are supposedly snapshots of the same address 412*7c478bd9Sstevel@tonic-gate * space. Intersecting mappings with different page sizes are tolerated but 413*7c478bd9Sstevel@tonic-gate * not normalized (not accurate). If the mappings of the two snapshots differ 414*7c478bd9Sstevel@tonic-gate * in any regard, the supplied mappings_changed flag will be set. 415*7c478bd9Sstevel@tonic-gate */ 416*7c478bd9Sstevel@tonic-gate static void 417*7c478bd9Sstevel@tonic-gate OR_pagedata(prpageheader_t *src, prpageheader_t *dst, int *mappings_changedp) 418*7c478bd9Sstevel@tonic-gate { 419*7c478bd9Sstevel@tonic-gate prpageheader_cur_t src_cur; 420*7c478bd9Sstevel@tonic-gate prpageheader_cur_t dst_cur; 421*7c478bd9Sstevel@tonic-gate uintptr_t src_addr; 422*7c478bd9Sstevel@tonic-gate uintptr_t dst_addr; 423*7c478bd9Sstevel@tonic-gate int mappings_changed = 0; 424*7c478bd9Sstevel@tonic-gate 425*7c478bd9Sstevel@tonic-gate /* 426*7c478bd9Sstevel@tonic-gate * OR source pagedata with the destination, for pages of intersecting 427*7c478bd9Sstevel@tonic-gate * mappings. 428*7c478bd9Sstevel@tonic-gate */ 429*7c478bd9Sstevel@tonic-gate src_addr = (uintptr_t)set_prpageheader_cur(&src_cur, src, NULL, -1); 430*7c478bd9Sstevel@tonic-gate dst_addr = (uintptr_t)set_prpageheader_cur(&dst_cur, dst, NULL, -1); 431*7c478bd9Sstevel@tonic-gate while (src_addr != NULL && dst_addr != NULL) { 432*7c478bd9Sstevel@tonic-gate while (src_addr == dst_addr && src_addr != NULL) { 433*7c478bd9Sstevel@tonic-gate *(char *)dst_cur.pr_pdaddr |= 434*7c478bd9Sstevel@tonic-gate *(char *)src_cur.pr_pdaddr; 435*7c478bd9Sstevel@tonic-gate src_addr = (uintptr_t)advance_prpageheader_cur( 436*7c478bd9Sstevel@tonic-gate &src_cur); 437*7c478bd9Sstevel@tonic-gate dst_addr = (uintptr_t)advance_prpageheader_cur( 438*7c478bd9Sstevel@tonic-gate &dst_cur); 439*7c478bd9Sstevel@tonic-gate } 440*7c478bd9Sstevel@tonic-gate if (src_addr != dst_addr) 441*7c478bd9Sstevel@tonic-gate mappings_changed = 1; 442*7c478bd9Sstevel@tonic-gate src_addr = advance_prpageheader_cur_nextmapping(&src_cur); 443*7c478bd9Sstevel@tonic-gate dst_addr = advance_prpageheader_cur_nextmapping(&dst_cur); 444*7c478bd9Sstevel@tonic-gate while (src_addr != dst_addr && src_addr != NULL && dst_addr != 445*7c478bd9Sstevel@tonic-gate NULL) { 446*7c478bd9Sstevel@tonic-gate mappings_changed = 1; 447*7c478bd9Sstevel@tonic-gate if (src_addr < dst_addr) 448*7c478bd9Sstevel@tonic-gate src_addr = advance_prpageheader_cur_nextmapping( 449*7c478bd9Sstevel@tonic-gate &src_cur); 450*7c478bd9Sstevel@tonic-gate else 451*7c478bd9Sstevel@tonic-gate dst_addr = advance_prpageheader_cur_nextmapping( 452*7c478bd9Sstevel@tonic-gate &dst_cur); 453*7c478bd9Sstevel@tonic-gate } 454*7c478bd9Sstevel@tonic-gate } 455*7c478bd9Sstevel@tonic-gate 456*7c478bd9Sstevel@tonic-gate *mappings_changedp = mappings_changed; 457*7c478bd9Sstevel@tonic-gate } 458*7c478bd9Sstevel@tonic-gate 459*7c478bd9Sstevel@tonic-gate /* 460*7c478bd9Sstevel@tonic-gate * Merge the current pagedata with that on hand. If the pagedata is 461*7c478bd9Sstevel@tonic-gate * unretrievable for any reason, such as the process having exited or being a 462*7c478bd9Sstevel@tonic-gate * zombie, a nonzero value is returned, the process should be marked 463*7c478bd9Sstevel@tonic-gate * unscannable, and future attempts to scan it should be avoided, since the 464*7c478bd9Sstevel@tonic-gate * symptom is probably permament. If the mappings of either pagedata 465*7c478bd9Sstevel@tonic-gate * differ in any respect, the supplied callback will be invoked once. 466*7c478bd9Sstevel@tonic-gate */ 467*7c478bd9Sstevel@tonic-gate static int 468*7c478bd9Sstevel@tonic-gate merge_current_pagedata(lprocess_t *lpc, 469*7c478bd9Sstevel@tonic-gate void(*mappings_changed_cb) (lprocess_t *)) 470*7c478bd9Sstevel@tonic-gate { 471*7c478bd9Sstevel@tonic-gate prpageheader_t *pghp; 472*7c478bd9Sstevel@tonic-gate int mappings_changed = 0; 473*7c478bd9Sstevel@tonic-gate 474*7c478bd9Sstevel@tonic-gate if (lpc->lpc_pgdata_fd < 0 || get_pagedata(&pghp, lpc->lpc_pgdata_fd) != 475*7c478bd9Sstevel@tonic-gate 0) { 476*7c478bd9Sstevel@tonic-gate char pathbuf[PROC_PATH_MAX]; 477*7c478bd9Sstevel@tonic-gate 478*7c478bd9Sstevel@tonic-gate (void) snprintf(pathbuf, sizeof (pathbuf), "/proc/%d/pagedata", 479*7c478bd9Sstevel@tonic-gate (int)lpc->lpc_pid); 480*7c478bd9Sstevel@tonic-gate if ((lpc->lpc_pgdata_fd = rfd_open(pathbuf, 1, RFD_PAGEDATA, 481*7c478bd9Sstevel@tonic-gate revoke_pagedata, lpc, O_RDONLY, 0)) < 0 || 482*7c478bd9Sstevel@tonic-gate get_pagedata(&pghp, lpc->lpc_pgdata_fd) != 0) 483*7c478bd9Sstevel@tonic-gate return (-1); 484*7c478bd9Sstevel@tonic-gate debug("starting/resuming pagedata collection for %d\n", 485*7c478bd9Sstevel@tonic-gate (int)lpc->lpc_pid); 486*7c478bd9Sstevel@tonic-gate } 487*7c478bd9Sstevel@tonic-gate debug("process %d: %llu/%llukB r/m'd since last read\n", 488*7c478bd9Sstevel@tonic-gate (int)lpc->lpc_pid, (unsigned long long)count_pages(pghp, 0, 489*7c478bd9Sstevel@tonic-gate PG_MODIFIED | PG_REFERENCED, 0), (unsigned long long)lpc->lpc_rss); 490*7c478bd9Sstevel@tonic-gate if (lpc->lpc_prpageheader != NULL) { 491*7c478bd9Sstevel@tonic-gate /* 492*7c478bd9Sstevel@tonic-gate * OR the two snapshots. 493*7c478bd9Sstevel@tonic-gate */ 494*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 495*7c478bd9Sstevel@tonic-gate lmapping_t *old = NULL; 496*7c478bd9Sstevel@tonic-gate lmapping_t *new = NULL; 497*7c478bd9Sstevel@tonic-gate 498*7c478bd9Sstevel@tonic-gate mklmapping(&new, pghp); 499*7c478bd9Sstevel@tonic-gate mklmapping(&old, lpc->lpc_prpageheader); 500*7c478bd9Sstevel@tonic-gate #endif /* DEBUG */ 501*7c478bd9Sstevel@tonic-gate OR_pagedata(lpc->lpc_prpageheader, pghp, &mappings_changed); 502*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 503*7c478bd9Sstevel@tonic-gate if (((mappings_changed != 0) ^ 504*7c478bd9Sstevel@tonic-gate (lmapping_dump_diff(old, new) != 0))) { 505*7c478bd9Sstevel@tonic-gate debug("lmapping_changed inconsistent with lmapping\n"); 506*7c478bd9Sstevel@tonic-gate debug("old\n"); 507*7c478bd9Sstevel@tonic-gate lmapping_dump(old); 508*7c478bd9Sstevel@tonic-gate debug("new\n"); 509*7c478bd9Sstevel@tonic-gate lmapping_dump(new); 510*7c478bd9Sstevel@tonic-gate debug("ignored\n"); 511*7c478bd9Sstevel@tonic-gate lmapping_dump(lpc->lpc_ignore); 512*7c478bd9Sstevel@tonic-gate ASSERT(0); 513*7c478bd9Sstevel@tonic-gate } 514*7c478bd9Sstevel@tonic-gate lmapping_free(&new); 515*7c478bd9Sstevel@tonic-gate lmapping_free(&old); 516*7c478bd9Sstevel@tonic-gate #endif /* DEBUG */ 517*7c478bd9Sstevel@tonic-gate free(lpc->lpc_prpageheader); 518*7c478bd9Sstevel@tonic-gate } else 519*7c478bd9Sstevel@tonic-gate mappings_changed = 1; 520*7c478bd9Sstevel@tonic-gate lpc->lpc_prpageheader = pghp; 521*7c478bd9Sstevel@tonic-gate debug("process %d: %llu/%llukB r/m'd since hand swept\n", 522*7c478bd9Sstevel@tonic-gate (int)lpc->lpc_pid, (unsigned long long)count_pages(pghp, 0, 523*7c478bd9Sstevel@tonic-gate PG_MODIFIED | PG_REFERENCED, 0), 524*7c478bd9Sstevel@tonic-gate (unsigned long long)lpc->lpc_rss); 525*7c478bd9Sstevel@tonic-gate if (mappings_changed != 0) { 526*7c478bd9Sstevel@tonic-gate debug("process %d: mappings changed\n", (int)lpc->lpc_pid); 527*7c478bd9Sstevel@tonic-gate if (mappings_changed_cb != NULL) 528*7c478bd9Sstevel@tonic-gate mappings_changed_cb(lpc); 529*7c478bd9Sstevel@tonic-gate } 530*7c478bd9Sstevel@tonic-gate return (0); 531*7c478bd9Sstevel@tonic-gate } 532*7c478bd9Sstevel@tonic-gate 533*7c478bd9Sstevel@tonic-gate /* 534*7c478bd9Sstevel@tonic-gate * Attempt to page out a region of the given process's address space. May 535*7c478bd9Sstevel@tonic-gate * return nonzero if not all of the pages may are pageable, for any reason. 536*7c478bd9Sstevel@tonic-gate */ 537*7c478bd9Sstevel@tonic-gate static int 538*7c478bd9Sstevel@tonic-gate pageout(pid_t pid, struct ps_prochandle *Pr, caddr_t start, caddr_t end) 539*7c478bd9Sstevel@tonic-gate { 540*7c478bd9Sstevel@tonic-gate int res; 541*7c478bd9Sstevel@tonic-gate 542*7c478bd9Sstevel@tonic-gate if (end <= start) 543*7c478bd9Sstevel@tonic-gate return (0); 544*7c478bd9Sstevel@tonic-gate 545*7c478bd9Sstevel@tonic-gate errno = 0; 546*7c478bd9Sstevel@tonic-gate res = pr_memcntl(Pr, start, (end - start), MC_SYNC, 547*7c478bd9Sstevel@tonic-gate (caddr_t)(MS_ASYNC | MS_INVALIDATE), 0, 0); 548*7c478bd9Sstevel@tonic-gate debug_high("pr_memcntl [%p-%p): %d", (void *)start, (void *)end, res); 549*7c478bd9Sstevel@tonic-gate 550*7c478bd9Sstevel@tonic-gate /* 551*7c478bd9Sstevel@tonic-gate * EBUSY indicates none of the pages have backing store allocated, or 552*7c478bd9Sstevel@tonic-gate * some pages were locked, which are less interesting than other 553*7c478bd9Sstevel@tonic-gate * conditions, which are noted. 554*7c478bd9Sstevel@tonic-gate */ 555*7c478bd9Sstevel@tonic-gate if (res != 0) 556*7c478bd9Sstevel@tonic-gate if (errno == EBUSY) 557*7c478bd9Sstevel@tonic-gate res = 0; 558*7c478bd9Sstevel@tonic-gate else 559*7c478bd9Sstevel@tonic-gate debug("%d: can't pageout %p+%llx (errno %d)", (int)pid, 560*7c478bd9Sstevel@tonic-gate (void *)start, (long long)(end - start), errno); 561*7c478bd9Sstevel@tonic-gate 562*7c478bd9Sstevel@tonic-gate return (res); 563*7c478bd9Sstevel@tonic-gate } 564*7c478bd9Sstevel@tonic-gate 565*7c478bd9Sstevel@tonic-gate /* 566*7c478bd9Sstevel@tonic-gate * Compute the delta of the victim process's RSS since the last call. If the 567*7c478bd9Sstevel@tonic-gate * psinfo cannot be obtained, no work is done, and no error is returned; it is 568*7c478bd9Sstevel@tonic-gate * up to the caller to detect the process' termination via other means. 569*7c478bd9Sstevel@tonic-gate */ 570*7c478bd9Sstevel@tonic-gate static int64_t 571*7c478bd9Sstevel@tonic-gate rss_delta(psinfo_t *new_psinfo, psinfo_t *old_psinfo, lprocess_t *vic) 572*7c478bd9Sstevel@tonic-gate { 573*7c478bd9Sstevel@tonic-gate int64_t d_rss = 0; 574*7c478bd9Sstevel@tonic-gate 575*7c478bd9Sstevel@tonic-gate if (get_psinfo(vic->lpc_pid, new_psinfo, vic->lpc_psinfo_fd, 576*7c478bd9Sstevel@tonic-gate lprocess_update_psinfo_fd_cb, vic, vic) == 0) { 577*7c478bd9Sstevel@tonic-gate d_rss = (int64_t)new_psinfo->pr_rssize - 578*7c478bd9Sstevel@tonic-gate (int64_t)old_psinfo->pr_rssize; 579*7c478bd9Sstevel@tonic-gate if (d_rss < 0) 580*7c478bd9Sstevel@tonic-gate vic->lpc_collection->lcol_stat.lcols_pg_eff += 581*7c478bd9Sstevel@tonic-gate (- d_rss); 582*7c478bd9Sstevel@tonic-gate *old_psinfo = *new_psinfo; 583*7c478bd9Sstevel@tonic-gate } 584*7c478bd9Sstevel@tonic-gate 585*7c478bd9Sstevel@tonic-gate return (d_rss); 586*7c478bd9Sstevel@tonic-gate } 587*7c478bd9Sstevel@tonic-gate 588*7c478bd9Sstevel@tonic-gate static void 589*7c478bd9Sstevel@tonic-gate unignore_mappings(lprocess_t *lpc) 590*7c478bd9Sstevel@tonic-gate { 591*7c478bd9Sstevel@tonic-gate debug("clearing ignored set\n"); 592*7c478bd9Sstevel@tonic-gate lmapping_free(&lpc->lpc_ignore); 593*7c478bd9Sstevel@tonic-gate } 594*7c478bd9Sstevel@tonic-gate 595*7c478bd9Sstevel@tonic-gate static void 596*7c478bd9Sstevel@tonic-gate unignore_referenced_mappings(lprocess_t *lpc) 597*7c478bd9Sstevel@tonic-gate { 598*7c478bd9Sstevel@tonic-gate prpageheader_cur_t cur; 599*7c478bd9Sstevel@tonic-gate void *vicaddr; 600*7c478bd9Sstevel@tonic-gate 601*7c478bd9Sstevel@tonic-gate vicaddr = set_prpageheader_cur(&cur, lpc->lpc_prpageheader, NULL, -1); 602*7c478bd9Sstevel@tonic-gate while (vicaddr != NULL) { 603*7c478bd9Sstevel@tonic-gate if (((*(char *)cur.pr_pdaddr) & (PG_REFERENCED | PG_MODIFIED)) 604*7c478bd9Sstevel@tonic-gate != 0) { 605*7c478bd9Sstevel@tonic-gate if (lmapping_remove(&lpc->lpc_ignore, cur.pr_addr, 606*7c478bd9Sstevel@tonic-gate cur.pr_npage * cur.pr_pagesize) == 0) 607*7c478bd9Sstevel@tonic-gate debug("removed mapping 0x%p+0t%llukB from" 608*7c478bd9Sstevel@tonic-gate " ignored set\n", (void *)cur.pr_addr, 609*7c478bd9Sstevel@tonic-gate (unsigned long long)(cur.pr_npage * 610*7c478bd9Sstevel@tonic-gate cur.pr_pagesize / 1024)); 611*7c478bd9Sstevel@tonic-gate vicaddr = (void *)advance_prpageheader_cur_nextmapping( 612*7c478bd9Sstevel@tonic-gate &cur); 613*7c478bd9Sstevel@tonic-gate } else if ((vicaddr = advance_prpageheader_cur(&cur)) == NULL) 614*7c478bd9Sstevel@tonic-gate vicaddr = (void *)advance_prpageheader_cur_nextmapping( 615*7c478bd9Sstevel@tonic-gate &cur); 616*7c478bd9Sstevel@tonic-gate } 617*7c478bd9Sstevel@tonic-gate } 618*7c478bd9Sstevel@tonic-gate 619*7c478bd9Sstevel@tonic-gate /* 620*7c478bd9Sstevel@tonic-gate * Resume scanning, starting with the last victim, if it is still valid, or any 621*7c478bd9Sstevel@tonic-gate * other one, otherwise. 622*7c478bd9Sstevel@tonic-gate */ 623*7c478bd9Sstevel@tonic-gate void 624*7c478bd9Sstevel@tonic-gate scan(lcollection_t *lcol, int64_t excess) 625*7c478bd9Sstevel@tonic-gate { 626*7c478bd9Sstevel@tonic-gate lprocess_t *vic, *lpc; 627*7c478bd9Sstevel@tonic-gate void *vicaddr, *endaddr, *nvicaddr; 628*7c478bd9Sstevel@tonic-gate prpageheader_cur_t cur; 629*7c478bd9Sstevel@tonic-gate psinfo_t old_psinfo, new_psinfo; 630*7c478bd9Sstevel@tonic-gate hrtime_t scan_start; 631*7c478bd9Sstevel@tonic-gate int res, resumed; 632*7c478bd9Sstevel@tonic-gate uint64_t col_unrm_size; 633*7c478bd9Sstevel@tonic-gate 634*7c478bd9Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "starting to scan, excess %lldk\n", 635*7c478bd9Sstevel@tonic-gate (long long)excess); 636*7c478bd9Sstevel@tonic-gate 637*7c478bd9Sstevel@tonic-gate /* 638*7c478bd9Sstevel@tonic-gate * Determine the address to start scanning at, depending on whether 639*7c478bd9Sstevel@tonic-gate * scanning can be resumed. 640*7c478bd9Sstevel@tonic-gate */ 641*7c478bd9Sstevel@tonic-gate endaddr = NULL; 642*7c478bd9Sstevel@tonic-gate if ((vic = get_valid_victim(lcol, lcol->lcol_victim)) == 643*7c478bd9Sstevel@tonic-gate lcol->lcol_victim && lcol->lcol_resaddr != NULL) { 644*7c478bd9Sstevel@tonic-gate vicaddr = lcol->lcol_resaddr; 645*7c478bd9Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "resuming process %d\n", 646*7c478bd9Sstevel@tonic-gate (int)vic->lpc_pid); 647*7c478bd9Sstevel@tonic-gate resumed = 1; 648*7c478bd9Sstevel@tonic-gate } else { 649*7c478bd9Sstevel@tonic-gate vicaddr = NULL; 650*7c478bd9Sstevel@tonic-gate resumed = 0; 651*7c478bd9Sstevel@tonic-gate } 652*7c478bd9Sstevel@tonic-gate 653*7c478bd9Sstevel@tonic-gate scan_start = gethrtime(); 654*7c478bd9Sstevel@tonic-gate /* 655*7c478bd9Sstevel@tonic-gate * Obtain the most current pagedata for the processes that might be 656*7c478bd9Sstevel@tonic-gate * scanned, and remove from the ignored set any mappings which have 657*7c478bd9Sstevel@tonic-gate * referenced or modified pages (in the hopes that the pageability of 658*7c478bd9Sstevel@tonic-gate * the mapping's pages may have changed). Determine if the 659*7c478bd9Sstevel@tonic-gate * unreferenced and unmodified portion is impossibly small to suffice 660*7c478bd9Sstevel@tonic-gate * to reduce the excess completely. If so, ignore these bits so that 661*7c478bd9Sstevel@tonic-gate * even working set will be paged out. 662*7c478bd9Sstevel@tonic-gate */ 663*7c478bd9Sstevel@tonic-gate col_unrm_size = 0; 664*7c478bd9Sstevel@tonic-gate lpc = vic; 665*7c478bd9Sstevel@tonic-gate while (lpc != NULL && should_run) { 666*7c478bd9Sstevel@tonic-gate if (merge_current_pagedata(lpc, unignore_mappings) != 0) { 667*7c478bd9Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "process %d:" 668*7c478bd9Sstevel@tonic-gate " exited/temporarily unscannable", 669*7c478bd9Sstevel@tonic-gate (int)lpc->lpc_pid); 670*7c478bd9Sstevel@tonic-gate goto next; 671*7c478bd9Sstevel@tonic-gate } 672*7c478bd9Sstevel@tonic-gate debug("process %d: %llu/%llukB scannable\n", (int)lpc->lpc_pid, 673*7c478bd9Sstevel@tonic-gate (unsigned long long)(lpc->lpc_unrm = unrm_size(lpc)), 674*7c478bd9Sstevel@tonic-gate (unsigned long long)lpc->lpc_size); 675*7c478bd9Sstevel@tonic-gate col_unrm_size += lpc->lpc_unrm = unrm_size(lpc); 676*7c478bd9Sstevel@tonic-gate 677*7c478bd9Sstevel@tonic-gate if ((lcol->lcol_stat.lcols_scan_count % 678*7c478bd9Sstevel@tonic-gate RCAPD_IGNORED_SET_FLUSH_IVAL) == 0) { 679*7c478bd9Sstevel@tonic-gate /* 680*7c478bd9Sstevel@tonic-gate * Periodically clear the set of ignored mappings. 681*7c478bd9Sstevel@tonic-gate * This will allow processes whose ignored segments' 682*7c478bd9Sstevel@tonic-gate * pageability have changed (without a corresponding 683*7c478bd9Sstevel@tonic-gate * reference or modification to a page) to be 684*7c478bd9Sstevel@tonic-gate * recognized. 685*7c478bd9Sstevel@tonic-gate */ 686*7c478bd9Sstevel@tonic-gate if (lcol->lcol_stat.lcols_scan_count > 0) 687*7c478bd9Sstevel@tonic-gate unignore_mappings(lpc); 688*7c478bd9Sstevel@tonic-gate } else { 689*7c478bd9Sstevel@tonic-gate /* 690*7c478bd9Sstevel@tonic-gate * Ensure mappings with referenced or modified pages 691*7c478bd9Sstevel@tonic-gate * are not in the ignored set. Their usage might mean 692*7c478bd9Sstevel@tonic-gate * the condition which made them unpageable is gone. 693*7c478bd9Sstevel@tonic-gate */ 694*7c478bd9Sstevel@tonic-gate unignore_referenced_mappings(lpc); 695*7c478bd9Sstevel@tonic-gate } 696*7c478bd9Sstevel@tonic-gate next: 697*7c478bd9Sstevel@tonic-gate lpc = lpc->lpc_next != NULL ? get_valid_victim(lcol, 698*7c478bd9Sstevel@tonic-gate lpc->lpc_next) : NULL; 699*7c478bd9Sstevel@tonic-gate } 700*7c478bd9Sstevel@tonic-gate if (col_unrm_size < excess) { 701*7c478bd9Sstevel@tonic-gate lpc = vic; 702*7c478bd9Sstevel@tonic-gate debug("will not reduce excess with only unreferenced pages\n"); 703*7c478bd9Sstevel@tonic-gate while (lpc != NULL && should_run) { 704*7c478bd9Sstevel@tonic-gate if (lpc->lpc_prpageheader != NULL) { 705*7c478bd9Sstevel@tonic-gate (void) count_pages(lpc->lpc_prpageheader, 706*7c478bd9Sstevel@tonic-gate CP_CLEAR, 0, 0); 707*7c478bd9Sstevel@tonic-gate if (lpc->lpc_pgdata_fd >= 0) { 708*7c478bd9Sstevel@tonic-gate if (rfd_close(lpc->lpc_pgdata_fd) != 0) 709*7c478bd9Sstevel@tonic-gate debug("coud not close %d" 710*7c478bd9Sstevel@tonic-gate " lpc_pgdata_fd %d", 711*7c478bd9Sstevel@tonic-gate (int)lpc->lpc_pid, 712*7c478bd9Sstevel@tonic-gate lpc->lpc_pgdata_fd); 713*7c478bd9Sstevel@tonic-gate lpc->lpc_pgdata_fd = -1; 714*7c478bd9Sstevel@tonic-gate } 715*7c478bd9Sstevel@tonic-gate } 716*7c478bd9Sstevel@tonic-gate lpc = lpc->lpc_next != NULL ? get_valid_victim(lcol, 717*7c478bd9Sstevel@tonic-gate lpc->lpc_next) : NULL; 718*7c478bd9Sstevel@tonic-gate } 719*7c478bd9Sstevel@tonic-gate } 720*7c478bd9Sstevel@tonic-gate 721*7c478bd9Sstevel@tonic-gate /* 722*7c478bd9Sstevel@tonic-gate * Examine each process for pages to remove until the excess is 723*7c478bd9Sstevel@tonic-gate * reduced. 724*7c478bd9Sstevel@tonic-gate */ 725*7c478bd9Sstevel@tonic-gate while (vic != NULL && excess > 0 && should_run) { 726*7c478bd9Sstevel@tonic-gate /* 727*7c478bd9Sstevel@tonic-gate * Skip processes whose death was reported when the merging of 728*7c478bd9Sstevel@tonic-gate * pagedata was attempted. 729*7c478bd9Sstevel@tonic-gate */ 730*7c478bd9Sstevel@tonic-gate if (vic->lpc_prpageheader == NULL) 731*7c478bd9Sstevel@tonic-gate goto nextproc; 732*7c478bd9Sstevel@tonic-gate 733*7c478bd9Sstevel@tonic-gate /* 734*7c478bd9Sstevel@tonic-gate * Obtain optional segment residency information. 735*7c478bd9Sstevel@tonic-gate */ 736*7c478bd9Sstevel@tonic-gate if (lpc_xmap_update(vic) != 0) 737*7c478bd9Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "process %d: xmap" 738*7c478bd9Sstevel@tonic-gate " unreadable; ignoring", (int)vic->lpc_pid); 739*7c478bd9Sstevel@tonic-gate 740*7c478bd9Sstevel@tonic-gate #ifdef DEBUG_MSG 741*7c478bd9Sstevel@tonic-gate { 742*7c478bd9Sstevel@tonic-gate void *ovicaddr = vicaddr; 743*7c478bd9Sstevel@tonic-gate #endif /* DEBUG_MSG */ 744*7c478bd9Sstevel@tonic-gate vicaddr = set_prpageheader_cur_addr(&cur, vic->lpc_prpageheader, 745*7c478bd9Sstevel@tonic-gate vic->lpc_xmap, vic->lpc_nxmap, vicaddr); 746*7c478bd9Sstevel@tonic-gate #ifdef DEBUG_MSG 747*7c478bd9Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "trying to resume from" 748*7c478bd9Sstevel@tonic-gate " 0x%p, next 0x%p\n", ovicaddr, vicaddr); 749*7c478bd9Sstevel@tonic-gate } 750*7c478bd9Sstevel@tonic-gate #endif /* DEBUG_MSG */ 751*7c478bd9Sstevel@tonic-gate 752*7c478bd9Sstevel@tonic-gate /* 753*7c478bd9Sstevel@tonic-gate * Take control of the victim. 754*7c478bd9Sstevel@tonic-gate */ 755*7c478bd9Sstevel@tonic-gate if (get_psinfo(vic->lpc_pid, &old_psinfo, 756*7c478bd9Sstevel@tonic-gate vic->lpc_psinfo_fd, lprocess_update_psinfo_fd_cb, 757*7c478bd9Sstevel@tonic-gate vic, vic) != 0) { 758*7c478bd9Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "cannot get %d psinfo", 759*7c478bd9Sstevel@tonic-gate (int)vic->lpc_pid); 760*7c478bd9Sstevel@tonic-gate goto nextproc; 761*7c478bd9Sstevel@tonic-gate } 762*7c478bd9Sstevel@tonic-gate (void) rfd_reserve(PGRAB_FD_COUNT); 763*7c478bd9Sstevel@tonic-gate if ((scan_pr = Pgrab(vic->lpc_pid, 0, &res)) == NULL) { 764*7c478bd9Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "cannot grab %d (%d)", 765*7c478bd9Sstevel@tonic-gate (int)vic->lpc_pid, res); 766*7c478bd9Sstevel@tonic-gate goto nextproc; 767*7c478bd9Sstevel@tonic-gate } 768*7c478bd9Sstevel@tonic-gate if (Pcreate_agent(scan_pr) != 0) { 769*7c478bd9Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "cannot control %d", 770*7c478bd9Sstevel@tonic-gate (int)vic->lpc_pid); 771*7c478bd9Sstevel@tonic-gate goto nextproc; 772*7c478bd9Sstevel@tonic-gate } 773*7c478bd9Sstevel@tonic-gate /* 774*7c478bd9Sstevel@tonic-gate * Be very pessimistic about the state of the agent LWP -- 775*7c478bd9Sstevel@tonic-gate * verify it's actually stopped. 776*7c478bd9Sstevel@tonic-gate */ 777*7c478bd9Sstevel@tonic-gate errno = 0; 778*7c478bd9Sstevel@tonic-gate while (Pstate(scan_pr) == PS_RUN) 779*7c478bd9Sstevel@tonic-gate (void) Pwait(scan_pr, 0); 780*7c478bd9Sstevel@tonic-gate if (Pstate(scan_pr) != PS_STOP) { 781*7c478bd9Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "agent not in expected" 782*7c478bd9Sstevel@tonic-gate " state (%d)", Pstate(scan_pr)); 783*7c478bd9Sstevel@tonic-gate goto nextproc; 784*7c478bd9Sstevel@tonic-gate } 785*7c478bd9Sstevel@tonic-gate 786*7c478bd9Sstevel@tonic-gate /* 787*7c478bd9Sstevel@tonic-gate * Within the victim's address space, find contiguous ranges of 788*7c478bd9Sstevel@tonic-gate * unreferenced pages to page out. 789*7c478bd9Sstevel@tonic-gate */ 790*7c478bd9Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "paging out process %d\n", 791*7c478bd9Sstevel@tonic-gate (int)vic->lpc_pid); 792*7c478bd9Sstevel@tonic-gate while (excess > 0 && vicaddr != NULL && should_run) { 793*7c478bd9Sstevel@tonic-gate /* 794*7c478bd9Sstevel@tonic-gate * Skip mappings in the ignored set. Mappings get 795*7c478bd9Sstevel@tonic-gate * placed in the ignored set when all their resident 796*7c478bd9Sstevel@tonic-gate * pages are unreference and unmodified, yet unpageable 797*7c478bd9Sstevel@tonic-gate * -- such as when they are locked, or involved in 798*7c478bd9Sstevel@tonic-gate * asynchronous I/O. They will be scanned again when 799*7c478bd9Sstevel@tonic-gate * some page is referenced or modified. 800*7c478bd9Sstevel@tonic-gate */ 801*7c478bd9Sstevel@tonic-gate if (lmapping_contains(vic->lpc_ignore, cur.pr_addr, 802*7c478bd9Sstevel@tonic-gate cur.pr_npage * cur.pr_pagesize)) { 803*7c478bd9Sstevel@tonic-gate debug("ignored mapping at 0x%p\n", 804*7c478bd9Sstevel@tonic-gate (void *)cur.pr_addr); 805*7c478bd9Sstevel@tonic-gate /* 806*7c478bd9Sstevel@tonic-gate * Update statistics. 807*7c478bd9Sstevel@tonic-gate */ 808*7c478bd9Sstevel@tonic-gate lcol->lcol_stat.lcols_pg_att += 809*7c478bd9Sstevel@tonic-gate cur.pr_npage * cur.pr_pagesize / 1024; 810*7c478bd9Sstevel@tonic-gate 811*7c478bd9Sstevel@tonic-gate vicaddr = (void *) 812*7c478bd9Sstevel@tonic-gate advance_prpageheader_cur_nextmapping(&cur); 813*7c478bd9Sstevel@tonic-gate continue; 814*7c478bd9Sstevel@tonic-gate } 815*7c478bd9Sstevel@tonic-gate 816*7c478bd9Sstevel@tonic-gate /* 817*7c478bd9Sstevel@tonic-gate * Determine a range of unreferenced pages to page out, 818*7c478bd9Sstevel@tonic-gate * and clear the R/M bits in the preceding referenced 819*7c478bd9Sstevel@tonic-gate * range. 820*7c478bd9Sstevel@tonic-gate */ 821*7c478bd9Sstevel@tonic-gate st_debug(STDL_HIGH, lcol, "start from mapping at 0x%p," 822*7c478bd9Sstevel@tonic-gate " npage %llu\n", vicaddr, 823*7c478bd9Sstevel@tonic-gate (unsigned long long)cur.pr_npage); 824*7c478bd9Sstevel@tonic-gate while (vicaddr != NULL && 825*7c478bd9Sstevel@tonic-gate *(caddr_t)cur.pr_pdaddr != 0) { 826*7c478bd9Sstevel@tonic-gate *(caddr_t)cur.pr_pdaddr = 0; 827*7c478bd9Sstevel@tonic-gate vicaddr = advance_prpageheader_cur(&cur); 828*7c478bd9Sstevel@tonic-gate } 829*7c478bd9Sstevel@tonic-gate st_debug(STDL_HIGH, lcol, "advance, vicaddr %p, pdaddr" 830*7c478bd9Sstevel@tonic-gate " %p\n", vicaddr, cur.pr_pdaddr); 831*7c478bd9Sstevel@tonic-gate if (vicaddr == NULL) { 832*7c478bd9Sstevel@tonic-gate /* 833*7c478bd9Sstevel@tonic-gate * The end of mapping was reached before any 834*7c478bd9Sstevel@tonic-gate * unreferenced pages were seen. 835*7c478bd9Sstevel@tonic-gate */ 836*7c478bd9Sstevel@tonic-gate vicaddr = (void *) 837*7c478bd9Sstevel@tonic-gate advance_prpageheader_cur_nextmapping(&cur); 838*7c478bd9Sstevel@tonic-gate continue; 839*7c478bd9Sstevel@tonic-gate } 840*7c478bd9Sstevel@tonic-gate do 841*7c478bd9Sstevel@tonic-gate endaddr = advance_prpageheader_cur(&cur); 842*7c478bd9Sstevel@tonic-gate while (endaddr != NULL && 843*7c478bd9Sstevel@tonic-gate *(caddr_t)cur.pr_pdaddr == 0 && 844*7c478bd9Sstevel@tonic-gate (((intptr_t)endaddr - (intptr_t)vicaddr) / 845*7c478bd9Sstevel@tonic-gate 1024) < excess); 846*7c478bd9Sstevel@tonic-gate st_debug(STDL_HIGH, lcol, "endaddr %p, *cur %d\n", 847*7c478bd9Sstevel@tonic-gate endaddr, *(caddr_t)cur.pr_pdaddr); 848*7c478bd9Sstevel@tonic-gate 849*7c478bd9Sstevel@tonic-gate /* 850*7c478bd9Sstevel@tonic-gate * Page out from vicaddr to the end of the mapping, or 851*7c478bd9Sstevel@tonic-gate * endaddr if set, then continue scanning after 852*7c478bd9Sstevel@tonic-gate * endaddr, or the next mapping, if not set. 853*7c478bd9Sstevel@tonic-gate */ 854*7c478bd9Sstevel@tonic-gate nvicaddr = endaddr; 855*7c478bd9Sstevel@tonic-gate if (endaddr == NULL) 856*7c478bd9Sstevel@tonic-gate endaddr = (caddr_t)cur.pr_addr + 857*7c478bd9Sstevel@tonic-gate cur.pr_pagesize * cur.pr_npage; 858*7c478bd9Sstevel@tonic-gate if (pageout(vic->lpc_pid, scan_pr, vicaddr, endaddr) == 859*7c478bd9Sstevel@tonic-gate 0) { 860*7c478bd9Sstevel@tonic-gate int64_t d_rss, att; 861*7c478bd9Sstevel@tonic-gate int willignore = 0; 862*7c478bd9Sstevel@tonic-gate 863*7c478bd9Sstevel@tonic-gate excess += (d_rss = rss_delta( 864*7c478bd9Sstevel@tonic-gate &new_psinfo, &old_psinfo, vic)); 865*7c478bd9Sstevel@tonic-gate 866*7c478bd9Sstevel@tonic-gate /* 867*7c478bd9Sstevel@tonic-gate * If this pageout attempt was unsuccessful 868*7c478bd9Sstevel@tonic-gate * (the resident portion was not affected), and 869*7c478bd9Sstevel@tonic-gate * was for the whole mapping, put it in the 870*7c478bd9Sstevel@tonic-gate * ignored set, so it will not be scanned again 871*7c478bd9Sstevel@tonic-gate * until some page is referenced or modified. 872*7c478bd9Sstevel@tonic-gate */ 873*7c478bd9Sstevel@tonic-gate if (d_rss >= 0 && (void *)cur.pr_addr == 874*7c478bd9Sstevel@tonic-gate vicaddr && (cur.pr_pagesize * cur.pr_npage) 875*7c478bd9Sstevel@tonic-gate == ((uintptr_t)endaddr - 876*7c478bd9Sstevel@tonic-gate (uintptr_t)vicaddr)) { 877*7c478bd9Sstevel@tonic-gate if (lmapping_insert( 878*7c478bd9Sstevel@tonic-gate &vic->lpc_ignore, 879*7c478bd9Sstevel@tonic-gate cur.pr_addr, 880*7c478bd9Sstevel@tonic-gate cur.pr_pagesize * 881*7c478bd9Sstevel@tonic-gate cur.pr_npage) != 0) 882*7c478bd9Sstevel@tonic-gate debug("not enough memory to add" 883*7c478bd9Sstevel@tonic-gate " mapping at %p to ignored" 884*7c478bd9Sstevel@tonic-gate " set\n", 885*7c478bd9Sstevel@tonic-gate (void *)cur.pr_addr); 886*7c478bd9Sstevel@tonic-gate willignore = 1; 887*7c478bd9Sstevel@tonic-gate } 888*7c478bd9Sstevel@tonic-gate 889*7c478bd9Sstevel@tonic-gate /* 890*7c478bd9Sstevel@tonic-gate * Update statistics. 891*7c478bd9Sstevel@tonic-gate */ 892*7c478bd9Sstevel@tonic-gate lcol->lcol_stat.lcols_pg_att += (att = 893*7c478bd9Sstevel@tonic-gate ((intptr_t)endaddr - (intptr_t)vicaddr) / 894*7c478bd9Sstevel@tonic-gate 1024); 895*7c478bd9Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "paged out 0x%p" 896*7c478bd9Sstevel@tonic-gate "+0t(%llu/%llu)kB%s\n", vicaddr, 897*7c478bd9Sstevel@tonic-gate (unsigned long long)((d_rss < 898*7c478bd9Sstevel@tonic-gate 0) ? - d_rss : 0), (unsigned long long)att, 899*7c478bd9Sstevel@tonic-gate willignore ? " (will ignore)" : ""); 900*7c478bd9Sstevel@tonic-gate } else { 901*7c478bd9Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, 902*7c478bd9Sstevel@tonic-gate "process %d: exited/unscannable\n", 903*7c478bd9Sstevel@tonic-gate (int)vic->lpc_pid); 904*7c478bd9Sstevel@tonic-gate vic->lpc_unscannable = 1; 905*7c478bd9Sstevel@tonic-gate goto nextproc; 906*7c478bd9Sstevel@tonic-gate } 907*7c478bd9Sstevel@tonic-gate 908*7c478bd9Sstevel@tonic-gate /* 909*7c478bd9Sstevel@tonic-gate * Update the statistics file, if it's time. 910*7c478bd9Sstevel@tonic-gate */ 911*7c478bd9Sstevel@tonic-gate check_update_statistics(); 912*7c478bd9Sstevel@tonic-gate 913*7c478bd9Sstevel@tonic-gate vicaddr = (nvicaddr != NULL) ? nvicaddr : (void 914*7c478bd9Sstevel@tonic-gate *)advance_prpageheader_cur_nextmapping(&cur); 915*7c478bd9Sstevel@tonic-gate } 916*7c478bd9Sstevel@tonic-gate excess += rss_delta(&new_psinfo, &old_psinfo, vic); 917*7c478bd9Sstevel@tonic-gate st_debug(STDL_NORMAL, lcol, "done, excess %lld\n", 918*7c478bd9Sstevel@tonic-gate (long long)excess); 919*7c478bd9Sstevel@tonic-gate nextproc: 920*7c478bd9Sstevel@tonic-gate /* 921*7c478bd9Sstevel@tonic-gate * If a process was grabbed, release it, destroying its agent. 922*7c478bd9Sstevel@tonic-gate */ 923*7c478bd9Sstevel@tonic-gate if (scan_pr != NULL) { 924*7c478bd9Sstevel@tonic-gate (void) Prelease(scan_pr, 0); 925*7c478bd9Sstevel@tonic-gate scan_pr = NULL; 926*7c478bd9Sstevel@tonic-gate } 927*7c478bd9Sstevel@tonic-gate lcol->lcol_victim = vic; 928*7c478bd9Sstevel@tonic-gate /* 929*7c478bd9Sstevel@tonic-gate * Scan the collection at most once. Only if scanning was not 930*7c478bd9Sstevel@tonic-gate * aborted for any reason, and the end of lprocess has not been 931*7c478bd9Sstevel@tonic-gate * reached, determine the next victim and scan it. 932*7c478bd9Sstevel@tonic-gate */ 933*7c478bd9Sstevel@tonic-gate if (vic != NULL) { 934*7c478bd9Sstevel@tonic-gate if (vic->lpc_next != NULL) { 935*7c478bd9Sstevel@tonic-gate /* 936*7c478bd9Sstevel@tonic-gate * Determine the next process to be scanned. 937*7c478bd9Sstevel@tonic-gate */ 938*7c478bd9Sstevel@tonic-gate if (excess > 0) { 939*7c478bd9Sstevel@tonic-gate vic = get_valid_victim(lcol, 940*7c478bd9Sstevel@tonic-gate vic->lpc_next); 941*7c478bd9Sstevel@tonic-gate vicaddr = 0; 942*7c478bd9Sstevel@tonic-gate } 943*7c478bd9Sstevel@tonic-gate } else { 944*7c478bd9Sstevel@tonic-gate /* 945*7c478bd9Sstevel@tonic-gate * A complete scan of the collection was made, 946*7c478bd9Sstevel@tonic-gate * so tick the scan counter and stop scanning 947*7c478bd9Sstevel@tonic-gate * until the next request. 948*7c478bd9Sstevel@tonic-gate */ 949*7c478bd9Sstevel@tonic-gate lcol->lcol_stat.lcols_scan_count++; 950*7c478bd9Sstevel@tonic-gate lcol->lcol_stat.lcols_scan_time_complete 951*7c478bd9Sstevel@tonic-gate = lcol->lcol_stat.lcols_scan_time; 952*7c478bd9Sstevel@tonic-gate /* 953*7c478bd9Sstevel@tonic-gate * If an excess still exists, tick the 954*7c478bd9Sstevel@tonic-gate * "ineffective scan" counter, signalling that 955*7c478bd9Sstevel@tonic-gate * the cap may be uneforceable. 956*7c478bd9Sstevel@tonic-gate */ 957*7c478bd9Sstevel@tonic-gate if (resumed == 0 && excess > 0) 958*7c478bd9Sstevel@tonic-gate lcol->lcol_stat 959*7c478bd9Sstevel@tonic-gate .lcols_scan_ineffective++; 960*7c478bd9Sstevel@tonic-gate /* 961*7c478bd9Sstevel@tonic-gate * Scanning should start at the beginning of 962*7c478bd9Sstevel@tonic-gate * the process list at the next request. 963*7c478bd9Sstevel@tonic-gate */ 964*7c478bd9Sstevel@tonic-gate if (excess > 0) 965*7c478bd9Sstevel@tonic-gate vic = NULL; 966*7c478bd9Sstevel@tonic-gate } 967*7c478bd9Sstevel@tonic-gate } 968*7c478bd9Sstevel@tonic-gate } 969*7c478bd9Sstevel@tonic-gate lcol->lcol_stat.lcols_scan_time += (gethrtime() - scan_start); 970*7c478bd9Sstevel@tonic-gate st_debug(STDL_HIGH, lcol, "done scanning; excess %lld\n", 971*7c478bd9Sstevel@tonic-gate (long long)excess); 972*7c478bd9Sstevel@tonic-gate 973*7c478bd9Sstevel@tonic-gate lcol->lcol_resaddr = vicaddr; 974*7c478bd9Sstevel@tonic-gate if (lcol->lcol_resaddr == NULL && lcol->lcol_victim != NULL) { 975*7c478bd9Sstevel@tonic-gate lcol->lcol_victim = get_valid_victim(lcol, 976*7c478bd9Sstevel@tonic-gate lcol->lcol_victim->lpc_next); 977*7c478bd9Sstevel@tonic-gate } 978*7c478bd9Sstevel@tonic-gate } 979*7c478bd9Sstevel@tonic-gate 980*7c478bd9Sstevel@tonic-gate /* 981*7c478bd9Sstevel@tonic-gate * Abort the scan in progress, and destroy the agent LWP of any grabbed 982*7c478bd9Sstevel@tonic-gate * processes. 983*7c478bd9Sstevel@tonic-gate */ 984*7c478bd9Sstevel@tonic-gate void 985*7c478bd9Sstevel@tonic-gate scan_abort(void) 986*7c478bd9Sstevel@tonic-gate { 987*7c478bd9Sstevel@tonic-gate if (scan_pr != NULL) 988*7c478bd9Sstevel@tonic-gate (void) Prelease(scan_pr, NULL); 989*7c478bd9Sstevel@tonic-gate } 990*7c478bd9Sstevel@tonic-gate 991*7c478bd9Sstevel@tonic-gate static void 992*7c478bd9Sstevel@tonic-gate revoke_xmap(rfd_t *rfd) 993*7c478bd9Sstevel@tonic-gate { 994*7c478bd9Sstevel@tonic-gate lprocess_t *lpc = rfd->rfd_data; 995*7c478bd9Sstevel@tonic-gate 996*7c478bd9Sstevel@tonic-gate debug("revoking xmap for process %d\n", (int)lpc->lpc_pid); 997*7c478bd9Sstevel@tonic-gate ASSERT(lpc->lpc_xmap_fd != -1); 998*7c478bd9Sstevel@tonic-gate lpc->lpc_xmap_fd = -1; 999*7c478bd9Sstevel@tonic-gate } 1000*7c478bd9Sstevel@tonic-gate 1001*7c478bd9Sstevel@tonic-gate /* 1002*7c478bd9Sstevel@tonic-gate * Retrieve the process's current xmap , which is used to determine the size of 1003*7c478bd9Sstevel@tonic-gate * the resident portion of its segments. Return zero if successful. 1004*7c478bd9Sstevel@tonic-gate */ 1005*7c478bd9Sstevel@tonic-gate static int 1006*7c478bd9Sstevel@tonic-gate lpc_xmap_update(lprocess_t *lpc) 1007*7c478bd9Sstevel@tonic-gate { 1008*7c478bd9Sstevel@tonic-gate int res; 1009*7c478bd9Sstevel@tonic-gate struct stat st; 1010*7c478bd9Sstevel@tonic-gate 1011*7c478bd9Sstevel@tonic-gate free(lpc->lpc_xmap); 1012*7c478bd9Sstevel@tonic-gate lpc->lpc_xmap = NULL; 1013*7c478bd9Sstevel@tonic-gate lpc->lpc_nxmap = -1; 1014*7c478bd9Sstevel@tonic-gate 1015*7c478bd9Sstevel@tonic-gate if (lpc->lpc_xmap_fd == -1) { 1016*7c478bd9Sstevel@tonic-gate char pathbuf[PROC_PATH_MAX]; 1017*7c478bd9Sstevel@tonic-gate 1018*7c478bd9Sstevel@tonic-gate (void) snprintf(pathbuf, sizeof (pathbuf), "/proc/%d/xmap", 1019*7c478bd9Sstevel@tonic-gate (int)lpc->lpc_pid); 1020*7c478bd9Sstevel@tonic-gate if ((lpc->lpc_xmap_fd = rfd_open(pathbuf, 1, RFD_XMAP, 1021*7c478bd9Sstevel@tonic-gate revoke_xmap, lpc, O_RDONLY, 0)) < 0) 1022*7c478bd9Sstevel@tonic-gate return (-1); 1023*7c478bd9Sstevel@tonic-gate } 1024*7c478bd9Sstevel@tonic-gate 1025*7c478bd9Sstevel@tonic-gate redo: 1026*7c478bd9Sstevel@tonic-gate errno = 0; 1027*7c478bd9Sstevel@tonic-gate if (fstat(lpc->lpc_xmap_fd, &st) != 0) { 1028*7c478bd9Sstevel@tonic-gate debug("cannot stat xmap\n"); 1029*7c478bd9Sstevel@tonic-gate (void) rfd_close(lpc->lpc_xmap_fd); 1030*7c478bd9Sstevel@tonic-gate lpc->lpc_xmap_fd = -1; 1031*7c478bd9Sstevel@tonic-gate return (-1); 1032*7c478bd9Sstevel@tonic-gate } 1033*7c478bd9Sstevel@tonic-gate 1034*7c478bd9Sstevel@tonic-gate if ((st.st_size % sizeof (*lpc->lpc_xmap)) != 0) { 1035*7c478bd9Sstevel@tonic-gate debug("xmap wrong size\n"); 1036*7c478bd9Sstevel@tonic-gate (void) rfd_close(lpc->lpc_xmap_fd); 1037*7c478bd9Sstevel@tonic-gate lpc->lpc_xmap_fd = -1; 1038*7c478bd9Sstevel@tonic-gate return (-1); 1039*7c478bd9Sstevel@tonic-gate } 1040*7c478bd9Sstevel@tonic-gate 1041*7c478bd9Sstevel@tonic-gate lpc->lpc_xmap = malloc(st.st_size); 1042*7c478bd9Sstevel@tonic-gate if (lpc->lpc_xmap == NULL) { 1043*7c478bd9Sstevel@tonic-gate debug("cannot malloc() %ld bytes for xmap", st.st_size); 1044*7c478bd9Sstevel@tonic-gate (void) rfd_close(lpc->lpc_xmap_fd); 1045*7c478bd9Sstevel@tonic-gate lpc->lpc_xmap_fd = -1; 1046*7c478bd9Sstevel@tonic-gate return (-1); 1047*7c478bd9Sstevel@tonic-gate } 1048*7c478bd9Sstevel@tonic-gate 1049*7c478bd9Sstevel@tonic-gate if ((res = pread(lpc->lpc_xmap_fd, lpc->lpc_xmap, st.st_size, 0)) != 1050*7c478bd9Sstevel@tonic-gate st.st_size) { 1051*7c478bd9Sstevel@tonic-gate free(lpc->lpc_xmap); 1052*7c478bd9Sstevel@tonic-gate lpc->lpc_xmap = NULL; 1053*7c478bd9Sstevel@tonic-gate if (res > 0) { 1054*7c478bd9Sstevel@tonic-gate debug("xmap changed size, retrying\n"); 1055*7c478bd9Sstevel@tonic-gate goto redo; 1056*7c478bd9Sstevel@tonic-gate } else { 1057*7c478bd9Sstevel@tonic-gate debug("cannot read xmap"); 1058*7c478bd9Sstevel@tonic-gate return (-1); 1059*7c478bd9Sstevel@tonic-gate } 1060*7c478bd9Sstevel@tonic-gate } 1061*7c478bd9Sstevel@tonic-gate lpc->lpc_nxmap = st.st_size / sizeof (*lpc->lpc_xmap); 1062*7c478bd9Sstevel@tonic-gate 1063*7c478bd9Sstevel@tonic-gate return (0); 1064*7c478bd9Sstevel@tonic-gate } 1065