1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*7c478bd9Sstevel@tonic-gate 
29*7c478bd9Sstevel@tonic-gate #include <sys/mman.h>
30*7c478bd9Sstevel@tonic-gate #include <sys/param.h>
31*7c478bd9Sstevel@tonic-gate #include <sys/stat.h>
32*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
33*7c478bd9Sstevel@tonic-gate #include <assert.h>
34*7c478bd9Sstevel@tonic-gate #include <errno.h>
35*7c478bd9Sstevel@tonic-gate #include <fcntl.h>
36*7c478bd9Sstevel@tonic-gate #include <libproc.h>
37*7c478bd9Sstevel@tonic-gate #include <limits.h>
38*7c478bd9Sstevel@tonic-gate #include <procfs.h>
39*7c478bd9Sstevel@tonic-gate #include <stdio.h>
40*7c478bd9Sstevel@tonic-gate #include <stdlib.h>
41*7c478bd9Sstevel@tonic-gate #include <strings.h>
42*7c478bd9Sstevel@tonic-gate #include <time.h>
43*7c478bd9Sstevel@tonic-gate #include <unistd.h>
44*7c478bd9Sstevel@tonic-gate #include "rcapd.h"
45*7c478bd9Sstevel@tonic-gate #include "rcapd_rfd.h"
46*7c478bd9Sstevel@tonic-gate #include "rcapd_mapping.h"
47*7c478bd9Sstevel@tonic-gate #include "utils.h"
48*7c478bd9Sstevel@tonic-gate 
/* Forward declaration of a file-local helper. */
static int lpc_xmap_update(lprocess_t *);
#ifdef DEBUG
/*
 * Declared extern, so it is provided by another object file.  In this file
 * it is only called from DEBUG-only consistency checks.
 */
extern int lmapping_dump_diff(lmapping_t *lm1, lmapping_t *lm2);
#endif /* DEBUG */
53*7c478bd9Sstevel@tonic-gate 
/*
 * The number of file descriptors required to grab a process and create an
 * agent in it.
 * NOTE(review): the value 10 is not derived anywhere in the visible code;
 * confirm against the libproc calls that consume it.
 */
#define	PGRAB_FD_COUNT		10
59*7c478bd9Sstevel@tonic-gate 
/*
 * Record a position in an address space as it corresponds to a prpageheader_t
 * and affiliated structures.
 *
 * A cursor is initialized by set_prpageheader_cur(), which zeroes it, sets
 * pr_map to -1 (i.e. "before the first mapping") and then advances to the
 * first usable mapping.  pr_xmap/pr_nxmap are optional: callers that have no
 * xmap pass NULL and -1, in which case pr_rss and pr_pg_rss stay at -1 and no
 * mapping is skipped on residency grounds.
 */
typedef struct prpageheader_cur {
	int pr_nmap;		/* number of mappings in address space */
	int pr_map;		/* number of this mapping */
	uint64_t pr_pgoff;	/* page offset into mapping */
	uint64_t pr_npage;	/* number of pages in mapping */
	uint64_t pr_pagesize;	/* page size of mapping */
	uintptr_t pr_addr;	/* base of mapping */
	prpageheader_t *pr_prpageheader;	/* associated page header */
	void *pr_pdaddr;	/* address of page's byte in pagedata */
	prxmap_t *pr_xmap;	/* array containing per-segment information */
	int pr_nxmap;		/* number of xmaps in array */
	int64_t pr_rss;		/* number of resident pages in mapping, */
				/* or -1 if xmap is out of sync */
	int64_t pr_pg_rss;	/* number of pageable pages in mapping, or -1 */
} prpageheader_cur_t;
79*7c478bd9Sstevel@tonic-gate 
/*
 * Handle of the process currently being scanned.  File-scope state; it is
 * not referenced by the functions immediately following this declaration.
 */
static struct ps_prochandle *scan_pr;	/* currently-scanned process's handle */
81*7c478bd9Sstevel@tonic-gate 
/*
 * Verbosity classes accepted by st_debug().  STDL_HIGH messages are emitted
 * only when the message priority is at least RCM_DEBUG_HIGH; any other level
 * requires RCM_DEBUG.
 */
typedef enum {
	STDL_NORMAL,
	STDL_HIGH
} st_debug_level_t;
86*7c478bd9Sstevel@tonic-gate 
87*7c478bd9Sstevel@tonic-gate /*
88*7c478bd9Sstevel@tonic-gate  * Output a scanning-related debug message.
89*7c478bd9Sstevel@tonic-gate  */
90*7c478bd9Sstevel@tonic-gate /*PRINTFLIKE3*/ /*ARGSUSED*/
91*7c478bd9Sstevel@tonic-gate static void
92*7c478bd9Sstevel@tonic-gate st_debug(st_debug_level_t level, lcollection_t *lcol, char *msg, ...)
93*7c478bd9Sstevel@tonic-gate {
94*7c478bd9Sstevel@tonic-gate #ifdef DEBUG_MSG
95*7c478bd9Sstevel@tonic-gate 	va_list alist;
96*7c478bd9Sstevel@tonic-gate 	char *buf;
97*7c478bd9Sstevel@tonic-gate 	size_t len;
98*7c478bd9Sstevel@tonic-gate 
99*7c478bd9Sstevel@tonic-gate 	if (get_message_priority() < ((level == STDL_HIGH) ? RCM_DEBUG_HIGH
100*7c478bd9Sstevel@tonic-gate 	    : RCM_DEBUG))
101*7c478bd9Sstevel@tonic-gate 		return;
102*7c478bd9Sstevel@tonic-gate 
103*7c478bd9Sstevel@tonic-gate 	len = strlen(msg) + LINELEN;
104*7c478bd9Sstevel@tonic-gate 	buf = malloc(len);
105*7c478bd9Sstevel@tonic-gate 	if (buf == NULL)
106*7c478bd9Sstevel@tonic-gate 		return;
107*7c478bd9Sstevel@tonic-gate 	(void) snprintf(buf, len, "%s %s scanner %s", rcfg.rcfg_mode_name,
108*7c478bd9Sstevel@tonic-gate 	    lcol->lcol_name, msg);
109*7c478bd9Sstevel@tonic-gate 
110*7c478bd9Sstevel@tonic-gate 	va_start(alist, msg);
111*7c478bd9Sstevel@tonic-gate 	vdprintfe(RCM_DEBUG, buf, alist);
112*7c478bd9Sstevel@tonic-gate 	va_end(alist);
113*7c478bd9Sstevel@tonic-gate 
114*7c478bd9Sstevel@tonic-gate 	free(buf);
115*7c478bd9Sstevel@tonic-gate #endif /* DEBUG_MSG */
116*7c478bd9Sstevel@tonic-gate }
117*7c478bd9Sstevel@tonic-gate 
118*7c478bd9Sstevel@tonic-gate /*
119*7c478bd9Sstevel@tonic-gate  * Determine the collection's current victim, based on its last.  The last will
120*7c478bd9Sstevel@tonic-gate  * be returned, or, if invalid, any other valid process, if the collection has
121*7c478bd9Sstevel@tonic-gate  * any.
122*7c478bd9Sstevel@tonic-gate  */
123*7c478bd9Sstevel@tonic-gate static lprocess_t *
124*7c478bd9Sstevel@tonic-gate get_valid_victim(lcollection_t *lcol, lprocess_t *lpc)
125*7c478bd9Sstevel@tonic-gate {
126*7c478bd9Sstevel@tonic-gate 	if (lpc == NULL || !lcollection_member(lcol, lpc))
127*7c478bd9Sstevel@tonic-gate 		lpc = lcol->lcol_lprocess;
128*7c478bd9Sstevel@tonic-gate 
129*7c478bd9Sstevel@tonic-gate 	/*
130*7c478bd9Sstevel@tonic-gate 	 * Find the next scannable process, and make it the victim.
131*7c478bd9Sstevel@tonic-gate 	 */
132*7c478bd9Sstevel@tonic-gate 	while (lpc != NULL && lpc->lpc_unscannable != 0)
133*7c478bd9Sstevel@tonic-gate 		lpc = lpc->lpc_next;
134*7c478bd9Sstevel@tonic-gate 
135*7c478bd9Sstevel@tonic-gate 	return (lpc);
136*7c478bd9Sstevel@tonic-gate }
137*7c478bd9Sstevel@tonic-gate 
138*7c478bd9Sstevel@tonic-gate /*
139*7c478bd9Sstevel@tonic-gate  * Get a process's combined current pagedata (per-page referenced and modified
140*7c478bd9Sstevel@tonic-gate  * bits) and set the supplied pointer to it.  The caller is responsible for
141*7c478bd9Sstevel@tonic-gate  * freeing the data.  If the pagedata is unreadable, a nonzero value is
142*7c478bd9Sstevel@tonic-gate  * returned, and errno is set.  Otherwise, 0 is returned.
143*7c478bd9Sstevel@tonic-gate  */
144*7c478bd9Sstevel@tonic-gate static int
145*7c478bd9Sstevel@tonic-gate get_pagedata(prpageheader_t **pghpp, int fd)
146*7c478bd9Sstevel@tonic-gate {
147*7c478bd9Sstevel@tonic-gate 	int res;
148*7c478bd9Sstevel@tonic-gate 	struct stat st;
149*7c478bd9Sstevel@tonic-gate 
150*7c478bd9Sstevel@tonic-gate redo:
151*7c478bd9Sstevel@tonic-gate 	errno = 0;
152*7c478bd9Sstevel@tonic-gate 	if (fstat(fd, &st) != 0) {
153*7c478bd9Sstevel@tonic-gate 		debug("cannot stat pagedata\n");
154*7c478bd9Sstevel@tonic-gate 		return (-1);
155*7c478bd9Sstevel@tonic-gate 	}
156*7c478bd9Sstevel@tonic-gate 
157*7c478bd9Sstevel@tonic-gate 	errno = 0;
158*7c478bd9Sstevel@tonic-gate 	*pghpp = malloc(st.st_size);
159*7c478bd9Sstevel@tonic-gate 	if (*pghpp == NULL) {
160*7c478bd9Sstevel@tonic-gate 		debug("cannot malloc() %ld bytes for pagedata", st.st_size);
161*7c478bd9Sstevel@tonic-gate 		return (-1);
162*7c478bd9Sstevel@tonic-gate 	}
163*7c478bd9Sstevel@tonic-gate 	(void) bzero(*pghpp, st.st_size);
164*7c478bd9Sstevel@tonic-gate 
165*7c478bd9Sstevel@tonic-gate 	errno = 0;
166*7c478bd9Sstevel@tonic-gate 	if ((res = read(fd, *pghpp, st.st_size)) != st.st_size) {
167*7c478bd9Sstevel@tonic-gate 		free(*pghpp);
168*7c478bd9Sstevel@tonic-gate 		*pghpp = NULL;
169*7c478bd9Sstevel@tonic-gate 		if (res > 0 || errno == E2BIG) {
170*7c478bd9Sstevel@tonic-gate 			debug("pagedata changed size, retrying\n");
171*7c478bd9Sstevel@tonic-gate 			goto redo;
172*7c478bd9Sstevel@tonic-gate 		} else {
173*7c478bd9Sstevel@tonic-gate 			debug("cannot read pagedata");
174*7c478bd9Sstevel@tonic-gate 			return (-1);
175*7c478bd9Sstevel@tonic-gate 		}
176*7c478bd9Sstevel@tonic-gate 	}
177*7c478bd9Sstevel@tonic-gate 
178*7c478bd9Sstevel@tonic-gate 	return (0);
179*7c478bd9Sstevel@tonic-gate }
180*7c478bd9Sstevel@tonic-gate 
181*7c478bd9Sstevel@tonic-gate /*
182*7c478bd9Sstevel@tonic-gate  * Return the count of kilobytes of pages represented by the given pagedata
183*7c478bd9Sstevel@tonic-gate  * which meet the given criteria, having pages which are in all of the states
184*7c478bd9Sstevel@tonic-gate  * specified by the mask, and in none of the states in the notmask.  If the
185*7c478bd9Sstevel@tonic-gate  * CP_CLEAR flag is set, the pagedata will also be cleared.
186*7c478bd9Sstevel@tonic-gate  */
187*7c478bd9Sstevel@tonic-gate #define	CP_CLEAR	1
188*7c478bd9Sstevel@tonic-gate static uint64_t
189*7c478bd9Sstevel@tonic-gate count_pages(prpageheader_t *pghp, int flags, int mask, int notmask)
190*7c478bd9Sstevel@tonic-gate {
191*7c478bd9Sstevel@tonic-gate 	int map;
192*7c478bd9Sstevel@tonic-gate 	caddr_t cur, end;
193*7c478bd9Sstevel@tonic-gate 	prpageheader_t pgh = *pghp;
194*7c478bd9Sstevel@tonic-gate 	prasmap_t *asmapp;
195*7c478bd9Sstevel@tonic-gate 	uint64_t count = 0;
196*7c478bd9Sstevel@tonic-gate 
197*7c478bd9Sstevel@tonic-gate 	cur = (caddr_t)pghp + sizeof (*pghp);
198*7c478bd9Sstevel@tonic-gate 	for (map = 0; map < pgh.pr_nmap; map++) {
199*7c478bd9Sstevel@tonic-gate 		asmapp = (prasmap_t *)(uintptr_t)cur;
200*7c478bd9Sstevel@tonic-gate 		cur += sizeof (*asmapp);
201*7c478bd9Sstevel@tonic-gate 		end = cur + asmapp->pr_npage;
202*7c478bd9Sstevel@tonic-gate 		while (cur < end) {
203*7c478bd9Sstevel@tonic-gate 			if ((*cur & mask) == mask && (*cur & notmask) == 0)
204*7c478bd9Sstevel@tonic-gate 				count += asmapp->pr_pagesize / 1024;
205*7c478bd9Sstevel@tonic-gate 			if ((flags & CP_CLEAR) != 0)
206*7c478bd9Sstevel@tonic-gate 				*cur = 0;
207*7c478bd9Sstevel@tonic-gate 			cur++;
208*7c478bd9Sstevel@tonic-gate 		}
209*7c478bd9Sstevel@tonic-gate 
210*7c478bd9Sstevel@tonic-gate 		/*
211*7c478bd9Sstevel@tonic-gate 		 * Skip to next 64-bit-aligned address to get the next
212*7c478bd9Sstevel@tonic-gate 		 * prasmap_t.
213*7c478bd9Sstevel@tonic-gate 		 */
214*7c478bd9Sstevel@tonic-gate 		cur = (caddr_t)((intptr_t)(cur + 7) & ~7);
215*7c478bd9Sstevel@tonic-gate 	}
216*7c478bd9Sstevel@tonic-gate 
217*7c478bd9Sstevel@tonic-gate 	return (count);
218*7c478bd9Sstevel@tonic-gate }
219*7c478bd9Sstevel@tonic-gate 
220*7c478bd9Sstevel@tonic-gate /*
221*7c478bd9Sstevel@tonic-gate  * Return the amount of memory (in kilobytes) that hasn't been referenced or
222*7c478bd9Sstevel@tonic-gate  * modified, which memory which will be paged out first.  Should be written to
223*7c478bd9Sstevel@tonic-gate  * exclude nonresident pages when sufficient interfaces exist.
224*7c478bd9Sstevel@tonic-gate  */
225*7c478bd9Sstevel@tonic-gate static uint64_t
226*7c478bd9Sstevel@tonic-gate unrm_size(lprocess_t *lpc)
227*7c478bd9Sstevel@tonic-gate {
228*7c478bd9Sstevel@tonic-gate 	return (count_pages(lpc->lpc_prpageheader, CP_CLEAR,
229*7c478bd9Sstevel@tonic-gate 	    0, PG_MODIFIED | PG_REFERENCED));
230*7c478bd9Sstevel@tonic-gate }
231*7c478bd9Sstevel@tonic-gate 
232*7c478bd9Sstevel@tonic-gate /*
233*7c478bd9Sstevel@tonic-gate  * Advance a prpageheader_cur_t to the address space's next mapping, returning
234*7c478bd9Sstevel@tonic-gate  * its address, or NULL if there is none.  Any known nonpageable or nonresident
235*7c478bd9Sstevel@tonic-gate  * mappings will be skipped over.
236*7c478bd9Sstevel@tonic-gate  */
237*7c478bd9Sstevel@tonic-gate static uintptr_t
238*7c478bd9Sstevel@tonic-gate advance_prpageheader_cur_nextmapping(prpageheader_cur_t *pcp)
239*7c478bd9Sstevel@tonic-gate {
240*7c478bd9Sstevel@tonic-gate 	prasmap_t *pap;
241*7c478bd9Sstevel@tonic-gate 	int i;
242*7c478bd9Sstevel@tonic-gate 
243*7c478bd9Sstevel@tonic-gate next:
244*7c478bd9Sstevel@tonic-gate 	ASSERT(pcp->pr_map < pcp->pr_nmap);
245*7c478bd9Sstevel@tonic-gate 	if ((pcp->pr_map + 1) == pcp->pr_nmap)
246*7c478bd9Sstevel@tonic-gate 		return (NULL);
247*7c478bd9Sstevel@tonic-gate 	pcp->pr_map++;
248*7c478bd9Sstevel@tonic-gate 	if (pcp->pr_pgoff < pcp->pr_npage) {
249*7c478bd9Sstevel@tonic-gate 		pcp->pr_pdaddr = (caddr_t)((uintptr_t)pcp->pr_pdaddr +
250*7c478bd9Sstevel@tonic-gate 		    (pcp->pr_npage - pcp->pr_pgoff));
251*7c478bd9Sstevel@tonic-gate 		pcp->pr_pgoff = pcp->pr_npage;
252*7c478bd9Sstevel@tonic-gate 	}
253*7c478bd9Sstevel@tonic-gate 	/*
254*7c478bd9Sstevel@tonic-gate 	 * Skip to next 64-bit-aligned address to get the next prasmap_t.
255*7c478bd9Sstevel@tonic-gate 	 */
256*7c478bd9Sstevel@tonic-gate 	pcp->pr_pdaddr = (caddr_t)(((uintptr_t)pcp->pr_pdaddr + 7) & ~7);
257*7c478bd9Sstevel@tonic-gate 	pap = (prasmap_t *)pcp->pr_pdaddr;
258*7c478bd9Sstevel@tonic-gate 	pcp->pr_pgoff = 0;
259*7c478bd9Sstevel@tonic-gate 	pcp->pr_npage = pap->pr_npage;
260*7c478bd9Sstevel@tonic-gate 	pcp->pr_pagesize = pap->pr_pagesize;
261*7c478bd9Sstevel@tonic-gate 	pcp->pr_addr = pap->pr_vaddr;
262*7c478bd9Sstevel@tonic-gate 	pcp->pr_pdaddr = pap + 1;
263*7c478bd9Sstevel@tonic-gate 
264*7c478bd9Sstevel@tonic-gate 	/*
265*7c478bd9Sstevel@tonic-gate 	 * Skip any known nonpageable mappings.  Currently, the only one
266*7c478bd9Sstevel@tonic-gate 	 * detected is the schedctl page.
267*7c478bd9Sstevel@tonic-gate 	 */
268*7c478bd9Sstevel@tonic-gate 	if ((pap->pr_mflags ^ (MA_SHARED | MA_READ | MA_WRITE | MA_EXEC |
269*7c478bd9Sstevel@tonic-gate 	    MA_ANON)) == 0 && pap->pr_npage == 1) {
270*7c478bd9Sstevel@tonic-gate 		debug("identified nonpageable schedctl mapping at %p\n",
271*7c478bd9Sstevel@tonic-gate 		    (void *)pcp->pr_addr);
272*7c478bd9Sstevel@tonic-gate 		goto next;
273*7c478bd9Sstevel@tonic-gate 	}
274*7c478bd9Sstevel@tonic-gate 
275*7c478bd9Sstevel@tonic-gate 	/*
276*7c478bd9Sstevel@tonic-gate 	 * Skip mappings with no resident pages.  If the xmap does not
277*7c478bd9Sstevel@tonic-gate 	 * correspond to the pagedata for any reason, it will be ignored.
278*7c478bd9Sstevel@tonic-gate 	 */
279*7c478bd9Sstevel@tonic-gate 	pcp->pr_rss = -1;
280*7c478bd9Sstevel@tonic-gate 	pcp->pr_pg_rss = -1;
281*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < pcp->pr_nxmap; i++) {
282*7c478bd9Sstevel@tonic-gate 		prxmap_t *xmap = &pcp->pr_xmap[i];
283*7c478bd9Sstevel@tonic-gate 
284*7c478bd9Sstevel@tonic-gate 		if (pcp->pr_addr == xmap->pr_vaddr && xmap->pr_size ==
285*7c478bd9Sstevel@tonic-gate 		    (pcp->pr_npage * pcp->pr_pagesize)) {
286*7c478bd9Sstevel@tonic-gate 			pcp->pr_rss = xmap->pr_rss;
287*7c478bd9Sstevel@tonic-gate 			/*
288*7c478bd9Sstevel@tonic-gate 			 * Remove COW pages from the pageable RSS count.
289*7c478bd9Sstevel@tonic-gate 			 */
290*7c478bd9Sstevel@tonic-gate 			if ((xmap->pr_mflags & MA_SHARED) == 0)
291*7c478bd9Sstevel@tonic-gate 				pcp->pr_pg_rss = xmap->pr_anon;
292*7c478bd9Sstevel@tonic-gate 			break;
293*7c478bd9Sstevel@tonic-gate 		}
294*7c478bd9Sstevel@tonic-gate 	}
295*7c478bd9Sstevel@tonic-gate 	if (pcp->pr_rss == 0) {
296*7c478bd9Sstevel@tonic-gate 		debug("identified nonresident mapping at 0x%p\n",
297*7c478bd9Sstevel@tonic-gate 		    (void *)pcp->pr_addr);
298*7c478bd9Sstevel@tonic-gate 		goto next;
299*7c478bd9Sstevel@tonic-gate 	} else if (pcp->pr_pg_rss == 0) {
300*7c478bd9Sstevel@tonic-gate 		debug("identified unpageable mapping at 0x%p\n",
301*7c478bd9Sstevel@tonic-gate 		    (void *)pcp->pr_addr);
302*7c478bd9Sstevel@tonic-gate 		goto next;
303*7c478bd9Sstevel@tonic-gate 	}
304*7c478bd9Sstevel@tonic-gate 
305*7c478bd9Sstevel@tonic-gate 	return (pcp->pr_addr);
306*7c478bd9Sstevel@tonic-gate }
307*7c478bd9Sstevel@tonic-gate 
308*7c478bd9Sstevel@tonic-gate /*
309*7c478bd9Sstevel@tonic-gate  * Advance a prpageheader_cur_t to the mapping's next page, returning its
310*7c478bd9Sstevel@tonic-gate  * address, or NULL if there is none.
311*7c478bd9Sstevel@tonic-gate  */
312*7c478bd9Sstevel@tonic-gate static void *
313*7c478bd9Sstevel@tonic-gate advance_prpageheader_cur(prpageheader_cur_t *pcp)
314*7c478bd9Sstevel@tonic-gate {
315*7c478bd9Sstevel@tonic-gate 	ASSERT(pcp->pr_pgoff < pcp->pr_npage);
316*7c478bd9Sstevel@tonic-gate 	if ((pcp->pr_pgoff + 1) == pcp->pr_npage)
317*7c478bd9Sstevel@tonic-gate 		return (NULL);
318*7c478bd9Sstevel@tonic-gate 	pcp->pr_pdaddr = (caddr_t)pcp->pr_pdaddr + 1;
319*7c478bd9Sstevel@tonic-gate 	pcp->pr_pgoff++;
320*7c478bd9Sstevel@tonic-gate 
321*7c478bd9Sstevel@tonic-gate 	ASSERT((*(char *)pcp->pr_pdaddr & ~(PG_MODIFIED | PG_REFERENCED)) == 0);
322*7c478bd9Sstevel@tonic-gate 	return ((caddr_t)pcp->pr_addr + pcp->pr_pgoff * pcp->pr_pagesize);
323*7c478bd9Sstevel@tonic-gate }
324*7c478bd9Sstevel@tonic-gate 
325*7c478bd9Sstevel@tonic-gate /*
326*7c478bd9Sstevel@tonic-gate  * Initialize a prpageheader_cur_t, positioned at the first page of the mapping
327*7c478bd9Sstevel@tonic-gate  * of an address space.
328*7c478bd9Sstevel@tonic-gate  */
329*7c478bd9Sstevel@tonic-gate static void *
330*7c478bd9Sstevel@tonic-gate set_prpageheader_cur(prpageheader_cur_t *pcp, prpageheader_t *php,
331*7c478bd9Sstevel@tonic-gate     prxmap_t *xmap, int nxmap)
332*7c478bd9Sstevel@tonic-gate {
333*7c478bd9Sstevel@tonic-gate 	bzero(pcp, sizeof (*pcp));
334*7c478bd9Sstevel@tonic-gate 	pcp->pr_nmap = php->pr_nmap;
335*7c478bd9Sstevel@tonic-gate 	pcp->pr_map = -1;
336*7c478bd9Sstevel@tonic-gate 	pcp->pr_prpageheader = php;
337*7c478bd9Sstevel@tonic-gate 	pcp->pr_xmap = xmap;
338*7c478bd9Sstevel@tonic-gate 	pcp->pr_nxmap = nxmap;
339*7c478bd9Sstevel@tonic-gate 	pcp->pr_pdaddr = (prpageheader_t *)php + 1;
340*7c478bd9Sstevel@tonic-gate 
341*7c478bd9Sstevel@tonic-gate 	return ((void *)advance_prpageheader_cur_nextmapping(pcp));
342*7c478bd9Sstevel@tonic-gate }
343*7c478bd9Sstevel@tonic-gate 
344*7c478bd9Sstevel@tonic-gate /*
345*7c478bd9Sstevel@tonic-gate  * Position a prpageheader_cur_t to the mapped address greater or equal to the
346*7c478bd9Sstevel@tonic-gate  * given value.
347*7c478bd9Sstevel@tonic-gate  */
348*7c478bd9Sstevel@tonic-gate static void *
349*7c478bd9Sstevel@tonic-gate set_prpageheader_cur_addr(prpageheader_cur_t *pcp, prpageheader_t *php,
350*7c478bd9Sstevel@tonic-gate     prxmap_t *xmap, int nxmap, void *naddr)
351*7c478bd9Sstevel@tonic-gate {
352*7c478bd9Sstevel@tonic-gate 	void *addr = set_prpageheader_cur(pcp, php, xmap, nxmap);
353*7c478bd9Sstevel@tonic-gate 
354*7c478bd9Sstevel@tonic-gate 	while (addr != NULL && addr <= naddr)
355*7c478bd9Sstevel@tonic-gate 		if (naddr < (void *)((caddr_t)pcp->pr_addr +
356*7c478bd9Sstevel@tonic-gate 		    pcp->pr_pagesize * pcp->pr_npage)) {
357*7c478bd9Sstevel@tonic-gate 			uint64_t pgdiff = ((uintptr_t)naddr -
358*7c478bd9Sstevel@tonic-gate 			    (uintptr_t)pcp->pr_addr) / pcp->pr_pagesize;
359*7c478bd9Sstevel@tonic-gate 			pcp->pr_pgoff += pgdiff;
360*7c478bd9Sstevel@tonic-gate 			pcp->pr_pdaddr = (caddr_t)pcp->pr_pdaddr + pgdiff;
361*7c478bd9Sstevel@tonic-gate 			addr = (caddr_t)pcp->pr_addr + pcp->pr_pagesize *
362*7c478bd9Sstevel@tonic-gate 			    pcp->pr_pgoff;
363*7c478bd9Sstevel@tonic-gate 			break;
364*7c478bd9Sstevel@tonic-gate 		} else
365*7c478bd9Sstevel@tonic-gate 			addr =
366*7c478bd9Sstevel@tonic-gate 			    (void *)advance_prpageheader_cur_nextmapping(pcp);
367*7c478bd9Sstevel@tonic-gate 
368*7c478bd9Sstevel@tonic-gate 	return (addr);
369*7c478bd9Sstevel@tonic-gate }
370*7c478bd9Sstevel@tonic-gate 
371*7c478bd9Sstevel@tonic-gate static void
372*7c478bd9Sstevel@tonic-gate revoke_pagedata(rfd_t *rfd)
373*7c478bd9Sstevel@tonic-gate {
374*7c478bd9Sstevel@tonic-gate 	lprocess_t *lpc = rfd->rfd_data;
375*7c478bd9Sstevel@tonic-gate 
376*7c478bd9Sstevel@tonic-gate 	st_debug(STDL_NORMAL, lpc->lpc_collection, "revoking pagedata for"
377*7c478bd9Sstevel@tonic-gate 	    " process %d\n", (int)lpc->lpc_pid);
378*7c478bd9Sstevel@tonic-gate 	ASSERT(lpc->lpc_pgdata_fd != -1);
379*7c478bd9Sstevel@tonic-gate 	lpc->lpc_pgdata_fd = -1;
380*7c478bd9Sstevel@tonic-gate }
381*7c478bd9Sstevel@tonic-gate 
382*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
383*7c478bd9Sstevel@tonic-gate static void
384*7c478bd9Sstevel@tonic-gate mklmapping(lmapping_t **lm, prpageheader_t *pgh)
385*7c478bd9Sstevel@tonic-gate {
386*7c478bd9Sstevel@tonic-gate 	prpageheader_cur_t cur;
387*7c478bd9Sstevel@tonic-gate 	void *addr;
388*7c478bd9Sstevel@tonic-gate 
389*7c478bd9Sstevel@tonic-gate 	addr = set_prpageheader_cur(&cur, pgh, NULL, -1);
390*7c478bd9Sstevel@tonic-gate 	ASSERT(*lm == NULL);
391*7c478bd9Sstevel@tonic-gate 	while (addr != NULL) {
392*7c478bd9Sstevel@tonic-gate 		(void) lmapping_insert(lm, cur.pr_addr, cur.pr_npage *
393*7c478bd9Sstevel@tonic-gate 		    cur.pr_pagesize);
394*7c478bd9Sstevel@tonic-gate 		addr = (void *)advance_prpageheader_cur_nextmapping(&cur);
395*7c478bd9Sstevel@tonic-gate 	}
396*7c478bd9Sstevel@tonic-gate }
397*7c478bd9Sstevel@tonic-gate 
398*7c478bd9Sstevel@tonic-gate static void
399*7c478bd9Sstevel@tonic-gate lmapping_dump(lmapping_t *lm)
400*7c478bd9Sstevel@tonic-gate {
401*7c478bd9Sstevel@tonic-gate 	debug("lm: %p\n", (void *)lm);
402*7c478bd9Sstevel@tonic-gate 	while (lm != NULL) {
403*7c478bd9Sstevel@tonic-gate 		debug("\t(%p, %llx\n", (void *)lm->lm_addr,
404*7c478bd9Sstevel@tonic-gate 		    (unsigned long long)lm->lm_size);
405*7c478bd9Sstevel@tonic-gate 		lm = lm->lm_next;
406*7c478bd9Sstevel@tonic-gate 	}
407*7c478bd9Sstevel@tonic-gate }
408*7c478bd9Sstevel@tonic-gate #endif /* DEBUG */
409*7c478bd9Sstevel@tonic-gate 
410*7c478bd9Sstevel@tonic-gate /*
411*7c478bd9Sstevel@tonic-gate  * OR two prpagedata_t which are supposedly snapshots of the same address
412*7c478bd9Sstevel@tonic-gate  * space.  Intersecting mappings with different page sizes are tolerated but
413*7c478bd9Sstevel@tonic-gate  * not normalized (not accurate).  If the mappings of the two snapshots differ
414*7c478bd9Sstevel@tonic-gate  * in any regard, the supplied mappings_changed flag will be set.
415*7c478bd9Sstevel@tonic-gate  */
416*7c478bd9Sstevel@tonic-gate static void
417*7c478bd9Sstevel@tonic-gate OR_pagedata(prpageheader_t *src, prpageheader_t *dst, int *mappings_changedp)
418*7c478bd9Sstevel@tonic-gate {
419*7c478bd9Sstevel@tonic-gate 	prpageheader_cur_t src_cur;
420*7c478bd9Sstevel@tonic-gate 	prpageheader_cur_t dst_cur;
421*7c478bd9Sstevel@tonic-gate 	uintptr_t src_addr;
422*7c478bd9Sstevel@tonic-gate 	uintptr_t dst_addr;
423*7c478bd9Sstevel@tonic-gate 	int mappings_changed = 0;
424*7c478bd9Sstevel@tonic-gate 
425*7c478bd9Sstevel@tonic-gate 	/*
426*7c478bd9Sstevel@tonic-gate 	 * OR source pagedata with the destination, for pages of intersecting
427*7c478bd9Sstevel@tonic-gate 	 * mappings.
428*7c478bd9Sstevel@tonic-gate 	 */
429*7c478bd9Sstevel@tonic-gate 	src_addr = (uintptr_t)set_prpageheader_cur(&src_cur, src, NULL, -1);
430*7c478bd9Sstevel@tonic-gate 	dst_addr = (uintptr_t)set_prpageheader_cur(&dst_cur, dst, NULL, -1);
431*7c478bd9Sstevel@tonic-gate 	while (src_addr != NULL && dst_addr != NULL) {
432*7c478bd9Sstevel@tonic-gate 		while (src_addr == dst_addr && src_addr != NULL) {
433*7c478bd9Sstevel@tonic-gate 			*(char *)dst_cur.pr_pdaddr |=
434*7c478bd9Sstevel@tonic-gate 			    *(char *)src_cur.pr_pdaddr;
435*7c478bd9Sstevel@tonic-gate 			src_addr = (uintptr_t)advance_prpageheader_cur(
436*7c478bd9Sstevel@tonic-gate 			    &src_cur);
437*7c478bd9Sstevel@tonic-gate 			dst_addr = (uintptr_t)advance_prpageheader_cur(
438*7c478bd9Sstevel@tonic-gate 			    &dst_cur);
439*7c478bd9Sstevel@tonic-gate 		}
440*7c478bd9Sstevel@tonic-gate 		if (src_addr != dst_addr)
441*7c478bd9Sstevel@tonic-gate 			mappings_changed = 1;
442*7c478bd9Sstevel@tonic-gate 		src_addr = advance_prpageheader_cur_nextmapping(&src_cur);
443*7c478bd9Sstevel@tonic-gate 		dst_addr = advance_prpageheader_cur_nextmapping(&dst_cur);
444*7c478bd9Sstevel@tonic-gate 		while (src_addr != dst_addr && src_addr != NULL && dst_addr !=
445*7c478bd9Sstevel@tonic-gate 		    NULL) {
446*7c478bd9Sstevel@tonic-gate 			mappings_changed = 1;
447*7c478bd9Sstevel@tonic-gate 			if (src_addr < dst_addr)
448*7c478bd9Sstevel@tonic-gate 				src_addr = advance_prpageheader_cur_nextmapping(
449*7c478bd9Sstevel@tonic-gate 				    &src_cur);
450*7c478bd9Sstevel@tonic-gate 			else
451*7c478bd9Sstevel@tonic-gate 				dst_addr = advance_prpageheader_cur_nextmapping(
452*7c478bd9Sstevel@tonic-gate 				    &dst_cur);
453*7c478bd9Sstevel@tonic-gate 		}
454*7c478bd9Sstevel@tonic-gate 	}
455*7c478bd9Sstevel@tonic-gate 
456*7c478bd9Sstevel@tonic-gate 	*mappings_changedp = mappings_changed;
457*7c478bd9Sstevel@tonic-gate }
458*7c478bd9Sstevel@tonic-gate 
459*7c478bd9Sstevel@tonic-gate /*
460*7c478bd9Sstevel@tonic-gate  * Merge the current pagedata with that on hand.  If the pagedata is
461*7c478bd9Sstevel@tonic-gate  * unretrievable for any reason, such as the process having exited or being a
462*7c478bd9Sstevel@tonic-gate  * zombie, a nonzero value is returned, the process should be marked
463*7c478bd9Sstevel@tonic-gate  * unscannable, and future attempts to scan it should be avoided, since the
464*7c478bd9Sstevel@tonic-gate  * symptom is probably permament.  If the mappings of either pagedata
465*7c478bd9Sstevel@tonic-gate  * differ in any respect, the supplied callback will be invoked once.
466*7c478bd9Sstevel@tonic-gate  */
467*7c478bd9Sstevel@tonic-gate static int
468*7c478bd9Sstevel@tonic-gate merge_current_pagedata(lprocess_t *lpc,
469*7c478bd9Sstevel@tonic-gate     void(*mappings_changed_cb) (lprocess_t *))
470*7c478bd9Sstevel@tonic-gate {
471*7c478bd9Sstevel@tonic-gate 	prpageheader_t *pghp;
472*7c478bd9Sstevel@tonic-gate 	int mappings_changed = 0;
473*7c478bd9Sstevel@tonic-gate 
474*7c478bd9Sstevel@tonic-gate 	if (lpc->lpc_pgdata_fd < 0 || get_pagedata(&pghp, lpc->lpc_pgdata_fd) !=
475*7c478bd9Sstevel@tonic-gate 	    0) {
476*7c478bd9Sstevel@tonic-gate 		char pathbuf[PROC_PATH_MAX];
477*7c478bd9Sstevel@tonic-gate 
478*7c478bd9Sstevel@tonic-gate 		(void) snprintf(pathbuf, sizeof (pathbuf), "/proc/%d/pagedata",
479*7c478bd9Sstevel@tonic-gate 		    (int)lpc->lpc_pid);
480*7c478bd9Sstevel@tonic-gate 		if ((lpc->lpc_pgdata_fd = rfd_open(pathbuf, 1, RFD_PAGEDATA,
481*7c478bd9Sstevel@tonic-gate 		    revoke_pagedata, lpc, O_RDONLY, 0)) < 0 ||
482*7c478bd9Sstevel@tonic-gate 		    get_pagedata(&pghp, lpc->lpc_pgdata_fd) != 0)
483*7c478bd9Sstevel@tonic-gate 			return (-1);
484*7c478bd9Sstevel@tonic-gate 		debug("starting/resuming pagedata collection for %d\n",
485*7c478bd9Sstevel@tonic-gate 		    (int)lpc->lpc_pid);
486*7c478bd9Sstevel@tonic-gate 	}
487*7c478bd9Sstevel@tonic-gate 	debug("process %d: %llu/%llukB r/m'd since last read\n",
488*7c478bd9Sstevel@tonic-gate 	    (int)lpc->lpc_pid, (unsigned long long)count_pages(pghp, 0,
489*7c478bd9Sstevel@tonic-gate 	    PG_MODIFIED | PG_REFERENCED, 0), (unsigned long long)lpc->lpc_rss);
490*7c478bd9Sstevel@tonic-gate 	if (lpc->lpc_prpageheader != NULL) {
491*7c478bd9Sstevel@tonic-gate 		/*
492*7c478bd9Sstevel@tonic-gate 		 * OR the two snapshots.
493*7c478bd9Sstevel@tonic-gate 		 */
494*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
495*7c478bd9Sstevel@tonic-gate 		lmapping_t *old = NULL;
496*7c478bd9Sstevel@tonic-gate 		lmapping_t *new = NULL;
497*7c478bd9Sstevel@tonic-gate 
498*7c478bd9Sstevel@tonic-gate 		mklmapping(&new, pghp);
499*7c478bd9Sstevel@tonic-gate 		mklmapping(&old, lpc->lpc_prpageheader);
500*7c478bd9Sstevel@tonic-gate #endif /* DEBUG */
501*7c478bd9Sstevel@tonic-gate 		OR_pagedata(lpc->lpc_prpageheader, pghp, &mappings_changed);
502*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
503*7c478bd9Sstevel@tonic-gate 		if (((mappings_changed != 0) ^
504*7c478bd9Sstevel@tonic-gate 		    (lmapping_dump_diff(old, new) != 0))) {
505*7c478bd9Sstevel@tonic-gate 			debug("lmapping_changed inconsistent with lmapping\n");
506*7c478bd9Sstevel@tonic-gate 			debug("old\n");
507*7c478bd9Sstevel@tonic-gate 			lmapping_dump(old);
508*7c478bd9Sstevel@tonic-gate 			debug("new\n");
509*7c478bd9Sstevel@tonic-gate 			lmapping_dump(new);
510*7c478bd9Sstevel@tonic-gate 			debug("ignored\n");
511*7c478bd9Sstevel@tonic-gate 			lmapping_dump(lpc->lpc_ignore);
512*7c478bd9Sstevel@tonic-gate 			ASSERT(0);
513*7c478bd9Sstevel@tonic-gate 		}
514*7c478bd9Sstevel@tonic-gate 		lmapping_free(&new);
515*7c478bd9Sstevel@tonic-gate 		lmapping_free(&old);
516*7c478bd9Sstevel@tonic-gate #endif /* DEBUG */
517*7c478bd9Sstevel@tonic-gate 		free(lpc->lpc_prpageheader);
518*7c478bd9Sstevel@tonic-gate 	} else
519*7c478bd9Sstevel@tonic-gate 		mappings_changed = 1;
520*7c478bd9Sstevel@tonic-gate 	lpc->lpc_prpageheader = pghp;
521*7c478bd9Sstevel@tonic-gate 	debug("process %d: %llu/%llukB r/m'd since hand swept\n",
522*7c478bd9Sstevel@tonic-gate 	    (int)lpc->lpc_pid, (unsigned long long)count_pages(pghp, 0,
523*7c478bd9Sstevel@tonic-gate 	    PG_MODIFIED | PG_REFERENCED, 0),
524*7c478bd9Sstevel@tonic-gate 	    (unsigned long long)lpc->lpc_rss);
525*7c478bd9Sstevel@tonic-gate 	if (mappings_changed != 0) {
526*7c478bd9Sstevel@tonic-gate 		debug("process %d: mappings changed\n", (int)lpc->lpc_pid);
527*7c478bd9Sstevel@tonic-gate 		if (mappings_changed_cb != NULL)
528*7c478bd9Sstevel@tonic-gate 			mappings_changed_cb(lpc);
529*7c478bd9Sstevel@tonic-gate 	}
530*7c478bd9Sstevel@tonic-gate 	return (0);
531*7c478bd9Sstevel@tonic-gate }
532*7c478bd9Sstevel@tonic-gate 
533*7c478bd9Sstevel@tonic-gate /*
534*7c478bd9Sstevel@tonic-gate  * Attempt to page out a region of the given process's address space.  May
535*7c478bd9Sstevel@tonic-gate  * return nonzero if not all of the pages may are pageable, for any reason.
536*7c478bd9Sstevel@tonic-gate  */
537*7c478bd9Sstevel@tonic-gate static int
538*7c478bd9Sstevel@tonic-gate pageout(pid_t pid, struct ps_prochandle *Pr, caddr_t start, caddr_t end)
539*7c478bd9Sstevel@tonic-gate {
540*7c478bd9Sstevel@tonic-gate 	int res;
541*7c478bd9Sstevel@tonic-gate 
542*7c478bd9Sstevel@tonic-gate 	if (end <= start)
543*7c478bd9Sstevel@tonic-gate 		return (0);
544*7c478bd9Sstevel@tonic-gate 
545*7c478bd9Sstevel@tonic-gate 	errno = 0;
546*7c478bd9Sstevel@tonic-gate 	res = pr_memcntl(Pr, start, (end - start), MC_SYNC,
547*7c478bd9Sstevel@tonic-gate 	    (caddr_t)(MS_ASYNC | MS_INVALIDATE), 0, 0);
548*7c478bd9Sstevel@tonic-gate 	debug_high("pr_memcntl [%p-%p): %d", (void *)start, (void *)end, res);
549*7c478bd9Sstevel@tonic-gate 
550*7c478bd9Sstevel@tonic-gate 	/*
551*7c478bd9Sstevel@tonic-gate 	 * EBUSY indicates none of the pages have backing store allocated, or
552*7c478bd9Sstevel@tonic-gate 	 * some pages were locked, which are less interesting than other
553*7c478bd9Sstevel@tonic-gate 	 * conditions, which are noted.
554*7c478bd9Sstevel@tonic-gate 	 */
555*7c478bd9Sstevel@tonic-gate 	if (res != 0)
556*7c478bd9Sstevel@tonic-gate 		if (errno == EBUSY)
557*7c478bd9Sstevel@tonic-gate 			res = 0;
558*7c478bd9Sstevel@tonic-gate 		else
559*7c478bd9Sstevel@tonic-gate 			debug("%d: can't pageout %p+%llx (errno %d)", (int)pid,
560*7c478bd9Sstevel@tonic-gate 			    (void *)start, (long long)(end - start), errno);
561*7c478bd9Sstevel@tonic-gate 
562*7c478bd9Sstevel@tonic-gate 	return (res);
563*7c478bd9Sstevel@tonic-gate }
564*7c478bd9Sstevel@tonic-gate 
565*7c478bd9Sstevel@tonic-gate /*
566*7c478bd9Sstevel@tonic-gate  * Compute the delta of the victim process's RSS since the last call.  If the
567*7c478bd9Sstevel@tonic-gate  * psinfo cannot be obtained, no work is done, and no error is returned; it is
568*7c478bd9Sstevel@tonic-gate  * up to the caller to detect the process' termination via other means.
569*7c478bd9Sstevel@tonic-gate  */
570*7c478bd9Sstevel@tonic-gate static int64_t
571*7c478bd9Sstevel@tonic-gate rss_delta(psinfo_t *new_psinfo, psinfo_t *old_psinfo, lprocess_t *vic)
572*7c478bd9Sstevel@tonic-gate {
573*7c478bd9Sstevel@tonic-gate 	int64_t d_rss = 0;
574*7c478bd9Sstevel@tonic-gate 
575*7c478bd9Sstevel@tonic-gate 	if (get_psinfo(vic->lpc_pid, new_psinfo, vic->lpc_psinfo_fd,
576*7c478bd9Sstevel@tonic-gate 	    lprocess_update_psinfo_fd_cb, vic, vic) == 0) {
577*7c478bd9Sstevel@tonic-gate 		d_rss = (int64_t)new_psinfo->pr_rssize -
578*7c478bd9Sstevel@tonic-gate 		    (int64_t)old_psinfo->pr_rssize;
579*7c478bd9Sstevel@tonic-gate 		if (d_rss < 0)
580*7c478bd9Sstevel@tonic-gate 			vic->lpc_collection->lcol_stat.lcols_pg_eff +=
581*7c478bd9Sstevel@tonic-gate 			    (- d_rss);
582*7c478bd9Sstevel@tonic-gate 		*old_psinfo = *new_psinfo;
583*7c478bd9Sstevel@tonic-gate 	}
584*7c478bd9Sstevel@tonic-gate 
585*7c478bd9Sstevel@tonic-gate 	return (d_rss);
586*7c478bd9Sstevel@tonic-gate }
587*7c478bd9Sstevel@tonic-gate 
588*7c478bd9Sstevel@tonic-gate static void
589*7c478bd9Sstevel@tonic-gate unignore_mappings(lprocess_t *lpc)
590*7c478bd9Sstevel@tonic-gate {
591*7c478bd9Sstevel@tonic-gate 	debug("clearing ignored set\n");
592*7c478bd9Sstevel@tonic-gate 	lmapping_free(&lpc->lpc_ignore);
593*7c478bd9Sstevel@tonic-gate }
594*7c478bd9Sstevel@tonic-gate 
595*7c478bd9Sstevel@tonic-gate static void
596*7c478bd9Sstevel@tonic-gate unignore_referenced_mappings(lprocess_t *lpc)
597*7c478bd9Sstevel@tonic-gate {
598*7c478bd9Sstevel@tonic-gate 	prpageheader_cur_t cur;
599*7c478bd9Sstevel@tonic-gate 	void *vicaddr;
600*7c478bd9Sstevel@tonic-gate 
601*7c478bd9Sstevel@tonic-gate 	vicaddr = set_prpageheader_cur(&cur, lpc->lpc_prpageheader, NULL, -1);
602*7c478bd9Sstevel@tonic-gate 	while (vicaddr != NULL) {
603*7c478bd9Sstevel@tonic-gate 		if (((*(char *)cur.pr_pdaddr) & (PG_REFERENCED | PG_MODIFIED))
604*7c478bd9Sstevel@tonic-gate 		    != 0) {
605*7c478bd9Sstevel@tonic-gate 			if (lmapping_remove(&lpc->lpc_ignore, cur.pr_addr,
606*7c478bd9Sstevel@tonic-gate 			    cur.pr_npage * cur.pr_pagesize) == 0)
607*7c478bd9Sstevel@tonic-gate 				debug("removed mapping 0x%p+0t%llukB from"
608*7c478bd9Sstevel@tonic-gate 				    " ignored set\n", (void *)cur.pr_addr,
609*7c478bd9Sstevel@tonic-gate 				    (unsigned long long)(cur.pr_npage *
610*7c478bd9Sstevel@tonic-gate 				    cur.pr_pagesize / 1024));
611*7c478bd9Sstevel@tonic-gate 			vicaddr = (void *)advance_prpageheader_cur_nextmapping(
612*7c478bd9Sstevel@tonic-gate 			    &cur);
613*7c478bd9Sstevel@tonic-gate 		} else if ((vicaddr = advance_prpageheader_cur(&cur)) == NULL)
614*7c478bd9Sstevel@tonic-gate 			vicaddr = (void *)advance_prpageheader_cur_nextmapping(
615*7c478bd9Sstevel@tonic-gate 			    &cur);
616*7c478bd9Sstevel@tonic-gate 	}
617*7c478bd9Sstevel@tonic-gate }
618*7c478bd9Sstevel@tonic-gate 
619*7c478bd9Sstevel@tonic-gate /*
620*7c478bd9Sstevel@tonic-gate  * Resume scanning, starting with the last victim, if it is still valid, or any
621*7c478bd9Sstevel@tonic-gate  * other one, otherwise.
622*7c478bd9Sstevel@tonic-gate  */
623*7c478bd9Sstevel@tonic-gate void
624*7c478bd9Sstevel@tonic-gate scan(lcollection_t *lcol, int64_t excess)
625*7c478bd9Sstevel@tonic-gate {
626*7c478bd9Sstevel@tonic-gate 	lprocess_t *vic, *lpc;
627*7c478bd9Sstevel@tonic-gate 	void *vicaddr, *endaddr, *nvicaddr;
628*7c478bd9Sstevel@tonic-gate 	prpageheader_cur_t cur;
629*7c478bd9Sstevel@tonic-gate 	psinfo_t old_psinfo, new_psinfo;
630*7c478bd9Sstevel@tonic-gate 	hrtime_t scan_start;
631*7c478bd9Sstevel@tonic-gate 	int res, resumed;
632*7c478bd9Sstevel@tonic-gate 	uint64_t col_unrm_size;
633*7c478bd9Sstevel@tonic-gate 
634*7c478bd9Sstevel@tonic-gate 	st_debug(STDL_NORMAL, lcol, "starting to scan, excess %lldk\n",
635*7c478bd9Sstevel@tonic-gate 	    (long long)excess);
636*7c478bd9Sstevel@tonic-gate 
637*7c478bd9Sstevel@tonic-gate 	/*
638*7c478bd9Sstevel@tonic-gate 	 * Determine the address to start scanning at, depending on whether
639*7c478bd9Sstevel@tonic-gate 	 * scanning can be resumed.
640*7c478bd9Sstevel@tonic-gate 	 */
641*7c478bd9Sstevel@tonic-gate 	endaddr = NULL;
642*7c478bd9Sstevel@tonic-gate 	if ((vic = get_valid_victim(lcol, lcol->lcol_victim)) ==
643*7c478bd9Sstevel@tonic-gate 	    lcol->lcol_victim && lcol->lcol_resaddr != NULL) {
644*7c478bd9Sstevel@tonic-gate 		vicaddr = lcol->lcol_resaddr;
645*7c478bd9Sstevel@tonic-gate 		st_debug(STDL_NORMAL, lcol, "resuming process %d\n",
646*7c478bd9Sstevel@tonic-gate 		    (int)vic->lpc_pid);
647*7c478bd9Sstevel@tonic-gate 		resumed = 1;
648*7c478bd9Sstevel@tonic-gate 	} else {
649*7c478bd9Sstevel@tonic-gate 		vicaddr = NULL;
650*7c478bd9Sstevel@tonic-gate 		resumed = 0;
651*7c478bd9Sstevel@tonic-gate 	}
652*7c478bd9Sstevel@tonic-gate 
653*7c478bd9Sstevel@tonic-gate 	scan_start = gethrtime();
654*7c478bd9Sstevel@tonic-gate 	/*
655*7c478bd9Sstevel@tonic-gate 	 * Obtain the most current pagedata for the processes that might be
656*7c478bd9Sstevel@tonic-gate 	 * scanned, and remove from the ignored set any mappings which have
657*7c478bd9Sstevel@tonic-gate 	 * referenced or modified pages (in the hopes that the pageability of
658*7c478bd9Sstevel@tonic-gate 	 * the mapping's pages may have changed).  Determine if the
659*7c478bd9Sstevel@tonic-gate 	 * unreferenced and unmodified portion is impossibly small to suffice
660*7c478bd9Sstevel@tonic-gate 	 * to reduce the excess completely.  If so, ignore these bits so that
661*7c478bd9Sstevel@tonic-gate 	 * even working set will be paged out.
662*7c478bd9Sstevel@tonic-gate 	 */
663*7c478bd9Sstevel@tonic-gate 	col_unrm_size = 0;
664*7c478bd9Sstevel@tonic-gate 	lpc = vic;
665*7c478bd9Sstevel@tonic-gate 	while (lpc != NULL && should_run) {
666*7c478bd9Sstevel@tonic-gate 		if (merge_current_pagedata(lpc, unignore_mappings) != 0) {
667*7c478bd9Sstevel@tonic-gate 			st_debug(STDL_NORMAL, lcol, "process %d:"
668*7c478bd9Sstevel@tonic-gate 			    " exited/temporarily unscannable",
669*7c478bd9Sstevel@tonic-gate 			    (int)lpc->lpc_pid);
670*7c478bd9Sstevel@tonic-gate 			goto next;
671*7c478bd9Sstevel@tonic-gate 		}
672*7c478bd9Sstevel@tonic-gate 		debug("process %d: %llu/%llukB scannable\n", (int)lpc->lpc_pid,
673*7c478bd9Sstevel@tonic-gate 		    (unsigned long long)(lpc->lpc_unrm = unrm_size(lpc)),
674*7c478bd9Sstevel@tonic-gate 		    (unsigned long long)lpc->lpc_size);
675*7c478bd9Sstevel@tonic-gate 		col_unrm_size += lpc->lpc_unrm = unrm_size(lpc);
676*7c478bd9Sstevel@tonic-gate 
677*7c478bd9Sstevel@tonic-gate 		if ((lcol->lcol_stat.lcols_scan_count %
678*7c478bd9Sstevel@tonic-gate 		    RCAPD_IGNORED_SET_FLUSH_IVAL) == 0) {
679*7c478bd9Sstevel@tonic-gate 			/*
680*7c478bd9Sstevel@tonic-gate 			 * Periodically clear the set of ignored mappings.
681*7c478bd9Sstevel@tonic-gate 			 * This will allow processes whose ignored segments'
682*7c478bd9Sstevel@tonic-gate 			 * pageability have changed (without a corresponding
683*7c478bd9Sstevel@tonic-gate 			 * reference or modification to a page) to be
684*7c478bd9Sstevel@tonic-gate 			 * recognized.
685*7c478bd9Sstevel@tonic-gate 			 */
686*7c478bd9Sstevel@tonic-gate 			if (lcol->lcol_stat.lcols_scan_count > 0)
687*7c478bd9Sstevel@tonic-gate 				unignore_mappings(lpc);
688*7c478bd9Sstevel@tonic-gate 		} else {
689*7c478bd9Sstevel@tonic-gate 			/*
690*7c478bd9Sstevel@tonic-gate 			 * Ensure mappings with referenced or modified pages
691*7c478bd9Sstevel@tonic-gate 			 * are not in the ignored set.  Their usage might mean
692*7c478bd9Sstevel@tonic-gate 			 * the condition which made them unpageable is gone.
693*7c478bd9Sstevel@tonic-gate 			 */
694*7c478bd9Sstevel@tonic-gate 			unignore_referenced_mappings(lpc);
695*7c478bd9Sstevel@tonic-gate 		}
696*7c478bd9Sstevel@tonic-gate next:
697*7c478bd9Sstevel@tonic-gate 		lpc = lpc->lpc_next != NULL ? get_valid_victim(lcol,
698*7c478bd9Sstevel@tonic-gate 		    lpc->lpc_next) : NULL;
699*7c478bd9Sstevel@tonic-gate 	}
700*7c478bd9Sstevel@tonic-gate 	if (col_unrm_size < excess) {
701*7c478bd9Sstevel@tonic-gate 		lpc = vic;
702*7c478bd9Sstevel@tonic-gate 		debug("will not reduce excess with only unreferenced pages\n");
703*7c478bd9Sstevel@tonic-gate 		while (lpc != NULL && should_run) {
704*7c478bd9Sstevel@tonic-gate 			if (lpc->lpc_prpageheader != NULL) {
705*7c478bd9Sstevel@tonic-gate 				(void) count_pages(lpc->lpc_prpageheader,
706*7c478bd9Sstevel@tonic-gate 				    CP_CLEAR, 0, 0);
707*7c478bd9Sstevel@tonic-gate 				if (lpc->lpc_pgdata_fd >= 0) {
708*7c478bd9Sstevel@tonic-gate 					if (rfd_close(lpc->lpc_pgdata_fd) != 0)
709*7c478bd9Sstevel@tonic-gate 						debug("coud not close %d"
710*7c478bd9Sstevel@tonic-gate 						    " lpc_pgdata_fd %d",
711*7c478bd9Sstevel@tonic-gate 						    (int)lpc->lpc_pid,
712*7c478bd9Sstevel@tonic-gate 						    lpc->lpc_pgdata_fd);
713*7c478bd9Sstevel@tonic-gate 					lpc->lpc_pgdata_fd = -1;
714*7c478bd9Sstevel@tonic-gate 				}
715*7c478bd9Sstevel@tonic-gate 			}
716*7c478bd9Sstevel@tonic-gate 			lpc = lpc->lpc_next != NULL ? get_valid_victim(lcol,
717*7c478bd9Sstevel@tonic-gate 			    lpc->lpc_next) : NULL;
718*7c478bd9Sstevel@tonic-gate 		}
719*7c478bd9Sstevel@tonic-gate 	}
720*7c478bd9Sstevel@tonic-gate 
721*7c478bd9Sstevel@tonic-gate 	/*
722*7c478bd9Sstevel@tonic-gate 	 * Examine each process for pages to remove until the excess is
723*7c478bd9Sstevel@tonic-gate 	 * reduced.
724*7c478bd9Sstevel@tonic-gate 	 */
725*7c478bd9Sstevel@tonic-gate 	while (vic != NULL && excess > 0 && should_run) {
726*7c478bd9Sstevel@tonic-gate 		/*
727*7c478bd9Sstevel@tonic-gate 		 * Skip processes whose death was reported when the merging of
728*7c478bd9Sstevel@tonic-gate 		 * pagedata was attempted.
729*7c478bd9Sstevel@tonic-gate 		 */
730*7c478bd9Sstevel@tonic-gate 		if (vic->lpc_prpageheader == NULL)
731*7c478bd9Sstevel@tonic-gate 			goto nextproc;
732*7c478bd9Sstevel@tonic-gate 
733*7c478bd9Sstevel@tonic-gate 		/*
734*7c478bd9Sstevel@tonic-gate 		 * Obtain optional segment residency information.
735*7c478bd9Sstevel@tonic-gate 		 */
736*7c478bd9Sstevel@tonic-gate 		if (lpc_xmap_update(vic) != 0)
737*7c478bd9Sstevel@tonic-gate 			st_debug(STDL_NORMAL, lcol, "process %d: xmap"
738*7c478bd9Sstevel@tonic-gate 			    " unreadable; ignoring", (int)vic->lpc_pid);
739*7c478bd9Sstevel@tonic-gate 
740*7c478bd9Sstevel@tonic-gate #ifdef DEBUG_MSG
741*7c478bd9Sstevel@tonic-gate 		{
742*7c478bd9Sstevel@tonic-gate 			void *ovicaddr = vicaddr;
743*7c478bd9Sstevel@tonic-gate #endif /* DEBUG_MSG */
744*7c478bd9Sstevel@tonic-gate 		vicaddr = set_prpageheader_cur_addr(&cur, vic->lpc_prpageheader,
745*7c478bd9Sstevel@tonic-gate 		    vic->lpc_xmap, vic->lpc_nxmap, vicaddr);
746*7c478bd9Sstevel@tonic-gate #ifdef DEBUG_MSG
747*7c478bd9Sstevel@tonic-gate 			st_debug(STDL_NORMAL, lcol, "trying to resume from"
748*7c478bd9Sstevel@tonic-gate 			    " 0x%p, next 0x%p\n", ovicaddr, vicaddr);
749*7c478bd9Sstevel@tonic-gate 		}
750*7c478bd9Sstevel@tonic-gate #endif /* DEBUG_MSG */
751*7c478bd9Sstevel@tonic-gate 
752*7c478bd9Sstevel@tonic-gate 		/*
753*7c478bd9Sstevel@tonic-gate 		 * Take control of the victim.
754*7c478bd9Sstevel@tonic-gate 		 */
755*7c478bd9Sstevel@tonic-gate 		if (get_psinfo(vic->lpc_pid, &old_psinfo,
756*7c478bd9Sstevel@tonic-gate 		    vic->lpc_psinfo_fd, lprocess_update_psinfo_fd_cb,
757*7c478bd9Sstevel@tonic-gate 		    vic, vic) != 0) {
758*7c478bd9Sstevel@tonic-gate 			st_debug(STDL_NORMAL, lcol, "cannot get %d psinfo",
759*7c478bd9Sstevel@tonic-gate 			    (int)vic->lpc_pid);
760*7c478bd9Sstevel@tonic-gate 			goto nextproc;
761*7c478bd9Sstevel@tonic-gate 		}
762*7c478bd9Sstevel@tonic-gate 		(void) rfd_reserve(PGRAB_FD_COUNT);
763*7c478bd9Sstevel@tonic-gate 		if ((scan_pr = Pgrab(vic->lpc_pid, 0, &res)) == NULL) {
764*7c478bd9Sstevel@tonic-gate 			st_debug(STDL_NORMAL, lcol, "cannot grab %d (%d)",
765*7c478bd9Sstevel@tonic-gate 			    (int)vic->lpc_pid, res);
766*7c478bd9Sstevel@tonic-gate 			goto nextproc;
767*7c478bd9Sstevel@tonic-gate 		}
768*7c478bd9Sstevel@tonic-gate 		if (Pcreate_agent(scan_pr) != 0) {
769*7c478bd9Sstevel@tonic-gate 			st_debug(STDL_NORMAL, lcol, "cannot control %d",
770*7c478bd9Sstevel@tonic-gate 			    (int)vic->lpc_pid);
771*7c478bd9Sstevel@tonic-gate 			goto nextproc;
772*7c478bd9Sstevel@tonic-gate 		}
773*7c478bd9Sstevel@tonic-gate 		/*
774*7c478bd9Sstevel@tonic-gate 		 * Be very pessimistic about the state of the agent LWP --
775*7c478bd9Sstevel@tonic-gate 		 * verify it's actually stopped.
776*7c478bd9Sstevel@tonic-gate 		 */
777*7c478bd9Sstevel@tonic-gate 		errno = 0;
778*7c478bd9Sstevel@tonic-gate 		while (Pstate(scan_pr) == PS_RUN)
779*7c478bd9Sstevel@tonic-gate 			(void) Pwait(scan_pr, 0);
780*7c478bd9Sstevel@tonic-gate 		if (Pstate(scan_pr) != PS_STOP) {
781*7c478bd9Sstevel@tonic-gate 			st_debug(STDL_NORMAL, lcol, "agent not in expected"
782*7c478bd9Sstevel@tonic-gate 			    " state (%d)", Pstate(scan_pr));
783*7c478bd9Sstevel@tonic-gate 			goto nextproc;
784*7c478bd9Sstevel@tonic-gate 		}
785*7c478bd9Sstevel@tonic-gate 
786*7c478bd9Sstevel@tonic-gate 		/*
787*7c478bd9Sstevel@tonic-gate 		 * Within the victim's address space, find contiguous ranges of
788*7c478bd9Sstevel@tonic-gate 		 * unreferenced pages to page out.
789*7c478bd9Sstevel@tonic-gate 		 */
790*7c478bd9Sstevel@tonic-gate 		st_debug(STDL_NORMAL, lcol, "paging out process %d\n",
791*7c478bd9Sstevel@tonic-gate 		    (int)vic->lpc_pid);
792*7c478bd9Sstevel@tonic-gate 		while (excess > 0 && vicaddr != NULL && should_run) {
793*7c478bd9Sstevel@tonic-gate 			/*
794*7c478bd9Sstevel@tonic-gate 			 * Skip mappings in the ignored set.  Mappings get
795*7c478bd9Sstevel@tonic-gate 			 * placed in the ignored set when all their resident
796*7c478bd9Sstevel@tonic-gate 			 * pages are unreference and unmodified, yet unpageable
797*7c478bd9Sstevel@tonic-gate 			 * -- such as when they are locked, or involved in
798*7c478bd9Sstevel@tonic-gate 			 * asynchronous I/O.  They will be scanned again when
799*7c478bd9Sstevel@tonic-gate 			 * some page is referenced or modified.
800*7c478bd9Sstevel@tonic-gate 			 */
801*7c478bd9Sstevel@tonic-gate 			if (lmapping_contains(vic->lpc_ignore, cur.pr_addr,
802*7c478bd9Sstevel@tonic-gate 			    cur.pr_npage * cur.pr_pagesize)) {
803*7c478bd9Sstevel@tonic-gate 				debug("ignored mapping at 0x%p\n",
804*7c478bd9Sstevel@tonic-gate 				    (void *)cur.pr_addr);
805*7c478bd9Sstevel@tonic-gate 				/*
806*7c478bd9Sstevel@tonic-gate 				 * Update statistics.
807*7c478bd9Sstevel@tonic-gate 				 */
808*7c478bd9Sstevel@tonic-gate 				lcol->lcol_stat.lcols_pg_att +=
809*7c478bd9Sstevel@tonic-gate 				    cur.pr_npage * cur.pr_pagesize / 1024;
810*7c478bd9Sstevel@tonic-gate 
811*7c478bd9Sstevel@tonic-gate 				vicaddr = (void *)
812*7c478bd9Sstevel@tonic-gate 				    advance_prpageheader_cur_nextmapping(&cur);
813*7c478bd9Sstevel@tonic-gate 				continue;
814*7c478bd9Sstevel@tonic-gate 			}
815*7c478bd9Sstevel@tonic-gate 
816*7c478bd9Sstevel@tonic-gate 			/*
817*7c478bd9Sstevel@tonic-gate 			 * Determine a range of unreferenced pages to page out,
818*7c478bd9Sstevel@tonic-gate 			 * and clear the R/M bits in the preceding referenced
819*7c478bd9Sstevel@tonic-gate 			 * range.
820*7c478bd9Sstevel@tonic-gate 			 */
821*7c478bd9Sstevel@tonic-gate 			st_debug(STDL_HIGH, lcol, "start from mapping at 0x%p,"
822*7c478bd9Sstevel@tonic-gate 			    " npage %llu\n", vicaddr,
823*7c478bd9Sstevel@tonic-gate 			    (unsigned long long)cur.pr_npage);
824*7c478bd9Sstevel@tonic-gate 			while (vicaddr != NULL &&
825*7c478bd9Sstevel@tonic-gate 			    *(caddr_t)cur.pr_pdaddr != 0) {
826*7c478bd9Sstevel@tonic-gate 				*(caddr_t)cur.pr_pdaddr = 0;
827*7c478bd9Sstevel@tonic-gate 				vicaddr = advance_prpageheader_cur(&cur);
828*7c478bd9Sstevel@tonic-gate 			}
829*7c478bd9Sstevel@tonic-gate 			st_debug(STDL_HIGH, lcol, "advance, vicaddr %p, pdaddr"
830*7c478bd9Sstevel@tonic-gate 			    " %p\n", vicaddr, cur.pr_pdaddr);
831*7c478bd9Sstevel@tonic-gate 			if (vicaddr == NULL) {
832*7c478bd9Sstevel@tonic-gate 				/*
833*7c478bd9Sstevel@tonic-gate 				 * The end of mapping was reached before any
834*7c478bd9Sstevel@tonic-gate 				 * unreferenced pages were seen.
835*7c478bd9Sstevel@tonic-gate 				 */
836*7c478bd9Sstevel@tonic-gate 				vicaddr = (void *)
837*7c478bd9Sstevel@tonic-gate 				    advance_prpageheader_cur_nextmapping(&cur);
838*7c478bd9Sstevel@tonic-gate 				continue;
839*7c478bd9Sstevel@tonic-gate 			}
840*7c478bd9Sstevel@tonic-gate 			do
841*7c478bd9Sstevel@tonic-gate 				endaddr = advance_prpageheader_cur(&cur);
842*7c478bd9Sstevel@tonic-gate 			while (endaddr != NULL &&
843*7c478bd9Sstevel@tonic-gate 			    *(caddr_t)cur.pr_pdaddr == 0 &&
844*7c478bd9Sstevel@tonic-gate 			    (((intptr_t)endaddr - (intptr_t)vicaddr) /
845*7c478bd9Sstevel@tonic-gate 				1024) < excess);
846*7c478bd9Sstevel@tonic-gate 			st_debug(STDL_HIGH, lcol, "endaddr %p, *cur %d\n",
847*7c478bd9Sstevel@tonic-gate 			    endaddr, *(caddr_t)cur.pr_pdaddr);
848*7c478bd9Sstevel@tonic-gate 
849*7c478bd9Sstevel@tonic-gate 			/*
850*7c478bd9Sstevel@tonic-gate 			 * Page out from vicaddr to the end of the mapping, or
851*7c478bd9Sstevel@tonic-gate 			 * endaddr if set, then continue scanning after
852*7c478bd9Sstevel@tonic-gate 			 * endaddr, or the next mapping, if not set.
853*7c478bd9Sstevel@tonic-gate 			 */
854*7c478bd9Sstevel@tonic-gate 			nvicaddr = endaddr;
855*7c478bd9Sstevel@tonic-gate 			if (endaddr == NULL)
856*7c478bd9Sstevel@tonic-gate 				endaddr = (caddr_t)cur.pr_addr +
857*7c478bd9Sstevel@tonic-gate 				    cur.pr_pagesize * cur.pr_npage;
858*7c478bd9Sstevel@tonic-gate 			if (pageout(vic->lpc_pid, scan_pr, vicaddr, endaddr) ==
859*7c478bd9Sstevel@tonic-gate 			    0) {
860*7c478bd9Sstevel@tonic-gate 				int64_t d_rss, att;
861*7c478bd9Sstevel@tonic-gate 				int willignore = 0;
862*7c478bd9Sstevel@tonic-gate 
863*7c478bd9Sstevel@tonic-gate 				excess += (d_rss = rss_delta(
864*7c478bd9Sstevel@tonic-gate 				    &new_psinfo, &old_psinfo, vic));
865*7c478bd9Sstevel@tonic-gate 
866*7c478bd9Sstevel@tonic-gate 				/*
867*7c478bd9Sstevel@tonic-gate 				 * If this pageout attempt was unsuccessful
868*7c478bd9Sstevel@tonic-gate 				 * (the resident portion was not affected), and
869*7c478bd9Sstevel@tonic-gate 				 * was for the whole mapping, put it in the
870*7c478bd9Sstevel@tonic-gate 				 * ignored set, so it will not be scanned again
871*7c478bd9Sstevel@tonic-gate 				 * until some page is referenced or modified.
872*7c478bd9Sstevel@tonic-gate 				 */
873*7c478bd9Sstevel@tonic-gate 				if (d_rss >= 0 && (void *)cur.pr_addr ==
874*7c478bd9Sstevel@tonic-gate 				    vicaddr && (cur.pr_pagesize * cur.pr_npage)
875*7c478bd9Sstevel@tonic-gate 				    == ((uintptr_t)endaddr -
876*7c478bd9Sstevel@tonic-gate 				    (uintptr_t)vicaddr)) {
877*7c478bd9Sstevel@tonic-gate 					if (lmapping_insert(
878*7c478bd9Sstevel@tonic-gate 					    &vic->lpc_ignore,
879*7c478bd9Sstevel@tonic-gate 					    cur.pr_addr,
880*7c478bd9Sstevel@tonic-gate 					    cur.pr_pagesize *
881*7c478bd9Sstevel@tonic-gate 					    cur.pr_npage) != 0)
882*7c478bd9Sstevel@tonic-gate 						debug("not enough memory to add"
883*7c478bd9Sstevel@tonic-gate 						    " mapping at %p to ignored"
884*7c478bd9Sstevel@tonic-gate 						    " set\n",
885*7c478bd9Sstevel@tonic-gate 						    (void *)cur.pr_addr);
886*7c478bd9Sstevel@tonic-gate 					willignore = 1;
887*7c478bd9Sstevel@tonic-gate 				}
888*7c478bd9Sstevel@tonic-gate 
889*7c478bd9Sstevel@tonic-gate 				/*
890*7c478bd9Sstevel@tonic-gate 				 * Update statistics.
891*7c478bd9Sstevel@tonic-gate 				 */
892*7c478bd9Sstevel@tonic-gate 				lcol->lcol_stat.lcols_pg_att += (att =
893*7c478bd9Sstevel@tonic-gate 				    ((intptr_t)endaddr - (intptr_t)vicaddr) /
894*7c478bd9Sstevel@tonic-gate 				    1024);
895*7c478bd9Sstevel@tonic-gate 				st_debug(STDL_NORMAL, lcol, "paged out 0x%p"
896*7c478bd9Sstevel@tonic-gate 				    "+0t(%llu/%llu)kB%s\n", vicaddr,
897*7c478bd9Sstevel@tonic-gate 				    (unsigned long long)((d_rss <
898*7c478bd9Sstevel@tonic-gate 				    0) ? - d_rss : 0), (unsigned long long)att,
899*7c478bd9Sstevel@tonic-gate 				    willignore ? " (will ignore)" : "");
900*7c478bd9Sstevel@tonic-gate 			} else {
901*7c478bd9Sstevel@tonic-gate 				st_debug(STDL_NORMAL, lcol,
902*7c478bd9Sstevel@tonic-gate 				    "process %d: exited/unscannable\n",
903*7c478bd9Sstevel@tonic-gate 				    (int)vic->lpc_pid);
904*7c478bd9Sstevel@tonic-gate 				vic->lpc_unscannable = 1;
905*7c478bd9Sstevel@tonic-gate 				goto nextproc;
906*7c478bd9Sstevel@tonic-gate 			}
907*7c478bd9Sstevel@tonic-gate 
908*7c478bd9Sstevel@tonic-gate 			/*
909*7c478bd9Sstevel@tonic-gate 			 * Update the statistics file, if it's time.
910*7c478bd9Sstevel@tonic-gate 			 */
911*7c478bd9Sstevel@tonic-gate 			check_update_statistics();
912*7c478bd9Sstevel@tonic-gate 
913*7c478bd9Sstevel@tonic-gate 			vicaddr = (nvicaddr != NULL) ? nvicaddr : (void
914*7c478bd9Sstevel@tonic-gate 			    *)advance_prpageheader_cur_nextmapping(&cur);
915*7c478bd9Sstevel@tonic-gate 		}
916*7c478bd9Sstevel@tonic-gate 		excess += rss_delta(&new_psinfo, &old_psinfo, vic);
917*7c478bd9Sstevel@tonic-gate 		st_debug(STDL_NORMAL, lcol, "done, excess %lld\n",
918*7c478bd9Sstevel@tonic-gate 		    (long long)excess);
919*7c478bd9Sstevel@tonic-gate nextproc:
920*7c478bd9Sstevel@tonic-gate 		/*
921*7c478bd9Sstevel@tonic-gate 		 * If a process was grabbed, release it, destroying its agent.
922*7c478bd9Sstevel@tonic-gate 		 */
923*7c478bd9Sstevel@tonic-gate 		if (scan_pr != NULL) {
924*7c478bd9Sstevel@tonic-gate 			(void) Prelease(scan_pr, 0);
925*7c478bd9Sstevel@tonic-gate 			scan_pr = NULL;
926*7c478bd9Sstevel@tonic-gate 		}
927*7c478bd9Sstevel@tonic-gate 		lcol->lcol_victim = vic;
928*7c478bd9Sstevel@tonic-gate 		/*
929*7c478bd9Sstevel@tonic-gate 		 * Scan the collection at most once.  Only if scanning was not
930*7c478bd9Sstevel@tonic-gate 		 * aborted for any reason, and the end of lprocess has not been
931*7c478bd9Sstevel@tonic-gate 		 * reached, determine the next victim and scan it.
932*7c478bd9Sstevel@tonic-gate 		 */
933*7c478bd9Sstevel@tonic-gate 		if (vic != NULL) {
934*7c478bd9Sstevel@tonic-gate 			if (vic->lpc_next != NULL) {
935*7c478bd9Sstevel@tonic-gate 				/*
936*7c478bd9Sstevel@tonic-gate 				 * Determine the next process to be scanned.
937*7c478bd9Sstevel@tonic-gate 				 */
938*7c478bd9Sstevel@tonic-gate 				if (excess > 0) {
939*7c478bd9Sstevel@tonic-gate 					vic = get_valid_victim(lcol,
940*7c478bd9Sstevel@tonic-gate 					    vic->lpc_next);
941*7c478bd9Sstevel@tonic-gate 					vicaddr = 0;
942*7c478bd9Sstevel@tonic-gate 				}
943*7c478bd9Sstevel@tonic-gate 			} else {
944*7c478bd9Sstevel@tonic-gate 				/*
945*7c478bd9Sstevel@tonic-gate 				 * A complete scan of the collection was made,
946*7c478bd9Sstevel@tonic-gate 				 * so tick the scan counter and stop scanning
947*7c478bd9Sstevel@tonic-gate 				 * until the next request.
948*7c478bd9Sstevel@tonic-gate 				 */
949*7c478bd9Sstevel@tonic-gate 				lcol->lcol_stat.lcols_scan_count++;
950*7c478bd9Sstevel@tonic-gate 				lcol->lcol_stat.lcols_scan_time_complete
951*7c478bd9Sstevel@tonic-gate 				    = lcol->lcol_stat.lcols_scan_time;
952*7c478bd9Sstevel@tonic-gate 				/*
953*7c478bd9Sstevel@tonic-gate 				 * If an excess still exists, tick the
954*7c478bd9Sstevel@tonic-gate 				 * "ineffective scan" counter, signalling that
955*7c478bd9Sstevel@tonic-gate 				 * the cap may be uneforceable.
956*7c478bd9Sstevel@tonic-gate 				 */
957*7c478bd9Sstevel@tonic-gate 				if (resumed == 0 && excess > 0)
958*7c478bd9Sstevel@tonic-gate 					lcol->lcol_stat
959*7c478bd9Sstevel@tonic-gate 					    .lcols_scan_ineffective++;
960*7c478bd9Sstevel@tonic-gate 				/*
961*7c478bd9Sstevel@tonic-gate 				 * Scanning should start at the beginning of
962*7c478bd9Sstevel@tonic-gate 				 * the process list at the next request.
963*7c478bd9Sstevel@tonic-gate 				 */
964*7c478bd9Sstevel@tonic-gate 				if (excess > 0)
965*7c478bd9Sstevel@tonic-gate 					vic = NULL;
966*7c478bd9Sstevel@tonic-gate 			}
967*7c478bd9Sstevel@tonic-gate 		}
968*7c478bd9Sstevel@tonic-gate 	}
969*7c478bd9Sstevel@tonic-gate 	lcol->lcol_stat.lcols_scan_time += (gethrtime() - scan_start);
970*7c478bd9Sstevel@tonic-gate 	st_debug(STDL_HIGH, lcol, "done scanning; excess %lld\n",
971*7c478bd9Sstevel@tonic-gate 	    (long long)excess);
972*7c478bd9Sstevel@tonic-gate 
973*7c478bd9Sstevel@tonic-gate 	lcol->lcol_resaddr = vicaddr;
974*7c478bd9Sstevel@tonic-gate 	if (lcol->lcol_resaddr == NULL && lcol->lcol_victim != NULL) {
975*7c478bd9Sstevel@tonic-gate 		lcol->lcol_victim = get_valid_victim(lcol,
976*7c478bd9Sstevel@tonic-gate 		    lcol->lcol_victim->lpc_next);
977*7c478bd9Sstevel@tonic-gate 	}
978*7c478bd9Sstevel@tonic-gate }
979*7c478bd9Sstevel@tonic-gate 
980*7c478bd9Sstevel@tonic-gate /*
981*7c478bd9Sstevel@tonic-gate  * Abort the scan in progress, and destroy the agent LWP of any grabbed
982*7c478bd9Sstevel@tonic-gate  * processes.
983*7c478bd9Sstevel@tonic-gate  */
984*7c478bd9Sstevel@tonic-gate void
985*7c478bd9Sstevel@tonic-gate scan_abort(void)
986*7c478bd9Sstevel@tonic-gate {
987*7c478bd9Sstevel@tonic-gate 	if (scan_pr != NULL)
988*7c478bd9Sstevel@tonic-gate 		(void) Prelease(scan_pr, NULL);
989*7c478bd9Sstevel@tonic-gate }
990*7c478bd9Sstevel@tonic-gate 
991*7c478bd9Sstevel@tonic-gate static void
992*7c478bd9Sstevel@tonic-gate revoke_xmap(rfd_t *rfd)
993*7c478bd9Sstevel@tonic-gate {
994*7c478bd9Sstevel@tonic-gate 	lprocess_t *lpc = rfd->rfd_data;
995*7c478bd9Sstevel@tonic-gate 
996*7c478bd9Sstevel@tonic-gate 	debug("revoking xmap for process %d\n", (int)lpc->lpc_pid);
997*7c478bd9Sstevel@tonic-gate 	ASSERT(lpc->lpc_xmap_fd != -1);
998*7c478bd9Sstevel@tonic-gate 	lpc->lpc_xmap_fd = -1;
999*7c478bd9Sstevel@tonic-gate }
1000*7c478bd9Sstevel@tonic-gate 
1001*7c478bd9Sstevel@tonic-gate /*
1002*7c478bd9Sstevel@tonic-gate  * Retrieve the process's current xmap , which is used to determine the size of
1003*7c478bd9Sstevel@tonic-gate  * the resident portion of its segments.  Return zero if successful.
1004*7c478bd9Sstevel@tonic-gate  */
1005*7c478bd9Sstevel@tonic-gate static int
1006*7c478bd9Sstevel@tonic-gate lpc_xmap_update(lprocess_t *lpc)
1007*7c478bd9Sstevel@tonic-gate {
1008*7c478bd9Sstevel@tonic-gate 	int res;
1009*7c478bd9Sstevel@tonic-gate 	struct stat st;
1010*7c478bd9Sstevel@tonic-gate 
1011*7c478bd9Sstevel@tonic-gate 	free(lpc->lpc_xmap);
1012*7c478bd9Sstevel@tonic-gate 	lpc->lpc_xmap = NULL;
1013*7c478bd9Sstevel@tonic-gate 	lpc->lpc_nxmap = -1;
1014*7c478bd9Sstevel@tonic-gate 
1015*7c478bd9Sstevel@tonic-gate 	if (lpc->lpc_xmap_fd == -1) {
1016*7c478bd9Sstevel@tonic-gate 		char pathbuf[PROC_PATH_MAX];
1017*7c478bd9Sstevel@tonic-gate 
1018*7c478bd9Sstevel@tonic-gate 		(void) snprintf(pathbuf, sizeof (pathbuf), "/proc/%d/xmap",
1019*7c478bd9Sstevel@tonic-gate 		    (int)lpc->lpc_pid);
1020*7c478bd9Sstevel@tonic-gate 		if ((lpc->lpc_xmap_fd = rfd_open(pathbuf, 1, RFD_XMAP,
1021*7c478bd9Sstevel@tonic-gate 		    revoke_xmap, lpc, O_RDONLY, 0)) < 0)
1022*7c478bd9Sstevel@tonic-gate 			return (-1);
1023*7c478bd9Sstevel@tonic-gate 	}
1024*7c478bd9Sstevel@tonic-gate 
1025*7c478bd9Sstevel@tonic-gate redo:
1026*7c478bd9Sstevel@tonic-gate 	errno = 0;
1027*7c478bd9Sstevel@tonic-gate 	if (fstat(lpc->lpc_xmap_fd, &st) != 0) {
1028*7c478bd9Sstevel@tonic-gate 		debug("cannot stat xmap\n");
1029*7c478bd9Sstevel@tonic-gate 		(void) rfd_close(lpc->lpc_xmap_fd);
1030*7c478bd9Sstevel@tonic-gate 		lpc->lpc_xmap_fd = -1;
1031*7c478bd9Sstevel@tonic-gate 		return (-1);
1032*7c478bd9Sstevel@tonic-gate 	}
1033*7c478bd9Sstevel@tonic-gate 
1034*7c478bd9Sstevel@tonic-gate 	if ((st.st_size % sizeof (*lpc->lpc_xmap)) != 0) {
1035*7c478bd9Sstevel@tonic-gate 		debug("xmap wrong size\n");
1036*7c478bd9Sstevel@tonic-gate 		(void) rfd_close(lpc->lpc_xmap_fd);
1037*7c478bd9Sstevel@tonic-gate 		lpc->lpc_xmap_fd = -1;
1038*7c478bd9Sstevel@tonic-gate 		return (-1);
1039*7c478bd9Sstevel@tonic-gate 	}
1040*7c478bd9Sstevel@tonic-gate 
1041*7c478bd9Sstevel@tonic-gate 	lpc->lpc_xmap = malloc(st.st_size);
1042*7c478bd9Sstevel@tonic-gate 	if (lpc->lpc_xmap == NULL) {
1043*7c478bd9Sstevel@tonic-gate 		debug("cannot malloc() %ld bytes for xmap", st.st_size);
1044*7c478bd9Sstevel@tonic-gate 		(void) rfd_close(lpc->lpc_xmap_fd);
1045*7c478bd9Sstevel@tonic-gate 		lpc->lpc_xmap_fd = -1;
1046*7c478bd9Sstevel@tonic-gate 		return (-1);
1047*7c478bd9Sstevel@tonic-gate 	}
1048*7c478bd9Sstevel@tonic-gate 
1049*7c478bd9Sstevel@tonic-gate 	if ((res = pread(lpc->lpc_xmap_fd, lpc->lpc_xmap, st.st_size, 0)) !=
1050*7c478bd9Sstevel@tonic-gate 	    st.st_size) {
1051*7c478bd9Sstevel@tonic-gate 		free(lpc->lpc_xmap);
1052*7c478bd9Sstevel@tonic-gate 		lpc->lpc_xmap = NULL;
1053*7c478bd9Sstevel@tonic-gate 		if (res > 0) {
1054*7c478bd9Sstevel@tonic-gate 			debug("xmap changed size, retrying\n");
1055*7c478bd9Sstevel@tonic-gate 			goto redo;
1056*7c478bd9Sstevel@tonic-gate 		} else {
1057*7c478bd9Sstevel@tonic-gate 			debug("cannot read xmap");
1058*7c478bd9Sstevel@tonic-gate 			return (-1);
1059*7c478bd9Sstevel@tonic-gate 		}
1060*7c478bd9Sstevel@tonic-gate 	}
1061*7c478bd9Sstevel@tonic-gate 	lpc->lpc_nxmap = st.st_size / sizeof (*lpc->lpc_xmap);
1062*7c478bd9Sstevel@tonic-gate 
1063*7c478bd9Sstevel@tonic-gate 	return (0);
1064*7c478bd9Sstevel@tonic-gate }
1065