/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #include <sys/mman.h>
277c478bd9Sstevel@tonic-gate #include <sys/param.h>
287c478bd9Sstevel@tonic-gate #include <sys/stat.h>
297c478bd9Sstevel@tonic-gate #include <sys/types.h>
307c478bd9Sstevel@tonic-gate #include <assert.h>
317c478bd9Sstevel@tonic-gate #include <errno.h>
327c478bd9Sstevel@tonic-gate #include <fcntl.h>
337c478bd9Sstevel@tonic-gate #include <libproc.h>
347c478bd9Sstevel@tonic-gate #include <limits.h>
357c478bd9Sstevel@tonic-gate #include <procfs.h>
367c478bd9Sstevel@tonic-gate #include <stdio.h>
377c478bd9Sstevel@tonic-gate #include <stdlib.h>
387c478bd9Sstevel@tonic-gate #include <strings.h>
397c478bd9Sstevel@tonic-gate #include <time.h>
407c478bd9Sstevel@tonic-gate #include <unistd.h>
417c478bd9Sstevel@tonic-gate #include "rcapd.h"
427c478bd9Sstevel@tonic-gate #include "rcapd_rfd.h"
437c478bd9Sstevel@tonic-gate #include "rcapd_mapping.h"
447c478bd9Sstevel@tonic-gate #include "utils.h"
457c478bd9Sstevel@tonic-gate 
467c478bd9Sstevel@tonic-gate static int lpc_xmap_update(lprocess_t *);
477c478bd9Sstevel@tonic-gate #ifdef DEBUG
487c478bd9Sstevel@tonic-gate extern int lmapping_dump_diff(lmapping_t *lm1, lmapping_t *lm2);
497c478bd9Sstevel@tonic-gate #endif /* DEBUG */
507c478bd9Sstevel@tonic-gate 
/*
 * How many file descriptors are needed to grab a process and create an agent
 * inside it.
 */
#define	PGRAB_FD_COUNT		10
567c478bd9Sstevel@tonic-gate 
577c478bd9Sstevel@tonic-gate /*
587c478bd9Sstevel@tonic-gate  * Record a position in an address space as it corresponds to a prpageheader_t
597c478bd9Sstevel@tonic-gate  * and affiliated structures.
607c478bd9Sstevel@tonic-gate  */
617c478bd9Sstevel@tonic-gate typedef struct prpageheader_cur {
627c478bd9Sstevel@tonic-gate 	int pr_nmap;		/* number of mappings in address space */
637c478bd9Sstevel@tonic-gate 	int pr_map;		/* number of this mapping */
647c478bd9Sstevel@tonic-gate 	uint64_t pr_pgoff;	/* page offset into mapping */
657c478bd9Sstevel@tonic-gate 	uint64_t pr_npage;	/* number of pages in mapping */
667c478bd9Sstevel@tonic-gate 	uint64_t pr_pagesize;	/* page size of mapping */
677c478bd9Sstevel@tonic-gate 	uintptr_t pr_addr;	/* base of mapping */
687c478bd9Sstevel@tonic-gate 	prpageheader_t *pr_prpageheader;	/* associated page header */
697c478bd9Sstevel@tonic-gate 	void *pr_pdaddr;	/* address of page's byte in pagedata */
707c478bd9Sstevel@tonic-gate 	prxmap_t *pr_xmap;	/* array containing per-segment information */
717c478bd9Sstevel@tonic-gate 	int pr_nxmap;		/* number of xmaps in array */
727c478bd9Sstevel@tonic-gate 	int64_t pr_rss;		/* number of resident pages in mapping, */
737c478bd9Sstevel@tonic-gate 				/* or -1 if xmap is out of sync */
747c478bd9Sstevel@tonic-gate 	int64_t pr_pg_rss;	/* number of pageable pages in mapping, or -1 */
757c478bd9Sstevel@tonic-gate } prpageheader_cur_t;
767c478bd9Sstevel@tonic-gate 
/* Handle of the process currently being scanned. */
static struct ps_prochandle *scan_pr;
787c478bd9Sstevel@tonic-gate 
/* Verbosity levels accepted by st_debug(). */
typedef enum {
	STDL_NORMAL,
	STDL_HIGH
} st_debug_level_t;
837c478bd9Sstevel@tonic-gate 
847c478bd9Sstevel@tonic-gate /*
857c478bd9Sstevel@tonic-gate  * Output a scanning-related debug message.
867c478bd9Sstevel@tonic-gate  */
877c478bd9Sstevel@tonic-gate /*PRINTFLIKE3*/ /*ARGSUSED*/
887c478bd9Sstevel@tonic-gate static void
st_debug(st_debug_level_t level,lcollection_t * lcol,char * msg,...)897c478bd9Sstevel@tonic-gate st_debug(st_debug_level_t level, lcollection_t *lcol, char *msg, ...)
907c478bd9Sstevel@tonic-gate {
917c478bd9Sstevel@tonic-gate #ifdef DEBUG_MSG
927c478bd9Sstevel@tonic-gate 	va_list alist;
937c478bd9Sstevel@tonic-gate 	char *buf;
947c478bd9Sstevel@tonic-gate 	size_t len;
957c478bd9Sstevel@tonic-gate 
967c478bd9Sstevel@tonic-gate 	if (get_message_priority() < ((level == STDL_HIGH) ? RCM_DEBUG_HIGH
977c478bd9Sstevel@tonic-gate 	    : RCM_DEBUG))
987c478bd9Sstevel@tonic-gate 		return;
997c478bd9Sstevel@tonic-gate 
1007c478bd9Sstevel@tonic-gate 	len = strlen(msg) + LINELEN;
1017c478bd9Sstevel@tonic-gate 	buf = malloc(len);
1027c478bd9Sstevel@tonic-gate 	if (buf == NULL)
1037c478bd9Sstevel@tonic-gate 		return;
1040209230bSgjelinek 	(void) snprintf(buf, len, "%s %s scanner %s",
1050209230bSgjelinek 	    (lcol->lcol_id.rcid_type == RCIDT_PROJECT ? "project" : "zone"),
1067c478bd9Sstevel@tonic-gate 	    lcol->lcol_name, msg);
1077c478bd9Sstevel@tonic-gate 
1087c478bd9Sstevel@tonic-gate 	va_start(alist, msg);
1097c478bd9Sstevel@tonic-gate 	vdprintfe(RCM_DEBUG, buf, alist);
1107c478bd9Sstevel@tonic-gate 	va_end(alist);
1117c478bd9Sstevel@tonic-gate 
1127c478bd9Sstevel@tonic-gate 	free(buf);
1137c478bd9Sstevel@tonic-gate #endif /* DEBUG_MSG */
1147c478bd9Sstevel@tonic-gate }
1157c478bd9Sstevel@tonic-gate 
1167c478bd9Sstevel@tonic-gate /*
1177c478bd9Sstevel@tonic-gate  * Determine the collection's current victim, based on its last.  The last will
1187c478bd9Sstevel@tonic-gate  * be returned, or, if invalid, any other valid process, if the collection has
1197c478bd9Sstevel@tonic-gate  * any.
1207c478bd9Sstevel@tonic-gate  */
1217c478bd9Sstevel@tonic-gate static lprocess_t *
get_valid_victim(lcollection_t * lcol,lprocess_t * lpc)1227c478bd9Sstevel@tonic-gate get_valid_victim(lcollection_t *lcol, lprocess_t *lpc)
1237c478bd9Sstevel@tonic-gate {
1247c478bd9Sstevel@tonic-gate 	if (lpc == NULL || !lcollection_member(lcol, lpc))
1257c478bd9Sstevel@tonic-gate 		lpc = lcol->lcol_lprocess;
1267c478bd9Sstevel@tonic-gate 
1277c478bd9Sstevel@tonic-gate 	/*
1287c478bd9Sstevel@tonic-gate 	 * Find the next scannable process, and make it the victim.
1297c478bd9Sstevel@tonic-gate 	 */
1307c478bd9Sstevel@tonic-gate 	while (lpc != NULL && lpc->lpc_unscannable != 0)
1317c478bd9Sstevel@tonic-gate 		lpc = lpc->lpc_next;
1327c478bd9Sstevel@tonic-gate 
1337c478bd9Sstevel@tonic-gate 	return (lpc);
1347c478bd9Sstevel@tonic-gate }
1357c478bd9Sstevel@tonic-gate 
1367c478bd9Sstevel@tonic-gate /*
1377c478bd9Sstevel@tonic-gate  * Get a process's combined current pagedata (per-page referenced and modified
1387c478bd9Sstevel@tonic-gate  * bits) and set the supplied pointer to it.  The caller is responsible for
1397c478bd9Sstevel@tonic-gate  * freeing the data.  If the pagedata is unreadable, a nonzero value is
1407c478bd9Sstevel@tonic-gate  * returned, and errno is set.  Otherwise, 0 is returned.
1417c478bd9Sstevel@tonic-gate  */
1427c478bd9Sstevel@tonic-gate static int
get_pagedata(prpageheader_t ** pghpp,int fd)1437c478bd9Sstevel@tonic-gate get_pagedata(prpageheader_t **pghpp, int fd)
1447c478bd9Sstevel@tonic-gate {
1457c478bd9Sstevel@tonic-gate 	int res;
1467c478bd9Sstevel@tonic-gate 	struct stat st;
1477c478bd9Sstevel@tonic-gate 
1487c478bd9Sstevel@tonic-gate redo:
1497c478bd9Sstevel@tonic-gate 	errno = 0;
1507c478bd9Sstevel@tonic-gate 	if (fstat(fd, &st) != 0) {
1517c478bd9Sstevel@tonic-gate 		debug("cannot stat pagedata\n");
1527c478bd9Sstevel@tonic-gate 		return (-1);
1537c478bd9Sstevel@tonic-gate 	}
1547c478bd9Sstevel@tonic-gate 
1557c478bd9Sstevel@tonic-gate 	errno = 0;
1567c478bd9Sstevel@tonic-gate 	*pghpp = malloc(st.st_size);
1577c478bd9Sstevel@tonic-gate 	if (*pghpp == NULL) {
1587c478bd9Sstevel@tonic-gate 		debug("cannot malloc() %ld bytes for pagedata", st.st_size);
1597c478bd9Sstevel@tonic-gate 		return (-1);
1607c478bd9Sstevel@tonic-gate 	}
1617c478bd9Sstevel@tonic-gate 	(void) bzero(*pghpp, st.st_size);
1627c478bd9Sstevel@tonic-gate 
1637c478bd9Sstevel@tonic-gate 	errno = 0;
1647c478bd9Sstevel@tonic-gate 	if ((res = read(fd, *pghpp, st.st_size)) != st.st_size) {
1657c478bd9Sstevel@tonic-gate 		free(*pghpp);
1667c478bd9Sstevel@tonic-gate 		*pghpp = NULL;
1677c478bd9Sstevel@tonic-gate 		if (res > 0 || errno == E2BIG) {
1687c478bd9Sstevel@tonic-gate 			debug("pagedata changed size, retrying\n");
1697c478bd9Sstevel@tonic-gate 			goto redo;
1707c478bd9Sstevel@tonic-gate 		} else {
1717c478bd9Sstevel@tonic-gate 			debug("cannot read pagedata");
1727c478bd9Sstevel@tonic-gate 			return (-1);
1737c478bd9Sstevel@tonic-gate 		}
1747c478bd9Sstevel@tonic-gate 	}
1757c478bd9Sstevel@tonic-gate 
1767c478bd9Sstevel@tonic-gate 	return (0);
1777c478bd9Sstevel@tonic-gate }
1787c478bd9Sstevel@tonic-gate 
1797c478bd9Sstevel@tonic-gate /*
1807c478bd9Sstevel@tonic-gate  * Return the count of kilobytes of pages represented by the given pagedata
1817c478bd9Sstevel@tonic-gate  * which meet the given criteria, having pages which are in all of the states
1827c478bd9Sstevel@tonic-gate  * specified by the mask, and in none of the states in the notmask.  If the
1837c478bd9Sstevel@tonic-gate  * CP_CLEAR flag is set, the pagedata will also be cleared.
1847c478bd9Sstevel@tonic-gate  */
1857c478bd9Sstevel@tonic-gate #define	CP_CLEAR	1
1867c478bd9Sstevel@tonic-gate static uint64_t
count_pages(prpageheader_t * pghp,int flags,int mask,int notmask)1877c478bd9Sstevel@tonic-gate count_pages(prpageheader_t *pghp, int flags, int mask, int notmask)
1887c478bd9Sstevel@tonic-gate {
1897c478bd9Sstevel@tonic-gate 	int map;
1907c478bd9Sstevel@tonic-gate 	caddr_t cur, end;
1917c478bd9Sstevel@tonic-gate 	prpageheader_t pgh = *pghp;
1927c478bd9Sstevel@tonic-gate 	prasmap_t *asmapp;
1937c478bd9Sstevel@tonic-gate 	uint64_t count = 0;
1947c478bd9Sstevel@tonic-gate 
1957c478bd9Sstevel@tonic-gate 	cur = (caddr_t)pghp + sizeof (*pghp);
1967c478bd9Sstevel@tonic-gate 	for (map = 0; map < pgh.pr_nmap; map++) {
1977c478bd9Sstevel@tonic-gate 		asmapp = (prasmap_t *)(uintptr_t)cur;
1987c478bd9Sstevel@tonic-gate 		cur += sizeof (*asmapp);
1997c478bd9Sstevel@tonic-gate 		end = cur + asmapp->pr_npage;
2007c478bd9Sstevel@tonic-gate 		while (cur < end) {
2017c478bd9Sstevel@tonic-gate 			if ((*cur & mask) == mask && (*cur & notmask) == 0)
2027c478bd9Sstevel@tonic-gate 				count += asmapp->pr_pagesize / 1024;
2037c478bd9Sstevel@tonic-gate 			if ((flags & CP_CLEAR) != 0)
2047c478bd9Sstevel@tonic-gate 				*cur = 0;
2057c478bd9Sstevel@tonic-gate 			cur++;
2067c478bd9Sstevel@tonic-gate 		}
2077c478bd9Sstevel@tonic-gate 
2087c478bd9Sstevel@tonic-gate 		/*
2097c478bd9Sstevel@tonic-gate 		 * Skip to next 64-bit-aligned address to get the next
2107c478bd9Sstevel@tonic-gate 		 * prasmap_t.
2117c478bd9Sstevel@tonic-gate 		 */
2127c478bd9Sstevel@tonic-gate 		cur = (caddr_t)((intptr_t)(cur + 7) & ~7);
2137c478bd9Sstevel@tonic-gate 	}
2147c478bd9Sstevel@tonic-gate 
2157c478bd9Sstevel@tonic-gate 	return (count);
2167c478bd9Sstevel@tonic-gate }
2177c478bd9Sstevel@tonic-gate 
2187c478bd9Sstevel@tonic-gate /*
2197c478bd9Sstevel@tonic-gate  * Return the amount of memory (in kilobytes) that hasn't been referenced or
2207c478bd9Sstevel@tonic-gate  * modified, which memory which will be paged out first.  Should be written to
2217c478bd9Sstevel@tonic-gate  * exclude nonresident pages when sufficient interfaces exist.
2227c478bd9Sstevel@tonic-gate  */
2237c478bd9Sstevel@tonic-gate static uint64_t
unrm_size(lprocess_t * lpc)2247c478bd9Sstevel@tonic-gate unrm_size(lprocess_t *lpc)
2257c478bd9Sstevel@tonic-gate {
2267c478bd9Sstevel@tonic-gate 	return (count_pages(lpc->lpc_prpageheader, CP_CLEAR,
2277c478bd9Sstevel@tonic-gate 	    0, PG_MODIFIED | PG_REFERENCED));
2287c478bd9Sstevel@tonic-gate }
2297c478bd9Sstevel@tonic-gate 
2307c478bd9Sstevel@tonic-gate /*
2317c478bd9Sstevel@tonic-gate  * Advance a prpageheader_cur_t to the address space's next mapping, returning
2327c478bd9Sstevel@tonic-gate  * its address, or NULL if there is none.  Any known nonpageable or nonresident
2337c478bd9Sstevel@tonic-gate  * mappings will be skipped over.
2347c478bd9Sstevel@tonic-gate  */
2357c478bd9Sstevel@tonic-gate static uintptr_t
advance_prpageheader_cur_nextmapping(prpageheader_cur_t * pcp)2367c478bd9Sstevel@tonic-gate advance_prpageheader_cur_nextmapping(prpageheader_cur_t *pcp)
2377c478bd9Sstevel@tonic-gate {
2387c478bd9Sstevel@tonic-gate 	prasmap_t *pap;
2397c478bd9Sstevel@tonic-gate 	int i;
2407c478bd9Sstevel@tonic-gate 
2417c478bd9Sstevel@tonic-gate next:
2427c478bd9Sstevel@tonic-gate 	ASSERT(pcp->pr_map < pcp->pr_nmap);
2437c478bd9Sstevel@tonic-gate 	if ((pcp->pr_map + 1) == pcp->pr_nmap)
244*ec3255b6SToomas Soome 		return ((uintptr_t)NULL);
2457c478bd9Sstevel@tonic-gate 	pcp->pr_map++;
2467c478bd9Sstevel@tonic-gate 	if (pcp->pr_pgoff < pcp->pr_npage) {
247414388d7Ssl 		pcp->pr_pdaddr = (caddr_t)(uintptr_t)
248414388d7Ssl 		    ((uintptr_t)pcp->pr_pdaddr +
2497c478bd9Sstevel@tonic-gate 		    (pcp->pr_npage - pcp->pr_pgoff));
2507c478bd9Sstevel@tonic-gate 		pcp->pr_pgoff = pcp->pr_npage;
2517c478bd9Sstevel@tonic-gate 	}
2527c478bd9Sstevel@tonic-gate 	/*
2537c478bd9Sstevel@tonic-gate 	 * Skip to next 64-bit-aligned address to get the next prasmap_t.
2547c478bd9Sstevel@tonic-gate 	 */
2557c478bd9Sstevel@tonic-gate 	pcp->pr_pdaddr = (caddr_t)(((uintptr_t)pcp->pr_pdaddr + 7) & ~7);
2567c478bd9Sstevel@tonic-gate 	pap = (prasmap_t *)pcp->pr_pdaddr;
2577c478bd9Sstevel@tonic-gate 	pcp->pr_pgoff = 0;
2587c478bd9Sstevel@tonic-gate 	pcp->pr_npage = pap->pr_npage;
2597c478bd9Sstevel@tonic-gate 	pcp->pr_pagesize = pap->pr_pagesize;
2607c478bd9Sstevel@tonic-gate 	pcp->pr_addr = pap->pr_vaddr;
2617c478bd9Sstevel@tonic-gate 	pcp->pr_pdaddr = pap + 1;
2627c478bd9Sstevel@tonic-gate 
2637c478bd9Sstevel@tonic-gate 	/*
2647c478bd9Sstevel@tonic-gate 	 * Skip any known nonpageable mappings.  Currently, the only one
2657c478bd9Sstevel@tonic-gate 	 * detected is the schedctl page.
2667c478bd9Sstevel@tonic-gate 	 */
2677c478bd9Sstevel@tonic-gate 	if ((pap->pr_mflags ^ (MA_SHARED | MA_READ | MA_WRITE | MA_EXEC |
2687c478bd9Sstevel@tonic-gate 	    MA_ANON)) == 0 && pap->pr_npage == 1) {
2697c478bd9Sstevel@tonic-gate 		debug("identified nonpageable schedctl mapping at %p\n",
2707c478bd9Sstevel@tonic-gate 		    (void *)pcp->pr_addr);
2717c478bd9Sstevel@tonic-gate 		goto next;
2727c478bd9Sstevel@tonic-gate 	}
2737c478bd9Sstevel@tonic-gate 
2747c478bd9Sstevel@tonic-gate 	/*
2757c478bd9Sstevel@tonic-gate 	 * Skip mappings with no resident pages.  If the xmap does not
2767c478bd9Sstevel@tonic-gate 	 * correspond to the pagedata for any reason, it will be ignored.
2777c478bd9Sstevel@tonic-gate 	 */
2787c478bd9Sstevel@tonic-gate 	pcp->pr_rss = -1;
2797c478bd9Sstevel@tonic-gate 	pcp->pr_pg_rss = -1;
2807c478bd9Sstevel@tonic-gate 	for (i = 0; i < pcp->pr_nxmap; i++) {
2817c478bd9Sstevel@tonic-gate 		prxmap_t *xmap = &pcp->pr_xmap[i];
2827c478bd9Sstevel@tonic-gate 
2837c478bd9Sstevel@tonic-gate 		if (pcp->pr_addr == xmap->pr_vaddr && xmap->pr_size ==
2847c478bd9Sstevel@tonic-gate 		    (pcp->pr_npage * pcp->pr_pagesize)) {
2857c478bd9Sstevel@tonic-gate 			pcp->pr_rss = xmap->pr_rss;
2867c478bd9Sstevel@tonic-gate 			/*
2877c478bd9Sstevel@tonic-gate 			 * Remove COW pages from the pageable RSS count.
2887c478bd9Sstevel@tonic-gate 			 */
2897c478bd9Sstevel@tonic-gate 			if ((xmap->pr_mflags & MA_SHARED) == 0)
2907c478bd9Sstevel@tonic-gate 				pcp->pr_pg_rss = xmap->pr_anon;
2917c478bd9Sstevel@tonic-gate 			break;
2927c478bd9Sstevel@tonic-gate 		}
2937c478bd9Sstevel@tonic-gate 	}
2947c478bd9Sstevel@tonic-gate 	if (pcp->pr_rss == 0) {
2957c478bd9Sstevel@tonic-gate 		debug("identified nonresident mapping at 0x%p\n",
2967c478bd9Sstevel@tonic-gate 		    (void *)pcp->pr_addr);
2977c478bd9Sstevel@tonic-gate 		goto next;
2987c478bd9Sstevel@tonic-gate 	} else if (pcp->pr_pg_rss == 0) {
2997c478bd9Sstevel@tonic-gate 		debug("identified unpageable mapping at 0x%p\n",
3007c478bd9Sstevel@tonic-gate 		    (void *)pcp->pr_addr);
3017c478bd9Sstevel@tonic-gate 		goto next;
3027c478bd9Sstevel@tonic-gate 	}
3037c478bd9Sstevel@tonic-gate 
3047c478bd9Sstevel@tonic-gate 	return (pcp->pr_addr);
3057c478bd9Sstevel@tonic-gate }
3067c478bd9Sstevel@tonic-gate 
3077c478bd9Sstevel@tonic-gate /*
3087c478bd9Sstevel@tonic-gate  * Advance a prpageheader_cur_t to the mapping's next page, returning its
3097c478bd9Sstevel@tonic-gate  * address, or NULL if there is none.
3107c478bd9Sstevel@tonic-gate  */
3117c478bd9Sstevel@tonic-gate static void *
advance_prpageheader_cur(prpageheader_cur_t * pcp)3127c478bd9Sstevel@tonic-gate advance_prpageheader_cur(prpageheader_cur_t *pcp)
3137c478bd9Sstevel@tonic-gate {
3147c478bd9Sstevel@tonic-gate 	ASSERT(pcp->pr_pgoff < pcp->pr_npage);
3157c478bd9Sstevel@tonic-gate 	if ((pcp->pr_pgoff + 1) == pcp->pr_npage)
3167c478bd9Sstevel@tonic-gate 		return (NULL);
3177c478bd9Sstevel@tonic-gate 	pcp->pr_pdaddr = (caddr_t)pcp->pr_pdaddr + 1;
3187c478bd9Sstevel@tonic-gate 	pcp->pr_pgoff++;
3197c478bd9Sstevel@tonic-gate 
3207c478bd9Sstevel@tonic-gate 	ASSERT((*(char *)pcp->pr_pdaddr & ~(PG_MODIFIED | PG_REFERENCED)) == 0);
3217c478bd9Sstevel@tonic-gate 	return ((caddr_t)pcp->pr_addr + pcp->pr_pgoff * pcp->pr_pagesize);
3227c478bd9Sstevel@tonic-gate }
3237c478bd9Sstevel@tonic-gate 
3247c478bd9Sstevel@tonic-gate /*
3257c478bd9Sstevel@tonic-gate  * Initialize a prpageheader_cur_t, positioned at the first page of the mapping
3267c478bd9Sstevel@tonic-gate  * of an address space.
3277c478bd9Sstevel@tonic-gate  */
3287c478bd9Sstevel@tonic-gate static void *
set_prpageheader_cur(prpageheader_cur_t * pcp,prpageheader_t * php,prxmap_t * xmap,int nxmap)3297c478bd9Sstevel@tonic-gate set_prpageheader_cur(prpageheader_cur_t *pcp, prpageheader_t *php,
3307c478bd9Sstevel@tonic-gate     prxmap_t *xmap, int nxmap)
3317c478bd9Sstevel@tonic-gate {
3327c478bd9Sstevel@tonic-gate 	bzero(pcp, sizeof (*pcp));
3337c478bd9Sstevel@tonic-gate 	pcp->pr_nmap = php->pr_nmap;
3347c478bd9Sstevel@tonic-gate 	pcp->pr_map = -1;
3357c478bd9Sstevel@tonic-gate 	pcp->pr_prpageheader = php;
3367c478bd9Sstevel@tonic-gate 	pcp->pr_xmap = xmap;
3377c478bd9Sstevel@tonic-gate 	pcp->pr_nxmap = nxmap;
3387c478bd9Sstevel@tonic-gate 	pcp->pr_pdaddr = (prpageheader_t *)php + 1;
3397c478bd9Sstevel@tonic-gate 
3407c478bd9Sstevel@tonic-gate 	return ((void *)advance_prpageheader_cur_nextmapping(pcp));
3417c478bd9Sstevel@tonic-gate }
3427c478bd9Sstevel@tonic-gate 
3437c478bd9Sstevel@tonic-gate /*
3447c478bd9Sstevel@tonic-gate  * Position a prpageheader_cur_t to the mapped address greater or equal to the
3457c478bd9Sstevel@tonic-gate  * given value.
3467c478bd9Sstevel@tonic-gate  */
3477c478bd9Sstevel@tonic-gate static void *
set_prpageheader_cur_addr(prpageheader_cur_t * pcp,prpageheader_t * php,prxmap_t * xmap,int nxmap,void * naddr)3487c478bd9Sstevel@tonic-gate set_prpageheader_cur_addr(prpageheader_cur_t *pcp, prpageheader_t *php,
3497c478bd9Sstevel@tonic-gate     prxmap_t *xmap, int nxmap, void *naddr)
3507c478bd9Sstevel@tonic-gate {
3517c478bd9Sstevel@tonic-gate 	void *addr = set_prpageheader_cur(pcp, php, xmap, nxmap);
3527c478bd9Sstevel@tonic-gate 
3537c478bd9Sstevel@tonic-gate 	while (addr != NULL && addr <= naddr)
3547c478bd9Sstevel@tonic-gate 		if (naddr < (void *)((caddr_t)pcp->pr_addr +
3557c478bd9Sstevel@tonic-gate 		    pcp->pr_pagesize * pcp->pr_npage)) {
3567c478bd9Sstevel@tonic-gate 			uint64_t pgdiff = ((uintptr_t)naddr -
3577c478bd9Sstevel@tonic-gate 			    (uintptr_t)pcp->pr_addr) / pcp->pr_pagesize;
3587c478bd9Sstevel@tonic-gate 			pcp->pr_pgoff += pgdiff;
3597c478bd9Sstevel@tonic-gate 			pcp->pr_pdaddr = (caddr_t)pcp->pr_pdaddr + pgdiff;
3607c478bd9Sstevel@tonic-gate 			addr = (caddr_t)pcp->pr_addr + pcp->pr_pagesize *
3617c478bd9Sstevel@tonic-gate 			    pcp->pr_pgoff;
3627c478bd9Sstevel@tonic-gate 			break;
3637c478bd9Sstevel@tonic-gate 		} else
3647c478bd9Sstevel@tonic-gate 			addr =
3657c478bd9Sstevel@tonic-gate 			    (void *)advance_prpageheader_cur_nextmapping(pcp);
3667c478bd9Sstevel@tonic-gate 
3677c478bd9Sstevel@tonic-gate 	return (addr);
3687c478bd9Sstevel@tonic-gate }
3697c478bd9Sstevel@tonic-gate 
3707c478bd9Sstevel@tonic-gate static void
revoke_pagedata(rfd_t * rfd)3717c478bd9Sstevel@tonic-gate revoke_pagedata(rfd_t *rfd)
3727c478bd9Sstevel@tonic-gate {
3737c478bd9Sstevel@tonic-gate 	lprocess_t *lpc = rfd->rfd_data;
3747c478bd9Sstevel@tonic-gate 
3757c478bd9Sstevel@tonic-gate 	st_debug(STDL_NORMAL, lpc->lpc_collection, "revoking pagedata for"
3767c478bd9Sstevel@tonic-gate 	    " process %d\n", (int)lpc->lpc_pid);
3777c478bd9Sstevel@tonic-gate 	ASSERT(lpc->lpc_pgdata_fd != -1);
3787c478bd9Sstevel@tonic-gate 	lpc->lpc_pgdata_fd = -1;
3797c478bd9Sstevel@tonic-gate }
3807c478bd9Sstevel@tonic-gate 
3817c478bd9Sstevel@tonic-gate #ifdef DEBUG
3827c478bd9Sstevel@tonic-gate static void
mklmapping(lmapping_t ** lm,prpageheader_t * pgh)3837c478bd9Sstevel@tonic-gate mklmapping(lmapping_t **lm, prpageheader_t *pgh)
3847c478bd9Sstevel@tonic-gate {
3857c478bd9Sstevel@tonic-gate 	prpageheader_cur_t cur;
3867c478bd9Sstevel@tonic-gate 	void *addr;
3877c478bd9Sstevel@tonic-gate 
3887c478bd9Sstevel@tonic-gate 	addr = set_prpageheader_cur(&cur, pgh, NULL, -1);
3897c478bd9Sstevel@tonic-gate 	ASSERT(*lm == NULL);
3907c478bd9Sstevel@tonic-gate 	while (addr != NULL) {
3917c478bd9Sstevel@tonic-gate 		(void) lmapping_insert(lm, cur.pr_addr, cur.pr_npage *
3927c478bd9Sstevel@tonic-gate 		    cur.pr_pagesize);
3937c478bd9Sstevel@tonic-gate 		addr = (void *)advance_prpageheader_cur_nextmapping(&cur);
3947c478bd9Sstevel@tonic-gate 	}
3957c478bd9Sstevel@tonic-gate }
3967c478bd9Sstevel@tonic-gate 
3977c478bd9Sstevel@tonic-gate static void
lmapping_dump(lmapping_t * lm)3987c478bd9Sstevel@tonic-gate lmapping_dump(lmapping_t *lm)
3997c478bd9Sstevel@tonic-gate {
4007c478bd9Sstevel@tonic-gate 	debug("lm: %p\n", (void *)lm);
4017c478bd9Sstevel@tonic-gate 	while (lm != NULL) {
4027c478bd9Sstevel@tonic-gate 		debug("\t(%p, %llx\n", (void *)lm->lm_addr,
4037c478bd9Sstevel@tonic-gate 		    (unsigned long long)lm->lm_size);
4047c478bd9Sstevel@tonic-gate 		lm = lm->lm_next;
4057c478bd9Sstevel@tonic-gate 	}
4067c478bd9Sstevel@tonic-gate }
4077c478bd9Sstevel@tonic-gate #endif /* DEBUG */
4087c478bd9Sstevel@tonic-gate 
4097c478bd9Sstevel@tonic-gate /*
4107c478bd9Sstevel@tonic-gate  * OR two prpagedata_t which are supposedly snapshots of the same address
4117c478bd9Sstevel@tonic-gate  * space.  Intersecting mappings with different page sizes are tolerated but
4127c478bd9Sstevel@tonic-gate  * not normalized (not accurate).  If the mappings of the two snapshots differ
4137c478bd9Sstevel@tonic-gate  * in any regard, the supplied mappings_changed flag will be set.
4147c478bd9Sstevel@tonic-gate  */
4157c478bd9Sstevel@tonic-gate static void
OR_pagedata(prpageheader_t * src,prpageheader_t * dst,int * mappings_changedp)4167c478bd9Sstevel@tonic-gate OR_pagedata(prpageheader_t *src, prpageheader_t *dst, int *mappings_changedp)
4177c478bd9Sstevel@tonic-gate {
4187c478bd9Sstevel@tonic-gate 	prpageheader_cur_t src_cur;
4197c478bd9Sstevel@tonic-gate 	prpageheader_cur_t dst_cur;
4207c478bd9Sstevel@tonic-gate 	uintptr_t src_addr;
4217c478bd9Sstevel@tonic-gate 	uintptr_t dst_addr;
4227c478bd9Sstevel@tonic-gate 	int mappings_changed = 0;
4237c478bd9Sstevel@tonic-gate 
4247c478bd9Sstevel@tonic-gate 	/*
4257c478bd9Sstevel@tonic-gate 	 * OR source pagedata with the destination, for pages of intersecting
4267c478bd9Sstevel@tonic-gate 	 * mappings.
4277c478bd9Sstevel@tonic-gate 	 */
4287c478bd9Sstevel@tonic-gate 	src_addr = (uintptr_t)set_prpageheader_cur(&src_cur, src, NULL, -1);
4297c478bd9Sstevel@tonic-gate 	dst_addr = (uintptr_t)set_prpageheader_cur(&dst_cur, dst, NULL, -1);
430*ec3255b6SToomas Soome 	while (src_addr != (uintptr_t)NULL && dst_addr != (uintptr_t)NULL) {
431*ec3255b6SToomas Soome 		while (src_addr == dst_addr && src_addr != (uintptr_t)NULL) {
4327c478bd9Sstevel@tonic-gate 			*(char *)dst_cur.pr_pdaddr |=
4337c478bd9Sstevel@tonic-gate 			    *(char *)src_cur.pr_pdaddr;
4347c478bd9Sstevel@tonic-gate 			src_addr = (uintptr_t)advance_prpageheader_cur(
4357c478bd9Sstevel@tonic-gate 			    &src_cur);
4367c478bd9Sstevel@tonic-gate 			dst_addr = (uintptr_t)advance_prpageheader_cur(
4377c478bd9Sstevel@tonic-gate 			    &dst_cur);
4387c478bd9Sstevel@tonic-gate 		}
4397c478bd9Sstevel@tonic-gate 		if (src_addr != dst_addr)
4407c478bd9Sstevel@tonic-gate 			mappings_changed = 1;
4417c478bd9Sstevel@tonic-gate 		src_addr = advance_prpageheader_cur_nextmapping(&src_cur);
4427c478bd9Sstevel@tonic-gate 		dst_addr = advance_prpageheader_cur_nextmapping(&dst_cur);
443*ec3255b6SToomas Soome 		while (src_addr != dst_addr && src_addr != (uintptr_t)NULL &&
444*ec3255b6SToomas Soome 		    dst_addr != (uintptr_t)NULL) {
4457c478bd9Sstevel@tonic-gate 			mappings_changed = 1;
4467c478bd9Sstevel@tonic-gate 			if (src_addr < dst_addr)
4477c478bd9Sstevel@tonic-gate 				src_addr = advance_prpageheader_cur_nextmapping(
4487c478bd9Sstevel@tonic-gate 				    &src_cur);
4497c478bd9Sstevel@tonic-gate 			else
4507c478bd9Sstevel@tonic-gate 				dst_addr = advance_prpageheader_cur_nextmapping(
4517c478bd9Sstevel@tonic-gate 				    &dst_cur);
4527c478bd9Sstevel@tonic-gate 		}
4537c478bd9Sstevel@tonic-gate 	}
4547c478bd9Sstevel@tonic-gate 
4557c478bd9Sstevel@tonic-gate 	*mappings_changedp = mappings_changed;
4567c478bd9Sstevel@tonic-gate }
4577c478bd9Sstevel@tonic-gate 
4587c478bd9Sstevel@tonic-gate /*
4597c478bd9Sstevel@tonic-gate  * Merge the current pagedata with that on hand.  If the pagedata is
4607c478bd9Sstevel@tonic-gate  * unretrievable for any reason, such as the process having exited or being a
4617c478bd9Sstevel@tonic-gate  * zombie, a nonzero value is returned, the process should be marked
4627c478bd9Sstevel@tonic-gate  * unscannable, and future attempts to scan it should be avoided, since the
4637c478bd9Sstevel@tonic-gate  * symptom is probably permament.  If the mappings of either pagedata
4647c478bd9Sstevel@tonic-gate  * differ in any respect, the supplied callback will be invoked once.
4657c478bd9Sstevel@tonic-gate  */
4667c478bd9Sstevel@tonic-gate static int
merge_current_pagedata(lprocess_t * lpc,void (* mappings_changed_cb)(lprocess_t *))4677c478bd9Sstevel@tonic-gate merge_current_pagedata(lprocess_t *lpc,
4687c478bd9Sstevel@tonic-gate     void(*mappings_changed_cb) (lprocess_t *))
4697c478bd9Sstevel@tonic-gate {
4707c478bd9Sstevel@tonic-gate 	prpageheader_t *pghp;
4717c478bd9Sstevel@tonic-gate 	int mappings_changed = 0;
4720209230bSgjelinek 	uint64_t cnt;
4737c478bd9Sstevel@tonic-gate 
4747c478bd9Sstevel@tonic-gate 	if (lpc->lpc_pgdata_fd < 0 || get_pagedata(&pghp, lpc->lpc_pgdata_fd) !=
4757c478bd9Sstevel@tonic-gate 	    0) {
4767c478bd9Sstevel@tonic-gate 		char pathbuf[PROC_PATH_MAX];
4777c478bd9Sstevel@tonic-gate 
4787c478bd9Sstevel@tonic-gate 		(void) snprintf(pathbuf, sizeof (pathbuf), "/proc/%d/pagedata",
4797c478bd9Sstevel@tonic-gate 		    (int)lpc->lpc_pid);
4807c478bd9Sstevel@tonic-gate 		if ((lpc->lpc_pgdata_fd = rfd_open(pathbuf, 1, RFD_PAGEDATA,
4817c478bd9Sstevel@tonic-gate 		    revoke_pagedata, lpc, O_RDONLY, 0)) < 0 ||
4827c478bd9Sstevel@tonic-gate 		    get_pagedata(&pghp, lpc->lpc_pgdata_fd) != 0)
4837c478bd9Sstevel@tonic-gate 			return (-1);
4847c478bd9Sstevel@tonic-gate 		debug("starting/resuming pagedata collection for %d\n",
4857c478bd9Sstevel@tonic-gate 		    (int)lpc->lpc_pid);
4867c478bd9Sstevel@tonic-gate 	}
4870209230bSgjelinek 
4880209230bSgjelinek 	cnt = count_pages(pghp, 0, PG_MODIFIED | PG_REFERENCED, 0);
4890209230bSgjelinek 	if (cnt != 0 || lpc->lpc_rss != 0)
4900209230bSgjelinek 		debug("process %d: %llu/%llukB rfd/mdfd since last read\n",
4910209230bSgjelinek 		    (int)lpc->lpc_pid, (unsigned long long)cnt,
4920209230bSgjelinek 		    (unsigned long long)lpc->lpc_rss);
4937c478bd9Sstevel@tonic-gate 	if (lpc->lpc_prpageheader != NULL) {
4947c478bd9Sstevel@tonic-gate 		/*
4957c478bd9Sstevel@tonic-gate 		 * OR the two snapshots.
4967c478bd9Sstevel@tonic-gate 		 */
4977c478bd9Sstevel@tonic-gate #ifdef DEBUG
4987c478bd9Sstevel@tonic-gate 		lmapping_t *old = NULL;
4997c478bd9Sstevel@tonic-gate 		lmapping_t *new = NULL;
5007c478bd9Sstevel@tonic-gate 
5017c478bd9Sstevel@tonic-gate 		mklmapping(&new, pghp);
5027c478bd9Sstevel@tonic-gate 		mklmapping(&old, lpc->lpc_prpageheader);
5037c478bd9Sstevel@tonic-gate #endif /* DEBUG */
5047c478bd9Sstevel@tonic-gate 		OR_pagedata(lpc->lpc_prpageheader, pghp, &mappings_changed);
5057c478bd9Sstevel@tonic-gate #ifdef DEBUG
5067c478bd9Sstevel@tonic-gate 		if (((mappings_changed != 0) ^
5077c478bd9Sstevel@tonic-gate 		    (lmapping_dump_diff(old, new) != 0))) {
5087c478bd9Sstevel@tonic-gate 			debug("lmapping_changed inconsistent with lmapping\n");
5097c478bd9Sstevel@tonic-gate 			debug("old\n");
5107c478bd9Sstevel@tonic-gate 			lmapping_dump(old);
5117c478bd9Sstevel@tonic-gate 			debug("new\n");
5127c478bd9Sstevel@tonic-gate 			lmapping_dump(new);
5137c478bd9Sstevel@tonic-gate 			debug("ignored\n");
5147c478bd9Sstevel@tonic-gate 			lmapping_dump(lpc->lpc_ignore);
5157c478bd9Sstevel@tonic-gate 			ASSERT(0);
5167c478bd9Sstevel@tonic-gate 		}
5177c478bd9Sstevel@tonic-gate 		lmapping_free(&new);
5187c478bd9Sstevel@tonic-gate 		lmapping_free(&old);
5197c478bd9Sstevel@tonic-gate #endif /* DEBUG */
5207c478bd9Sstevel@tonic-gate 		free(lpc->lpc_prpageheader);
5217c478bd9Sstevel@tonic-gate 	} else
5227c478bd9Sstevel@tonic-gate 		mappings_changed = 1;
5237c478bd9Sstevel@tonic-gate 	lpc->lpc_prpageheader = pghp;
5240209230bSgjelinek 
5250209230bSgjelinek 	cnt = count_pages(pghp, 0, PG_MODIFIED | PG_REFERENCED, 0);
5260209230bSgjelinek 	if (cnt != 0 || lpc->lpc_rss != 0)
5270209230bSgjelinek 		debug("process %d: %llu/%llukB rfd/mdfd since hand swept\n",
5280209230bSgjelinek 		    (int)lpc->lpc_pid, (unsigned long long)cnt,
5290209230bSgjelinek 		    (unsigned long long)lpc->lpc_rss);
5307c478bd9Sstevel@tonic-gate 	if (mappings_changed != 0) {
5317c478bd9Sstevel@tonic-gate 		debug("process %d: mappings changed\n", (int)lpc->lpc_pid);
5327c478bd9Sstevel@tonic-gate 		if (mappings_changed_cb != NULL)
5337c478bd9Sstevel@tonic-gate 			mappings_changed_cb(lpc);
5347c478bd9Sstevel@tonic-gate 	}
5357c478bd9Sstevel@tonic-gate 	return (0);
5367c478bd9Sstevel@tonic-gate }
5377c478bd9Sstevel@tonic-gate 
5387c478bd9Sstevel@tonic-gate /*
5397c478bd9Sstevel@tonic-gate  * Attempt to page out a region of the given process's address space.  May
5407c478bd9Sstevel@tonic-gate  * return nonzero if not all of the pages may are pageable, for any reason.
5417c478bd9Sstevel@tonic-gate  */
5427c478bd9Sstevel@tonic-gate static int
pageout(pid_t pid,struct ps_prochandle * Pr,caddr_t start,caddr_t end)5437c478bd9Sstevel@tonic-gate pageout(pid_t pid, struct ps_prochandle *Pr, caddr_t start, caddr_t end)
5447c478bd9Sstevel@tonic-gate {
5457c478bd9Sstevel@tonic-gate 	int res;
5467c478bd9Sstevel@tonic-gate 
5477c478bd9Sstevel@tonic-gate 	if (end <= start)
5487c478bd9Sstevel@tonic-gate 		return (0);
5497c478bd9Sstevel@tonic-gate 
5507c478bd9Sstevel@tonic-gate 	errno = 0;
5517c478bd9Sstevel@tonic-gate 	res = pr_memcntl(Pr, start, (end - start), MC_SYNC,
5527c478bd9Sstevel@tonic-gate 	    (caddr_t)(MS_ASYNC | MS_INVALIDATE), 0, 0);
5537c478bd9Sstevel@tonic-gate 	debug_high("pr_memcntl [%p-%p): %d", (void *)start, (void *)end, res);
5547c478bd9Sstevel@tonic-gate 
5557c478bd9Sstevel@tonic-gate 	/*
5567c478bd9Sstevel@tonic-gate 	 * EBUSY indicates none of the pages have backing store allocated, or
5577c478bd9Sstevel@tonic-gate 	 * some pages were locked, which are less interesting than other
5587c478bd9Sstevel@tonic-gate 	 * conditions, which are noted.
5597c478bd9Sstevel@tonic-gate 	 */
5607c478bd9Sstevel@tonic-gate 	if (res != 0)
5617c478bd9Sstevel@tonic-gate 		if (errno == EBUSY)
5627c478bd9Sstevel@tonic-gate 			res = 0;
5637c478bd9Sstevel@tonic-gate 		else
5647c478bd9Sstevel@tonic-gate 			debug("%d: can't pageout %p+%llx (errno %d)", (int)pid,
5657c478bd9Sstevel@tonic-gate 			    (void *)start, (long long)(end - start), errno);
5667c478bd9Sstevel@tonic-gate 
5677c478bd9Sstevel@tonic-gate 	return (res);
5687c478bd9Sstevel@tonic-gate }
5697c478bd9Sstevel@tonic-gate 
5707c478bd9Sstevel@tonic-gate /*
5717c478bd9Sstevel@tonic-gate  * Compute the delta of the victim process's RSS since the last call.  If the
5727c478bd9Sstevel@tonic-gate  * psinfo cannot be obtained, no work is done, and no error is returned; it is
5737c478bd9Sstevel@tonic-gate  * up to the caller to detect the process' termination via other means.
5747c478bd9Sstevel@tonic-gate  */
5757c478bd9Sstevel@tonic-gate static int64_t
rss_delta(psinfo_t * new_psinfo,psinfo_t * old_psinfo,lprocess_t * vic)5767c478bd9Sstevel@tonic-gate rss_delta(psinfo_t *new_psinfo, psinfo_t *old_psinfo, lprocess_t *vic)
5777c478bd9Sstevel@tonic-gate {
5787c478bd9Sstevel@tonic-gate 	int64_t d_rss = 0;
5797c478bd9Sstevel@tonic-gate 
5807c478bd9Sstevel@tonic-gate 	if (get_psinfo(vic->lpc_pid, new_psinfo, vic->lpc_psinfo_fd,
5817c478bd9Sstevel@tonic-gate 	    lprocess_update_psinfo_fd_cb, vic, vic) == 0) {
5827c478bd9Sstevel@tonic-gate 		d_rss = (int64_t)new_psinfo->pr_rssize -
5837c478bd9Sstevel@tonic-gate 		    (int64_t)old_psinfo->pr_rssize;
5847c478bd9Sstevel@tonic-gate 		if (d_rss < 0)
5857c478bd9Sstevel@tonic-gate 			vic->lpc_collection->lcol_stat.lcols_pg_eff +=
5867c478bd9Sstevel@tonic-gate 			    (- d_rss);
5877c478bd9Sstevel@tonic-gate 		*old_psinfo = *new_psinfo;
5887c478bd9Sstevel@tonic-gate 	}
5897c478bd9Sstevel@tonic-gate 
5907c478bd9Sstevel@tonic-gate 	return (d_rss);
5917c478bd9Sstevel@tonic-gate }
5927c478bd9Sstevel@tonic-gate 
5937c478bd9Sstevel@tonic-gate static void
unignore_mappings(lprocess_t * lpc)5947c478bd9Sstevel@tonic-gate unignore_mappings(lprocess_t *lpc)
5957c478bd9Sstevel@tonic-gate {
5967c478bd9Sstevel@tonic-gate 	lmapping_free(&lpc->lpc_ignore);
5977c478bd9Sstevel@tonic-gate }
5987c478bd9Sstevel@tonic-gate 
5997c478bd9Sstevel@tonic-gate static void
unignore_referenced_mappings(lprocess_t * lpc)6007c478bd9Sstevel@tonic-gate unignore_referenced_mappings(lprocess_t *lpc)
6017c478bd9Sstevel@tonic-gate {
6027c478bd9Sstevel@tonic-gate 	prpageheader_cur_t cur;
6037c478bd9Sstevel@tonic-gate 	void *vicaddr;
6047c478bd9Sstevel@tonic-gate 
6057c478bd9Sstevel@tonic-gate 	vicaddr = set_prpageheader_cur(&cur, lpc->lpc_prpageheader, NULL, -1);
6067c478bd9Sstevel@tonic-gate 	while (vicaddr != NULL) {
6077c478bd9Sstevel@tonic-gate 		if (((*(char *)cur.pr_pdaddr) & (PG_REFERENCED | PG_MODIFIED))
6087c478bd9Sstevel@tonic-gate 		    != 0) {
6097c478bd9Sstevel@tonic-gate 			if (lmapping_remove(&lpc->lpc_ignore, cur.pr_addr,
6107c478bd9Sstevel@tonic-gate 			    cur.pr_npage * cur.pr_pagesize) == 0)
6117c478bd9Sstevel@tonic-gate 				debug("removed mapping 0x%p+0t%llukB from"
6127c478bd9Sstevel@tonic-gate 				    " ignored set\n", (void *)cur.pr_addr,
6137c478bd9Sstevel@tonic-gate 				    (unsigned long long)(cur.pr_npage *
6147c478bd9Sstevel@tonic-gate 				    cur.pr_pagesize / 1024));
6157c478bd9Sstevel@tonic-gate 			vicaddr = (void *)advance_prpageheader_cur_nextmapping(
6167c478bd9Sstevel@tonic-gate 			    &cur);
6177c478bd9Sstevel@tonic-gate 		} else if ((vicaddr = advance_prpageheader_cur(&cur)) == NULL)
6187c478bd9Sstevel@tonic-gate 			vicaddr = (void *)advance_prpageheader_cur_nextmapping(
6197c478bd9Sstevel@tonic-gate 			    &cur);
6207c478bd9Sstevel@tonic-gate 	}
6217c478bd9Sstevel@tonic-gate }
6227c478bd9Sstevel@tonic-gate 
6237c478bd9Sstevel@tonic-gate /*
6247c478bd9Sstevel@tonic-gate  * Resume scanning, starting with the last victim, if it is still valid, or any
6257c478bd9Sstevel@tonic-gate  * other one, otherwise.
6267c478bd9Sstevel@tonic-gate  */
6277c478bd9Sstevel@tonic-gate void
scan(lcollection_t * lcol,int64_t excess)6287c478bd9Sstevel@tonic-gate scan(lcollection_t *lcol, int64_t excess)
6297c478bd9Sstevel@tonic-gate {
6307c478bd9Sstevel@tonic-gate 	lprocess_t *vic, *lpc;
6317c478bd9Sstevel@tonic-gate 	void *vicaddr, *endaddr, *nvicaddr;
6327c478bd9Sstevel@tonic-gate 	prpageheader_cur_t cur;
6337c478bd9Sstevel@tonic-gate 	psinfo_t old_psinfo, new_psinfo;
6347c478bd9Sstevel@tonic-gate 	hrtime_t scan_start;
6357c478bd9Sstevel@tonic-gate 	int res, resumed;
6367c478bd9Sstevel@tonic-gate 	uint64_t col_unrm_size;
6377c478bd9Sstevel@tonic-gate 
6387c478bd9Sstevel@tonic-gate 	st_debug(STDL_NORMAL, lcol, "starting to scan, excess %lldk\n",
6397c478bd9Sstevel@tonic-gate 	    (long long)excess);
6407c478bd9Sstevel@tonic-gate 
6417c478bd9Sstevel@tonic-gate 	/*
6427c478bd9Sstevel@tonic-gate 	 * Determine the address to start scanning at, depending on whether
6437c478bd9Sstevel@tonic-gate 	 * scanning can be resumed.
6447c478bd9Sstevel@tonic-gate 	 */
6457c478bd9Sstevel@tonic-gate 	endaddr = NULL;
6467c478bd9Sstevel@tonic-gate 	if ((vic = get_valid_victim(lcol, lcol->lcol_victim)) ==
6477c478bd9Sstevel@tonic-gate 	    lcol->lcol_victim && lcol->lcol_resaddr != NULL) {
6487c478bd9Sstevel@tonic-gate 		vicaddr = lcol->lcol_resaddr;
6497c478bd9Sstevel@tonic-gate 		st_debug(STDL_NORMAL, lcol, "resuming process %d\n",
6507c478bd9Sstevel@tonic-gate 		    (int)vic->lpc_pid);
6517c478bd9Sstevel@tonic-gate 		resumed = 1;
6527c478bd9Sstevel@tonic-gate 	} else {
6537c478bd9Sstevel@tonic-gate 		vicaddr = NULL;
6547c478bd9Sstevel@tonic-gate 		resumed = 0;
6557c478bd9Sstevel@tonic-gate 	}
6567c478bd9Sstevel@tonic-gate 
6577c478bd9Sstevel@tonic-gate 	scan_start = gethrtime();
6587c478bd9Sstevel@tonic-gate 	/*
6597c478bd9Sstevel@tonic-gate 	 * Obtain the most current pagedata for the processes that might be
6607c478bd9Sstevel@tonic-gate 	 * scanned, and remove from the ignored set any mappings which have
6617c478bd9Sstevel@tonic-gate 	 * referenced or modified pages (in the hopes that the pageability of
6627c478bd9Sstevel@tonic-gate 	 * the mapping's pages may have changed).  Determine if the
6637c478bd9Sstevel@tonic-gate 	 * unreferenced and unmodified portion is impossibly small to suffice
6647c478bd9Sstevel@tonic-gate 	 * to reduce the excess completely.  If so, ignore these bits so that
6657c478bd9Sstevel@tonic-gate 	 * even working set will be paged out.
6667c478bd9Sstevel@tonic-gate 	 */
6677c478bd9Sstevel@tonic-gate 	col_unrm_size = 0;
6687c478bd9Sstevel@tonic-gate 	lpc = vic;
6697c478bd9Sstevel@tonic-gate 	while (lpc != NULL && should_run) {
6707c478bd9Sstevel@tonic-gate 		if (merge_current_pagedata(lpc, unignore_mappings) != 0) {
6717c478bd9Sstevel@tonic-gate 			st_debug(STDL_NORMAL, lcol, "process %d:"
6727c478bd9Sstevel@tonic-gate 			    " exited/temporarily unscannable",
6737c478bd9Sstevel@tonic-gate 			    (int)lpc->lpc_pid);
6747c478bd9Sstevel@tonic-gate 			goto next;
6757c478bd9Sstevel@tonic-gate 		}
6767c478bd9Sstevel@tonic-gate 		debug("process %d: %llu/%llukB scannable\n", (int)lpc->lpc_pid,
6777c478bd9Sstevel@tonic-gate 		    (unsigned long long)(lpc->lpc_unrm = unrm_size(lpc)),
6787c478bd9Sstevel@tonic-gate 		    (unsigned long long)lpc->lpc_size);
6797c478bd9Sstevel@tonic-gate 		col_unrm_size += lpc->lpc_unrm = unrm_size(lpc);
6807c478bd9Sstevel@tonic-gate 
6817c478bd9Sstevel@tonic-gate 		if ((lcol->lcol_stat.lcols_scan_count %
6827c478bd9Sstevel@tonic-gate 		    RCAPD_IGNORED_SET_FLUSH_IVAL) == 0) {
6837c478bd9Sstevel@tonic-gate 			/*
6847c478bd9Sstevel@tonic-gate 			 * Periodically clear the set of ignored mappings.
6857c478bd9Sstevel@tonic-gate 			 * This will allow processes whose ignored segments'
6867c478bd9Sstevel@tonic-gate 			 * pageability have changed (without a corresponding
6877c478bd9Sstevel@tonic-gate 			 * reference or modification to a page) to be
6887c478bd9Sstevel@tonic-gate 			 * recognized.
6897c478bd9Sstevel@tonic-gate 			 */
6907c478bd9Sstevel@tonic-gate 			if (lcol->lcol_stat.lcols_scan_count > 0)
6917c478bd9Sstevel@tonic-gate 				unignore_mappings(lpc);
6927c478bd9Sstevel@tonic-gate 		} else {
6937c478bd9Sstevel@tonic-gate 			/*
6947c478bd9Sstevel@tonic-gate 			 * Ensure mappings with referenced or modified pages
6957c478bd9Sstevel@tonic-gate 			 * are not in the ignored set.  Their usage might mean
6967c478bd9Sstevel@tonic-gate 			 * the condition which made them unpageable is gone.
6977c478bd9Sstevel@tonic-gate 			 */
6987c478bd9Sstevel@tonic-gate 			unignore_referenced_mappings(lpc);
6997c478bd9Sstevel@tonic-gate 		}
7007c478bd9Sstevel@tonic-gate next:
7017c478bd9Sstevel@tonic-gate 		lpc = lpc->lpc_next != NULL ? get_valid_victim(lcol,
7027c478bd9Sstevel@tonic-gate 		    lpc->lpc_next) : NULL;
7037c478bd9Sstevel@tonic-gate 	}
7047c478bd9Sstevel@tonic-gate 	if (col_unrm_size < excess) {
7057c478bd9Sstevel@tonic-gate 		lpc = vic;
7067c478bd9Sstevel@tonic-gate 		debug("will not reduce excess with only unreferenced pages\n");
7077c478bd9Sstevel@tonic-gate 		while (lpc != NULL && should_run) {
7087c478bd9Sstevel@tonic-gate 			if (lpc->lpc_prpageheader != NULL) {
7097c478bd9Sstevel@tonic-gate 				(void) count_pages(lpc->lpc_prpageheader,
7107c478bd9Sstevel@tonic-gate 				    CP_CLEAR, 0, 0);
7117c478bd9Sstevel@tonic-gate 				if (lpc->lpc_pgdata_fd >= 0) {
7127c478bd9Sstevel@tonic-gate 					if (rfd_close(lpc->lpc_pgdata_fd) != 0)
7137c478bd9Sstevel@tonic-gate 						debug("coud not close %d"
7147c478bd9Sstevel@tonic-gate 						    " lpc_pgdata_fd %d",
7157c478bd9Sstevel@tonic-gate 						    (int)lpc->lpc_pid,
7167c478bd9Sstevel@tonic-gate 						    lpc->lpc_pgdata_fd);
7177c478bd9Sstevel@tonic-gate 					lpc->lpc_pgdata_fd = -1;
7187c478bd9Sstevel@tonic-gate 				}
7197c478bd9Sstevel@tonic-gate 			}
7207c478bd9Sstevel@tonic-gate 			lpc = lpc->lpc_next != NULL ? get_valid_victim(lcol,
7217c478bd9Sstevel@tonic-gate 			    lpc->lpc_next) : NULL;
7227c478bd9Sstevel@tonic-gate 		}
7237c478bd9Sstevel@tonic-gate 	}
7247c478bd9Sstevel@tonic-gate 
7257c478bd9Sstevel@tonic-gate 	/*
7267c478bd9Sstevel@tonic-gate 	 * Examine each process for pages to remove until the excess is
7277c478bd9Sstevel@tonic-gate 	 * reduced.
7287c478bd9Sstevel@tonic-gate 	 */
7297c478bd9Sstevel@tonic-gate 	while (vic != NULL && excess > 0 && should_run) {
7307c478bd9Sstevel@tonic-gate 		/*
7317c478bd9Sstevel@tonic-gate 		 * Skip processes whose death was reported when the merging of
7327c478bd9Sstevel@tonic-gate 		 * pagedata was attempted.
7337c478bd9Sstevel@tonic-gate 		 */
7347c478bd9Sstevel@tonic-gate 		if (vic->lpc_prpageheader == NULL)
7357c478bd9Sstevel@tonic-gate 			goto nextproc;
7367c478bd9Sstevel@tonic-gate 
7377c478bd9Sstevel@tonic-gate 		/*
7387c478bd9Sstevel@tonic-gate 		 * Obtain optional segment residency information.
7397c478bd9Sstevel@tonic-gate 		 */
7407c478bd9Sstevel@tonic-gate 		if (lpc_xmap_update(vic) != 0)
7417c478bd9Sstevel@tonic-gate 			st_debug(STDL_NORMAL, lcol, "process %d: xmap"
7427c478bd9Sstevel@tonic-gate 			    " unreadable; ignoring", (int)vic->lpc_pid);
7437c478bd9Sstevel@tonic-gate 
7447c478bd9Sstevel@tonic-gate #ifdef DEBUG_MSG
7457c478bd9Sstevel@tonic-gate 		{
7467c478bd9Sstevel@tonic-gate 			void *ovicaddr = vicaddr;
7477c478bd9Sstevel@tonic-gate #endif /* DEBUG_MSG */
7487c478bd9Sstevel@tonic-gate 		vicaddr = set_prpageheader_cur_addr(&cur, vic->lpc_prpageheader,
7497c478bd9Sstevel@tonic-gate 		    vic->lpc_xmap, vic->lpc_nxmap, vicaddr);
7507c478bd9Sstevel@tonic-gate #ifdef DEBUG_MSG
7517c478bd9Sstevel@tonic-gate 			st_debug(STDL_NORMAL, lcol, "trying to resume from"
7527c478bd9Sstevel@tonic-gate 			    " 0x%p, next 0x%p\n", ovicaddr, vicaddr);
7537c478bd9Sstevel@tonic-gate 		}
7547c478bd9Sstevel@tonic-gate #endif /* DEBUG_MSG */
7557c478bd9Sstevel@tonic-gate 
7567c478bd9Sstevel@tonic-gate 		/*
7577c478bd9Sstevel@tonic-gate 		 * Take control of the victim.
7587c478bd9Sstevel@tonic-gate 		 */
7597c478bd9Sstevel@tonic-gate 		if (get_psinfo(vic->lpc_pid, &old_psinfo,
7607c478bd9Sstevel@tonic-gate 		    vic->lpc_psinfo_fd, lprocess_update_psinfo_fd_cb,
7617c478bd9Sstevel@tonic-gate 		    vic, vic) != 0) {
7627c478bd9Sstevel@tonic-gate 			st_debug(STDL_NORMAL, lcol, "cannot get %d psinfo",
7637c478bd9Sstevel@tonic-gate 			    (int)vic->lpc_pid);
7647c478bd9Sstevel@tonic-gate 			goto nextproc;
7657c478bd9Sstevel@tonic-gate 		}
7667c478bd9Sstevel@tonic-gate 		(void) rfd_reserve(PGRAB_FD_COUNT);
7677c478bd9Sstevel@tonic-gate 		if ((scan_pr = Pgrab(vic->lpc_pid, 0, &res)) == NULL) {
7687c478bd9Sstevel@tonic-gate 			st_debug(STDL_NORMAL, lcol, "cannot grab %d (%d)",
7697c478bd9Sstevel@tonic-gate 			    (int)vic->lpc_pid, res);
7707c478bd9Sstevel@tonic-gate 			goto nextproc;
7717c478bd9Sstevel@tonic-gate 		}
7727c478bd9Sstevel@tonic-gate 		if (Pcreate_agent(scan_pr) != 0) {
7737c478bd9Sstevel@tonic-gate 			st_debug(STDL_NORMAL, lcol, "cannot control %d",
7747c478bd9Sstevel@tonic-gate 			    (int)vic->lpc_pid);
7757c478bd9Sstevel@tonic-gate 			goto nextproc;
7767c478bd9Sstevel@tonic-gate 		}
7777c478bd9Sstevel@tonic-gate 		/*
7787c478bd9Sstevel@tonic-gate 		 * Be very pessimistic about the state of the agent LWP --
7797c478bd9Sstevel@tonic-gate 		 * verify it's actually stopped.
7807c478bd9Sstevel@tonic-gate 		 */
7817c478bd9Sstevel@tonic-gate 		errno = 0;
7827c478bd9Sstevel@tonic-gate 		while (Pstate(scan_pr) == PS_RUN)
7837c478bd9Sstevel@tonic-gate 			(void) Pwait(scan_pr, 0);
7847c478bd9Sstevel@tonic-gate 		if (Pstate(scan_pr) != PS_STOP) {
7857c478bd9Sstevel@tonic-gate 			st_debug(STDL_NORMAL, lcol, "agent not in expected"
7867c478bd9Sstevel@tonic-gate 			    " state (%d)", Pstate(scan_pr));
7877c478bd9Sstevel@tonic-gate 			goto nextproc;
7887c478bd9Sstevel@tonic-gate 		}
7897c478bd9Sstevel@tonic-gate 
7907c478bd9Sstevel@tonic-gate 		/*
7917c478bd9Sstevel@tonic-gate 		 * Within the victim's address space, find contiguous ranges of
7927c478bd9Sstevel@tonic-gate 		 * unreferenced pages to page out.
7937c478bd9Sstevel@tonic-gate 		 */
7947c478bd9Sstevel@tonic-gate 		st_debug(STDL_NORMAL, lcol, "paging out process %d\n",
7957c478bd9Sstevel@tonic-gate 		    (int)vic->lpc_pid);
7967c478bd9Sstevel@tonic-gate 		while (excess > 0 && vicaddr != NULL && should_run) {
7977c478bd9Sstevel@tonic-gate 			/*
7987c478bd9Sstevel@tonic-gate 			 * Skip mappings in the ignored set.  Mappings get
7997c478bd9Sstevel@tonic-gate 			 * placed in the ignored set when all their resident
8007c478bd9Sstevel@tonic-gate 			 * pages are unreference and unmodified, yet unpageable
8017c478bd9Sstevel@tonic-gate 			 * -- such as when they are locked, or involved in
8027c478bd9Sstevel@tonic-gate 			 * asynchronous I/O.  They will be scanned again when
8037c478bd9Sstevel@tonic-gate 			 * some page is referenced or modified.
8047c478bd9Sstevel@tonic-gate 			 */
8057c478bd9Sstevel@tonic-gate 			if (lmapping_contains(vic->lpc_ignore, cur.pr_addr,
8067c478bd9Sstevel@tonic-gate 			    cur.pr_npage * cur.pr_pagesize)) {
8077c478bd9Sstevel@tonic-gate 				debug("ignored mapping at 0x%p\n",
8087c478bd9Sstevel@tonic-gate 				    (void *)cur.pr_addr);
8097c478bd9Sstevel@tonic-gate 				/*
8107c478bd9Sstevel@tonic-gate 				 * Update statistics.
8117c478bd9Sstevel@tonic-gate 				 */
8127c478bd9Sstevel@tonic-gate 				lcol->lcol_stat.lcols_pg_att +=
8137c478bd9Sstevel@tonic-gate 				    cur.pr_npage * cur.pr_pagesize / 1024;
8147c478bd9Sstevel@tonic-gate 
8157c478bd9Sstevel@tonic-gate 				vicaddr = (void *)
8167c478bd9Sstevel@tonic-gate 				    advance_prpageheader_cur_nextmapping(&cur);
8177c478bd9Sstevel@tonic-gate 				continue;
8187c478bd9Sstevel@tonic-gate 			}
8197c478bd9Sstevel@tonic-gate 
8207c478bd9Sstevel@tonic-gate 			/*
8217c478bd9Sstevel@tonic-gate 			 * Determine a range of unreferenced pages to page out,
8227c478bd9Sstevel@tonic-gate 			 * and clear the R/M bits in the preceding referenced
8237c478bd9Sstevel@tonic-gate 			 * range.
8247c478bd9Sstevel@tonic-gate 			 */
8257c478bd9Sstevel@tonic-gate 			st_debug(STDL_HIGH, lcol, "start from mapping at 0x%p,"
8267c478bd9Sstevel@tonic-gate 			    " npage %llu\n", vicaddr,
8277c478bd9Sstevel@tonic-gate 			    (unsigned long long)cur.pr_npage);
8287c478bd9Sstevel@tonic-gate 			while (vicaddr != NULL &&
8297c478bd9Sstevel@tonic-gate 			    *(caddr_t)cur.pr_pdaddr != 0) {
8307c478bd9Sstevel@tonic-gate 				*(caddr_t)cur.pr_pdaddr = 0;
8317c478bd9Sstevel@tonic-gate 				vicaddr = advance_prpageheader_cur(&cur);
8327c478bd9Sstevel@tonic-gate 			}
8337c478bd9Sstevel@tonic-gate 			st_debug(STDL_HIGH, lcol, "advance, vicaddr %p, pdaddr"
8347c478bd9Sstevel@tonic-gate 			    " %p\n", vicaddr, cur.pr_pdaddr);
8357c478bd9Sstevel@tonic-gate 			if (vicaddr == NULL) {
8367c478bd9Sstevel@tonic-gate 				/*
8377c478bd9Sstevel@tonic-gate 				 * The end of mapping was reached before any
8387c478bd9Sstevel@tonic-gate 				 * unreferenced pages were seen.
8397c478bd9Sstevel@tonic-gate 				 */
8407c478bd9Sstevel@tonic-gate 				vicaddr = (void *)
8417c478bd9Sstevel@tonic-gate 				    advance_prpageheader_cur_nextmapping(&cur);
8427c478bd9Sstevel@tonic-gate 				continue;
8437c478bd9Sstevel@tonic-gate 			}
8447c478bd9Sstevel@tonic-gate 			do
8457c478bd9Sstevel@tonic-gate 				endaddr = advance_prpageheader_cur(&cur);
8467c478bd9Sstevel@tonic-gate 			while (endaddr != NULL &&
8477c478bd9Sstevel@tonic-gate 			    *(caddr_t)cur.pr_pdaddr == 0 &&
8487c478bd9Sstevel@tonic-gate 			    (((intptr_t)endaddr - (intptr_t)vicaddr) /
849*ec3255b6SToomas Soome 			    1024) < excess)
850*ec3255b6SToomas Soome 				;
8517c478bd9Sstevel@tonic-gate 			st_debug(STDL_HIGH, lcol, "endaddr %p, *cur %d\n",
8527c478bd9Sstevel@tonic-gate 			    endaddr, *(caddr_t)cur.pr_pdaddr);
8537c478bd9Sstevel@tonic-gate 
8547c478bd9Sstevel@tonic-gate 			/*
8557c478bd9Sstevel@tonic-gate 			 * Page out from vicaddr to the end of the mapping, or
8567c478bd9Sstevel@tonic-gate 			 * endaddr if set, then continue scanning after
8577c478bd9Sstevel@tonic-gate 			 * endaddr, or the next mapping, if not set.
8587c478bd9Sstevel@tonic-gate 			 */
8597c478bd9Sstevel@tonic-gate 			nvicaddr = endaddr;
8607c478bd9Sstevel@tonic-gate 			if (endaddr == NULL)
8617c478bd9Sstevel@tonic-gate 				endaddr = (caddr_t)cur.pr_addr +
8627c478bd9Sstevel@tonic-gate 				    cur.pr_pagesize * cur.pr_npage;
8637c478bd9Sstevel@tonic-gate 			if (pageout(vic->lpc_pid, scan_pr, vicaddr, endaddr) ==
8647c478bd9Sstevel@tonic-gate 			    0) {
8657c478bd9Sstevel@tonic-gate 				int64_t d_rss, att;
8667c478bd9Sstevel@tonic-gate 				int willignore = 0;
8677c478bd9Sstevel@tonic-gate 
8687c478bd9Sstevel@tonic-gate 				excess += (d_rss = rss_delta(
8697c478bd9Sstevel@tonic-gate 				    &new_psinfo, &old_psinfo, vic));
8707c478bd9Sstevel@tonic-gate 
8717c478bd9Sstevel@tonic-gate 				/*
8727c478bd9Sstevel@tonic-gate 				 * If this pageout attempt was unsuccessful
8737c478bd9Sstevel@tonic-gate 				 * (the resident portion was not affected), and
8747c478bd9Sstevel@tonic-gate 				 * was for the whole mapping, put it in the
8757c478bd9Sstevel@tonic-gate 				 * ignored set, so it will not be scanned again
8767c478bd9Sstevel@tonic-gate 				 * until some page is referenced or modified.
8777c478bd9Sstevel@tonic-gate 				 */
8787c478bd9Sstevel@tonic-gate 				if (d_rss >= 0 && (void *)cur.pr_addr ==
8797c478bd9Sstevel@tonic-gate 				    vicaddr && (cur.pr_pagesize * cur.pr_npage)
8807c478bd9Sstevel@tonic-gate 				    == ((uintptr_t)endaddr -
8817c478bd9Sstevel@tonic-gate 				    (uintptr_t)vicaddr)) {
8827c478bd9Sstevel@tonic-gate 					if (lmapping_insert(
8837c478bd9Sstevel@tonic-gate 					    &vic->lpc_ignore,
8847c478bd9Sstevel@tonic-gate 					    cur.pr_addr,
8857c478bd9Sstevel@tonic-gate 					    cur.pr_pagesize *
8867c478bd9Sstevel@tonic-gate 					    cur.pr_npage) != 0)
8877c478bd9Sstevel@tonic-gate 						debug("not enough memory to add"
8887c478bd9Sstevel@tonic-gate 						    " mapping at %p to ignored"
8897c478bd9Sstevel@tonic-gate 						    " set\n",
8907c478bd9Sstevel@tonic-gate 						    (void *)cur.pr_addr);
8917c478bd9Sstevel@tonic-gate 					willignore = 1;
8927c478bd9Sstevel@tonic-gate 				}
8937c478bd9Sstevel@tonic-gate 
8947c478bd9Sstevel@tonic-gate 				/*
8957c478bd9Sstevel@tonic-gate 				 * Update statistics.
8967c478bd9Sstevel@tonic-gate 				 */
8977c478bd9Sstevel@tonic-gate 				lcol->lcol_stat.lcols_pg_att += (att =
8987c478bd9Sstevel@tonic-gate 				    ((intptr_t)endaddr - (intptr_t)vicaddr) /
8997c478bd9Sstevel@tonic-gate 				    1024);
9007c478bd9Sstevel@tonic-gate 				st_debug(STDL_NORMAL, lcol, "paged out 0x%p"
9017c478bd9Sstevel@tonic-gate 				    "+0t(%llu/%llu)kB%s\n", vicaddr,
9027c478bd9Sstevel@tonic-gate 				    (unsigned long long)((d_rss <
9037c478bd9Sstevel@tonic-gate 				    0) ? - d_rss : 0), (unsigned long long)att,
9047c478bd9Sstevel@tonic-gate 				    willignore ? " (will ignore)" : "");
9057c478bd9Sstevel@tonic-gate 			} else {
9067c478bd9Sstevel@tonic-gate 				st_debug(STDL_NORMAL, lcol,
9077c478bd9Sstevel@tonic-gate 				    "process %d: exited/unscannable\n",
9087c478bd9Sstevel@tonic-gate 				    (int)vic->lpc_pid);
9097c478bd9Sstevel@tonic-gate 				vic->lpc_unscannable = 1;
9107c478bd9Sstevel@tonic-gate 				goto nextproc;
9117c478bd9Sstevel@tonic-gate 			}
9127c478bd9Sstevel@tonic-gate 
9137c478bd9Sstevel@tonic-gate 			/*
9147c478bd9Sstevel@tonic-gate 			 * Update the statistics file, if it's time.
9157c478bd9Sstevel@tonic-gate 			 */
9167c478bd9Sstevel@tonic-gate 			check_update_statistics();
9177c478bd9Sstevel@tonic-gate 
9187c478bd9Sstevel@tonic-gate 			vicaddr = (nvicaddr != NULL) ? nvicaddr : (void
9197c478bd9Sstevel@tonic-gate 			    *)advance_prpageheader_cur_nextmapping(&cur);
9207c478bd9Sstevel@tonic-gate 		}
9217c478bd9Sstevel@tonic-gate 		excess += rss_delta(&new_psinfo, &old_psinfo, vic);
9227c478bd9Sstevel@tonic-gate 		st_debug(STDL_NORMAL, lcol, "done, excess %lld\n",
9237c478bd9Sstevel@tonic-gate 		    (long long)excess);
9247c478bd9Sstevel@tonic-gate nextproc:
9257c478bd9Sstevel@tonic-gate 		/*
9267c478bd9Sstevel@tonic-gate 		 * If a process was grabbed, release it, destroying its agent.
9277c478bd9Sstevel@tonic-gate 		 */
9287c478bd9Sstevel@tonic-gate 		if (scan_pr != NULL) {
9297c478bd9Sstevel@tonic-gate 			(void) Prelease(scan_pr, 0);
9307c478bd9Sstevel@tonic-gate 			scan_pr = NULL;
9317c478bd9Sstevel@tonic-gate 		}
9327c478bd9Sstevel@tonic-gate 		lcol->lcol_victim = vic;
9337c478bd9Sstevel@tonic-gate 		/*
9347c478bd9Sstevel@tonic-gate 		 * Scan the collection at most once.  Only if scanning was not
9357c478bd9Sstevel@tonic-gate 		 * aborted for any reason, and the end of lprocess has not been
9367c478bd9Sstevel@tonic-gate 		 * reached, determine the next victim and scan it.
9377c478bd9Sstevel@tonic-gate 		 */
9387c478bd9Sstevel@tonic-gate 		if (vic != NULL) {
9397c478bd9Sstevel@tonic-gate 			if (vic->lpc_next != NULL) {
9407c478bd9Sstevel@tonic-gate 				/*
9417c478bd9Sstevel@tonic-gate 				 * Determine the next process to be scanned.
9427c478bd9Sstevel@tonic-gate 				 */
9437c478bd9Sstevel@tonic-gate 				if (excess > 0) {
9447c478bd9Sstevel@tonic-gate 					vic = get_valid_victim(lcol,
9457c478bd9Sstevel@tonic-gate 					    vic->lpc_next);
9467c478bd9Sstevel@tonic-gate 					vicaddr = 0;
9477c478bd9Sstevel@tonic-gate 				}
9487c478bd9Sstevel@tonic-gate 			} else {
9497c478bd9Sstevel@tonic-gate 				/*
9507c478bd9Sstevel@tonic-gate 				 * A complete scan of the collection was made,
9517c478bd9Sstevel@tonic-gate 				 * so tick the scan counter and stop scanning
9527c478bd9Sstevel@tonic-gate 				 * until the next request.
9537c478bd9Sstevel@tonic-gate 				 */
9547c478bd9Sstevel@tonic-gate 				lcol->lcol_stat.lcols_scan_count++;
9557c478bd9Sstevel@tonic-gate 				lcol->lcol_stat.lcols_scan_time_complete
9567c478bd9Sstevel@tonic-gate 				    = lcol->lcol_stat.lcols_scan_time;
9577c478bd9Sstevel@tonic-gate 				/*
9587c478bd9Sstevel@tonic-gate 				 * If an excess still exists, tick the
9597c478bd9Sstevel@tonic-gate 				 * "ineffective scan" counter, signalling that
9607c478bd9Sstevel@tonic-gate 				 * the cap may be uneforceable.
9617c478bd9Sstevel@tonic-gate 				 */
9627c478bd9Sstevel@tonic-gate 				if (resumed == 0 && excess > 0)
9637c478bd9Sstevel@tonic-gate 					lcol->lcol_stat
9647c478bd9Sstevel@tonic-gate 					    .lcols_scan_ineffective++;
9657c478bd9Sstevel@tonic-gate 				/*
9667c478bd9Sstevel@tonic-gate 				 * Scanning should start at the beginning of
9677c478bd9Sstevel@tonic-gate 				 * the process list at the next request.
9687c478bd9Sstevel@tonic-gate 				 */
9697c478bd9Sstevel@tonic-gate 				if (excess > 0)
9707c478bd9Sstevel@tonic-gate 					vic = NULL;
9717c478bd9Sstevel@tonic-gate 			}
9727c478bd9Sstevel@tonic-gate 		}
9737c478bd9Sstevel@tonic-gate 	}
9747c478bd9Sstevel@tonic-gate 	lcol->lcol_stat.lcols_scan_time += (gethrtime() - scan_start);
9757c478bd9Sstevel@tonic-gate 	st_debug(STDL_HIGH, lcol, "done scanning; excess %lld\n",
9767c478bd9Sstevel@tonic-gate 	    (long long)excess);
9777c478bd9Sstevel@tonic-gate 
9787c478bd9Sstevel@tonic-gate 	lcol->lcol_resaddr = vicaddr;
9797c478bd9Sstevel@tonic-gate 	if (lcol->lcol_resaddr == NULL && lcol->lcol_victim != NULL) {
9807c478bd9Sstevel@tonic-gate 		lcol->lcol_victim = get_valid_victim(lcol,
9817c478bd9Sstevel@tonic-gate 		    lcol->lcol_victim->lpc_next);
9827c478bd9Sstevel@tonic-gate 	}
9837c478bd9Sstevel@tonic-gate }
9847c478bd9Sstevel@tonic-gate 
9857c478bd9Sstevel@tonic-gate /*
9867c478bd9Sstevel@tonic-gate  * Abort the scan in progress, and destroy the agent LWP of any grabbed
9877c478bd9Sstevel@tonic-gate  * processes.
9887c478bd9Sstevel@tonic-gate  */
9897c478bd9Sstevel@tonic-gate void
scan_abort(void)9907c478bd9Sstevel@tonic-gate scan_abort(void)
9917c478bd9Sstevel@tonic-gate {
9927c478bd9Sstevel@tonic-gate 	if (scan_pr != NULL)
993*ec3255b6SToomas Soome 		(void) Prelease(scan_pr, 0);
9947c478bd9Sstevel@tonic-gate }
9957c478bd9Sstevel@tonic-gate 
9967c478bd9Sstevel@tonic-gate static void
revoke_xmap(rfd_t * rfd)9977c478bd9Sstevel@tonic-gate revoke_xmap(rfd_t *rfd)
9987c478bd9Sstevel@tonic-gate {
9997c478bd9Sstevel@tonic-gate 	lprocess_t *lpc = rfd->rfd_data;
10007c478bd9Sstevel@tonic-gate 
10017c478bd9Sstevel@tonic-gate 	debug("revoking xmap for process %d\n", (int)lpc->lpc_pid);
10027c478bd9Sstevel@tonic-gate 	ASSERT(lpc->lpc_xmap_fd != -1);
10037c478bd9Sstevel@tonic-gate 	lpc->lpc_xmap_fd = -1;
10047c478bd9Sstevel@tonic-gate }
10057c478bd9Sstevel@tonic-gate 
10067c478bd9Sstevel@tonic-gate /*
10077c478bd9Sstevel@tonic-gate  * Retrieve the process's current xmap , which is used to determine the size of
10087c478bd9Sstevel@tonic-gate  * the resident portion of its segments.  Return zero if successful.
10097c478bd9Sstevel@tonic-gate  */
10107c478bd9Sstevel@tonic-gate static int
lpc_xmap_update(lprocess_t * lpc)10117c478bd9Sstevel@tonic-gate lpc_xmap_update(lprocess_t *lpc)
10127c478bd9Sstevel@tonic-gate {
10137c478bd9Sstevel@tonic-gate 	int res;
10147c478bd9Sstevel@tonic-gate 	struct stat st;
10157c478bd9Sstevel@tonic-gate 
10167c478bd9Sstevel@tonic-gate 	free(lpc->lpc_xmap);
10177c478bd9Sstevel@tonic-gate 	lpc->lpc_xmap = NULL;
10187c478bd9Sstevel@tonic-gate 	lpc->lpc_nxmap = -1;
10197c478bd9Sstevel@tonic-gate 
10207c478bd9Sstevel@tonic-gate 	if (lpc->lpc_xmap_fd == -1) {
10217c478bd9Sstevel@tonic-gate 		char pathbuf[PROC_PATH_MAX];
10227c478bd9Sstevel@tonic-gate 
10237c478bd9Sstevel@tonic-gate 		(void) snprintf(pathbuf, sizeof (pathbuf), "/proc/%d/xmap",
10247c478bd9Sstevel@tonic-gate 		    (int)lpc->lpc_pid);
10257c478bd9Sstevel@tonic-gate 		if ((lpc->lpc_xmap_fd = rfd_open(pathbuf, 1, RFD_XMAP,
10267c478bd9Sstevel@tonic-gate 		    revoke_xmap, lpc, O_RDONLY, 0)) < 0)
10277c478bd9Sstevel@tonic-gate 			return (-1);
10287c478bd9Sstevel@tonic-gate 	}
10297c478bd9Sstevel@tonic-gate 
10307c478bd9Sstevel@tonic-gate redo:
10317c478bd9Sstevel@tonic-gate 	errno = 0;
10327c478bd9Sstevel@tonic-gate 	if (fstat(lpc->lpc_xmap_fd, &st) != 0) {
10337c478bd9Sstevel@tonic-gate 		debug("cannot stat xmap\n");
10347c478bd9Sstevel@tonic-gate 		(void) rfd_close(lpc->lpc_xmap_fd);
10357c478bd9Sstevel@tonic-gate 		lpc->lpc_xmap_fd = -1;
10367c478bd9Sstevel@tonic-gate 		return (-1);
10377c478bd9Sstevel@tonic-gate 	}
10387c478bd9Sstevel@tonic-gate 
10397c478bd9Sstevel@tonic-gate 	if ((st.st_size % sizeof (*lpc->lpc_xmap)) != 0) {
10407c478bd9Sstevel@tonic-gate 		debug("xmap wrong size\n");
10417c478bd9Sstevel@tonic-gate 		(void) rfd_close(lpc->lpc_xmap_fd);
10427c478bd9Sstevel@tonic-gate 		lpc->lpc_xmap_fd = -1;
10437c478bd9Sstevel@tonic-gate 		return (-1);
10447c478bd9Sstevel@tonic-gate 	}
10457c478bd9Sstevel@tonic-gate 
10467c478bd9Sstevel@tonic-gate 	lpc->lpc_xmap = malloc(st.st_size);
10477c478bd9Sstevel@tonic-gate 	if (lpc->lpc_xmap == NULL) {
10487c478bd9Sstevel@tonic-gate 		debug("cannot malloc() %ld bytes for xmap", st.st_size);
10497c478bd9Sstevel@tonic-gate 		(void) rfd_close(lpc->lpc_xmap_fd);
10507c478bd9Sstevel@tonic-gate 		lpc->lpc_xmap_fd = -1;
10517c478bd9Sstevel@tonic-gate 		return (-1);
10527c478bd9Sstevel@tonic-gate 	}
10537c478bd9Sstevel@tonic-gate 
10547c478bd9Sstevel@tonic-gate 	if ((res = pread(lpc->lpc_xmap_fd, lpc->lpc_xmap, st.st_size, 0)) !=
10557c478bd9Sstevel@tonic-gate 	    st.st_size) {
10567c478bd9Sstevel@tonic-gate 		free(lpc->lpc_xmap);
10577c478bd9Sstevel@tonic-gate 		lpc->lpc_xmap = NULL;
10587c478bd9Sstevel@tonic-gate 		if (res > 0) {
10597c478bd9Sstevel@tonic-gate 			debug("xmap changed size, retrying\n");
10607c478bd9Sstevel@tonic-gate 			goto redo;
10617c478bd9Sstevel@tonic-gate 		} else {
10627c478bd9Sstevel@tonic-gate 			debug("cannot read xmap");
10637c478bd9Sstevel@tonic-gate 			return (-1);
10647c478bd9Sstevel@tonic-gate 		}
10657c478bd9Sstevel@tonic-gate 	}
10667c478bd9Sstevel@tonic-gate 	lpc->lpc_nxmap = st.st_size / sizeof (*lpc->lpc_xmap);
10677c478bd9Sstevel@tonic-gate 
10687c478bd9Sstevel@tonic-gate 	return (0);
10697c478bd9Sstevel@tonic-gate }
1070