xref: /illumos-gate/usr/src/uts/common/vm/vm_usage.c (revision c6f039c7)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * vm_usage
 *
 * This file implements the getvmusage() private system call.
 * getvmusage() counts the amount of resident memory pages and swap
 * reserved by the specified process collective.  A "process collective" is
 * the set of processes owned by a particular zone, project, task, or user.
 *
 * rss and swap are counted so that for a given process collective, a page is
 * only counted once.  For example, this means that if multiple processes in
 * the same project map the same page, then the project will only be charged
 * once for that page.  On the other hand, if two processes in different
 * projects map the same page, then both projects will be charged
 * for the page.
 *
 * The vm_getusage() calculation is implemented so that the first thread
 * performs the rss/swap counting.  Other callers will wait for that thread
 * to finish, copying the results.  This enables multiple rcapds and prstats
 * to consume data from the same calculation.  The results are also cached so
 * that a caller interested in recent results can just copy them instead of
 * starting a new calculation.  The caller passes the maximum age (in
 * seconds) of the data.  If the cached data is young enough, the cache is
 * copied, otherwise, a new calculation is executed and the cache is replaced
 * with the new data.
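 *
 * For example (illustrative): if rcapd requests results no older than 5
 * seconds, and a prstat instance issues the same request one second later
 * with the same flags, the second caller simply copies the cached results
 * rather than triggering a new scan of all processes.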
 *
 * The rss calculation for each process collective is as follows:
 *
 *   - Inspect flags, determine if counting rss for zones, projects, tasks,
 *     and/or users.
 *   - For each proc:
 *	- Figure out proc's collectives (zone, project, task, and/or user).
 *	- For each seg in proc's address space:
 *		- If seg is private:
 *			- Lookup anons in the amp.
 *			- For incore pages not previously visited for each of
 *			  the proc's collectives, add incore pagesize to each
 *			  collective.
 *			  Anons with a refcnt of 1 can be assumed to be not
 *			  previously visited.
 *			- For address ranges without anons in the amp:
 *				- Lookup pages in underlying vnode.
 *				- For incore pages not previously visited for
 *				  each of the proc's collectives, add incore
 *				  pagesize to each collective.
 *		- If seg is shared:
 *			- Lookup pages in the shared amp or vnode.
 *			- For incore pages not previously visited for each of
 *			  the proc's collectives, add incore pagesize to each
 *			  collective.
 *
 * Swap is reserved by private segments and shared anonymous segments.
 * The only shared anon segments which do not reserve swap are ISM segments
 * and schedctl segments, both of which can be identified by having
 * amp->swresv == 0.
 *
 * The swap calculation for each collective is as follows:
 *
 *   - Inspect flags, determine if counting swap for zones, projects, tasks,
 *     and/or users.
 *   - For each proc:
 *	- Figure out proc's collectives (zone, project, task, and/or user).
 *	- For each seg in proc's address space:
 *		- If seg is private:
 *			- Add svd->swresv pages to swap count for each of the
 *			  proc's collectives.
 *		- If seg is anon, shared, and amp->swresv != 0:
 *			- For address ranges in amp not previously visited for
 *			  each of the proc's collectives, add size of address
 *			  range to the swap count for each collective.
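 *
 *   For example (illustrative): two processes in the same project attached
 *   to the same SysV shared memory amp cause that amp's reserved range to
 *   be added to the project's swap count only once, while each process's
 *   private segments each contribute their own svd->swresv.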
 *
 * These two calculations are done simultaneously, with most of the work
 * being done in vmu_calculate_seg().  The results of the calculation are
 * copied into "vmu_data.vmu_cache->vmc_results".
 *
 * To perform the calculation, various things are tracked and cached:
 *
 *    - incore/not-incore page ranges for all vnodes.
 *	(vmu_data.vmu_all_vnodes_hash)
 *	This eliminates looking up the same page more than once.
 *
 *    - incore/not-incore page ranges for all shared amps.
 *	(vmu_data.vmu_all_amps_hash)
 *	This eliminates looking up the same page more than once.
 *
 *    - visited page ranges for each collective.
 *	   - per vnode (entity->vme_vnode_hash)
 *	   - per shared amp (entity->vme_amp_hash)
 *	For accurate counting of map-shared and COW-shared pages.
 *
 *    - visited private anons (refcnt > 1) for each collective.
 *	(entity->vme_anon_hash)
 *	For accurate counting of COW-shared pages.
 *
 * The common accounting structure is the vmu_entity_t, which represents
 * collectives:
 *
 *    - A zone.
 *    - A project, task, or user within a zone.
 *    - The entire system (vmu_data.vmu_system).
 *    - Each collapsed (col) project and user.  This means a given projid or
 *	uid, regardless of which zone the process is in.  For instance,
 *	project 0 in the global zone and project 0 in a non-global zone are
 *	the same collapsed project.
 *
 * Each entity structure tracks which pages have been already visited for
 * that entity (via previously inspected processes) so that these pages are
 * not double counted.
 */
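
/*
 * Illustrative usage (not part of this file): userland consumers such as
 * prstat and rcapd reach this code through the getvmusage(2) interface
 * declared in <sys/vm_usage.h>.  A minimal sketch, assuming the documented
 * wrapper and the vmu_id/vmu_rss_all/vmu_swap_all result fields:
 *
 *	vmusage_t buf[128];
 *	size_t nres = 128;
 *	size_t i;
 *
 *	(request per-zone totals, accepting cached results up to 10s old)
 *	if (getvmusage(VMUSAGE_ALL_ZONES, 10, buf, &nres) == 0) {
 *		for (i = 0; i < nres; i++)
 *			(void) printf("zone %d rss %llu swap %llu\n",
 *			    (int)buf[i].vmu_id,
 *			    (u_longlong_t)buf[i].vmu_rss_all,
 *			    (u_longlong_t)buf[i].vmu_swap_all);
 *	}
 *
 * If more results exist than *nres can hold, the call fails with errno set
 * to EOVERFLOW and nres is updated to the required count.
 */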

#include <sys/errno.h>
#include <sys/types.h>
#include <sys/zone.h>
#include <sys/proc.h>
#include <sys/project.h>
#include <sys/task.h>
#include <sys/thread.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <sys/modhash.h>
#include <sys/modhash_impl.h>
#include <sys/shm.h>
#include <sys/swap.h>
#include <sys/synch.h>
#include <sys/systm.h>
#include <sys/var.h>
#include <sys/vm_usage.h>
#include <sys/sunddi.h>
#include <sys/avl.h>
#include <vm/anon.h>
#include <vm/as.h>
#include <vm/seg_vn.h>
#include <vm/seg_spt.h>

#define	VMUSAGE_HASH_SIZE		512

#define	VMUSAGE_TYPE_VNODE		1
#define	VMUSAGE_TYPE_AMP		2
#define	VMUSAGE_TYPE_ANON		3

#define	VMUSAGE_BOUND_UNKNOWN		0
#define	VMUSAGE_BOUND_INCORE		1
#define	VMUSAGE_BOUND_NOT_INCORE	2

#define	ISWITHIN(node, addr)	((node)->vmb_start <= (addr) && \
				    (node)->vmb_end >= (addr) ? 1 : 0)

/*
 * Bounds for vnodes and shared amps.
 * Each bound is either entirely incore, entirely not in core, or
 * entirely unknown.  Bounds are stored in an AVL tree sorted by the start
 * member when in use; otherwise (on free or temporary lists) they are
 * strung together off of vmb_next.
 */
typedef struct vmu_bound {
	avl_node_t vmb_node;
	struct vmu_bound *vmb_next; /* NULL in tree else on free or temp list */
	pgcnt_t vmb_start;  /* page offset in vnode/amp on which bound starts */
	pgcnt_t	vmb_end;    /* page offset in vnode/amp on which bound ends */
	char	vmb_type;   /* One of VMUSAGE_BOUND_* */
} vmu_bound_t;

/*
 * Hash of visited objects (vnodes or shared amps).
 * The key is the address of the vnode or amp.  The bounds tree records the
 * known incore/not-incore bounds for the vnode/amp.
 */
typedef struct vmu_object {
	struct vmu_object	*vmo_next;	/* free list */
	caddr_t		vmo_key;
	short		vmo_type;
	avl_tree_t	vmo_bounds;
} vmu_object_t;

/*
 * Entity by which to count results.
 *
 * The entity structure keeps the current rss/swap counts for each entity
 * (zone, project, etc), and hashes of vm structures that have already
 * been visited for the entity.
 *
 * vme_next:	links the list of all entities currently being counted by
 *		vmu_calculate().
 *
 * vme_next_calc: links the list of entities related to the current process
 *		 being counted by vmu_calculate_proc().
 *
 * vmu_calculate_proc() walks all processes.  For each process, it makes a
 * list of the entities related to that process using vme_next_calc.  This
 * list changes each time vmu_calculate_proc() is called.
 */
typedef struct vmu_entity {
	struct vmu_entity *vme_next;
	struct vmu_entity *vme_next_calc;
	mod_hash_t	*vme_vnode_hash; /* vnodes visited for entity */
	mod_hash_t	*vme_amp_hash;	 /* shared amps visited for entity */
	mod_hash_t	*vme_anon_hash;	 /* COW anons visited for entity */
	vmusage_t	vme_result;	 /* identifies entity and results */
} vmu_entity_t;

/*
 * Hash of entities visited within a zone, and an entity for the zone
 * itself.
 */
typedef struct vmu_zone {
	struct vmu_zone	*vmz_next;	/* free list */
	id_t		vmz_id;
	vmu_entity_t	*vmz_zone;
	mod_hash_t	*vmz_projects_hash;
	mod_hash_t	*vmz_tasks_hash;
	mod_hash_t	*vmz_rusers_hash;
	mod_hash_t	*vmz_eusers_hash;
} vmu_zone_t;

/*
 * Cache of results from last calculation
 */
typedef struct vmu_cache {
	vmusage_t	*vmc_results;	/* Results from last call to */
					/* vm_getusage(). */
	uint64_t	vmc_nresults;	/* Count of cached results */
	uint64_t	vmc_refcnt;	/* refcnt for free */
	uint_t		vmc_flags;	/* Flags for vm_getusage() */
	hrtime_t	vmc_timestamp;	/* when cache was created */
} vmu_cache_t;

/*
 * top level rss info for the system
 */
typedef struct vmu_data {
	kmutex_t	vmu_lock;		/* Protects vmu_data */
	kcondvar_t	vmu_cv;			/* Used to signal threads */
						/* waiting for the calc */
						/* thread to finish */
	vmu_entity_t	*vmu_system;		/* Entity for tracking */
						/* rss/swap for all processes */
						/* in all zones */
	mod_hash_t	*vmu_zones_hash;	/* Zones visited */
	mod_hash_t	*vmu_projects_col_hash; /* These *_col_hash hashes */
	mod_hash_t	*vmu_rusers_col_hash;	/* keep track of entities, */
	mod_hash_t	*vmu_eusers_col_hash;	/* ignoring zoneid, in order */
						/* to implement VMUSAGE_COL_* */
						/* flags, which aggregate by */
						/* project or user regardless */
						/* of zoneid. */
	mod_hash_t	*vmu_all_vnodes_hash;	/* System wide visited vnodes */
						/* to track incore/not-incore */
	mod_hash_t	*vmu_all_amps_hash;	/* System wide visited shared */
						/* amps to track incore/not- */
						/* incore */
	vmu_entity_t	*vmu_entities;		/* Linked list of entities */
	size_t		vmu_nentities;		/* Count of entities in list */
	vmu_cache_t	*vmu_cache;		/* Cached results */
	kthread_t	*vmu_calc_thread;	/* NULL, or thread running */
						/* vmu_calculate() */
	uint_t		vmu_calc_flags;		/* Flags being used by */
						/* currently running calc */
						/* thread */
	uint_t		vmu_pending_flags;	/* Flags of vm_getusage() */
						/* threads waiting for */
						/* calc thread to finish */
	uint_t		vmu_pending_waiters;	/* Number of threads waiting */
						/* for calc thread */
	vmu_bound_t	*vmu_free_bounds;
	vmu_object_t	*vmu_free_objects;
	vmu_entity_t	*vmu_free_entities;
	vmu_zone_t	*vmu_free_zones;
} vmu_data_t;

extern struct as kas;
extern proc_t *practive;
extern zone_t *global_zone;
extern struct seg_ops segvn_ops;
extern struct seg_ops segspt_shmops;

static vmu_data_t vmu_data;
static kmem_cache_t *vmu_bound_cache;
static kmem_cache_t *vmu_object_cache;

/*
 * Comparison routine for the AVL tree.  We base our comparison on vmb_start.
 */
static int
bounds_cmp(const void *bnd1, const void *bnd2)
{
	const vmu_bound_t *bound1 = bnd1;
	const vmu_bound_t *bound2 = bnd2;

	if (bound1->vmb_start == bound2->vmb_start) {
		return (0);
	}
	if (bound1->vmb_start < bound2->vmb_start) {
		return (-1);
	}

	return (1);
}

/*
 * Save a bound on the free list.
 */
static void
vmu_free_bound(vmu_bound_t *bound)
{
	bound->vmb_next = vmu_data.vmu_free_bounds;
	bound->vmb_start = 0;
	bound->vmb_end = 0;
	bound->vmb_type = 0;
	vmu_data.vmu_free_bounds = bound;
}

/*
 * Free an object, and all visited bound info.
 */
static void
vmu_free_object(mod_hash_val_t val)
{
	vmu_object_t *obj = (vmu_object_t *)val;
	avl_tree_t *tree = &(obj->vmo_bounds);
	vmu_bound_t *bound;
	void *cookie = NULL;

	while ((bound = avl_destroy_nodes(tree, &cookie)) != NULL)
		vmu_free_bound(bound);
	avl_destroy(tree);

	obj->vmo_type = 0;
	obj->vmo_next = vmu_data.vmu_free_objects;
	vmu_data.vmu_free_objects = obj;
}

/*
 * Free an entity, and hashes of visited objects for that entity.
 */
static void
vmu_free_entity(mod_hash_val_t val)
{
	vmu_entity_t *entity = (vmu_entity_t *)val;

	if (entity->vme_vnode_hash != NULL)
		i_mod_hash_clear_nosync(entity->vme_vnode_hash);
	if (entity->vme_amp_hash != NULL)
		i_mod_hash_clear_nosync(entity->vme_amp_hash);
	if (entity->vme_anon_hash != NULL)
		i_mod_hash_clear_nosync(entity->vme_anon_hash);

	entity->vme_next = vmu_data.vmu_free_entities;
	vmu_data.vmu_free_entities = entity;
}

/*
 * Free a zone entity, and all hashes of entities inside that zone,
 * which are projects, tasks, and users.
 */
static void
vmu_free_zone(mod_hash_val_t val)
{
	vmu_zone_t *zone = (vmu_zone_t *)val;

	if (zone->vmz_zone != NULL) {
		vmu_free_entity((mod_hash_val_t)zone->vmz_zone);
		zone->vmz_zone = NULL;
	}
	if (zone->vmz_projects_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_projects_hash);
	if (zone->vmz_tasks_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_tasks_hash);
	if (zone->vmz_rusers_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_rusers_hash);
	if (zone->vmz_eusers_hash != NULL)
		i_mod_hash_clear_nosync(zone->vmz_eusers_hash);
	zone->vmz_next = vmu_data.vmu_free_zones;
	vmu_data.vmu_free_zones = zone;
}

/*
 * Initialize synchronization primitives and hashes for system-wide tracking
 * of visited vnodes and shared amps.  Initialize results cache.
 */
void
vm_usage_init()
{
	mutex_init(&vmu_data.vmu_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vmu_data.vmu_cv, NULL, CV_DEFAULT, NULL);

	vmu_data.vmu_system = NULL;
	vmu_data.vmu_zones_hash = NULL;
	vmu_data.vmu_projects_col_hash = NULL;
	vmu_data.vmu_rusers_col_hash = NULL;
	vmu_data.vmu_eusers_col_hash = NULL;

	vmu_data.vmu_free_bounds = NULL;
	vmu_data.vmu_free_objects = NULL;
	vmu_data.vmu_free_entities = NULL;
	vmu_data.vmu_free_zones = NULL;

	vmu_data.vmu_all_vnodes_hash = mod_hash_create_ptrhash(
	    "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object,
	    sizeof (vnode_t));
	vmu_data.vmu_all_amps_hash = mod_hash_create_ptrhash(
	    "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
	    sizeof (struct anon_map));
	vmu_data.vmu_projects_col_hash = mod_hash_create_idhash(
	    "vmusage collapsed project hash", VMUSAGE_HASH_SIZE,
	    vmu_free_entity);
	vmu_data.vmu_rusers_col_hash = mod_hash_create_idhash(
	    "vmusage collapsed ruser hash", VMUSAGE_HASH_SIZE,
	    vmu_free_entity);
	vmu_data.vmu_eusers_col_hash = mod_hash_create_idhash(
	    "vmusage collapsed euser hash", VMUSAGE_HASH_SIZE,
	    vmu_free_entity);
	vmu_data.vmu_zones_hash = mod_hash_create_idhash(
	    "vmusage zone hash", VMUSAGE_HASH_SIZE, vmu_free_zone);

	vmu_bound_cache = kmem_cache_create("vmu_bound_cache",
	    sizeof (vmu_bound_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
	vmu_object_cache = kmem_cache_create("vmu_object_cache",
	    sizeof (vmu_object_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	vmu_data.vmu_entities = NULL;
	vmu_data.vmu_nentities = 0;

	vmu_data.vmu_cache = NULL;
	vmu_data.vmu_calc_thread = NULL;
	vmu_data.vmu_calc_flags = 0;
	vmu_data.vmu_pending_flags = 0;
	vmu_data.vmu_pending_waiters = 0;
}

/*
 * Allocate hashes for tracking vm objects visited for an entity.
 * Update list of entities.
 */
static vmu_entity_t *
vmu_alloc_entity(id_t id, int type, id_t zoneid)
{
	vmu_entity_t *entity;

	if (vmu_data.vmu_free_entities != NULL) {
		entity = vmu_data.vmu_free_entities;
		vmu_data.vmu_free_entities =
		    vmu_data.vmu_free_entities->vme_next;
		bzero(&entity->vme_result, sizeof (vmusage_t));
	} else {
		entity = kmem_zalloc(sizeof (vmu_entity_t), KM_SLEEP);
	}
	entity->vme_result.vmu_id = id;
	entity->vme_result.vmu_zoneid = zoneid;
	entity->vme_result.vmu_type = type;

	if (entity->vme_vnode_hash == NULL)
		entity->vme_vnode_hash = mod_hash_create_ptrhash(
		    "vmusage vnode hash", VMUSAGE_HASH_SIZE, vmu_free_object,
		    sizeof (vnode_t));

	if (entity->vme_amp_hash == NULL)
		entity->vme_amp_hash = mod_hash_create_ptrhash(
		    "vmusage amp hash", VMUSAGE_HASH_SIZE, vmu_free_object,
		    sizeof (struct anon_map));

	if (entity->vme_anon_hash == NULL)
		entity->vme_anon_hash = mod_hash_create_ptrhash(
		    "vmusage anon hash", VMUSAGE_HASH_SIZE,
		    mod_hash_null_valdtor, sizeof (struct anon));

	entity->vme_next = vmu_data.vmu_entities;
	vmu_data.vmu_entities = entity;
	vmu_data.vmu_nentities++;

	return (entity);
}

/*
 * Allocate a zone entity, and hashes for tracking visited vm objects
 * for projects, tasks, and users within that zone.
 */
static vmu_zone_t *
vmu_alloc_zone(id_t id)
{
	vmu_zone_t *zone;

	if (vmu_data.vmu_free_zones != NULL) {
		zone = vmu_data.vmu_free_zones;
		vmu_data.vmu_free_zones =
		    vmu_data.vmu_free_zones->vmz_next;
		zone->vmz_next = NULL;
		zone->vmz_zone = NULL;
	} else {
		zone = kmem_zalloc(sizeof (vmu_zone_t), KM_SLEEP);
	}

	zone->vmz_id = id;

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES)) != 0)
		zone->vmz_zone = vmu_alloc_entity(id, VMUSAGE_ZONE, id);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_PROJECTS |
	    VMUSAGE_ALL_PROJECTS)) != 0 && zone->vmz_projects_hash == NULL)
		zone->vmz_projects_hash = mod_hash_create_idhash(
		    "vmusage project hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS))
	    != 0 && zone->vmz_tasks_hash == NULL)
		zone->vmz_tasks_hash = mod_hash_create_idhash(
		    "vmusage task hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS))
	    != 0 && zone->vmz_rusers_hash == NULL)
		zone->vmz_rusers_hash = mod_hash_create_idhash(
		    "vmusage ruser hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	if ((vmu_data.vmu_calc_flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS))
	    != 0 && zone->vmz_eusers_hash == NULL)
		zone->vmz_eusers_hash = mod_hash_create_idhash(
		    "vmusage euser hash", VMUSAGE_HASH_SIZE, vmu_free_entity);

	return (zone);
}

/*
 * Allocate a structure for tracking visited bounds for a vm object.
 */
static vmu_object_t *
vmu_alloc_object(caddr_t key, int type)
{
	vmu_object_t *object;

	if (vmu_data.vmu_free_objects != NULL) {
		object = vmu_data.vmu_free_objects;
		vmu_data.vmu_free_objects =
		    vmu_data.vmu_free_objects->vmo_next;
	} else {
		object = kmem_cache_alloc(vmu_object_cache, KM_SLEEP);
	}

	object->vmo_next = NULL;
	object->vmo_key = key;
	object->vmo_type = type;
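	/*
	 * vmb_node is the first member of vmu_bound_t, so the AVL node
	 * offset passed to avl_create() below is 0.
	 */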
	avl_create(&(object->vmo_bounds), bounds_cmp, sizeof (vmu_bound_t), 0);

	return (object);
}

/*
 * Allocate and return a bound structure.
 */
static vmu_bound_t *
vmu_alloc_bound()
{
	vmu_bound_t *bound;

	if (vmu_data.vmu_free_bounds != NULL) {
		bound = vmu_data.vmu_free_bounds;
		vmu_data.vmu_free_bounds =
		    vmu_data.vmu_free_bounds->vmb_next;
	} else {
		bound = kmem_cache_alloc(vmu_bound_cache, KM_SLEEP);
	}

	bound->vmb_next = NULL;
	bound->vmb_start = 0;
	bound->vmb_end = 0;
	bound->vmb_type = 0;
	return (bound);
}

/*
 * vmu_find_insert_* functions implement hash lookup or allocate and
 * insert operations.
 */
static vmu_object_t *
vmu_find_insert_object(mod_hash_t *hash, caddr_t key, uint_t type)
{
	int ret;
	vmu_object_t *object;

	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
	    (mod_hash_val_t *)&object);
	if (ret != 0) {
		object = vmu_alloc_object(key, type);
		ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
		    (mod_hash_val_t)object, (mod_hash_hndl_t)0);
		ASSERT(ret == 0);
	}
	return (object);
}

static int
vmu_find_insert_anon(mod_hash_t *hash, caddr_t key)
{
	int ret;
	caddr_t val;

	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)key,
	    (mod_hash_val_t *)&val);

	if (ret == 0)
		return (0);

	ret = i_mod_hash_insert_nosync(hash, (mod_hash_key_t)key,
	    (mod_hash_val_t)key, (mod_hash_hndl_t)0);

	ASSERT(ret == 0);

	return (1);
}

static vmu_entity_t *
vmu_find_insert_entity(mod_hash_t *hash, id_t id, uint_t type, id_t zoneid)
{
	int ret;
	vmu_entity_t *entity;

	ret = i_mod_hash_find_nosync(hash, (mod_hash_key_t)(uintptr_t)id,
	    (mod_hash_val_t *)&entity);
	if (ret != 0) {
		entity = vmu_alloc_entity(id, type, zoneid);
		ret = i_mod_hash_insert_nosync(hash,
		    (mod_hash_key_t)(uintptr_t)id, (mod_hash_val_t)entity,
		    (mod_hash_hndl_t)0);
		ASSERT(ret == 0);
	}
	return (entity);
}

/*
 * Returns the list of object bounds between start and end.  New bounds
 * inserted by this call are given the specified type.
 *
 * Returns the number of pages covered if new bounds are created.  Returns 0
 * if the region between start and end consists entirely of existing bounds.
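 *
 * For example (illustrative): if the tree already holds a single bound
 * covering pages [5, 10] and the caller asks for [0, 20], new bounds
 * [0, 4] and [11, 20] of the given type are inserted and 15, the number of
 * newly covered pages, is returned.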
 */
static pgcnt_t
vmu_insert_lookup_object_bounds(vmu_object_t *ro, pgcnt_t start, pgcnt_t
    end, char type, vmu_bound_t **first, vmu_bound_t **last)
{
	avl_tree_t	*tree = &(ro->vmo_bounds);
	avl_index_t	where;
	vmu_bound_t	*walker, *tmp;
	pgcnt_t		ret = 0;

	ASSERT(start <= end);

	*first = *last = NULL;

	tmp = vmu_alloc_bound();
	tmp->vmb_start = start;
	tmp->vmb_type = type;

	/* Hopelessly optimistic case. */
	if ((walker = avl_find(tree, tmp, &where)) != NULL) {
		/* We got lucky. */
		vmu_free_bound(tmp);
		*first = walker;
	}

	if (walker == NULL) {
		/* Is start in the previous node? */
		walker = avl_nearest(tree, where, AVL_BEFORE);
		if (walker != NULL) {
			if (ISWITHIN(walker, start)) {
				/* We found start. */
				vmu_free_bound(tmp);
				*first = walker;
			}
		}
	}

	/*
	 * At this point, if *first is still NULL, then we
	 * didn't get a direct hit and start isn't covered
	 * by the previous node. We know that the next node
	 * must have a greater start value than we require
	 * because avl_find tells us where the AVL routines would
	 * insert our new node. We have some gap between the
	 * start we want and the next node.
	 */
	if (*first == NULL) {
		walker = avl_nearest(tree, where, AVL_AFTER);
		if (walker != NULL && walker->vmb_start <= end) {
			/* Fill the gap. */
			tmp->vmb_end = walker->vmb_start - 1;
			*first = tmp;
		} else {
			/* We have a gap over [start, end]. */
			tmp->vmb_end = end;
			*first = *last = tmp;
		}
		ret += tmp->vmb_end - tmp->vmb_start + 1;
		avl_insert(tree, tmp, where);
	}

	ASSERT(*first != NULL);

	if (*last != NULL) {
		/* We're done. */
		return (ret);
	}

	/*
	 * If we are here we still need to set *last and
	 * that may involve filling in some gaps.
	 */
	*last = *first;
	for (;;) {
		if (ISWITHIN(*last, end)) {
			/* We're done. */
			break;
		}
		walker = AVL_NEXT(tree, *last);
		if (walker == NULL || walker->vmb_start > end) {
			/* Bottom or mid tree with gap. */
			tmp = vmu_alloc_bound();
			tmp->vmb_start = (*last)->vmb_end + 1;
			tmp->vmb_end = end;
			tmp->vmb_type = type;
			ret += tmp->vmb_end - tmp->vmb_start + 1;
			avl_insert_here(tree, tmp, *last, AVL_AFTER);
			*last = tmp;
			break;
		} else {
			if ((*last)->vmb_end + 1 != walker->vmb_start) {
				/* Non-contiguous. */
				tmp = vmu_alloc_bound();
				tmp->vmb_start = (*last)->vmb_end + 1;
				tmp->vmb_end = walker->vmb_start - 1;
				tmp->vmb_type = type;
				ret += tmp->vmb_end - tmp->vmb_start + 1;
				avl_insert_here(tree, tmp, *last, AVL_AFTER);
				*last = tmp;
			} else {
				*last = walker;
			}
		}
	}

	return (ret);
}

/*
 * vmu_update_bounds()
 *
 * tree: avl_tree in which first and last hang.
 *
 * first, last:	list of contiguous bounds, of which zero or more are of
 *		type VMUSAGE_BOUND_UNKNOWN.
 *
 * new_tree: avl_tree in which new_first and new_last hang.
 *
 * new_first, new_last: list of contiguous bounds, of which none are of
 *			type VMUSAGE_BOUND_UNKNOWN.  These bounds are used to
 *			update the types of bounds in (first, last) with
 *			type VMUSAGE_BOUND_UNKNOWN.
 *
 * For the list of bounds (first, last), this function updates any bounds
 * with type VMUSAGE_BOUND_UNKNOWN using the type of the corresponding bound
 * in the list (new_first, new_last).
 *
 * If a bound of type VMUSAGE_BOUND_UNKNOWN spans multiple bounds in the list
 * (new_first, new_last), it will be split into multiple bounds.
 *
 * Return value:
 *	The number of pages in the list of bounds (first, last) that were of
 *	type VMUSAGE_BOUND_UNKNOWN, which have been updated to be of type
 *	VMUSAGE_BOUND_INCORE.
 *
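 * For example (illustrative): an unknown bound covering pages [0, 9],
 * matched against new bounds INCORE [0, 3] and NOT_INCORE [4, 9], is split
 * into an INCORE bound [0, 3] and a NOT_INCORE bound [4, 9], and 4 is
 * returned.
 *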
 */
static pgcnt_t
vmu_update_bounds(avl_tree_t *tree, vmu_bound_t **first, vmu_bound_t **last,
    avl_tree_t *new_tree, vmu_bound_t *new_first, vmu_bound_t *new_last)
{
	vmu_bound_t *next, *new_next, *tmp;
	pgcnt_t rss = 0;

	next = *first;
	new_next = new_first;

	/*
	 * Verify first and last bound are covered by new bounds if they
	 * have unknown type.
	 */
	ASSERT((*first)->vmb_type != VMUSAGE_BOUND_UNKNOWN ||
	    (*first)->vmb_start >= new_first->vmb_start);
	ASSERT((*last)->vmb_type != VMUSAGE_BOUND_UNKNOWN ||
	    (*last)->vmb_end <= new_last->vmb_end);
	for (;;) {
		/* If bound already has type, proceed to next bound. */
		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
			if (next == *last)
				break;
			next = AVL_NEXT(tree, next);
			continue;
		}
		while (new_next->vmb_end < next->vmb_start)
			new_next = AVL_NEXT(new_tree, new_next);
		ASSERT(new_next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
		next->vmb_type = new_next->vmb_type;
		if (new_next->vmb_end < next->vmb_end) {
			/* need to split bound */
			tmp = vmu_alloc_bound();
			tmp->vmb_type = VMUSAGE_BOUND_UNKNOWN;
			tmp->vmb_start = new_next->vmb_end + 1;
			tmp->vmb_end = next->vmb_end;
			avl_insert_here(tree, tmp, next, AVL_AFTER);
			next->vmb_end = new_next->vmb_end;
			if (*last == next)
				*last = tmp;
			if (next->vmb_type == VMUSAGE_BOUND_INCORE)
				rss += next->vmb_end - next->vmb_start + 1;
			next = tmp;
		} else {
			if (next->vmb_type == VMUSAGE_BOUND_INCORE)
				rss += next->vmb_end - next->vmb_start + 1;
			if (next == *last)
				break;
			next = AVL_NEXT(tree, next);
		}
	}
	return (rss);
}

/*
 * Merges adjacent bounds with same type between first and last bound.
 * After merge, the last pointer may point to a different bound, as the
 * incoming last bound may have been merged away.
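 *
 * For example (illustrative): adjacent bounds INCORE [0, 3] and
 * INCORE [4, 9] collapse into a single INCORE bound [0, 9].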
 */
static void
vmu_merge_bounds(avl_tree_t *tree, vmu_bound_t **first, vmu_bound_t **last)
{
	vmu_bound_t *current;
	vmu_bound_t *next;

	ASSERT(tree != NULL);
	ASSERT(*first != NULL);
	ASSERT(*last != NULL);

	current = *first;
	while (current != *last) {
		next = AVL_NEXT(tree, current);
		if ((current->vmb_end + 1) == next->vmb_start &&
		    current->vmb_type == next->vmb_type) {
			current->vmb_end = next->vmb_end;
			avl_remove(tree, next);
			vmu_free_bound(next);
			if (next == *last) {
				*last = current;
			}
		} else {
			current = AVL_NEXT(tree, current);
		}
	}
}

/*
 * Given an amp and a list of bounds, updates each bound's type with
 * VMUSAGE_BOUND_INCORE or VMUSAGE_BOUND_NOT_INCORE.
 *
 * If a bound is partially incore, it will be split into two bounds.
 * first and last may be modified, as bounds may be split into multiple
 * bounds if they are partially incore/not-incore.
 *
 * Set incore to B_TRUE if the bounds are already known to be incore.
 */
static void
vmu_amp_update_incore_bounds(avl_tree_t *tree, struct anon_map *amp,
    vmu_bound_t **first, vmu_bound_t **last, boolean_t incore)
{
	vmu_bound_t *next;
	vmu_bound_t *tmp;
	pgcnt_t index;
	short bound_type;
	short page_type;
	vnode_t *vn;
	anoff_t off;
	struct anon *ap;

	next = *first;
	/* Shared anon slots don't change once set. */
	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
	for (;;) {
		if (incore == B_TRUE)
			next->vmb_type = VMUSAGE_BOUND_INCORE;

		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
			if (next == *last)
				break;
			next = AVL_NEXT(tree, next);
			continue;
		}
		bound_type = next->vmb_type;
		index = next->vmb_start;
		while (index <= next->vmb_end) {

			/*
			 * These are used to determine how much to increment
			 * index when a large page is found.
			 */
			page_t *page;
			pgcnt_t pgcnt = 1;
			uint_t pgshft;
			pgcnt_t pgmsk;

			ap = anon_get_ptr(amp->ahp, index);
			if (ap != NULL)
				swap_xlate(ap, &vn, &off);

			if (ap != NULL && vn != NULL && vn->v_pages != NULL &&
			    (page = page_exists(vn, off)) != NULL) {
				page_type = VMUSAGE_BOUND_INCORE;
				if (page->p_szc > 0) {
					pgcnt = page_get_pagecnt(page->p_szc);
					pgshft = page_get_shift(page->p_szc);
					pgmsk = (0x1 << (pgshft - PAGESHIFT))
					    - 1;
				}
			} else {
				page_type = VMUSAGE_BOUND_NOT_INCORE;
			}
			if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
				next->vmb_type = page_type;
			} else if (next->vmb_type != page_type) {
				/*
				 * If current bound type does not match page
				 * type, need to split off new bound.
				 */
				tmp = vmu_alloc_bound();
				tmp->vmb_type = page_type;
				tmp->vmb_start = index;
				tmp->vmb_end = next->vmb_end;
				avl_insert_here(tree, tmp, next, AVL_AFTER);
				next->vmb_end = index - 1;
				if (*last == next)
					*last = tmp;
				next = tmp;
			}
			if (pgcnt > 1) {
				/*
				 * If inside a large page, jump to the next
				 * large page
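				 * (e.g., with a hypothetical 512-page large
				 * page, index 700 advances to
				 * (700 & ~511) + 512 == 1024)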
9720209230bSgjelinek 				index = (index & ~pgmsk) + pgcnt;
9730209230bSgjelinek 			} else {
9740209230bSgjelinek 				index++;
9750209230bSgjelinek 			}
9760209230bSgjelinek 		}
9770209230bSgjelinek 		if (next == *last) {
9780209230bSgjelinek 			ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
9790209230bSgjelinek 			break;
9800209230bSgjelinek 		} else
9819c998850SPeter Telford 			next = AVL_NEXT(tree, next);
9820209230bSgjelinek 	}
9830209230bSgjelinek 	ANON_LOCK_EXIT(&amp->a_rwlock);
9840209230bSgjelinek }
9850209230bSgjelinek 
9860209230bSgjelinek /*
9870209230bSgjelinek  * Same as vmu_amp_update_incore_bounds(), except for tracking
9880209230bSgjelinek  * incore-/not-incore for vnodes.
9890209230bSgjelinek  */
9900209230bSgjelinek static void
vmu_vnode_update_incore_bounds(avl_tree_t * tree,vnode_t * vnode,vmu_bound_t ** first,vmu_bound_t ** last)9919c998850SPeter Telford vmu_vnode_update_incore_bounds(avl_tree_t *tree, vnode_t *vnode,
9929c998850SPeter Telford     vmu_bound_t **first, vmu_bound_t **last)
9930209230bSgjelinek {
9940209230bSgjelinek 	vmu_bound_t *next;
9950209230bSgjelinek 	vmu_bound_t *tmp;
9960209230bSgjelinek 	pgcnt_t index;
9970209230bSgjelinek 	short bound_type;
9980209230bSgjelinek 	short page_type;
9990209230bSgjelinek 
10000209230bSgjelinek 	next = *first;
10010209230bSgjelinek 	for (;;) {
10020209230bSgjelinek 		if (vnode->v_pages == NULL)
10030209230bSgjelinek 			next->vmb_type = VMUSAGE_BOUND_NOT_INCORE;
10040209230bSgjelinek 
10050209230bSgjelinek 		if (next->vmb_type != VMUSAGE_BOUND_UNKNOWN) {
10060209230bSgjelinek 			if (next == *last)
10070209230bSgjelinek 				break;
10089c998850SPeter Telford 			next = AVL_NEXT(tree, next);
10090209230bSgjelinek 			continue;
10100209230bSgjelinek 		}
10110209230bSgjelinek 
10120209230bSgjelinek 		bound_type = next->vmb_type;
10130209230bSgjelinek 		index = next->vmb_start;
10140209230bSgjelinek 		while (index <= next->vmb_end) {
10150209230bSgjelinek 
10160209230bSgjelinek 			/*
10170209230bSgjelinek 			 * These are used to determine how much to increment
10180209230bSgjelinek 			 * index when a large page is found.
10190209230bSgjelinek 			 */
10200209230bSgjelinek 			page_t *page;
10210209230bSgjelinek 			pgcnt_t pgcnt = 1;
10220209230bSgjelinek 			uint_t pgshft;
10230209230bSgjelinek 			pgcnt_t pgmsk;
10240209230bSgjelinek 
10250209230bSgjelinek 			if (vnode->v_pages != NULL &&
10260209230bSgjelinek 			    (page = page_exists(vnode, ptob(index))) != NULL) {
10270209230bSgjelinek 				page_type = VMUSAGE_BOUND_INCORE;
10280209230bSgjelinek 				if (page->p_szc > 0) {
10290209230bSgjelinek 					pgcnt = page_get_pagecnt(page->p_szc);
10300209230bSgjelinek 					pgshft = page_get_shift(page->p_szc);
10310209230bSgjelinek 					pgmsk = (0x1 << (pgshft - PAGESHIFT))
10320209230bSgjelinek 					    - 1;
10330209230bSgjelinek 				}
10340209230bSgjelinek 			} else {
10350209230bSgjelinek 				page_type = VMUSAGE_BOUND_NOT_INCORE;
10360209230bSgjelinek 			}
10370209230bSgjelinek 			if (bound_type == VMUSAGE_BOUND_UNKNOWN) {
10380209230bSgjelinek 				next->vmb_type = page_type;
10390209230bSgjelinek 			} else if (next->vmb_type != page_type) {
10400209230bSgjelinek 				/*
10419c998850SPeter Telford 				 * If current bound type does not match page
10420209230bSgjelinek 				 * type, need to split off new bound.
10430209230bSgjelinek 				 */
10440209230bSgjelinek 				tmp = vmu_alloc_bound();
10450209230bSgjelinek 				tmp->vmb_type = page_type;
10460209230bSgjelinek 				tmp->vmb_start = index;
10470209230bSgjelinek 				tmp->vmb_end = next->vmb_end;
10489c998850SPeter Telford 				avl_insert_here(tree, tmp, next, AVL_AFTER);
10490209230bSgjelinek 				next->vmb_end = index - 1;
10500209230bSgjelinek 				if (*last == next)
10510209230bSgjelinek 					*last = tmp;
10520209230bSgjelinek 				next = tmp;
10530209230bSgjelinek 			}
10540209230bSgjelinek 			if (pgcnt > 1) {
10550209230bSgjelinek 				/*
10560209230bSgjelinek 				 * If inside large page, jump to next large
10570209230bSgjelinek 				 * page
10580209230bSgjelinek 				 */
10590209230bSgjelinek 				index = (index & ~pgmsk) + pgcnt;
10600209230bSgjelinek 			} else {
10610209230bSgjelinek 				index++;
10620209230bSgjelinek 			}
10630209230bSgjelinek 		}
10640209230bSgjelinek 		if (next == *last) {
10650209230bSgjelinek 			ASSERT(next->vmb_type != VMUSAGE_BOUND_UNKNOWN);
10660209230bSgjelinek 			break;
10670209230bSgjelinek 		} else
10689c998850SPeter Telford 			next = AVL_NEXT(tree, next);
10690209230bSgjelinek 	}
10700209230bSgjelinek }
10710209230bSgjelinek 
10720209230bSgjelinek /*
10730209230bSgjelinek  * Calculate the rss and swap consumed by a segment.  vmu_entities is the
10740209230bSgjelinek  * list of entities to visit.  For shared segments, the vnode or amp
10759c998850SPeter Telford  * is looked up in each entity to see if it has been already counted.  Private
10769c998850SPeter Telford  * anon pages are checked per entity to ensure that COW pages are not
10770209230bSgjelinek  * double counted.
10780209230bSgjelinek  *
10790209230bSgjelinek  * For private mapped files, first the amp is checked for private pages.
10800209230bSgjelinek  * Bounds not backed by the amp are looked up in the vnode for each entity
10810209230bSgjelinek  * to avoid double counting of private COW vnode pages.
10820209230bSgjelinek  */
10830209230bSgjelinek static void
vmu_calculate_seg(vmu_entity_t * vmu_entities,struct seg * seg)10840209230bSgjelinek vmu_calculate_seg(vmu_entity_t *vmu_entities, struct seg *seg)
10850209230bSgjelinek {
10860209230bSgjelinek 	struct segvn_data *svd;
10870209230bSgjelinek 	struct shm_data *shmd;
10880209230bSgjelinek 	struct spt_data *sptd;
10890209230bSgjelinek 	vmu_object_t *shared_object = NULL;
10900209230bSgjelinek 	vmu_object_t *entity_object = NULL;
10910209230bSgjelinek 	vmu_entity_t *entity;
10920209230bSgjelinek 	vmusage_t *result;
10930209230bSgjelinek 	vmu_bound_t *first = NULL;
10940209230bSgjelinek 	vmu_bound_t *last = NULL;
10950209230bSgjelinek 	vmu_bound_t *cur = NULL;
10960209230bSgjelinek 	vmu_bound_t *e_first = NULL;
10970209230bSgjelinek 	vmu_bound_t *e_last = NULL;
10980209230bSgjelinek 	vmu_bound_t *tmp;
10990209230bSgjelinek 	pgcnt_t p_index, s_index, p_start, p_end, s_start, s_end, rss, virt;
11000209230bSgjelinek 	struct anon_map *private_amp = NULL;
11010209230bSgjelinek 	boolean_t incore = B_FALSE;
11020209230bSgjelinek 	boolean_t shared = B_FALSE;
11030209230bSgjelinek 	int file = 0;
11040209230bSgjelinek 	pgcnt_t swresv = 0;
11050209230bSgjelinek 	pgcnt_t panon = 0;
11060209230bSgjelinek 
1107*c6f039c7SToomas Soome 	s_start = 0;
1108*c6f039c7SToomas Soome 	p_end = 0;
11099c998850SPeter Telford 	/* Can zero-length segments exist?  Not sure, so paranoia. */
11100209230bSgjelinek 	if (seg->s_size <= 0)
11110209230bSgjelinek 		return;
11120209230bSgjelinek 
11130209230bSgjelinek 	/*
11140209230bSgjelinek 	 * Figure out if there is a shared object (such as a named vnode or
11150209230bSgjelinek 	 * a shared amp); then figure out if there is a private amp, which
11160209230bSgjelinek 	 * identifies private pages.
11170209230bSgjelinek 	 */
11180209230bSgjelinek 	if (seg->s_ops == &segvn_ops) {
11190209230bSgjelinek 		svd = (struct segvn_data *)seg->s_data;
11209c998850SPeter Telford 		if (svd->type == MAP_SHARED) {
11210209230bSgjelinek 			shared = B_TRUE;
11229c998850SPeter Telford 		} else {
11230209230bSgjelinek 			swresv = svd->swresv;
11240209230bSgjelinek 
11259c998850SPeter Telford 			if (SEGVN_LOCK_TRYENTER(seg->s_as, &svd->lock,
11269c998850SPeter Telford 			    RW_READER) != 0) {
11279c998850SPeter Telford 				/*
11289c998850SPeter Telford 				 * Text replication anon maps can be shared
11299c998850SPeter Telford 				 * across all zones. Space used for text
11309c998850SPeter Telford 				 * replication is typically capped as a small %
11319c998850SPeter Telford 				 * of memory.  To keep it simple for now we
11329c998850SPeter Telford 				 * don't account for swap and memory space used
11339c998850SPeter Telford 				 * for text replication.
11349c998850SPeter Telford 				 */
11359c998850SPeter Telford 				if (svd->tr_state == SEGVN_TR_OFF &&
11369c998850SPeter Telford 				    svd->amp != NULL) {
11379c998850SPeter Telford 					private_amp = svd->amp;
11389c998850SPeter Telford 					p_start = svd->anon_index;
11399c998850SPeter Telford 					p_end = svd->anon_index +
11409c998850SPeter Telford 					    btop(seg->s_size) - 1;
11419c998850SPeter Telford 				}
11429c998850SPeter Telford 				SEGVN_LOCK_EXIT(seg->s_as, &svd->lock);
11439c998850SPeter Telford 			}
11449c998850SPeter Telford 		}
11450209230bSgjelinek 		if (svd->vp != NULL) {
11460209230bSgjelinek 			file = 1;
11470209230bSgjelinek 			shared_object = vmu_find_insert_object(
11480209230bSgjelinek 			    vmu_data.vmu_all_vnodes_hash, (caddr_t)svd->vp,
11490209230bSgjelinek 			    VMUSAGE_TYPE_VNODE);
11500209230bSgjelinek 			s_start = btop(svd->offset);
11510209230bSgjelinek 			s_end = btop(svd->offset + seg->s_size) - 1;
11520209230bSgjelinek 		}
11530209230bSgjelinek 		if (svd->amp != NULL && svd->type == MAP_SHARED) {
11540209230bSgjelinek 			ASSERT(shared_object == NULL);
11550209230bSgjelinek 			shared_object = vmu_find_insert_object(
11560209230bSgjelinek 			    vmu_data.vmu_all_amps_hash, (caddr_t)svd->amp,
11570209230bSgjelinek 			    VMUSAGE_TYPE_AMP);
11580209230bSgjelinek 			s_start = svd->anon_index;
11590209230bSgjelinek 			s_end = svd->anon_index + btop(seg->s_size) - 1;
11600209230bSgjelinek 			/* schedctl mappings are always in core */
11610209230bSgjelinek 			if (svd->amp->swresv == 0)
11620209230bSgjelinek 				incore = B_TRUE;
11630209230bSgjelinek 		}
11640209230bSgjelinek 	} else if (seg->s_ops == &segspt_shmops) {
11650209230bSgjelinek 		shared = B_TRUE;
11660209230bSgjelinek 		shmd = (struct shm_data *)seg->s_data;
11670209230bSgjelinek 		shared_object = vmu_find_insert_object(
11680209230bSgjelinek 		    vmu_data.vmu_all_amps_hash, (caddr_t)shmd->shm_amp,
11690209230bSgjelinek 		    VMUSAGE_TYPE_AMP);
11700209230bSgjelinek 		s_start = 0;
11710209230bSgjelinek 		s_end = btop(seg->s_size) - 1;
11720209230bSgjelinek 		sptd = shmd->shm_sptseg->s_data;
11730209230bSgjelinek 
11740209230bSgjelinek 		/* ism segments are always incore and do not reserve swap */
11750209230bSgjelinek 		if (sptd->spt_flags & SHM_SHARE_MMU)
11760209230bSgjelinek 			incore = B_TRUE;
11770209230bSgjelinek 
11780209230bSgjelinek 	} else {
11790209230bSgjelinek 		return;
11800209230bSgjelinek 	}
11810209230bSgjelinek 
11820209230bSgjelinek 	/*
11830209230bSgjelinek 	 * If there is a private amp, count anon pages that exist.  If an
11849c998850SPeter Telford 	 * anon has a refcnt > 1 (COW sharing), then save the anon in a
11850209230bSgjelinek 	 * hash so that it is not double counted.
11860209230bSgjelinek 	 *
11879c998850SPeter Telford 	 * If there is also a shared object, then figure out the bounds
11880209230bSgjelinek 	 * which are not mapped by the private amp.
11890209230bSgjelinek 	 */
11900209230bSgjelinek 	if (private_amp != NULL) {
11910209230bSgjelinek 
11929c998850SPeter Telford 		/* Enter as writer to prevent COW anons from being freed */
11930209230bSgjelinek 		ANON_LOCK_ENTER(&private_amp->a_rwlock, RW_WRITER);
11940209230bSgjelinek 
11950209230bSgjelinek 		p_index = p_start;
11960209230bSgjelinek 		s_index = s_start;
11970209230bSgjelinek 
11980209230bSgjelinek 		while (p_index <= p_end) {
11990209230bSgjelinek 
12000209230bSgjelinek 			pgcnt_t p_index_next;
12010209230bSgjelinek 			pgcnt_t p_bound_size;
12020209230bSgjelinek 			int cnt;
12030209230bSgjelinek 			anoff_t off;
12040209230bSgjelinek 			struct vnode *vn;
12050209230bSgjelinek 			struct anon *ap;
12060209230bSgjelinek 			page_t *page;		/* For handling of large */
12070209230bSgjelinek 			pgcnt_t pgcnt = 1;	/* pages */
12080209230bSgjelinek 			pgcnt_t pgstart;
12090209230bSgjelinek 			pgcnt_t pgend;
12100209230bSgjelinek 			uint_t pgshft;
12110209230bSgjelinek 			pgcnt_t pgmsk;
12120209230bSgjelinek 
12130209230bSgjelinek 			p_index_next = p_index;
12140209230bSgjelinek 			ap = anon_get_next_ptr(private_amp->ahp,
12150209230bSgjelinek 			    &p_index_next);
12160209230bSgjelinek 
12170209230bSgjelinek 			/*
12180209230bSgjelinek 			 * If next anon is past end of mapping, simulate
12190209230bSgjelinek 			 * end of anon so loop terminates.
12200209230bSgjelinek 			 */
12210209230bSgjelinek 			if (p_index_next > p_end) {
12220209230bSgjelinek 				p_index_next = p_end + 1;
12230209230bSgjelinek 				ap = NULL;
12240209230bSgjelinek 			}
12250209230bSgjelinek 			/*
12269c998850SPeter Telford 			 * For COW segments, keep track of bounds not backed
12270209230bSgjelinek 			 * by the private amp so they can be looked up in the
12280209230bSgjelinek 			 * backing vnode (see the sketch after this function).
12290209230bSgjelinek 			 */
12300209230bSgjelinek 			if (p_index_next != p_index) {
12310209230bSgjelinek 
12320209230bSgjelinek 				/*
12330209230bSgjelinek 				 * Compute index difference between anon and
12340209230bSgjelinek 				 * previous anon.
12350209230bSgjelinek 				 */
12360209230bSgjelinek 				p_bound_size = p_index_next - p_index - 1;
12370209230bSgjelinek 
12380209230bSgjelinek 				if (shared_object != NULL) {
12390209230bSgjelinek 					cur = vmu_alloc_bound();
12400209230bSgjelinek 					cur->vmb_start = s_index;
12410209230bSgjelinek 					cur->vmb_end = s_index + p_bound_size;
12420209230bSgjelinek 					cur->vmb_type = VMUSAGE_BOUND_UNKNOWN;
12430209230bSgjelinek 					if (first == NULL) {
12440209230bSgjelinek 						first = cur;
12450209230bSgjelinek 						last = cur;
12460209230bSgjelinek 					} else {
12470209230bSgjelinek 						last->vmb_next = cur;
12480209230bSgjelinek 						last = cur;
12490209230bSgjelinek 					}
12500209230bSgjelinek 				}
12510209230bSgjelinek 				p_index = p_index + p_bound_size + 1;
12520209230bSgjelinek 				s_index = s_index + p_bound_size + 1;
12530209230bSgjelinek 			}
12540209230bSgjelinek 
12550209230bSgjelinek 			/* Detect end of anons in amp */
12560209230bSgjelinek 			if (ap == NULL)
12570209230bSgjelinek 				break;
12580209230bSgjelinek 
12590209230bSgjelinek 			cnt = ap->an_refcnt;
12600209230bSgjelinek 			swap_xlate(ap, &vn, &off);
12610209230bSgjelinek 
12620209230bSgjelinek 			if (vn == NULL || vn->v_pages == NULL ||
12630209230bSgjelinek 			    (page = page_exists(vn, off)) == NULL) {
12640209230bSgjelinek 				p_index++;
12650209230bSgjelinek 				s_index++;
12660209230bSgjelinek 				continue;
12670209230bSgjelinek 			}
12680209230bSgjelinek 
12690209230bSgjelinek 			/*
12700209230bSgjelinek 			 * If a large page is found, compute the portion of
12710209230bSgjelinek 			 * the large page in the mapping, and advance the
12720209230bSgjelinek 			 * indices to the next large page.
12730209230bSgjelinek 			 */
12740209230bSgjelinek 			if (page->p_szc > 0) {
12750209230bSgjelinek 
12760209230bSgjelinek 				pgcnt = page_get_pagecnt(page->p_szc);
12770209230bSgjelinek 				pgshft = page_get_shift(page->p_szc);
12780209230bSgjelinek 				pgmsk = (0x1 << (pgshft - PAGESHIFT)) - 1;
12790209230bSgjelinek 
12800209230bSgjelinek 				/* First page in large page */
12810209230bSgjelinek 				pgstart = p_index & ~pgmsk;
12820209230bSgjelinek 				/* Last page in large page */
12830209230bSgjelinek 				pgend = pgstart + pgcnt - 1;
12840209230bSgjelinek 				/*
12850209230bSgjelinek 				 * Artificially end the page if it extends
12860209230bSgjelinek 				 * past the end of the mapping.
12870209230bSgjelinek 				 */
12880209230bSgjelinek 				if (pgend > p_end)
12890209230bSgjelinek 					pgend = p_end;
12900209230bSgjelinek 
12910209230bSgjelinek 				/*
12920209230bSgjelinek 				 * Compute number of pages from large page
12930209230bSgjelinek 				 * which are mapped.
12940209230bSgjelinek 				 */
12950209230bSgjelinek 				pgcnt = pgend - p_index + 1;
12960209230bSgjelinek 
12970209230bSgjelinek 				/*
12980209230bSgjelinek 				 * Point the indices at the page after the
12990209230bSgjelinek 				 * large page, or after the end of the mapping.
13000209230bSgjelinek 				 */
13010209230bSgjelinek 				p_index += pgcnt;
13020209230bSgjelinek 				s_index += pgcnt;
13030209230bSgjelinek 			} else {
13040209230bSgjelinek 				p_index++;
13050209230bSgjelinek 				s_index++;
13060209230bSgjelinek 			}
13070209230bSgjelinek 
13080209230bSgjelinek 			/*
13090209230bSgjelinek 			 * Assume anon structs with a refcnt
13109c998850SPeter Telford 			 * of 1 are not COW shared, so there
13110209230bSgjelinek 			 * is no reason to track them per entity.
13120209230bSgjelinek 			 */
13130209230bSgjelinek 			if (cnt == 1) {
13140209230bSgjelinek 				panon += pgcnt;
13150209230bSgjelinek 				continue;
13160209230bSgjelinek 			}
13170209230bSgjelinek 			for (entity = vmu_entities; entity != NULL;
13180209230bSgjelinek 			    entity = entity->vme_next_calc) {
13190209230bSgjelinek 
13200209230bSgjelinek 				result = &entity->vme_result;
13210209230bSgjelinek 				/*
13229c998850SPeter Telford 				 * Track COW anons per entity so
13230209230bSgjelinek 				 * they are not double counted.
13240209230bSgjelinek 				 */
13250209230bSgjelinek 				if (vmu_find_insert_anon(entity->vme_anon_hash,
13260209230bSgjelinek 				    (caddr_t)ap) == 0)
13270209230bSgjelinek 					continue;
13280209230bSgjelinek 
13290209230bSgjelinek 				result->vmu_rss_all += (pgcnt << PAGESHIFT);
13300209230bSgjelinek 				result->vmu_rss_private +=
13310209230bSgjelinek 				    (pgcnt << PAGESHIFT);
13320209230bSgjelinek 			}
13330209230bSgjelinek 		}
13340209230bSgjelinek 		ANON_LOCK_EXIT(&private_amp->a_rwlock);
13350209230bSgjelinek 	}
13360209230bSgjelinek 
13370209230bSgjelinek 	/* Add up resident anon and swap reserved for private mappings */
13380209230bSgjelinek 	if (swresv > 0 || panon > 0) {
13390209230bSgjelinek 		for (entity = vmu_entities; entity != NULL;
13400209230bSgjelinek 		    entity = entity->vme_next_calc) {
13410209230bSgjelinek 			result = &entity->vme_result;
13420209230bSgjelinek 			result->vmu_swap_all += swresv;
13430209230bSgjelinek 			result->vmu_swap_private += swresv;
13440209230bSgjelinek 			result->vmu_rss_all += (panon << PAGESHIFT);
13450209230bSgjelinek 			result->vmu_rss_private += (panon << PAGESHIFT);
13460209230bSgjelinek 		}
13470209230bSgjelinek 	}
13480209230bSgjelinek 
13490209230bSgjelinek 	/* Compute resident pages backing shared amp or named vnode */
13500209230bSgjelinek 	if (shared_object != NULL) {
1351a2977fcfSPeter Telford 		avl_tree_t *tree = &(shared_object->vmo_bounds);
1352a2977fcfSPeter Telford 
13530209230bSgjelinek 		if (first == NULL) {
13540209230bSgjelinek 			/*
13550209230bSgjelinek 			 * No private amp, or private amp has no anon
13560209230bSgjelinek 			 * structs.  This means entire segment is backed by
13570209230bSgjelinek 			 * the shared object.
13580209230bSgjelinek 			 */
13590209230bSgjelinek 			first = vmu_alloc_bound();
13600209230bSgjelinek 			first->vmb_start = s_start;
13610209230bSgjelinek 			first->vmb_end = s_end;
13620209230bSgjelinek 			first->vmb_type = VMUSAGE_BOUND_UNKNOWN;
13630209230bSgjelinek 		}
13640209230bSgjelinek 		/*
13650209230bSgjelinek 		 * Iterate bounds not backed by private amp, and compute
13660209230bSgjelinek 		 * resident pages.
13670209230bSgjelinek 		 */
13680209230bSgjelinek 		cur = first;
13690209230bSgjelinek 		while (cur != NULL) {
13700209230bSgjelinek 
13710209230bSgjelinek 			if (vmu_insert_lookup_object_bounds(shared_object,
13720209230bSgjelinek 			    cur->vmb_start, cur->vmb_end, VMUSAGE_BOUND_UNKNOWN,
13730209230bSgjelinek 			    &first, &last) > 0) {
13740209230bSgjelinek 				/* new bounds, find incore/not-incore */
13750209230bSgjelinek 				if (shared_object->vmo_type ==
13769c998850SPeter Telford 				    VMUSAGE_TYPE_VNODE) {
13770209230bSgjelinek 					vmu_vnode_update_incore_bounds(
13789c998850SPeter Telford 					    tree,
13790209230bSgjelinek 					    (vnode_t *)
13800209230bSgjelinek 					    shared_object->vmo_key, &first,
13810209230bSgjelinek 					    &last);
13829c998850SPeter Telford 				} else {
13830209230bSgjelinek 					vmu_amp_update_incore_bounds(
13849c998850SPeter Telford 					    tree,
13850209230bSgjelinek 					    (struct anon_map *)
13860209230bSgjelinek 					    shared_object->vmo_key, &first,
13870209230bSgjelinek 					    &last, incore);
13889c998850SPeter Telford 				}
13899c998850SPeter Telford 				vmu_merge_bounds(tree, &first, &last);
13900209230bSgjelinek 			}
13910209230bSgjelinek 			for (entity = vmu_entities; entity != NULL;
13920209230bSgjelinek 			    entity = entity->vme_next_calc) {
13939c998850SPeter Telford 				avl_tree_t *e_tree;
13940209230bSgjelinek 
13950209230bSgjelinek 				result = &entity->vme_result;
13960209230bSgjelinek 
13970209230bSgjelinek 				entity_object = vmu_find_insert_object(
13980209230bSgjelinek 				    shared_object->vmo_type ==
13990209230bSgjelinek 				    VMUSAGE_TYPE_VNODE ? entity->vme_vnode_hash:
140000792c0bS 				    entity->vme_amp_hash,
140100792c0bS 				    shared_object->vmo_key,
140200792c0bS 				    shared_object->vmo_type);
14030209230bSgjelinek 
14040209230bSgjelinek 				virt = vmu_insert_lookup_object_bounds(
14050209230bSgjelinek 				    entity_object, cur->vmb_start, cur->vmb_end,
14060209230bSgjelinek 				    VMUSAGE_BOUND_UNKNOWN, &e_first, &e_last);
14070209230bSgjelinek 
14080209230bSgjelinek 				if (virt == 0)
14090209230bSgjelinek 					continue;
14100209230bSgjelinek 				/*
14110209230bSgjelinek 				 * Range visited for this entity
14120209230bSgjelinek 				 */
14139c998850SPeter Telford 				e_tree = &(entity_object->vmo_bounds);
14149c998850SPeter Telford 				rss = vmu_update_bounds(e_tree, &e_first,
14159c998850SPeter Telford 				    &e_last, tree, first, last);
14160209230bSgjelinek 				result->vmu_rss_all += (rss << PAGESHIFT);
14170209230bSgjelinek 				if (shared == B_TRUE && file == B_FALSE) {
14180209230bSgjelinek 					/* shared anon mapping */
14190209230bSgjelinek 					result->vmu_swap_all +=
14200209230bSgjelinek 					    (virt << PAGESHIFT);
14210209230bSgjelinek 					result->vmu_swap_shared +=
14220209230bSgjelinek 					    (virt << PAGESHIFT);
14230209230bSgjelinek 					result->vmu_rss_shared +=
14240209230bSgjelinek 					    (rss << PAGESHIFT);
14250209230bSgjelinek 				} else if (shared == B_TRUE && file == B_TRUE) {
14260209230bSgjelinek 					/* shared file mapping */
14270209230bSgjelinek 					result->vmu_rss_shared +=
14280209230bSgjelinek 					    (rss << PAGESHIFT);
14290209230bSgjelinek 				} else if (shared == B_FALSE &&
14300209230bSgjelinek 				    file == B_TRUE) {
14310209230bSgjelinek 					/* private file mapping */
14320209230bSgjelinek 					result->vmu_rss_private +=
14330209230bSgjelinek 					    (rss << PAGESHIFT);
14340209230bSgjelinek 				}
14359c998850SPeter Telford 				vmu_merge_bounds(e_tree, &e_first, &e_last);
14360209230bSgjelinek 			}
14370209230bSgjelinek 			tmp = cur;
14380209230bSgjelinek 			cur = cur->vmb_next;
14390209230bSgjelinek 			vmu_free_bound(tmp);
14400209230bSgjelinek 		}
14410209230bSgjelinek 	}
14420209230bSgjelinek }
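
/*
 * Bound-splitting sketch (illustrative, not part of the build): for a
 * MAP_PRIVATE file mapping of eight pages in which the process has
 * COW-faulted anon pages at indices 2 and 5, the amp walk above emits
 * the vnode-backed bounds
 *
 *	[0, 1]  [3, 4]  [6, 7]		(vmb_type == VMUSAGE_BOUND_UNKNOWN)
 *
 * which are then resolved against the backing vnode for each entity,
 * while the two anon pages are charged as private rss (deduplicated
 * through the per-entity anon hash when their refcnt is greater
 * than 1).
 */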
14430209230bSgjelinek 
14440209230bSgjelinek /*
14450209230bSgjelinek  * Based on the current calculation flags, find the entities to which
14460209230bSgjelinek  * the process belongs.  Then calculate each segment in the process's
14470209230bSgjelinek  * address space for each relevant entity (see the sketch below).
14480209230bSgjelinek  */
14490209230bSgjelinek static void
14500209230bSgjelinek vmu_calculate_proc(proc_t *p)
14510209230bSgjelinek {
14520209230bSgjelinek 	vmu_entity_t *entities = NULL;
14530209230bSgjelinek 	vmu_zone_t *zone;
14540209230bSgjelinek 	vmu_entity_t *tmp;
14550209230bSgjelinek 	struct as *as;
14560209230bSgjelinek 	struct seg *seg;
14570209230bSgjelinek 	int ret;
14580209230bSgjelinek 
14590209230bSgjelinek 	/* Figure out which entities are being computed */
14600209230bSgjelinek 	if ((vmu_data.vmu_system) != NULL) {
14610209230bSgjelinek 		tmp = vmu_data.vmu_system;
14620209230bSgjelinek 		tmp->vme_next_calc = entities;
14630209230bSgjelinek 		entities = tmp;
14640209230bSgjelinek 	}
14650209230bSgjelinek 	if (vmu_data.vmu_calc_flags &
14660209230bSgjelinek 	    (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES | VMUSAGE_PROJECTS |
14670209230bSgjelinek 	    VMUSAGE_ALL_PROJECTS | VMUSAGE_TASKS | VMUSAGE_ALL_TASKS |
14680209230bSgjelinek 	    VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_EUSERS |
14690209230bSgjelinek 	    VMUSAGE_ALL_EUSERS)) {
14700209230bSgjelinek 		ret = i_mod_hash_find_nosync(vmu_data.vmu_zones_hash,
14710209230bSgjelinek 		    (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id,
14720209230bSgjelinek 		    (mod_hash_val_t *)&zone);
14730209230bSgjelinek 		if (ret != 0) {
14740209230bSgjelinek 			zone = vmu_alloc_zone(p->p_zone->zone_id);
14750209230bSgjelinek 			ret = i_mod_hash_insert_nosync(vmu_data.vmu_zones_hash,
14760209230bSgjelinek 			    (mod_hash_key_t)(uintptr_t)p->p_zone->zone_id,
14770209230bSgjelinek 			    (mod_hash_val_t)zone, (mod_hash_hndl_t)0);
14780209230bSgjelinek 			ASSERT(ret == 0);
14790209230bSgjelinek 		}
14800209230bSgjelinek 		if (zone->vmz_zone != NULL) {
14810209230bSgjelinek 			tmp = zone->vmz_zone;
14820209230bSgjelinek 			tmp->vme_next_calc = entities;
14830209230bSgjelinek 			entities = tmp;
14840209230bSgjelinek 		}
14850209230bSgjelinek 		if (vmu_data.vmu_calc_flags &
14860209230bSgjelinek 		    (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS)) {
14870209230bSgjelinek 			tmp = vmu_find_insert_entity(zone->vmz_projects_hash,
14880209230bSgjelinek 			    p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS,
14890209230bSgjelinek 			    zone->vmz_id);
14900209230bSgjelinek 			tmp->vme_next_calc = entities;
14910209230bSgjelinek 			entities = tmp;
14920209230bSgjelinek 		}
14930209230bSgjelinek 		if (vmu_data.vmu_calc_flags &
14940209230bSgjelinek 		    (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS)) {
14950209230bSgjelinek 			tmp = vmu_find_insert_entity(zone->vmz_tasks_hash,
14960209230bSgjelinek 			    p->p_task->tk_tkid, VMUSAGE_TASKS, zone->vmz_id);
14970209230bSgjelinek 			tmp->vme_next_calc = entities;
14980209230bSgjelinek 			entities = tmp;
14990209230bSgjelinek 		}
15000209230bSgjelinek 		if (vmu_data.vmu_calc_flags &
15010209230bSgjelinek 		    (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS)) {
15020209230bSgjelinek 			tmp = vmu_find_insert_entity(zone->vmz_rusers_hash,
15030209230bSgjelinek 			    crgetruid(p->p_cred), VMUSAGE_RUSERS, zone->vmz_id);
15040209230bSgjelinek 			tmp->vme_next_calc = entities;
15050209230bSgjelinek 			entities = tmp;
15060209230bSgjelinek 		}
15070209230bSgjelinek 		if (vmu_data.vmu_calc_flags &
15080209230bSgjelinek 		    (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS)) {
15090209230bSgjelinek 			tmp = vmu_find_insert_entity(zone->vmz_eusers_hash,
15100209230bSgjelinek 			    crgetuid(p->p_cred), VMUSAGE_EUSERS, zone->vmz_id);
15110209230bSgjelinek 			tmp->vme_next_calc = entities;
15120209230bSgjelinek 			entities = tmp;
15130209230bSgjelinek 		}
15140209230bSgjelinek 	}
15150209230bSgjelinek 	/* Entities which collapse projects and users for all zones */
15160209230bSgjelinek 	if (vmu_data.vmu_calc_flags & VMUSAGE_COL_PROJECTS) {
15170209230bSgjelinek 		tmp = vmu_find_insert_entity(vmu_data.vmu_projects_col_hash,
15180209230bSgjelinek 		    p->p_task->tk_proj->kpj_id, VMUSAGE_PROJECTS, ALL_ZONES);
15190209230bSgjelinek 		tmp->vme_next_calc = entities;
15200209230bSgjelinek 		entities = tmp;
15210209230bSgjelinek 	}
15220209230bSgjelinek 	if (vmu_data.vmu_calc_flags & VMUSAGE_COL_RUSERS) {
15230209230bSgjelinek 		tmp = vmu_find_insert_entity(vmu_data.vmu_rusers_col_hash,
15240209230bSgjelinek 		    crgetruid(p->p_cred), VMUSAGE_RUSERS, ALL_ZONES);
15250209230bSgjelinek 		tmp->vme_next_calc = entities;
15260209230bSgjelinek 		entities = tmp;
15270209230bSgjelinek 	}
15280209230bSgjelinek 	if (vmu_data.vmu_calc_flags & VMUSAGE_COL_EUSERS) {
15290209230bSgjelinek 		tmp = vmu_find_insert_entity(vmu_data.vmu_eusers_col_hash,
15300209230bSgjelinek 		    crgetuid(p->p_cred), VMUSAGE_EUSERS, ALL_ZONES);
15310209230bSgjelinek 		tmp->vme_next_calc = entities;
15320209230bSgjelinek 		entities = tmp;
15330209230bSgjelinek 	}
15340209230bSgjelinek 
15350209230bSgjelinek 	ASSERT(entities != NULL);
15360209230bSgjelinek 	/* process all segs in process's address space */
15370209230bSgjelinek 	as = p->p_as;
1538dc32d872SJosef 'Jeff' Sipek 	AS_LOCK_ENTER(as, RW_READER);
15390209230bSgjelinek 	for (seg = AS_SEGFIRST(as); seg != NULL;
15400209230bSgjelinek 	    seg = AS_SEGNEXT(as, seg)) {
15410209230bSgjelinek 		vmu_calculate_seg(entities, seg);
15420209230bSgjelinek 	}
1543dc32d872SJosef 'Jeff' Sipek 	AS_LOCK_EXIT(as);
15440209230bSgjelinek }
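
/*
 * Entity-list sketch: with vmu_calc_flags == (VMUSAGE_ALL_ZONES |
 * VMUSAGE_PROJECTS), a process in zone 1, project 10 is charged to two
 * entities -- the zone 1 entity and the (project 10, zone 1) entity --
 * each of which accumulates rss/swap independently, with its own vnode,
 * amp and anon hashes for deduplication.
 */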
15450209230bSgjelinek 
15460209230bSgjelinek /*
15470209230bSgjelinek  * Free data created by previous call to vmu_calculate().
15480209230bSgjelinek  */
15490209230bSgjelinek static void
15500209230bSgjelinek vmu_clear_calc()
15510209230bSgjelinek {
1552c99fb8b3SToomas Soome 	if (vmu_data.vmu_system != NULL) {
15530209230bSgjelinek 		vmu_free_entity(vmu_data.vmu_system);
15540209230bSgjelinek 		vmu_data.vmu_system = NULL;
1555c99fb8b3SToomas Soome 	}
15560209230bSgjelinek 	if (vmu_data.vmu_zones_hash != NULL)
15570209230bSgjelinek 		i_mod_hash_clear_nosync(vmu_data.vmu_zones_hash);
15580209230bSgjelinek 	if (vmu_data.vmu_projects_col_hash != NULL)
15590209230bSgjelinek 		i_mod_hash_clear_nosync(vmu_data.vmu_projects_col_hash);
15600209230bSgjelinek 	if (vmu_data.vmu_rusers_col_hash != NULL)
15610209230bSgjelinek 		i_mod_hash_clear_nosync(vmu_data.vmu_rusers_col_hash);
15620209230bSgjelinek 	if (vmu_data.vmu_eusers_col_hash != NULL)
15630209230bSgjelinek 		i_mod_hash_clear_nosync(vmu_data.vmu_eusers_col_hash);
15640209230bSgjelinek 
15650209230bSgjelinek 	i_mod_hash_clear_nosync(vmu_data.vmu_all_vnodes_hash);
15660209230bSgjelinek 	i_mod_hash_clear_nosync(vmu_data.vmu_all_amps_hash);
15670209230bSgjelinek }
15680209230bSgjelinek 
15690209230bSgjelinek /*
15700209230bSgjelinek  * Free unused data structures.  These can result if the system workload
15710209230bSgjelinek  * decreases between calculations.
15720209230bSgjelinek  */
15730209230bSgjelinek static void
15740209230bSgjelinek vmu_free_extra()
15750209230bSgjelinek {
15760209230bSgjelinek 	vmu_bound_t *tb;
15770209230bSgjelinek 	vmu_object_t *to;
15780209230bSgjelinek 	vmu_entity_t *te;
15790209230bSgjelinek 	vmu_zone_t *tz;
15800209230bSgjelinek 
15810209230bSgjelinek 	while (vmu_data.vmu_free_bounds != NULL) {
15820209230bSgjelinek 		tb = vmu_data.vmu_free_bounds;
15830209230bSgjelinek 		vmu_data.vmu_free_bounds = vmu_data.vmu_free_bounds->vmb_next;
15840209230bSgjelinek 		kmem_cache_free(vmu_bound_cache, tb);
15850209230bSgjelinek 	}
15860209230bSgjelinek 	while (vmu_data.vmu_free_objects != NULL) {
15870209230bSgjelinek 		to = vmu_data.vmu_free_objects;
15880209230bSgjelinek 		vmu_data.vmu_free_objects =
15890209230bSgjelinek 		    vmu_data.vmu_free_objects->vmo_next;
15900209230bSgjelinek 		kmem_cache_free(vmu_object_cache, to);
15910209230bSgjelinek 	}
15920209230bSgjelinek 	while (vmu_data.vmu_free_entities != NULL) {
15930209230bSgjelinek 		te = vmu_data.vmu_free_entities;
15940209230bSgjelinek 		vmu_data.vmu_free_entities =
15950209230bSgjelinek 		    vmu_data.vmu_free_entities->vme_next;
15960209230bSgjelinek 		if (te->vme_vnode_hash != NULL)
15970209230bSgjelinek 			mod_hash_destroy_hash(te->vme_vnode_hash);
15980209230bSgjelinek 		if (te->vme_amp_hash != NULL)
15990209230bSgjelinek 			mod_hash_destroy_hash(te->vme_amp_hash);
16000209230bSgjelinek 		if (te->vme_anon_hash != NULL)
16010209230bSgjelinek 			mod_hash_destroy_hash(te->vme_anon_hash);
16020209230bSgjelinek 		kmem_free(te, sizeof (vmu_entity_t));
16030209230bSgjelinek 	}
16040209230bSgjelinek 	while (vmu_data.vmu_free_zones != NULL) {
16050209230bSgjelinek 		tz = vmu_data.vmu_free_zones;
16060209230bSgjelinek 		vmu_data.vmu_free_zones =
16070209230bSgjelinek 		    vmu_data.vmu_free_zones->vmz_next;
16080209230bSgjelinek 		if (tz->vmz_projects_hash != NULL)
16090209230bSgjelinek 			mod_hash_destroy_hash(tz->vmz_projects_hash);
16100209230bSgjelinek 		if (tz->vmz_tasks_hash != NULL)
16110209230bSgjelinek 			mod_hash_destroy_hash(tz->vmz_tasks_hash);
16120209230bSgjelinek 		if (tz->vmz_rusers_hash != NULL)
16130209230bSgjelinek 			mod_hash_destroy_hash(tz->vmz_rusers_hash);
16140209230bSgjelinek 		if (tz->vmz_eusers_hash != NULL)
16150209230bSgjelinek 			mod_hash_destroy_hash(tz->vmz_eusers_hash);
16160209230bSgjelinek 		kmem_free(tz, sizeof (vmu_zone_t));
16170209230bSgjelinek 	}
16180209230bSgjelinek }
16190209230bSgjelinek 
16200209230bSgjelinek extern kcondvar_t *pr_pid_cv;
16210209230bSgjelinek 
16220209230bSgjelinek /*
16230209230bSgjelinek  * Determine which entity types are relevant and allocate the hashes to
16240209230bSgjelinek  * track them.  Then walk the process table and count rss and swap
16250209230bSgjelinek  * for each process's address space.  Address space objects such as
16260209230bSgjelinek  * vnodes, amps and anons are tracked per entity, so that they are
16270209230bSgjelinek  * not double counted in the results.
16280209230bSgjelinek  *
16290209230bSgjelinek  */
16300209230bSgjelinek static void
16310209230bSgjelinek vmu_calculate()
16320209230bSgjelinek {
16330209230bSgjelinek 	int i = 0;
16340209230bSgjelinek 	int ret;
16350209230bSgjelinek 	proc_t *p;
16360209230bSgjelinek 
16370209230bSgjelinek 	vmu_clear_calc();
16380209230bSgjelinek 
16390209230bSgjelinek 	if (vmu_data.vmu_calc_flags & VMUSAGE_SYSTEM)
16400209230bSgjelinek 		vmu_data.vmu_system = vmu_alloc_entity(0, VMUSAGE_SYSTEM,
16410209230bSgjelinek 		    ALL_ZONES);
16420209230bSgjelinek 
16430209230bSgjelinek 	/*
16440209230bSgjelinek 	 * Walk process table and calculate rss of each proc.
16450209230bSgjelinek 	 *
16460209230bSgjelinek 	 * Pidlock and p_lock cannot be held while doing the rss calculation.
16470209230bSgjelinek 	 * This is because:
16480209230bSgjelinek 	 *	1.  The calculation allocates using KM_SLEEP.
16490209230bSgjelinek 	 *	2.  The calculation grabs a_lock, which cannot be grabbed
16500209230bSgjelinek 	 *	    after p_lock.
16510209230bSgjelinek 	 *
16520209230bSgjelinek 	 * Since pidlock must be dropped, we cannot simply just walk the
16530209230bSgjelinek 	 * practive list.  Instead, we walk the process table, and sprlock
16540209230bSgjelinek 	 * each process to ensure that it does not exit during the
16550209230bSgjelinek 	 * calculation.  (A lock-ordering sketch follows this function.)
16560209230bSgjelinek 	 */
16570209230bSgjelinek 
16580209230bSgjelinek 	mutex_enter(&pidlock);
16590209230bSgjelinek 	for (i = 0; i < v.v_proc; i++) {
16600209230bSgjelinek again:
16610209230bSgjelinek 		p = pid_entry(i);
16620209230bSgjelinek 		if (p == NULL)
16630209230bSgjelinek 			continue;
16640209230bSgjelinek 
16650209230bSgjelinek 		mutex_enter(&p->p_lock);
16660209230bSgjelinek 		mutex_exit(&pidlock);
16670209230bSgjelinek 
16680209230bSgjelinek 		if (panicstr) {
16690209230bSgjelinek 			mutex_exit(&p->p_lock);
16700209230bSgjelinek 			return;
16710209230bSgjelinek 		}
16720209230bSgjelinek 
16730209230bSgjelinek 		/* Try to set P_PR_LOCK */
16740209230bSgjelinek 		ret = sprtrylock_proc(p);
16750209230bSgjelinek 		if (ret == -1) {
16760209230bSgjelinek 			/* Process in invalid state */
16770209230bSgjelinek 			mutex_exit(&p->p_lock);
16780209230bSgjelinek 			mutex_enter(&pidlock);
16790209230bSgjelinek 			continue;
16800209230bSgjelinek 		} else if (ret == 1) {
16810209230bSgjelinek 			/*
16820209230bSgjelinek 			 * P_PR_LOCK is already set.  Wait and try again.
16830209230bSgjelinek 			 * This also drops p_lock.
16840209230bSgjelinek 			 */
16850209230bSgjelinek 			sprwaitlock_proc(p);
16860209230bSgjelinek 			mutex_enter(&pidlock);
16870209230bSgjelinek 			goto again;
16880209230bSgjelinek 		}
16890209230bSgjelinek 		mutex_exit(&p->p_lock);
16900209230bSgjelinek 
16910209230bSgjelinek 		vmu_calculate_proc(p);
16920209230bSgjelinek 
16930209230bSgjelinek 		mutex_enter(&p->p_lock);
16940209230bSgjelinek 		sprunlock(p);
16950209230bSgjelinek 		mutex_enter(&pidlock);
16960209230bSgjelinek 	}
16970209230bSgjelinek 	mutex_exit(&pidlock);
16980209230bSgjelinek 
16990209230bSgjelinek 	vmu_free_extra();
17000209230bSgjelinek }
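
/*
 * Lock-ordering sketch for the walk above (descriptive of the code as
 * written, not an additional requirement):
 *
 *	pidlock -> p_lock -> P_PR_LOCK -> (drop both) -> a_lock
 *
 * pidlock protects pid_entry(), p_lock protects the sprtrylock_proc()
 * handshake, and both are dropped before vmu_calculate_proc() so that
 * taking a_lock and allocating with KM_SLEEP are safe.
 */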
17010209230bSgjelinek 
17020209230bSgjelinek /*
17030209230bSgjelinek  * Allocate a new cache for nres results satisfying flags.
17040209230bSgjelinek  */
17050209230bSgjelinek vmu_cache_t *
17060209230bSgjelinek vmu_cache_alloc(size_t nres, uint_t flags)
17070209230bSgjelinek {
17080209230bSgjelinek 	vmu_cache_t *cache;
17090209230bSgjelinek 
17100209230bSgjelinek 	cache = kmem_zalloc(sizeof (vmu_cache_t), KM_SLEEP);
17110209230bSgjelinek 	cache->vmc_results = kmem_zalloc(sizeof (vmusage_t) * nres, KM_SLEEP);
17120209230bSgjelinek 	cache->vmc_nresults = nres;
17130209230bSgjelinek 	cache->vmc_flags = flags;
17140209230bSgjelinek 	cache->vmc_refcnt = 1;
17150209230bSgjelinek 	return (cache);
17160209230bSgjelinek }
17170209230bSgjelinek 
17180209230bSgjelinek /*
17190209230bSgjelinek  * Make sure cached results are not freed
17200209230bSgjelinek  */
17210209230bSgjelinek static void
17220209230bSgjelinek vmu_cache_hold(vmu_cache_t *cache)
17230209230bSgjelinek {
17240209230bSgjelinek 	ASSERT(MUTEX_HELD(&vmu_data.vmu_lock));
17250209230bSgjelinek 	cache->vmc_refcnt++;
17260209230bSgjelinek }
17270209230bSgjelinek 
17280209230bSgjelinek /*
17290209230bSgjelinek  * Release a hold on the cache, freeing it when the last hold is dropped.
17300209230bSgjelinek  */
17310209230bSgjelinek static void
17320209230bSgjelinek vmu_cache_rele(vmu_cache_t *cache)
17330209230bSgjelinek {
17340209230bSgjelinek 	ASSERT(MUTEX_HELD(&vmu_data.vmu_lock));
17350209230bSgjelinek 	ASSERT(cache->vmc_refcnt > 0);
17360209230bSgjelinek 	cache->vmc_refcnt--;
17370209230bSgjelinek 	if (cache->vmc_refcnt == 0) {
17380209230bSgjelinek 		kmem_free(cache->vmc_results, sizeof (vmusage_t) *
173900792c0bS 		    cache->vmc_nresults);
17400209230bSgjelinek 		kmem_free(cache, sizeof (vmu_cache_t));
17410209230bSgjelinek 	}
17420209230bSgjelinek }
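
/*
 * Hold/release sketch: the installed vmu_data.vmu_cache keeps the hold
 * it was created with, and each copyout in progress takes another, so
 * a newer calculation can replace the cache while an older copyout
 * still drains its reference.
 */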
17430209230bSgjelinek 
17440209230bSgjelinek /*
17450209230bSgjelinek  * Copy out the cached results to a caller.  Inspect the caller's flags
17460209230bSgjelinek  * and zone to determine which cached results should be copied.
17470209230bSgjelinek  */
17480209230bSgjelinek static int
17490209230bSgjelinek vmu_copyout_results(vmu_cache_t *cache, vmusage_t *buf, size_t *nres,
175000792c0bS     uint_t flags, int cpflg)
17510209230bSgjelinek {
17520209230bSgjelinek 	vmusage_t *result, *out_result;
17530209230bSgjelinek 	vmusage_t dummy;
17540209230bSgjelinek 	size_t i, count = 0;
17550209230bSgjelinek 	size_t bufsize;
17560209230bSgjelinek 	int ret = 0;
17570209230bSgjelinek 	uint_t types = 0;
17580209230bSgjelinek 
17590209230bSgjelinek 	if (nres != NULL) {
176000792c0bS 		if (ddi_copyin((caddr_t)nres, &bufsize, sizeof (size_t), cpflg))
17610209230bSgjelinek 			return (set_errno(EFAULT));
17620209230bSgjelinek 	} else {
17630209230bSgjelinek 		bufsize = 0;
17640209230bSgjelinek 	}
17650209230bSgjelinek 
17660209230bSgjelinek 	/* figure out what results the caller is interested in. */
17670209230bSgjelinek 	if ((flags & VMUSAGE_SYSTEM) && curproc->p_zone == global_zone)
17680209230bSgjelinek 		types |= VMUSAGE_SYSTEM;
17690209230bSgjelinek 	if (flags & (VMUSAGE_ZONE | VMUSAGE_ALL_ZONES))
17700209230bSgjelinek 		types |= VMUSAGE_ZONE;
17710209230bSgjelinek 	if (flags & (VMUSAGE_PROJECTS | VMUSAGE_ALL_PROJECTS |
17720209230bSgjelinek 	    VMUSAGE_COL_PROJECTS))
17730209230bSgjelinek 		types |= VMUSAGE_PROJECTS;
17740209230bSgjelinek 	if (flags & (VMUSAGE_TASKS | VMUSAGE_ALL_TASKS))
17750209230bSgjelinek 		types |= VMUSAGE_TASKS;
17760209230bSgjelinek 	if (flags & (VMUSAGE_RUSERS | VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS))
17770209230bSgjelinek 		types |= VMUSAGE_RUSERS;
17780209230bSgjelinek 	if (flags & (VMUSAGE_EUSERS | VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS))
17790209230bSgjelinek 		types |= VMUSAGE_EUSERS;
17800209230bSgjelinek 
17810209230bSgjelinek 	/* count results for current zone */
17820209230bSgjelinek 	out_result = buf;
17830209230bSgjelinek 	for (result = cache->vmc_results, i = 0;
17840209230bSgjelinek 	    i < cache->vmc_nresults; result++, i++) {
17850209230bSgjelinek 
17860209230bSgjelinek 		/* Do not return "other-zone" results to non-global zones */
17870209230bSgjelinek 		if (curproc->p_zone != global_zone &&
17880209230bSgjelinek 		    curproc->p_zone->zone_id != result->vmu_zoneid)
17890209230bSgjelinek 			continue;
17900209230bSgjelinek 
17910209230bSgjelinek 		/*
17920209230bSgjelinek 		 * If non-global zone requests VMUSAGE_SYSTEM, fake
17930209230bSgjelinek 		 * up VMUSAGE_ZONE result as VMUSAGE_SYSTEM result.
17940209230bSgjelinek 		 */
17950209230bSgjelinek 		if (curproc->p_zone != global_zone &&
17960209230bSgjelinek 		    (flags & VMUSAGE_SYSTEM) != 0 &&
17970209230bSgjelinek 		    result->vmu_type == VMUSAGE_ZONE) {
17980209230bSgjelinek 			count++;
17990209230bSgjelinek 			if (out_result != NULL) {
18000209230bSgjelinek 				if (bufsize < count) {
18010209230bSgjelinek 					ret = set_errno(EOVERFLOW);
18020209230bSgjelinek 				} else {
18030209230bSgjelinek 					dummy = *result;
18040209230bSgjelinek 					dummy.vmu_zoneid = ALL_ZONES;
18050209230bSgjelinek 					dummy.vmu_id = 0;
18060209230bSgjelinek 					dummy.vmu_type = VMUSAGE_SYSTEM;
180700792c0bS 					if (ddi_copyout(&dummy, out_result,
180800792c0bS 					    sizeof (vmusage_t), cpflg))
180900792c0bS 						return (set_errno(EFAULT));
18100209230bSgjelinek 					out_result++;
18110209230bSgjelinek 				}
18120209230bSgjelinek 			}
18130209230bSgjelinek 		}
18140209230bSgjelinek 
18150209230bSgjelinek 		/* Skip results that do not match requested type */
18160209230bSgjelinek 		if ((result->vmu_type & types) == 0)
18170209230bSgjelinek 			continue;
18180209230bSgjelinek 
18190209230bSgjelinek 		/* Skip collated results if not requested */
18200209230bSgjelinek 		if (result->vmu_zoneid == ALL_ZONES) {
18210209230bSgjelinek 			if (result->vmu_type == VMUSAGE_PROJECTS &&
18220209230bSgjelinek 			    (flags & VMUSAGE_COL_PROJECTS) == 0)
18230209230bSgjelinek 				continue;
18240209230bSgjelinek 			if (result->vmu_type == VMUSAGE_EUSERS &&
18250209230bSgjelinek 			    (flags & VMUSAGE_COL_EUSERS) == 0)
18260209230bSgjelinek 				continue;
18270209230bSgjelinek 			if (result->vmu_type == VMUSAGE_RUSERS &&
18280209230bSgjelinek 			    (flags & VMUSAGE_COL_RUSERS) == 0)
18290209230bSgjelinek 				continue;
18300209230bSgjelinek 		}
18310209230bSgjelinek 
18320209230bSgjelinek 		/* Skip "other zone" results if not requested */
18330209230bSgjelinek 		if (result->vmu_zoneid != curproc->p_zone->zone_id) {
18340209230bSgjelinek 			if (result->vmu_type == VMUSAGE_ZONE &&
18350209230bSgjelinek 			    (flags & VMUSAGE_ALL_ZONES) == 0)
18360209230bSgjelinek 				continue;
18370209230bSgjelinek 			if (result->vmu_type == VMUSAGE_PROJECTS &&
18380209230bSgjelinek 			    (flags & (VMUSAGE_ALL_PROJECTS |
18390209230bSgjelinek 			    VMUSAGE_COL_PROJECTS)) == 0)
18400209230bSgjelinek 				continue;
18410209230bSgjelinek 			if (result->vmu_type == VMUSAGE_TASKS &&
18420209230bSgjelinek 			    (flags & VMUSAGE_ALL_TASKS) == 0)
18430209230bSgjelinek 				continue;
18440209230bSgjelinek 			if (result->vmu_type == VMUSAGE_RUSERS &&
18450209230bSgjelinek 			    (flags & (VMUSAGE_ALL_RUSERS |
18460209230bSgjelinek 			    VMUSAGE_COL_RUSERS)) == 0)
18470209230bSgjelinek 				continue;
18480209230bSgjelinek 			if (result->vmu_type == VMUSAGE_EUSERS &&
18490209230bSgjelinek 			    (flags & (VMUSAGE_ALL_EUSERS |
18500209230bSgjelinek 			    VMUSAGE_COL_EUSERS)) == 0)
18510209230bSgjelinek 				continue;
18520209230bSgjelinek 		}
18530209230bSgjelinek 		count++;
18540209230bSgjelinek 		if (out_result != NULL) {
18550209230bSgjelinek 			if (bufsize < count) {
18560209230bSgjelinek 				ret = set_errno(EOVERFLOW);
18570209230bSgjelinek 			} else {
185800792c0bS 				if (ddi_copyout(result, out_result,
185900792c0bS 				    sizeof (vmusage_t), cpflg))
18600209230bSgjelinek 					return (set_errno(EFAULT));
18610209230bSgjelinek 				out_result++;
18620209230bSgjelinek 			}
18630209230bSgjelinek 		}
18640209230bSgjelinek 	}
18650209230bSgjelinek 	if (nres != NULL)
186600792c0bS 		if (ddi_copyout(&count, (void *)nres, sizeof (size_t), cpflg))
18670209230bSgjelinek 			return (set_errno(EFAULT));
18680209230bSgjelinek 
18690209230bSgjelinek 	return (ret);
18700209230bSgjelinek }
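
/*
 * Note that a NULL buf is a supported sizing probe: the loop above
 * still counts the matching results and copies the count out through
 * nres (see the usage sketch at the end of this file).
 */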
18710209230bSgjelinek 
18720209230bSgjelinek /*
18730209230bSgjelinek  * vm_getusage()
18740209230bSgjelinek  *
18750209230bSgjelinek  * Counts rss and swap by zone, project, task, and/or user.  The flags argument
18760209230bSgjelinek  * determines the type of results structures returned.  Flags requesting
18770209230bSgjelinek  * results from more than one zone are "flattened" to the local zone if the
18780209230bSgjelinek  * caller is not the global zone.
18790209230bSgjelinek  *
18800209230bSgjelinek  * args:
18810209230bSgjelinek  *	flags:	bitmap consisting of one or more of VMUSAGE_*.
18820209230bSgjelinek  *	age:	maximum allowable age (in seconds since the counting was
18830209230bSgjelinek  *		done) of the results.  Results from previous callers are
18840209230bSgjelinek  *		cached in the kernel.
18850209230bSgjelinek  *	buf:	pointer to buffer array of vmusage_t.  If NULL, then only nres
18860209230bSgjelinek  *		is set on success.
18870209230bSgjelinek  *	nres:	Set to number of vmusage_t structures pointed to by buf
18880209230bSgjelinek  *		before calling vm_getusage().
18890209230bSgjelinek  *		On return of 0 (success) or ENOSPC, nres is set to the number
18900209230bSgjelinek  *		of result structures returned or that would have been returned.
18910209230bSgjelinek  *
18920209230bSgjelinek  * returns 0 on success, -1 on failure:
18930209230bSgjelinek  *	EINTR (interrupted)
18940209230bSgjelinek  *	ENOSPC (nres too small for results; nres set to the needed value)
18950209230bSgjelinek  *	EINVAL (flags invalid)
18960209230bSgjelinek  *	EFAULT (bad address for buf or nres)
18970209230bSgjelinek  */
18980209230bSgjelinek int
189900792c0bS vm_getusage(uint_t flags, time_t age, vmusage_t *buf, size_t *nres, int cpflg)
19000209230bSgjelinek {
19010209230bSgjelinek 	vmu_entity_t *entity;
19020209230bSgjelinek 	vmusage_t *result;
19030209230bSgjelinek 	int ret = 0;
19040209230bSgjelinek 	int cacherecent = 0;
19050209230bSgjelinek 	hrtime_t now;
19060209230bSgjelinek 	uint_t flags_orig;
19070209230bSgjelinek 
19080209230bSgjelinek 	/*
19090209230bSgjelinek 	 * Non-global zones cannot request system-wide or collated
19100209230bSgjelinek 	 * results, nor the system result, so munge the flags accordingly.
19110209230bSgjelinek 	 */
19120209230bSgjelinek 	flags_orig = flags;
19130209230bSgjelinek 	if (curproc->p_zone != global_zone) {
19140209230bSgjelinek 		if (flags & (VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS)) {
19150209230bSgjelinek 			flags &= ~(VMUSAGE_ALL_PROJECTS | VMUSAGE_COL_PROJECTS);
19160209230bSgjelinek 			flags |= VMUSAGE_PROJECTS;
19170209230bSgjelinek 		}
19180209230bSgjelinek 		if (flags & (VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS)) {
19190209230bSgjelinek 			flags &= ~(VMUSAGE_ALL_RUSERS | VMUSAGE_COL_RUSERS);
19200209230bSgjelinek 			flags |= VMUSAGE_RUSERS;
19210209230bSgjelinek 		}
19220209230bSgjelinek 		if (flags & (VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS)) {
19230209230bSgjelinek 			flags &= ~(VMUSAGE_ALL_EUSERS | VMUSAGE_COL_EUSERS);
19240209230bSgjelinek 			flags |= VMUSAGE_EUSERS;
19250209230bSgjelinek 		}
19260209230bSgjelinek 		if (flags & VMUSAGE_SYSTEM) {
19270209230bSgjelinek 			flags &= ~VMUSAGE_SYSTEM;
19280209230bSgjelinek 			flags |= VMUSAGE_ZONE;
19290209230bSgjelinek 		}
19300209230bSgjelinek 	}
19310209230bSgjelinek 
19320209230bSgjelinek 	/* Check for unknown flags */
19330209230bSgjelinek 	if ((flags & (~VMUSAGE_MASK)) != 0)
19340209230bSgjelinek 		return (set_errno(EINVAL));
19350209230bSgjelinek 
19360209230bSgjelinek 	/* Check for no flags */
19370209230bSgjelinek 	if ((flags & VMUSAGE_MASK) == 0)
19380209230bSgjelinek 		return (set_errno(EINVAL));
19390209230bSgjelinek 
19400209230bSgjelinek 	mutex_enter(&vmu_data.vmu_lock);
19410209230bSgjelinek 	now = gethrtime();
19420209230bSgjelinek 
19430209230bSgjelinek start:
19440209230bSgjelinek 	if (vmu_data.vmu_cache != NULL) {
19450209230bSgjelinek 
19460209230bSgjelinek 		vmu_cache_t *cache;
19470209230bSgjelinek 
19480209230bSgjelinek 		if ((vmu_data.vmu_cache->vmc_timestamp +
19490209230bSgjelinek 		    ((hrtime_t)age * NANOSEC)) > now)
19500209230bSgjelinek 			cacherecent = 1;
19510209230bSgjelinek 
19520209230bSgjelinek 		if ((vmu_data.vmu_cache->vmc_flags & flags) == flags &&
19530209230bSgjelinek 		    cacherecent == 1) {
19540209230bSgjelinek 			cache = vmu_data.vmu_cache;
19550209230bSgjelinek 			vmu_cache_hold(cache);
19560209230bSgjelinek 			mutex_exit(&vmu_data.vmu_lock);
19570209230bSgjelinek 
195800792c0bS 			ret = vmu_copyout_results(cache, buf, nres, flags_orig,
195900792c0bS 			    cpflg);
19600209230bSgjelinek 			mutex_enter(&vmu_data.vmu_lock);
19610209230bSgjelinek 			vmu_cache_rele(cache);
19620209230bSgjelinek 			if (vmu_data.vmu_pending_waiters > 0)
19630209230bSgjelinek 				cv_broadcast(&vmu_data.vmu_cv);
19640209230bSgjelinek 			mutex_exit(&vmu_data.vmu_lock);
19650209230bSgjelinek 			return (ret);
19660209230bSgjelinek 		}
19670209230bSgjelinek 		/*
19680209230bSgjelinek 		 * If the cache is recent, it is likely that there are other
19690209230bSgjelinek 		 * consumers of vm_getusage running, so add their flags to the
19700209230bSgjelinek 		 * desired flags for the calculation.
19710209230bSgjelinek 		 */
19720209230bSgjelinek 		if (cacherecent == 1)
19730209230bSgjelinek 			flags = vmu_data.vmu_cache->vmc_flags | flags;
19740209230bSgjelinek 	}
19750209230bSgjelinek 	if (vmu_data.vmu_calc_thread == NULL) {
19760209230bSgjelinek 
19770209230bSgjelinek 		vmu_cache_t *cache;
19780209230bSgjelinek 
19790209230bSgjelinek 		vmu_data.vmu_calc_thread = curthread;
19800209230bSgjelinek 		vmu_data.vmu_calc_flags = flags;
19810209230bSgjelinek 		vmu_data.vmu_entities = NULL;
19820209230bSgjelinek 		vmu_data.vmu_nentities = 0;
19830209230bSgjelinek 		if (vmu_data.vmu_pending_waiters > 0)
19840209230bSgjelinek 			vmu_data.vmu_calc_flags |=
19850209230bSgjelinek 			    vmu_data.vmu_pending_flags;
19860209230bSgjelinek 
19870209230bSgjelinek 		vmu_data.vmu_pending_flags = 0;
19880209230bSgjelinek 		mutex_exit(&vmu_data.vmu_lock);
19890209230bSgjelinek 		vmu_calculate();
19900209230bSgjelinek 		mutex_enter(&vmu_data.vmu_lock);
19910209230bSgjelinek 		/* copy results to cache */
19920209230bSgjelinek 		if (vmu_data.vmu_cache != NULL)
19930209230bSgjelinek 			vmu_cache_rele(vmu_data.vmu_cache);
19940209230bSgjelinek 		cache = vmu_data.vmu_cache =
19950209230bSgjelinek 		    vmu_cache_alloc(vmu_data.vmu_nentities,
199600792c0bS 		    vmu_data.vmu_calc_flags);
19970209230bSgjelinek 
19980209230bSgjelinek 		result = cache->vmc_results;
19990209230bSgjelinek 		for (entity = vmu_data.vmu_entities; entity != NULL;
20000209230bSgjelinek 		    entity = entity->vme_next) {
20010209230bSgjelinek 			*result = entity->vme_result;
20020209230bSgjelinek 			result++;
20030209230bSgjelinek 		}
20040209230bSgjelinek 		cache->vmc_timestamp = gethrtime();
20050209230bSgjelinek 		vmu_cache_hold(cache);
20060209230bSgjelinek 
20070209230bSgjelinek 		vmu_data.vmu_calc_flags = 0;
20080209230bSgjelinek 		vmu_data.vmu_calc_thread = NULL;
20090209230bSgjelinek 
20100209230bSgjelinek 		if (vmu_data.vmu_pending_waiters > 0)
20110209230bSgjelinek 			cv_broadcast(&vmu_data.vmu_cv);
20120209230bSgjelinek 
20130209230bSgjelinek 		mutex_exit(&vmu_data.vmu_lock);
20140209230bSgjelinek 
20150209230bSgjelinek 		/* copy cache */
201600792c0bS 		ret = vmu_copyout_results(cache, buf, nres, flags_orig, cpflg);
20170209230bSgjelinek 		mutex_enter(&vmu_data.vmu_lock);
20180209230bSgjelinek 		vmu_cache_rele(cache);
20190209230bSgjelinek 		mutex_exit(&vmu_data.vmu_lock);
20200209230bSgjelinek 
20210209230bSgjelinek 		return (ret);
20220209230bSgjelinek 	}
20230209230bSgjelinek 	vmu_data.vmu_pending_flags |= flags;
20240209230bSgjelinek 	vmu_data.vmu_pending_waiters++;
20250209230bSgjelinek 	while (vmu_data.vmu_calc_thread != NULL) {
20260209230bSgjelinek 		if (cv_wait_sig(&vmu_data.vmu_cv,
20270209230bSgjelinek 		    &vmu_data.vmu_lock) == 0) {
20280209230bSgjelinek 			vmu_data.vmu_pending_waiters--;
20290209230bSgjelinek 			mutex_exit(&vmu_data.vmu_lock);
20300209230bSgjelinek 			return (set_errno(EINTR));
20310209230bSgjelinek 		}
20320209230bSgjelinek 	}
20330209230bSgjelinek 	vmu_data.vmu_pending_waiters--;
20340209230bSgjelinek 	goto start;
20350209230bSgjelinek }
2036