/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */
#include <alloca.h>
#include <assert.h>
#include <dirent.h>
#include <dlfcn.h>
#include <door.h>
#include <errno.h>
#include <exacct.h>
#include <ctype.h>
#include <fcntl.h>
#include <kstat.h>
#include <libcontract.h>
#include <libintl.h>
#include <libscf.h>
#include <zonestat.h>
#include <zonestat_impl.h>
#include <limits.h>
#include <pool.h>
#include <procfs.h>
#include <rctl.h>
#include <thread.h>
#include <signal.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <synch.h>
#include <sys/acctctl.h>
#include <sys/contract/process.h>
#include <sys/ctfs.h>
#include <sys/fork.h>
#include <sys/param.h>
#include <sys/priocntl.h>
#include <sys/fxpriocntl.h>
#include <sys/processor.h>
#include <sys/pset.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/swap.h>
#include <sys/systeminfo.h>
#include <sys/list.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/vm_usage.h>
#include <sys/wait.h>
#include <sys/zone.h>
#include <time.h>
#include <ucred.h>
#include <unistd.h>
#include <vm/anon.h>
#include <zone.h>
79efd4c9b6SSteve Lawrence 
#define	MAX_PSET_NAME	1024	/* Taken from PV_NAME_MAX_LEN */
#define	ZSD_PSET_UNLIMITED	UINT16_MAX
/* Path of the extended accounting file managed by zonestatd itself */
#define	ZONESTAT_EXACCT_FILE	"/var/adm/exacct/zonestat-process"
83efd4c9b6SSteve Lawrence 
/*
 * zonestatd implements gathering cpu and memory utilization data for
 * running zones.  It has these components:
 *
 * zsd_server:
 *	Door server to respond to client connections.  Each client
 *	will connect using libzonestat.so, which will open and
 *	call /var/tmp/.zonestat_door.  Each connecting client is given
 *	a file descriptor to the stat server.
 *
 *	The zsd_server also responds to zoneadmd, which reports when a
 *	new zone is booted.  This is used to fattach the zsd_server door
 *	into the new zone.
 *
 * zsd_stat_server:
 *	Receives client requests for the current utilization data.  Each
 *	client request will cause zonestatd to update the current utilization
 *	data by kicking the stat_thread.
 *
 *	If the client is in a non-global zone, the utilization data will
 *	be filtered to only show the given zone.  The usage by all other zones
 *	will be added to the system utilization.
 *
 * stat_thread:
 *	The stat thread implements querying the system to determine the
 *	current utilization data for each running zone.  This includes
 *	inspecting the system's processor set configuration, as well as details
 *	of each zone, such as their configured limits, and which processor
 *	sets they are running in.
 *
 *	The stat_thread will only update memory utilization data as often as
 *	the configured config/sample_interval on the zones-monitoring service.
 */
117efd4c9b6SSteve Lawrence 
118efd4c9b6SSteve Lawrence /*
119efd4c9b6SSteve Lawrence  * The private vmusage structure unfortunately uses size_t types, and assumes
120efd4c9b6SSteve Lawrence  * the caller's bitness matches the kernel's bitness.  Since the getvmusage()
121efd4c9b6SSteve Lawrence  * system call is contracted, and zonestatd is 32 bit, the following structures
122efd4c9b6SSteve Lawrence  * are used to interact with a 32bit or 64 bit kernel.
123efd4c9b6SSteve Lawrence  */
124efd4c9b6SSteve Lawrence typedef struct zsd_vmusage32 {
125efd4c9b6SSteve Lawrence 	id_t vmu_zoneid;
126efd4c9b6SSteve Lawrence 	uint_t vmu_type;
127efd4c9b6SSteve Lawrence 	id_t vmu_id;
128efd4c9b6SSteve Lawrence 
129efd4c9b6SSteve Lawrence 	uint32_t vmu_rss_all;
130efd4c9b6SSteve Lawrence 	uint32_t vmu_rss_private;
131efd4c9b6SSteve Lawrence 	uint32_t vmu_rss_shared;
132efd4c9b6SSteve Lawrence 	uint32_t vmu_swap_all;
133efd4c9b6SSteve Lawrence 	uint32_t vmu_swap_private;
134efd4c9b6SSteve Lawrence 	uint32_t vmu_swap_shared;
135efd4c9b6SSteve Lawrence } zsd_vmusage32_t;
136efd4c9b6SSteve Lawrence 
137efd4c9b6SSteve Lawrence typedef struct zsd_vmusage64 {
138efd4c9b6SSteve Lawrence 	id_t vmu_zoneid;
139efd4c9b6SSteve Lawrence 	uint_t vmu_type;
140efd4c9b6SSteve Lawrence 	id_t vmu_id;
141efd4c9b6SSteve Lawrence 	/*
142efd4c9b6SSteve Lawrence 	 * An amd64 kernel will align the following uint64_t members, but a
143efd4c9b6SSteve Lawrence 	 * 32bit i386 process will not without help.
144efd4c9b6SSteve Lawrence 	 */
145efd4c9b6SSteve Lawrence 	int vmu_align_next_members_on_8_bytes;
146efd4c9b6SSteve Lawrence 	uint64_t vmu_rss_all;
147efd4c9b6SSteve Lawrence 	uint64_t vmu_rss_private;
148efd4c9b6SSteve Lawrence 	uint64_t vmu_rss_shared;
149efd4c9b6SSteve Lawrence 	uint64_t vmu_swap_all;
150efd4c9b6SSteve Lawrence 	uint64_t vmu_swap_private;
151efd4c9b6SSteve Lawrence 	uint64_t vmu_swap_shared;
152efd4c9b6SSteve Lawrence } zsd_vmusage64_t;
153efd4c9b6SSteve Lawrence 
154efd4c9b6SSteve Lawrence struct zsd_zone;
155efd4c9b6SSteve Lawrence 
156efd4c9b6SSteve Lawrence /* Used to store a zone's usage of a pset */
157efd4c9b6SSteve Lawrence typedef struct zsd_pset_usage {
158efd4c9b6SSteve Lawrence 	struct zsd_zone	*zsu_zone;
159efd4c9b6SSteve Lawrence 	struct zsd_pset	*zsu_pset;
160efd4c9b6SSteve Lawrence 
161efd4c9b6SSteve Lawrence 	list_node_t	zsu_next;
162efd4c9b6SSteve Lawrence 
163efd4c9b6SSteve Lawrence 	zoneid_t	zsu_zoneid;
164efd4c9b6SSteve Lawrence 	boolean_t	zsu_found;	/* zone bound at end of interval */
165efd4c9b6SSteve Lawrence 	boolean_t	zsu_active;	/* zone was bound during interval */
166efd4c9b6SSteve Lawrence 	boolean_t	zsu_new;	/* zone newly bound in this interval */
167efd4c9b6SSteve Lawrence 	boolean_t	zsu_deleted;	/* zone was unbound in this interval */
168efd4c9b6SSteve Lawrence 	boolean_t	zsu_empty;	/* no procs in pset in this interval */
169efd4c9b6SSteve Lawrence 	time_t		zsu_start;	/* time when zone was found in pset */
170efd4c9b6SSteve Lawrence 	hrtime_t	zsu_hrstart;	/* time when zone  was found in pset */
171efd4c9b6SSteve Lawrence 	uint64_t	zsu_cpu_shares;
172efd4c9b6SSteve Lawrence 	uint_t		zsu_scheds;	/* schedulers found in this pass */
173efd4c9b6SSteve Lawrence 	timestruc_t	zsu_cpu_usage;	/* cpu time used */
174efd4c9b6SSteve Lawrence } zsd_pset_usage_t;
175efd4c9b6SSteve Lawrence 
176efd4c9b6SSteve Lawrence /* Used to store a pset's utilization */
177efd4c9b6SSteve Lawrence typedef struct zsd_pset {
178efd4c9b6SSteve Lawrence 	psetid_t	zsp_id;
179efd4c9b6SSteve Lawrence 	list_node_t	zsp_next;
180efd4c9b6SSteve Lawrence 	char		zsp_name[ZS_PSETNAME_MAX];
181efd4c9b6SSteve Lawrence 
182efd4c9b6SSteve Lawrence 	uint_t		zsp_cputype;	/* default, dedicated or shared */
183efd4c9b6SSteve Lawrence 	boolean_t	zsp_found;	/* pset found at end of interval */
184efd4c9b6SSteve Lawrence 	boolean_t	zsp_new;	/* pset new in this interval */
185efd4c9b6SSteve Lawrence 	boolean_t	zsp_deleted;	/* pset deleted in this interval */
186efd4c9b6SSteve Lawrence 	boolean_t	zsp_active;	/* pset existed during interval */
187efd4c9b6SSteve Lawrence 	boolean_t	zsp_empty;	/* no processes in pset */
188efd4c9b6SSteve Lawrence 	time_t		zsp_start;
189efd4c9b6SSteve Lawrence 	hrtime_t	zsp_hrstart;
190efd4c9b6SSteve Lawrence 
191efd4c9b6SSteve Lawrence 	uint64_t	zsp_online;	/* online cpus in interval */
192efd4c9b6SSteve Lawrence 	uint64_t	zsp_size;	/* size in this interval */
193efd4c9b6SSteve Lawrence 	uint64_t	zsp_min;	/* configured min in this interval */
194efd4c9b6SSteve Lawrence 	uint64_t	zsp_max;	/* configured max in this interval */
195efd4c9b6SSteve Lawrence 	int64_t		zsp_importance;	/* configured max in this interval */
196efd4c9b6SSteve Lawrence 
197efd4c9b6SSteve Lawrence 	uint_t		zsp_scheds;	/* scheds of processes found in pset */
198efd4c9b6SSteve Lawrence 	uint64_t	zsp_cpu_shares;	/* total shares in this interval */
199efd4c9b6SSteve Lawrence 
200efd4c9b6SSteve Lawrence 	timestruc_t	zsp_total_time;
201efd4c9b6SSteve Lawrence 	timestruc_t	zsp_usage_kern;
202efd4c9b6SSteve Lawrence 	timestruc_t	zsp_usage_zones;
203efd4c9b6SSteve Lawrence 
204efd4c9b6SSteve Lawrence 	/* Individual zone usages of pset */
205efd4c9b6SSteve Lawrence 	list_t		zsp_usage_list;
206efd4c9b6SSteve Lawrence 	int		zsp_nusage;
207efd4c9b6SSteve Lawrence 
208efd4c9b6SSteve Lawrence 	/* Summed kstat values from individual cpus in pset */
209efd4c9b6SSteve Lawrence 	timestruc_t	zsp_idle;
210efd4c9b6SSteve Lawrence 	timestruc_t	zsp_intr;
211efd4c9b6SSteve Lawrence 	timestruc_t	zsp_kern;
212efd4c9b6SSteve Lawrence 	timestruc_t	zsp_user;
213efd4c9b6SSteve Lawrence 
214efd4c9b6SSteve Lawrence } zsd_pset_t;
215efd4c9b6SSteve Lawrence 
216efd4c9b6SSteve Lawrence /* Used to track an individual cpu's utilization as reported by kstats */
217efd4c9b6SSteve Lawrence typedef struct zsd_cpu {
218efd4c9b6SSteve Lawrence 	processorid_t	zsc_id;
219efd4c9b6SSteve Lawrence 	list_node_t	zsc_next;
220efd4c9b6SSteve Lawrence 	psetid_t	zsc_psetid;
221efd4c9b6SSteve Lawrence 	psetid_t	zsc_psetid_prev;
222efd4c9b6SSteve Lawrence 	zsd_pset_t	*zsc_pset;
223efd4c9b6SSteve Lawrence 
224efd4c9b6SSteve Lawrence 	boolean_t	zsc_found;	/* cpu online in this interval */
225efd4c9b6SSteve Lawrence 	boolean_t	zsc_onlined;	/* cpu onlined during this interval */
226efd4c9b6SSteve Lawrence 	boolean_t	zsc_offlined;	/* cpu offlined during this interval */
227efd4c9b6SSteve Lawrence 	boolean_t	zsc_active;	/* cpu online during this interval */
228efd4c9b6SSteve Lawrence 	boolean_t	zsc_allocated;	/* True if cpu has ever been found */
229efd4c9b6SSteve Lawrence 
230efd4c9b6SSteve Lawrence 	/* kstats this interval */
231efd4c9b6SSteve Lawrence 	uint64_t	zsc_nsec_idle;
232efd4c9b6SSteve Lawrence 	uint64_t	zsc_nsec_intr;
233efd4c9b6SSteve Lawrence 	uint64_t	zsc_nsec_kern;
234efd4c9b6SSteve Lawrence 	uint64_t	zsc_nsec_user;
235efd4c9b6SSteve Lawrence 
236efd4c9b6SSteve Lawrence 	/* kstats in most recent interval */
237efd4c9b6SSteve Lawrence 	uint64_t	zsc_nsec_idle_prev;
238efd4c9b6SSteve Lawrence 	uint64_t	zsc_nsec_intr_prev;
239efd4c9b6SSteve Lawrence 	uint64_t	zsc_nsec_kern_prev;
240efd4c9b6SSteve Lawrence 	uint64_t	zsc_nsec_user_prev;
241efd4c9b6SSteve Lawrence 
242efd4c9b6SSteve Lawrence 	/* Total kstat increases since zonestatd started reading kstats */
243efd4c9b6SSteve Lawrence 	timestruc_t	zsc_idle;
244efd4c9b6SSteve Lawrence 	timestruc_t	zsc_intr;
245efd4c9b6SSteve Lawrence 	timestruc_t	zsc_kern;
246efd4c9b6SSteve Lawrence 	timestruc_t	zsc_user;
247efd4c9b6SSteve Lawrence 
248efd4c9b6SSteve Lawrence } zsd_cpu_t;
249efd4c9b6SSteve Lawrence 
250efd4c9b6SSteve Lawrence /* Used to describe an individual zone and its utilization */
251efd4c9b6SSteve Lawrence typedef struct zsd_zone {
252efd4c9b6SSteve Lawrence 	zoneid_t	zsz_id;
253efd4c9b6SSteve Lawrence 	list_node_t	zsz_next;
254efd4c9b6SSteve Lawrence 	char		zsz_name[ZS_ZONENAME_MAX];
255efd4c9b6SSteve Lawrence 	uint_t		zsz_cputype;
256efd4c9b6SSteve Lawrence 	uint_t		zsz_iptype;
257efd4c9b6SSteve Lawrence 	time_t		zsz_start;
258efd4c9b6SSteve Lawrence 	hrtime_t	zsz_hrstart;
259efd4c9b6SSteve Lawrence 
260efd4c9b6SSteve Lawrence 	char		zsz_pool[ZS_POOLNAME_MAX];
261efd4c9b6SSteve Lawrence 	char		zsz_pset[ZS_PSETNAME_MAX];
262efd4c9b6SSteve Lawrence 	int		zsz_default_sched;
263efd4c9b6SSteve Lawrence 	/* These are deduced by inspecting processes */
264efd4c9b6SSteve Lawrence 	psetid_t	zsz_psetid;
265efd4c9b6SSteve Lawrence 	uint_t		zsz_scheds;
266efd4c9b6SSteve Lawrence 
267efd4c9b6SSteve Lawrence 	boolean_t	zsz_new;	/* zone booted during this interval */
268efd4c9b6SSteve Lawrence 	boolean_t	zsz_deleted;	/* halted during this interval */
269efd4c9b6SSteve Lawrence 	boolean_t	zsz_active;	/* running in this interval */
270efd4c9b6SSteve Lawrence 	boolean_t	zsz_empty;	/* no processes in this interval */
271efd4c9b6SSteve Lawrence 	boolean_t	zsz_gone;	/* not installed in this interval */
272efd4c9b6SSteve Lawrence 	boolean_t	zsz_found;	/* Running at end of this interval */
273efd4c9b6SSteve Lawrence 
274efd4c9b6SSteve Lawrence 	uint64_t	zsz_cpu_shares;
275efd4c9b6SSteve Lawrence 	uint64_t	zsz_cpu_cap;
276efd4c9b6SSteve Lawrence 	uint64_t	zsz_ram_cap;
277efd4c9b6SSteve Lawrence 	uint64_t	zsz_locked_cap;
278efd4c9b6SSteve Lawrence 	uint64_t	zsz_vm_cap;
279efd4c9b6SSteve Lawrence 
280efd4c9b6SSteve Lawrence 	uint64_t	zsz_cpus_online;
281efd4c9b6SSteve Lawrence 	timestruc_t	zsz_cpu_usage;	/* cpu time of cpu cap */
282efd4c9b6SSteve Lawrence 	timestruc_t	zsz_cap_time;	/* cpu time of cpu cap */
283efd4c9b6SSteve Lawrence 	timestruc_t	zsz_share_time; /* cpu time of share of cpu */
284efd4c9b6SSteve Lawrence 	timestruc_t	zsz_pset_time;  /* time of all psets zone is bound to */
285efd4c9b6SSteve Lawrence 
286efd4c9b6SSteve Lawrence 	uint64_t	zsz_usage_ram;
287efd4c9b6SSteve Lawrence 	uint64_t	zsz_usage_locked;
288efd4c9b6SSteve Lawrence 	uint64_t	zsz_usage_vm;
289efd4c9b6SSteve Lawrence 
290efd4c9b6SSteve Lawrence 	uint64_t	zsz_processes_cap;
291efd4c9b6SSteve Lawrence 	uint64_t	zsz_lwps_cap;
292efd4c9b6SSteve Lawrence 	uint64_t	zsz_shm_cap;
293efd4c9b6SSteve Lawrence 	uint64_t	zsz_shmids_cap;
294efd4c9b6SSteve Lawrence 	uint64_t	zsz_semids_cap;
295efd4c9b6SSteve Lawrence 	uint64_t	zsz_msgids_cap;
296efd4c9b6SSteve Lawrence 	uint64_t	zsz_lofi_cap;
297efd4c9b6SSteve Lawrence 
298efd4c9b6SSteve Lawrence 	uint64_t	zsz_processes;
299efd4c9b6SSteve Lawrence 	uint64_t	zsz_lwps;
300efd4c9b6SSteve Lawrence 	uint64_t	zsz_shm;
301efd4c9b6SSteve Lawrence 	uint64_t	zsz_shmids;
302efd4c9b6SSteve Lawrence 	uint64_t	zsz_semids;
303efd4c9b6SSteve Lawrence 	uint64_t	zsz_msgids;
304efd4c9b6SSteve Lawrence 	uint64_t	zsz_lofi;
305efd4c9b6SSteve Lawrence 
306efd4c9b6SSteve Lawrence } zsd_zone_t;
307efd4c9b6SSteve Lawrence 
308efd4c9b6SSteve Lawrence /*
309efd4c9b6SSteve Lawrence  * Used to track the cpu usage of an individual processes.
310efd4c9b6SSteve Lawrence  *
311efd4c9b6SSteve Lawrence  * zonestatd sweeps /proc each interval and charges the cpu usage of processes.
312efd4c9b6SSteve Lawrence  * to their zone.  As processes exit, their extended accounting records are
313efd4c9b6SSteve Lawrence  * read and the difference of their total and known usage is charged to their
314efd4c9b6SSteve Lawrence  * zone.
315efd4c9b6SSteve Lawrence  *
316efd4c9b6SSteve Lawrence  * If a process is never seen in /proc, the total usage on its extended
317efd4c9b6SSteve Lawrence  * accounting record will be charged to its zone.
318efd4c9b6SSteve Lawrence  */
319efd4c9b6SSteve Lawrence typedef struct zsd_proc {
320efd4c9b6SSteve Lawrence 	list_node_t	zspr_next;
321efd4c9b6SSteve Lawrence 	pid_t		zspr_ppid;
322efd4c9b6SSteve Lawrence 	psetid_t	zspr_psetid;
323efd4c9b6SSteve Lawrence 	zoneid_t	zspr_zoneid;
324efd4c9b6SSteve Lawrence 	int		zspr_sched;
325efd4c9b6SSteve Lawrence 	timestruc_t	zspr_usage;
326efd4c9b6SSteve Lawrence } zsd_proc_t;
327efd4c9b6SSteve Lawrence 
328efd4c9b6SSteve Lawrence /* Used to track the overall resource usage of the system */
329efd4c9b6SSteve Lawrence typedef struct zsd_system {
330efd4c9b6SSteve Lawrence 
331efd4c9b6SSteve Lawrence 	uint64_t zss_ram_total;
332efd4c9b6SSteve Lawrence 	uint64_t zss_ram_kern;
333efd4c9b6SSteve Lawrence 	uint64_t zss_ram_zones;
334efd4c9b6SSteve Lawrence 
335efd4c9b6SSteve Lawrence 	uint64_t zss_locked_kern;
336efd4c9b6SSteve Lawrence 	uint64_t zss_locked_zones;
337efd4c9b6SSteve Lawrence 
338efd4c9b6SSteve Lawrence 	uint64_t zss_vm_total;
339efd4c9b6SSteve Lawrence 	uint64_t zss_vm_kern;
340efd4c9b6SSteve Lawrence 	uint64_t zss_vm_zones;
341efd4c9b6SSteve Lawrence 
342efd4c9b6SSteve Lawrence 	uint64_t zss_swap_total;
343efd4c9b6SSteve Lawrence 	uint64_t zss_swap_used;
344efd4c9b6SSteve Lawrence 
345efd4c9b6SSteve Lawrence 	timestruc_t zss_idle;
346efd4c9b6SSteve Lawrence 	timestruc_t zss_intr;
347efd4c9b6SSteve Lawrence 	timestruc_t zss_kern;
348efd4c9b6SSteve Lawrence 	timestruc_t zss_user;
349efd4c9b6SSteve Lawrence 
350efd4c9b6SSteve Lawrence 	timestruc_t zss_cpu_total_time;
351efd4c9b6SSteve Lawrence 	timestruc_t zss_cpu_usage_kern;
352efd4c9b6SSteve Lawrence 	timestruc_t zss_cpu_usage_zones;
353efd4c9b6SSteve Lawrence 
354efd4c9b6SSteve Lawrence 	uint64_t zss_maxpid;
355efd4c9b6SSteve Lawrence 	uint64_t zss_processes_max;
356efd4c9b6SSteve Lawrence 	uint64_t zss_lwps_max;
357efd4c9b6SSteve Lawrence 	uint64_t zss_shm_max;
358efd4c9b6SSteve Lawrence 	uint64_t zss_shmids_max;
359efd4c9b6SSteve Lawrence 	uint64_t zss_semids_max;
360efd4c9b6SSteve Lawrence 	uint64_t zss_msgids_max;
361efd4c9b6SSteve Lawrence 	uint64_t zss_lofi_max;
362efd4c9b6SSteve Lawrence 
363efd4c9b6SSteve Lawrence 	uint64_t zss_processes;
364efd4c9b6SSteve Lawrence 	uint64_t zss_lwps;
365efd4c9b6SSteve Lawrence 	uint64_t zss_shm;
366efd4c9b6SSteve Lawrence 	uint64_t zss_shmids;
367efd4c9b6SSteve Lawrence 	uint64_t zss_semids;
368efd4c9b6SSteve Lawrence 	uint64_t zss_msgids;
369efd4c9b6SSteve Lawrence 	uint64_t zss_lofi;
370efd4c9b6SSteve Lawrence 
371efd4c9b6SSteve Lawrence 	uint64_t zss_ncpus;
372efd4c9b6SSteve Lawrence 	uint64_t zss_ncpus_online;
373efd4c9b6SSteve Lawrence 
374efd4c9b6SSteve Lawrence } zsd_system_t;
375efd4c9b6SSteve Lawrence 
376efd4c9b6SSteve Lawrence /*
377efd4c9b6SSteve Lawrence  * A dumping ground for various information and structures used to compute
378efd4c9b6SSteve Lawrence  * utilization.
379efd4c9b6SSteve Lawrence  *
380efd4c9b6SSteve Lawrence  * This structure is used to track the system while clients are connected.
381efd4c9b6SSteve Lawrence  * When The first client connects, a zsd_ctl is allocated and configured by
382efd4c9b6SSteve Lawrence  * zsd_open().  When all clients disconnect, the zsd_ctl is closed.
383efd4c9b6SSteve Lawrence  */
384efd4c9b6SSteve Lawrence typedef struct zsd_ctl {
385efd4c9b6SSteve Lawrence 	kstat_ctl_t	*zsctl_kstat_ctl;
386efd4c9b6SSteve Lawrence 
387efd4c9b6SSteve Lawrence 	/* To track extended accounting */
388efd4c9b6SSteve Lawrence 	int		zsctl_proc_fd;		/* Log currently being used */
389efd4c9b6SSteve Lawrence 	ea_file_t	zsctl_proc_eaf;
390efd4c9b6SSteve Lawrence 	struct stat64	zsctl_proc_stat;
391efd4c9b6SSteve Lawrence 	int		zsctl_proc_open;
392efd4c9b6SSteve Lawrence 	int		zsctl_proc_fd_next;	/* Log file to use next */
393efd4c9b6SSteve Lawrence 	ea_file_t	zsctl_proc_eaf_next;
394efd4c9b6SSteve Lawrence 	struct stat64	zsctl_proc_stat_next;
395efd4c9b6SSteve Lawrence 	int		zsctl_proc_open_next;
396efd4c9b6SSteve Lawrence 
397efd4c9b6SSteve Lawrence 	/* pool configuration handle */
398efd4c9b6SSteve Lawrence 	pool_conf_t	*zsctl_pool_conf;
399efd4c9b6SSteve Lawrence 	int		zsctl_pool_status;
400efd4c9b6SSteve Lawrence 	int		zsctl_pool_changed;
401efd4c9b6SSteve Lawrence 
402efd4c9b6SSteve Lawrence 	/* The above usage tacking structures */
403efd4c9b6SSteve Lawrence 	zsd_system_t	*zsctl_system;
404efd4c9b6SSteve Lawrence 	list_t		zsctl_zones;
405efd4c9b6SSteve Lawrence 	list_t		zsctl_psets;
406efd4c9b6SSteve Lawrence 	list_t		zsctl_cpus;
407efd4c9b6SSteve Lawrence 	zsd_cpu_t	*zsctl_cpu_array;
408efd4c9b6SSteve Lawrence 	zsd_proc_t	*zsctl_proc_array;
409efd4c9b6SSteve Lawrence 
410efd4c9b6SSteve Lawrence 	/* Various system info */
411efd4c9b6SSteve Lawrence 	uint64_t	zsctl_maxcpuid;
412efd4c9b6SSteve Lawrence 	uint64_t	zsctl_maxproc;
413efd4c9b6SSteve Lawrence 	uint64_t	zsctl_kern_bits;
414efd4c9b6SSteve Lawrence 	uint64_t	zsctl_pagesize;
415efd4c9b6SSteve Lawrence 
416efd4c9b6SSteve Lawrence 	/* Used to track time available under a cpu cap. */
417efd4c9b6SSteve Lawrence 	uint64_t	zsctl_hrtime;
418efd4c9b6SSteve Lawrence 	uint64_t	zsctl_hrtime_prev;
419efd4c9b6SSteve Lawrence 	timestruc_t	zsctl_hrtime_total;
420efd4c9b6SSteve Lawrence 
421efd4c9b6SSteve Lawrence 	struct timeval	zsctl_timeofday;
422efd4c9b6SSteve Lawrence 
423efd4c9b6SSteve Lawrence 	/* Caches for arrays allocated for use by various system calls */
424efd4c9b6SSteve Lawrence 	psetid_t	*zsctl_pset_cache;
425efd4c9b6SSteve Lawrence 	uint_t		zsctl_pset_ncache;
426efd4c9b6SSteve Lawrence 	processorid_t	*zsctl_cpu_cache;
427efd4c9b6SSteve Lawrence 	uint_t		zsctl_cpu_ncache;
428efd4c9b6SSteve Lawrence 	zoneid_t	*zsctl_zone_cache;
429efd4c9b6SSteve Lawrence 	uint_t		zsctl_zone_ncache;
430efd4c9b6SSteve Lawrence 	struct swaptable *zsctl_swap_cache;
431efd4c9b6SSteve Lawrence 	uint64_t	zsctl_swap_cache_size;
432efd4c9b6SSteve Lawrence 	uint64_t	zsctl_swap_cache_num;
433efd4c9b6SSteve Lawrence 	zsd_vmusage64_t	*zsctl_vmusage_cache;
434efd4c9b6SSteve Lawrence 	uint64_t	zsctl_vmusage_cache_num;
435efd4c9b6SSteve Lawrence 
436efd4c9b6SSteve Lawrence 	/* Info about procfs for scanning /proc */
437efd4c9b6SSteve Lawrence 	struct dirent	*zsctl_procfs_dent;
438efd4c9b6SSteve Lawrence 	long		zsctl_procfs_dent_size;
439efd4c9b6SSteve Lawrence 	pool_value_t	*zsctl_pool_vals[3];
440efd4c9b6SSteve Lawrence 
441efd4c9b6SSteve Lawrence 	/* Counts on tracked entities */
442efd4c9b6SSteve Lawrence 	uint_t		zsctl_nzones;
443efd4c9b6SSteve Lawrence 	uint_t		zsctl_npsets;
444efd4c9b6SSteve Lawrence 	uint_t		zsctl_npset_usages;
445efd4c9b6SSteve Lawrence } zsd_ctl_t;
446efd4c9b6SSteve Lawrence 
447efd4c9b6SSteve Lawrence zsd_ctl_t		*g_ctl;
448efd4c9b6SSteve Lawrence boolean_t		g_open;		/* True if g_ctl is open */
449efd4c9b6SSteve Lawrence int			g_hasclient;	/* True if any clients are connected */
450efd4c9b6SSteve Lawrence 
451efd4c9b6SSteve Lawrence /*
452efd4c9b6SSteve Lawrence  * The usage cache is updated by the stat_thread, and copied to clients by
453efd4c9b6SSteve Lawrence  * the zsd_stat_server.  Mutex and cond are to synchronize between the
454efd4c9b6SSteve Lawrence  * stat_thread and the stat_server.
455efd4c9b6SSteve Lawrence  */
456efd4c9b6SSteve Lawrence zs_usage_cache_t	*g_usage_cache;
457efd4c9b6SSteve Lawrence mutex_t			g_usage_cache_lock;
458efd4c9b6SSteve Lawrence cond_t			g_usage_cache_kick;
459efd4c9b6SSteve Lawrence uint_t			g_usage_cache_kickers;
460efd4c9b6SSteve Lawrence cond_t			g_usage_cache_wait;
461efd4c9b6SSteve Lawrence char			*g_usage_cache_buf;
462efd4c9b6SSteve Lawrence uint_t			g_usage_cache_bufsz;
463efd4c9b6SSteve Lawrence uint64_t		g_gen_next;
464efd4c9b6SSteve Lawrence 
465efd4c9b6SSteve Lawrence /* fds of door servers */
466efd4c9b6SSteve Lawrence int			g_server_door;
467efd4c9b6SSteve Lawrence int			g_stat_door;
468efd4c9b6SSteve Lawrence 
469efd4c9b6SSteve Lawrence /*
470efd4c9b6SSteve Lawrence  * Starting and current time.  Used to throttle memory calculation, and to
471efd4c9b6SSteve Lawrence  * mark new zones and psets with their boot and creation time.
472efd4c9b6SSteve Lawrence  */
473efd4c9b6SSteve Lawrence time_t			g_now;
474efd4c9b6SSteve Lawrence time_t			g_start;
475efd4c9b6SSteve Lawrence hrtime_t		g_hrnow;
476efd4c9b6SSteve Lawrence hrtime_t		g_hrstart;
477efd4c9b6SSteve Lawrence uint64_t		g_interval;
478efd4c9b6SSteve Lawrence 
479efd4c9b6SSteve Lawrence /*
480efd4c9b6SSteve Lawrence  * main() thread.
481efd4c9b6SSteve Lawrence  */
482efd4c9b6SSteve Lawrence thread_t		g_main;
483efd4c9b6SSteve Lawrence 
/*
 * Prints a warning to stderr: the (translated) prefix "zonestat: Warning: ",
 * then the caller's printf-style message, then a newline.
 *
 * fmt is handed to vfprintf() as the format string, so it must be a trusted
 * format that matches the variable arguments.
 */
/* PRINTFLIKE1 */
static void
zsd_warn(const char *fmt, ...)
{
	va_list alist;

	va_start(alist, fmt);

	(void) fprintf(stderr, gettext("zonestat: Warning: "));
	(void) vfprintf(stderr, fmt, alist);
	(void) fprintf(stderr, "\n");
	va_end(alist);
}
497efd4c9b6SSteve Lawrence 
/*
 * Prints a fatal error to stderr: the (translated) prefix
 * "zonestat: Error: ", then the caller's printf-style message, then a
 * newline.  Does not return; the process exits with status 1.
 *
 * fmt is handed to vfprintf() as the format string, so it must be a trusted
 * format that matches the variable arguments.
 */
/* PRINTFLIKE1 */
static void
zsd_error(const char *fmt, ...)
{
	va_list alist;

	va_start(alist, fmt);

	(void) fprintf(stderr, gettext("zonestat: Error: "));
	(void) vfprintf(stderr, fmt, alist);
	(void) fprintf(stderr, "\n");
	va_end(alist);
	exit(1);
}
512efd4c9b6SSteve Lawrence 
513efd4c9b6SSteve Lawrence /* Turns on extended accounting if not configured externally */
514efd4c9b6SSteve Lawrence int
zsd_enable_cpu_stats()515efd4c9b6SSteve Lawrence zsd_enable_cpu_stats()
516efd4c9b6SSteve Lawrence {
517efd4c9b6SSteve Lawrence 	char *path = ZONESTAT_EXACCT_FILE;
518efd4c9b6SSteve Lawrence 	char oldfile[MAXPATHLEN];
519efd4c9b6SSteve Lawrence 	int ret, state = AC_ON;
520efd4c9b6SSteve Lawrence 	ac_res_t res[6];
521efd4c9b6SSteve Lawrence 
522efd4c9b6SSteve Lawrence 	/*
523efd4c9b6SSteve Lawrence 	 * Start a new accounting file  if accounting not configured
524efd4c9b6SSteve Lawrence 	 * externally.
525efd4c9b6SSteve Lawrence 	 */
526efd4c9b6SSteve Lawrence 
527efd4c9b6SSteve Lawrence 	res[0].ar_id = AC_PROC_PID;
528efd4c9b6SSteve Lawrence 	res[0].ar_state = AC_ON;
529efd4c9b6SSteve Lawrence 	res[1].ar_id = AC_PROC_ANCPID;
530efd4c9b6SSteve Lawrence 	res[1].ar_state = AC_ON;
531efd4c9b6SSteve Lawrence 	res[2].ar_id = AC_PROC_CPU;
532efd4c9b6SSteve Lawrence 	res[2].ar_state = AC_ON;
533efd4c9b6SSteve Lawrence 	res[3].ar_id = AC_PROC_TIME;
534efd4c9b6SSteve Lawrence 	res[3].ar_state = AC_ON;
535efd4c9b6SSteve Lawrence 	res[4].ar_id = AC_PROC_ZONENAME;
536efd4c9b6SSteve Lawrence 	res[4].ar_state = AC_ON;
537efd4c9b6SSteve Lawrence 	res[5].ar_id = AC_NONE;
538efd4c9b6SSteve Lawrence 	res[5].ar_state = AC_ON;
539efd4c9b6SSteve Lawrence 	if (acctctl(AC_PROC | AC_RES_SET, res, sizeof (res)) != 0) {
540efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to set accounting resources"));
541efd4c9b6SSteve Lawrence 		return (-1);
542efd4c9b6SSteve Lawrence 	}
543efd4c9b6SSteve Lawrence 	/* Only set accounting file if none is configured */
544efd4c9b6SSteve Lawrence 	ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
545efd4c9b6SSteve Lawrence 	if (ret < 0) {
546efd4c9b6SSteve Lawrence 
547efd4c9b6SSteve Lawrence 		(void) unlink(path);
548efd4c9b6SSteve Lawrence 		if (acctctl(AC_PROC | AC_FILE_SET, path, strlen(path) + 1)
549efd4c9b6SSteve Lawrence 		    == -1) {
550efd4c9b6SSteve Lawrence 			zsd_warn(gettext("Unable to set accounting file"));
551efd4c9b6SSteve Lawrence 			return (-1);
552efd4c9b6SSteve Lawrence 		}
553efd4c9b6SSteve Lawrence 	}
554efd4c9b6SSteve Lawrence 	if (acctctl(AC_PROC | AC_STATE_SET, &state, sizeof (state)) == -1) {
555efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to enable accounting"));
556efd4c9b6SSteve Lawrence 		return (-1);
557efd4c9b6SSteve Lawrence 	}
558efd4c9b6SSteve Lawrence 	return (0);
559efd4c9b6SSteve Lawrence }
560efd4c9b6SSteve Lawrence 
561efd4c9b6SSteve Lawrence /* Turns off extended accounting if not configured externally */
562efd4c9b6SSteve Lawrence int
zsd_disable_cpu_stats()563efd4c9b6SSteve Lawrence zsd_disable_cpu_stats()
564efd4c9b6SSteve Lawrence {
565efd4c9b6SSteve Lawrence 	char *path = ZONESTAT_EXACCT_FILE;
566efd4c9b6SSteve Lawrence 	int ret, state = AC_OFF;
567efd4c9b6SSteve Lawrence 	ac_res_t res[6];
568efd4c9b6SSteve Lawrence 	char oldfile[MAXPATHLEN];
569efd4c9b6SSteve Lawrence 
570efd4c9b6SSteve Lawrence 	/* If accounting file is externally configured, leave it alone */
571efd4c9b6SSteve Lawrence 	ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
572efd4c9b6SSteve Lawrence 	if (ret == 0 && strcmp(oldfile, path) != 0)
573efd4c9b6SSteve Lawrence 		return (0);
574efd4c9b6SSteve Lawrence 
575efd4c9b6SSteve Lawrence 	res[0].ar_id = AC_PROC_PID;
576efd4c9b6SSteve Lawrence 	res[0].ar_state = AC_OFF;
577efd4c9b6SSteve Lawrence 	res[1].ar_id = AC_PROC_ANCPID;
578efd4c9b6SSteve Lawrence 	res[1].ar_state = AC_OFF;
579efd4c9b6SSteve Lawrence 	res[2].ar_id = AC_PROC_CPU;
580efd4c9b6SSteve Lawrence 	res[2].ar_state = AC_OFF;
581efd4c9b6SSteve Lawrence 	res[3].ar_id = AC_PROC_TIME;
582efd4c9b6SSteve Lawrence 	res[3].ar_state = AC_OFF;
583efd4c9b6SSteve Lawrence 	res[4].ar_id = AC_PROC_ZONENAME;
584efd4c9b6SSteve Lawrence 	res[4].ar_state = AC_OFF;
585efd4c9b6SSteve Lawrence 	res[5].ar_id = AC_NONE;
586efd4c9b6SSteve Lawrence 	res[5].ar_state = AC_OFF;
587efd4c9b6SSteve Lawrence 	if (acctctl(AC_PROC | AC_RES_SET, res, sizeof (res)) != 0) {
588efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to clear accounting resources"));
589efd4c9b6SSteve Lawrence 		return (-1);
590efd4c9b6SSteve Lawrence 	}
591efd4c9b6SSteve Lawrence 	if (acctctl(AC_PROC | AC_FILE_SET, NULL, 0) == -1) {
592efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to clear accounting file"));
593efd4c9b6SSteve Lawrence 		return (-1);
594efd4c9b6SSteve Lawrence 	}
595efd4c9b6SSteve Lawrence 	if (acctctl(AC_PROC | AC_STATE_SET, &state, sizeof (state)) == -1) {
596efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to diable accounting"));
597efd4c9b6SSteve Lawrence 		return (-1);
598efd4c9b6SSteve Lawrence 	}
599efd4c9b6SSteve Lawrence 
600efd4c9b6SSteve Lawrence 	(void) unlink(path);
601efd4c9b6SSteve Lawrence 	return (0);
602efd4c9b6SSteve Lawrence }
603efd4c9b6SSteve Lawrence 
604efd4c9b6SSteve Lawrence /*
605efd4c9b6SSteve Lawrence  * If not configured externally, deletes the current extended accounting file
606efd4c9b6SSteve Lawrence  * and starts a new one.
607efd4c9b6SSteve Lawrence  *
608efd4c9b6SSteve Lawrence  * Since the stat_thread holds an open handle to the accounting file, it will
609efd4c9b6SSteve Lawrence  * read all remaining entries from the old file before switching to
610efd4c9b6SSteve Lawrence  * read the new one.
611efd4c9b6SSteve Lawrence  */
612efd4c9b6SSteve Lawrence int
zsd_roll_exacct(void)613efd4c9b6SSteve Lawrence zsd_roll_exacct(void)
614efd4c9b6SSteve Lawrence {
615efd4c9b6SSteve Lawrence 	int ret;
616efd4c9b6SSteve Lawrence 	char *path = ZONESTAT_EXACCT_FILE;
617efd4c9b6SSteve Lawrence 	char oldfile[MAXPATHLEN];
618efd4c9b6SSteve Lawrence 
619efd4c9b6SSteve Lawrence 	/* If accounting file is externally configured, leave it alone */
620efd4c9b6SSteve Lawrence 	ret = acctctl(AC_PROC | AC_FILE_GET, oldfile, sizeof (oldfile));
621efd4c9b6SSteve Lawrence 	if (ret == 0 && strcmp(oldfile, path) != 0)
622efd4c9b6SSteve Lawrence 		return (0);
623efd4c9b6SSteve Lawrence 
624efd4c9b6SSteve Lawrence 	if (unlink(path) != 0)
625efd4c9b6SSteve Lawrence 		/* Roll it next time */
626efd4c9b6SSteve Lawrence 		return (0);
627efd4c9b6SSteve Lawrence 
628efd4c9b6SSteve Lawrence 	if (acctctl(AC_PROC | AC_FILE_SET, path, strlen(path) + 1) == -1) {
629efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to set accounting file"));
630efd4c9b6SSteve Lawrence 		return (-1);
631efd4c9b6SSteve Lawrence 	}
632efd4c9b6SSteve Lawrence 	return (0);
633efd4c9b6SSteve Lawrence }
634efd4c9b6SSteve Lawrence 
635efd4c9b6SSteve Lawrence /* Contract stuff for zone_enter() */
636efd4c9b6SSteve Lawrence int
init_template(void)637efd4c9b6SSteve Lawrence init_template(void)
638efd4c9b6SSteve Lawrence {
639efd4c9b6SSteve Lawrence 	int fd;
640efd4c9b6SSteve Lawrence 	int err = 0;
641efd4c9b6SSteve Lawrence 
642efd4c9b6SSteve Lawrence 	fd = open64(CTFS_ROOT "/process/template", O_RDWR);
643efd4c9b6SSteve Lawrence 	if (fd == -1)
644efd4c9b6SSteve Lawrence 		return (-1);
645efd4c9b6SSteve Lawrence 
646efd4c9b6SSteve Lawrence 	/*
647efd4c9b6SSteve Lawrence 	 * For now, zoneadmd doesn't do anything with the contract.
648efd4c9b6SSteve Lawrence 	 * Deliver no events, don't inherit, and allow it to be orphaned.
649efd4c9b6SSteve Lawrence 	 */
650efd4c9b6SSteve Lawrence 	err |= ct_tmpl_set_critical(fd, 0);
651efd4c9b6SSteve Lawrence 	err |= ct_tmpl_set_informative(fd, 0);
652efd4c9b6SSteve Lawrence 	err |= ct_pr_tmpl_set_fatal(fd, CT_PR_EV_HWERR);
653efd4c9b6SSteve Lawrence 	err |= ct_pr_tmpl_set_param(fd, CT_PR_PGRPONLY | CT_PR_REGENT);
654efd4c9b6SSteve Lawrence 	if (err || ct_tmpl_activate(fd)) {
655efd4c9b6SSteve Lawrence 		(void) close(fd);
656efd4c9b6SSteve Lawrence 		return (-1);
657efd4c9b6SSteve Lawrence 	}
658efd4c9b6SSteve Lawrence 
659efd4c9b6SSteve Lawrence 	return (fd);
660efd4c9b6SSteve Lawrence }
661efd4c9b6SSteve Lawrence 
662efd4c9b6SSteve Lawrence /*
663efd4c9b6SSteve Lawrence  * Contract stuff for zone_enter()
664efd4c9b6SSteve Lawrence  */
665efd4c9b6SSteve Lawrence int
contract_latest(ctid_t * id)666efd4c9b6SSteve Lawrence contract_latest(ctid_t *id)
667efd4c9b6SSteve Lawrence {
668efd4c9b6SSteve Lawrence 	int cfd, r;
669efd4c9b6SSteve Lawrence 	ct_stathdl_t st;
670efd4c9b6SSteve Lawrence 	ctid_t result;
671efd4c9b6SSteve Lawrence 
672efd4c9b6SSteve Lawrence 	if ((cfd = open64(CTFS_ROOT "/process/latest", O_RDONLY)) == -1)
673efd4c9b6SSteve Lawrence 		return (errno);
674efd4c9b6SSteve Lawrence 
675efd4c9b6SSteve Lawrence 	if ((r = ct_status_read(cfd, CTD_COMMON, &st)) != 0) {
676efd4c9b6SSteve Lawrence 		(void) close(cfd);
677efd4c9b6SSteve Lawrence 		return (r);
678efd4c9b6SSteve Lawrence 	}
679efd4c9b6SSteve Lawrence 
680efd4c9b6SSteve Lawrence 	result = ct_status_get_id(st);
681efd4c9b6SSteve Lawrence 	ct_status_free(st);
682efd4c9b6SSteve Lawrence 	(void) close(cfd);
683efd4c9b6SSteve Lawrence 
684efd4c9b6SSteve Lawrence 	*id = result;
685efd4c9b6SSteve Lawrence 	return (0);
686efd4c9b6SSteve Lawrence }
687efd4c9b6SSteve Lawrence 
/*
 * Adds FD_CLOEXEC to the descriptor flags of fd, keeping whatever flags
 * were already set.  Returns 0 on success, or -1 if either fcntl() call
 * fails.
 */
static int
close_on_exec(int fd)
{
	int fdflags;

	fdflags = fcntl(fd, F_GETFD, 0);
	if (fdflags == -1)
		return (-1);

	if (fcntl(fd, F_SETFD, fdflags | FD_CLOEXEC) == -1)
		return (-1);

	return (0);
}
696efd4c9b6SSteve Lawrence 
697efd4c9b6SSteve Lawrence int
contract_open(ctid_t ctid,const char * type,const char * file,int oflag)698efd4c9b6SSteve Lawrence contract_open(ctid_t ctid, const char *type, const char *file, int oflag)
699efd4c9b6SSteve Lawrence {
700efd4c9b6SSteve Lawrence 	char path[PATH_MAX];
701efd4c9b6SSteve Lawrence 	int n, fd;
702efd4c9b6SSteve Lawrence 
703efd4c9b6SSteve Lawrence 	if (type == NULL)
704efd4c9b6SSteve Lawrence 		type = "all";
705efd4c9b6SSteve Lawrence 
706efd4c9b6SSteve Lawrence 	n = snprintf(path, PATH_MAX, CTFS_ROOT "/%s/%ld/%s", type, ctid, file);
707efd4c9b6SSteve Lawrence 	if (n >= sizeof (path)) {
708efd4c9b6SSteve Lawrence 		errno = ENAMETOOLONG;
709efd4c9b6SSteve Lawrence 		return (-1);
710efd4c9b6SSteve Lawrence 	}
711efd4c9b6SSteve Lawrence 
712efd4c9b6SSteve Lawrence 	fd = open64(path, oflag);
713efd4c9b6SSteve Lawrence 	if (fd != -1) {
714efd4c9b6SSteve Lawrence 		if (close_on_exec(fd) == -1) {
715efd4c9b6SSteve Lawrence 			int err = errno;
716efd4c9b6SSteve Lawrence 			(void) close(fd);
717efd4c9b6SSteve Lawrence 			errno = err;
718efd4c9b6SSteve Lawrence 			return (-1);
719efd4c9b6SSteve Lawrence 		}
720efd4c9b6SSteve Lawrence 	}
721efd4c9b6SSteve Lawrence 	return (fd);
722efd4c9b6SSteve Lawrence }
723efd4c9b6SSteve Lawrence 
724efd4c9b6SSteve Lawrence int
contract_abandon_id(ctid_t ctid)725efd4c9b6SSteve Lawrence contract_abandon_id(ctid_t ctid)
726efd4c9b6SSteve Lawrence {
727efd4c9b6SSteve Lawrence 	int fd, err;
728efd4c9b6SSteve Lawrence 
729efd4c9b6SSteve Lawrence 	fd = contract_open(ctid, "all", "ctl", O_WRONLY);
730efd4c9b6SSteve Lawrence 	if (fd == -1)
731efd4c9b6SSteve Lawrence 		return (errno);
732efd4c9b6SSteve Lawrence 
733efd4c9b6SSteve Lawrence 	err = ct_ctl_abandon(fd);
734efd4c9b6SSteve Lawrence 	(void) close(fd);
735efd4c9b6SSteve Lawrence 
736efd4c9b6SSteve Lawrence 	return (err);
737efd4c9b6SSteve Lawrence }
738efd4c9b6SSteve Lawrence /*
739efd4c9b6SSteve Lawrence  * Attach the zsd_server to a zone.  Called for each zone when zonestatd
740efd4c9b6SSteve Lawrence  * starts, and for each newly booted zone when zoneadmd contacts the zsd_server
741efd4c9b6SSteve Lawrence  *
742efd4c9b6SSteve Lawrence  * Zone_enter is used to avoid reaching into zone to fattach door.
743efd4c9b6SSteve Lawrence  */
744efd4c9b6SSteve Lawrence static void
zsd_fattach_zone(zoneid_t zid,int door,boolean_t detach_only)745efd4c9b6SSteve Lawrence zsd_fattach_zone(zoneid_t zid, int door, boolean_t detach_only)
746efd4c9b6SSteve Lawrence {
747efd4c9b6SSteve Lawrence 	char *path = ZS_DOOR_PATH;
748efd4c9b6SSteve Lawrence 	int fd, pid, stat, tmpl_fd;
749efd4c9b6SSteve Lawrence 	ctid_t ct;
750efd4c9b6SSteve Lawrence 
751efd4c9b6SSteve Lawrence 	if ((tmpl_fd = init_template()) == -1) {
752efd4c9b6SSteve Lawrence 		zsd_warn("Unable to init template");
753efd4c9b6SSteve Lawrence 		return;
754efd4c9b6SSteve Lawrence 	}
755efd4c9b6SSteve Lawrence 
756efd4c9b6SSteve Lawrence 	pid = forkx(0);
757efd4c9b6SSteve Lawrence 	if (pid < 0) {
758efd4c9b6SSteve Lawrence 		(void) ct_tmpl_clear(tmpl_fd);
759efd4c9b6SSteve Lawrence 		zsd_warn(gettext(
760efd4c9b6SSteve Lawrence 		    "Unable to fork to add zonestat to zoneid %d\n"), zid);
761efd4c9b6SSteve Lawrence 		return;
762efd4c9b6SSteve Lawrence 	}
763efd4c9b6SSteve Lawrence 
764efd4c9b6SSteve Lawrence 	if (pid == 0) {
765efd4c9b6SSteve Lawrence 		(void) ct_tmpl_clear(tmpl_fd);
766efd4c9b6SSteve Lawrence 		(void) close(tmpl_fd);
767efd4c9b6SSteve Lawrence 		if (zid != 0 && zone_enter(zid) != 0) {
768efd4c9b6SSteve Lawrence 			if (errno == EINVAL) {
769efd4c9b6SSteve Lawrence 				_exit(0);
770efd4c9b6SSteve Lawrence 			}
771efd4c9b6SSteve Lawrence 			_exit(1);
772efd4c9b6SSteve Lawrence 		}
773efd4c9b6SSteve Lawrence 		(void) fdetach(path);
774efd4c9b6SSteve Lawrence 		(void) unlink(path);
775efd4c9b6SSteve Lawrence 		if (detach_only)
776efd4c9b6SSteve Lawrence 			_exit(0);
777efd4c9b6SSteve Lawrence 		fd = open(path, O_CREAT|O_RDWR, 0644);
778efd4c9b6SSteve Lawrence 		if (fd < 0)
779efd4c9b6SSteve Lawrence 			_exit(2);
780efd4c9b6SSteve Lawrence 		if (fattach(door, path) != 0)
781efd4c9b6SSteve Lawrence 			_exit(3);
782efd4c9b6SSteve Lawrence 		_exit(0);
783efd4c9b6SSteve Lawrence 	}
784efd4c9b6SSteve Lawrence 	if (contract_latest(&ct) == -1)
785efd4c9b6SSteve Lawrence 		ct = -1;
786efd4c9b6SSteve Lawrence 	(void) ct_tmpl_clear(tmpl_fd);
787efd4c9b6SSteve Lawrence 	(void) close(tmpl_fd);
788efd4c9b6SSteve Lawrence 	(void) contract_abandon_id(ct);
789efd4c9b6SSteve Lawrence 	while (waitpid(pid, &stat, 0) != pid)
790efd4c9b6SSteve Lawrence 		;
791efd4c9b6SSteve Lawrence 	if (WIFEXITED(stat) && WEXITSTATUS(stat) == 0)
792efd4c9b6SSteve Lawrence 		return;
793efd4c9b6SSteve Lawrence 
794efd4c9b6SSteve Lawrence 	zsd_warn(gettext("Unable to attach door to zoneid: %d"), zid);
795efd4c9b6SSteve Lawrence 
796efd4c9b6SSteve Lawrence 	if (WEXITSTATUS(stat) == 1)
797efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Cannot entering zone"));
798efd4c9b6SSteve Lawrence 	else if (WEXITSTATUS(stat) == 2)
799efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to create door file: %s"), path);
800efd4c9b6SSteve Lawrence 	else if (WEXITSTATUS(stat) == 3)
801efd4c9b6SSteve Lawrence 		zsd_warn(gettext("Unable to fattach file: %s"), path);
802efd4c9b6SSteve Lawrence 
803efd4c9b6SSteve Lawrence 	zsd_warn(gettext("Internal error entering zone: %d"), zid);
804efd4c9b6SSteve Lawrence }
805efd4c9b6SSteve Lawrence 
806efd4c9b6SSteve Lawrence /*
807efd4c9b6SSteve Lawrence  * Zone lookup and allocation functions to manage list of currently running
808efd4c9b6SSteve Lawrence  * zones.
809efd4c9b6SSteve Lawrence  */
810efd4c9b6SSteve Lawrence static zsd_zone_t *
zsd_lookup_zone(zsd_ctl_t * ctl,char * zonename,zoneid_t zoneid)811efd4c9b6SSteve Lawrence zsd_lookup_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
812efd4c9b6SSteve Lawrence {
813efd4c9b6SSteve Lawrence 	zsd_zone_t *zone;
814efd4c9b6SSteve Lawrence 
815efd4c9b6SSteve Lawrence 	for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
816efd4c9b6SSteve Lawrence 	    zone = list_next(&ctl->zsctl_zones, zone)) {
817efd4c9b6SSteve Lawrence 		if (strcmp(zone->zsz_name, zonename) == 0) {
818efd4c9b6SSteve Lawrence 			if (zoneid != -1)
819efd4c9b6SSteve Lawrence 				zone->zsz_id = zoneid;
820efd4c9b6SSteve Lawrence 			return (zone);
821efd4c9b6SSteve Lawrence 		}
822efd4c9b6SSteve Lawrence 	}
823efd4c9b6SSteve Lawrence 	return (NULL);
824efd4c9b6SSteve Lawrence }
825efd4c9b6SSteve Lawrence 
826efd4c9b6SSteve Lawrence static zsd_zone_t *
zsd_lookup_zone_byid(zsd_ctl_t * ctl,zoneid_t zoneid)827efd4c9b6SSteve Lawrence zsd_lookup_zone_byid(zsd_ctl_t *ctl, zoneid_t zoneid)
828efd4c9b6SSteve Lawrence {
829efd4c9b6SSteve Lawrence 	zsd_zone_t *zone;
830efd4c9b6SSteve Lawrence 
831efd4c9b6SSteve Lawrence 	for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
832efd4c9b6SSteve Lawrence 	    zone = list_next(&ctl->zsctl_zones, zone)) {
833efd4c9b6SSteve Lawrence 		if (zone->zsz_id == zoneid)
834efd4c9b6SSteve Lawrence 			return (zone);
835efd4c9b6SSteve Lawrence 	}
836efd4c9b6SSteve Lawrence 	return (NULL);
837efd4c9b6SSteve Lawrence }
838efd4c9b6SSteve Lawrence 
839efd4c9b6SSteve Lawrence static zsd_zone_t *
zsd_allocate_zone(zsd_ctl_t * ctl,char * zonename,zoneid_t zoneid)840efd4c9b6SSteve Lawrence zsd_allocate_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
841efd4c9b6SSteve Lawrence {
842efd4c9b6SSteve Lawrence 	zsd_zone_t *zone;
843efd4c9b6SSteve Lawrence 
844efd4c9b6SSteve Lawrence 	if ((zone = (zsd_zone_t *)calloc(1, sizeof (zsd_zone_t))) == NULL)
845efd4c9b6SSteve Lawrence 		return (NULL);
846efd4c9b6SSteve Lawrence 
847efd4c9b6SSteve Lawrence 	(void) strlcpy(zone->zsz_name, zonename, sizeof (zone->zsz_name));
848efd4c9b6SSteve Lawrence 	zone->zsz_id = zoneid;
849efd4c9b6SSteve Lawrence 	zone->zsz_found = B_FALSE;
850efd4c9b6SSteve Lawrence 
851efd4c9b6SSteve Lawrence 	/*
852efd4c9b6SSteve Lawrence 	 * Allocate as deleted so if not found in first pass, zone is deleted
853efd4c9b6SSteve Lawrence 	 * from list.  This can happen if zone is returned by zone_list, but
854efd4c9b6SSteve Lawrence 	 * exits before first attempt to fetch zone details.
855efd4c9b6SSteve Lawrence 	 */
856efd4c9b6SSteve Lawrence 	zone->zsz_start = g_now;
857efd4c9b6SSteve Lawrence 	zone->zsz_hrstart = g_hrnow;
858efd4c9b6SSteve Lawrence 	zone->zsz_deleted = B_TRUE;
859efd4c9b6SSteve Lawrence 
860efd4c9b6SSteve Lawrence 	zone->zsz_cpu_shares = ZS_LIMIT_NONE;
861efd4c9b6SSteve Lawrence 	zone->zsz_cpu_cap = ZS_LIMIT_NONE;
862efd4c9b6SSteve Lawrence 	zone->zsz_ram_cap = ZS_LIMIT_NONE;
863efd4c9b6SSteve Lawrence 	zone->zsz_locked_cap = ZS_LIMIT_NONE;
864efd4c9b6SSteve Lawrence 	zone->zsz_vm_cap = ZS_LIMIT_NONE;
865efd4c9b6SSteve Lawrence 
866efd4c9b6SSteve Lawrence 	zone->zsz_processes_cap = ZS_LIMIT_NONE;
867efd4c9b6SSteve Lawrence 	zone->zsz_lwps_cap = ZS_LIMIT_NONE;
868efd4c9b6SSteve Lawrence 	zone->zsz_shm_cap = ZS_LIMIT_NONE;
869efd4c9b6SSteve Lawrence 	zone->zsz_shmids_cap = ZS_LIMIT_NONE;
870efd4c9b6SSteve Lawrence 	zone->zsz_semids_cap = ZS_LIMIT_NONE;
871efd4c9b6SSteve Lawrence 	zone->zsz_msgids_cap = ZS_LIMIT_NONE;
872efd4c9b6SSteve Lawrence 	zone->zsz_lofi_cap = ZS_LIMIT_NONE;
873efd4c9b6SSteve Lawrence 
874efd4c9b6SSteve Lawrence 	ctl->zsctl_nzones++;
875efd4c9b6SSteve Lawrence 
876efd4c9b6SSteve Lawrence 	return (zone);
877efd4c9b6SSteve Lawrence }
878efd4c9b6SSteve Lawrence 
879efd4c9b6SSteve Lawrence static zsd_zone_t *
zsd_lookup_insert_zone(zsd_ctl_t * ctl,char * zonename,zoneid_t zoneid)880efd4c9b6SSteve Lawrence zsd_lookup_insert_zone(zsd_ctl_t *ctl, char *zonename, zoneid_t zoneid)
881efd4c9b6SSteve Lawrence {
882efd4c9b6SSteve Lawrence 	zsd_zone_t *zone, *tmp;
883efd4c9b6SSteve Lawrence 
884efd4c9b6SSteve Lawrence 	if ((zone = zsd_lookup_zone(ctl, zonename, zoneid)) != NULL)
885efd4c9b6SSteve Lawrence 		return (zone);
886efd4c9b6SSteve Lawrence 
887efd4c9b6SSteve Lawrence 	if ((zone = zsd_allocate_zone(ctl, zonename, zoneid)) == NULL)
888efd4c9b6SSteve Lawrence 		return (NULL);
889efd4c9b6SSteve Lawrence 
890efd4c9b6SSteve Lawrence 	/* Insert sorted by zonename */
891efd4c9b6SSteve Lawrence 	tmp = list_head(&ctl->zsctl_zones);
892efd4c9b6SSteve Lawrence 	while (tmp != NULL && strcmp(zonename, tmp->zsz_name) > 0)
893efd4c9b6SSteve Lawrence 		tmp = list_next(&ctl->zsctl_zones, tmp);
894efd4c9b6SSteve Lawrence 
895efd4c9b6SSteve Lawrence 	list_insert_before(&ctl->zsctl_zones, tmp, zone);
896efd4c9b6SSteve Lawrence 	return (zone);
897efd4c9b6SSteve Lawrence }
898efd4c9b6SSteve Lawrence 
899efd4c9b6SSteve Lawrence /*
900efd4c9b6SSteve Lawrence  * Mark all zones as not existing.  As zones are found, they will
901efd4c9b6SSteve Lawrence  * be marked as existing.  If a zone is not found, then it must have
902efd4c9b6SSteve Lawrence  * halted.
903efd4c9b6SSteve Lawrence  */
904efd4c9b6SSteve Lawrence static void
zsd_mark_zones_start(zsd_ctl_t * ctl)905efd4c9b6SSteve Lawrence zsd_mark_zones_start(zsd_ctl_t *ctl)
906efd4c9b6SSteve Lawrence {
907efd4c9b6SSteve Lawrence 
908efd4c9b6SSteve Lawrence 	zsd_zone_t *zone;
909efd4c9b6SSteve Lawrence 
910efd4c9b6SSteve Lawrence 	for (zone = list_head(&ctl->zsctl_zones); zone != NULL;
911efd4c9b6SSteve Lawrence 	    zone = list_next(&ctl->zsctl_zones, zone)) {
912efd4c9b6SSteve Lawrence 		zone->zsz_found = B_FALSE;
913efd4c9b6SSteve Lawrence 	}
914efd4c9b6SSteve Lawrence }
915efd4c9b6SSteve Lawrence 
916efd4c9b6SSteve Lawrence /*
917efd4c9b6SSteve Lawrence  * Mark each zone as not using pset.  If processes are found using the
918efd4c9b6SSteve Lawrence  * pset, the zone will remain bound to the pset.  If none of a zones
919efd4c9b6SSteve Lawrence  * processes are bound to the pset, the zone's usage of the pset will
920efd4c9b6SSteve Lawrence  * be deleted.
921efd4c9b6SSteve Lawrence  *
922efd4c9b6SSteve Lawrence  */
923efd4c9b6SSteve Lawrence static void
zsd_mark_pset_usage_start(zsd_pset_t * pset)924efd4c9b6SSteve Lawrence zsd_mark_pset_usage_start(zsd_pset_t *pset)
925efd4c9b6SSteve Lawrence {
926efd4c9b6SSteve Lawrence 	zsd_pset_usage_t *usage;
927efd4c9b6SSteve Lawrence 
928efd4c9b6SSteve Lawrence 	for (usage = list_head(&pset->zsp_usage_list);
929efd4c9b6SSteve Lawrence 	    usage != NULL;
930efd4c9b6SSteve Lawrence 	    usage = list_next(&pset->zsp_usage_list, usage)) {
931efd4c9b6SSteve Lawrence 		usage->zsu_found = B_FALSE;
932efd4c9b6SSteve Lawrence 		usage->zsu_empty = B_TRUE;
933efd4c9b6SSteve Lawrence 	}
934