1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012 by Delphix. All rights reserved.
24 */
25
#include "statcommon.h"
#include "dsr.h"

#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <strings.h>
#include <errno.h>
#include <limits.h>
#include <poll.h>
35
36#define	ARRAY_SIZE(a)	(sizeof (a) / sizeof (*a))
37
38/*
39 * The time we delay before retrying after an allocation
40 * failure, in milliseconds
41 */
42#define	RETRY_DELAY 200
43
/*
 * Names of the per-CPU tick counters in the cpu::sys kstat that are
 * summed by cpu_ticks_delta() below.  Order is not significant.
 */
static char *cpu_states[] = {
	"cpu_ticks_idle",
	"cpu_ticks_user",
	"cpu_ticks_kernel",
	"cpu_ticks_wait"
};
50
51static kstat_t *
52kstat_lookup_read(kstat_ctl_t *kc, char *module,
53		int instance, char *name)
54{
55	kstat_t *ksp = kstat_lookup(kc, module, instance, name);
56	if (ksp == NULL)
57		return (NULL);
58	if (kstat_read(kc, ksp, NULL) == -1)
59		return (NULL);
60	return (ksp);
61}
62
63/*
64 * Note: the following helpers do not clean up on the failure case,
65 * because it is left to the free_snapshot() in the acquire_snapshot()
66 * failure path.
67 */
68
/*
 * Take a per-CPU snapshot: online state, processor-set binding, and
 * private copies of the cpu::vm and cpu::sys kstats for every active
 * CPU.  Returns 0 on success, or the errno left behind by the failing
 * libc/libkstat call (the common exit path returns errno directly,
 * which is cleared just before the label on success).
 */
static int
acquire_cpus(struct snapshot *ss, kstat_ctl_t *kc)
{
	size_t i;

	/*
	 * Size the array by the highest possible CPU id plus one so it
	 * can be indexed directly by CPU id; slots whose id has no CPU
	 * present keep cs_id == ID_NO_CPU.
	 * NOTE(review): a failing sysconf() returns -1, which would make
	 * s_nr_cpus zero -- presumably _SC_CPUID_MAX cannot fail here;
	 * confirm.
	 */
	ss->s_nr_cpus = sysconf(_SC_CPUID_MAX) + 1;
	ss->s_cpus = calloc(ss->s_nr_cpus, sizeof (struct cpu_snapshot));
	if (ss->s_cpus == NULL)
		goto out;

	for (i = 0; i < ss->s_nr_cpus; i++) {
		kstat_t *ksp;

		ss->s_cpus[i].cs_id = ID_NO_CPU;
		ss->s_cpus[i].cs_state = p_online(i, P_STATUS);
		/* If no valid CPU is present, move on to the next one */
		if (ss->s_cpus[i].cs_state == -1)
			continue;
		ss->s_cpus[i].cs_id = i;

		/* the CPU exists; make sure its cpu_info kstat is readable */
		if ((ksp = kstat_lookup_read(kc, "cpu_info", i, NULL)) == NULL)
			goto out;

		/* record which processor set (if any) the CPU belongs to */
		(void) pset_assign(PS_QUERY, i, &ss->s_cpus[i].cs_pset_id);
		if (ss->s_cpus[i].cs_pset_id == PS_NONE)
			ss->s_cpus[i].cs_pset_id = ID_NO_PSET;

		/* only active (online/noint) CPUs carry vm/sys statistics */
		if (!CPU_ACTIVE(&ss->s_cpus[i]))
			continue;

		if ((ksp = kstat_lookup_read(kc, "cpu", i, "vm")) == NULL)
			goto out;

		if (kstat_copy(ksp, &ss->s_cpus[i].cs_vm))
			goto out;

		if ((ksp = kstat_lookup_read(kc, "cpu", i, "sys")) == NULL)
			goto out;

		if (kstat_copy(ksp, &ss->s_cpus[i].cs_sys))
			goto out;
	}

	/* success: clear errno so the shared exit path returns 0 */
	errno = 0;
out:
	return (errno);
}
116
117static int
118acquire_psets(struct snapshot *ss)
119{
120	psetid_t *pids = NULL;
121	struct pset_snapshot *ps;
122	size_t pids_nr;
123	size_t i, j;
124
125	/*
126	 * Careful in this code. We have to use pset_list
127	 * twice, but inbetween pids_nr can change at will.
128	 * We delay the setting of s_nr_psets until we have
129	 * the "final" value of pids_nr.
130	 */
131
132	if (pset_list(NULL, &pids_nr) < 0)
133		return (errno);
134
135	if ((pids = calloc(pids_nr, sizeof (psetid_t))) == NULL)
136		goto out;
137
138	if (pset_list(pids, &pids_nr) < 0)
139		goto out;
140
141	ss->s_psets = calloc(pids_nr + 1, sizeof (struct pset_snapshot));
142	if (ss->s_psets == NULL)
143		goto out;
144	ss->s_nr_psets = pids_nr + 1;
145
146	/* CPUs not in any actual pset */
147	ps = &ss->s_psets[0];
148	ps->ps_id = 0;
149	ps->ps_cpus = calloc(ss->s_nr_cpus, sizeof (struct cpu_snapshot *));
150	if (ps->ps_cpus == NULL)
151		goto out;
152
153	/* CPUs in a a pset */
154	for (i = 1; i < ss->s_nr_psets; i++) {
155		ps = &ss->s_psets[i];
156
157		ps->ps_id = pids[i - 1];
158		ps->ps_cpus =
159		    calloc(ss->s_nr_cpus, sizeof (struct cpu_snapshot *));
160		if (ps->ps_cpus == NULL)
161			goto out;
162	}
163
164	for (i = 0; i < ss->s_nr_psets; i++) {
165		ps = &ss->s_psets[i];
166
167		for (j = 0; j < ss->s_nr_cpus; j++) {
168			if (!CPU_ACTIVE(&ss->s_cpus[j]))
169				continue;
170			if (ss->s_cpus[j].cs_pset_id != ps->ps_id)
171				continue;
172
173			ps->ps_cpus[ps->ps_nr_cpus++] = &ss->s_cpus[j];
174		}
175	}
176
177	errno = 0;
178out:
179	free(pids);
180	return (errno);
181}
182
183static int
184acquire_intrs(struct snapshot *ss, kstat_ctl_t *kc)
185{
186	kstat_t *ksp;
187	size_t i = 0;
188	kstat_t *sys_misc;
189	kstat_named_t *clock;
190
191	/* clock interrupt */
192	ss->s_nr_intrs = 1;
193
194	for (ksp = kc->kc_chain; ksp; ksp = ksp->ks_next) {
195		if (ksp->ks_type == KSTAT_TYPE_INTR)
196			ss->s_nr_intrs++;
197	}
198
199	ss->s_intrs = calloc(ss->s_nr_intrs, sizeof (struct intr_snapshot));
200	if (ss->s_intrs == NULL)
201		return (errno);
202
203	sys_misc = kstat_lookup_read(kc, "unix", 0, "system_misc");
204	if (sys_misc == NULL)
205		goto out;
206
207	clock = (kstat_named_t *)kstat_data_lookup(sys_misc, "clk_intr");
208	if (clock == NULL)
209		goto out;
210
211	(void) strlcpy(ss->s_intrs[0].is_name, "clock", KSTAT_STRLEN);
212	ss->s_intrs[0].is_total = clock->value.ui32;
213
214	i = 1;
215
216	for (ksp = kc->kc_chain; ksp; ksp = ksp->ks_next) {
217		kstat_intr_t *ki;
218		int j;
219
220		if (ksp->ks_type != KSTAT_TYPE_INTR)
221			continue;
222		if (kstat_read(kc, ksp, NULL) == -1)
223			goto out;
224
225		ki = KSTAT_INTR_PTR(ksp);
226
227		(void) strlcpy(ss->s_intrs[i].is_name, ksp->ks_name,
228		    KSTAT_STRLEN);
229		ss->s_intrs[i].is_total = 0;
230
231		for (j = 0; j < KSTAT_NUM_INTRS; j++)
232			ss->s_intrs[i].is_total += ki->intrs[j];
233
234		i++;
235	}
236
237	errno = 0;
238out:
239	return (errno);
240}
241
242int
243acquire_sys(struct snapshot *ss, kstat_ctl_t *kc)
244{
245	size_t i;
246	kstat_named_t *knp;
247	kstat_t *ksp;
248
249	if ((ksp = kstat_lookup(kc, "unix", 0, "sysinfo")) == NULL)
250		return (errno);
251
252	if (kstat_read(kc, ksp, &ss->s_sys.ss_sysinfo) == -1)
253		return (errno);
254
255	if ((ksp = kstat_lookup(kc, "unix", 0, "vminfo")) == NULL)
256		return (errno);
257
258	if (kstat_read(kc, ksp, &ss->s_sys.ss_vminfo) == -1)
259		return (errno);
260
261	if ((ksp = kstat_lookup(kc, "unix", 0, "dnlcstats")) == NULL)
262		return (errno);
263
264	if (kstat_read(kc, ksp, &ss->s_sys.ss_nc) == -1)
265		return (errno);
266
267	if ((ksp = kstat_lookup(kc, "unix", 0, "system_misc")) == NULL)
268		return (errno);
269
270	if (kstat_read(kc, ksp, NULL) == -1)
271		return (errno);
272
273	knp = (kstat_named_t *)kstat_data_lookup(ksp, "clk_intr");
274	if (knp == NULL)
275		return (errno);
276
277	ss->s_sys.ss_ticks = knp->value.l;
278
279	knp = (kstat_named_t *)kstat_data_lookup(ksp, "deficit");
280	if (knp == NULL)
281		return (errno);
282
283	ss->s_sys.ss_deficit = knp->value.l;
284
285	for (i = 0; i < ss->s_nr_cpus; i++) {
286		if (!CPU_ACTIVE(&ss->s_cpus[i]))
287			continue;
288
289		if (kstat_add(&ss->s_cpus[i].cs_sys, &ss->s_sys.ss_agg_sys))
290			return (errno);
291		if (kstat_add(&ss->s_cpus[i].cs_vm, &ss->s_sys.ss_agg_vm))
292			return (errno);
293		ss->s_nr_active_cpus++;
294	}
295
296	return (0);
297}
298
299struct snapshot *
300acquire_snapshot(kstat_ctl_t *kc, int types, struct iodev_filter *iodev_filter)
301{
302	struct snapshot *ss = NULL;
303	int err;
304
305retry:
306	err = 0;
307	/* ensure any partial resources are freed on a retry */
308	free_snapshot(ss);
309
310	ss = safe_alloc(sizeof (struct snapshot));
311
312	(void) memset(ss, 0, sizeof (struct snapshot));
313
314	ss->s_types = types;
315
316	/* wait for a possibly up-to-date chain */
317	while (kstat_chain_update(kc) == -1) {
318		if (errno == EAGAIN)
319			(void) poll(NULL, 0, RETRY_DELAY);
320		else
321			fail(1, "kstat_chain_update failed");
322	}
323
324	if (!err && (types & SNAP_INTERRUPTS))
325		err = acquire_intrs(ss, kc);
326
327	if (!err && (types & (SNAP_CPUS | SNAP_SYSTEM | SNAP_PSETS)))
328		err = acquire_cpus(ss, kc);
329
330	if (!err && (types & SNAP_PSETS))
331		err = acquire_psets(ss);
332
333	if (!err && (types & (SNAP_IODEVS | SNAP_CONTROLLERS |
334	    SNAP_IOPATHS_LI | SNAP_IOPATHS_LTI)))
335		err = acquire_iodevs(ss, kc, iodev_filter);
336
337	if (!err && (types & SNAP_SYSTEM))
338		err = acquire_sys(ss, kc);
339
340	switch (err) {
341		case 0:
342			break;
343		case EAGAIN:
344			(void) poll(NULL, 0, RETRY_DELAY);
345		/* a kstat disappeared from under us */
346		/*FALLTHRU*/
347		case ENXIO:
348		case ENOENT:
349			goto retry;
350		default:
351			fail(1, "acquiring snapshot failed");
352	}
353
354	return (ss);
355}
356
357void
358free_snapshot(struct snapshot *ss)
359{
360	size_t i;
361
362	if (ss == NULL)
363		return;
364
365	while (ss->s_iodevs) {
366		struct iodev_snapshot *tmp = ss->s_iodevs;
367		ss->s_iodevs = ss->s_iodevs->is_next;
368		free_iodev(tmp);
369	}
370
371	if (ss->s_cpus) {
372		for (i = 0; i < ss->s_nr_cpus; i++) {
373			free(ss->s_cpus[i].cs_vm.ks_data);
374			free(ss->s_cpus[i].cs_sys.ks_data);
375		}
376		free(ss->s_cpus);
377	}
378
379	if (ss->s_psets) {
380		for (i = 0; i < ss->s_nr_psets; i++)
381			free(ss->s_psets[i].ps_cpus);
382		free(ss->s_psets);
383	}
384
385	free(ss->s_sys.ss_agg_sys.ks_data);
386	free(ss->s_sys.ss_agg_vm.ks_data);
387	free(ss);
388}
389
390kstat_ctl_t *
391open_kstat(void)
392{
393	kstat_ctl_t *kc;
394
395	while ((kc = kstat_open()) == NULL) {
396		if (errno == EAGAIN)
397			(void) poll(NULL, 0, RETRY_DELAY);
398		else
399			fail(1, "kstat_open failed");
400	}
401
402	return (kc);
403}
404
405void *
406safe_alloc(size_t size)
407{
408	void *ptr;
409
410	while ((ptr = malloc(size)) == NULL) {
411		if (errno == EAGAIN)
412			(void) poll(NULL, 0, RETRY_DELAY);
413		else
414			fail(1, "malloc failed");
415	}
416	return (ptr);
417}
418
419char *
420safe_strdup(char *str)
421{
422	char *ret;
423
424	if (str == NULL)
425		return (NULL);
426
427	while ((ret = strdup(str)) == NULL) {
428		if (errno == EAGAIN)
429			(void) poll(NULL, 0, RETRY_DELAY);
430		else
431			fail(1, "malloc failed");
432	}
433	return (ret);
434}
435
436uint64_t
437kstat_delta(kstat_t *old, kstat_t *new, char *name)
438{
439	kstat_named_t *knew = kstat_data_lookup(new, name);
440	if (old && old->ks_data) {
441		kstat_named_t *kold = kstat_data_lookup(old, name);
442		return (knew->value.ui64 - kold->value.ui64);
443	}
444	return (knew->value.ui64);
445}
446
447int
448kstat_copy(const kstat_t *src, kstat_t *dst)
449{
450	*dst = *src;
451
452	if (src->ks_data != NULL) {
453		if ((dst->ks_data = malloc(src->ks_data_size)) == NULL)
454			return (-1);
455		bcopy(src->ks_data, dst->ks_data, src->ks_data_size);
456	} else {
457		dst->ks_data = NULL;
458		dst->ks_data_size = 0;
459	}
460	return (0);
461}
462
463int
464kstat_add(const kstat_t *src, kstat_t *dst)
465{
466	size_t i;
467	kstat_named_t *from;
468	kstat_named_t *to;
469
470	if (dst->ks_data == NULL)
471		return (kstat_copy(src, dst));
472
473	from = src->ks_data;
474	to = dst->ks_data;
475
476	for (i = 0; i < src->ks_ndata; i++) {
477		/* "addition" makes little sense for strings */
478		if (from->data_type != KSTAT_DATA_CHAR &&
479		    from->data_type != KSTAT_DATA_STRING)
480			(to)->value.ui64 += (from)->value.ui64;
481		from++;
482		to++;
483	}
484
485	return (0);
486}
487
488uint64_t
489cpu_ticks_delta(kstat_t *old, kstat_t *new)
490{
491	uint64_t ticks = 0;
492	size_t i;
493	for (i = 0; i < ARRAY_SIZE(cpu_states); i++)
494		ticks += kstat_delta(old, new, cpu_states[i]);
495	return (ticks);
496}
497
498int
499nr_active_cpus(struct snapshot *ss)
500{
501	size_t i;
502	int count = 0;
503	for (i = 0; i < ss->s_nr_cpus; i++) {
504		if (CPU_ACTIVE(&ss->s_cpus[i]))
505			count++;
506	}
507
508	return (count);
509}
510
511/*
512 * Return the number of ticks delta between two hrtime_t
513 * values. Attempt to cater for various kinds of overflow
514 * in hrtime_t - no matter how improbable.
515 */
516uint64_t
517hrtime_delta(hrtime_t old, hrtime_t new)
518{
519	uint64_t del;
520
521	if ((new >= old) && (old >= 0L))
522		return (new - old);
523	else {
524		/*
525		 * We've overflowed the positive portion of an
526		 * hrtime_t.
527		 */
528		if (new < 0L) {
529			/*
530			 * The new value is negative. Handle the
531			 * case where the old value is positive or
532			 * negative.
533			 */
534			uint64_t n1;
535			uint64_t o1;
536
537			n1 = -new;
538			if (old > 0L)
539				return (n1 - old);
540			else {
541				o1 = -old;
542				del = n1 - o1;
543				return (del);
544			}
545		} else {
546			/*
547			 * Either we've just gone from being negative
548			 * to positive *or* the last entry was positive
549			 * and the new entry is also positive but *less*
550			 * than the old entry. This implies we waited
551			 * quite a few days on a very fast system between
552			 * iostat displays.
553			 */
554			if (old < 0L) {
555				uint64_t o2;
556
557				o2 = -old;
558				del = UINT64_MAX - o2;
559			} else {
560				del = UINT64_MAX - old;
561			}
562			del += new;
563			return (del);
564		}
565	}
566}
567