1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * Copyright (c) 2015, Joyent Inc. All rights reserved.
29 */
30
31#include <sys/timer.h>
32#include <sys/systm.h>
33#include <sys/param.h>
34#include <sys/kmem.h>
35#include <sys/debug.h>
36#include <sys/cyclic.h>
37#include <sys/cmn_err.h>
38#include <sys/pset.h>
39#include <sys/atomic.h>
40#include <sys/policy.h>
41
/*
 * The CLOCK_HIGHRES backend.  clock_highres_init() fills in its default
 * sigevent and operations vector and then hands it to clock_add_backend().
 */
static clock_backend_t clock_highres;
43
44/*ARGSUSED*/
45static int
46clock_highres_settime(timespec_t *ts)
47{
48	return (EINVAL);
49}
50
51static int
52clock_highres_gettime(timespec_t *ts)
53{
54	hrt2ts(gethrtime(), (timestruc_t *)ts);
55
56	return (0);
57}
58
59static int
60clock_highres_getres(timespec_t *ts)
61{
62	hrt2ts(cyclic_getres(), (timestruc_t *)ts);
63
64	return (0);
65}
66
67/*ARGSUSED*/
68static int
69clock_highres_timer_create(itimer_t *it, void (*fire)(itimer_t *))
70{
71	/*
72	 * CLOCK_HIGHRES timers of sufficiently high resolution can deny
73	 * service; only allow privileged users to create such timers.
74	 * Sites that do not wish to have this restriction should
75	 * give users the "proc_clock_highres" privilege.
76	 */
77	if (secpolicy_clock_highres(CRED()) != 0) {
78		it->it_arg = NULL;
79		return (EPERM);
80	}
81
82	it->it_arg = kmem_zalloc(sizeof (cyclic_id_t), KM_SLEEP);
83	it->it_fire = fire;
84
85	return (0);
86}
87
88static void
89clock_highres_fire(void *arg)
90{
91	itimer_t *it = (itimer_t *)arg;
92	hrtime_t *addr = &it->it_hrtime;
93	hrtime_t old = *addr, new = gethrtime();
94
95	do {
96		old = *addr;
97	} while (atomic_cas_64((uint64_t *)addr, old, new) != old);
98
99	it->it_fire(it);
100}
101
102static int
103clock_highres_timer_settime(itimer_t *it, int flags,
104	const struct itimerspec *when)
105{
106	cyclic_id_t cyc, *cycp = it->it_arg;
107	proc_t *p = curproc;
108	kthread_t *t = curthread;
109	cyc_time_t cyctime;
110	cyc_handler_t hdlr;
111	cpu_t *cpu;
112	cpupart_t *cpupart;
113	int pset;
114
115	cyctime.cyt_when = ts2hrt(&when->it_value);
116	cyctime.cyt_interval = ts2hrt(&when->it_interval);
117
118	if (cyctime.cyt_when != 0 && cyctime.cyt_interval == 0 &&
119	    it->it_itime.it_interval.tv_sec == 0 &&
120	    it->it_itime.it_interval.tv_nsec == 0 &&
121	    (cyc = *cycp) != CYCLIC_NONE) {
122		/*
123		 * If our existing timer is a one-shot and our new timer is a
124		 * one-shot, we'll save ourselves a world of grief and just
125		 * reprogram the cyclic.
126		 */
127		it->it_itime = *when;
128
129		if (!(flags & TIMER_ABSTIME))
130			cyctime.cyt_when += gethrtime();
131
132		hrt2ts(cyctime.cyt_when, &it->it_itime.it_value);
133		(void) cyclic_reprogram(cyc, cyctime.cyt_when);
134		return (0);
135	}
136
137	mutex_enter(&cpu_lock);
138	if ((cyc = *cycp) != CYCLIC_NONE) {
139		cyclic_remove(cyc);
140		*cycp = CYCLIC_NONE;
141	}
142
143	if (cyctime.cyt_when == 0) {
144		mutex_exit(&cpu_lock);
145		return (0);
146	}
147
148	if (!(flags & TIMER_ABSTIME))
149		cyctime.cyt_when += gethrtime();
150
151	/*
152	 * Now we will check for overflow (that is, we will check to see
153	 * that the start time plus the interval time doesn't exceed
154	 * INT64_MAX).  The astute code reviewer will observe that this
155	 * one-time check doesn't guarantee that a future expiration
156	 * will not wrap.  We wish to prove, then, that if a future
157	 * expiration does wrap, the earliest the problem can be encountered
158	 * is (INT64_MAX / 2) nanoseconds (191 years) after boot.  Formally:
159	 *
160	 *  Given:	s + i < m	s > 0	i > 0
161	 *		s + ni > m	n > 1
162	 *
163	 *    (where "s" is the start time, "i" is the interval, "n" is the
164	 *    number of times the cyclic has fired and "m" is INT64_MAX)
165	 *
166	 *  Prove:
167	 *		(a)  s + (n - 1)i > (m / 2)
168	 *		(b)  s + (n - 1)i < m
169	 *
170	 * That is, prove that we must have fired at least once 191 years
171	 * after boot.  The proof is very straightforward; since the left
172	 * side of (a) is minimized when i is small, it is sufficient to show
173	 * that the statement is true for i's smallest possible value
174	 * (((m - s) / n) + epsilon).  The same goes for (b); showing that the
175	 * statement is true for i's largest possible value (m - s + epsilon)
176	 * is sufficient to prove the statement.
177	 *
178	 * The actual arithmetic manipulation is left up to reader.
179	 */
180	if (cyctime.cyt_when > INT64_MAX - cyctime.cyt_interval) {
181		mutex_exit(&cpu_lock);
182		return (EOVERFLOW);
183	}
184
185	if (cyctime.cyt_interval == 0) {
186		/*
187		 * If this is a one-shot, then we set the interval to be
188		 * inifinite.  If this timer is never touched, this cyclic will
189		 * simply consume space in the cyclic subsystem.  As soon as
190		 * timer_settime() or timer_delete() is called, the cyclic is
191		 * removed (so it's not possible to run the machine out
192		 * of resources by creating one-shots).
193		 */
194		cyctime.cyt_interval = CY_INFINITY;
195	}
196
197	it->it_itime = *when;
198
199	hrt2ts(cyctime.cyt_when, &it->it_itime.it_value);
200
201	hdlr.cyh_func = (cyc_func_t)clock_highres_fire;
202	hdlr.cyh_arg = it;
203	hdlr.cyh_level = CY_LOW_LEVEL;
204
205	if (cyctime.cyt_when != 0)
206		*cycp = cyc = cyclic_add(&hdlr, &cyctime);
207
208	/*
209	 * Now that we have the cyclic created, we need to bind it to our
210	 * bound CPU and processor set (if any).
211	 */
212	mutex_enter(&p->p_lock);
213	cpu = t->t_bound_cpu;
214	cpupart = t->t_cpupart;
215	pset = t->t_bind_pset;
216
217	mutex_exit(&p->p_lock);
218
219	cyclic_bind(cyc, cpu, pset == PS_NONE ? NULL : cpupart);
220
221	mutex_exit(&cpu_lock);
222
223	return (0);
224}
225
226static int
227clock_highres_timer_gettime(itimer_t *it, struct itimerspec *when)
228{
229	/*
230	 * CLOCK_HIGHRES doesn't update it_itime.
231	 */
232	hrtime_t start = ts2hrt(&it->it_itime.it_value);
233	hrtime_t interval = ts2hrt(&it->it_itime.it_interval);
234	hrtime_t diff, now = gethrtime();
235	hrtime_t *addr = &it->it_hrtime;
236	hrtime_t last;
237
238	/*
239	 * We're using atomic_cas_64() here only to assure that we slurp the
240	 * entire timestamp atomically.
241	 */
242	last = atomic_cas_64((uint64_t *)addr, 0, 0);
243
244	*when = it->it_itime;
245
246	if (!timerspecisset(&when->it_value))
247		return (0);
248
249	if (start > now) {
250		/*
251		 * We haven't gone off yet...
252		 */
253		diff = start - now;
254	} else {
255		if (interval == 0) {
256			/*
257			 * This is a one-shot which should have already
258			 * fired; set it_value to 0.
259			 */
260			timerspecclear(&when->it_value);
261			return (0);
262		}
263
264		/*
265		 * Calculate how far we are into this interval.
266		 */
267		diff = (now - start) % interval;
268
269		/*
270		 * Now check to see if we've dealt with the last interval
271		 * yet.
272		 */
273		if (now - diff > last) {
274			/*
275			 * The last interval hasn't fired; set it_value to 0.
276			 */
277			timerspecclear(&when->it_value);
278			return (0);
279		}
280
281		/*
282		 * The last interval _has_ fired; we can return the amount
283		 * of time left in this interval.
284		 */
285		diff = interval - diff;
286	}
287
288	hrt2ts(diff, &when->it_value);
289
290	return (0);
291}
292
293static int
294clock_highres_timer_delete(itimer_t *it)
295{
296	cyclic_id_t cyc;
297
298	if (it->it_arg == NULL) {
299		/*
300		 * This timer was never fully created; we must have failed
301		 * in the clock_highres_timer_create() routine.
302		 */
303		return (0);
304	}
305
306	mutex_enter(&cpu_lock);
307
308	if ((cyc = *((cyclic_id_t *)it->it_arg)) != CYCLIC_NONE)
309		cyclic_remove(cyc);
310
311	mutex_exit(&cpu_lock);
312
313	kmem_free(it->it_arg, sizeof (cyclic_id_t));
314
315	return (0);
316}
317
318static void
319clock_highres_timer_lwpbind(itimer_t *it)
320{
321	proc_t *p = curproc;
322	kthread_t *t = curthread;
323	cyclic_id_t cyc = *((cyclic_id_t *)it->it_arg);
324	cpu_t *cpu;
325	cpupart_t *cpupart;
326	int pset;
327
328	if (cyc == CYCLIC_NONE)
329		return;
330
331	mutex_enter(&cpu_lock);
332	mutex_enter(&p->p_lock);
333
334	/*
335	 * Okay, now we can safely look at the bindings.
336	 */
337	cpu = t->t_bound_cpu;
338	cpupart = t->t_cpupart;
339	pset = t->t_bind_pset;
340
341	/*
342	 * Now we drop p_lock.  We haven't dropped cpu_lock; we're guaranteed
343	 * that even if the bindings change, the CPU and/or processor set
344	 * that this timer was bound to remain valid (and the combination
345	 * remains self-consistent).
346	 */
347	mutex_exit(&p->p_lock);
348
349	cyclic_bind(cyc, cpu, pset == PS_NONE ? NULL : cpupart);
350
351	mutex_exit(&cpu_lock);
352}
353
354void
355clock_highres_init()
356{
357	clock_backend_t *be = &clock_highres;
358	struct sigevent *ev = &be->clk_default;
359
360	ev->sigev_signo = SIGALRM;
361	ev->sigev_notify = SIGEV_SIGNAL;
362	ev->sigev_value.sival_ptr = NULL;
363
364	be->clk_clock_settime = clock_highres_settime;
365	be->clk_clock_gettime = clock_highres_gettime;
366	be->clk_clock_getres = clock_highres_getres;
367	be->clk_timer_create = clock_highres_timer_create;
368	be->clk_timer_gettime = clock_highres_timer_gettime;
369	be->clk_timer_settime = clock_highres_timer_settime;
370	be->clk_timer_delete = clock_highres_timer_delete;
371	be->clk_timer_lwpbind = clock_highres_timer_lwpbind;
372
373	clock_add_backend(CLOCK_HIGHRES, &clock_highres);
374}
375