1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * Copyright 2016, Joyent Inc.
29 */
30
31#include <sys/timer.h>
32#include <sys/systm.h>
33#include <sys/param.h>
34#include <sys/kmem.h>
35#include <sys/debug.h>
36#include <sys/cyclic.h>
37#include <sys/cmn_err.h>
38#include <sys/pset.h>
39#include <sys/atomic.h>
40#include <sys/policy.h>
41
/* The CLOCK_HIGHRES backend, registered via clock_highres_init(). */
static clock_backend_t clock_highres;

/* minimum non-privileged interval (200us) */
long clock_highres_interval_min = 200000;
46
/*
 * CLOCK_HIGHRES reflects the system's high-resolution time source and
 * cannot be set; always fail with EINVAL.
 */
/*ARGSUSED*/
static int
clock_highres_settime(timespec_t *ts)
{
	return (EINVAL);
}
53
/*
 * Return the current high-resolution time (gethrtime(), nanoseconds
 * since some arbitrary point in the past) converted to a timespec.
 */
static int
clock_highres_gettime(timespec_t *ts)
{
	hrt2ts(gethrtime(), (timestruc_t *)ts);

	return (0);
}
61
/*
 * Return the clock's resolution, which is that of the cyclic subsystem
 * backing CLOCK_HIGHRES timers.
 */
static int
clock_highres_getres(timespec_t *ts)
{
	hrt2ts(cyclic_getres(), (timestruc_t *)ts);

	return (0);
}
69
/*
 * Create a CLOCK_HIGHRES timer:  allocate zeroed space to hold the
 * backing cyclic's ID (zero corresponding to CYCLIC_NONE, i.e. "no
 * cyclic yet" -- the cyclic itself isn't added until the timer is
 * armed in clock_highres_timer_settime()) and record the fire callback.
 */
/*ARGSUSED*/
static int
clock_highres_timer_create(itimer_t *it, void (*fire)(itimer_t *))
{
	it->it_arg = kmem_zalloc(sizeof (cyclic_id_t), KM_SLEEP);
	it->it_fire = fire;

	return (0);
}
79
80static void
81clock_highres_fire(void *arg)
82{
83	itimer_t *it = (itimer_t *)arg;
84	hrtime_t *addr = &it->it_hrtime;
85	hrtime_t old = *addr, new = gethrtime();
86
87	do {
88		old = *addr;
89	} while (atomic_cas_64((uint64_t *)addr, old, new) != old);
90
91	it->it_fire(it);
92}
93
/*
 * Arm (or disarm) a CLOCK_HIGHRES timer.  The timer is backed by a
 * cyclic:  arming either reprograms the existing cyclic (one-shot fast
 * path) or removes it and adds a new one; disarming (a zero it_value)
 * just removes it.  Non-privileged callers have sub-200us values and
 * intervals clamped to clock_highres_interval_min.
 */
static int
clock_highres_timer_settime(itimer_t *it, int flags,
    const struct itimerspec *when)
{
	cyclic_id_t cyc, *cycp = it->it_arg;
	proc_t *p = curproc;
	kthread_t *t = curthread;
	cyc_time_t cyctime;
	cyc_handler_t hdlr;
	cpu_t *cpu;
	cpupart_t *cpupart;
	int pset;
	boolean_t value_need_clamp = B_FALSE;
	boolean_t intval_need_clamp = B_FALSE;
	cred_t *cr = CRED();
	struct itimerspec clamped;

	/*
	 * CLOCK_HIGHRES timers of sufficiently high resolution can deny
	 * service; only allow privileged users to create such timers.
	 * Non-privileged users (those without the "proc_clock_highres"
	 * privilege) can create timers with lower resolution but if they
	 * attempt to use a very low time value (< 200us) then their
	 * timer will be clamped at 200us.
	 */
	if (when->it_value.tv_sec == 0 &&
	    when->it_value.tv_nsec > 0 &&
	    when->it_value.tv_nsec < clock_highres_interval_min)
		value_need_clamp = B_TRUE;

	if (when->it_interval.tv_sec == 0 &&
	    when->it_interval.tv_nsec > 0 &&
	    when->it_interval.tv_nsec < clock_highres_interval_min)
		intval_need_clamp = B_TRUE;

	/*
	 * Only build (and switch to) the clamped copy when clamping is
	 * actually needed AND the caller lacks the privilege; privileged
	 * callers keep their requested values untouched.
	 */
	if ((value_need_clamp || intval_need_clamp) &&
	    secpolicy_clock_highres(cr) != 0) {
		clamped.it_value.tv_sec = when->it_value.tv_sec;
		clamped.it_interval.tv_sec = when->it_interval.tv_sec;

		if (value_need_clamp) {
			clamped.it_value.tv_nsec = clock_highres_interval_min;
		} else {
			clamped.it_value.tv_nsec = when->it_value.tv_nsec;
		}

		if (intval_need_clamp) {
			clamped.it_interval.tv_nsec =
			    clock_highres_interval_min;
		} else {
			clamped.it_interval.tv_nsec = when->it_interval.tv_nsec;
		}

		when = &clamped;
	}

	cyctime.cyt_when = ts2hrt(&when->it_value);
	cyctime.cyt_interval = ts2hrt(&when->it_interval);

	if (cyctime.cyt_when != 0 && cyctime.cyt_interval == 0 &&
	    it->it_itime.it_interval.tv_sec == 0 &&
	    it->it_itime.it_interval.tv_nsec == 0 &&
	    (cyc = *cycp) != CYCLIC_NONE) {
		/*
		 * If our existing timer is a one-shot and our new timer is a
		 * one-shot, we'll save ourselves a world of grief and just
		 * reprogram the cyclic.
		 */
		it->it_itime = *when;

		if (!(flags & TIMER_ABSTIME))
			cyctime.cyt_when += gethrtime();

		hrt2ts(cyctime.cyt_when, &it->it_itime.it_value);
		(void) cyclic_reprogram(cyc, cyctime.cyt_when);
		return (0);
	}

	/*
	 * Any other change requires removing the existing cyclic (if any)
	 * and, when arming, adding a fresh one; cpu_lock protects the
	 * cyclic operations below.
	 */
	mutex_enter(&cpu_lock);
	if ((cyc = *cycp) != CYCLIC_NONE) {
		cyclic_remove(cyc);
		*cycp = CYCLIC_NONE;
	}

	/* A zero it_value disarms the timer; we're done. */
	if (cyctime.cyt_when == 0) {
		mutex_exit(&cpu_lock);
		return (0);
	}

	/* Relative times are converted to absolute hrtime expirations. */
	if (!(flags & TIMER_ABSTIME))
		cyctime.cyt_when += gethrtime();

	/*
	 * Now we will check for overflow (that is, we will check to see
	 * that the start time plus the interval time doesn't exceed
	 * INT64_MAX).  The astute code reviewer will observe that this
	 * one-time check doesn't guarantee that a future expiration
	 * will not wrap.  We wish to prove, then, that if a future
	 * expiration does wrap, the earliest the problem can be encountered
	 * is (INT64_MAX / 2) nanoseconds (191 years) after boot.  Formally:
	 *
	 *  Given:	s + i < m	s > 0	i > 0
	 *		s + ni > m	n > 1
	 *
	 *    (where "s" is the start time, "i" is the interval, "n" is the
	 *    number of times the cyclic has fired and "m" is INT64_MAX)
	 *
	 *  Prove:
	 *		(a)  s + (n - 1)i > (m / 2)
	 *		(b)  s + (n - 1)i < m
	 *
	 * That is, prove that we must have fired at least once 191 years
	 * after boot.  The proof is very straightforward; since the left
	 * side of (a) is minimized when i is small, it is sufficient to show
	 * that the statement is true for i's smallest possible value
	 * (((m - s) / n) + epsilon).  The same goes for (b); showing that the
	 * statement is true for i's largest possible value (m - s + epsilon)
	 * is sufficient to prove the statement.
	 *
	 * The actual arithmetic manipulation is left up to reader.
	 */
	if (cyctime.cyt_when > INT64_MAX - cyctime.cyt_interval) {
		mutex_exit(&cpu_lock);
		return (EOVERFLOW);
	}

	if (cyctime.cyt_interval == 0) {
		/*
		 * If this is a one-shot, then we set the interval to be
		 * inifinite.  If this timer is never touched, this cyclic will
		 * simply consume space in the cyclic subsystem.  As soon as
		 * timer_settime() or timer_delete() is called, the cyclic is
		 * removed (so it's not possible to run the machine out
		 * of resources by creating one-shots).
		 */
		cyctime.cyt_interval = CY_INFINITY;
	}

	it->it_itime = *when;

	hrt2ts(cyctime.cyt_when, &it->it_itime.it_value);

	hdlr.cyh_func = (cyc_func_t)clock_highres_fire;
	hdlr.cyh_arg = it;
	hdlr.cyh_level = CY_LOW_LEVEL;

	/*
	 * cyt_when is necessarily non-zero here (the disarm case returned
	 * above), so the cyclic is always added.
	 */
	if (cyctime.cyt_when != 0)
		*cycp = cyc = cyclic_add(&hdlr, &cyctime);

	/*
	 * Now that we have the cyclic created, we need to bind it to our
	 * bound CPU and processor set (if any).
	 */
	mutex_enter(&p->p_lock);
	cpu = t->t_bound_cpu;
	cpupart = t->t_cpupart;
	pset = t->t_bind_pset;

	mutex_exit(&p->p_lock);

	cyclic_bind(cyc, cpu, pset == PS_NONE ? NULL : cpupart);

	mutex_exit(&cpu_lock);

	return (0);
}
260
/*
 * Return the timer's interval and the time remaining until its next
 * expiration.  Because CLOCK_HIGHRES doesn't update it_itime as the
 * timer fires, the remaining time is derived from the armed start time,
 * the interval, and the last-fired timestamp (it_hrtime, maintained by
 * clock_highres_fire()).
 */
static int
clock_highres_timer_gettime(itimer_t *it, struct itimerspec *when)
{
	/*
	 * CLOCK_HIGHRES doesn't update it_itime.
	 */
	hrtime_t start = ts2hrt(&it->it_itime.it_value);
	hrtime_t interval = ts2hrt(&it->it_itime.it_interval);
	hrtime_t diff, now = gethrtime();
	hrtime_t *addr = &it->it_hrtime;
	hrtime_t last;

	/*
	 * We're using atomic_cas_64() here only to assure that we slurp the
	 * entire timestamp atomically.
	 */
	last = atomic_cas_64((uint64_t *)addr, 0, 0);

	*when = it->it_itime;

	/* A cleared it_value means the timer is disarmed. */
	if (!timerspecisset(&when->it_value))
		return (0);

	if (start > now) {
		/*
		 * We haven't gone off yet...
		 */
		diff = start - now;
	} else {
		if (interval == 0) {
			/*
			 * This is a one-shot which should have already
			 * fired; set it_value to 0.
			 */
			timerspecclear(&when->it_value);
			return (0);
		}

		/*
		 * Calculate how far we are into this interval.
		 */
		diff = (now - start) % interval;

		/*
		 * Now check to see if we've dealt with the last interval
		 * yet.
		 */
		if (now - diff > last) {
			/*
			 * The last interval hasn't fired; set it_value to 0.
			 */
			timerspecclear(&when->it_value);
			return (0);
		}

		/*
		 * The last interval _has_ fired; we can return the amount
		 * of time left in this interval.
		 */
		diff = interval - diff;
	}

	hrt2ts(diff, &when->it_value);

	return (0);
}
327
328static int
329clock_highres_timer_delete(itimer_t *it)
330{
331	cyclic_id_t cyc;
332
333	if (it->it_arg == NULL) {
334		/*
335		 * This timer was never fully created; we must have failed
336		 * in the clock_highres_timer_create() routine.
337		 */
338		return (0);
339	}
340
341	mutex_enter(&cpu_lock);
342
343	if ((cyc = *((cyclic_id_t *)it->it_arg)) != CYCLIC_NONE)
344		cyclic_remove(cyc);
345
346	mutex_exit(&cpu_lock);
347
348	kmem_free(it->it_arg, sizeof (cyclic_id_t));
349
350	return (0);
351}
352
353static void
354clock_highres_timer_lwpbind(itimer_t *it)
355{
356	proc_t *p = curproc;
357	kthread_t *t = curthread;
358	cyclic_id_t cyc = *((cyclic_id_t *)it->it_arg);
359	cpu_t *cpu;
360	cpupart_t *cpupart;
361	int pset;
362
363	if (cyc == CYCLIC_NONE)
364		return;
365
366	mutex_enter(&cpu_lock);
367	mutex_enter(&p->p_lock);
368
369	/*
370	 * Okay, now we can safely look at the bindings.
371	 */
372	cpu = t->t_bound_cpu;
373	cpupart = t->t_cpupart;
374	pset = t->t_bind_pset;
375
376	/*
377	 * Now we drop p_lock.  We haven't dropped cpu_lock; we're guaranteed
378	 * that even if the bindings change, the CPU and/or processor set
379	 * that this timer was bound to remain valid (and the combination
380	 * remains self-consistent).
381	 */
382	mutex_exit(&p->p_lock);
383
384	cyclic_bind(cyc, cpu, pset == PS_NONE ? NULL : cpupart);
385
386	mutex_exit(&cpu_lock);
387}
388
389void
390clock_highres_init()
391{
392	clock_backend_t *be = &clock_highres;
393	struct sigevent *ev = &be->clk_default;
394
395	ev->sigev_signo = SIGALRM;
396	ev->sigev_notify = SIGEV_SIGNAL;
397	ev->sigev_value.sival_ptr = NULL;
398
399	be->clk_clock_settime = clock_highres_settime;
400	be->clk_clock_gettime = clock_highres_gettime;
401	be->clk_clock_getres = clock_highres_getres;
402	be->clk_timer_create = clock_highres_timer_create;
403	be->clk_timer_gettime = clock_highres_timer_gettime;
404	be->clk_timer_settime = clock_highres_timer_settime;
405	be->clk_timer_delete = clock_highres_timer_delete;
406	be->clk_timer_lwpbind = clock_highres_timer_lwpbind;
407
408	clock_add_backend(CLOCK_HIGHRES, &clock_highres);
409}
410