1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2016 Joyent, Inc.
26 */
27
28/*
29 * Copyright (c) 1982, 1986 Regents of the University of California.
30 * All rights reserved.  The Berkeley software License Agreement
31 * specifies the terms and conditions for redistribution.
32 */
33
34#include <sys/param.h>
35#include <sys/user.h>
36#include <sys/vnode.h>
37#include <sys/proc.h>
38#include <sys/time.h>
39#include <sys/systm.h>
40#include <sys/kmem.h>
41#include <sys/cmn_err.h>
42#include <sys/cpuvar.h>
43#include <sys/timer.h>
44#include <sys/debug.h>
45#include <sys/sysmacros.h>
46#include <sys/cyclic.h>
47
/* Callbacks fired by timeout()/cyclic timers; definitions appear below. */
static void	realitexpire(void *);
static void	realprofexpire(void *);
static void	timeval_advance(struct timeval *, struct timeval *);

kmutex_t tod_lock;	/* protects time-of-day stuff */

/*
 * Constant to define the minimum interval value of the ITIMER_REALPROF timer.
 * Value is in microseconds; defaults to 500 usecs.  Setting this value
 * significantly lower may allow for denial-of-service attacks.
 */
int itimer_realprof_minimum = 500;

/*
 * Macro to compare a timeval to a timestruc; "cmp" is a relational
 * operator such as < or >.  The microsecond field is scaled to
 * nanoseconds so the sub-second fields compare in the same units.
 */

#define	TVTSCMP(tvp, tsp, cmp) \
	/* CSTYLED */ \
	((tvp)->tv_sec cmp (tsp)->tv_sec || \
	((tvp)->tv_sec == (tsp)->tv_sec && \
	/* CSTYLED */ \
	(tvp)->tv_usec * 1000 cmp (tsp)->tv_nsec))
71
72/*
73 * Time of day and interval timer support.
74 *
75 * These routines provide the kernel entry points to get and set
76 * the time-of-day and per-process interval timers.  Subroutines
77 * here provide support for adding and subtracting timeval structures
78 * and decrementing interval timers, optionally reloading the interval
79 * timers when they expire.
80 */
81
82/*
83 * SunOS function to generate monotonically increasing time values.
84 */
void
uniqtime(struct timeval *tv)
{
	static struct timeval last;	/* most recent value handed out */
	static int last_timechanged;	/* snapshot of global timechanged */
	timestruc_t ts;
	time_t sec;
	int usec, nsec;

	/*
	 * protect modification of last
	 */
	mutex_enter(&tod_lock);
	gethrestime(&ts);

	/*
	 * Fast algorithm to convert nsec to usec -- see hrt2ts()
	 * in common/os/timers.c for a full description.
	 */
	nsec = ts.tv_nsec;
	usec = nsec + (nsec >> 2);
	usec = nsec + (usec >> 1);
	usec = nsec + (usec >> 2);
	usec = nsec + (usec >> 4);
	usec = nsec - (usec >> 3);
	usec = nsec + (usec >> 2);
	usec = nsec + (usec >> 3);
	usec = nsec + (usec >> 4);
	usec = nsec + (usec >> 1);
	usec = nsec + (usec >> 6);
	usec = usec >> 10;
	sec = ts.tv_sec;

	/*
	 * If the system hres time has been changed since the last time
	 * we were called, then all bets are off; just update our
	 * local copy of timechanged and accept the reported time as is.
	 */
	if (last_timechanged != timechanged) {
		last_timechanged = timechanged;
	}
	/*
	 * Try to keep timestamps unique, but don't be obsessive about
	 * it in the face of large differences.
	 */
	else if ((sec <= last.tv_sec) &&	/* same or lower seconds, and */
	    ((sec != last.tv_sec) ||		/* either different second or */
	    (usec <= last.tv_usec)) &&		/* lower microsecond, and */
	    ((last.tv_sec - sec) <= 5)) {	/* not way back in time */
		/* bump the previous timestamp by one microsecond */
		sec = last.tv_sec;
		usec = last.tv_usec + 1;
		if (usec >= MICROSEC) {
			usec -= MICROSEC;
			sec++;
		}
	}
	last.tv_sec = sec;
	last.tv_usec = usec;
	mutex_exit(&tod_lock);

	tv->tv_sec = sec;
	tv->tv_usec = usec;
}
148
149/*
150 * Timestamps are exported from the kernel in several places.
151 * Such timestamps are commonly used for either uniqueness or for
152 * sequencing - truncation to 32-bits is fine for uniqueness,
153 * but sequencing is going to take more work as we get closer to 2038!
154 */
155void
156uniqtime32(struct timeval32 *tv32p)
157{
158	struct timeval tv;
159
160	uniqtime(&tv);
161	TIMEVAL_TO_TIMEVAL32(tv32p, &tv);
162}
163
164int
165gettimeofday(struct timeval *tp)
166{
167	struct timeval atv;
168
169	if (tp) {
170		uniqtime(&atv);
171		if (get_udatamodel() == DATAMODEL_NATIVE) {
172			if (copyout(&atv, tp, sizeof (atv)))
173				return (set_errno(EFAULT));
174		} else {
175			struct timeval32 tv32;
176
177			if (TIMEVAL_OVERFLOW(&atv))
178				return (set_errno(EOVERFLOW));
179			TIMEVAL_TO_TIMEVAL32(&tv32, &atv);
180
181			if (copyout(&tv32, tp, sizeof (tv32)))
182				return (set_errno(EFAULT));
183		}
184	}
185	return (0);
186}
187
188int
189getitimer(uint_t which, struct itimerval *itv)
190{
191	int error;
192
193	if (get_udatamodel() == DATAMODEL_NATIVE)
194		error = xgetitimer(which, itv, 0);
195	else {
196		struct itimerval kitv;
197
198		if ((error = xgetitimer(which, &kitv, 1)) == 0) {
199			if (ITIMERVAL_OVERFLOW(&kitv)) {
200				error = EOVERFLOW;
201			} else {
202				struct itimerval32 itv32;
203
204				ITIMERVAL_TO_ITIMERVAL32(&itv32, &kitv);
205				if (copyout(&itv32, itv, sizeof (itv32)) != 0)
206					error = EFAULT;
207			}
208		}
209	}
210
211	return (error ? (set_errno(error)) : 0);
212}
213
/*
 * Common guts of getitimer(): fetch interval timer "which" for the
 * current process/lwp into *itv.  If iskaddr is set, itv is a kernel
 * address; otherwise it is a user address (native data model only) and
 * the result is copied out.  Returns 0, EINVAL, or EFAULT.
 */
int
xgetitimer(uint_t which, struct itimerval *itv, int iskaddr)
{
	struct proc *p = curproc;
	struct timeval now;
	struct itimerval aitv;
	hrtime_t ts, first, interval, remain;

	mutex_enter(&p->p_lock);

	switch (which) {
	case ITIMER_VIRTUAL:
	case ITIMER_PROF:
		/* Per-lwp timers: just snapshot the lwp's copy. */
		aitv = ttolwp(curthread)->lwp_timer[which];
		break;

	case ITIMER_REAL:
		uniqtime(&now);
		aitv = p->p_realitimer;

		/* Convert the stored absolute expiry to time-remaining. */
		if (timerisset(&aitv.it_value)) {
			/*CSTYLED*/
			if (timercmp(&aitv.it_value, &now, <)) {
				timerclear(&aitv.it_value);
			} else {
				timevalsub(&aitv.it_value, &now);
			}
		}
		break;

	case ITIMER_REALPROF:
		if (curproc->p_rprof_cyclic == CYCLIC_NONE) {
			/* Timer not armed: report it as all zeros. */
			bzero(&aitv, sizeof (aitv));
			break;
		}

		aitv = curproc->p_rprof_timer;

		first = tv2hrt(&aitv.it_value);
		interval = tv2hrt(&aitv.it_interval);

		if ((ts = gethrtime()) < first) {
			/*
			 * We haven't gone off for the first time; the time
			 * remaining is simply the first time we will go
			 * off minus the current time.
			 */
			remain = first - ts;
		} else {
			if (interval == 0) {
				/*
				 * This was set as a one-shot, and we've
				 * already gone off; there is no time
				 * remaining.
				 */
				remain = 0;
			} else {
				/*
				 * We have a non-zero interval; we need to
				 * determine how far we are into the current
				 * interval, and subtract that from the
				 * interval to determine the time remaining.
				 */
				remain = interval - ((ts - first) % interval);
			}
		}

		hrt2tv(remain, &aitv.it_value);
		break;

	default:
		mutex_exit(&p->p_lock);
		return (EINVAL);
	}

	mutex_exit(&p->p_lock);

	if (iskaddr) {
		bcopy(&aitv, itv, sizeof (*itv));
	} else {
		ASSERT(get_udatamodel() == DATAMODEL_NATIVE);
		if (copyout(&aitv, itv, sizeof (*itv)))
			return (EFAULT);
	}

	return (0);
}
301
302
303int
304setitimer(uint_t which, struct itimerval *itv, struct itimerval *oitv)
305{
306	int error;
307
308	if (oitv != NULL)
309		if ((error = getitimer(which, oitv)) != 0)
310			return (error);
311
312	if (itv == NULL)
313		return (0);
314
315	if (get_udatamodel() == DATAMODEL_NATIVE)
316		error = xsetitimer(which, itv, 0);
317	else {
318		struct itimerval32 itv32;
319		struct itimerval kitv;
320
321		if (copyin(itv, &itv32, sizeof (itv32)))
322			error = EFAULT;
323		ITIMERVAL32_TO_ITIMERVAL(&kitv, &itv32);
324		error = xsetitimer(which, &kitv, 1);
325	}
326
327	return (error ? (set_errno(error)) : 0);
328}
329
/*
 * Common guts of setitimer(): validate and install interval timer
 * "which" for the current process from *itv.  If iskaddr is set, itv is
 * a kernel address; otherwise it is copied in from user space (native
 * data model only).  Returns 0, EINVAL, or EFAULT.
 */
int
xsetitimer(uint_t which, struct itimerval *itv, int iskaddr)
{
	struct itimerval aitv;
	struct timeval now;
	struct proc *p = curproc;
	kthread_t *t;
	timeout_id_t tmp_id;
	cyc_handler_t hdlr;
	cyc_time_t when;
	cyclic_id_t cyclic;
	hrtime_t ts;
	int min;

	if (itv == NULL)
		return (0);

	if (iskaddr) {
		bcopy(itv, &aitv, sizeof (aitv));
	} else {
		ASSERT(get_udatamodel() == DATAMODEL_NATIVE);
		if (copyin(itv, &aitv, sizeof (aitv)))
			return (EFAULT);
	}

	/*
	 * Compute the smallest interval we will accept, in microseconds:
	 * the cyclic resolution (but never less than
	 * itimer_realprof_minimum) for ITIMER_REALPROF, one clock tick
	 * for everything else.
	 */
	if (which == ITIMER_REALPROF) {
		min = MAX((int)(cyclic_getres() / (NANOSEC / MICROSEC)),
		    itimer_realprof_minimum);
	} else {
		min = usec_per_tick;
	}

	if (itimerfix(&aitv.it_value, min) ||
	    (itimerfix(&aitv.it_interval, min) && timerisset(&aitv.it_value)))
		return (EINVAL);

	mutex_enter(&p->p_lock);
	switch (which) {
	case ITIMER_REAL:
		/*
		 * The SITBUSY flag prevents conflicts with multiple
		 * threads attempting to perform setitimer(ITIMER_REAL)
		 * at the same time, even when we drop p->p_lock below.
		 * Any blocked thread returns successfully because the
		 * effect is the same as if it got here first, finished,
		 * and the other thread then came through and destroyed
		 * what it did.  We are just protecting the system from
		 * malfunctioning due to the race condition.
		 */
		if (p->p_flag & SITBUSY) {
			mutex_exit(&p->p_lock);
			return (0);
		}
		p->p_flag |= SITBUSY;
		while ((tmp_id = p->p_itimerid) != 0) {
			/*
			 * Avoid deadlock in callout_delete (called from
			 * untimeout) which may go to sleep (while holding
			 * p_lock). Drop p_lock and re-acquire it after
			 * untimeout returns. Need to clear p_itimerid
			 * while holding p_lock.
			 */
			p->p_itimerid = 0;
			mutex_exit(&p->p_lock);
			(void) untimeout(tmp_id);
			mutex_enter(&p->p_lock);
		}
		if (timerisset(&aitv.it_value)) {
			/* Convert relative it_value to absolute expiry. */
			uniqtime(&now);
			timevaladd(&aitv.it_value, &now);
			p->p_itimerid = realtime_timeout(realitexpire,
			    p, hzto(&aitv.it_value));
		}
		p->p_realitimer = aitv;
		p->p_flag &= ~SITBUSY;
		break;

	case ITIMER_REALPROF:
		/* Claim any existing cyclic so we can remove it below. */
		cyclic = p->p_rprof_cyclic;
		p->p_rprof_cyclic = CYCLIC_NONE;

		mutex_exit(&p->p_lock);

		/*
		 * We're now going to acquire cpu_lock, remove the old cyclic
		 * if necessary, and add our new cyclic.
		 */
		mutex_enter(&cpu_lock);

		if (cyclic != CYCLIC_NONE)
			cyclic_remove(cyclic);

		if (!timerisset(&aitv.it_value)) {
			/*
			 * If we were passed a value of 0, we're done.
			 */
			mutex_exit(&cpu_lock);
			return (0);
		}

		hdlr.cyh_func = realprofexpire;
		hdlr.cyh_arg = p;
		hdlr.cyh_level = CY_LOW_LEVEL;

		when.cyt_when = (ts = gethrtime() + tv2hrt(&aitv.it_value));
		when.cyt_interval = tv2hrt(&aitv.it_interval);

		if (when.cyt_interval == 0) {
			/*
			 * Using the same logic as for CLOCK_HIGHRES timers, we
			 * set the interval to be INT64_MAX - when.cyt_when to
			 * effect a one-shot; see the comment in clock_highres.c
			 * for more details on why this works.
			 */
			when.cyt_interval = INT64_MAX - when.cyt_when;
		}

		cyclic = cyclic_add(&hdlr, &when);

		mutex_exit(&cpu_lock);

		/*
		 * We have now successfully added the cyclic.  Reacquire
		 * p_lock, and see if anyone has snuck in.
		 */
		mutex_enter(&p->p_lock);

		if (p->p_rprof_cyclic != CYCLIC_NONE) {
			/*
			 * We're racing with another thread establishing an
			 * ITIMER_REALPROF interval timer.  We'll let the other
			 * thread win (this is a race at the application level,
			 * so letting the other thread win is acceptable).
			 */
			mutex_exit(&p->p_lock);
			mutex_enter(&cpu_lock);
			cyclic_remove(cyclic);
			mutex_exit(&cpu_lock);

			return (0);
		}

		/*
		 * Success.  Set our tracking variables in the proc structure,
		 * cancel any outstanding ITIMER_PROF, and allocate the
		 * per-thread SIGPROF buffers, if possible.
		 */
		hrt2tv(ts, &aitv.it_value);
		p->p_rprof_timer = aitv;
		p->p_rprof_cyclic = cyclic;

		t = p->p_tlist;
		do {
			struct itimerval *itvp;

			itvp = &ttolwp(t)->lwp_timer[ITIMER_PROF];
			timerclear(&itvp->it_interval);
			timerclear(&itvp->it_value);

			if (t->t_rprof != NULL)
				continue;

			t->t_rprof =
			    kmem_zalloc(sizeof (struct rprof), KM_NOSLEEP);
			aston(t);
		} while ((t = t->t_forw) != p->p_tlist);

		break;

	case ITIMER_VIRTUAL:
		ttolwp(curthread)->lwp_timer[ITIMER_VIRTUAL] = aitv;
		break;

	case ITIMER_PROF:
		if (p->p_rprof_cyclic != CYCLIC_NONE) {
			/*
			 * Silently ignore ITIMER_PROF if ITIMER_REALPROF
			 * is in effect.
			 */
			break;
		}

		ttolwp(curthread)->lwp_timer[ITIMER_PROF] = aitv;
		break;

	default:
		mutex_exit(&p->p_lock);
		return (EINVAL);
	}
	mutex_exit(&p->p_lock);
	return (0);
}
522
523/*
524 * Delete the ITIMER_REALPROF interval timer.
525 * Called only from exec_args() when exec occurs.
526 * The other ITIMER_* interval timers are specified
527 * to be inherited across exec(), so leave them alone.
528 */
529void
530delete_itimer_realprof(void)
531{
532	kthread_t *t = curthread;
533	struct proc *p = ttoproc(t);
534	klwp_t *lwp = ttolwp(t);
535	cyclic_id_t cyclic;
536
537	mutex_enter(&p->p_lock);
538
539	/* we are performing execve(); assert we are single-threaded */
540	ASSERT(t == p->p_tlist && t == t->t_forw);
541
542	if ((cyclic = p->p_rprof_cyclic) == CYCLIC_NONE) {
543		mutex_exit(&p->p_lock);
544	} else {
545		p->p_rprof_cyclic = CYCLIC_NONE;
546		/*
547		 * Delete any current instance of SIGPROF.
548		 */
549		if (lwp->lwp_cursig == SIGPROF) {
550			lwp->lwp_cursig = 0;
551			lwp->lwp_extsig = 0;
552			if (lwp->lwp_curinfo) {
553				siginfofree(lwp->lwp_curinfo);
554				lwp->lwp_curinfo = NULL;
555			}
556		}
557		/*
558		 * Delete any pending instances of SIGPROF.
559		 */
560		sigdelset(&p->p_sig, SIGPROF);
561		sigdelset(&p->p_extsig, SIGPROF);
562		sigdelq(p, NULL, SIGPROF);
563		sigdelset(&t->t_sig, SIGPROF);
564		sigdelset(&t->t_extsig, SIGPROF);
565		sigdelq(p, t, SIGPROF);
566
567		mutex_exit(&p->p_lock);
568
569		/*
570		 * Remove the ITIMER_REALPROF cyclic.
571		 */
572		mutex_enter(&cpu_lock);
573		cyclic_remove(cyclic);
574		mutex_exit(&cpu_lock);
575	}
576}
577
578/*
579 * Real interval timer expired:
580 * send process whose timer expired an alarm signal.
581 * If time is not set up to reload, then just return.
582 * Else compute next time timer should go off which is > current time.
583 * This is where delay in processing this timeout causes multiple
584 * SIGALRM calls to be compressed into one.
585 */
586static void
587realitexpire(void *arg)
588{
589	struct proc *p = arg;
590	struct timeval *valp = &p->p_realitimer.it_value;
591	struct timeval *intervalp = &p->p_realitimer.it_interval;
592#if !defined(_LP64)
593	clock_t	ticks;
594#endif
595
596	mutex_enter(&p->p_lock);
597#if !defined(_LP64)
598	if ((ticks = hzto(valp)) > 1) {
599		/*
600		 * If we are executing before we were meant to, it must be
601		 * because of an overflow in a prior hzto() calculation.
602		 * In this case, we want to go to sleep for the recalculated
603		 * number of ticks. For the special meaning of the value "1"
604		 * see comment in timespectohz().
605		 */
606		p->p_itimerid = realtime_timeout(realitexpire, p, ticks);
607		mutex_exit(&p->p_lock);
608		return;
609	}
610#endif
611	sigtoproc(p, NULL, SIGALRM);
612	if (!timerisset(intervalp)) {
613		timerclear(valp);
614		p->p_itimerid = 0;
615	} else {
616		/* advance timer value past current time */
617		timeval_advance(valp, intervalp);
618		p->p_itimerid = realtime_timeout(realitexpire, p, hzto(valp));
619	}
620	mutex_exit(&p->p_lock);
621}
622
623/*
624 * Real time profiling interval timer expired:
625 * Increment microstate counters for each lwp in the process
626 * and ensure that running lwps are kicked into the kernel.
627 * If time is not set up to reload, then just return.
628 * Else compute next time timer should go off which is > current time,
629 * as above.
630 */
631static void
632realprofexpire(void *arg)
633{
634	struct proc *p = arg;
635	kthread_t *t;
636
637	mutex_enter(&p->p_lock);
638	if (p->p_rprof_cyclic == CYCLIC_NONE ||
639	    (t = p->p_tlist) == NULL) {
640		mutex_exit(&p->p_lock);
641		return;
642	}
643	do {
644		int mstate;
645
646		/*
647		 * Attempt to allocate the SIGPROF buffer, but don't sleep.
648		 */
649		if (t->t_rprof == NULL)
650			t->t_rprof = kmem_zalloc(sizeof (struct rprof),
651			    KM_NOSLEEP);
652		if (t->t_rprof == NULL)
653			continue;
654
655		thread_lock(t);
656		switch (t->t_state) {
657		case TS_SLEEP:
658			/*
659			 * Don't touch the lwp is it is swapped out.
660			 */
661			if (!(t->t_schedflag & TS_LOAD)) {
662				mstate = LMS_SLEEP;
663				break;
664			}
665			switch (mstate = ttolwp(t)->lwp_mstate.ms_prev) {
666			case LMS_TFAULT:
667			case LMS_DFAULT:
668			case LMS_KFAULT:
669			case LMS_USER_LOCK:
670				break;
671			default:
672				mstate = LMS_SLEEP;
673				break;
674			}
675			break;
676		case TS_RUN:
677		case TS_WAIT:
678			mstate = LMS_WAIT_CPU;
679			break;
680		case TS_ONPROC:
681			switch (mstate = t->t_mstate) {
682			case LMS_USER:
683			case LMS_SYSTEM:
684			case LMS_TRAP:
685				break;
686			default:
687				mstate = LMS_SYSTEM;
688				break;
689			}
690			break;
691		default:
692			mstate = t->t_mstate;
693			break;
694		}
695		t->t_rprof->rp_anystate = 1;
696		t->t_rprof->rp_state[mstate]++;
697		aston(t);
698		/*
699		 * force the thread into the kernel
700		 * if it is not already there.
701		 */
702		if (t->t_state == TS_ONPROC && t->t_cpu != CPU)
703			poke_cpu(t->t_cpu->cpu_id);
704		thread_unlock(t);
705	} while ((t = t->t_forw) != p->p_tlist);
706
707	mutex_exit(&p->p_lock);
708}
709
710/*
711 * Advances timer value past the current time of day.  See the detailed
712 * comment for this logic in realitsexpire(), above.
713 */
714static void
715timeval_advance(struct timeval *valp, struct timeval *intervalp)
716{
717	int cnt2nth;
718	struct timeval interval2nth;
719
720	for (;;) {
721		interval2nth = *intervalp;
722		for (cnt2nth = 0; ; cnt2nth++) {
723			timevaladd(valp, &interval2nth);
724			/*CSTYLED*/
725			if (TVTSCMP(valp, &hrestime, >))
726				break;
727			timevaladd(&interval2nth, &interval2nth);
728		}
729		if (cnt2nth == 0)
730			break;
731		timevalsub(valp, &interval2nth);
732	}
733}
734
735/*
736 * Check that a proposed value to load into the .it_value or .it_interval
737 * part of an interval timer is acceptable, and set it to at least a
738 * specified minimal value.
739 */
740int
741itimerfix(struct timeval *tv, int minimum)
742{
743	if (tv->tv_sec < 0 || tv->tv_sec > 100000000 ||
744	    tv->tv_usec < 0 || tv->tv_usec >= MICROSEC)
745		return (EINVAL);
746	if (tv->tv_sec == 0 && tv->tv_usec != 0 && tv->tv_usec < minimum)
747		tv->tv_usec = minimum;
748	return (0);
749}
750
751/*
752 * Same as itimerfix, except a) it takes a timespec instead of a timeval and
753 * b) it doesn't truncate based on timeout granularity; consumers of this
754 * interface (e.g. timer_settime()) depend on the passed timespec not being
755 * modified implicitly.
756 */
757int
758itimerspecfix(timespec_t *tv)
759{
760	if (tv->tv_sec < 0 || tv->tv_nsec < 0 || tv->tv_nsec >= NANOSEC)
761		return (EINVAL);
762	return (0);
763}
764
765/*
766 * Decrement an interval timer by a specified number
767 * of microseconds, which must be less than a second,
768 * i.e. < 1000000.  If the timer expires, then reload
769 * it.  In this case, carry over (usec - old value) to
770 * reducint the value reloaded into the timer so that
771 * the timer does not drift.  This routine assumes
772 * that it is called in a context where the timers
773 * on which it is operating cannot change in value.
774 */
775int
776itimerdecr(struct itimerval *itp, int usec)
777{
778	if (itp->it_value.tv_usec < usec) {
779		if (itp->it_value.tv_sec == 0) {
780			/* expired, and already in next interval */
781			usec -= itp->it_value.tv_usec;
782			goto expire;
783		}
784		itp->it_value.tv_usec += MICROSEC;
785		itp->it_value.tv_sec--;
786	}
787	itp->it_value.tv_usec -= usec;
788	usec = 0;
789	if (timerisset(&itp->it_value))
790		return (1);
791	/* expired, exactly at end of interval */
792expire:
793	if (timerisset(&itp->it_interval)) {
794		itp->it_value = itp->it_interval;
795		itp->it_value.tv_usec -= usec;
796		if (itp->it_value.tv_usec < 0) {
797			itp->it_value.tv_usec += MICROSEC;
798			itp->it_value.tv_sec--;
799		}
800	} else
801		itp->it_value.tv_usec = 0;		/* sec is already 0 */
802	return (0);
803}
804
805/*
806 * Add and subtract routines for timevals.
807 * N.B.: subtract routine doesn't deal with
808 * results which are before the beginning,
809 * it just gets very confused in this case.
810 * Caveat emptor.
811 */
812void
813timevaladd(struct timeval *t1, struct timeval *t2)
814{
815	t1->tv_sec += t2->tv_sec;
816	t1->tv_usec += t2->tv_usec;
817	timevalfix(t1);
818}
819
820void
821timevalsub(struct timeval *t1, struct timeval *t2)
822{
823	t1->tv_sec -= t2->tv_sec;
824	t1->tv_usec -= t2->tv_usec;
825	timevalfix(t1);
826}
827
828void
829timevalfix(struct timeval *t1)
830{
831	if (t1->tv_usec < 0) {
832		t1->tv_sec--;
833		t1->tv_usec += MICROSEC;
834	}
835	if (t1->tv_usec >= MICROSEC) {
836		t1->tv_sec++;
837		t1->tv_usec -= MICROSEC;
838	}
839}
840
841/*
842 * Same as the routines above. These routines take a timespec instead
843 * of a timeval.
844 */
845void
846timespecadd(timespec_t *t1, timespec_t *t2)
847{
848	t1->tv_sec += t2->tv_sec;
849	t1->tv_nsec += t2->tv_nsec;
850	timespecfix(t1);
851}
852
853void
854timespecsub(timespec_t *t1, timespec_t *t2)
855{
856	t1->tv_sec -= t2->tv_sec;
857	t1->tv_nsec -= t2->tv_nsec;
858	timespecfix(t1);
859}
860
861void
862timespecfix(timespec_t *t1)
863{
864	if (t1->tv_nsec < 0) {
865		t1->tv_sec--;
866		t1->tv_nsec += NANOSEC;
867	} else {
868		if (t1->tv_nsec >= NANOSEC) {
869			t1->tv_sec++;
870			t1->tv_nsec -= NANOSEC;
871		}
872	}
873}
874
875/*
876 * Compute number of hz until specified time.
877 * Used to compute third argument to timeout() from an absolute time.
878 */
879clock_t
880hzto(struct timeval *tv)
881{
882	timespec_t ts, now;
883
884	ts.tv_sec = tv->tv_sec;
885	ts.tv_nsec = tv->tv_usec * 1000;
886	gethrestime_lasttick(&now);
887
888	return (timespectohz(&ts, now));
889}
890
891/*
892 * Compute number of hz until specified time for a given timespec value.
893 * Used to compute third argument to timeout() from an absolute time.
894 */
895clock_t
896timespectohz(timespec_t *tv, timespec_t now)
897{
898	clock_t	ticks;
899	time_t	sec;
900	int	nsec;
901
902	/*
903	 * Compute number of ticks we will see between now and
904	 * the target time; returns "1" if the destination time
905	 * is before the next tick, so we always get some delay,
906	 * and returns LONG_MAX ticks if we would overflow.
907	 */
908	sec = tv->tv_sec - now.tv_sec;
909	nsec = tv->tv_nsec - now.tv_nsec + nsec_per_tick - 1;
910
911	if (nsec < 0) {
912		sec--;
913		nsec += NANOSEC;
914	} else if (nsec >= NANOSEC) {
915		sec++;
916		nsec -= NANOSEC;
917	}
918
919	ticks = NSEC_TO_TICK(nsec);
920
921	/*
922	 * Compute ticks, accounting for negative and overflow as above.
923	 * Overflow protection kicks in at about 70 weeks for hz=50
924	 * and at about 35 weeks for hz=100. (Rather longer for the 64-bit
925	 * kernel :-)
926	 */
927	if (sec < 0 || (sec == 0 && ticks < 1))
928		ticks = 1;			/* protect vs nonpositive */
929	else if (sec > (LONG_MAX - ticks) / hz)
930		ticks = LONG_MAX;		/* protect vs overflow */
931	else
932		ticks += sec * hz;		/* common case */
933
934	return (ticks);
935}
936
937/*
938 * Compute number of hz with the timespec tv specified.
939 * The return type must be 64 bit integer.
940 */
941int64_t
942timespectohz64(timespec_t *tv)
943{
944	int64_t ticks;
945	int64_t sec;
946	int64_t nsec;
947
948	sec = tv->tv_sec;
949	nsec = tv->tv_nsec + nsec_per_tick - 1;
950
951	if (nsec < 0) {
952		sec--;
953		nsec += NANOSEC;
954	} else if (nsec >= NANOSEC) {
955		sec++;
956		nsec -= NANOSEC;
957	}
958
959	ticks = NSEC_TO_TICK(nsec);
960
961	/*
962	 * Compute ticks, accounting for negative and overflow as above.
963	 * Overflow protection kicks in at about 70 weeks for hz=50
964	 * and at about 35 weeks for hz=100. (Rather longer for the 64-bit
965	 * kernel
966	 */
967	if (sec < 0 || (sec == 0 && ticks < 1))
968		ticks = 1;			/* protect vs nonpositive */
969	else if (sec > (((~0ULL) >> 1) - ticks) / hz)
970		ticks = (~0ULL) >> 1;		/* protect vs overflow */
971	else
972		ticks += sec * hz;		/* common case */
973
974	return (ticks);
975}
976
977/*
978 * hrt2ts(): convert from hrtime_t to timestruc_t.
979 *
980 * All this routine really does is:
981 *
982 *	tsp->sec  = hrt / NANOSEC;
983 *	tsp->nsec = hrt % NANOSEC;
984 *
985 * The black magic below avoids doing a 64-bit by 32-bit integer divide,
986 * which is quite expensive.  There's actually much more going on here than
987 * it might first appear -- don't try this at home.
988 *
989 * For the adventuresome, here's an explanation of how it works.
990 *
991 * Multiplication by a fixed constant is easy -- you just do the appropriate
992 * shifts and adds.  For example, to multiply by 10, we observe that
993 *
994 *	x * 10	= x * (8 + 2)
995 *		= (x * 8) + (x * 2)
996 *		= (x << 3) + (x << 1).
997 *
998 * In general, you can read the algorithm right off the bits: the number 10
999 * is 1010 in binary; bits 1 and 3 are ones, so x * 10 = (x << 1) + (x << 3).
1000 *
1001 * Sometimes you can do better.  For example, 15 is 1111 binary, so the normal
1002 * shift/add computation is x * 15 = (x << 0) + (x << 1) + (x << 2) + (x << 3).
1003 * But, it's cheaper if you capitalize on the fact that you have a run of ones:
1004 * 1111 = 10000 - 1, hence x * 15 = (x << 4) - (x << 0).  [You would never
1005 * actually perform the operation << 0, since it's a no-op; I'm just writing
1006 * it that way for clarity.]
1007 *
1008 * The other way you can win is if you get lucky with the prime factorization
1009 * of your constant.  The number 1,000,000,000, which we have to multiply
1010 * by below, is a good example.  One billion is 111011100110101100101000000000
1011 * in binary.  If you apply the bit-grouping trick, it doesn't buy you very
1012 * much, because it's only a win for groups of three or more equal bits:
1013 *
1014 * 111011100110101100101000000000 = 1000000000000000000000000000000
1015 *				  -  000100011001010011011000000000
1016 *
1017 * Thus, instead of the 13 shift/add pairs (26 operations) implied by the LHS,
1018 * we have reduced this to 10 shift/add pairs (20 operations) on the RHS.
1019 * This is better, but not great.
1020 *
1021 * However, we can factor 1,000,000,000 = 2^9 * 5^9 = 2^9 * 125 * 125 * 125,
1022 * and multiply by each factor.  Multiplication by 125 is particularly easy,
1023 * since 128 is nearby: x * 125 = (x << 7) - x - x - x, which is just four
1024 * operations.  So, to multiply by 1,000,000,000, we perform three multipli-
1025 * cations by 125, then << 9, a total of only 3 * 4 + 1 = 13 operations.
1026 * This is the algorithm we actually use in both hrt2ts() and ts2hrt().
1027 *
1028 * Division is harder; there is no equivalent of the simple shift-add algorithm
1029 * we used for multiplication.  However, we can convert the division problem
1030 * into a multiplication problem by pre-computing the binary representation
1031 * of the reciprocal of the divisor.  For the case of interest, we have
1032 *
1033 *	1 / 1,000,000,000 = 1.0001001011100000101111101000001B-30,
1034 *
1035 * to 32 bits of precision.  (The notation B-30 means "* 2^-30", just like
1036 * E-18 means "* 10^-18".)
1037 *
1038 * So, to compute x / 1,000,000,000, we just multiply x by the 32-bit
1039 * integer 10001001011100000101111101000001, then normalize (shift) the
1040 * result.  This constant has several large bits runs, so the multiply
1041 * is relatively cheap:
1042 *
1043 *	10001001011100000101111101000001 = 10001001100000000110000001000001
1044 *					 - 00000000000100000000000100000000
1045 *
1046 * Again, you can just read the algorithm right off the bits:
1047 *
1048 *			sec = hrt;
1049 *			sec += (hrt << 6);
1050 *			sec -= (hrt << 8);
1051 *			sec += (hrt << 13);
1052 *			sec += (hrt << 14);
1053 *			sec -= (hrt << 20);
1054 *			sec += (hrt << 23);
1055 *			sec += (hrt << 24);
1056 *			sec += (hrt << 27);
1057 *			sec += (hrt << 31);
1058 *			sec >>= (32 + 30);
1059 *
1060 * Voila!  The only problem is, since hrt is 64 bits, we need to use 96-bit
1061 * arithmetic to perform this calculation.  That's a waste, because ultimately
1062 * we only need the highest 32 bits of the result.
1063 *
1064 * The first thing we do is to realize that we don't need to use all of hrt
1065 * in the calculation.  The lowest 30 bits can contribute at most 1 to the
1066 * quotient (2^30 / 1,000,000,000 = 1.07...), so we'll deal with them later.
1067 * The highest 2 bits have to be zero, or hrt won't fit in a timestruc_t.
1068 * Thus, the only bits of hrt that matter for division are bits 30..61.
1069 * These 32 bits are just the lower-order word of (hrt >> 30).  This brings
1070 * us down from 96-bit math to 64-bit math, and our algorithm becomes:
1071 *
1072 *			tmp = (uint32_t) (hrt >> 30);
1073 *			sec = tmp;
1074 *			sec += (tmp << 6);
1075 *			sec -= (tmp << 8);
1076 *			sec += (tmp << 13);
1077 *			sec += (tmp << 14);
1078 *			sec -= (tmp << 20);
1079 *			sec += (tmp << 23);
1080 *			sec += (tmp << 24);
1081 *			sec += (tmp << 27);
1082 *			sec += (tmp << 31);
1083 *			sec >>= 32;
1084 *
1085 * Next, we're going to reduce this 64-bit computation to a 32-bit
1086 * computation.  We begin by rewriting the above algorithm to use relative
1087 * shifts instead of absolute shifts.  That is, instead of computing
1088 * tmp << 6, tmp << 8, tmp << 13, etc, we'll just shift incrementally:
1089 * tmp <<= 6, tmp <<= 2 (== 8 - 6), tmp <<= 5 (== 13 - 8), etc:
1090 *
1091 *			tmp = (uint32_t) (hrt >> 30);
1092 *			sec = tmp;
1093 *			tmp <<= 6; sec += tmp;
1094 *			tmp <<= 2; sec -= tmp;
1095 *			tmp <<= 5; sec += tmp;
1096 *			tmp <<= 1; sec += tmp;
1097 *			tmp <<= 6; sec -= tmp;
1098 *			tmp <<= 3; sec += tmp;
1099 *			tmp <<= 1; sec += tmp;
1100 *			tmp <<= 3; sec += tmp;
1101 *			tmp <<= 4; sec += tmp;
1102 *			sec >>= 32;
1103 *
1104 * Now for the final step.  Instead of throwing away the low 32 bits at
1105 * the end, we can throw them away as we go, only keeping the high 32 bits
1106 * of the product at each step.  So, for example, where we now have
1107 *
1108 *			tmp <<= 6; sec = sec + tmp;
1109 * we will instead have
1110 *			tmp <<= 6; sec = (sec + tmp) >> 6;
1111 * which is equivalent to
1112 *			sec = (sec >> 6) + tmp;
1113 *
1114 * The final shift ("sec >>= 32") goes away.
1115 *
1116 * All we're really doing here is long multiplication, just like we learned in
1117 * grade school, except that at each step, we only look at the leftmost 32
1118 * columns.  The cumulative error is, at most, the sum of all the bits we
1119 * throw away, which is 2^-32 + 2^-31 + ... + 2^-2 + 2^-1 == 1 - 2^-32.
1120 * Thus, the final result ("sec") is correct to +/- 1.
1121 *
1122 * It turns out to be important to keep "sec" positive at each step, because
1123 * we don't want to have to explicitly extend the sign bit.  Therefore,
1124 * starting with the last line of code above, each line that would have read
1125 * "sec = (sec >> n) - tmp" must be changed to "sec = tmp - (sec >> n)", and
1126 * the operators (+ or -) in all previous lines must be toggled accordingly.
1127 * Thus, we end up with:
1128 *
1129 *			tmp = (uint32_t) (hrt >> 30);
1130 *			sec = tmp + (sec >> 6);
1131 *			sec = tmp - (tmp >> 2);
1132 *			sec = tmp - (sec >> 5);
1133 *			sec = tmp + (sec >> 1);
1134 *			sec = tmp - (sec >> 6);
1135 *			sec = tmp - (sec >> 3);
1136 *			sec = tmp + (sec >> 1);
1137 *			sec = tmp + (sec >> 3);
1138 *			sec = tmp + (sec >> 4);
1139 *
1140 * This yields a value for sec that is accurate to +1/-1, so we have two
1141 * cases to deal with.  The mysterious-looking "+ 7" in the code below biases
1142 * the rounding toward zero, so that sec is always less than or equal to
1143 * the correct value.  With this modified code, sec is accurate to +0/-2, with
1144 * the -2 case being very rare in practice.  With this change, we only have to
1145 * deal with one case (sec too small) in the cleanup code.
1146 *
1147 * The other modification we make is to delete the second line above
1148 * ("sec = tmp + (sec >> 6);"), since it only has an effect when bit 31 is
1149 * set, and the cleanup code can handle that rare case.  This reduces the
1150 * *guaranteed* accuracy of sec to +0/-3, but speeds up the common cases.
1151 *
1152 * Finally, we compute nsec = hrt - (sec * 1,000,000,000).  nsec will always
1153 * be positive (since sec is never too large), and will at most be equal to
1154 * the error in sec (times 1,000,000,000) plus the low-order 30 bits of hrt.
1155 * Thus, nsec < 3 * 1,000,000,000 + 2^30, which is less than 2^32, so we can
1156 * safely assume that nsec fits in 32 bits.  Consequently, when we compute
1157 * sec * 1,000,000,000, we only need the low 32 bits, so we can just do 32-bit
1158 * arithmetic and let the high-order bits fall off the end.
1159 *
1160 * Since nsec < 3 * 1,000,000,000 + 2^30 == 4,073,741,824, the cleanup loop:
1161 *
1162 *			while (nsec >= NANOSEC) {
1163 *				nsec -= NANOSEC;
1164 *				sec++;
1165 *			}
1166 *
1167 * is guaranteed to complete in at most 4 iterations.  In practice, the loop
1168 * completes in 0 or 1 iteration over 95% of the time.
1169 *
1170 * On an SS2, this implementation of hrt2ts() takes 1.7 usec, versus about
1171 * 35 usec for software division -- about 20 times faster.
1172 */
/*
 * Convert a high-resolution time (hrtime_t, nanoseconds) into a
 * timestruc_t (seconds and nanoseconds).  The non-amd64 path divides
 * by NANOSEC without an integer divide instruction, using the
 * shift-and-add reciprocal derivation in the block comment above.
 */
void
hrt2ts(hrtime_t hrt, timestruc_t *tsp)
{
#if defined(__amd64)
	/*
	 * The cleverness explained above is unnecessary on x86_64 CPUs where
	 * modern compilers are able to optimize down to faster operations.
	 */
	tsp->tv_sec = hrt / NANOSEC;
	tsp->tv_nsec = hrt % NANOSEC;
#else
	uint32_t sec, nsec, tmp;

	/*
	 * Estimate sec = hrt / NANOSEC; per the derivation above this
	 * estimate is accurate to +0/-3 and never too large.
	 */
	tmp = (uint32_t)(hrt >> 30);
	sec = tmp - (tmp >> 2);
	sec = tmp - (sec >> 5);
	sec = tmp + (sec >> 1);
	sec = tmp - (sec >> 6) + 7;	/* "+ 7" biases rounding toward zero */
	sec = tmp - (sec >> 3);
	sec = tmp + (sec >> 1);
	sec = tmp + (sec >> 3);
	sec = tmp + (sec >> 4);
	/*
	 * Compute tmp = sec * 125 * 125 * 125 with shifts and adds;
	 * (tmp << 9) is then sec * 1,000,000,000 (low 32 bits suffice,
	 * as shown in the comment above).
	 */
	tmp = (sec << 7) - sec - sec - sec;
	tmp = (tmp << 7) - tmp - tmp - tmp;
	tmp = (tmp << 7) - tmp - tmp - tmp;
	nsec = (uint32_t)hrt - (tmp << 9);
	/*
	 * sec may be up to 3 too small; this loop runs at most 4 times,
	 * and 0 or 1 times in the vast majority of cases.
	 */
	while (nsec >= NANOSEC) {
		nsec -= NANOSEC;
		sec++;
	}
	tsp->tv_sec = (time_t)sec;
	tsp->tv_nsec = nsec;
#endif /* defined(__amd64) */
}
1207
1208/*
1209 * Convert from timestruc_t to hrtime_t.
1210 */
1211hrtime_t
1212ts2hrt(const timestruc_t *tsp)
1213{
1214#if defined(__amd64) || defined(__i386)
1215	/*
1216	 * On modern x86 CPUs, the simple version is faster.
1217	 */
1218	return ((tsp->tv_sec * NANOSEC) + tsp->tv_nsec);
1219#else
1220	/*
1221	 * The code below is equivalent to:
1222	 *
1223	 *	hrt = tsp->tv_sec * NANOSEC + tsp->tv_nsec;
1224	 *
1225	 * but requires no integer multiply.
1226	 */
1227	hrtime_t hrt;
1228
1229	hrt = tsp->tv_sec;
1230	hrt = (hrt << 7) - hrt - hrt - hrt;
1231	hrt = (hrt << 7) - hrt - hrt - hrt;
1232	hrt = (hrt << 7) - hrt - hrt - hrt;
1233	hrt = (hrt << 9) + tsp->tv_nsec;
1234	return (hrt);
1235#endif /* defined(__amd64) || defined(__i386) */
1236}
1237
1238/*
1239 * For the various 32-bit "compatibility" paths in the system.
1240 */
1241void
1242hrt2ts32(hrtime_t hrt, timestruc32_t *ts32p)
1243{
1244	timestruc_t ts;
1245
1246	hrt2ts(hrt, &ts);
1247	TIMESPEC_TO_TIMESPEC32(ts32p, &ts);
1248}
1249
1250/*
1251 * If this ever becomes performance critical (ha!), we can borrow the
1252 * code from ts2hrt(), above, to multiply tv_sec by 1,000,000 and the
1253 * straightforward (x << 10) - (x << 5) + (x << 3) to multiply tv_usec by
1254 * 1,000.  For now, we'll opt for readability (besides, the compiler does
1255 * a passable job of optimizing constant multiplication into shifts and adds).
1256 */
1257hrtime_t
1258tv2hrt(struct timeval *tvp)
1259{
1260	return ((hrtime_t)tvp->tv_sec * NANOSEC +
1261	    (hrtime_t)tvp->tv_usec * (NANOSEC / MICROSEC));
1262}
1263
/*
 * Convert a high-resolution time (hrtime_t, nanoseconds) into a
 * struct timeval (seconds and microseconds).
 */
void
hrt2tv(hrtime_t hrt, struct timeval *tvp)
{
#if defined(__amd64)
	/*
	 * Like hrt2ts, the simple version is faster on x86_64.
	 */
	tvp->tv_sec = hrt / NANOSEC;
	tvp->tv_usec = (hrt % NANOSEC) / (NANOSEC / MICROSEC);
#else
	uint32_t sec, nsec, tmp;
	uint32_t q, r, t;

	/*
	 * Split hrt into sec/nsec exactly as hrt2ts() does; see the
	 * derivation in the large block comment above hrt2ts().
	 */
	tmp = (uint32_t)(hrt >> 30);
	sec = tmp - (tmp >> 2);
	sec = tmp - (sec >> 5);
	sec = tmp + (sec >> 1);
	sec = tmp - (sec >> 6) + 7;	/* "+ 7" biases rounding toward zero */
	sec = tmp - (sec >> 3);
	sec = tmp + (sec >> 1);
	sec = tmp + (sec >> 3);
	sec = tmp + (sec >> 4);
	/* tmp = sec * 125^3; (tmp << 9) == sec * NANOSEC (low 32 bits) */
	tmp = (sec << 7) - sec - sec - sec;
	tmp = (tmp << 7) - tmp - tmp - tmp;
	tmp = (tmp << 7) - tmp - tmp - tmp;
	nsec = (uint32_t)hrt - (tmp << 9);
	while (nsec >= NANOSEC) {
		nsec -= NANOSEC;
		sec++;
	}
	tvp->tv_sec = (time_t)sec;
	/*
	 * This routine is very similar to hrt2ts(), but requires
	 * microseconds instead of nanoseconds, so an integer divide by
	 * 1000 completes the conversion.  The shift-and-add sequence
	 * below approximates q = nsec / 1000 (the estimate is never too
	 * large); r is the remainder nsec - q * 1000, and (r + 24) >> 10
	 * adds back 1 exactly when r >= 1000, correcting the estimate.
	 */
	t = (nsec >> 7) + (nsec >> 8) + (nsec >> 12);
	q = (nsec >> 1) + t + (nsec >> 15) + (t >> 11) + (t >> 14);
	q = q >> 9;
	r = nsec - q*1000;
	tvp->tv_usec = q + ((r + 24) >> 10);
#endif /* defined(__amd64) */
}
1307
/*
 * nanosleep(2) system call:  suspend the calling thread for the
 * interval in *rqtp.  If rmtp is non-NULL and the sleep is interrupted
 * by a signal, the unslept remainder is copied out to *rmtp.
 * Returns 0 on success; otherwise sets errno to EFAULT (bad user
 * address), EINVAL (malformed request), or EINTR (signal).
 */
int
nanosleep(timespec_t *rqtp, timespec_t *rmtp)
{
	timespec_t rqtime;
	timespec_t rmtime;
	timespec_t now;
	int timecheck;
	int ret = 1;	/* >0 means "not interrupted" if we never sleep */
	model_t datamodel = get_udatamodel();

	/*
	 * Snapshot the clock-change generation before reading the current
	 * time, so cv_waituntil_sig() can detect a clock step that occurs
	 * after this point.
	 */
	timecheck = timechanged;
	gethrestime(&now);

	/* Copy in the request according to the caller's data model. */
	if (datamodel == DATAMODEL_NATIVE) {
		if (copyin(rqtp, &rqtime, sizeof (rqtime)))
			return (set_errno(EFAULT));
	} else {
		timespec32_t rqtime32;

		if (copyin(rqtp, &rqtime32, sizeof (rqtime32)))
			return (set_errno(EFAULT));
		TIMESPEC32_TO_TIMESPEC(&rqtime, &rqtime32);
	}

	/* Reject negative times and out-of-range nanoseconds. */
	if (rqtime.tv_sec < 0 || rqtime.tv_nsec < 0 ||
	    rqtime.tv_nsec >= NANOSEC)
		return (set_errno(EINVAL));

	if (timerspecisset(&rqtime)) {
		/* Convert the relative interval into an absolute deadline. */
		timespecadd(&rqtime, &now);
		mutex_enter(&curthread->t_delay_lock);
		/*
		 * Positive returns are wakeups before the deadline with no
		 * signal pending; keep waiting until timeout or signal.
		 */
		while ((ret = cv_waituntil_sig(&curthread->t_delay_cv,
		    &curthread->t_delay_lock, &rqtime, timecheck)) > 0)
			continue;
		mutex_exit(&curthread->t_delay_lock);
	}

	if (rmtp) {
		/*
		 * If cv_waituntil_sig() returned due to a signal, and
		 * there is time remaining, then set the time remaining.
		 * Else set time remaining to zero
		 */
		rmtime.tv_sec = rmtime.tv_nsec = 0;
		if (ret == 0) {
			/* rqtime is absolute; remainder = deadline - now */
			timespec_t delta = rqtime;

			gethrestime(&now);
			timespecsub(&delta, &now);
			if (delta.tv_sec > 0 || (delta.tv_sec == 0 &&
			    delta.tv_nsec > 0))
				rmtime = delta;
		}

		/* Copy out the remainder in the caller's data model. */
		if (datamodel == DATAMODEL_NATIVE) {
			if (copyout(&rmtime, rmtp, sizeof (rmtime)))
				return (set_errno(EFAULT));
		} else {
			timespec32_t rmtime32;

			TIMESPEC_TO_TIMESPEC32(&rmtime32, &rmtime);
			if (copyout(&rmtime32, rmtp, sizeof (rmtime32)))
				return (set_errno(EFAULT));
		}
	}

	/* ret == 0 indicates the wait was broken by a signal. */
	if (ret == 0)
		return (set_errno(EINTR));
	return (0);
}
1378
1379/*
1380 * Routines to convert standard UNIX time (seconds since Jan 1, 1970)
1381 * into year/month/day/hour/minute/second format, and back again.
1382 * Note: these routines require tod_lock held to protect cached state.
1383 */
/*
 * Cumulative days preceding each month, indexed by
 * ((year & 3) << 4) + month, where month is 1-based and year is
 * counted from 1900 (todinfo_t convention).  Row 0 covers leap years
 * ((year & 3) == 0, e.g. 1972); the other three rows are common
 * years.  Entry [month + 1] bounds the day-of-year for that month,
 * which is why each row carries a 13th value (365/366).  Rows are
 * padded to 16 entries so the row index can be formed with a shift.
 * NOTE(review): this table ignores the century rule, so it appears
 * valid only through 2099 -- confirm against callers' year range.
 */
static int days_thru_month[64] = {
	0, 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366, 0, 0,
	0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
	0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
	0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365, 0, 0,
};

/* Cache of the last utc_to_tod() conversion, protected by tod_lock. */
todinfo_t saved_tod;
int saved_utc = -60;	/* out of range so the first call does a full conversion */
1393
/*
 * Convert seconds since the epoch (Jan 1, 1970) to a broken-down
 * todinfo_t.  Caller must hold tod_lock, which protects the
 * saved_tod/saved_utc cache used to short-circuit the common case
 * where only the seconds field has changed since the last call.
 */
todinfo_t
utc_to_tod(time_t utc)
{
	long dse, day, month, year;
	todinfo_t tod;

	ASSERT(MUTEX_HELD(&tod_lock));

	/*
	 * Note that tod_set_prev() assumes utc will be set to zero in
	 * the case of it being negative.  Consequently, any change made
	 * to this behavior would have to be reflected in that function
	 * as well.
	 */
	if (utc < 0)			/* should never happen */
		utc = 0;

	/* Fast path: advance the cached seconds and return if still valid. */
	saved_tod.tod_sec += utc - saved_utc;
	saved_utc = utc;
	if (saved_tod.tod_sec >= 0 && saved_tod.tod_sec < 60)
		return (saved_tod);	/* only the seconds changed */

	dse = utc / 86400;		/* days since epoch */

	tod.tod_sec = utc % 60;
	tod.tod_min = (utc % 3600) / 60;
	tod.tod_hour = (utc % 86400) / 3600;
	tod.tod_dow = (dse + 4) % 7 + 1;	/* epoch was a Thursday */

	/*
	 * Find the year (counted from 1900): the initial guess is never
	 * too small, so back off until the resulting day-of-year is
	 * non-negative.  ((year - 69) >> 2) counts the leap days since
	 * the epoch.
	 */
	year = dse / 365 + 72;	/* first guess -- always a bit too large */
	do {
		year--;
		day = dse - 365 * (year - 70) - ((year - 69) >> 2);
	} while (day < 0);

	/*
	 * (year & 3) << 4 selects the days_thru_month row for this year
	 * of the leap cycle; scan for the month containing day.
	 */
	month = ((year & 3) << 4) + 1;
	while (day >= days_thru_month[month + 1])
		month++;

	tod.tod_day = day - days_thru_month[month] + 1;
	tod.tod_month = month & 15;	/* strip the row-selector bits */
	tod.tod_year = year;

	/* Refresh the cache for the next call. */
	saved_tod = tod;
	return (tod);
}
1440
/*
 * Convert a broken-down todinfo_t back to seconds since the epoch
 * (Jan 1, 1970).  Caller must hold tod_lock.  On DEBUG kernels, an
 * out-of-range field produces a one-time warning suggesting the
 * hardware TOD clock be reset; the conversion proceeds regardless.
 */
time_t
tod_to_utc(todinfo_t tod)
{
	time_t utc;
	int year = tod.tod_year;
	/* Same row-selecting index into days_thru_month as utc_to_tod(). */
	int month = tod.tod_month + ((year & 3) << 4);
#ifdef DEBUG
	/* only warn once, not each time called */
	static int year_warn = 1;
	static int month_warn = 1;
	static int day_warn = 1;
	static int hour_warn = 1;
	static int min_warn = 1;
	static int sec_warn = 1;
	/* number of days in this month, from the cumulative table */
	int days_diff = days_thru_month[month + 1] - days_thru_month[month];
#endif

	ASSERT(MUTEX_HELD(&tod_lock));

#ifdef DEBUG
	if (year_warn && (year < 70 || year > 8029)) {
		cmn_err(CE_WARN,
		    "The hardware real-time clock appears to have the "
		    "wrong years value %d -- time needs to be reset\n",
		    year);
		year_warn = 0;
	}

	if (month_warn && (tod.tod_month < 1 || tod.tod_month > 12)) {
		cmn_err(CE_WARN,
		    "The hardware real-time clock appears to have the "
		    "wrong months value %d -- time needs to be reset\n",
		    tod.tod_month);
		month_warn = 0;
	}

	if (day_warn && (tod.tod_day < 1 || tod.tod_day > days_diff)) {
		cmn_err(CE_WARN,
		    "The hardware real-time clock appears to have the "
		    "wrong days value %d -- time needs to be reset\n",
		    tod.tod_day);
		day_warn = 0;
	}

	if (hour_warn && (tod.tod_hour < 0 || tod.tod_hour > 23)) {
		cmn_err(CE_WARN,
		    "The hardware real-time clock appears to have the "
		    "wrong hours value %d -- time needs to be reset\n",
		    tod.tod_hour);
		hour_warn = 0;
	}

	if (min_warn && (tod.tod_min < 0 || tod.tod_min > 59)) {
		cmn_err(CE_WARN,
		    "The hardware real-time clock appears to have the "
		    "wrong minutes value %d -- time needs to be reset\n",
		    tod.tod_min);
		min_warn = 0;
	}

	if (sec_warn && (tod.tod_sec < 0 || tod.tod_sec > 59)) {
		cmn_err(CE_WARN,
		    "The hardware real-time clock appears to have the "
		    "wrong seconds value %d -- time needs to be reset\n",
		    tod.tod_sec);
		sec_warn = 0;
	}
#endif

	/* Multiplications below are done with shifts and adds only. */
	utc = (year - 70);		/* next 3 lines: utc = 365y + y/4 */
	utc += (utc << 3) + (utc << 6);	/* utc *= 73 */
	utc += (utc << 2) + ((year - 69) >> 2); /* utc *= 5, add leap days */
	utc += days_thru_month[month] + tod.tod_day - 1;
	utc = (utc << 3) + (utc << 4) + tod.tod_hour;	/* 24 * day + hour */
	utc = (utc << 6) - (utc << 2) + tod.tod_min;	/* 60 * hour + min */
	utc = (utc << 6) - (utc << 2) + tod.tod_sec;	/* 60 * min + sec */

	return (utc);
}
1520