1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#ifndef _SYS_CLOCK_H
27#define	_SYS_CLOCK_H
28
29#ifdef	__cplusplus
30extern "C" {
31#endif
32
33#include <sys/spl.h>
34#include <sys/time.h>
35#include <sys/machclock.h>
36
37#ifndef _ASM
38
39#ifdef	_KERNEL
40
41extern void	setcpudelay(void);
42
43extern uint_t	nsec_scale;
44extern uint_t	nsec_shift;
45extern uint_t	nsec_per_sys_tick;
46extern uint64_t	sys_tick_freq;
47
48extern int	traptrace_use_stick;
49extern uint64_t	system_clock_freq;
50extern uint_t	sys_clock_mhz;
51
52extern void mon_clock_init(void);
53extern void mon_clock_start(void);
54extern void mon_clock_stop(void);
55extern void mon_clock_share(void);
56extern void mon_clock_unshare(void);
57
58extern hrtime_t hrtime_base;
59extern void hres_tick(void);
60extern void	clkstart(void);
61extern void cbe_level14();
62extern hrtime_t tick2ns(hrtime_t, uint_t);
63
/*
 * Pair of 64-bit values named after the level-1 and level-10 CBE
 * interrupts.
 * NOTE(review): "inum" presumably stands for interrupt number; the code
 * that fills in and reads this structure is not visible in this header,
 * so keep the member order and widths unchanged -- confirm against users.
 */
64typedef struct {
65	uint64_t cbe_level1_inum;
66	uint64_t cbe_level10_inum;
67} cbe_data_t;
68
69#endif	/* _KERNEL */
70
71#endif	/* _ASM */
72
73
/*
 * CBE_* interrupt levels.  These sit outside the _ASM guard, so they are
 * visible to assembly sources as well as to C.
 *
 *	CBE_LOW_PIL	level 1
 *	CBE_LOCK_PIL	alias for LOCK_LEVEL, which is defined outside this file
 *	CBE_HIGH_PIL	level 14; CLOCK_LOCK() passes ipltospl(CBE_HIGH_PIL)
 *			to lock_set_spl() when taking hres_lock
 */
74#define	CBE_LOW_PIL	1
75#define	CBE_LOCK_PIL	LOCK_LEVEL
76#define	CBE_HIGH_PIL	14
77
78#define	ADJ_SHIFT	4	/* used in get_hrestime and _level10 */
79
80/*
81 * Locking strategy for high-resolution timing services
82 *
83 * We generally construct timestamps from two or more components:
84 * a hardware time source and one or more software time sources.
85 * These components cannot all be loaded simultaneously, so we need
86 * some sort of locking strategy to generate consistent timestamps.
87 *
88 * To minimize lock contention and cache thrashing we employ the
89 * weakest possible synchronization model: writers (rare) serialize
90 * on an acquisition-counting mutex, described below; readers (common)
91 * execute in parallel with no synchronization at all -- they don't
92 * exclude other readers, and they don't even exclude writers.  Instead,
93 * readers just examine the writer lock's value before and after loading
94 * all the components of a timestamp to detect writer intervention.
95 * In the rare case when a writer does intervene, the reader will
96 * detect it, discard the timestamp and try again.
97 *
98 * The writer lock, hres_lock, is a 32-bit integer consisting of an
99 * 8-bit lock and a 24-bit acquisition count.  To acquire the lock we
100 * set the lock field with ldstub, which sets the low-order 8 bits to
101 * 0xff; to clear the lock, we increment it, which simultaneously clears
102 * the lock field (0xff --> 0x00) and increments the acquisition count
103 * (due to carry into bit 8).  Thus each acquisition transforms hres_lock
104 * from N:0 to N:ff, and each release transforms N:ff into (N+1):0.
105 *
106 * Readers can detect writer intervention by loading hres_lock before
107 * and after loading the time components they need; if either lock value
108 * contains 0xff in the low-order bits (lock held), or if the lock values
109 * are not equal (lock was acquired and released), a writer intervened
110 * and the reader must try again.  If the lock values are equal and the
111 * low-order 8 bits are clear, the timestamp must be valid.  We can check
112 * both of these conditions with a single compare instruction by checking
113 * whether old_hres_lock & ~1 == new_hres_lock, as illustrated by the
114 * following table of all possible lock states:
115 *
116 *	initial	& ~1	final		result of compare
117 *	------------	-----		-----------------
118 *	now:00		now:00		valid
119 *	now:00		now:ff		invalid
120 *	now:00		later:00	invalid
121 *	now:00		later:ff	invalid
122 *	now:fe		now:ff		invalid
123 *	now:fe		later:00	invalid
124 *	now:fe		later:ff	invalid
125 *
126 * Implementation considerations:
127 *
128 * (1) Load buffering.
129 *
130 * On a CPU that does load buffering we must ensure that the load of
131 * hres_lock completes before the load of any timestamp components.
132 * This is essential *even on a CPU that does in-order loads* because
133 * accessing the hardware time source may not involve a memory reference
134 * (e.g. rd %tick).  A convenient way to address this is to clear the
135 * lower bit (andn with 1) of the old lock value right away, since this
136 * generates a dependency on the load of hres_lock.  We have to do this
137 * anyway to perform the lock comparison described above.
138 *
139 * (2) Out-of-order loads.
140 *
141 * On a CPU that does out-of-order loads we must ensure that the loads
142 * of all timestamp components have completed before we load the final
143 * value of hres_lock.  This can be done either by generating load
144 * dependencies on the timestamp components or by membar #LoadLoad.
145 *
146 * (3) Interaction with the high level cyclic handler, hres_tick().
147 *
148 * One unusual property of hres_lock is that it's acquired in a high
149 * level cyclic handler, hres_tick().  Thus, hres_lock must be acquired at
150 * CBE_HIGH_PIL or higher to prevent single-CPU deadlock.
151 *
152 * (4) Cross-calls.
153 *
154 * If a cross-call happens while one CPU has hres_lock and another is
155 * trying to acquire it in the clock interrupt path, the system will
156 * deadlock: the first CPU will never release hres_lock since it's
157 * waiting to be released from the cross-call, and the cross-call can't
158 * complete because the second CPU is spinning on hres_lock with traps
159 * disabled.  Thus cross-calls must be blocked while holding hres_lock.
160 *
161 * Together, (3) and (4) imply that hres_lock should only be acquired
162 * at PIL >= max(XCALL_PIL, CBE_HIGH_PIL), or while traps are disabled.
163 */
/*
 * Offset, in lock_t units, from the start of hres_lock to its lock field.
 * NOTE(review): offset 3 addresses the low-order byte of the 32-bit word
 * only on a big-endian machine with a one-byte lock_t -- confirm against
 * the lock_t definition, which is not visible in this header.
 */
164#define	HRES_LOCK_OFFSET 3
165
/*
 * CLOCK_LOCK(oldsplp): take hres_lock by calling lock_set_spl() on the lock
 * field, with ipltospl(CBE_HIGH_PIL) as the requested level.  oldsplp is
 * handed straight through as lock_set_spl()'s third argument (presumably
 * where the previous level is stored for a later splx() -- verify against
 * lock_set_spl()).  Expands to a single expression with no trailing
 * semicolon.
 */
166#define	CLOCK_LOCK(oldsplp)	\
167	lock_set_spl((lock_t *)&hres_lock + HRES_LOCK_OFFSET, \
168		ipltospl(CBE_HIGH_PIL), oldsplp)
169
/*
 * CLOCK_UNLOCK(spl): release hres_lock and restore the interrupt level.
 *
 *	spl	value handed to splx(); normally the level saved by
 *		CLOCK_LOCK()
 *
 * Steps, in order:
 *   1. membar_ldst_stst() is issued before the releasing store.
 *   2. hres_lock++ turns N:ff into (N+1):00, which clears the lock field
 *      and bumps the acquisition count in a single store.
 *   3. splx(spl) restores the caller's level.
 *   4. The release is reported through LOCKSTAT_RECORD0.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and is safe under an unbraced if/else.  The caller supplies
 * the terminating semicolon: CLOCK_UNLOCK(s);
 */
#define	CLOCK_UNLOCK(spl)						\
	do {								\
		membar_ldst_stst();					\
		hres_lock++;						\
		splx(spl);						\
		LOCKSTAT_RECORD0(LS_CLOCK_UNLOCK_RELEASE,		\
		    (lock_t *)&hres_lock + HRES_LOCK_OFFSET);		\
	/* CONSTCOND */							\
	} while (0)
176
177/*
178 * NATIVE_TIME_TO_NSEC_SCALE is called with NSEC_SHIFT to convert hi-res
179 * timestamps into nanoseconds. On systems that have a %stick register,
180 * hi-res timestamps are in %stick units. On systems that do not have a
181 * %stick register, hi-res timestamps are in %tick units.
182 *
183 * NATIVE_TIME_TO_NSEC_SCALE is called with TICK_NSEC_SHIFT to convert from
184 * %tick units to nanoseconds on all implementations whether %stick is
185 * available or not.
186 */
187
188/*
189 * At least 62.5 MHz CPU %tick frequency
190 */
191
/*
 * Shift count passed to NATIVE_TIME_TO_NSEC_SCALE when the input is in
 * %tick units.  2^4 = 16, i.e. the 16 nsec period of a 62.5 MHz %tick.
 */
192#define	TICK_NSEC_SHIFT	4
193
194/*
195 * Convert hi-res native time (V9's %tick in our case) into nanoseconds.
196 *
197 * The challenge is to multiply a %tick value by (NANOSEC / sys_tick_freq)
198 * without using floating point and without overflowing 64-bit integers.
199 * We assume that all sun4u systems will have a 16 nsec or better clock
200 * (i.e. faster than 62.5 MHz), which means that (ticks << 4) has units
201 * finer than one nanosecond, so converting from (ticks << 4) to nsec
202 * requires multiplication by a rational number, R, between 0 and 1.
203 * To avoid floating-point we precompute (R * 2^32) during boot and
204 * stash this away in nsec_scale.  Thus we can compute (tick * R) as
205 * (tick * nsec_scale) >> 32, which is accurate to about 1 part per billion.
206 *
207 * To avoid 64-bit overflow when multiplying (tick << 4) by nsec_scale,
208 * we split (tick << 4) into its high and low 32-bit pieces, H and L,
209 * multiply each piece separately, and add up the relevant bits of the
210 * partial products.  Putting it all together we have:
211 *
212 * nsec = (tick << 4) * R
213 *	= ((tick << 4) * nsec_scale) >> 32
214 *	= (((H << 32) + L) * nsec_scale) >> 32
215 *	= (H * nsec_scale) + ((L * nsec_scale) >> 32)
216 *
217 * The last line is the computation we actually perform: it requires no
218 * floating point and all intermediate results fit in 64-bit registers.
219 *
220 * Note that we require that tick is less than (1 << (64 - NSEC_SHIFT));
221 * greater values will result in overflow and misbehavior (not that this
222 * is a serious problem; (1 << (64 - NSEC_SHIFT)) nanoseconds is over
223 * thirty-six years).  Nonetheless, clients may wish to be aware of this
224 * limitation; NATIVE_TIME_MAX() returns this maximum native time.
225 *
226 * We provide two versions of this macro: a "full-service" version that
227 * just converts ticks to nanoseconds and a higher-performance version that
228 * expects the scaling factor nsec_scale as its second argument (so that
229 * callers can distance the load of nsec_scale from its use).  Note that
230 * we take a fast path if we determine the ticks to be less than 32 bits
231 * (as it often is for the delta between %tick values for successive
232 * firings of the hres_tick() cyclic).
233 *
234 * Note that in the 32-bit path we don't even bother clearing NPT.
235 * We get away with this by making hardclk.c ensure that nsec_scale
236 * is even, so we can take advantage of the associativity of modular
237 * arithmetic: multiplying %tick by any even number, say 2*n, is
238 * equivalent to multiplying %tick by 2, then by n.  Multiplication
239 * by 2 is equivalent to shifting left by one, which clears NPT.
240 *
241 * Finally, note that the macros use the labels "6:" and "7:"; these
242 * labels must not be used across an invocation of either macro.
243 */
/*
 * NATIVE_TIME_TO_NSEC_SCALE(out, scr1, scr2, shift)
 *
 *	out	in/out register: native time on entry, nanoseconds on exit
 *	scr1	scaling factor F on entry; overwritten on the 64-bit path
 *	scr2	scratch register; always overwritten
 *	shift	pre-scale shift; must be an assembly-time constant because
 *		"32 - shift" is used as an immediate
 *
 * Both paths yield ((out << shift) * F) >> 32, with the left shift
 * truncated to 64 bits on the slow path.  The fast path is taken when the
 * upper 32 bits of out are zero; its multiply sits in the annulled delay
 * slot of the brz, so it executes only when the branch is taken.
 * Uses local labels 6 and 7.
 */
244#define	NATIVE_TIME_TO_NSEC_SCALE(out, scr1, scr2, shift)		\
245	srlx	out, 32, scr2;		/* scr2 = high 32 bits of out */	\
246/* CSTYLED */ 								\
247	brz,a,pt scr2, 6f;		/* if clear, 32-bit fast path */\
248	mulx	out, scr1, out;		/* delay (annulled): out *= F */	\
249	sllx	out, shift, out;	/* clear NPT and pre-scale */	\
250	srlx	out, 32, scr2;		/* scr2 = hi32(tick<<shift) = H */	\
251	mulx	scr2, scr1, scr2;	/* scr2 = (H*F) */		\
252	srl	out, 0, out;		/* out = lo32(tick<<shift) = L */	\
253	mulx	out, scr1, scr1;	/* scr1 = (L*F) */		\
254	srlx	scr1, 32, scr1;		/* scr1 = (L*F) >> 32 */	\
255	ba	7f;			/* branch over 32-bit path */	\
256	add	scr1, scr2, out;	/* out = (H*F) + ((L*F) >> 32) */\
2576:									\
258	srlx	out, 32 - shift, out;	/* out = (tick*F) >> (32-shift) */	\
2597:
260
/*
 * NATIVE_TIME_TO_NSEC(out, scr1, scr2): "full-service" conversion.
 *
 *	out	in/out register: native time on entry, nanoseconds on exit
 *	scr1	overwritten: first with the address bits of nsec_scale, then
 *		with its 32-bit value
 *	scr2	scratch register used by NATIVE_TIME_TO_NSEC_SCALE
 *
 * Loads nsec_scale and expands NATIVE_TIME_TO_NSEC_SCALE with NSEC_SHIFT,
 * so it also consumes local labels 6 and 7.
 */
261#define	NATIVE_TIME_TO_NSEC(out, scr1, scr2)				\
262	sethi	%hi(nsec_scale), scr1;	/* load scaling factor */	\
263	ld	[scr1 + %lo(nsec_scale)], scr1;				\
264	NATIVE_TIME_TO_NSEC_SCALE(out, scr1, scr2, NSEC_SHIFT);
265
/*
 * NATIVE_TIME_MAX(out): set out to all ones shifted right (logically) by
 * NSEC_SHIFT, i.e. 2^(64 - NSEC_SHIFT) - 1.  No other register is touched.
 * The expansion has no trailing semicolon.
 */
266#define	NATIVE_TIME_MAX(out)						\
267	mov	-1, out;		/* out = all ones */		\
268	srlx	out, NSEC_SHIFT, out
269
270/*
271 * NSEC_SHIFT and VTRACE_SHIFT constants are defined in
272 * <sys/machclock.h> file.
273 */
274
275#ifdef	__cplusplus
276}
277#endif
278
279#endif	/* !_SYS_CLOCK_H */
280