xref: /illumos-gate/usr/src/uts/i86pc/os/timestamp.c (revision 575694f6)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5ae115bc7Smrj  * Common Development and Distribution License (the "License").
6ae115bc7Smrj  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
21843e1988Sjohnlev 
227c478bd9Sstevel@tonic-gate /*
237997e108SSurya Prakki  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
2579ec9da8SYuri Pankov  *
2679ec9da8SYuri Pankov  * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
27e014e7f8SPaul Dagnelie  * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
28*575694f6SJason King  * Copyright 2020 Joyent, Inc.
297c478bd9Sstevel@tonic-gate  */
307c478bd9Sstevel@tonic-gate 
317c478bd9Sstevel@tonic-gate #include <sys/types.h>
327c478bd9Sstevel@tonic-gate #include <sys/param.h>
337c478bd9Sstevel@tonic-gate #include <sys/systm.h>
347c478bd9Sstevel@tonic-gate #include <sys/disp.h>
357c478bd9Sstevel@tonic-gate #include <sys/var.h>
367c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
377c478bd9Sstevel@tonic-gate #include <sys/debug.h>
387c478bd9Sstevel@tonic-gate #include <sys/x86_archext.h>
397c478bd9Sstevel@tonic-gate #include <sys/archsystm.h>
407c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
417c478bd9Sstevel@tonic-gate #include <sys/psm_defs.h>
427c478bd9Sstevel@tonic-gate #include <sys/clock.h>
437c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
447c478bd9Sstevel@tonic-gate #include <sys/lockstat.h>
457c478bd9Sstevel@tonic-gate #include <sys/smp_impldefs.h>
467c478bd9Sstevel@tonic-gate #include <sys/dtrace.h>
477c478bd9Sstevel@tonic-gate #include <sys/time.h>
48843e1988Sjohnlev #include <sys/panic.h>
49b3c18020SSudheer A #include <sys/cpu.h>
50e014e7f8SPaul Dagnelie #include <sys/sdt.h>
512428aad8SPatrick Mooney #include <sys/comm_page.h>
52*575694f6SJason King #include <sys/bootconf.h>
53*575694f6SJason King #include <sys/kobj.h>
54*575694f6SJason King #include <sys/kobj_lex.h>
55*575694f6SJason King #include <sys/tsc.h>
56*575694f6SJason King #include <sys/prom_debug.h>
57*575694f6SJason King #include <util/qsort.h>
587c478bd9Sstevel@tonic-gate 
597c478bd9Sstevel@tonic-gate /*
607c478bd9Sstevel@tonic-gate  * Using the Pentium's TSC register for gethrtime()
617c478bd9Sstevel@tonic-gate  * ------------------------------------------------
627c478bd9Sstevel@tonic-gate  *
637c478bd9Sstevel@tonic-gate  * The Pentium family, like many chip architectures, has a high-resolution
647c478bd9Sstevel@tonic-gate  * timestamp counter ("TSC") which increments once per CPU cycle.  The contents
657c478bd9Sstevel@tonic-gate  * of the timestamp counter are read with the RDTSC instruction.
667c478bd9Sstevel@tonic-gate  *
677c478bd9Sstevel@tonic-gate  * As with its UltraSPARC equivalent (the %tick register), TSC's cycle count
687c478bd9Sstevel@tonic-gate  * must be translated into nanoseconds in order to implement gethrtime().
697c478bd9Sstevel@tonic-gate  * We avoid inducing floating point operations in this conversion by
707c478bd9Sstevel@tonic-gate  * implementing the same nsec_scale algorithm as that found in the sun4u
717c478bd9Sstevel@tonic-gate  * platform code.  The sun4u NATIVE_TIME_TO_NSEC_SCALE block comment contains
727c478bd9Sstevel@tonic-gate  * a detailed description of the algorithm; the comment is not reproduced
737c478bd9Sstevel@tonic-gate  * here.  This implementation differs only in its value for NSEC_SHIFT:
747c478bd9Sstevel@tonic-gate  * we implement an NSEC_SHIFT of 5 (instead of sun4u's 4) to allow for
757c478bd9Sstevel@tonic-gate  * 60 MHz Pentiums.
767c478bd9Sstevel@tonic-gate  *
777c478bd9Sstevel@tonic-gate  * While TSC and %tick are both cycle counting registers, TSC's functionality
787c478bd9Sstevel@tonic-gate  * falls short in several critical ways:
797c478bd9Sstevel@tonic-gate  *
807c478bd9Sstevel@tonic-gate  *  (a)	TSCs on different CPUs are not guaranteed to be in sync.  While in
817c478bd9Sstevel@tonic-gate  *	practice they often _are_ in sync, this isn't guaranteed by the
827c478bd9Sstevel@tonic-gate  *	architecture.
837c478bd9Sstevel@tonic-gate  *
847c478bd9Sstevel@tonic-gate  *  (b)	The TSC cannot be reliably set to an arbitrary value.  The architecture
857c478bd9Sstevel@tonic-gate  *	only supports writing the low 32-bits of TSC, making it impractical
867c478bd9Sstevel@tonic-gate  *	to rewrite.
877c478bd9Sstevel@tonic-gate  *
887c478bd9Sstevel@tonic-gate  *  (c)	The architecture doesn't have the capacity to interrupt based on
897c478bd9Sstevel@tonic-gate  *	arbitrary values of TSC; there is no TICK_CMPR equivalent.
907c478bd9Sstevel@tonic-gate  *
917c478bd9Sstevel@tonic-gate  * Together, (a) and (b) imply that software must track the skew between
927c478bd9Sstevel@tonic-gate  * TSCs and account for it (it is assumed that while there may exist skew,
937c478bd9Sstevel@tonic-gate  * there does not exist drift).  To determine the skew between CPUs, we
947c478bd9Sstevel@tonic-gate  * have newly onlined CPUs call tsc_sync_slave(), while the CPU performing
95b3c18020SSudheer A  * the online operation calls tsc_sync_master().
967c478bd9Sstevel@tonic-gate  *
977c478bd9Sstevel@tonic-gate  * In the absence of time-of-day clock adjustments, gethrtime() must stay in
987c478bd9Sstevel@tonic-gate  * sync with gettimeofday().  This is problematic; given (c), the software
997c478bd9Sstevel@tonic-gate  * cannot drive its time-of-day source from TSC, and yet they must somehow be
1007c478bd9Sstevel@tonic-gate  * kept in sync.  We implement this by having a routine, tsc_tick(), which
1017c478bd9Sstevel@tonic-gate  * is called once per second from the interrupt which drives time-of-day.
1027c478bd9Sstevel@tonic-gate  *
1037c478bd9Sstevel@tonic-gate  * Note that the hrtime base for gethrtime, tsc_hrtime_base, is modified
1047c478bd9Sstevel@tonic-gate  * atomically with nsec_scale under CLOCK_LOCK.  This assures that time
1057c478bd9Sstevel@tonic-gate  * monotonically increases.
1067c478bd9Sstevel@tonic-gate  */
1077c478bd9Sstevel@tonic-gate 
1087c478bd9Sstevel@tonic-gate #define	NSEC_SHIFT 5
1097c478bd9Sstevel@tonic-gate 
110113b131bSEric Saxe static uint_t nsec_unscale;
1117c478bd9Sstevel@tonic-gate 
1127c478bd9Sstevel@tonic-gate /*
1137c478bd9Sstevel@tonic-gate  * These two variables used to be grouped together inside of a structure that
1147c478bd9Sstevel@tonic-gate  * lived on a single cache line. A regression (bug ID 4623398) caused the
1157c478bd9Sstevel@tonic-gate  * compiler to emit code that "optimized" away the while-loops below. The
1167c478bd9Sstevel@tonic-gate  * result was that no synchronization between the onlining and onlined CPUs
1177c478bd9Sstevel@tonic-gate  * took place.
1187c478bd9Sstevel@tonic-gate  */
1197c478bd9Sstevel@tonic-gate static volatile int tsc_ready;
1207c478bd9Sstevel@tonic-gate static volatile int tsc_sync_go;
1217c478bd9Sstevel@tonic-gate 
1227c478bd9Sstevel@tonic-gate /*
1237c478bd9Sstevel@tonic-gate  * Used as indices into the tsc_sync_snaps[] array.
1247c478bd9Sstevel@tonic-gate  */
1257c478bd9Sstevel@tonic-gate #define	TSC_MASTER		0
1267c478bd9Sstevel@tonic-gate #define	TSC_SLAVE		1
1277c478bd9Sstevel@tonic-gate 
1287c478bd9Sstevel@tonic-gate /*
1297c478bd9Sstevel@tonic-gate  * Used in the tsc_master_sync()/tsc_slave_sync() rendezvous.
1307c478bd9Sstevel@tonic-gate  */
1317c478bd9Sstevel@tonic-gate #define	TSC_SYNC_STOP		1
1327c478bd9Sstevel@tonic-gate #define	TSC_SYNC_GO		2
133b3c18020SSudheer A #define	TSC_SYNC_DONE		3
134b3c18020SSudheer A #define	SYNC_ITERATIONS		10
1357c478bd9Sstevel@tonic-gate 
/*
 * Convert a TSC (cycle-count) delta to nanoseconds and add it to (hrt),
 * without 64-bit division: the 64-bit tsc value is split into two 32-bit
 * halves (x86 is little-endian, so _l[1] is the high word), each half is
 * multiplied by the fixed-point factor (scale) via mul32(), and the two
 * partial products are recombined with NSEC_SHIFT.  See the sun4u
 * NATIVE_TIME_TO_NSEC_SCALE block comment (referenced in the file comment
 * above) for the derivation of the algorithm.
 */
#define	TSC_CONVERT_AND_ADD(tsc, hrt, scale) {		\
	unsigned int *_l = (unsigned int *)&(tsc);	\
	(hrt) += mul32(_l[1], scale) << NSEC_SHIFT;	\
	(hrt) += mul32(_l[0], scale) >> (32 - NSEC_SHIFT); \
}
1417c478bd9Sstevel@tonic-gate 
/*
 * As TSC_CONVERT_AND_ADD above, but (hrt) is overwritten with the scaled
 * result rather than accumulated into.
 */
#define	TSC_CONVERT(tsc, hrt, scale) {			\
	unsigned int *_l = (unsigned int *)&(tsc);	\
	(hrt) = mul32(_l[1], scale) << NSEC_SHIFT;	\
	(hrt) += mul32(_l[0], scale) >> (32 - NSEC_SHIFT); \
}
1477c478bd9Sstevel@tonic-gate 
148ae115bc7Smrj int tsc_master_slave_sync_needed = 1;
1497c478bd9Sstevel@tonic-gate 
150b3c18020SSudheer A typedef struct tsc_sync {
151b3c18020SSudheer A 	volatile hrtime_t master_tsc, slave_tsc;
152b3c18020SSudheer A } tsc_sync_t;
153b3c18020SSudheer A static tsc_sync_t *tscp;
154b3c18020SSudheer A 
1557c478bd9Sstevel@tonic-gate static hrtime_t	tsc_last_jumped = 0;
1567c478bd9Sstevel@tonic-gate static int	tsc_jumped = 0;
157e014e7f8SPaul Dagnelie static uint32_t	tsc_wayback = 0;
158e014e7f8SPaul Dagnelie /*
159e014e7f8SPaul Dagnelie  * The cap of 1 second was chosen since it is the frequency at which the
160e014e7f8SPaul Dagnelie  * tsc_tick() function runs which means that when gethrtime() is called it
161e014e7f8SPaul Dagnelie  * should never be more than 1 second since tsc_last was updated.
162e014e7f8SPaul Dagnelie  */
163e014e7f8SPaul Dagnelie static hrtime_t tsc_resume_cap_ns = NANOSEC;	 /* 1s */
1647c478bd9Sstevel@tonic-gate 
1657c478bd9Sstevel@tonic-gate static hrtime_t	shadow_tsc_hrtime_base;
1667c478bd9Sstevel@tonic-gate static hrtime_t	shadow_tsc_last;
1677c478bd9Sstevel@tonic-gate static uint_t	shadow_nsec_scale;
1687c478bd9Sstevel@tonic-gate static uint32_t	shadow_hres_lock;
1692df1fe9cSrandyf int get_tsc_ready();
1707c478bd9Sstevel@tonic-gate 
171*575694f6SJason King /*
172*575694f6SJason King  * Allow an operator specify an explicit TSC calibration source
173*575694f6SJason King  * via /etc/system e.g. `set tsc_calibration="pit"`
174*575694f6SJason King  */
175*575694f6SJason King char *tsc_calibration;
176*575694f6SJason King 
177*575694f6SJason King /*
178*575694f6SJason King  * The source that was used to calibrate the TSC. This is currently just
179*575694f6SJason King  * for diagnostic purposes.
180*575694f6SJason King  */
181*575694f6SJason King static tsc_calibrate_t *tsc_calibration_source;
182*575694f6SJason King 
183*575694f6SJason King /* The TSC frequency after calibration */
184*575694f6SJason King static uint64_t tsc_freq;
185*575694f6SJason King 
186*575694f6SJason King static inline hrtime_t
tsc_protect(hrtime_t a)187*575694f6SJason King tsc_protect(hrtime_t a)
188*575694f6SJason King {
189e014e7f8SPaul Dagnelie 	if (a > tsc_resume_cap) {
190e014e7f8SPaul Dagnelie 		atomic_inc_32(&tsc_wayback);
191e014e7f8SPaul Dagnelie 		DTRACE_PROBE3(tsc__wayback, htrime_t, a, hrtime_t, tsc_last,
192e014e7f8SPaul Dagnelie 		    uint32_t, tsc_wayback);
193e014e7f8SPaul Dagnelie 		return (tsc_resume_cap);
194e014e7f8SPaul Dagnelie 	}
195e014e7f8SPaul Dagnelie 	return (a);
196e014e7f8SPaul Dagnelie }
197e014e7f8SPaul Dagnelie 
/*
 * Return the current high-resolution time in nanoseconds, derived from the
 * TSC and the most recent tsc_tick() snapshot (tsc_last, tsc_hrtime_base,
 * nsec_scale).  Readers synchronize with tsc_tick() in seqlock style: the
 * (even) hres_lock value observed before the reads must match the value
 * observed afterward, otherwise the whole read is retried.
 */
hrtime_t
tsc_gethrtime(void)
{
	uint32_t old_hres_lock;
	hrtime_t tsc, hrt;

	do {
		old_hres_lock = hres_lock;

		if ((tsc = tsc_read()) >= tsc_last) {
			/*
			 * It would seem to be obvious that this is true
			 * (that is, the past is less than the present),
			 * but it isn't true in the presence of suspend/resume
			 * cycles.  If we manage to call gethrtime()
			 * after a resume, but before the first call to
			 * tsc_tick(), we will see the jump.  In this case,
			 * we will simply use the value in TSC as the delta.
			 */
			tsc -= tsc_last;
		} else if (tsc >= tsc_last - 2*tsc_max_delta) {
			/*
			 * There is a chance that tsc_tick() has just run on
			 * another CPU, and we have drifted just enough so that
			 * we appear behind tsc_last.  In this case, force the
			 * delta to be zero.
			 */
			tsc = 0;
		} else {
			/*
			 * If we reach this else clause we assume that we have
			 * gone through a suspend/resume cycle and use the
			 * current tsc value as the delta.
			 *
			 * In rare cases we can reach this else clause due to
			 * a lack of monotonicity in the TSC value.  In such
			 * cases using the current TSC value as the delta would
			 * cause us to return a value ~2x of what it should
			 * be.  To protect against these cases we cap the
			 * suspend/resume delta at tsc_resume_cap.
			 */
			tsc = tsc_protect(tsc);
		}

		hrt = tsc_hrtime_base;

		/* Scale the cycle delta to nanoseconds and add the base. */
		TSC_CONVERT_AND_ADD(tsc, hrt, nsec_scale);
	} while ((old_hres_lock & ~1) != hres_lock);

	return (hrt);
}
249843e1988Sjohnlev 
/*
 * As tsc_gethrtime(), but additionally applies this CPU's per-CPU TSC
 * offset (tsc_sync_tick_delta[], established by tsc_sync_master()) so that
 * hrtime remains consistent across CPUs whose TSCs are skewed.  Installed
 * as gethrtimef only when tsc_sync_master() measures appreciable skew.
 */
hrtime_t
tsc_gethrtime_delta(void)
{
	uint32_t old_hres_lock;
	hrtime_t tsc, hrt;
	ulong_t flags;

	do {
		old_hres_lock = hres_lock;

		/*
		 * We need to disable interrupts here to assure that we
		 * don't migrate between the call to tsc_read() and
		 * adding the CPU's TSC tick delta. Note that disabling
		 * and reenabling preemption is forbidden here because
		 * we may be in the middle of a fast trap. In the amd64
		 * kernel we cannot tolerate preemption during a fast
		 * trap. See _update_sregs().
		 */

		flags = clear_int_flag();
		tsc = tsc_read() + tsc_sync_tick_delta[CPU->cpu_id];
		restore_int_flag(flags);

		/* See comments in tsc_gethrtime() above */

		if (tsc >= tsc_last) {
			tsc -= tsc_last;
		} else if (tsc >= tsc_last - 2 * tsc_max_delta) {
			tsc = 0;
		} else {
			tsc = tsc_protect(tsc);
		}

		hrt = tsc_hrtime_base;

		TSC_CONVERT_AND_ADD(tsc, hrt, nsec_scale);
	} while ((old_hres_lock & ~1) != hres_lock);

	return (hrt);
}
291843e1988Sjohnlev 
2929278ddffSRobert Mustacchi hrtime_t
tsc_gethrtime_tick_delta(void)2939278ddffSRobert Mustacchi tsc_gethrtime_tick_delta(void)
2949278ddffSRobert Mustacchi {
2959278ddffSRobert Mustacchi 	hrtime_t hrt;
2969278ddffSRobert Mustacchi 	ulong_t flags;
2979278ddffSRobert Mustacchi 
2989278ddffSRobert Mustacchi 	flags = clear_int_flag();
2999278ddffSRobert Mustacchi 	hrt = tsc_sync_tick_delta[CPU->cpu_id];
3009278ddffSRobert Mustacchi 	restore_int_flag(flags);
3019278ddffSRobert Mustacchi 
3029278ddffSRobert Mustacchi 	return (hrt);
3039278ddffSRobert Mustacchi }
3049278ddffSRobert Mustacchi 
305fc3fd29dSPatrick Mooney /* Calculate the hrtime while exposing the parameters of that calculation. */
306fc3fd29dSPatrick Mooney hrtime_t
tsc_gethrtime_params(uint64_t * tscp,uint32_t * scalep,uint8_t * shiftp)307fc3fd29dSPatrick Mooney tsc_gethrtime_params(uint64_t *tscp, uint32_t *scalep, uint8_t *shiftp)
308fc3fd29dSPatrick Mooney {
309fc3fd29dSPatrick Mooney 	uint32_t old_hres_lock, scale;
310fc3fd29dSPatrick Mooney 	hrtime_t tsc, last, base;
311fc3fd29dSPatrick Mooney 
312fc3fd29dSPatrick Mooney 	do {
313fc3fd29dSPatrick Mooney 		old_hres_lock = hres_lock;
314fc3fd29dSPatrick Mooney 
315fc3fd29dSPatrick Mooney 		if (gethrtimef == tsc_gethrtime_delta) {
316fc3fd29dSPatrick Mooney 			ulong_t flags;
317fc3fd29dSPatrick Mooney 
318fc3fd29dSPatrick Mooney 			flags = clear_int_flag();
319fc3fd29dSPatrick Mooney 			tsc = tsc_read() + tsc_sync_tick_delta[CPU->cpu_id];
320fc3fd29dSPatrick Mooney 			restore_int_flag(flags);
321fc3fd29dSPatrick Mooney 		} else {
322fc3fd29dSPatrick Mooney 			tsc = tsc_read();
323fc3fd29dSPatrick Mooney 		}
324fc3fd29dSPatrick Mooney 
325fc3fd29dSPatrick Mooney 		last = tsc_last;
326fc3fd29dSPatrick Mooney 		base = tsc_hrtime_base;
327fc3fd29dSPatrick Mooney 		scale = nsec_scale;
328fc3fd29dSPatrick Mooney 
329fc3fd29dSPatrick Mooney 	} while ((old_hres_lock & ~1) != hres_lock);
330fc3fd29dSPatrick Mooney 
331fc3fd29dSPatrick Mooney 	/* See comments in tsc_gethrtime() above */
332fc3fd29dSPatrick Mooney 	if (tsc >= last) {
333fc3fd29dSPatrick Mooney 		tsc -= last;
334fc3fd29dSPatrick Mooney 	} else if (tsc >= last - 2 * tsc_max_delta) {
335fc3fd29dSPatrick Mooney 		tsc = 0;
336fc3fd29dSPatrick Mooney 	} else {
337fc3fd29dSPatrick Mooney 		tsc = tsc_protect(tsc);
338fc3fd29dSPatrick Mooney 	}
339fc3fd29dSPatrick Mooney 
340fc3fd29dSPatrick Mooney 	TSC_CONVERT_AND_ADD(tsc, base, nsec_scale);
341fc3fd29dSPatrick Mooney 
342fc3fd29dSPatrick Mooney 	if (tscp != NULL) {
343fc3fd29dSPatrick Mooney 		/*
344fc3fd29dSPatrick Mooney 		 * Do not simply communicate the delta applied to the hrtime
345fc3fd29dSPatrick Mooney 		 * base, but rather the effective TSC measurement.
346fc3fd29dSPatrick Mooney 		 */
347fc3fd29dSPatrick Mooney 		*tscp = tsc + last;
348fc3fd29dSPatrick Mooney 	}
349fc3fd29dSPatrick Mooney 	if (scalep != NULL) {
350fc3fd29dSPatrick Mooney 		*scalep = scale;
351fc3fd29dSPatrick Mooney 	}
352fc3fd29dSPatrick Mooney 	if (shiftp != NULL) {
353fc3fd29dSPatrick Mooney 		*shiftp = NSEC_SHIFT;
354fc3fd29dSPatrick Mooney 	}
355fc3fd29dSPatrick Mooney 
356fc3fd29dSPatrick Mooney 	return (base);
357fc3fd29dSPatrick Mooney }
358fc3fd29dSPatrick Mooney 
359843e1988Sjohnlev /*
360fc3fd29dSPatrick Mooney  * This is similar to tsc_gethrtime_delta, but it cannot actually spin on
361fc3fd29dSPatrick Mooney  * hres_lock.  As a result, it caches all of the variables it needs; if the
362fc3fd29dSPatrick Mooney  * variables don't change, it's done.
363843e1988Sjohnlev  */
hrtime_t
dtrace_gethrtime(void)
{
	uint32_t old_hres_lock;
	hrtime_t tsc, hrt;
	ulong_t flags;

	do {
		old_hres_lock = hres_lock;

		/*
		 * Interrupts are disabled to ensure that the thread isn't
		 * migrated between the tsc_read() and adding the CPU's
		 * TSC tick delta.
		 */
		flags = clear_int_flag();

		tsc = tsc_read();

		if (gethrtimef == tsc_gethrtime_delta)
			tsc += tsc_sync_tick_delta[CPU->cpu_id];

		restore_int_flag(flags);

		/*
		 * See the comments in tsc_gethrtime(), above.
		 */
		if (tsc >= tsc_last)
			tsc -= tsc_last;
		else if (tsc >= tsc_last - 2*tsc_max_delta)
			tsc = 0;
		else
			tsc = tsc_protect(tsc);

		hrt = tsc_hrtime_base;

		TSC_CONVERT_AND_ADD(tsc, hrt, nsec_scale);

		/* Consistent snapshot obtained: the fast path is done. */
		if ((old_hres_lock & ~1) == hres_lock)
			break;

		/*
		 * If we're here, the clock lock is locked -- or it has been
		 * unlocked and locked since we looked.  This may be due to
		 * tsc_tick() running on another CPU -- or it may be because
		 * some code path has ended up in dtrace_probe() with
		 * CLOCK_LOCK held.  We'll try to determine that we're in
		 * the former case by taking another lap if the lock has
		 * changed since when we first looked at it.
		 */
		if (old_hres_lock != hres_lock)
			continue;

		/*
		 * So the lock was and is locked.  We'll use the old data
		 * instead.
		 *
		 * The shadow_* variables are the previous generation of the
		 * timebase, guarded by their own shadow_hres_lock, which
		 * cannot be held by the interrupted code path -- so this
		 * fallback cannot deadlock inside a DTrace probe.
		 */
		old_hres_lock = shadow_hres_lock;

		/*
		 * Again, disable interrupts to ensure that the thread
		 * isn't migrated between the tsc_read() and adding
		 * the CPU's TSC tick delta.
		 */
		flags = clear_int_flag();

		tsc = tsc_read();

		if (gethrtimef == tsc_gethrtime_delta)
			tsc += tsc_sync_tick_delta[CPU->cpu_id];

		restore_int_flag(flags);

		/*
		 * See the comments in tsc_gethrtime(), above.
		 */
		if (tsc >= shadow_tsc_last)
			tsc -= shadow_tsc_last;
		else if (tsc >= shadow_tsc_last - 2 * tsc_max_delta)
			tsc = 0;
		else
			tsc = tsc_protect(tsc);

		hrt = shadow_tsc_hrtime_base;

		TSC_CONVERT_AND_ADD(tsc, hrt, shadow_nsec_scale);
	} while ((old_hres_lock & ~1) != shadow_hres_lock);

	return (hrt);
}
454843e1988Sjohnlev 
455843e1988Sjohnlev hrtime_t
tsc_gethrtimeunscaled(void)456843e1988Sjohnlev tsc_gethrtimeunscaled(void)
457843e1988Sjohnlev {
458843e1988Sjohnlev 	uint32_t old_hres_lock;
459843e1988Sjohnlev 	hrtime_t tsc;
460843e1988Sjohnlev 
461843e1988Sjohnlev 	do {
462843e1988Sjohnlev 		old_hres_lock = hres_lock;
463843e1988Sjohnlev 
464843e1988Sjohnlev 		/* See tsc_tick(). */
465843e1988Sjohnlev 		tsc = tsc_read() + tsc_last_jumped;
466843e1988Sjohnlev 	} while ((old_hres_lock & ~1) != hres_lock);
467843e1988Sjohnlev 
468843e1988Sjohnlev 	return (tsc);
469843e1988Sjohnlev }
470843e1988Sjohnlev 
471113b131bSEric Saxe /*
472113b131bSEric Saxe  * Convert a nanosecond based timestamp to tsc
473113b131bSEric Saxe  */
474113b131bSEric Saxe uint64_t
tsc_unscalehrtime(hrtime_t nsec)475113b131bSEric Saxe tsc_unscalehrtime(hrtime_t nsec)
476113b131bSEric Saxe {
477113b131bSEric Saxe 	hrtime_t tsc;
478113b131bSEric Saxe 
479113b131bSEric Saxe 	if (tsc_gethrtime_enable) {
480113b131bSEric Saxe 		TSC_CONVERT(nsec, tsc, nsec_unscale);
481113b131bSEric Saxe 		return (tsc);
482113b131bSEric Saxe 	}
483113b131bSEric Saxe 	return ((uint64_t)nsec);
484113b131bSEric Saxe }
485843e1988Sjohnlev 
486843e1988Sjohnlev /* Convert a tsc timestamp to nanoseconds */
487843e1988Sjohnlev void
tsc_scalehrtime(hrtime_t * tsc)488843e1988Sjohnlev tsc_scalehrtime(hrtime_t *tsc)
489843e1988Sjohnlev {
490843e1988Sjohnlev 	hrtime_t hrt;
491843e1988Sjohnlev 	hrtime_t mytsc;
492843e1988Sjohnlev 
493843e1988Sjohnlev 	if (tsc == NULL)
494843e1988Sjohnlev 		return;
495843e1988Sjohnlev 	mytsc = *tsc;
496843e1988Sjohnlev 
497843e1988Sjohnlev 	TSC_CONVERT(mytsc, hrt, nsec_scale);
498843e1988Sjohnlev 	*tsc  = hrt;
499843e1988Sjohnlev }
500843e1988Sjohnlev 
501843e1988Sjohnlev hrtime_t
tsc_gethrtimeunscaled_delta(void)502843e1988Sjohnlev tsc_gethrtimeunscaled_delta(void)
503843e1988Sjohnlev {
504843e1988Sjohnlev 	hrtime_t hrt;
505a563a037Sbholler 	ulong_t flags;
506843e1988Sjohnlev 
507843e1988Sjohnlev 	/*
508843e1988Sjohnlev 	 * Similarly to tsc_gethrtime_delta, we need to disable preemption
509843e1988Sjohnlev 	 * to prevent migration between the call to tsc_gethrtimeunscaled
510843e1988Sjohnlev 	 * and adding the CPU's hrtime delta. Note that disabling and
511843e1988Sjohnlev 	 * reenabling preemption is forbidden here because we may be in the
512843e1988Sjohnlev 	 * middle of a fast trap. In the amd64 kernel we cannot tolerate
513843e1988Sjohnlev 	 * preemption during a fast trap. See _update_sregs().
514843e1988Sjohnlev 	 */
515843e1988Sjohnlev 
516843e1988Sjohnlev 	flags = clear_int_flag();
517843e1988Sjohnlev 	hrt = tsc_gethrtimeunscaled() + tsc_sync_tick_delta[CPU->cpu_id];
518843e1988Sjohnlev 	restore_int_flag(flags);
519843e1988Sjohnlev 
520843e1988Sjohnlev 	return (hrt);
521843e1988Sjohnlev }
522843e1988Sjohnlev 
5237c478bd9Sstevel@tonic-gate /*
52486cb0be2SPatrick Mooney  * TSC Sync Master
52586cb0be2SPatrick Mooney  *
52686cb0be2SPatrick Mooney  * Typically called on the boot CPU, this attempts to quantify TSC skew between
52786cb0be2SPatrick Mooney  * different CPUs.  If an appreciable difference is found, gethrtimef will be
52886cb0be2SPatrick Mooney  * changed to point to tsc_gethrtime_delta().
52986cb0be2SPatrick Mooney  *
53086cb0be2SPatrick Mooney  * Calculating skews is precise only when the master and slave TSCs are read
53186cb0be2SPatrick Mooney  * simultaneously; however, there is no algorithm that can read both CPUs in
53286cb0be2SPatrick Mooney  * perfect simultaneity.  The proposed algorithm is an approximate method based
53386cb0be2SPatrick Mooney  * on the behaviour of cache management.  The slave CPU continuously polls the
53486cb0be2SPatrick Mooney  * TSC while reading a global variable updated by the master CPU.  The latest
53586cb0be2SPatrick Mooney  * TSC reading is saved when the master's update (forced via mfence) reaches
53686cb0be2SPatrick Mooney  * visibility on the slave.  The master will also take a TSC reading
53786cb0be2SPatrick Mooney  * immediately following the mfence.
53886cb0be2SPatrick Mooney  *
53986cb0be2SPatrick Mooney  * While the delay between cache line invalidation on the slave and mfence
54086cb0be2SPatrick Mooney  * completion on the master is not repeatable, the error is heuristically
54186cb0be2SPatrick Mooney  * assumed to be 1/4th of the write time recorded by the master.  Multiple
54286cb0be2SPatrick Mooney  * samples are taken to control for the variance caused by external factors
54386cb0be2SPatrick Mooney  * such as bus contention.  Each sample set is independent per-CPU to control
54486cb0be2SPatrick Mooney  * for differing memory latency on NUMA systems.
5454af20bbdSSudheer A  *
5464af20bbdSSudheer A  * TSC sync is disabled in the context of virtualization because the CPUs
5474af20bbdSSudheer A  * assigned to the guest are virtual CPUs which means the real CPUs on which
5484af20bbdSSudheer A  * guest runs keep changing during life time of guest OS. So we would end up
5494af20bbdSSudheer A  * calculating TSC skews for a set of CPUs during boot whereas the guest
5504af20bbdSSudheer A  * might migrate to a different set of physical CPUs at a later point of
5514af20bbdSSudheer A  * time.
5527c478bd9Sstevel@tonic-gate  */
void
tsc_sync_master(processorid_t slave)
{
	ulong_t flags, source, min_write_time = ~0UL;
	hrtime_t write_time, mtsc_after, last_delta = 0;
	tsc_sync_t *tsc = tscp;
	int cnt;
	int hwtype;

	/* Skew measurement is meaningless under a hypervisor; see above. */
	hwtype = get_hwenv();
	if (!tsc_master_slave_sync_needed || (hwtype & HW_VIRTUAL) != 0)
		return;

	flags = clear_int_flag();
	source = CPU->cpu_id;

	for (cnt = 0; cnt < SYNC_ITERATIONS; cnt++) {
		/* Wait for the slave to signal readiness via tsc_sync_go. */
		while (tsc_sync_go != TSC_SYNC_GO)
			SMT_PAUSE();

		/*
		 * Publish our TSC reading; the membar_enter() forces the
		 * store to become visible to the polling slave, and the
		 * second tsc_read() brackets the write's completion.
		 */
		tsc->master_tsc = tsc_read();
		membar_enter();
		mtsc_after = tsc_read();
		while (tsc_sync_go != TSC_SYNC_DONE)
			SMT_PAUSE();
		write_time =  mtsc_after - tsc->master_tsc;
		if (write_time <= min_write_time) {
			hrtime_t tdelta;

			tdelta = tsc->slave_tsc - mtsc_after;
			if (tdelta < 0)
				tdelta = -tdelta;
			/*
			 * If the margin exists, subtract 1/4th of the measured
			 * write time from the master's TSC value.  This is an
			 * estimate of how late the mfence completion came
			 * after the slave noticed the cache line change.
			 */
			if (tdelta > (write_time/4)) {
				tdelta = tsc->slave_tsc -
				    (mtsc_after - (write_time/4));
			} else {
				tdelta = tsc->slave_tsc - mtsc_after;
			}
			/*
			 * Record the slave's delta relative to the source
			 * CPU's own delta so all offsets share one origin.
			 */
			last_delta = tsc_sync_tick_delta[source] - tdelta;
			tsc_sync_tick_delta[slave] = last_delta;
			min_write_time = write_time;
		}

		/* Reset the rendezvous state for the next iteration. */
		tsc->master_tsc = tsc->slave_tsc = write_time = 0;
		membar_enter();
		tsc_sync_go = TSC_SYNC_STOP;
	}

	/*
	 * Only enable the delta variants of the TSC functions if the measured
	 * skew is greater than the fastest write time.
	 */
	last_delta = (last_delta < 0) ? -last_delta : last_delta;
	if (last_delta > min_write_time) {
		gethrtimef = tsc_gethrtime_delta;
		gethrtimeunscaledf = tsc_gethrtimeunscaled_delta;
		tsc_ncpu = NCPU;
	}
	restore_int_flag(flags);
}
6197c478bd9Sstevel@tonic-gate 
6204af20bbdSSudheer A /*
62186cb0be2SPatrick Mooney  * TSC Sync Slave
62286cb0be2SPatrick Mooney  *
6234af20bbdSSudheer A  * Called by a CPU which has just been onlined.  It is expected that the CPU
6244af20bbdSSudheer A  * performing the online operation will call tsc_sync_master().
6254af20bbdSSudheer A  *
62686cb0be2SPatrick Mooney  * Like tsc_sync_master, this logic is skipped on virtualized platforms.
6274af20bbdSSudheer A  */
void
tsc_sync_slave(void)
{
	ulong_t flags;
	hrtime_t s1;
	tsc_sync_t *tsc = tscp;
	int cnt;
	int hwtype;

	hwtype = get_hwenv();
	/*
	 * Skip the sync protocol when it has been administratively disabled
	 * or when running virtualized (matching the HW_VIRTUAL check noted
	 * in the comment above: tsc_sync_master is skipped there too).
	 */
	if (!tsc_master_slave_sync_needed || (hwtype & HW_VIRTUAL) != 0)
		return;

	/* Interrupts stay blocked for the whole handshake. */
	flags = clear_int_flag();

	for (cnt = 0; cnt < SYNC_ITERATIONS; cnt++) {
		/* Re-fill the cache line */
		s1 = tsc->master_tsc;
		membar_enter();
		/* Release the master, which spins waiting for TSC_SYNC_GO. */
		tsc_sync_go = TSC_SYNC_GO;
		do {
			/*
			 * Do not put an SMT_PAUSE here.  If the master and
			 * slave are the same hyper-threaded CPU, we want the
			 * master to yield as quickly as possible to the slave.
			 */
			s1 = tsc_read();
		} while (tsc->master_tsc == 0);
		/*
		 * s1 holds the TSC value read closest to the moment the
		 * master's write to master_tsc became visible here; publish
		 * it and signal the master that this iteration is done.
		 */
		tsc->slave_tsc = s1;
		membar_enter();
		tsc_sync_go = TSC_SYNC_DONE;

		/* Wait for the master to consume the sample and reset state. */
		while (tsc_sync_go != TSC_SYNC_STOP)
			SMT_PAUSE();
	}

	restore_int_flag(flags);
}
6667c478bd9Sstevel@tonic-gate 
6677c478bd9Sstevel@tonic-gate /*
668ae115bc7Smrj  * Called once per second on a CPU from the cyclic subsystem's
669ae115bc7Smrj  * CY_HIGH_LEVEL interrupt.  (No longer just cpu0-only)
6707c478bd9Sstevel@tonic-gate  */
void
tsc_tick(void)
{
	hrtime_t now, delta;
	ushort_t spl;

	/*
	 * Before we set the new variables, we set the shadow values.  This
	 * allows for lock free operation in dtrace_gethrtime().
	 */
	lock_set_spl((lock_t *)&shadow_hres_lock + HRES_LOCK_OFFSET,
	    ipltospl(CBE_HIGH_PIL), &spl);

	shadow_tsc_hrtime_base = tsc_hrtime_base;
	shadow_tsc_last = tsc_last;
	shadow_nsec_scale = nsec_scale;

	/*
	 * The increment presumably releases the shadow lock acquired above
	 * and lets lock-free readers detect a concurrent update -- see
	 * dtrace_gethrtime().
	 */
	shadow_hres_lock++;
	splx(spl);

	CLOCK_LOCK(&spl);

	now = tsc_read();

	/* Fold in this CPU's sync delta when the delta variants are active. */
	if (gethrtimef == tsc_gethrtime_delta)
		now += tsc_sync_tick_delta[CPU->cpu_id];

	if (now < tsc_last) {
		/*
		 * The TSC has just jumped into the past.  We assume that
		 * this is due to a suspend/resume cycle, and we're going
		 * to use the _current_ value of TSC as the delta.  This
		 * will keep tsc_hrtime_base correct.  We're also going to
		 * assume that rate of tsc does not change after a suspend
		 * resume (i.e nsec_scale remains the same).
		 */
		delta = now;
		/* Cap the jump so a bogus TSC can't run hrtime far ahead. */
		delta = tsc_protect(delta);
		tsc_last_jumped += tsc_last;
		tsc_jumped = 1;
	} else {
		/*
		 * Determine the number of TSC ticks since the last clock
		 * tick, and add that to the hrtime base.
		 */
		delta = now - tsc_last;
	}

	TSC_CONVERT_AND_ADD(delta, tsc_hrtime_base, nsec_scale);
	tsc_last = now;

	CLOCK_UNLOCK(spl);
}
7247c478bd9Sstevel@tonic-gate 
/*
 * Install the TSC-based hrtime implementation: compute the scale factors
 * used to convert between TSC ticks and nanoseconds, hook up the gethrtime
 * function pointers, and allocate the structure used by the TSC sync logic.
 */
void
tsc_hrtimeinit(uint64_t cpu_freq_hz)
{
	extern int gethrtime_hires;
	longlong_t tsc;
	ulong_t flags;

	/*
	 * cpu_freq_hz is the measured cpu frequency in hertz
	 */

	/*
	 * We can't accommodate CPUs slower than 31.25 MHz.
	 */
	ASSERT(cpu_freq_hz > NANOSEC / (1 << NSEC_SHIFT));
	/* nsec_scale: ticks -> ns; nsec_unscale: ns -> ticks (fixed point). */
	nsec_scale =
	    (uint_t)(((uint64_t)NANOSEC << (32 - NSEC_SHIFT)) / cpu_freq_hz);
	nsec_unscale =
	    (uint_t)(((uint64_t)cpu_freq_hz << (32 - NSEC_SHIFT)) / NANOSEC);

	/*
	 * Measure, with interrupts blocked, how many TSC ticks one
	 * tsc_gethrtime() call costs; stored as tsc_max_delta.
	 */
	flags = clear_int_flag();
	tsc = tsc_read();
	(void) tsc_gethrtime();
	tsc_max_delta = tsc_read() - tsc;
	restore_int_flag(flags);
	gethrtimef = tsc_gethrtime;
	gethrtimeunscaledf = tsc_gethrtimeunscaled;
	scalehrtimef = tsc_scalehrtime;
	unscalehrtimef = tsc_unscalehrtime;
	hrtime_tick = tsc_tick;
	gethrtime_hires = 1;
	/*
	 * Being part of the comm page, tsc_ncpu communicates the published
	 * length of the tsc_sync_tick_delta array.  This is kept zeroed to
	 * ignore the absent delta data while the TSCs are synced.
	 */
	tsc_ncpu = 0;
	/*
	 * Allocate memory for the structure used in the tsc sync logic.
	 * This structure should be aligned on a multiple of cache line size.
	 */
	tscp = kmem_zalloc(PAGESIZE, KM_SLEEP);

	/*
	 * Convert the TSC resume cap ns value into its unscaled TSC value.
	 * See tsc_gethrtime().
	 */
	if (tsc_resume_cap == 0)
		TSC_CONVERT(tsc_resume_cap_ns, tsc_resume_cap, nsec_unscale);
}
7752df1fe9cSrandyf 
/*
 * Accessor returning the current value of the tsc_ready flag (set elsewhere
 * once the TSC timestamp machinery is usable -- see callers).
 */
int
get_tsc_ready()
{
	return (tsc_ready);
}
7812df1fe9cSrandyf 
7822df1fe9cSrandyf /*
78386cb0be2SPatrick Mooney  * Adjust all the deltas by adding the passed value to the array and activate
78486cb0be2SPatrick Mooney  * the "delta" versions of the gethrtime functions.  It is possible that the
78586cb0be2SPatrick Mooney  * adjustment could be negative.  Such may occur if the SunOS instance was
78686cb0be2SPatrick Mooney  * moved by a virtual manager to a machine with a higher value of TSC.
7872df1fe9cSrandyf  */
7882df1fe9cSrandyf void
tsc_adjust_delta(hrtime_t tdelta)7892df1fe9cSrandyf tsc_adjust_delta(hrtime_t tdelta)
7902df1fe9cSrandyf {
7912df1fe9cSrandyf 	int		i;
7922df1fe9cSrandyf 
7932df1fe9cSrandyf 	for (i = 0; i < NCPU; i++) {
7942df1fe9cSrandyf 		tsc_sync_tick_delta[i] += tdelta;
7952df1fe9cSrandyf 	}
7962df1fe9cSrandyf 
7972df1fe9cSrandyf 	gethrtimef = tsc_gethrtime_delta;
7982df1fe9cSrandyf 	gethrtimeunscaledf = tsc_gethrtimeunscaled_delta;
7992428aad8SPatrick Mooney 	tsc_ncpu = NCPU;
8002df1fe9cSrandyf }
8012df1fe9cSrandyf 
8022df1fe9cSrandyf /*
8032df1fe9cSrandyf  * Functions to manage TSC and high-res time on suspend and resume.
8042df1fe9cSrandyf  */
8052df1fe9cSrandyf 
80686cb0be2SPatrick Mooney /* tod_ops from "uts/i86pc/io/todpc_subr.c" */
8072df1fe9cSrandyf extern tod_ops_t *tod_ops;
80886cb0be2SPatrick Mooney 
8092df1fe9cSrandyf static uint64_t tsc_saved_tsc = 0; /* 1 in 2^64 chance this'll screw up! */
8102df1fe9cSrandyf static timestruc_t tsc_saved_ts;
8112df1fe9cSrandyf static int	tsc_needs_resume = 0;	/* We only want to do this once. */
8122df1fe9cSrandyf int		tsc_delta_onsuspend = 0;
8132df1fe9cSrandyf int		tsc_adjust_seconds = 1;
8142df1fe9cSrandyf int		tsc_suspend_count = 0;
8152df1fe9cSrandyf int		tsc_resume_in_cyclic = 0;
8162df1fe9cSrandyf 
8172df1fe9cSrandyf /*
81886cb0be2SPatrick Mooney  * Take snapshots of the current time and do any other pre-suspend work.
8192df1fe9cSrandyf  */
void
tsc_suspend(void)
{
	/*
	 * We need to collect the time at which we suspended here so we know
	 * how much should be added during the resume.  This is called by each
	 * CPU, so reentry must be properly handled.
	 */
	if (tsc_gethrtime_enable) {
		/*
		 * Perform the tsc_read after acquiring the lock to make it as
		 * accurate as possible in the face of contention.
		 */
		mutex_enter(&tod_lock);
		tsc_saved_tsc = tsc_read();
		tsc_saved_ts = TODOP_GET(tod_ops);
		mutex_exit(&tod_lock);
		/* We only want to do this once. */
		if (tsc_needs_resume == 0) {
			if (tsc_delta_onsuspend) {
				tsc_adjust_delta(tsc_saved_tsc);
			} else {
				/*
				 * NOTE(review): adjusting the deltas by
				 * nsec_scale (a conversion factor, not a
				 * tick count) looks odd -- confirm the
				 * intent against tsc_resume().
				 */
				tsc_adjust_delta(nsec_scale);
			}
			tsc_suspend_count++;
		}
	}

	invalidate_cache();
	/* Mark that tsc_resume() has work to do (done once, see above). */
	tsc_needs_resume = 1;
}
8512df1fe9cSrandyf 
8522df1fe9cSrandyf /*
85386cb0be2SPatrick Mooney  * Restore all timestamp state based on the snapshots taken at suspend time.
8542df1fe9cSrandyf  */
void
tsc_resume(void)
{
	/*
	 * We only need to (and want to) do this once.  So let the first
	 * caller handle this (we are locked by the cpu lock), as it
	 * is preferential that we get the earliest sync.
	 */
	if (tsc_needs_resume) {
		/*
		 * If using the TSC, adjust the delta based on how long
		 * we were sleeping (or away).  We also adjust for
		 * migration and a grown TSC.
		 */
		if (tsc_saved_tsc != 0) {
			timestruc_t	ts;
			hrtime_t	now, sleep_tsc = 0;
			int		sleep_sec;
			extern void	tsc_tick(void);
			extern uint64_t cpu_freq_hz;

			/* tsc_read() MUST be before TODOP_GET() */
			mutex_enter(&tod_lock);
			now = tsc_read();
			ts = TODOP_GET(tod_ops);
			mutex_exit(&tod_lock);

			/* Compute seconds of sleep time */
			sleep_sec = ts.tv_sec - tsc_saved_ts.tv_sec;

			/*
			 * If the current TOD value is not ahead of the value
			 * saved at suspend, there is likely a problem with
			 * the clock.  Assume at least one second has passed,
			 * so that time goes forward.
			 */
			if (sleep_sec <= 0) {
				sleep_sec = 1;
			}

			/* How many TSC ticks should have occurred asleep */
			if (tsc_adjust_seconds)
				sleep_tsc = sleep_sec * cpu_freq_hz;

			/*
			 * We also want to subtract from the "sleep_tsc"
			 * the current value of tsc_read(), so that our
			 * adjustment accounts for the amount of time we
			 * have been resumed _or_ an adjustment based on
			 * the fact that we didn't actually power off the
			 * CPU (migration is another issue, but _should_
			 * also comply with this calculation).  If the CPU
			 * never powered off, then:
			 *    'now == sleep_tsc + saved_tsc'
			 * and the delta will effectively be "0".
			 */
			sleep_tsc -= now;
			if (tsc_delta_onsuspend) {
				tsc_adjust_delta(sleep_tsc);
			} else {
				tsc_adjust_delta(tsc_saved_tsc + sleep_tsc);
			}
			/* Clear the snapshot so a re-resume is a no-op. */
			tsc_saved_tsc = 0;

			tsc_tick();
		}
		tsc_needs_resume = 0;
	}

}
925*575694f6SJason King 
926*575694f6SJason King static int
tsc_calibrate_cmp(const void * a,const void * b)927*575694f6SJason King tsc_calibrate_cmp(const void *a, const void *b)
928*575694f6SJason King {
929*575694f6SJason King 	const tsc_calibrate_t * const *a1 = a;
930*575694f6SJason King 	const tsc_calibrate_t * const *b1 = b;
931*575694f6SJason King 	const tsc_calibrate_t *l = *a1;
932*575694f6SJason King 	const tsc_calibrate_t *r = *b1;
933*575694f6SJason King 
934*575694f6SJason King 	/* Sort from highest preference to lowest preference */
935*575694f6SJason King 	if (l->tscc_preference > r->tscc_preference)
936*575694f6SJason King 		return (-1);
937*575694f6SJason King 	if (l->tscc_preference < r->tscc_preference)
938*575694f6SJason King 		return (1);
939*575694f6SJason King 
940*575694f6SJason King 	/* For equal preference sources, sort alphabetically */
941*575694f6SJason King 	int c = strcmp(l->tscc_source, r->tscc_source);
942*575694f6SJason King 
943*575694f6SJason King 	if (c < 0)
944*575694f6SJason King 		return (-1);
945*575694f6SJason King 	if (c > 0)
946*575694f6SJason King 		return (1);
947*575694f6SJason King 	return (0);
948*575694f6SJason King }
949*575694f6SJason King 
950*575694f6SJason King SET_DECLARE(tsc_calibration_set, tsc_calibrate_t);
951*575694f6SJason King 
/*
 * Look up a TSC calibration source by name (case-insensitive) within the
 * tsc_calibration_set linker set.  Returns the matching tsc_calibrate_t,
 * or NULL (after logging a console note) if no such source exists.
 */
static tsc_calibrate_t *
tsc_calibrate_get_force(const char *source)
{
	tsc_calibrate_t **tsccpp;

	VERIFY3P(source, !=, NULL);

	SET_FOREACH(tsccpp, tsc_calibration_set) {
		tsc_calibrate_t *tsccp = *tsccpp;

		if (strcasecmp(source, tsccp->tscc_source) == 0)
			return (tsccp);
	}

	/*
	 * If an operator explicitly gave a TSC value and we didn't find it,
	 * we should let them know.
	 */
	cmn_err(CE_NOTE,
	    "Explicit TSC calibration source '%s' not found; using default",
	    source);

	return (NULL);
}
976*575694f6SJason King 
977*575694f6SJason King /*
978*575694f6SJason King  * As described in tscc_pit.c, as an interim measure as we transition to
979*575694f6SJason King  * alternate calibration sources besides the PIT, we still want to gather
980*575694f6SJason King  * what the values would have been had we used the PIT. Therefore, if we're
981*575694f6SJason King  * using a source other than the PIT, we explicitly run the PIT calibration
982*575694f6SJason King  * which will store the TSC frequency as measured by the PIT for the
983*575694f6SJason King  * benefit of the APIC code (as well as any potential diagnostics).
984*575694f6SJason King  */
/* Run the PIT calibration for its side effect (see block comment above). */
static void
tsc_pit_also(void)
{
	tsc_calibrate_t *pit = tsc_calibrate_get_force("PIT");
	uint64_t dummy;

	/* We should always have the PIT as a possible calibration source */
	VERIFY3P(pit, !=, NULL);

	/* If we used the PIT to calibrate, we don't need to run again */
	if (tsc_calibration_source == pit)
		return;

	/*
	 * Since we're not using the PIT as the actual TSC calibration source,
	 * we don't care about the results or saving the result -- tscc_pit.c
	 * saves the frequency in a global for the benefit of the APIC code.
	 */
	(void) pit->tscc_calibrate(&dummy);
}
1005*575694f6SJason King 
/*
 * Determine the TSC frequency (in Hz) and cache it in tsc_freq.  An operator
 * may force a specific calibration source via the "tsc_calibration" property;
 * otherwise every registered source is tried in descending preference order.
 * Panics if no source succeeds.  Returns the calibrated frequency.
 */
uint64_t
tsc_calibrate(void)
{
	tsc_calibrate_t **tsccpp, *force;
	size_t tsc_set_size;
	int tsc_name_len;

	/*
	 * Every x86 system since the Pentium has TSC support. Since we
	 * only support 64-bit x86 systems, there should always be a TSC
	 * present, and something's horribly wrong if it's missing.
	 */
	if (!is_x86_feature(x86_featureset, X86FSET_TSC))
		panic("System does not have TSC support");

	/*
	 * If we already successfully calibrated the TSC, no need to do
	 * it again.
	 */
	if (tsc_freq > 0)
		return (tsc_freq);

	PRM_POINT("Calibrating the TSC...");

	/*
	 * Allow an operator to explicitly specify a calibration source via
	 * `set tsc_calibration=foo` in the bootloader or
	 * `set tsc_calibration="foo"` in /etc/system (preferring a bootloader
	 * supplied value over /etc/system).
	 *
	 * If no source is given, or the specified source is not found, we
	 * fallback to trying all of the known sources in order by preference
	 * (high preference value to low preference value) until one succeeds.
	 */
	tsc_name_len = BOP_GETPROPLEN(bootops, "tsc_calibration");
	if (tsc_name_len > 0) {
		/* Overwrite any /etc/system supplied value */
		if (tsc_calibration != NULL) {
			size_t len = strlen(tsc_calibration) + 1;

			/* /etc/system values were allocated by kobj */
			kobj_free_string(tsc_calibration, len);
		}

		tsc_calibration = kmem_zalloc(tsc_name_len + 1, KM_SLEEP);
		BOP_GETPROP(bootops, "tsc_calibration", tsc_calibration);
	}

	if (tsc_calibration != NULL &&
	    (force = tsc_calibrate_get_force(tsc_calibration)) != NULL) {
		if (tsc_name_len > 0) {
			PRM_POINT("Forcing bootloader specified TSC calibration"
			    " source");
		} else {
			PRM_POINT("Forcing /etc/system specified TSC "
			    "calibration source");
		}
		PRM_DEBUGS(force->tscc_source);

		if (!force->tscc_calibrate(&tsc_freq))
			panic("Failed to calibrate the TSC");

		tsc_calibration_source = force;

		/*
		 * We've saved the tsc_calibration_t that matched the value
		 * of tsc_calibration at this point, so we can release the
		 * memory for the value now.
		 *
		 * The allocator must match the origin: kmem for a
		 * bootloader-supplied name, kobj for /etc/system.
		 */
		if (tsc_name_len > 0) {
			kmem_free(tsc_calibration, tsc_name_len + 1);
		} else if (tsc_calibration != NULL) {
			size_t len = strlen(tsc_calibration) + 1;

			kobj_free_string(tsc_calibration, len);
		}
		tsc_calibration = NULL;

		tsc_pit_also();
		return (tsc_freq);
	}

	/*
	 * While we could sort the set contents in place, we'll make a copy
	 * of the set and avoid modifying the original set.
	 *
	 * NOTE(review): sizeof (tsc_calibrate_t **) works here only because
	 * all pointer sizes are equal; the element type is tsc_calibrate_t *.
	 */
	tsc_set_size = SET_COUNT(tsc_calibration_set) *
	    sizeof (tsc_calibrate_t **);
	tsccpp = kmem_zalloc(tsc_set_size, KM_SLEEP);
	bcopy(SET_BEGIN(tsc_calibration_set), tsccpp, tsc_set_size);

	/*
	 * Sort by preference, highest to lowest
	 */
	qsort(tsccpp, SET_COUNT(tsc_calibration_set),
	    sizeof (tsc_calibrate_t **), tsc_calibrate_cmp);

	for (uint_t i = 0; i < SET_COUNT(tsc_calibration_set); i++) {
		PRM_DEBUGS(tsccpp[i]->tscc_source);
		if (tsccpp[i]->tscc_calibrate(&tsc_freq)) {
			VERIFY3U(tsc_freq, >, 0);

			cmn_err(CE_CONT,
			    "?TSC calibrated using %s; freq is %lu MHz\n",
			    tsccpp[i]->tscc_source, tsc_freq / 1000000);

			/*
			 * Note that tsccpp is just a (sorted) array of
			 * pointers to the tsc_calibration_t's (from the
			 * linker set). The actual tsc_calibration_t's aren't
			 * kmem_alloc()ed (being part of the linker set), so
			 * it's safe to keep a pointer to the one that was
			 * used for calibration (intended for diagnostic
			 * purposes).
			 */
			tsc_calibration_source = tsccpp[i];

			kmem_free(tsccpp, tsc_set_size);
			tsc_pit_also();
			return (tsc_freq);
		}
	}

	/*
	 * In case it's useful, we don't free tsccpp -- we're about to panic
	 * anyway.
	 */
	panic("Failed to calibrate TSC");
}
1134*575694f6SJason King 
/*
 * Accessor for the calibrated TSC frequency in Hz.  tsc_calibrate() must
 * have set tsc_freq before this is called; otherwise the VERIFY fires.
 */
uint64_t
tsc_get_freq(void)
{
	VERIFY(tsc_freq > 0);
	return (tsc_freq);
}
1141