17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5ae115bc7Smrj * Common Development and Distribution License (the "License"). 6ae115bc7Smrj * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 21843e1988Sjohnlev 227c478bd9Sstevel@tonic-gate /* 237997e108SSurya Prakki * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 247c478bd9Sstevel@tonic-gate * Use is subject to license terms. 2579ec9da8SYuri Pankov * 2679ec9da8SYuri Pankov * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 27e014e7f8SPaul Dagnelie * Copyright (c) 2014, 2016 by Delphix. All rights reserved. 28*fc3fd29dSPatrick Mooney * Copyright 2018 Joyent, Inc. 
297c478bd9Sstevel@tonic-gate */ 307c478bd9Sstevel@tonic-gate 317c478bd9Sstevel@tonic-gate #include <sys/types.h> 327c478bd9Sstevel@tonic-gate #include <sys/param.h> 337c478bd9Sstevel@tonic-gate #include <sys/systm.h> 347c478bd9Sstevel@tonic-gate #include <sys/disp.h> 357c478bd9Sstevel@tonic-gate #include <sys/var.h> 367c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 377c478bd9Sstevel@tonic-gate #include <sys/debug.h> 387c478bd9Sstevel@tonic-gate #include <sys/x86_archext.h> 397c478bd9Sstevel@tonic-gate #include <sys/archsystm.h> 407c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 417c478bd9Sstevel@tonic-gate #include <sys/psm_defs.h> 427c478bd9Sstevel@tonic-gate #include <sys/clock.h> 437c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 447c478bd9Sstevel@tonic-gate #include <sys/lockstat.h> 457c478bd9Sstevel@tonic-gate #include <sys/smp_impldefs.h> 467c478bd9Sstevel@tonic-gate #include <sys/dtrace.h> 477c478bd9Sstevel@tonic-gate #include <sys/time.h> 48843e1988Sjohnlev #include <sys/panic.h> 49b3c18020SSudheer A #include <sys/cpu.h> 50e014e7f8SPaul Dagnelie #include <sys/sdt.h> 512428aad8SPatrick Mooney #include <sys/comm_page.h> 527c478bd9Sstevel@tonic-gate 537c478bd9Sstevel@tonic-gate /* 547c478bd9Sstevel@tonic-gate * Using the Pentium's TSC register for gethrtime() 557c478bd9Sstevel@tonic-gate * ------------------------------------------------ 567c478bd9Sstevel@tonic-gate * 577c478bd9Sstevel@tonic-gate * The Pentium family, like many chip architectures, has a high-resolution 587c478bd9Sstevel@tonic-gate * timestamp counter ("TSC") which increments once per CPU cycle. The contents 597c478bd9Sstevel@tonic-gate * of the timestamp counter are read with the RDTSC instruction. 607c478bd9Sstevel@tonic-gate * 617c478bd9Sstevel@tonic-gate * As with its UltraSPARC equivalent (the %tick register), TSC's cycle count 627c478bd9Sstevel@tonic-gate * must be translated into nanoseconds in order to implement gethrtime(). 
637c478bd9Sstevel@tonic-gate * We avoid inducing floating point operations in this conversion by 647c478bd9Sstevel@tonic-gate * implementing the same nsec_scale algorithm as that found in the sun4u 657c478bd9Sstevel@tonic-gate * platform code. The sun4u NATIVE_TIME_TO_NSEC_SCALE block comment contains 667c478bd9Sstevel@tonic-gate * a detailed description of the algorithm; the comment is not reproduced 677c478bd9Sstevel@tonic-gate * here. This implementation differs only in its value for NSEC_SHIFT: 687c478bd9Sstevel@tonic-gate * we implement an NSEC_SHIFT of 5 (instead of sun4u's 4) to allow for 697c478bd9Sstevel@tonic-gate * 60 MHz Pentiums. 707c478bd9Sstevel@tonic-gate * 717c478bd9Sstevel@tonic-gate * While TSC and %tick are both cycle counting registers, TSC's functionality 727c478bd9Sstevel@tonic-gate * falls short in several critical ways: 737c478bd9Sstevel@tonic-gate * 747c478bd9Sstevel@tonic-gate * (a) TSCs on different CPUs are not guaranteed to be in sync. While in 757c478bd9Sstevel@tonic-gate * practice they often _are_ in sync, this isn't guaranteed by the 767c478bd9Sstevel@tonic-gate * architecture. 777c478bd9Sstevel@tonic-gate * 787c478bd9Sstevel@tonic-gate * (b) The TSC cannot be reliably set to an arbitrary value. The architecture 797c478bd9Sstevel@tonic-gate * only supports writing the low 32-bits of TSC, making it impractical 807c478bd9Sstevel@tonic-gate * to rewrite. 817c478bd9Sstevel@tonic-gate * 827c478bd9Sstevel@tonic-gate * (c) The architecture doesn't have the capacity to interrupt based on 837c478bd9Sstevel@tonic-gate * arbitrary values of TSC; there is no TICK_CMPR equivalent. 847c478bd9Sstevel@tonic-gate * 857c478bd9Sstevel@tonic-gate * Together, (a) and (b) imply that software must track the skew between 867c478bd9Sstevel@tonic-gate * TSCs and account for it (it is assumed that while there may exist skew, 877c478bd9Sstevel@tonic-gate * there does not exist drift). 
To determine the skew between CPUs, we 887c478bd9Sstevel@tonic-gate * have newly onlined CPUs call tsc_sync_slave(), while the CPU performing 89b3c18020SSudheer A * the online operation calls tsc_sync_master(). 907c478bd9Sstevel@tonic-gate * 917c478bd9Sstevel@tonic-gate * In the absence of time-of-day clock adjustments, gethrtime() must stay in 927c478bd9Sstevel@tonic-gate * sync with gettimeofday(). This is problematic; given (c), the software 937c478bd9Sstevel@tonic-gate * cannot drive its time-of-day source from TSC, and yet they must somehow be 947c478bd9Sstevel@tonic-gate * kept in sync. We implement this by having a routine, tsc_tick(), which 957c478bd9Sstevel@tonic-gate * is called once per second from the interrupt which drives time-of-day. 967c478bd9Sstevel@tonic-gate * 977c478bd9Sstevel@tonic-gate * Note that the hrtime base for gethrtime, tsc_hrtime_base, is modified 987c478bd9Sstevel@tonic-gate * atomically with nsec_scale under CLOCK_LOCK. This assures that time 997c478bd9Sstevel@tonic-gate * monotonically increases. 1007c478bd9Sstevel@tonic-gate */ 1017c478bd9Sstevel@tonic-gate 1027c478bd9Sstevel@tonic-gate #define NSEC_SHIFT 5 1037c478bd9Sstevel@tonic-gate 104113b131bSEric Saxe static uint_t nsec_unscale; 1057c478bd9Sstevel@tonic-gate 1067c478bd9Sstevel@tonic-gate /* 1077c478bd9Sstevel@tonic-gate * These two variables used to be grouped together inside of a structure that 1087c478bd9Sstevel@tonic-gate * lived on a single cache line. A regression (bug ID 4623398) caused the 1097c478bd9Sstevel@tonic-gate * compiler to emit code that "optimized" away the while-loops below. The 1107c478bd9Sstevel@tonic-gate * result was that no synchronization between the onlining and onlined CPUs 1117c478bd9Sstevel@tonic-gate * took place. 
1127c478bd9Sstevel@tonic-gate */ 1137c478bd9Sstevel@tonic-gate static volatile int tsc_ready; 1147c478bd9Sstevel@tonic-gate static volatile int tsc_sync_go; 1157c478bd9Sstevel@tonic-gate 1167c478bd9Sstevel@tonic-gate /* 1177c478bd9Sstevel@tonic-gate * Used as indices into the tsc_sync_snaps[] array. 1187c478bd9Sstevel@tonic-gate */ 1197c478bd9Sstevel@tonic-gate #define TSC_MASTER 0 1207c478bd9Sstevel@tonic-gate #define TSC_SLAVE 1 1217c478bd9Sstevel@tonic-gate 1227c478bd9Sstevel@tonic-gate /* 1237c478bd9Sstevel@tonic-gate * Used in the tsc_master_sync()/tsc_slave_sync() rendezvous. 1247c478bd9Sstevel@tonic-gate */ 1257c478bd9Sstevel@tonic-gate #define TSC_SYNC_STOP 1 1267c478bd9Sstevel@tonic-gate #define TSC_SYNC_GO 2 127b3c18020SSudheer A #define TSC_SYNC_DONE 3 128b3c18020SSudheer A #define SYNC_ITERATIONS 10 1297c478bd9Sstevel@tonic-gate 130843e1988Sjohnlev #define TSC_CONVERT_AND_ADD(tsc, hrt, scale) { \ 131ae115bc7Smrj unsigned int *_l = (unsigned int *)&(tsc); \ 132ae115bc7Smrj (hrt) += mul32(_l[1], scale) << NSEC_SHIFT; \ 1337c478bd9Sstevel@tonic-gate (hrt) += mul32(_l[0], scale) >> (32 - NSEC_SHIFT); \ 1347c478bd9Sstevel@tonic-gate } 1357c478bd9Sstevel@tonic-gate 136ae115bc7Smrj #define TSC_CONVERT(tsc, hrt, scale) { \ 137ae115bc7Smrj unsigned int *_l = (unsigned int *)&(tsc); \ 138ae115bc7Smrj (hrt) = mul32(_l[1], scale) << NSEC_SHIFT; \ 1397c478bd9Sstevel@tonic-gate (hrt) += mul32(_l[0], scale) >> (32 - NSEC_SHIFT); \ 1407c478bd9Sstevel@tonic-gate } 1417c478bd9Sstevel@tonic-gate 142ae115bc7Smrj int tsc_master_slave_sync_needed = 1; 1437c478bd9Sstevel@tonic-gate 144b3c18020SSudheer A typedef struct tsc_sync { 145b3c18020SSudheer A volatile hrtime_t master_tsc, slave_tsc; 146b3c18020SSudheer A } tsc_sync_t; 147b3c18020SSudheer A static tsc_sync_t *tscp; 148b3c18020SSudheer A 1497c478bd9Sstevel@tonic-gate static hrtime_t tsc_last_jumped = 0; 1507c478bd9Sstevel@tonic-gate static int tsc_jumped = 0; 151e014e7f8SPaul Dagnelie static uint32_t tsc_wayback = 0; 
152e014e7f8SPaul Dagnelie /* 153e014e7f8SPaul Dagnelie * The cap of 1 second was chosen since it is the frequency at which the 154e014e7f8SPaul Dagnelie * tsc_tick() function runs which means that when gethrtime() is called it 155e014e7f8SPaul Dagnelie * should never be more than 1 second since tsc_last was updated. 156e014e7f8SPaul Dagnelie */ 157e014e7f8SPaul Dagnelie static hrtime_t tsc_resume_cap_ns = NANOSEC; /* 1s */ 1587c478bd9Sstevel@tonic-gate 1597c478bd9Sstevel@tonic-gate static hrtime_t shadow_tsc_hrtime_base; 1607c478bd9Sstevel@tonic-gate static hrtime_t shadow_tsc_last; 1617c478bd9Sstevel@tonic-gate static uint_t shadow_nsec_scale; 1627c478bd9Sstevel@tonic-gate static uint32_t shadow_hres_lock; 1632df1fe9cSrandyf int get_tsc_ready(); 1647c478bd9Sstevel@tonic-gate 165e014e7f8SPaul Dagnelie static inline 166e014e7f8SPaul Dagnelie hrtime_t tsc_protect(hrtime_t a) { 167e014e7f8SPaul Dagnelie if (a > tsc_resume_cap) { 168e014e7f8SPaul Dagnelie atomic_inc_32(&tsc_wayback); 169e014e7f8SPaul Dagnelie DTRACE_PROBE3(tsc__wayback, htrime_t, a, hrtime_t, tsc_last, 170e014e7f8SPaul Dagnelie uint32_t, tsc_wayback); 171e014e7f8SPaul Dagnelie return (tsc_resume_cap); 172e014e7f8SPaul Dagnelie } 173e014e7f8SPaul Dagnelie return (a); 174e014e7f8SPaul Dagnelie } 175e014e7f8SPaul Dagnelie 176843e1988Sjohnlev hrtime_t 177843e1988Sjohnlev tsc_gethrtime(void) 178843e1988Sjohnlev { 179843e1988Sjohnlev uint32_t old_hres_lock; 180843e1988Sjohnlev hrtime_t tsc, hrt; 181843e1988Sjohnlev 182843e1988Sjohnlev do { 183843e1988Sjohnlev old_hres_lock = hres_lock; 184843e1988Sjohnlev 185843e1988Sjohnlev if ((tsc = tsc_read()) >= tsc_last) { 186843e1988Sjohnlev /* 187843e1988Sjohnlev * It would seem to be obvious that this is true 188843e1988Sjohnlev * (that is, the past is less than the present), 189843e1988Sjohnlev * but it isn't true in the presence of suspend/resume 190843e1988Sjohnlev * cycles. 
If we manage to call gethrtime() 191843e1988Sjohnlev * after a resume, but before the first call to 192843e1988Sjohnlev * tsc_tick(), we will see the jump. In this case, 193843e1988Sjohnlev * we will simply use the value in TSC as the delta. 194843e1988Sjohnlev */ 195843e1988Sjohnlev tsc -= tsc_last; 196843e1988Sjohnlev } else if (tsc >= tsc_last - 2*tsc_max_delta) { 197843e1988Sjohnlev /* 198843e1988Sjohnlev * There is a chance that tsc_tick() has just run on 199843e1988Sjohnlev * another CPU, and we have drifted just enough so that 200843e1988Sjohnlev * we appear behind tsc_last. In this case, force the 201843e1988Sjohnlev * delta to be zero. 202843e1988Sjohnlev */ 203843e1988Sjohnlev tsc = 0; 204e014e7f8SPaul Dagnelie } else { 205e014e7f8SPaul Dagnelie /* 206e014e7f8SPaul Dagnelie * If we reach this else clause we assume that we have 207e014e7f8SPaul Dagnelie * gone through a suspend/resume cycle and use the 208e014e7f8SPaul Dagnelie * current tsc value as the delta. 209e014e7f8SPaul Dagnelie * 210e014e7f8SPaul Dagnelie * In rare cases we can reach this else clause due to 211e014e7f8SPaul Dagnelie * a lack of monotonicity in the TSC value. In such 212e014e7f8SPaul Dagnelie * cases using the current TSC value as the delta would 213e014e7f8SPaul Dagnelie * cause us to return a value ~2x of what it should 214e014e7f8SPaul Dagnelie * be. To protect against these cases we cap the 215e014e7f8SPaul Dagnelie * suspend/resume delta at tsc_resume_cap. 
216e014e7f8SPaul Dagnelie */ 217e014e7f8SPaul Dagnelie tsc = tsc_protect(tsc); 218843e1988Sjohnlev } 219843e1988Sjohnlev 220843e1988Sjohnlev hrt = tsc_hrtime_base; 221843e1988Sjohnlev 222843e1988Sjohnlev TSC_CONVERT_AND_ADD(tsc, hrt, nsec_scale); 223843e1988Sjohnlev } while ((old_hres_lock & ~1) != hres_lock); 224843e1988Sjohnlev 225843e1988Sjohnlev return (hrt); 226843e1988Sjohnlev } 227843e1988Sjohnlev 228843e1988Sjohnlev hrtime_t 229843e1988Sjohnlev tsc_gethrtime_delta(void) 230843e1988Sjohnlev { 231843e1988Sjohnlev uint32_t old_hres_lock; 232843e1988Sjohnlev hrtime_t tsc, hrt; 233a563a037Sbholler ulong_t flags; 234843e1988Sjohnlev 235843e1988Sjohnlev do { 236843e1988Sjohnlev old_hres_lock = hres_lock; 237843e1988Sjohnlev 238843e1988Sjohnlev /* 239843e1988Sjohnlev * We need to disable interrupts here to assure that we 240843e1988Sjohnlev * don't migrate between the call to tsc_read() and 241843e1988Sjohnlev * adding the CPU's TSC tick delta. Note that disabling 242843e1988Sjohnlev * and reenabling preemption is forbidden here because 243843e1988Sjohnlev * we may be in the middle of a fast trap. In the amd64 244843e1988Sjohnlev * kernel we cannot tolerate preemption during a fast 245843e1988Sjohnlev * trap. See _update_sregs(). 
246843e1988Sjohnlev */ 247843e1988Sjohnlev 248843e1988Sjohnlev flags = clear_int_flag(); 249843e1988Sjohnlev tsc = tsc_read() + tsc_sync_tick_delta[CPU->cpu_id]; 250843e1988Sjohnlev restore_int_flag(flags); 251843e1988Sjohnlev 252843e1988Sjohnlev /* See comments in tsc_gethrtime() above */ 253843e1988Sjohnlev 254843e1988Sjohnlev if (tsc >= tsc_last) { 255843e1988Sjohnlev tsc -= tsc_last; 256843e1988Sjohnlev } else if (tsc >= tsc_last - 2 * tsc_max_delta) { 257843e1988Sjohnlev tsc = 0; 258e014e7f8SPaul Dagnelie } else { 259e014e7f8SPaul Dagnelie tsc = tsc_protect(tsc); 260843e1988Sjohnlev } 261843e1988Sjohnlev 262843e1988Sjohnlev hrt = tsc_hrtime_base; 263843e1988Sjohnlev 264843e1988Sjohnlev TSC_CONVERT_AND_ADD(tsc, hrt, nsec_scale); 265843e1988Sjohnlev } while ((old_hres_lock & ~1) != hres_lock); 266843e1988Sjohnlev 267843e1988Sjohnlev return (hrt); 268843e1988Sjohnlev } 269843e1988Sjohnlev 2709278ddffSRobert Mustacchi hrtime_t 2719278ddffSRobert Mustacchi tsc_gethrtime_tick_delta(void) 2729278ddffSRobert Mustacchi { 2739278ddffSRobert Mustacchi hrtime_t hrt; 2749278ddffSRobert Mustacchi ulong_t flags; 2759278ddffSRobert Mustacchi 2769278ddffSRobert Mustacchi flags = clear_int_flag(); 2779278ddffSRobert Mustacchi hrt = tsc_sync_tick_delta[CPU->cpu_id]; 2789278ddffSRobert Mustacchi restore_int_flag(flags); 2799278ddffSRobert Mustacchi 2809278ddffSRobert Mustacchi return (hrt); 2819278ddffSRobert Mustacchi } 2829278ddffSRobert Mustacchi 283*fc3fd29dSPatrick Mooney /* Calculate the hrtime while exposing the parameters of that calculation. 
*/ 284*fc3fd29dSPatrick Mooney hrtime_t 285*fc3fd29dSPatrick Mooney tsc_gethrtime_params(uint64_t *tscp, uint32_t *scalep, uint8_t *shiftp) 286*fc3fd29dSPatrick Mooney { 287*fc3fd29dSPatrick Mooney uint32_t old_hres_lock, scale; 288*fc3fd29dSPatrick Mooney hrtime_t tsc, last, base; 289*fc3fd29dSPatrick Mooney 290*fc3fd29dSPatrick Mooney do { 291*fc3fd29dSPatrick Mooney old_hres_lock = hres_lock; 292*fc3fd29dSPatrick Mooney 293*fc3fd29dSPatrick Mooney if (gethrtimef == tsc_gethrtime_delta) { 294*fc3fd29dSPatrick Mooney ulong_t flags; 295*fc3fd29dSPatrick Mooney 296*fc3fd29dSPatrick Mooney flags = clear_int_flag(); 297*fc3fd29dSPatrick Mooney tsc = tsc_read() + tsc_sync_tick_delta[CPU->cpu_id]; 298*fc3fd29dSPatrick Mooney restore_int_flag(flags); 299*fc3fd29dSPatrick Mooney } else { 300*fc3fd29dSPatrick Mooney tsc = tsc_read(); 301*fc3fd29dSPatrick Mooney } 302*fc3fd29dSPatrick Mooney 303*fc3fd29dSPatrick Mooney last = tsc_last; 304*fc3fd29dSPatrick Mooney base = tsc_hrtime_base; 305*fc3fd29dSPatrick Mooney scale = nsec_scale; 306*fc3fd29dSPatrick Mooney 307*fc3fd29dSPatrick Mooney } while ((old_hres_lock & ~1) != hres_lock); 308*fc3fd29dSPatrick Mooney 309*fc3fd29dSPatrick Mooney /* See comments in tsc_gethrtime() above */ 310*fc3fd29dSPatrick Mooney if (tsc >= last) { 311*fc3fd29dSPatrick Mooney tsc -= last; 312*fc3fd29dSPatrick Mooney } else if (tsc >= last - 2 * tsc_max_delta) { 313*fc3fd29dSPatrick Mooney tsc = 0; 314*fc3fd29dSPatrick Mooney } else { 315*fc3fd29dSPatrick Mooney tsc = tsc_protect(tsc); 316*fc3fd29dSPatrick Mooney } 317*fc3fd29dSPatrick Mooney 318*fc3fd29dSPatrick Mooney TSC_CONVERT_AND_ADD(tsc, base, nsec_scale); 319*fc3fd29dSPatrick Mooney 320*fc3fd29dSPatrick Mooney if (tscp != NULL) { 321*fc3fd29dSPatrick Mooney /* 322*fc3fd29dSPatrick Mooney * Do not simply communicate the delta applied to the hrtime 323*fc3fd29dSPatrick Mooney * base, but rather the effective TSC measurement. 
324*fc3fd29dSPatrick Mooney */ 325*fc3fd29dSPatrick Mooney *tscp = tsc + last; 326*fc3fd29dSPatrick Mooney } 327*fc3fd29dSPatrick Mooney if (scalep != NULL) { 328*fc3fd29dSPatrick Mooney *scalep = scale; 329*fc3fd29dSPatrick Mooney } 330*fc3fd29dSPatrick Mooney if (shiftp != NULL) { 331*fc3fd29dSPatrick Mooney *shiftp = NSEC_SHIFT; 332*fc3fd29dSPatrick Mooney } 333*fc3fd29dSPatrick Mooney 334*fc3fd29dSPatrick Mooney return (base); 335*fc3fd29dSPatrick Mooney } 336*fc3fd29dSPatrick Mooney 337843e1988Sjohnlev /* 338*fc3fd29dSPatrick Mooney * This is similar to tsc_gethrtime_delta, but it cannot actually spin on 339*fc3fd29dSPatrick Mooney * hres_lock. As a result, it caches all of the variables it needs; if the 340*fc3fd29dSPatrick Mooney * variables don't change, it's done. 341843e1988Sjohnlev */ 342843e1988Sjohnlev hrtime_t 343843e1988Sjohnlev dtrace_gethrtime(void) 344843e1988Sjohnlev { 345843e1988Sjohnlev uint32_t old_hres_lock; 346843e1988Sjohnlev hrtime_t tsc, hrt; 347a563a037Sbholler ulong_t flags; 348843e1988Sjohnlev 349843e1988Sjohnlev do { 350843e1988Sjohnlev old_hres_lock = hres_lock; 351843e1988Sjohnlev 352843e1988Sjohnlev /* 353843e1988Sjohnlev * Interrupts are disabled to ensure that the thread isn't 354843e1988Sjohnlev * migrated between the tsc_read() and adding the CPU's 355843e1988Sjohnlev * TSC tick delta. 356843e1988Sjohnlev */ 357843e1988Sjohnlev flags = clear_int_flag(); 358843e1988Sjohnlev 359843e1988Sjohnlev tsc = tsc_read(); 360843e1988Sjohnlev 361843e1988Sjohnlev if (gethrtimef == tsc_gethrtime_delta) 362843e1988Sjohnlev tsc += tsc_sync_tick_delta[CPU->cpu_id]; 363843e1988Sjohnlev 364843e1988Sjohnlev restore_int_flag(flags); 365843e1988Sjohnlev 366843e1988Sjohnlev /* 367843e1988Sjohnlev * See the comments in tsc_gethrtime(), above. 
368843e1988Sjohnlev */ 369843e1988Sjohnlev if (tsc >= tsc_last) 370843e1988Sjohnlev tsc -= tsc_last; 371843e1988Sjohnlev else if (tsc >= tsc_last - 2*tsc_max_delta) 372843e1988Sjohnlev tsc = 0; 373e014e7f8SPaul Dagnelie else 374e014e7f8SPaul Dagnelie tsc = tsc_protect(tsc); 375843e1988Sjohnlev 376843e1988Sjohnlev hrt = tsc_hrtime_base; 377843e1988Sjohnlev 378843e1988Sjohnlev TSC_CONVERT_AND_ADD(tsc, hrt, nsec_scale); 379843e1988Sjohnlev 380843e1988Sjohnlev if ((old_hres_lock & ~1) == hres_lock) 381843e1988Sjohnlev break; 382843e1988Sjohnlev 383843e1988Sjohnlev /* 384843e1988Sjohnlev * If we're here, the clock lock is locked -- or it has been 385843e1988Sjohnlev * unlocked and locked since we looked. This may be due to 386843e1988Sjohnlev * tsc_tick() running on another CPU -- or it may be because 387843e1988Sjohnlev * some code path has ended up in dtrace_probe() with 388843e1988Sjohnlev * CLOCK_LOCK held. We'll try to determine that we're in 389843e1988Sjohnlev * the former case by taking another lap if the lock has 390843e1988Sjohnlev * changed since when we first looked at it. 391843e1988Sjohnlev */ 392843e1988Sjohnlev if (old_hres_lock != hres_lock) 393843e1988Sjohnlev continue; 394843e1988Sjohnlev 395843e1988Sjohnlev /* 396843e1988Sjohnlev * So the lock was and is locked. We'll use the old data 397843e1988Sjohnlev * instead. 398843e1988Sjohnlev */ 399843e1988Sjohnlev old_hres_lock = shadow_hres_lock; 400843e1988Sjohnlev 401843e1988Sjohnlev /* 402843e1988Sjohnlev * Again, disable interrupts to ensure that the thread 403843e1988Sjohnlev * isn't migrated between the tsc_read() and adding 404843e1988Sjohnlev * the CPU's TSC tick delta. 
405843e1988Sjohnlev */ 406843e1988Sjohnlev flags = clear_int_flag(); 407843e1988Sjohnlev 408843e1988Sjohnlev tsc = tsc_read(); 409843e1988Sjohnlev 410843e1988Sjohnlev if (gethrtimef == tsc_gethrtime_delta) 411843e1988Sjohnlev tsc += tsc_sync_tick_delta[CPU->cpu_id]; 412843e1988Sjohnlev 413843e1988Sjohnlev restore_int_flag(flags); 414843e1988Sjohnlev 415843e1988Sjohnlev /* 416843e1988Sjohnlev * See the comments in tsc_gethrtime(), above. 417843e1988Sjohnlev */ 418843e1988Sjohnlev if (tsc >= shadow_tsc_last) 419843e1988Sjohnlev tsc -= shadow_tsc_last; 420843e1988Sjohnlev else if (tsc >= shadow_tsc_last - 2 * tsc_max_delta) 421843e1988Sjohnlev tsc = 0; 422e014e7f8SPaul Dagnelie else 423e014e7f8SPaul Dagnelie tsc = tsc_protect(tsc); 424843e1988Sjohnlev 425843e1988Sjohnlev hrt = shadow_tsc_hrtime_base; 426843e1988Sjohnlev 427843e1988Sjohnlev TSC_CONVERT_AND_ADD(tsc, hrt, shadow_nsec_scale); 428843e1988Sjohnlev } while ((old_hres_lock & ~1) != shadow_hres_lock); 429843e1988Sjohnlev 430843e1988Sjohnlev return (hrt); 431843e1988Sjohnlev } 432843e1988Sjohnlev 433843e1988Sjohnlev hrtime_t 434843e1988Sjohnlev tsc_gethrtimeunscaled(void) 435843e1988Sjohnlev { 436843e1988Sjohnlev uint32_t old_hres_lock; 437843e1988Sjohnlev hrtime_t tsc; 438843e1988Sjohnlev 439843e1988Sjohnlev do { 440843e1988Sjohnlev old_hres_lock = hres_lock; 441843e1988Sjohnlev 442843e1988Sjohnlev /* See tsc_tick(). 
*/ 443843e1988Sjohnlev tsc = tsc_read() + tsc_last_jumped; 444843e1988Sjohnlev } while ((old_hres_lock & ~1) != hres_lock); 445843e1988Sjohnlev 446843e1988Sjohnlev return (tsc); 447843e1988Sjohnlev } 448843e1988Sjohnlev 449113b131bSEric Saxe /* 450113b131bSEric Saxe * Convert a nanosecond based timestamp to tsc 451113b131bSEric Saxe */ 452113b131bSEric Saxe uint64_t 453113b131bSEric Saxe tsc_unscalehrtime(hrtime_t nsec) 454113b131bSEric Saxe { 455113b131bSEric Saxe hrtime_t tsc; 456113b131bSEric Saxe 457113b131bSEric Saxe if (tsc_gethrtime_enable) { 458113b131bSEric Saxe TSC_CONVERT(nsec, tsc, nsec_unscale); 459113b131bSEric Saxe return (tsc); 460113b131bSEric Saxe } 461113b131bSEric Saxe return ((uint64_t)nsec); 462113b131bSEric Saxe } 463843e1988Sjohnlev 464843e1988Sjohnlev /* Convert a tsc timestamp to nanoseconds */ 465843e1988Sjohnlev void 466843e1988Sjohnlev tsc_scalehrtime(hrtime_t *tsc) 467843e1988Sjohnlev { 468843e1988Sjohnlev hrtime_t hrt; 469843e1988Sjohnlev hrtime_t mytsc; 470843e1988Sjohnlev 471843e1988Sjohnlev if (tsc == NULL) 472843e1988Sjohnlev return; 473843e1988Sjohnlev mytsc = *tsc; 474843e1988Sjohnlev 475843e1988Sjohnlev TSC_CONVERT(mytsc, hrt, nsec_scale); 476843e1988Sjohnlev *tsc = hrt; 477843e1988Sjohnlev } 478843e1988Sjohnlev 479843e1988Sjohnlev hrtime_t 480843e1988Sjohnlev tsc_gethrtimeunscaled_delta(void) 481843e1988Sjohnlev { 482843e1988Sjohnlev hrtime_t hrt; 483a563a037Sbholler ulong_t flags; 484843e1988Sjohnlev 485843e1988Sjohnlev /* 486843e1988Sjohnlev * Similarly to tsc_gethrtime_delta, we need to disable preemption 487843e1988Sjohnlev * to prevent migration between the call to tsc_gethrtimeunscaled 488843e1988Sjohnlev * and adding the CPU's hrtime delta. Note that disabling and 489843e1988Sjohnlev * reenabling preemption is forbidden here because we may be in the 490843e1988Sjohnlev * middle of a fast trap. In the amd64 kernel we cannot tolerate 491843e1988Sjohnlev * preemption during a fast trap. See _update_sregs(). 
492843e1988Sjohnlev */ 493843e1988Sjohnlev 494843e1988Sjohnlev flags = clear_int_flag(); 495843e1988Sjohnlev hrt = tsc_gethrtimeunscaled() + tsc_sync_tick_delta[CPU->cpu_id]; 496843e1988Sjohnlev restore_int_flag(flags); 497843e1988Sjohnlev 498843e1988Sjohnlev return (hrt); 499843e1988Sjohnlev } 500843e1988Sjohnlev 5017c478bd9Sstevel@tonic-gate /* 50286cb0be2SPatrick Mooney * TSC Sync Master 50386cb0be2SPatrick Mooney * 50486cb0be2SPatrick Mooney * Typically called on the boot CPU, this attempts to quantify TSC skew between 50586cb0be2SPatrick Mooney * different CPUs. If an appreciable difference is found, gethrtimef will be 50686cb0be2SPatrick Mooney * changed to point to tsc_gethrtime_delta(). 50786cb0be2SPatrick Mooney * 50886cb0be2SPatrick Mooney * Calculating skews is precise only when the master and slave TSCs are read 50986cb0be2SPatrick Mooney * simultaneously; however, there is no algorithm that can read both CPUs in 51086cb0be2SPatrick Mooney * perfect simultaneity. The proposed algorithm is an approximate method based 51186cb0be2SPatrick Mooney * on the behaviour of cache management. The slave CPU continuously polls the 51286cb0be2SPatrick Mooney * TSC while reading a global variable updated by the master CPU. The latest 51386cb0be2SPatrick Mooney * TSC reading is saved when the master's update (forced via mfence) reaches 51486cb0be2SPatrick Mooney * visibility on the slave. The master will also take a TSC reading 51586cb0be2SPatrick Mooney * immediately following the mfence. 51686cb0be2SPatrick Mooney * 51786cb0be2SPatrick Mooney * While the delay between cache line invalidation on the slave and mfence 51886cb0be2SPatrick Mooney * completion on the master is not repeatable, the error is heuristically 51986cb0be2SPatrick Mooney * assumed to be 1/4th of the write time recorded by the master. Multiple 52086cb0be2SPatrick Mooney * samples are taken to control for the variance caused by external factors 52186cb0be2SPatrick Mooney * such as bus contention. 
Each sample set is independent per-CPU to control 52286cb0be2SPatrick Mooney * for differing memory latency on NUMA systems. 5234af20bbdSSudheer A * 5244af20bbdSSudheer A * TSC sync is disabled in the context of virtualization because the CPUs 5254af20bbdSSudheer A * assigned to the guest are virtual CPUs which means the real CPUs on which 5264af20bbdSSudheer A * guest runs keep changing during life time of guest OS. So we would end up 5274af20bbdSSudheer A * calculating TSC skews for a set of CPUs during boot whereas the guest 5284af20bbdSSudheer A * might migrate to a different set of physical CPUs at a later point of 5294af20bbdSSudheer A * time. 5307c478bd9Sstevel@tonic-gate */ 5317c478bd9Sstevel@tonic-gate void 5327c478bd9Sstevel@tonic-gate tsc_sync_master(processorid_t slave) 5337c478bd9Sstevel@tonic-gate { 534b3c18020SSudheer A ulong_t flags, source, min_write_time = ~0UL; 53586cb0be2SPatrick Mooney hrtime_t write_time, mtsc_after, last_delta = 0; 536b3c18020SSudheer A tsc_sync_t *tsc = tscp; 537b3c18020SSudheer A int cnt; 538b9bfdccdSStuart Maybee int hwtype; 5397c478bd9Sstevel@tonic-gate 540b9bfdccdSStuart Maybee hwtype = get_hwenv(); 54179ec9da8SYuri Pankov if (!tsc_master_slave_sync_needed || (hwtype & HW_VIRTUAL) != 0) 542ae115bc7Smrj return; 543ae115bc7Smrj 5447c478bd9Sstevel@tonic-gate flags = clear_int_flag(); 545b3c18020SSudheer A source = CPU->cpu_id; 546b3c18020SSudheer A 547b3c18020SSudheer A for (cnt = 0; cnt < SYNC_ITERATIONS; cnt++) { 548b3c18020SSudheer A while (tsc_sync_go != TSC_SYNC_GO) 549b3c18020SSudheer A SMT_PAUSE(); 550b3c18020SSudheer A 551b3c18020SSudheer A tsc->master_tsc = tsc_read(); 552b3c18020SSudheer A membar_enter(); 553b3c18020SSudheer A mtsc_after = tsc_read(); 554b3c18020SSudheer A while (tsc_sync_go != TSC_SYNC_DONE) 555b3c18020SSudheer A SMT_PAUSE(); 556b3c18020SSudheer A write_time = mtsc_after - tsc->master_tsc; 557b3c18020SSudheer A if (write_time <= min_write_time) { 55886cb0be2SPatrick Mooney hrtime_t tdelta; 
55986cb0be2SPatrick Mooney 56086cb0be2SPatrick Mooney tdelta = tsc->slave_tsc - mtsc_after; 56186cb0be2SPatrick Mooney if (tdelta < 0) 56286cb0be2SPatrick Mooney tdelta = -tdelta; 563b3c18020SSudheer A /* 56486cb0be2SPatrick Mooney * If the margin exists, subtract 1/4th of the measured 56586cb0be2SPatrick Mooney * write time from the master's TSC value. This is an 56686cb0be2SPatrick Mooney * estimate of how late the mfence completion came 56786cb0be2SPatrick Mooney * after the slave noticed the cache line change. 568b3c18020SSudheer A */ 56986cb0be2SPatrick Mooney if (tdelta > (write_time/4)) { 570b3c18020SSudheer A tdelta = tsc->slave_tsc - 57186cb0be2SPatrick Mooney (mtsc_after - (write_time/4)); 57286cb0be2SPatrick Mooney } else { 573b3c18020SSudheer A tdelta = tsc->slave_tsc - mtsc_after; 57486cb0be2SPatrick Mooney } 57586cb0be2SPatrick Mooney last_delta = tsc_sync_tick_delta[source] - tdelta; 57686cb0be2SPatrick Mooney tsc_sync_tick_delta[slave] = last_delta; 57786cb0be2SPatrick Mooney min_write_time = write_time; 578b3c18020SSudheer A } 5797c478bd9Sstevel@tonic-gate 580b3c18020SSudheer A tsc->master_tsc = tsc->slave_tsc = write_time = 0; 581b3c18020SSudheer A membar_enter(); 582b3c18020SSudheer A tsc_sync_go = TSC_SYNC_STOP; 583b3c18020SSudheer A } 58486cb0be2SPatrick Mooney 5857c478bd9Sstevel@tonic-gate /* 58686cb0be2SPatrick Mooney * Only enable the delta variants of the TSC functions if the measured 58786cb0be2SPatrick Mooney * skew is greater than the fastest write time. 5887c478bd9Sstevel@tonic-gate */ 58986cb0be2SPatrick Mooney last_delta = (last_delta < 0) ? 
	    -last_delta : last_delta;
	if (last_delta > min_write_time) {
		gethrtimef = tsc_gethrtime_delta;
		gethrtimeunscaledf = tsc_gethrtimeunscaled_delta;
		tsc_ncpu = NCPU;
	}
	restore_int_flag(flags);
}

/*
 * TSC Sync Slave
 *
 * Called by a CPU which has just been onlined.  It is expected that the CPU
 * performing the online operation will call tsc_sync_master().
 *
 * Like tsc_sync_master, this logic is skipped on virtualized platforms.
 *
 * The handshake with the master runs with interrupts cleared and is driven
 * by the tsc_sync_go flag and the shared tsc_sync_t structure (tscp).
 */
void
tsc_sync_slave(void)
{
	ulong_t flags;
	hrtime_t s1;
	tsc_sync_t *tsc = tscp;
	int cnt;
	int hwtype;

	hwtype = get_hwenv();
	if (!tsc_master_slave_sync_needed || (hwtype & HW_VIRTUAL) != 0)
		return;

	flags = clear_int_flag();

	for (cnt = 0; cnt < SYNC_ITERATIONS; cnt++) {
		/* Re-fill the cache line */
		s1 = tsc->master_tsc;
		membar_enter();
		/* Tell the master this slave is ready to sample. */
		tsc_sync_go = TSC_SYNC_GO;
		do {
			/*
			 * Do not put an SMT_PAUSE here. If the master and
			 * slave are the same hyper-threaded CPU, we want the
			 * master to yield as quickly as possible to the slave.
			 */
			s1 = tsc_read();
		} while (tsc->master_tsc == 0);
		tsc->slave_tsc = s1;
		membar_enter();
		tsc_sync_go = TSC_SYNC_DONE;

		/* Wait for the master to finish this iteration. */
		while (tsc_sync_go != TSC_SYNC_STOP)
			SMT_PAUSE();
	}

	restore_int_flag(flags);
}

/*
 * Called once per second on a CPU from the cyclic subsystem's
 * CY_HIGH_LEVEL interrupt.  (No longer just cpu0-only)
 *
 * Advances tsc_hrtime_base by the nanoseconds corresponding to the TSC
 * ticks elapsed since the previous call and records the current reading
 * in tsc_last.
 */
void
tsc_tick(void)
{
	hrtime_t now, delta;
	ushort_t spl;

	/*
	 * Before we set the new variables, we set the shadow values.  This
	 * allows for lock free operation in dtrace_gethrtime().
	 */
	lock_set_spl((lock_t *)&shadow_hres_lock + HRES_LOCK_OFFSET,
	    ipltospl(CBE_HIGH_PIL), &spl);

	shadow_tsc_hrtime_base = tsc_hrtime_base;
	shadow_tsc_last = tsc_last;
	shadow_nsec_scale = nsec_scale;

	shadow_hres_lock++;
	splx(spl);

	CLOCK_LOCK(&spl);

	now = tsc_read();

	/* Apply this CPU's sync delta when the delta variants are active. */
	if (gethrtimef == tsc_gethrtime_delta)
		now += tsc_sync_tick_delta[CPU->cpu_id];

	if (now < tsc_last) {
		/*
		 * The TSC has just jumped into the past. We assume that
		 * this is due to a suspend/resume cycle, and we're going
		 * to use the _current_ value of TSC as the delta. This
		 * will keep tsc_hrtime_base correct. We're also going to
		 * assume that rate of tsc does not change after a suspend
		 * resume (i.e nsec_scale remains the same).
		 */
		delta = now;
		delta = tsc_protect(delta);
		tsc_last_jumped += tsc_last;
		tsc_jumped = 1;
	} else {
		/*
		 * Determine the number of TSC ticks since the last clock
		 * tick, and add that to the hrtime base.
		 */
		delta = now - tsc_last;
	}

	TSC_CONVERT_AND_ADD(delta, tsc_hrtime_base, nsec_scale);
	tsc_last = now;

	CLOCK_UNLOCK(spl);
}

/*
 * Initialize the TSC-based hrtime machinery once the CPU frequency is
 * known: compute the scale/unscale factors used by the conversion
 * macros, install the TSC flavors of the gethrtime function pointers,
 * and allocate the page used by the master/slave sync handshake.
 */
void
tsc_hrtimeinit(uint64_t cpu_freq_hz)
{
	extern int gethrtime_hires;
	longlong_t tsc;
	ulong_t flags;

	/*
	 * cpu_freq_hz is the measured cpu frequency in hertz
	 */

	/*
	 * We can't accommodate CPUs slower than 31.25 MHz.
	 */
	ASSERT(cpu_freq_hz > NANOSEC / (1 << NSEC_SHIFT));
	nsec_scale =
	    (uint_t)(((uint64_t)NANOSEC << (32 - NSEC_SHIFT)) / cpu_freq_hz);
	nsec_unscale =
	    (uint_t)(((uint64_t)cpu_freq_hz << (32 - NSEC_SHIFT)) / NANOSEC);

	/*
	 * Record, with interrupts cleared, the TSC cost of a single
	 * tsc_gethrtime() call in tsc_max_delta.
	 */
	flags = clear_int_flag();
	tsc = tsc_read();
	(void) tsc_gethrtime();
	tsc_max_delta = tsc_read() - tsc;
	restore_int_flag(flags);
	gethrtimef = tsc_gethrtime;
	gethrtimeunscaledf = tsc_gethrtimeunscaled;
	scalehrtimef = tsc_scalehrtime;
	unscalehrtimef = tsc_unscalehrtime;
	hrtime_tick = tsc_tick;
	gethrtime_hires = 1;
	/*
	 * Being part of the comm page, tsc_ncpu communicates the published
	 * length of the tsc_sync_tick_delta array.  This is kept zeroed to
	 * ignore the absent delta data while the TSCs are synced.
	 */
	tsc_ncpu = 0;
	/*
	 * Allocate memory for the structure used in the tsc sync logic.
	 * This structure should be aligned on a multiple of cache line size.
	 */
	tscp = kmem_zalloc(PAGESIZE, KM_SLEEP);

	/*
	 * Convert the TSC resume cap ns value into its unscaled TSC value.
	 * See tsc_gethrtime().
	 */
	if (tsc_resume_cap == 0)
		TSC_CONVERT(tsc_resume_cap_ns, tsc_resume_cap, nsec_unscale);
}

/* Return the current value of the tsc_ready flag. */
int
get_tsc_ready()
{
	return (tsc_ready);
}

/*
 * Adjust all the deltas by adding the passed value to the array and activate
 * the "delta" versions of the gethrtime functions.  It is possible that the
 * adjustment could be negative.  Such may occur if the SunOS instance was
 * moved by a virtual manager to a machine with a higher value of TSC.
 */
void
tsc_adjust_delta(hrtime_t tdelta)
{
	int i;

	for (i = 0; i < NCPU; i++) {
		tsc_sync_tick_delta[i] += tdelta;
	}

	/*
	 * Activate the delta-aware gethrtime variants and publish the
	 * delta array length (tsc_ncpu is part of the comm page).
	 */
	gethrtimef = tsc_gethrtime_delta;
	gethrtimeunscaledf = tsc_gethrtimeunscaled_delta;
	tsc_ncpu = NCPU;
}

/*
 * Functions to manage TSC and high-res time on suspend and resume.
 */

/* tod_ops from "uts/i86pc/io/todpc_subr.c" */
extern tod_ops_t *tod_ops;

/* TSC value captured at suspend; 0 means no snapshot is pending. */
static uint64_t tsc_saved_tsc = 0; /* 1 in 2^64 chance this'll screw up! */
/* TOD (wall clock) value captured alongside tsc_saved_tsc. */
static timestruc_t tsc_saved_ts;
static int tsc_needs_resume = 0; /* We only want to do this once. */
int tsc_delta_onsuspend = 0; /* If set, adjust deltas at suspend time too. */
int tsc_adjust_seconds = 1; /* If set, credit TSC ticks for seconds asleep. */
int tsc_suspend_count = 0; /* Count of suspends that adjusted the deltas. */
int tsc_resume_in_cyclic = 0;

/*
 * Take snapshots of the current time and do any other pre-suspend work.
 */
void
tsc_suspend(void)
{
	/*
	 * We need to collect the time at which we suspended here so we know
	 * how much should be added during the resume.  This is called by
	 * each CPU, so reentry must be properly handled.
	 */
	if (tsc_gethrtime_enable) {
		/*
		 * Perform the tsc_read after acquiring the lock to make it as
		 * accurate as possible in the face of contention.
		 */
		mutex_enter(&tod_lock);
		tsc_saved_tsc = tsc_read();
		tsc_saved_ts = TODOP_GET(tod_ops);
		mutex_exit(&tod_lock);
		/* We only want to do this once. */
		if (tsc_needs_resume == 0) {
			if (tsc_delta_onsuspend) {
				tsc_adjust_delta(tsc_saved_tsc);
			} else {
				tsc_adjust_delta(nsec_scale);
			}
			tsc_suspend_count++;
		}
	}

	invalidate_cache();
	tsc_needs_resume = 1;
}

/*
 * Restore all timestamp state based on the snapshots taken at suspend time.
 */
void
tsc_resume(void)
{
	/*
	 * We only need to (and want to) do this once.  So let the first
	 * caller handle this (we are locked by the cpu lock), as it
	 * is preferential that we get the earliest sync.
	 */
	if (tsc_needs_resume) {
		/*
		 * If using the TSC, adjust the delta based on how long
		 * we were sleeping (or away).  We also adjust for
		 * migration and a grown TSC.
		 */
		if (tsc_saved_tsc != 0) {
			timestruc_t ts;
			hrtime_t now, sleep_tsc = 0;
			int sleep_sec;
			extern void tsc_tick(void);
			extern uint64_t cpu_freq_hz;

			/* tsc_read() MUST be before TODOP_GET() */
			mutex_enter(&tod_lock);
			now = tsc_read();
			ts = TODOP_GET(tod_ops);
			mutex_exit(&tod_lock);

			/* Compute seconds of sleep time */
			sleep_sec = ts.tv_sec - tsc_saved_ts.tv_sec;

			/*
			 * If the current TOD time is not later than the
			 * saved one, there is likely a problem with the
			 * clock.  Assume at least one second has passed,
			 * so that time goes forward.
			 */
			if (sleep_sec <= 0) {
				sleep_sec = 1;
			}

			/* How many TSC ticks should have occurred asleep */
			if (tsc_adjust_seconds)
				sleep_tsc = sleep_sec * cpu_freq_hz;

			/*
			 * We also want to subtract from the "sleep_tsc"
			 * the current value of tsc_read(), so that our
			 * adjustment accounts for the amount of time we
			 * have been resumed _or_ an adjustment based on
			 * the fact that we didn't actually power off the
			 * CPU (migration is another issue, but _should_
			 * also comply with this calculation). If the CPU
			 * never powered off, then:
			 * 'now == sleep_tsc + saved_tsc'
			 * and the delta will effectively be "0".
			 */
			sleep_tsc -= now;
			if (tsc_delta_onsuspend) {
				tsc_adjust_delta(sleep_tsc);
			} else {
				tsc_adjust_delta(tsc_saved_tsc + sleep_tsc);
			}
			tsc_saved_tsc = 0;

			/* Re-sync tsc_last/tsc_hrtime_base immediately. */
			tsc_tick();
		}
		tsc_needs_resume = 0;
	}

}