/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

#include <sys/asm_linkage.h>
#include <sys/segments.h>
#include <sys/time_impl.h>
#include <sys/tsc.h>
#include <cp_offsets.h>

#define	GETCPU_GDT_OFFSET	SEL_GDT(GDT_CPUID, SEL_UPL)

	.file	"cp_subr.s"

/*
 * These are cloned from TSC and time-related code in the kernel.  They should
 * be kept in sync if the source values change.
 * See: uts/i86pc/os/timestamp.c
 */
#define	NSEC_SHIFT	5
#define	ADJ_SHIFT	4
#define	NANOSEC		0x3b9aca00

/*
 * For __cp_tsc_read calls which incur looping retries due to CPU migration,
 * this represents the maximum number of tries before bailing out.
 */
#define	TSC_READ_MAXLOOP	0x4

/*
 * hrtime_t
 * __cp_tsc_read(comm_page_t *cp)
 *
 * Stack usage: 0 bytes
 */
	ENTRY_NP(__cp_tsc_read)
	movl	CP_TSC_TYPE(%rdi), %esi
	movl	CP_TSC_NCPU(%rdi), %r8d

	cmpl	$TSC_TSCP, %esi
	jne	2f
	rdtscp
	/*
	 * When the TSC is read, the low 32 bits are placed in %eax while the
	 * high 32 bits are placed in %edx.  They are shifted and ORed together
	 * to obtain the full 64-bit value.
	 */
	shlq	$0x20, %rdx
	orq	%rdx, %rax

	/*
	 * A zeroed cp_tsc_ncpu (currently held in r8d) indicates that no
	 * per-CPU TSC offsets are required.
	 */
	testl	%r8d, %r8d
	jnz	1f
	ret

1:
	/*
	 * A non-zero cp_tsc_ncpu gives the length of the
	 * cp_tsc_sync_tick_delta array, which holds per-CPU offsets applied
	 * to TSC readings.  The CPU ID furnished by the IA32_TSC_AUX register
	 * via rdtscp (placed in rcx) is used to look up an offset value in
	 * that array and apply it to the TSC value.
	 */
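	/*
	 * A rough, non-normative C sketch of this lookup (field names are
	 * assumed from the CP_* offset macros; __rdtscp() is the compiler
	 * intrinsic from <x86intrin.h>):
	 *
	 *	uint32_t cpu;
	 *	uint64_t tsc = __rdtscp(&cpu);
	 *	if (cp->cp_tsc_ncpu != 0)
	 *		tsc += cp->cp_tsc_sync_tick_delta[cpu];
	 *	return (tsc);
	 */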
	leaq	CP_TSC_SYNC_TICK_DELTA(%rdi), %r9
	movq	(%r9, %rcx, 8), %rdx
	addq	%rdx, %rax
	ret

2:
	/*
	 * TSC reading without RDTSCP
	 *
	 * Check if handling for per-CPU TSC offsets is required.  If not,
	 * immediately skip to the appropriate steps to perform an rdtsc.
	 *
	 * If per-CPU offsets are present, the TSC reading process is more
	 * complicated.  Without rdtscp, there is no way to simultaneously read
	 * the TSC and query the current CPU.  In order to "catch" migrations
	 * during execution, the CPU ID is queried before and after rdtsc.  The
	 * execution is repeated if results differ, subject to a loop limit.
	 */
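	/*
	 * In rough C terms, when per-CPU offsets are in play, the logic below
	 * amounts to the following (comm_page_t field names follow the CP_*
	 * offsets; getcpu() and rdtsc_fenced() are illustrative helpers, not
	 * functions defined in this code):
	 *
	 *	uint_t i = 0, cpu = getcpu();
	 *	for (;;) {
	 *		uint64_t tsc = rdtsc_fenced(cp->cp_tsc_type);
	 *		uint_t now = getcpu();
	 *		if (now == cpu)
	 *			return (tsc + cp->cp_tsc_sync_tick_delta[cpu]);
	 *		if (++i > TSC_READ_MAXLOOP)
	 *			return (0);
	 *		cpu = now;
	 *	}
	 */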
	xorq	%r9, %r9
	testl	%r8d, %r8d
	jz	3f

	/*
	 * Load the address of the per-CPU offset array, since it is needed.
	 * The attempted loop count is kept in r8.
	 */
	leaq	CP_TSC_SYNC_TICK_DELTA(%rdi), %r9
	xorl	%r8d, %r8d

	/* Query the CPU ID and stash it in r10 for later comparison */
	movl	$GETCPU_GDT_OFFSET, %edx
	lsl	%dx, %edx
	movl	%edx, %r10d

3:
	cmpl	$TSC_RDTSC_MFENCE, %esi
	jne	4f
	mfence
	rdtsc
	jmp	7f

4:
	cmpl	$TSC_RDTSC_LFENCE, %esi
	jne	5f
	lfence
	rdtsc
	jmp	7f

5:
	cmpl	$TSC_RDTSC_CPUID, %esi
	jne	6f
	/*
	 * Since the amd64 ABI dictates that %rbx is callee-saved, it must be
	 * preserved here.  Its contents will be overwritten when cpuid is used
	 * as a serializing instruction.
	 */
	movq	%rbx, %r11
	xorl	%eax, %eax
	cpuid
	rdtsc
	movq	%r11, %rbx
	jmp	7f

6:
	/*
	 * Other protections should have prevented this function from being
	 * called in the first place.  Since callers must already handle
	 * failures from the CPU-migration retry loop, yield the same
	 * bail-out result: 0.
	 */
	xorl	%eax, %eax
	ret

7:
	shlq	$0x20, %rdx
	orq	%rdx, %rax

	/*
	 * With the TSC reading in hand, check if any per-CPU offset handling
	 * is required.  The address of the array of deltas (r9) will not have
	 * been populated if offset handling is unnecessary.
	 */
	testq	%r9, %r9
	jnz	8f
	ret

8:
	movl	$GETCPU_GDT_OFFSET, %edx
	lsl	%dx, %edx
	cmpl	%edx, %r10d
	jne	9f
	movq	(%r9, %rdx, 8), %rdx
	addq	%rdx, %rax
	ret

9:
	/*
	 * It appears that a migration has occurred between the first CPU ID
	 * query and now.  Retry the read unless the loop limit has been
	 * reached.
	 */
	cmpl	$TSC_READ_MAXLOOP, %r8d
	jge	10f
	incl	%r8d
	movl	%edx, %r10d
	jmp	3b

10:
	/* Loop limit was reached. Return bail-out value of 0. */
	xorl	%eax, %eax
	ret

	SET_SIZE(__cp_tsc_read)


/*
 * uint_t
 * __cp_getcpu(comm_page_t *)
 *
 * Stack usage: 0 bytes
 */
	ENTRY_NP(__cp_getcpu)
	movl	CP_TSC_TYPE(%rdi), %edi
	/*
	 * If RDTSCP is available, it is a quick way to grab the cpu_id, which
	 * is stored in the TSC_AUX MSR by the kernel.
	 */
	cmpl	$TSC_TSCP, %edi
	jne	1f
	rdtscp
	movl	%ecx, %eax
	ret
1:
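	/*
	 * Otherwise fall back to the GETCPU_GDT_OFFSET selector: the kernel
	 * maintains a GDT_CPUID descriptor whose segment limit encodes the
	 * CPU ID, which lsl can fetch without any privileged operation.
	 */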
	mov	$GETCPU_GDT_OFFSET, %eax
	lsl	%ax, %eax
	ret
	SET_SIZE(__cp_getcpu)

/*
 * hrtime_t
 * __cp_gethrtime(comm_page_t *cp)
 *
 * Stack usage: 0x20 local + 0x8 call = 0x28 bytes
 *
 * %rsp+0x00 - hrtime_t tsc_last
 * %rsp+0x08 - hrtime_t hrtime_base
 * %rsp+0x10 - comm_page_t *cp
 * %rsp+0x18 - int hres_lock
 */
	ENTRY_NP(__cp_gethrtime)
	subq	$0x20, %rsp
	movq	%rdi, 0x10(%rsp)
1:
	movl	CP_HRES_LOCK(%rdi), %r9d
	movl	%r9d, 0x18(%rsp)

	movq	CP_TSC_LAST(%rdi), %rax
	movq	CP_TSC_HRTIME_BASE(%rdi), %rdx
	movq	%rax, (%rsp)
	movq	%rdx, 0x8(%rsp)

	call	__cp_tsc_read

	/*
	 * Failure is inferred from a TSC reading of 0.  The normal fasttrap
	 * mechanism can be used as a fallback in such cases.
	 */
	testq	%rax, %rax
	jz	6f

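	/*
	 * Validate that cp_hres_lock was even (no update in progress) and
	 * unchanged across the reads above.  Masking the low bit off the
	 * saved copy and comparing it against the current value catches both
	 * conditions, forcing a retry when either fails.
	 */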
	movq	0x10(%rsp), %rdi
	movl	0x18(%rsp), %r9d
	movl	CP_HRES_LOCK(%rdi), %edx
	andl	$0xfffffffe, %r9d
	cmpl	%r9d, %edx
	jne	1b

	/*
	 * The in-kernel logic for calculating hrtime performs several checks
	 * to protect against edge cases.  That logic is summarized as:
	 * if (tsc >= tsc_last) {
	 *         delta -= tsc_last;
	 * } else if (tsc >= tsc_last - 2*tsc_max_delta) {
	 *         delta = 0;
	 * } else {
	 *         delta = MIN(tsc, tsc_resume_cap);
	 * }
	 *
	 * The below implementation achieves the same result, although it is
	 * structured for speed and optimized for the fast path:
	 *
	 * delta = tsc - tsc_last;
	 * if (delta < 0) {
	 *         delta += (tsc_max_delta << 1);
	 *         if (delta >= 0) {
	 *                 delta = 0;
	 *         } else {
	 *                 delta = MIN(tsc, tsc_resume_cap);
	 *         }
	 * }
	 */
	movq	(%rsp), %rdx
	subq	%rdx, %rax		/* delta = tsc - tsc_last */
	jbe	3f			/* if (delta < 0) */

2:
	/*
	 * Optimized TSC_CONVERT_AND_ADD:
	 * hrtime_base += (tsc_delta * nsec_scale) >> (32 - NSEC_SHIFT)
	 *
	 * Since the multiply and shift are done in 128-bit, there is no need
	 * to worry about overflow.
	 */
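	/*
	 * A rough C equivalent of the instructions below, assuming a compiler
	 * that provides unsigned __int128:
	 *
	 *	hrtime_base +=
	 *	    (hrtime_t)(((unsigned __int128)tsc_delta * nsec_scale) >>
	 *	    (32 - NSEC_SHIFT));
	 */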
	movl	CP_NSEC_SCALE(%rdi), %ecx
	mulq	%rcx
	shrdq	$_CONST(32 - NSEC_SHIFT), %rdx, %rax
	movq	0x8(%rsp), %r8
	addq	%r8, %rax

	addq	$0x20, %rsp
	ret

3:
	movq	%rax, %r9		/* save (tsc - tsc_last) in r9 */
	movl	CP_TSC_MAX_DELTA(%rdi), %ecx
	sall	$1, %ecx
	addq	%rcx, %rax		/* delta += (tsc_max_delta << 1) */
	jae	4f			/* delta < 0 */
	xorq	%rax, %rax
	jmp	2b

4:
	/*
	 * Repopulate %rax with the TSC reading by adding tsc_last to %r9
	 * (which holds tsc - tsc_last)
	 */
	movq	(%rsp), %rax
	addq	%r9, %rax

	/* delta = MIN(tsc, resume_cap) */
	movq	CP_TSC_RESUME_CAP(%rdi), %rcx
	cmpq	%rcx, %rax
	jbe	5f
	movq	%rcx, %rax
5:
	jmp	2b

6:
	movl	$T_GETHRTIME, %eax
	int	$T_FASTTRAP
	addq	$0x20, %rsp
	ret

	SET_SIZE(__cp_gethrtime)

/*
 * int
 * __cp_clock_gettime_monotonic(comm_page_t *cp, timespec_t *tsp)
 *
 * Stack usage: 0x8 local + 0x8 call + 0x28 called func. = 0x38 bytes
 *
 * %rsp+0x00 - timespec_t *tsp
 */
	ENTRY_NP(__cp_clock_gettime_monotonic)
	subq	$0x8, %rsp
	movq	%rsi, (%rsp)

	call	__cp_gethrtime

	/*
	 * Convert from hrtime_t (int64_t in nanoseconds) to timespec_t.
	 * This uses the same approach as hrt2ts, although it has been updated
	 * to utilize 64-bit math.
	 * 1 / 1,000,000,000 =
	 * 1000100101110000010111110100000100110110101101001010010110011B-26
	 * = 0x112e0be826d694b3 * 2^-26
	 *
	 * secs = (nsecs * 0x112e0be826d694b3) >> 26
	 *
	 * In order to account for the two's complement representation of
	 * negative inputs, a final operation completes the process:
	 *
	 * secs -= (nsecs >> 63)
	 */
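	/*
	 * Taking the upper 64 bits of the 128-bit product and shifting by 26
	 * is equivalent to shifting the full product right by 90.  A rough C
	 * sketch of the instructions below, using a compiler-provided
	 * __int128:
	 *
	 *	int64_t secs =
	 *	    (int64_t)(((__int128)nsecs * 0x112e0be826d694b3LL) >> 90);
	 *	secs -= (nsecs >> 63);
	 *	tsp->tv_sec = secs;
	 *	tsp->tv_nsec = nsecs - (secs * NANOSEC);
	 */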
	movq	%rax, %r11
	movq	$0x112e0be826d694b3, %rdx
	imulq	%rdx
	sarq	$0x1a, %rdx
	movq	%r11, %rax
	sarq	$0x3f, %rax
	subq	%rax, %rdx
	movq	(%rsp), %rsi
	movq	%rdx, (%rsi)
	/*
	 * Populating tv_nsec is easier:
	 * tv_nsec = nsecs - (secs * NANOSEC)
	 */
	imulq	$NANOSEC, %rdx, %rdx
	subq	%rdx, %r11
	movq	%r11, 0x8(%rsi)

	xorl	%eax, %eax
	addq	$0x8, %rsp
	ret
	SET_SIZE(__cp_clock_gettime_monotonic)

/*
 * int
 * __cp_clock_gettime_realtime(comm_page_t *cp, timespec_t *tsp)
 *
 * Stack usage: 0x18 local + 0x8 call + 0x28 called func. = 0x48 bytes
 *
 * %rsp+0x00 - comm_page_t *cp
 * %rsp+0x08 - timespec_t *tsp
 * %rsp+0x10 - int hres_lock
 */
	ENTRY_NP(__cp_clock_gettime_realtime)
	subq	$0x18, %rsp
	movq	%rdi, (%rsp)
	movq	%rsi, 0x8(%rsp)

1:
	movl	CP_HRES_LOCK(%rdi), %eax
	movl	%eax, 0x10(%rsp)

	call	__cp_gethrtime
	movq	(%rsp), %rdi
	movq	CP_HRES_LAST_TICK(%rdi), %rdx
	subq	%rdx, %rax			/* nslt = hrtime - last_tick */
	jb	1b
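	/*
	 * The branch above restarts the sequence when cp_hres_last_tick was
	 * updated after the hrtime snapshot was taken, which would make nslt
	 * negative.
	 */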
	movq	CP_HRESTIME(%rdi), %r9
	movq	_CONST(CP_HRESTIME + CP_HRESTIME_INCR)(%rdi), %r10
	movl	CP_HRESTIME_ADJ(%rdi), %r11d

	addq	%rax, %r10			/* now.tv_nsec += nslt */

	cmpl	$0, %r11d
	jb	4f				/* hres_adj > 0 */
	ja	6f				/* hres_adj < 0 */

2:
	cmpq	$NANOSEC, %r10
	jae	8f				/* tv_nsec >= NANOSEC */

3:
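	/*
	 * As in __cp_gethrtime, confirm that cp_hres_lock was even and
	 * unchanged across the reads above; otherwise start over.
	 */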
	movl	0x10(%rsp), %eax
	movl	CP_HRES_LOCK(%rdi), %edx
	andl	$0xfffffffe, %edx
	cmpl	%eax, %edx
	jne	1b

	movq	0x8(%rsp), %rsi
	movq	%r9, (%rsi)
	movq	%r10, 0x8(%rsi)

	xorl	%eax, %eax
	addq	$0x18, %rsp
	ret


4:						/* hres_adj > 0 */
	sarq	$ADJ_SHIFT, %rax
	cmpl	%r11d, %eax
	jbe	5f
	movl	%r11d, %eax
5:
	addq	%rax, %r10
	jmp	2b

6:						/* hres_adj < 0 */
	sarq	$ADJ_SHIFT, %rax
	negl	%r11d
	cmpl	%r11d, %eax
	jbe	7f
	movl	%r11d, %eax
7:
	subq	%rax, %r10
	jmp	2b

8:						/* tv_nsec >= NANOSEC */
	subq	$NANOSEC, %r10
	incq	%r9
	cmpq	$NANOSEC, %r10
	jae	8b
	jmp	3b

	SET_SIZE(__cp_clock_gettime_realtime)