1ed093b41SRobert Mustacchi /*
2ed093b41SRobert Mustacchi  * This file and its contents are supplied under the terms of the
3ed093b41SRobert Mustacchi  * Common Development and Distribution License ("CDDL"), version 1.0.
4ed093b41SRobert Mustacchi  * You may only use this file in accordance with the terms of version
5ed093b41SRobert Mustacchi  * 1.0 of the CDDL.
6ed093b41SRobert Mustacchi  *
7ed093b41SRobert Mustacchi  * A full copy of the text of the CDDL should have accompanied this
8ed093b41SRobert Mustacchi  * source.  A copy of the CDDL is also available via the Internet at
9ed093b41SRobert Mustacchi  * http://www.illumos.org/license/CDDL.
10ed093b41SRobert Mustacchi  */
11ed093b41SRobert Mustacchi 
12ed093b41SRobert Mustacchi /*
13ed093b41SRobert Mustacchi  * Copyright 2023 Oxide Computer Company
14ed093b41SRobert Mustacchi  */
15ed093b41SRobert Mustacchi 
16ed093b41SRobert Mustacchi /*
17ed093b41SRobert Mustacchi  * This file implements various utility functions we use for the xsave tests.
18ed093b41SRobert Mustacchi  */
19ed093b41SRobert Mustacchi 
20ed093b41SRobert Mustacchi #include <string.h>
21ed093b41SRobert Mustacchi #include <strings.h>
22ed093b41SRobert Mustacchi #include <sys/auxv.h>
23ed093b41SRobert Mustacchi #include <sys/sysmacros.h>
24ed093b41SRobert Mustacchi #include <err.h>
25ed093b41SRobert Mustacchi #include <stdlib.h>
26ed093b41SRobert Mustacchi #include <procfs.h>
27ed093b41SRobert Mustacchi #include <sys/x86_archext.h>
28ed093b41SRobert Mustacchi #include <unistd.h>
29ed093b41SRobert Mustacchi #include <errno.h>
30ed093b41SRobert Mustacchi #include <sys/types.h>
31ed093b41SRobert Mustacchi #include <sys/wait.h>
32ed093b41SRobert Mustacchi #include <sys/debug.h>
33ed093b41SRobert Mustacchi #include <ieeefp.h>
34ed093b41SRobert Mustacchi 
35ed093b41SRobert Mustacchi #include "xsave_util.h"
36ed093b41SRobert Mustacchi 
37ed093b41SRobert Mustacchi static uint_t xsu_proc_timeout = 60 * 1000; /* 60s in ms */
38ed093b41SRobert Mustacchi 
39ed093b41SRobert Mustacchi /*
40ed093b41SRobert Mustacchi  * Determine if we have the hardware support required for a given level of
41ed093b41SRobert Mustacchi  * hardware support.
42ed093b41SRobert Mustacchi  */
43ed093b41SRobert Mustacchi uint32_t
xsu_hwsupport(void)44ed093b41SRobert Mustacchi xsu_hwsupport(void)
45ed093b41SRobert Mustacchi {
46ed093b41SRobert Mustacchi 	uint_t isa[3];
47ed093b41SRobert Mustacchi 	uint_t nisa = getisax(isa, ARRAY_SIZE(isa));
48ed093b41SRobert Mustacchi 
49ed093b41SRobert Mustacchi 	if (nisa != ARRAY_SIZE(isa)) {
50*1e56f352SRobert Mustacchi 		errx(EXIT_FAILURE, "did not get all %zu hwcap values, found %u",
51ed093b41SRobert Mustacchi 		    ARRAY_SIZE(isa), nisa);
52ed093b41SRobert Mustacchi 	}
53ed093b41SRobert Mustacchi 
54ed093b41SRobert Mustacchi 	if ((isa[0] & AV_386_XSAVE) == 0) {
55ed093b41SRobert Mustacchi 		errx(EXIT_FAILURE, "xsave not present: this test should have "
56ed093b41SRobert Mustacchi 		    "been skipped");
57ed093b41SRobert Mustacchi 	}
58ed093b41SRobert Mustacchi 
59ed093b41SRobert Mustacchi 	if ((isa[1] & AV_386_2_AVX512F) != 0) {
60ed093b41SRobert Mustacchi 		warnx("found %%zmm support");
61ed093b41SRobert Mustacchi 		return (XSU_ZMM);
62ed093b41SRobert Mustacchi 	}
63ed093b41SRobert Mustacchi 
64ed093b41SRobert Mustacchi 	if ((isa[0] & AV_386_AVX) != 0) {
65ed093b41SRobert Mustacchi 		warnx("found %%ymm support");
66ed093b41SRobert Mustacchi 		return (XSU_YMM);
67ed093b41SRobert Mustacchi 	}
68ed093b41SRobert Mustacchi 
69ed093b41SRobert Mustacchi 	errx(EXIT_FAILURE, "no non-XMM xsave state found: this test should "
70ed093b41SRobert Mustacchi 	    "have been skipped");
71ed093b41SRobert Mustacchi }
72ed093b41SRobert Mustacchi 
73ed093b41SRobert Mustacchi /*
74ed093b41SRobert Mustacchi  * Fill all the valid regions of an FPU based on treating the vector register as
75ed093b41SRobert Mustacchi  * a series of uint32_t values and going from there.
76ed093b41SRobert Mustacchi  */
77ed093b41SRobert Mustacchi void
xsu_fill(xsu_fpu_t * fpu,uint32_t level,uint32_t start)78ed093b41SRobert Mustacchi xsu_fill(xsu_fpu_t *fpu, uint32_t level, uint32_t start)
79ed093b41SRobert Mustacchi {
80ed093b41SRobert Mustacchi 	(void) memset(fpu, 0, sizeof (xsu_fpu_t));
81ed093b41SRobert Mustacchi 
82ed093b41SRobert Mustacchi 	switch (level) {
83ed093b41SRobert Mustacchi 	default:
84ed093b41SRobert Mustacchi 		errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", level);
85ed093b41SRobert Mustacchi 	case XSU_YMM:
86ed093b41SRobert Mustacchi 		for (uint32_t regno = 0; regno < XSU_MAX_YMM; regno++) {
87ed093b41SRobert Mustacchi 			for (uint32_t u32 = 0; u32 < XSU_YMM_U32; u32++,
88ed093b41SRobert Mustacchi 			    start++) {
89ed093b41SRobert Mustacchi 				fpu->xf_reg[regno]._l[u32] = start;
90ed093b41SRobert Mustacchi 			}
91ed093b41SRobert Mustacchi 		}
92ed093b41SRobert Mustacchi 		break;
93ed093b41SRobert Mustacchi 	case XSU_ZMM:
94ed093b41SRobert Mustacchi 		for (uint32_t regno = 0; regno < XSU_MAX_ZMM; regno++) {
95ed093b41SRobert Mustacchi 			for (uint32_t u32 = 0; u32 < XSU_ZMM_U32; u32++,
96ed093b41SRobert Mustacchi 			    start++) {
97ed093b41SRobert Mustacchi 				fpu->xf_reg[regno]._l[u32] = start;
98ed093b41SRobert Mustacchi 			}
99ed093b41SRobert Mustacchi 		}
100ed093b41SRobert Mustacchi 		for (uint32_t regno = 0; regno < ARRAY_SIZE(fpu->xf_opmask);
101ed093b41SRobert Mustacchi 		    regno++) {
102ed093b41SRobert Mustacchi 			uint64_t val = start | (((uint64_t)start + 1) << 32);
103ed093b41SRobert Mustacchi 			fpu->xf_opmask[regno] = val;
104ed093b41SRobert Mustacchi 			start += 2;
105ed093b41SRobert Mustacchi 		}
106ed093b41SRobert Mustacchi 		break;
107ed093b41SRobert Mustacchi 	}
108ed093b41SRobert Mustacchi }
109ed093b41SRobert Mustacchi 
110ed093b41SRobert Mustacchi static void
xsu_overwrite_uctx_xmm(ucontext_t * uctx,const xsu_fpu_t * fpu)111ed093b41SRobert Mustacchi xsu_overwrite_uctx_xmm(ucontext_t *uctx, const xsu_fpu_t *fpu)
112ed093b41SRobert Mustacchi {
113ed093b41SRobert Mustacchi 	struct _fpchip_state *fp;
114ed093b41SRobert Mustacchi 
115ed093b41SRobert Mustacchi 	fp = &uctx->uc_mcontext.fpregs.fp_reg_set.fpchip_state;
116ed093b41SRobert Mustacchi 	for (uint32_t i = 0; i < XSU_MAX_XMM; i++) {
117ed093b41SRobert Mustacchi 		(void) memcpy(&fp->xmm[i], &fpu->xf_reg[i]._l[0],
118ed093b41SRobert Mustacchi 		    XSU_XMM_U32 * sizeof (uint32_t));
119ed093b41SRobert Mustacchi 	}
120ed093b41SRobert Mustacchi }
121ed093b41SRobert Mustacchi 
122ed093b41SRobert Mustacchi static void
xsu_overwrite_uctx_ymm(uintptr_t arg,const xsu_fpu_t * fpu)123ed093b41SRobert Mustacchi xsu_overwrite_uctx_ymm(uintptr_t arg, const xsu_fpu_t *fpu)
124ed093b41SRobert Mustacchi {
125ed093b41SRobert Mustacchi 	prxregset_ymm_t *ymm = (void *)arg;
126ed093b41SRobert Mustacchi 
127ed093b41SRobert Mustacchi 	for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
128ed093b41SRobert Mustacchi 		(void) memcpy(&ymm->prx_ymm[i]._l[0],
129ed093b41SRobert Mustacchi 		    &fpu->xf_reg[i]._l[XSU_XMM_U32],
130ed093b41SRobert Mustacchi 		    XSU_XMM_U32 * sizeof (uint32_t));
131ed093b41SRobert Mustacchi 	}
132ed093b41SRobert Mustacchi }
133ed093b41SRobert Mustacchi 
134ed093b41SRobert Mustacchi static void
xsu_overwrite_uctx_zmm(uintptr_t arg,const xsu_fpu_t * fpu)135ed093b41SRobert Mustacchi xsu_overwrite_uctx_zmm(uintptr_t arg, const xsu_fpu_t *fpu)
136ed093b41SRobert Mustacchi {
137ed093b41SRobert Mustacchi 	prxregset_zmm_t *zmm = (void *)arg;
138ed093b41SRobert Mustacchi 
139ed093b41SRobert Mustacchi 	/*
140ed093b41SRobert Mustacchi 	 * Because this is the low zmm registers, we actually use the max ymm
141ed093b41SRobert Mustacchi 	 * value as that's what actually fits in the low zmm and not the full
142ed093b41SRobert Mustacchi 	 * definition.
143ed093b41SRobert Mustacchi 	 */
144ed093b41SRobert Mustacchi 	for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
145ed093b41SRobert Mustacchi 		(void) memcpy(&zmm->prx_zmm[i]._l[0],
146ed093b41SRobert Mustacchi 		    &fpu->xf_reg[i]._l[XSU_YMM_U32],
147ed093b41SRobert Mustacchi 		    XSU_YMM_U32 * sizeof (uint32_t));
148ed093b41SRobert Mustacchi 	}
149ed093b41SRobert Mustacchi }
150ed093b41SRobert Mustacchi 
151ed093b41SRobert Mustacchi static void
xsu_overwrite_uctx_hi_zmm(uintptr_t arg,const xsu_fpu_t * fpu)152ed093b41SRobert Mustacchi xsu_overwrite_uctx_hi_zmm(uintptr_t arg, const xsu_fpu_t *fpu)
153ed093b41SRobert Mustacchi {
154ed093b41SRobert Mustacchi #ifdef __amd64
155ed093b41SRobert Mustacchi 	prxregset_hi_zmm_t *zmm = (void *)arg;
156ed093b41SRobert Mustacchi 
157ed093b41SRobert Mustacchi 	for (uint32_t i = XSU_MAX_YMM; i < XSU_MAX_ZMM; i++) {
158ed093b41SRobert Mustacchi 		(void) memcpy(&zmm->prx_hi_zmm[i - XSU_MAX_YMM]._l[0],
159ed093b41SRobert Mustacchi 		    &fpu->xf_reg[i]._l[0],
160ed093b41SRobert Mustacchi 		    XSU_ZMM_U32 * sizeof (uint32_t));
161ed093b41SRobert Mustacchi 	}
162ed093b41SRobert Mustacchi #else	/* !__amd64 */
163ed093b41SRobert Mustacchi 	warnx("attempted to set High ZMM registers on a 32-bit process!");
164ed093b41SRobert Mustacchi 	abort();
165ed093b41SRobert Mustacchi #endif	/* __amd64 */
166ed093b41SRobert Mustacchi }
167ed093b41SRobert Mustacchi 
168ed093b41SRobert Mustacchi void
xsu_overwrite_uctx(ucontext_t * uctx,const xsu_fpu_t * fpu,uint32_t hwsup)169ed093b41SRobert Mustacchi xsu_overwrite_uctx(ucontext_t *uctx, const xsu_fpu_t *fpu, uint32_t hwsup)
170ed093b41SRobert Mustacchi {
171ed093b41SRobert Mustacchi 	size_t xsave_size = sizeof (uc_xsave_t);
172ed093b41SRobert Mustacchi 	void *new_buf;
173ed093b41SRobert Mustacchi 	uc_xsave_t *ucs;
174ed093b41SRobert Mustacchi 	uintptr_t write_ptr;
175ed093b41SRobert Mustacchi 
176ed093b41SRobert Mustacchi 	if (hwsup != XSU_YMM && hwsup != XSU_ZMM) {
177ed093b41SRobert Mustacchi 		errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup);
178ed093b41SRobert Mustacchi 	}
179ed093b41SRobert Mustacchi 
180ed093b41SRobert Mustacchi 	if (hwsup >= XSU_YMM) {
181ed093b41SRobert Mustacchi 		xsave_size += sizeof (prxregset_ymm_t);
182ed093b41SRobert Mustacchi 	}
183ed093b41SRobert Mustacchi 
184ed093b41SRobert Mustacchi 	if (hwsup >= XSU_ZMM) {
185ed093b41SRobert Mustacchi 		xsave_size += sizeof (prxregset_zmm_t);
186ed093b41SRobert Mustacchi 		xsave_size += sizeof (prxregset_opmask_t);
187ed093b41SRobert Mustacchi 		if (XSU_MAX_ZMM > 16) {
188ed093b41SRobert Mustacchi 			xsave_size += sizeof (prxregset_hi_zmm_t);
189ed093b41SRobert Mustacchi 		}
190ed093b41SRobert Mustacchi 	}
191ed093b41SRobert Mustacchi 
192ed093b41SRobert Mustacchi 	new_buf = calloc(1, xsave_size);
193ed093b41SRobert Mustacchi 	if (new_buf == NULL) {
194ed093b41SRobert Mustacchi 		errx(EXIT_FAILURE, "failed to allocate xsave buf");
195ed093b41SRobert Mustacchi 	}
196ed093b41SRobert Mustacchi 	ucs = new_buf;
197ed093b41SRobert Mustacchi 	ucs->ucx_vers = UC_XSAVE_VERS;
198ed093b41SRobert Mustacchi 	ucs->ucx_len = xsave_size;
199ed093b41SRobert Mustacchi 	if (hwsup >= XSU_YMM) {
200ed093b41SRobert Mustacchi 		ucs->ucx_bv |= XFEATURE_AVX;
201ed093b41SRobert Mustacchi 	}
202ed093b41SRobert Mustacchi 
203ed093b41SRobert Mustacchi 	if (hwsup >= XSU_ZMM) {
204ed093b41SRobert Mustacchi 		ucs->ucx_bv |= XFEATURE_AVX512_OPMASK | XFEATURE_AVX512_ZMM;
205ed093b41SRobert Mustacchi 		if (XSU_MAX_ZMM > 16)
206ed093b41SRobert Mustacchi 			ucs->ucx_bv |= XFEATURE_AVX512_HI_ZMM;
207ed093b41SRobert Mustacchi 	}
208ed093b41SRobert Mustacchi 
209ed093b41SRobert Mustacchi 	/*
210ed093b41SRobert Mustacchi 	 * At this point we have rigged things up. XMM values are in the
211ed093b41SRobert Mustacchi 	 * ucontext_t itself. After that we must write things out in the kernel
212ed093b41SRobert Mustacchi 	 * signal order. Note, the XMM state is not set in the bit-vector
213ed093b41SRobert Mustacchi 	 * because well, we don't actually use the xsave pieces for it because o
214ed093b41SRobert Mustacchi 	 * the ucontext_t ABI has the xmm state always there. See
215ed093b41SRobert Mustacchi 	 * uts/intel/os/fpu.c's big theory statement for more info.
216ed093b41SRobert Mustacchi 	 */
217ed093b41SRobert Mustacchi 	xsu_overwrite_uctx_xmm(uctx, fpu);
218ed093b41SRobert Mustacchi 	write_ptr = (uintptr_t)new_buf + sizeof (uc_xsave_t);
219ed093b41SRobert Mustacchi 	if (hwsup >= XSU_YMM) {
220ed093b41SRobert Mustacchi 		xsu_overwrite_uctx_ymm(write_ptr, fpu);
221ed093b41SRobert Mustacchi 		write_ptr += sizeof (prxregset_ymm_t);
222ed093b41SRobert Mustacchi 	}
223ed093b41SRobert Mustacchi 
224ed093b41SRobert Mustacchi 	if (hwsup >= XSU_ZMM) {
225ed093b41SRobert Mustacchi 		(void) memcpy((void *)write_ptr, fpu->xf_opmask,
226ed093b41SRobert Mustacchi 		    sizeof (fpu->xf_opmask));
227ed093b41SRobert Mustacchi 		write_ptr += sizeof (fpu->xf_opmask);
228ed093b41SRobert Mustacchi 		xsu_overwrite_uctx_zmm(write_ptr, fpu);
229ed093b41SRobert Mustacchi 		write_ptr += sizeof (prxregset_zmm_t);
230ed093b41SRobert Mustacchi 		if (XSU_MAX_ZMM > 16) {
231ed093b41SRobert Mustacchi 			xsu_overwrite_uctx_hi_zmm(write_ptr, fpu);
232ed093b41SRobert Mustacchi 			write_ptr += sizeof (prxregset_hi_zmm_t);
233ed093b41SRobert Mustacchi 		}
234ed093b41SRobert Mustacchi 	}
235ed093b41SRobert Mustacchi 
236ed093b41SRobert Mustacchi 	uctx->uc_xsave = (long)(uintptr_t)new_buf;
237ed093b41SRobert Mustacchi }
238ed093b41SRobert Mustacchi 
239ed093b41SRobert Mustacchi static boolean_t
xsu_check_vector(const upad512_t * src,const upad512_t * chk,uint32_t regno,uint32_t nu32)240ed093b41SRobert Mustacchi xsu_check_vector(const upad512_t *src, const upad512_t *chk, uint32_t regno,
241ed093b41SRobert Mustacchi     uint32_t nu32)
242ed093b41SRobert Mustacchi {
243ed093b41SRobert Mustacchi 	boolean_t valid = B_TRUE;
244ed093b41SRobert Mustacchi 
245ed093b41SRobert Mustacchi 	for (uint32_t i = 0; i < nu32; i++) {
246ed093b41SRobert Mustacchi 		if (src->_l[i] != chk->_l[i]) {
247ed093b41SRobert Mustacchi 			warnx("vec[%u] u32 %u differs: expected 0x%x, "
248ed093b41SRobert Mustacchi 			    "found 0x%x", regno, i, src->_l[i], chk->_l[i]);
249ed093b41SRobert Mustacchi 			valid = B_FALSE;
250ed093b41SRobert Mustacchi 		}
251ed093b41SRobert Mustacchi 	}
252ed093b41SRobert Mustacchi 
253ed093b41SRobert Mustacchi 	return (valid);
254ed093b41SRobert Mustacchi }
255ed093b41SRobert Mustacchi 
256ed093b41SRobert Mustacchi boolean_t
xsu_same(const xsu_fpu_t * src,const xsu_fpu_t * check,uint32_t hwsup)257ed093b41SRobert Mustacchi xsu_same(const xsu_fpu_t *src, const xsu_fpu_t *check, uint32_t hwsup)
258ed093b41SRobert Mustacchi {
259ed093b41SRobert Mustacchi 	boolean_t valid = B_TRUE;
260ed093b41SRobert Mustacchi 
261ed093b41SRobert Mustacchi 	switch (hwsup) {
262ed093b41SRobert Mustacchi 	default:
263ed093b41SRobert Mustacchi 		errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup);
264ed093b41SRobert Mustacchi 	case XSU_YMM:
265ed093b41SRobert Mustacchi 		for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
266ed093b41SRobert Mustacchi 			if (!xsu_check_vector(&src->xf_reg[i],
267ed093b41SRobert Mustacchi 			    &check->xf_reg[i], i, XSU_YMM_U32)) {
268ed093b41SRobert Mustacchi 				valid = B_FALSE;
269ed093b41SRobert Mustacchi 			}
270ed093b41SRobert Mustacchi 		}
271ed093b41SRobert Mustacchi 		break;
272ed093b41SRobert Mustacchi 	case XSU_ZMM:
273ed093b41SRobert Mustacchi 		for (uint32_t i = 0; i < XSU_MAX_ZMM; i++) {
274ed093b41SRobert Mustacchi 			if (!xsu_check_vector(&src->xf_reg[i],
275ed093b41SRobert Mustacchi 			    &check->xf_reg[i], i, XSU_ZMM_U32)) {
276ed093b41SRobert Mustacchi 				valid = B_FALSE;
277ed093b41SRobert Mustacchi 			}
278ed093b41SRobert Mustacchi 		}
279ed093b41SRobert Mustacchi 		for (uint32_t i = 0; i < ARRAY_SIZE(src->xf_opmask); i++) {
280ed093b41SRobert Mustacchi 			if (src->xf_opmask[i] != check->xf_opmask[i]) {
281ed093b41SRobert Mustacchi 				warnx("mask[%u] differs: expected 0x%" PRIx64
282ed093b41SRobert Mustacchi 				    ", found 0x%" PRIx64, i, src->xf_opmask[i],
283ed093b41SRobert Mustacchi 				    check->xf_opmask[i]);
284ed093b41SRobert Mustacchi 				valid = B_FALSE;
285ed093b41SRobert Mustacchi 			}
286ed093b41SRobert Mustacchi 		}
287ed093b41SRobert Mustacchi 		break;
288ed093b41SRobert Mustacchi 	}
289ed093b41SRobert Mustacchi 	return (valid);
290ed093b41SRobert Mustacchi }
291ed093b41SRobert Mustacchi 
292ed093b41SRobert Mustacchi 
293ed093b41SRobert Mustacchi void *
xsu_sleeper_thread(void * arg __unused)294ed093b41SRobert Mustacchi xsu_sleeper_thread(void *arg __unused)
295ed093b41SRobert Mustacchi {
296ed093b41SRobert Mustacchi 	for (;;) {
297ed093b41SRobert Mustacchi 		(void) sleep(100);
298ed093b41SRobert Mustacchi 	}
299ed093b41SRobert Mustacchi 	return (NULL);
300ed093b41SRobert Mustacchi }
301ed093b41SRobert Mustacchi 
302ed093b41SRobert Mustacchi static void
xsu_dump_vector(FILE * f,const upad512_t * reg,uint32_t nu32,const char * name,uint32_t idx)303ed093b41SRobert Mustacchi xsu_dump_vector(FILE *f, const upad512_t *reg, uint32_t nu32, const char *name,
304ed093b41SRobert Mustacchi     uint32_t idx)
305ed093b41SRobert Mustacchi {
306ed093b41SRobert Mustacchi 	VERIFY3U(nu32 % 4, ==, 0);
307ed093b41SRobert Mustacchi 	for (uint32_t i = 0; i < nu32; i += 4) {
308ed093b41SRobert Mustacchi 		(void) fprintf(f, "%s[%02u] [%02u:%02u] = { 0x%08x 0x%08x "
309ed093b41SRobert Mustacchi 		    "0x%08x 0x%08x }\n", name, idx, i + 3, i,  reg->_l[i + 3],
310ed093b41SRobert Mustacchi 		    reg->_l[i + 2], reg->_l[i + 1], reg->_l[i]);
311ed093b41SRobert Mustacchi 	}
312ed093b41SRobert Mustacchi }
313ed093b41SRobert Mustacchi 
314ed093b41SRobert Mustacchi void
xsu_dump(FILE * f,const xsu_fpu_t * fpu,uint32_t hwsup)315ed093b41SRobert Mustacchi xsu_dump(FILE *f, const xsu_fpu_t *fpu, uint32_t hwsup)
316ed093b41SRobert Mustacchi {
317ed093b41SRobert Mustacchi 
318ed093b41SRobert Mustacchi 	switch (hwsup) {
319ed093b41SRobert Mustacchi 	default:
320ed093b41SRobert Mustacchi 		errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup);
321ed093b41SRobert Mustacchi 	case XSU_YMM:
322ed093b41SRobert Mustacchi 		for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
323ed093b41SRobert Mustacchi 			xsu_dump_vector(f, &fpu->xf_reg[i], XSU_YMM_U32,
324ed093b41SRobert Mustacchi 			    "ymm", i);
325ed093b41SRobert Mustacchi 		}
326ed093b41SRobert Mustacchi 		break;
327ed093b41SRobert Mustacchi 	case XSU_ZMM:
328ed093b41SRobert Mustacchi 		for (uint32_t i = 0; i < XSU_MAX_ZMM; i++) {
329ed093b41SRobert Mustacchi 			xsu_dump_vector(f, &fpu->xf_reg[i], XSU_ZMM_U32,
330ed093b41SRobert Mustacchi 			    "zmm", i);
331ed093b41SRobert Mustacchi 		}
332ed093b41SRobert Mustacchi 
333ed093b41SRobert Mustacchi 		for (uint32_t i = 0; i < ARRAY_SIZE(fpu->xf_opmask); i++) {
334ed093b41SRobert Mustacchi 			(void) fprintf(f, "%%k%u 0x%016" PRIx64"\n", i,
335ed093b41SRobert Mustacchi 			    fpu->xf_opmask[i]);
336ed093b41SRobert Mustacchi 		}
337ed093b41SRobert Mustacchi 		break;
338ed093b41SRobert Mustacchi 	}
339ed093b41SRobert Mustacchi }
340ed093b41SRobert Mustacchi 
341ed093b41SRobert Mustacchi typedef struct xsu_prx {
342ed093b41SRobert Mustacchi 	uint32_t xp_hwsup;
343ed093b41SRobert Mustacchi 	prxregset_xsave_t *xp_xsave;
344ed093b41SRobert Mustacchi 	prxregset_ymm_t *xp_ymm;
345ed093b41SRobert Mustacchi 	prxregset_opmask_t *xp_opmask;
346ed093b41SRobert Mustacchi 	prxregset_zmm_t *xp_zmm;
347ed093b41SRobert Mustacchi 	prxregset_hi_zmm_t *xp_hi_zmm;
348ed093b41SRobert Mustacchi } xsu_prx_t;
349ed093b41SRobert Mustacchi 
350ed093b41SRobert Mustacchi static void
xsu_fpu_to_xregs_xsave(xsu_prx_t * prx,const xsu_fpu_t * fpu)351ed093b41SRobert Mustacchi xsu_fpu_to_xregs_xsave(xsu_prx_t *prx, const xsu_fpu_t *fpu)
352ed093b41SRobert Mustacchi {
353ed093b41SRobert Mustacchi 	prx->xp_xsave->prx_fx_fcw = FPU_CW_INIT;
354ed093b41SRobert Mustacchi 	prx->xp_xsave->prx_fx_mxcsr = SSE_MXCSR_INIT;
355ed093b41SRobert Mustacchi 	for (uint32_t i = 0; i < XSU_MAX_XMM; i++) {
356ed093b41SRobert Mustacchi 		(void) memcpy(&prx->xp_xsave->prx_fx_xmm[i],
357ed093b41SRobert Mustacchi 		    &fpu->xf_reg[i]._l[0], XSU_XMM_U32 * sizeof (uint32_t));
358ed093b41SRobert Mustacchi 	}
359ed093b41SRobert Mustacchi 
360ed093b41SRobert Mustacchi 	prx->xp_xsave->prx_xsh_xstate_bv = XFEATURE_LEGACY_FP |
361ed093b41SRobert Mustacchi 	    XFEATURE_SSE;
362ed093b41SRobert Mustacchi 	if (prx->xp_hwsup >= XSU_YMM) {
363ed093b41SRobert Mustacchi 		prx->xp_xsave->prx_xsh_xstate_bv |= XFEATURE_AVX;
364ed093b41SRobert Mustacchi 	}
365ed093b41SRobert Mustacchi 
366ed093b41SRobert Mustacchi 	if (prx->xp_hwsup >= XSU_ZMM) {
367ed093b41SRobert Mustacchi 		prx->xp_xsave->prx_xsh_xstate_bv |= XFEATURE_AVX512;
368ed093b41SRobert Mustacchi 	}
369ed093b41SRobert Mustacchi }
370ed093b41SRobert Mustacchi 
371ed093b41SRobert Mustacchi static void
xsu_fpu_to_xregs_ymm(xsu_prx_t * prx,const xsu_fpu_t * fpu)372ed093b41SRobert Mustacchi xsu_fpu_to_xregs_ymm(xsu_prx_t *prx, const xsu_fpu_t *fpu)
373ed093b41SRobert Mustacchi {
374ed093b41SRobert Mustacchi 	/* Copy the upper 128-bits to the YMM save area */
375ed093b41SRobert Mustacchi 	for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
376ed093b41SRobert Mustacchi 		(void) memcpy(&prx->xp_ymm->prx_ymm[i],
377ed093b41SRobert Mustacchi 		    &fpu->xf_reg[i]._l[XSU_XMM_U32],
378ed093b41SRobert Mustacchi 		    XSU_XMM_U32 * sizeof (uint32_t));
379ed093b41SRobert Mustacchi 	}
380ed093b41SRobert Mustacchi }
381ed093b41SRobert Mustacchi 
382ed093b41SRobert Mustacchi static void
xsu_fpu_to_xregs_zmm(xsu_prx_t * prx,const xsu_fpu_t * fpu)383ed093b41SRobert Mustacchi xsu_fpu_to_xregs_zmm(xsu_prx_t *prx, const xsu_fpu_t *fpu)
384ed093b41SRobert Mustacchi {
385ed093b41SRobert Mustacchi 	/* The lower 16 regs are only 256-bit, the upper are 512-bit */
386ed093b41SRobert Mustacchi 	for (uint32_t i = 0; i < MIN(XSU_MAX_ZMM, 16); i++) {
387ed093b41SRobert Mustacchi 		(void) memcpy(&prx->xp_zmm->prx_zmm[i],
388ed093b41SRobert Mustacchi 		    &fpu->xf_reg[i]._l[XSU_YMM_U32],
389ed093b41SRobert Mustacchi 		    XSU_YMM_U32 * sizeof (uint32_t));
390ed093b41SRobert Mustacchi 	}
391ed093b41SRobert Mustacchi 
392ed093b41SRobert Mustacchi #ifdef __amd64
393ed093b41SRobert Mustacchi 	for (uint32_t i = 16; i < XSU_MAX_ZMM; i++) {
394ed093b41SRobert Mustacchi 		(void) memcpy(&prx->xp_hi_zmm->prx_hi_zmm[i - 16],
395ed093b41SRobert Mustacchi 		    &fpu->xf_reg[i]._l[0],
396ed093b41SRobert Mustacchi 		    XSU_ZMM_U32 * sizeof (uint32_t));
397ed093b41SRobert Mustacchi 	}
398ed093b41SRobert Mustacchi #endif
399ed093b41SRobert Mustacchi 
400ed093b41SRobert Mustacchi 	(void) memcpy(prx->xp_opmask->prx_opmask, fpu->xf_opmask,
401ed093b41SRobert Mustacchi 	    sizeof (prx->xp_opmask->prx_opmask));
402ed093b41SRobert Mustacchi }
403ed093b41SRobert Mustacchi 
404ed093b41SRobert Mustacchi 
405ed093b41SRobert Mustacchi void
xsu_fpu_to_xregs(const xsu_fpu_t * fpu,uint32_t hwsup,prxregset_t ** prxp,size_t * sizep)406ed093b41SRobert Mustacchi xsu_fpu_to_xregs(const xsu_fpu_t *fpu, uint32_t hwsup, prxregset_t **prxp,
407ed093b41SRobert Mustacchi     size_t *sizep)
408ed093b41SRobert Mustacchi {
409ed093b41SRobert Mustacchi 	uint32_t ninfo = 1, curinfo;
410ed093b41SRobert Mustacchi 	size_t len = sizeof (prxregset_hdr_t) + sizeof (prxregset_info_t) +
411ed093b41SRobert Mustacchi 	    sizeof (prxregset_xsave_t);
412ed093b41SRobert Mustacchi 	prxregset_hdr_t *hdr;
413ed093b41SRobert Mustacchi 	uint32_t off;
414ed093b41SRobert Mustacchi 	xsu_prx_t prx;
415ed093b41SRobert Mustacchi 
416ed093b41SRobert Mustacchi 	if (hwsup != XSU_YMM && hwsup != XSU_ZMM) {
417ed093b41SRobert Mustacchi 		errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup);
418ed093b41SRobert Mustacchi 	}
419ed093b41SRobert Mustacchi 
420ed093b41SRobert Mustacchi 	if (hwsup >= XSU_YMM) {
421ed093b41SRobert Mustacchi 		len += sizeof (prxregset_info_t) + sizeof (prxregset_ymm_t);
422ed093b41SRobert Mustacchi 		ninfo++;
423ed093b41SRobert Mustacchi 	}
424ed093b41SRobert Mustacchi 
425ed093b41SRobert Mustacchi 	if (hwsup >= XSU_ZMM) {
426ed093b41SRobert Mustacchi 		len += 3 * sizeof (prxregset_info_t) +
427ed093b41SRobert Mustacchi 		    sizeof (prxregset_opmask_t) + sizeof (prxregset_zmm_t) +
428ed093b41SRobert Mustacchi 		    sizeof (prxregset_hi_zmm_t);
429ed093b41SRobert Mustacchi 		ninfo += 3;
430ed093b41SRobert Mustacchi 	}
431ed093b41SRobert Mustacchi 
432ed093b41SRobert Mustacchi 	hdr = calloc(1, len);
433ed093b41SRobert Mustacchi 	if (hdr == NULL) {
434ed093b41SRobert Mustacchi 		err(EXIT_FAILURE, "failed to allocate prxregset_t (%zu bytes)",
435ed093b41SRobert Mustacchi 		    len);
436ed093b41SRobert Mustacchi 	}
437ed093b41SRobert Mustacchi 	(void) memset(&prx, 0, sizeof (prx));
438ed093b41SRobert Mustacchi 	prx.xp_hwsup = hwsup;
439ed093b41SRobert Mustacchi 
440ed093b41SRobert Mustacchi #ifdef __amd64
441ed093b41SRobert Mustacchi 	VERIFY3U(len, <=, UINT32_MAX);
442ed093b41SRobert Mustacchi #endif	/* __amd64 */
443ed093b41SRobert Mustacchi 	hdr->pr_type = PR_TYPE_XSAVE;
444ed093b41SRobert Mustacchi 	hdr->pr_size = (uint32_t)len;
445ed093b41SRobert Mustacchi 	hdr->pr_ninfo = ninfo;
446ed093b41SRobert Mustacchi 
447ed093b41SRobert Mustacchi 	curinfo = 0;
448ed093b41SRobert Mustacchi 	off = sizeof (prxregset_hdr_t) + sizeof (prxregset_info_t) * ninfo;
449ed093b41SRobert Mustacchi 	hdr->pr_info[curinfo].pri_type = PRX_INFO_XSAVE;
450ed093b41SRobert Mustacchi 	hdr->pr_info[curinfo].pri_size = sizeof (prxregset_xsave_t);
451ed093b41SRobert Mustacchi 	hdr->pr_info[curinfo].pri_offset = off;
452ed093b41SRobert Mustacchi 	prx.xp_xsave = (void *)((uintptr_t)hdr + off);
453ed093b41SRobert Mustacchi 	off += sizeof (prxregset_xsave_t);
454ed093b41SRobert Mustacchi 	curinfo++;
455ed093b41SRobert Mustacchi 
456ed093b41SRobert Mustacchi 	if (hwsup >= XSU_YMM) {
457ed093b41SRobert Mustacchi 		hdr->pr_info[curinfo].pri_type = PRX_INFO_YMM;
458ed093b41SRobert Mustacchi 		hdr->pr_info[curinfo].pri_size = sizeof (prxregset_ymm_t);
459ed093b41SRobert Mustacchi 		hdr->pr_info[curinfo].pri_offset = off;
460ed093b41SRobert Mustacchi 		prx.xp_ymm = (void *)((uintptr_t)hdr + off);
461ed093b41SRobert Mustacchi 		off += sizeof (prxregset_ymm_t);
462ed093b41SRobert Mustacchi 		curinfo++;
463ed093b41SRobert Mustacchi 	}
464ed093b41SRobert Mustacchi 
465ed093b41SRobert Mustacchi 	if (hwsup >= XSU_ZMM) {
466ed093b41SRobert Mustacchi 		hdr->pr_info[curinfo].pri_type = PRX_INFO_OPMASK;
467ed093b41SRobert Mustacchi 		hdr->pr_info[curinfo].pri_size = sizeof (prxregset_opmask_t);
468ed093b41SRobert Mustacchi 		hdr->pr_info[curinfo].pri_offset = off;
469ed093b41SRobert Mustacchi 		prx.xp_opmask = (void *)((uintptr_t)hdr + off);
470ed093b41SRobert Mustacchi 		off += sizeof (prxregset_opmask_t);
471ed093b41SRobert Mustacchi 		curinfo++;
472ed093b41SRobert Mustacchi 
473ed093b41SRobert Mustacchi 		hdr->pr_info[curinfo].pri_type = PRX_INFO_ZMM;
474ed093b41SRobert Mustacchi 		hdr->pr_info[curinfo].pri_size = sizeof (prxregset_zmm_t);
475ed093b41SRobert Mustacchi 		hdr->pr_info[curinfo].pri_offset = off;
476ed093b41SRobert Mustacchi 		prx.xp_zmm = (void *)((uintptr_t)hdr + off);
477ed093b41SRobert Mustacchi 		off += sizeof (prxregset_zmm_t);
478ed093b41SRobert Mustacchi 		curinfo++;
479ed093b41SRobert Mustacchi 
480ed093b41SRobert Mustacchi 		hdr->pr_info[curinfo].pri_type = PRX_INFO_HI_ZMM;
481ed093b41SRobert Mustacchi 		hdr->pr_info[curinfo].pri_size = sizeof (prxregset_hi_zmm_t);
482ed093b41SRobert Mustacchi 		hdr->pr_info[curinfo].pri_offset = off;
483ed093b41SRobert Mustacchi 		prx.xp_hi_zmm = (void *)((uintptr_t)hdr + off);
484ed093b41SRobert Mustacchi 		off += sizeof (prxregset_hi_zmm_t);
485ed093b41SRobert Mustacchi 		curinfo++;
486ed093b41SRobert Mustacchi 	}
487ed093b41SRobert Mustacchi 
488ed093b41SRobert Mustacchi 	xsu_fpu_to_xregs_xsave(&prx, fpu);
489ed093b41SRobert Mustacchi 	if (hwsup >= XSU_YMM) {
490ed093b41SRobert Mustacchi 		xsu_fpu_to_xregs_ymm(&prx, fpu);
491ed093b41SRobert Mustacchi 	}
492ed093b41SRobert Mustacchi 
493ed093b41SRobert Mustacchi 	if (hwsup >= XSU_ZMM) {
494ed093b41SRobert Mustacchi 		xsu_fpu_to_xregs_zmm(&prx, fpu);
495ed093b41SRobert Mustacchi 	}
496ed093b41SRobert Mustacchi 
497ed093b41SRobert Mustacchi 	*prxp = (prxregset_t *)hdr;
498ed093b41SRobert Mustacchi 	*sizep = len;
499ed093b41SRobert Mustacchi }
500ed093b41SRobert Mustacchi 
501ed093b41SRobert Mustacchi /*
502ed093b41SRobert Mustacchi  * This pairs with xsu_proc_finish() below. The goal is to allow us to inject
503ed093b41SRobert Mustacchi  * state after hitting a breakpoint, which is generally used right before
504ed093b41SRobert Mustacchi  * something wants to print data.
505ed093b41SRobert Mustacchi  */
506ed093b41SRobert Mustacchi void
xsu_proc_bkpt(xsu_proc_t * xp)507ed093b41SRobert Mustacchi xsu_proc_bkpt(xsu_proc_t *xp)
508ed093b41SRobert Mustacchi {
509ed093b41SRobert Mustacchi 	int perr;
510ed093b41SRobert Mustacchi 	struct ps_prochandle *P;
511ed093b41SRobert Mustacchi 	char *const argv[3] = { xp->xp_prog, xp->xp_arg, NULL };
512ed093b41SRobert Mustacchi 	GElf_Sym sym;
513ed093b41SRobert Mustacchi 
514ed093b41SRobert Mustacchi 	P = Pcreate(xp->xp_prog, argv, &perr, NULL, 0);
515ed093b41SRobert Mustacchi 	if (P == NULL) {
516ed093b41SRobert Mustacchi 		errx(EXIT_FAILURE, "failed to create %s: %s", xp->xp_prog,
517ed093b41SRobert Mustacchi 		    Pcreate_error(perr));
518ed093b41SRobert Mustacchi 	}
519ed093b41SRobert Mustacchi 
520ed093b41SRobert Mustacchi 	xp->xp_proc = P;
521ed093b41SRobert Mustacchi 	(void) Punsetflags(P, PR_RLC);
522ed093b41SRobert Mustacchi 	if (Psetflags(P, PR_KLC | PR_BPTADJ) != 0) {
523ed093b41SRobert Mustacchi 		int e = errno;
524ed093b41SRobert Mustacchi 		Prelease(P, PRELEASE_KILL);
525ed093b41SRobert Mustacchi 		errc(EXIT_FAILURE, e, "failed to set PR_KLC | PR_BPTADJ flags");
526ed093b41SRobert Mustacchi 	}
527ed093b41SRobert Mustacchi 
528ed093b41SRobert Mustacchi 	if (Pxlookup_by_name(P, LM_ID_BASE, xp->xp_object, xp->xp_symname, &sym,
529ed093b41SRobert Mustacchi 	    NULL) != 0) {
530ed093b41SRobert Mustacchi 		err(EXIT_FAILURE, "failed to find %s`%s", xp->xp_object,
531ed093b41SRobert Mustacchi 		    xp->xp_symname);
532ed093b41SRobert Mustacchi 	}
533ed093b41SRobert Mustacchi 
534ed093b41SRobert Mustacchi 	if (Pfault(P, FLTBPT, 1) != 0) {
535ed093b41SRobert Mustacchi 		errx(EXIT_FAILURE, "failed to set the FLTBPT disposition");
536ed093b41SRobert Mustacchi 	}
537ed093b41SRobert Mustacchi 
538ed093b41SRobert Mustacchi 	xp->xp_addr = sym.st_value;
539ed093b41SRobert Mustacchi 	if (Psetbkpt(P, sym.st_value, &xp->xp_instr) != 0) {
540ed093b41SRobert Mustacchi 		err(EXIT_FAILURE, "failed to set breakpoint on xsu_getfpu "
541ed093b41SRobert Mustacchi 		    "(0x%" PRIx64 ")", sym.st_value);
542ed093b41SRobert Mustacchi 	}
543ed093b41SRobert Mustacchi 
544ed093b41SRobert Mustacchi 	if (Psetrun(P, 0, 0) != 0) {
545ed093b41SRobert Mustacchi 		err(EXIT_FAILURE, "failed to resume running our target");
546ed093b41SRobert Mustacchi 	}
547ed093b41SRobert Mustacchi 
548ed093b41SRobert Mustacchi 	if (Pwait(P, xsu_proc_timeout) != 0) {
549ed093b41SRobert Mustacchi 		err(EXIT_FAILURE, "%s did not hit our expected breakpoint",
550ed093b41SRobert Mustacchi 		    argv[1]);
551ed093b41SRobert Mustacchi 	}
552ed093b41SRobert Mustacchi }
553ed093b41SRobert Mustacchi 
554ed093b41SRobert Mustacchi /*
555ed093b41SRobert Mustacchi  * Run a process to completion and get its wait exit status.
556ed093b41SRobert Mustacchi  */
557ed093b41SRobert Mustacchi void
xsu_proc_finish(xsu_proc_t * xp)558ed093b41SRobert Mustacchi xsu_proc_finish(xsu_proc_t *xp)
559ed093b41SRobert Mustacchi {
560ed093b41SRobert Mustacchi 	pid_t pid = Ppsinfo(xp->xp_proc)->pr_pid;
561ed093b41SRobert Mustacchi 
562ed093b41SRobert Mustacchi 	if (Pdelbkpt(xp->xp_proc, xp->xp_addr, xp->xp_instr) != 0) {
563ed093b41SRobert Mustacchi 		err(EXIT_FAILURE, "failed to delete %s`%s() breakpoint",
564ed093b41SRobert Mustacchi 		    xp->xp_object, xp->xp_symname);
565ed093b41SRobert Mustacchi 	}
566ed093b41SRobert Mustacchi 
567ed093b41SRobert Mustacchi 	if (Psetrun(xp->xp_proc, 0, PRCFAULT) != 0) {
568ed093b41SRobert Mustacchi 		err(EXIT_FAILURE, "failed to resume running our target");
569ed093b41SRobert Mustacchi 	}
570ed093b41SRobert Mustacchi 
571ed093b41SRobert Mustacchi 	if (waitpid(pid, &xp->xp_wait, 0) != pid) {
572*1e56f352SRobert Mustacchi 		err(EXIT_FAILURE, "failed to get our child processes's (%"
573*1e56f352SRobert Mustacchi 		    _PRIdID "), wait info", pid);
574ed093b41SRobert Mustacchi 	}
575ed093b41SRobert Mustacchi 
576ed093b41SRobert Mustacchi 	if (WIFEXITED(xp->xp_wait) == 0) {
577ed093b41SRobert Mustacchi 		errx(EXIT_FAILURE, "our child process didn't actually exit!");
578ed093b41SRobert Mustacchi 	}
579ed093b41SRobert Mustacchi 
580ed093b41SRobert Mustacchi 	Pfree(xp->xp_proc);
581ed093b41SRobert Mustacchi 	xp->xp_proc = NULL;
582ed093b41SRobert Mustacchi }
583ed093b41SRobert Mustacchi 
584ed093b41SRobert Mustacchi void
xsu_fpregset_xmm_set(fpregset_t * fpr,uint32_t seed)585ed093b41SRobert Mustacchi xsu_fpregset_xmm_set(fpregset_t *fpr, uint32_t seed)
586ed093b41SRobert Mustacchi {
587ed093b41SRobert Mustacchi 	size_t nregs = ARRAY_SIZE(fpr->fp_reg_set.fpchip_state.xmm);
588ed093b41SRobert Mustacchi 	for (uint32_t i = 0; i < nregs; i++) {
589ed093b41SRobert Mustacchi 		upad128_t *u128 = &fpr->fp_reg_set.fpchip_state.xmm[i];
590ed093b41SRobert Mustacchi 		for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++, seed++) {
591ed093b41SRobert Mustacchi 			u128->_l[u32] = seed;
592ed093b41SRobert Mustacchi 		}
593ed093b41SRobert Mustacchi 	}
594ed093b41SRobert Mustacchi }
595ed093b41SRobert Mustacchi 
596ed093b41SRobert Mustacchi void
xsu_xregs_xmm_set(prxregset_t * prx,uint32_t seed)597ed093b41SRobert Mustacchi xsu_xregs_xmm_set(prxregset_t *prx, uint32_t seed)
598ed093b41SRobert Mustacchi {
599ed093b41SRobert Mustacchi 	prxregset_hdr_t *hdr = (prxregset_hdr_t *)prx;
600ed093b41SRobert Mustacchi 	prxregset_xsave_t *xsave = NULL;
601ed093b41SRobert Mustacchi 
602ed093b41SRobert Mustacchi 	for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
603ed093b41SRobert Mustacchi 		if (hdr->pr_info[i].pri_type == PRX_INFO_XSAVE) {
604ed093b41SRobert Mustacchi 			xsave = (void *)((uintptr_t)prx +
605ed093b41SRobert Mustacchi 			    hdr->pr_info[i].pri_offset);
606ed093b41SRobert Mustacchi 			break;
607ed093b41SRobert Mustacchi 		}
608ed093b41SRobert Mustacchi 	}
609ed093b41SRobert Mustacchi 
610ed093b41SRobert Mustacchi 	if (xsave == NULL) {
611ed093b41SRobert Mustacchi 		errx(EXIT_FAILURE, "asked to set xsave %%xmm regs, but no "
612ed093b41SRobert Mustacchi 		    "xsave info present");
613ed093b41SRobert Mustacchi 	}
614ed093b41SRobert Mustacchi 
615ed093b41SRobert Mustacchi 	size_t nregs = ARRAY_SIZE(xsave->prx_fx_xmm);
616ed093b41SRobert Mustacchi 	for (uint32_t i = 0; i < nregs; i++) {
617ed093b41SRobert Mustacchi 		for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++, seed++) {
618ed093b41SRobert Mustacchi 			xsave->prx_fx_xmm[i]._l[u32] = seed;
619ed093b41SRobert Mustacchi 		}
620ed093b41SRobert Mustacchi 	}
621ed093b41SRobert Mustacchi }
622ed093b41SRobert Mustacchi 
623ed093b41SRobert Mustacchi static const prxregset_info_t *
xsu_xregs_find_comp(const prxregset_hdr_t * hdr,uint32_t comp,uintptr_t * datap)624ed093b41SRobert Mustacchi xsu_xregs_find_comp(const prxregset_hdr_t *hdr, uint32_t comp, uintptr_t *datap)
625ed093b41SRobert Mustacchi {
626ed093b41SRobert Mustacchi 	for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
627ed093b41SRobert Mustacchi 		if (hdr->pr_info[i].pri_type == comp) {
628ed093b41SRobert Mustacchi 			*datap = (uintptr_t)hdr + hdr->pr_info[i].pri_offset;
629ed093b41SRobert Mustacchi 			return (&hdr->pr_info[i]);
630ed093b41SRobert Mustacchi 		}
631ed093b41SRobert Mustacchi 	}
632ed093b41SRobert Mustacchi 
633ed093b41SRobert Mustacchi 	return (NULL);
634ed093b41SRobert Mustacchi }
635ed093b41SRobert Mustacchi 
636ed093b41SRobert Mustacchi boolean_t
xsu_xregs_comp_equal(const prxregset_t * src,const prxregset_t * dest,uint32_t comp)637ed093b41SRobert Mustacchi xsu_xregs_comp_equal(const prxregset_t *src, const prxregset_t *dest,
638ed093b41SRobert Mustacchi     uint32_t comp)
639ed093b41SRobert Mustacchi {
640ed093b41SRobert Mustacchi 	const prxregset_hdr_t *shdr = (prxregset_hdr_t *)src;
641ed093b41SRobert Mustacchi 	const prxregset_hdr_t *dhdr = (prxregset_hdr_t *)dest;
642ed093b41SRobert Mustacchi 	const prxregset_info_t *sinfo = NULL, *dinfo = NULL;
643ed093b41SRobert Mustacchi 	uintptr_t sdata, ddata;
644ed093b41SRobert Mustacchi 
645ed093b41SRobert Mustacchi 	sinfo = xsu_xregs_find_comp(shdr, comp, &sdata);
646ed093b41SRobert Mustacchi 	if (sinfo == NULL) {
647ed093b41SRobert Mustacchi 		warnx("source xregs missing component %u", comp);
648ed093b41SRobert Mustacchi 		return (B_FALSE);
649ed093b41SRobert Mustacchi 	}
650ed093b41SRobert Mustacchi 
651ed093b41SRobert Mustacchi 	dinfo = xsu_xregs_find_comp(dhdr, comp, &ddata);
652ed093b41SRobert Mustacchi 	if (dinfo == NULL) {
653ed093b41SRobert Mustacchi 		warnx("destination xregs missing component %u", comp);
654ed093b41SRobert Mustacchi 		return (B_FALSE);
655ed093b41SRobert Mustacchi 	}
656ed093b41SRobert Mustacchi 
657ed093b41SRobert Mustacchi 	if (sinfo->pri_size != dinfo->pri_size) {
658ed093b41SRobert Mustacchi 		warnx("source xregs length 0x%x does not match dest xregs 0x%x",
659ed093b41SRobert Mustacchi 		    sinfo->pri_size, dinfo->pri_size);
660ed093b41SRobert Mustacchi 	}
661ed093b41SRobert Mustacchi 
662ed093b41SRobert Mustacchi 	if (bcmp((void *)sdata, (void *)ddata, sinfo->pri_size) != 0) {
663ed093b41SRobert Mustacchi 		warnx("component data differs: dumping!");
664ed093b41SRobert Mustacchi 		for (uint32_t i = 0; i < sinfo->pri_offset; i++) {
665ed093b41SRobert Mustacchi 			const uint8_t *su8 = (uint8_t *)sdata;
666ed093b41SRobert Mustacchi 			const uint8_t *du8 = (uint8_t *)ddata;
667ed093b41SRobert Mustacchi 
668ed093b41SRobert Mustacchi 			if (su8[i] != du8[i]) {
669ed093b41SRobert Mustacchi 				(void) fprintf(stderr,
670ed093b41SRobert Mustacchi 				    "src[%u] = 0x%2x\tdst[%u] = 0x%x\n",
671ed093b41SRobert Mustacchi 				    i, su8[i], i, du8[i]);
672ed093b41SRobert Mustacchi 			}
673ed093b41SRobert Mustacchi 		}
674ed093b41SRobert Mustacchi 
675ed093b41SRobert Mustacchi 		return (B_FALSE);
676ed093b41SRobert Mustacchi 	}
677ed093b41SRobert Mustacchi 
678ed093b41SRobert Mustacchi 	return (B_TRUE);
679ed093b41SRobert Mustacchi }
680ed093b41SRobert Mustacchi 
681ed093b41SRobert Mustacchi boolean_t
xsu_fpregs_cmp(const fpregset_t * fpr,const prxregset_t * prx)682ed093b41SRobert Mustacchi xsu_fpregs_cmp(const fpregset_t *fpr, const prxregset_t *prx)
683ed093b41SRobert Mustacchi {
684ed093b41SRobert Mustacchi 	boolean_t valid = B_TRUE;
685ed093b41SRobert Mustacchi 	const prxregset_hdr_t *hdr = (prxregset_hdr_t *)prx;
686ed093b41SRobert Mustacchi 	const prxregset_xsave_t *xsave = NULL;
687ed093b41SRobert Mustacchi 	uint16_t fpr_cw, fpr_sw;
688ed093b41SRobert Mustacchi 
689ed093b41SRobert Mustacchi 	for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
690ed093b41SRobert Mustacchi 		if (hdr->pr_info[i].pri_type == PRX_INFO_XSAVE) {
691ed093b41SRobert Mustacchi 			xsave = (void *)((uintptr_t)prx +
692ed093b41SRobert Mustacchi 			    hdr->pr_info[i].pri_offset);
693ed093b41SRobert Mustacchi 			break;
694ed093b41SRobert Mustacchi 		}
695ed093b41SRobert Mustacchi 	}
696ed093b41SRobert Mustacchi 
697ed093b41SRobert Mustacchi 	if (xsave == NULL) {
698ed093b41SRobert Mustacchi 		warnx("xregs missing xsave component for fpregs comparison");
699ed093b41SRobert Mustacchi 		return (B_FALSE);
700ed093b41SRobert Mustacchi 	}
701ed093b41SRobert Mustacchi 
702ed093b41SRobert Mustacchi 	/*
703ed093b41SRobert Mustacchi 	 * First check the XMM registers because those don't require ifdefs,
704ed093b41SRobert Mustacchi 	 * thankfully.
705ed093b41SRobert Mustacchi 	 */
706ed093b41SRobert Mustacchi 	size_t nregs = ARRAY_SIZE(fpr->fp_reg_set.fpchip_state.xmm);
707ed093b41SRobert Mustacchi 	for (size_t i = 0; i < nregs; i++) {
708ed093b41SRobert Mustacchi 		const upad128_t *u128 = &fpr->fp_reg_set.fpchip_state.xmm[i];
709ed093b41SRobert Mustacchi 		for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++) {
710ed093b41SRobert Mustacchi 			if (u128->_l[u32] != xsave->prx_fx_xmm[i]._l[u32]) {
711ed093b41SRobert Mustacchi 				valid = B_FALSE;
712ed093b41SRobert Mustacchi 				(void) fprintf(stderr, "fpregset xmm[%u] "
713ed093b41SRobert Mustacchi 				    "u32[%u] does not match xsave, fpregset: "
714ed093b41SRobert Mustacchi 				    "0x%x, xsave: 0x%x\n", i, u32,
715ed093b41SRobert Mustacchi 				    u128->_l[u32],
716ed093b41SRobert Mustacchi 				    xsave->prx_fx_xmm[i]._l[u32]);
717ed093b41SRobert Mustacchi 			}
718ed093b41SRobert Mustacchi 		}
719ed093b41SRobert Mustacchi 	}
720ed093b41SRobert Mustacchi 
721ed093b41SRobert Mustacchi 	if (xsave->prx_fx_mxcsr != fpr->fp_reg_set.fpchip_state.mxcsr) {
722ed093b41SRobert Mustacchi 		valid = B_FALSE;
723ed093b41SRobert Mustacchi 		(void) fprintf(stderr, "mxcsr mismatched: fpregset: 0x%x, "
724ed093b41SRobert Mustacchi 		    "xsave: 0x%x\n", fpr->fp_reg_set.fpchip_state.mxcsr,
725ed093b41SRobert Mustacchi 		    xsave->prx_fx_mxcsr);
726ed093b41SRobert Mustacchi 	}
727ed093b41SRobert Mustacchi 
728ed093b41SRobert Mustacchi 	/*
729ed093b41SRobert Mustacchi 	 * Extract the basic x87 state. This requires ifdefs because the 32-bit
730ed093b41SRobert Mustacchi 	 * ABI here is a bit, particular. The 32-bit fpregs is the mcontext_t
731ed093b41SRobert Mustacchi 	 * struct which is mostly opaque and we need to use the ieeefp.h types
732ed093b41SRobert Mustacchi 	 * which are only visible for ILP32. It also treats 16-bit values as
733ed093b41SRobert Mustacchi 	 * 32-bit ones, hence masking below.
734ed093b41SRobert Mustacchi 	 */
735ed093b41SRobert Mustacchi #ifdef __amd64
736ed093b41SRobert Mustacchi 	fpr_cw = fpr->fp_reg_set.fpchip_state.cw;
737ed093b41SRobert Mustacchi 	fpr_sw = fpr->fp_reg_set.fpchip_state.sw;
738ed093b41SRobert Mustacchi #else	/* !__amd64 (__i386) */
739ed093b41SRobert Mustacchi 	struct _fpstate fps;
740ed093b41SRobert Mustacchi 
741ed093b41SRobert Mustacchi 	(void) memcpy(&fps, &fpr->fp_reg_set.fpchip_state, sizeof (fps));
742ed093b41SRobert Mustacchi 	fpr_cw = fps.cw & 0xffff;
743ed093b41SRobert Mustacchi 	fpr_sw = fps.sw & 0xffff;
744ed093b41SRobert Mustacchi #endif	/* __amd64 */
745ed093b41SRobert Mustacchi 
746ed093b41SRobert Mustacchi 	if (fpr_cw != xsave->prx_fx_fcw) {
747ed093b41SRobert Mustacchi 		valid = B_FALSE;
748ed093b41SRobert Mustacchi 		(void) fprintf(stderr, "x87 cw mismatched: fpregset: 0x%x, "
749ed093b41SRobert Mustacchi 		    "xsave: 0x%x\n", fpr_cw, xsave->prx_fx_fcw);
750ed093b41SRobert Mustacchi 	}
751ed093b41SRobert Mustacchi 
752ed093b41SRobert Mustacchi 	if (fpr_sw != xsave->prx_fx_fsw) {
753ed093b41SRobert Mustacchi 		valid = B_FALSE;
754ed093b41SRobert Mustacchi 		(void) fprintf(stderr, "x87 sw mismatched: fpregset: 0x%x, "
755ed093b41SRobert Mustacchi 		    "xsave: 0x%x\n", fpr_sw, xsave->prx_fx_fsw);
756ed093b41SRobert Mustacchi 	}
757ed093b41SRobert Mustacchi 
758ed093b41SRobert Mustacchi 	return (valid);
759ed093b41SRobert Mustacchi }
760ed093b41SRobert Mustacchi 
/*
 * Point a ucontext_t's uc_stack at a zeroed stack buffer. The buffer is
 * allocated on the first call, sized to sysconf(_SC_THREAD_STACK_MIN), and the
 * very same buffer is handed out on every subsequent call. Failure to size or
 * allocate the buffer is fatal.
 */
void
xsu_ustack_alloc(ucontext_t *ctx)
{
	static void *stack_buf = NULL;
	static size_t stack_len = 0;

	/* A zero length means the shared buffer has not been set up yet. */
	if (stack_len == 0) {
		const long min = sysconf(_SC_THREAD_STACK_MIN);

		if (min == -1) {
			err(EXIT_FAILURE, "failed to get minimum stack size");
		}
		stack_len = (size_t)min;

		stack_buf = calloc(stack_len, sizeof (uint8_t));
		if (stack_buf == NULL) {
			err(EXIT_FAILURE, "failed to allocate stack buffer");
		}
	}

	ctx->uc_stack.ss_sp = stack_buf;
	ctx->uc_stack.ss_size = stack_len;
	ctx->uc_stack.ss_flags = 0;
}
784