1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/* VIS floating point instruction simulator for Sparc FPU simulator. */
27
28#include <sys/types.h>
29#include <sys/systm.h>
30#include <sys/fpu/fpusystm.h>
31#include <sys/fpu/fpu_simulator.h>
32#include <sys/vis_simulator.h>
33#include <sys/fpu/globals.h>
34#include <sys/privregs.h>
35#include <sys/sun4asi.h>
36#include <sys/machasi.h>
37#include <sys/debug.h>
38#include <sys/cpu_module.h>
39#include <sys/systm.h>
40
41#define	FPU_REG_FIELD uint32_reg	/* Coordinate with FPU_REGS_TYPE. */
42#define	FPU_DREG_FIELD uint64_reg	/* Coordinate with FPU_DREGS_TYPE. */
43#define	FPU_FSR_FIELD uint64_reg	/* Coordinate with V9_FPU_FSR_TYPE. */
44
45extern	uint_t	get_subcc_ccr(uint64_t, uint64_t);
46
47static enum ftt_type vis_array(fp_simd_type *, vis_inst_type, struct regs *,
48				void *);
49static enum ftt_type vis_alignaddr(fp_simd_type *, vis_inst_type,
50				struct regs *, void *, kfpu_t *);
51static enum ftt_type vis_edge(fp_simd_type *, vis_inst_type, struct regs *,
52				void *);
53static enum ftt_type vis_faligndata(fp_simd_type *, fp_inst_type,
54				kfpu_t *);
55static enum ftt_type vis_bmask(fp_simd_type *, vis_inst_type, struct regs *,
56				void *, kfpu_t *);
57static enum ftt_type vis_bshuffle(fp_simd_type *, fp_inst_type,
58				kfpu_t *);
59static enum ftt_type vis_siam(fp_simd_type *, vis_inst_type, kfpu_t *);
60static enum ftt_type vis_fcmp(fp_simd_type *, vis_inst_type, struct regs *,
61				void *);
62static enum ftt_type vis_fmul(fp_simd_type *, vis_inst_type);
63static enum ftt_type vis_fpixel(fp_simd_type *, vis_inst_type, kfpu_t *);
64static enum ftt_type vis_fpaddsub(fp_simd_type *, vis_inst_type);
65static enum ftt_type vis_pdist(fp_simd_type *, fp_inst_type, struct regs *,
66				void *, uint_t);
67static enum ftt_type vis_prtl_fst(fp_simd_type *, vis_inst_type, struct regs *,
68				void *, uint_t);
69static enum ftt_type vis_short_fls(fp_simd_type *, vis_inst_type,
70				struct regs *, void *, uint_t);
71static enum ftt_type vis_blk_fldst(fp_simd_type *, vis_inst_type,
72				struct regs *, void *, uint_t);
73
74/*
75 * Simulator for VIS instructions with op3 == 0x36 that get fp_disabled
76 * traps.
77 */
78enum ftt_type
79vis_fpu_simulator(
80	fp_simd_type	*pfpsd,	/* FPU simulator data. */
81	fp_inst_type	pinst,	/* FPU instruction to simulate. */
82	struct regs	*pregs,	/* Pointer to PCB image of registers. */
83	void		*prw,	/* Pointer to locals and ins. */
84	kfpu_t		*fp)	/* Need to fp to access gsr reg */
85{
86	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
87	uint_t	us1, us2, usr;
88	uint64_t lus1, lus2, lusr;
89	enum ftt_type ftt = ftt_none;
90	union {
91		vis_inst_type	inst;
92		fp_inst_type	pinst;
93	} f;
94
95	ASSERT(USERMODE(pregs->r_tstate));
96	nrs1 = pinst.rs1;
97	nrs2 = pinst.rs2;
98	nrd = pinst.rd;
99	f.pinst = pinst;
100	if ((f.inst.opf & 1) == 0) {		/* double precision */
101		if ((nrs1 & 1) == 1) 		/* fix register encoding */
102			nrs1 = (nrs1 & 0x1e) | 0x20;
103		if ((nrs2 & 1) == 1)
104			nrs2 = (nrs2 & 0x1e) | 0x20;
105		if ((nrd & 1) == 1)
106			nrd = (nrd & 0x1e) | 0x20;
107	}
108
109	switch (f.inst.opf) {
110		/* these instr's do not use fp regs */
111	case edge8:
112	case edge8l:
113	case edge8n:
114	case edge8ln:
115	case edge16:
116	case edge16l:
117	case edge16n:
118	case edge16ln:
119	case edge32:
120	case edge32l:
121	case edge32n:
122	case edge32ln:
123		ftt = vis_edge(pfpsd, f.inst, pregs, prw);
124		break;
125	case array8:
126	case array16:
127	case array32:
128		ftt = vis_array(pfpsd, f.inst, pregs, prw);
129		break;
130	case alignaddr:
131	case alignaddrl:
132		ftt = vis_alignaddr(pfpsd, f.inst, pregs, prw, fp);
133		break;
134	case bmask:
135		ftt = vis_bmask(pfpsd, f.inst, pregs, prw, fp);
136		break;
137	case fcmple16:
138	case fcmpne16:
139	case fcmpgt16:
140	case fcmpeq16:
141	case fcmple32:
142	case fcmpne32:
143	case fcmpgt32:
144	case fcmpeq32:
145		ftt = vis_fcmp(pfpsd, f.inst, pregs, prw);
146		break;
147	case fmul8x16:
148	case fmul8x16au:
149	case fmul8x16al:
150	case fmul8sux16:
151	case fmul8ulx16:
152	case fmuld8sux16:
153	case fmuld8ulx16:
154		ftt = vis_fmul(pfpsd, f.inst);
155		break;
156	case fpack16:
157	case fpack32:
158	case fpackfix:
159	case fexpand:
160	case fpmerge:
161		ftt = vis_fpixel(pfpsd, f.inst, fp);
162		break;
163	case pdist:
164	case pdistn:
165		ftt = vis_pdist(pfpsd, pinst, pregs, prw, f.inst.opf);
166		break;
167	case faligndata:
168		ftt = vis_faligndata(pfpsd, pinst, fp);
169		break;
170	case bshuffle:
171		ftt = vis_bshuffle(pfpsd, pinst, fp);
172		break;
173	case fpadd16:
174	case fpadd16s:
175	case fpadd32:
176	case fpadd32s:
177	case fpsub16:
178	case fpsub16s:
179	case fpsub32:
180	case fpsub32s:
181		ftt = vis_fpaddsub(pfpsd, f.inst);
182		break;
183	case fzero:
184		lusr = 0;
185		_fp_pack_extword(pfpsd, &lusr, nrd);
186		break;
187	case fzeros:
188		usr = 0;
189		_fp_pack_word(pfpsd, &usr, nrd);
190		break;
191	case fnor:
192		_fp_unpack_extword(pfpsd, &lus1, nrs1);
193		_fp_unpack_extword(pfpsd, &lus2, nrs2);
194		lusr = ~(lus1 | lus2);
195		_fp_pack_extword(pfpsd, &lusr, nrd);
196		break;
197	case fnors:
198		_fp_unpack_word(pfpsd, &us1, nrs1);
199		_fp_unpack_word(pfpsd, &us2, nrs2);
200		usr = ~(us1 | us2);
201		_fp_pack_word(pfpsd, &usr, nrd);
202		break;
203	case fandnot2:
204		_fp_unpack_extword(pfpsd, &lus1, nrs1);
205		_fp_unpack_extword(pfpsd, &lus2, nrs2);
206		lusr = (lus1 & ~lus2);
207		_fp_pack_extword(pfpsd, &lusr, nrd);
208		break;
209	case fandnot2s:
210		_fp_unpack_word(pfpsd, &us1, nrs1);
211		_fp_unpack_word(pfpsd, &us2, nrs2);
212		usr = (us1 & ~us2);
213		_fp_pack_word(pfpsd, &usr, nrd);
214		break;
215	case fnot2:
216		_fp_unpack_extword(pfpsd, &lus2, nrs2);
217		lusr = ~lus2;
218		_fp_pack_extword(pfpsd, &lusr, nrd);
219		break;
220	case fnot2s:
221		_fp_unpack_word(pfpsd, &us2, nrs2);
222		usr = ~us2;
223		_fp_pack_word(pfpsd, &usr, nrd);
224		break;
225	case fandnot1:
226		_fp_unpack_extword(pfpsd, &lus1, nrs1);
227		_fp_unpack_extword(pfpsd, &lus2, nrs2);
228		lusr = (~lus1 & lus2);
229		_fp_pack_extword(pfpsd, &lusr, nrd);
230		break;
231	case fandnot1s:
232		_fp_unpack_word(pfpsd, &us1, nrs1);
233		_fp_unpack_word(pfpsd, &us2, nrs2);
234		usr = (~us1 & us2);
235		_fp_pack_word(pfpsd, &usr, nrd);
236		break;
237	case fnot1:
238		_fp_unpack_extword(pfpsd, &lus1, nrs1);
239		lusr = ~lus1;
240		_fp_pack_extword(pfpsd, &lusr, nrd);
241		break;
242	case fnot1s:
243		_fp_unpack_word(pfpsd, &us1, nrs1);
244		usr = ~us1;
245		_fp_pack_word(pfpsd, &usr, nrd);
246		break;
247	case fxor:
248		_fp_unpack_extword(pfpsd, &lus1, nrs1);
249		_fp_unpack_extword(pfpsd, &lus2, nrs2);
250		lusr = (lus1 ^ lus2);
251		_fp_pack_extword(pfpsd, &lusr, nrd);
252		break;
253	case fxors:
254		_fp_unpack_word(pfpsd, &us1, nrs1);
255		_fp_unpack_word(pfpsd, &us2, nrs2);
256		usr = (us1 ^ us2);
257		_fp_pack_word(pfpsd, &usr, nrd);
258		break;
259	case fnand:
260		_fp_unpack_extword(pfpsd, &lus1, nrs1);
261		_fp_unpack_extword(pfpsd, &lus2, nrs2);
262		lusr = ~(lus1 & lus2);
263		_fp_pack_extword(pfpsd, &lusr, nrd);
264		break;
265	case fnands:
266		_fp_unpack_word(pfpsd, &us1, nrs1);
267		_fp_unpack_word(pfpsd, &us2, nrs2);
268		usr = ~(us1 & us2);
269		_fp_pack_word(pfpsd, &usr, nrd);
270		break;
271	case fand:
272		_fp_unpack_extword(pfpsd, &lus1, nrs1);
273		_fp_unpack_extword(pfpsd, &lus2, nrs2);
274		lusr = (lus1 & lus2);
275		_fp_pack_extword(pfpsd, &lusr, nrd);
276		break;
277	case fands:
278		_fp_unpack_word(pfpsd, &us1, nrs1);
279		_fp_unpack_word(pfpsd, &us2, nrs2);
280		usr = (us1 & us2);
281		_fp_pack_word(pfpsd, &usr, nrd);
282		break;
283	case fxnor:
284		_fp_unpack_extword(pfpsd, &lus1, nrs1);
285		_fp_unpack_extword(pfpsd, &lus2, nrs2);
286		lusr = ~(lus1 ^ lus2);
287		_fp_pack_extword(pfpsd, &lusr, nrd);
288		break;
289	case fxnors:
290		_fp_unpack_word(pfpsd, &us1, nrs1);
291		_fp_unpack_word(pfpsd, &us2, nrs2);
292		usr = ~(us1 ^ us2);
293		_fp_pack_word(pfpsd, &usr, nrd);
294		break;
295	case fsrc1:
296		_fp_unpack_extword(pfpsd, &lusr, nrs1);
297		_fp_pack_extword(pfpsd, &lusr, nrd);
298		break;
299	case fsrc1s:
300		_fp_unpack_word(pfpsd, &usr, nrs1);
301		_fp_pack_word(pfpsd, &usr, nrd);
302		break;
303	case fornot2:
304		_fp_unpack_extword(pfpsd, &lus1, nrs1);
305		_fp_unpack_extword(pfpsd, &lus2, nrs2);
306		lusr = (lus1 | ~lus2);
307		_fp_pack_extword(pfpsd, &lusr, nrd);
308		break;
309	case fornot2s:
310		_fp_unpack_word(pfpsd, &us1, nrs1);
311		_fp_unpack_word(pfpsd, &us2, nrs2);
312		usr = (us1 | ~us2);
313		_fp_pack_word(pfpsd, &usr, nrd);
314		break;
315	case fsrc2:
316		_fp_unpack_extword(pfpsd, &lusr, nrs2);
317		_fp_pack_extword(pfpsd, &lusr, nrd);
318		break;
319	case fsrc2s:
320		_fp_unpack_word(pfpsd, &usr, nrs2);
321		_fp_pack_word(pfpsd, &usr, nrd);
322		break;
323	case fornot1:
324		_fp_unpack_extword(pfpsd, &lus1, nrs1);
325		_fp_unpack_extword(pfpsd, &lus2, nrs2);
326		lusr = (~lus1 | lus2);
327		_fp_pack_extword(pfpsd, &lusr, nrd);
328		break;
329	case fornot1s:
330		_fp_unpack_word(pfpsd, &us1, nrs1);
331		_fp_unpack_word(pfpsd, &us2, nrs2);
332		usr = (~us1 | us2);
333		_fp_pack_word(pfpsd, &usr, nrd);
334		break;
335	case for_op:
336		_fp_unpack_extword(pfpsd, &lus1, nrs1);
337		_fp_unpack_extword(pfpsd, &lus2, nrs2);
338		lusr = (lus1 | lus2);
339		_fp_pack_extword(pfpsd, &lusr, nrd);
340		break;
341	case fors_op:
342		_fp_unpack_word(pfpsd, &us1, nrs1);
343		_fp_unpack_word(pfpsd, &us2, nrs2);
344		usr = (us1 | us2);
345		_fp_pack_word(pfpsd, &usr, nrd);
346		break;
347	case fone:
348		lusr = 0xffffffffffffffff;
349		_fp_pack_extword(pfpsd, &lusr, nrd);
350		break;
351	case fones:
352		usr = 0xffffffffUL;
353		_fp_pack_word(pfpsd, &usr, nrd);
354		break;
355	case siam:
356		ftt = vis_siam(pfpsd, f.inst, fp);
357		break;
358	default:
359		return (ftt_unimplemented);
360	}
361
362	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
363	pregs->r_npc += 4;
364	return (ftt);
365}
366
367/*
368 * Simulator for edge instructions
369 */
370static enum ftt_type
371vis_edge(
372	fp_simd_type	*pfpsd,	/* FPU simulator data. */
373	vis_inst_type	inst,	/* FPU instruction to simulate. */
374	struct regs	*pregs,	/* Pointer to PCB image of registers. */
375	void		*prw)	/* Pointer to locals and ins. */
376
377{
378	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
379	enum ftt_type ftt;
380	uint64_t addrl, addrr, mask;
381	uint64_t ah61l, ah61r;		/* Higher 61 bits of address */
382	int al3l, al3r;			/* Lower 3 bits of address */
383	uint_t	ccr;
384
385	nrs1 = inst.rs1;
386	nrs2 = inst.rs2;
387	nrd = inst.rd;
388
389	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &addrl);
390	if (ftt != ftt_none)
391		return (ftt);
392	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &addrr);
393	if (ftt != ftt_none)
394		return (ftt);
395
396	/* Test PSTATE.AM to determine 32-bit vs 64-bit addressing */
397	if ((pregs->r_tstate & TSTATE_AM) != 0) {
398		ah61l = addrl & 0xfffffff8;
399		ah61r = addrr & 0xfffffff8;
400	} else {
401		ah61l = addrl & ~0x7;
402		ah61r = addrr & ~0x7;
403	}
404
405
406	switch (inst.opf) {
407	case edge8:
408	case edge8n:
409	case edge8l:
410	case edge8ln:
411		al3l = addrl & 0x7;
412		switch (inst.opf) {
413		case edge8:
414		case edge8n:
415			if (inst.opf == edge8) {
416				VISINFO_KSTAT(vis_edge8);
417			} else {
418				VISINFO_KSTAT(vis_edge8n);
419			}
420			mask = 0xff >> al3l;
421			if (ah61l == ah61r) {
422				al3r = addrr & 0x7;
423				mask &= (0xff << (0x7 - al3r)) & 0xff;
424			}
425			break;
426		case edge8l:
427		case edge8ln:
428			if (inst.opf == edge8l) {
429				VISINFO_KSTAT(vis_edge8l);
430			} else {
431				VISINFO_KSTAT(vis_edge8ln);
432			}
433			mask = (0xff << al3l) & 0xff;
434			if (ah61l == ah61r) {
435				al3r = addrr & 0x7;
436				mask &= 0xff >> (0x7 - al3r);
437			}
438			break;
439		}
440		break;
441	case edge16:
442	case edge16l:
443	case edge16n:
444	case edge16ln:
445		al3l = addrl & 0x6;
446		al3l >>= 0x1;
447		switch (inst.opf) {
448		case edge16:
449		case edge16n:
450			if (inst.opf == edge16) {
451				VISINFO_KSTAT(vis_edge16);
452
453			} else {
454				VISINFO_KSTAT(vis_edge16n);
455			}
456			mask = 0xf >> al3l;
457			if (ah61l == ah61r) {
458				al3r = addrr & 0x6;
459				al3r >>= 0x1;
460				mask &= (0xf << (0x3 - al3r)) & 0xf;
461			}
462			break;
463		case edge16l:
464		case edge16ln:
465			if (inst.opf == edge16l) {
466				VISINFO_KSTAT(vis_edge16l);
467
468			} else {
469				VISINFO_KSTAT(vis_edge16ln);
470			}
471
472			mask = (0xf << al3l) & 0xf;
473			if (ah61l == ah61r) {
474				al3r = addrr & 0x6;
475				al3r >>= 0x1;
476				mask &= 0xf >> (0x3 - al3r);
477			}
478			break;
479		}
480		break;
481	case edge32:
482	case edge32l:
483	case edge32n:
484	case edge32ln:
485		al3l = addrl & 0x4;
486		al3l >>= 0x2;
487
488		switch (inst.opf) {
489		case edge32:
490		case edge32n:
491			if (inst.opf == edge32) {
492				VISINFO_KSTAT(vis_edge32);
493
494			} else {
495				VISINFO_KSTAT(vis_edge32n);
496			}
497			mask = 0x3 >> al3l;
498			if (ah61l == ah61r) {
499				al3r = addrr & 0x4;
500				al3r >>= 0x2;
501				mask &= (0x3 << (0x1 - al3r)) & 0x3;
502			}
503			break;
504		case edge32l:
505		case edge32ln:
506			if (inst.opf == edge32l) {
507				VISINFO_KSTAT(vis_edge32l);
508
509			} else {
510				VISINFO_KSTAT(vis_edge32ln);
511			}
512			mask = (0x3 << al3l) & 0x3;
513			if (ah61l == ah61r) {
514				al3r = addrr & 0x4;
515				al3r >>= 0x2;
516				mask &= 0x3 >> (0x1 - al3r);
517			}
518			break;
519		}
520		break;
521	}
522
523	ftt = write_iureg(pfpsd, nrd, pregs, prw, &mask);
524
525	switch (inst.opf) {
526	case edge8:
527	case edge8l:
528	case edge16:
529	case edge16l:
530	case edge32:
531	case edge32l:
532
533		/* Update flags per SUBcc outcome */
534		pregs->r_tstate &= ~((uint64_t)TSTATE_CCR_MASK
535					<< TSTATE_CCR_SHIFT);
536		ccr = get_subcc_ccr(addrl, addrr);  /* get subcc cond. codes */
537		pregs->r_tstate |= ((uint64_t)ccr << TSTATE_CCR_SHIFT);
538
539		break;
540	}
541	return (ftt);
542}
543
544/*
545 * Simulator for three dimentional array addressing instructions.
546 */
547static enum ftt_type
548vis_array(
549	fp_simd_type	*pfpsd,	/* FPU simulator data. */
550	vis_inst_type	inst,	/* FPU instruction to simulate. */
551	struct regs	*pregs,	/* Pointer to PCB image of registers. */
552	void		*prw)	/* Pointer to locals and ins. */
553
554{
555	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
556	enum ftt_type ftt;
557	uint64_t laddr, bsize, baddr;
558	uint64_t nbit;
559	int oy, oz;
560
561	nrs1 = inst.rs1;
562	nrs2 = inst.rs2;
563	nrd = inst.rd;
564
565	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &laddr);
566	if (ftt != ftt_none)
567		return (ftt);
568	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &bsize);
569	if (ftt != ftt_none)
570		return (ftt);
571
572	if (bsize > 5) {
573		bsize = 5;
574	}
575	nbit = (1 << bsize) - 1;	/* Number of bits for XY<6+n-1:6> */
576	oy = 17 + bsize;		/* Offset of Y<6+n-1:6> */
577	oz = 17 + 2 * bsize;		/* Offset of Z<8:5> */
578
579	baddr = 0;
580	baddr |= (laddr >> (11 -  0)) & (0x03 <<  0);	/* X_integer<1:0> */
581	baddr |= (laddr >> (33 -  2)) & (0x03 <<  2);	/* Y_integer<1:0> */
582	baddr |= (laddr >> (55 -  4)) & (0x01 <<  4);	/* Z_integer<0>   */
583	baddr |= (laddr >> (13 -  5)) & (0x0f <<  5);	/* X_integer<5:2> */
584	baddr |= (laddr >> (35 -  9)) & (0x0f <<  9);	/* Y_integer<5:2> */
585	baddr |= (laddr >> (56 - 13)) & (0x0f << 13);	/* Z_integer<4:1> */
586	baddr |= (laddr >> (17 - 17)) & (nbit << 17);	/* X_integer<6+n-1:6> */
587	baddr |= (laddr >> (39 - oy)) & (nbit << oy);	/* Y_integer<6+n-1:6> */
588	baddr |= (laddr >> (60 - oz)) & (0x0f << oz);	/* Z_integer<8:5> */
589
590	switch (inst.opf) {
591	case array8:
592		VISINFO_KSTAT(vis_array8);
593		break;
594	case array16:
595		VISINFO_KSTAT(vis_array16);
596		baddr <<= 1;
597		break;
598	case array32:
599		VISINFO_KSTAT(vis_array32);
600		baddr <<= 2;
601		break;
602	}
603
604	ftt = write_iureg(pfpsd, nrd, pregs, prw, &baddr);
605
606	return (ftt);
607}
608
609/*
610 * Simulator for alignaddr and alignaddrl instructions.
611 */
612static enum ftt_type
613vis_alignaddr(
614	fp_simd_type	*pfpsd,	/* FPU simulator data. */
615	vis_inst_type	inst,	/* FPU instruction to simulate. */
616	struct regs	*pregs,	/* Pointer to PCB image of registers. */
617	void		*prw,	/* Pointer to locals and ins. */
618	kfpu_t		*fp)	/* Need to fp to access gsr reg */
619{
620	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
621	enum ftt_type ftt;
622	uint64_t ea, tea, g, r;
623	short s;
624
625	nrs1 = inst.rs1;
626	nrs2 = inst.rs2;
627	nrd = inst.rd;
628
629	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
630	if (ftt != ftt_none)
631		return (ftt);
632	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
633	if (ftt != ftt_none)
634		return (ftt);
635	ea += tea;
636	r = ea & ~0x7;	/* zero least 3 significant bits */
637	ftt = write_iureg(pfpsd, nrd, pregs, prw, &r);
638
639
640	g = pfpsd->fp_current_read_gsr(fp);
641	g &= ~(GSR_ALIGN_MASK);		/* zero the align offset */
642	r = ea & 0x7;
643	if (inst.opf == alignaddrl) {
644		s = (short)(~r);	/* 2's complement for alignaddrl */
645		if (s < 0)
646			r = (uint64_t)((s + 1) & 0x7);
647		else
648			r = (uint64_t)(s & 0x7);
649	}
650	g |= (r << GSR_ALIGN_SHIFT) & GSR_ALIGN_MASK;
651	pfpsd->fp_current_write_gsr(g, fp);
652
653	return (ftt);
654}
655
656/*
657 * Simulator for bmask instruction.
658 */
659static enum ftt_type
660vis_bmask(
661	fp_simd_type	*pfpsd,	/* FPU simulator data. */
662	vis_inst_type	inst,	/* FPU instruction to simulate. */
663	struct regs	*pregs,	/* Pointer to PCB image of registers. */
664	void		*prw,	/* Pointer to locals and ins. */
665	kfpu_t		*fp)	/* Need to fp to access gsr reg */
666{
667	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
668	enum ftt_type ftt;
669	uint64_t ea, tea, g;
670
671	VISINFO_KSTAT(vis_bmask);
672	nrs1 = inst.rs1;
673	nrs2 = inst.rs2;
674	nrd = inst.rd;
675
676	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
677	if (ftt != ftt_none)
678		return (ftt);
679	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
680	if (ftt != ftt_none)
681		return (ftt);
682	ea += tea;
683	ftt = write_iureg(pfpsd, nrd, pregs, prw, &ea);
684
685	g = pfpsd->fp_current_read_gsr(fp);
686	g &= ~(GSR_MASK_MASK);		/* zero the mask offset */
687
688	/* Put the least significant 32 bits of ea in GSR.mask */
689	g |= (ea << GSR_MASK_SHIFT) & GSR_MASK_MASK;
690	pfpsd->fp_current_write_gsr(g, fp);
691	return (ftt);
692}
693
694/*
695 * Simulator for fp[add|sub]* instruction.
696 */
697static enum ftt_type
698vis_fpaddsub(
699	fp_simd_type	*pfpsd,	/* FPU simulator data. */
700	vis_inst_type	inst)	/* FPU instruction to simulate. */
701{
702	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
703	union {
704		uint64_t	ll;
705		uint32_t	i[2];
706		uint16_t	s[4];
707	} lrs1, lrs2, lrd;
708	union {
709		uint32_t	i;
710		uint16_t	s[2];
711	} krs1, krs2, krd;
712	int i;
713
714	nrs1 = inst.rs1;
715	nrs2 = inst.rs2;
716	nrd = inst.rd;
717	if ((inst.opf & 1) == 0) {	/* double precision */
718		if ((nrs1 & 1) == 1) 	/* fix register encoding */
719			nrs1 = (nrs1 & 0x1e) | 0x20;
720		if ((nrs2 & 1) == 1)
721			nrs2 = (nrs2 & 0x1e) | 0x20;
722		if ((nrd & 1) == 1)
723			nrd = (nrd & 0x1e) | 0x20;
724	}
725	switch (inst.opf) {
726	case fpadd16:
727		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
728		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
729		for (i = 0; i <= 3; i++) {
730			lrd.s[i] = lrs1.s[i] + lrs2.s[i];
731		}
732		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
733		break;
734	case fpadd16s:
735		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
736		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
737		for (i = 0; i <= 1; i++) {
738			krd.s[i] = krs1.s[i] + krs2.s[i];
739		}
740		_fp_pack_word(pfpsd, &krd.i, nrd);
741		break;
742	case fpadd32:
743		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
744		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
745		for (i = 0; i <= 1; i++) {
746			lrd.i[i] = lrs1.i[i] + lrs2.i[i];
747		}
748		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
749		break;
750	case fpadd32s:
751		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
752		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
753		krd.i = krs1.i + krs2.i;
754		_fp_pack_word(pfpsd, &krd.i, nrd);
755		break;
756	case fpsub16:
757		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
758		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
759		for (i = 0; i <= 3; i++) {
760			lrd.s[i] = lrs1.s[i] - lrs2.s[i];
761		}
762		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
763		break;
764	case fpsub16s:
765		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
766		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
767		for (i = 0; i <= 1; i++) {
768			krd.s[i] = krs1.s[i] - krs2.s[i];
769		}
770		_fp_pack_word(pfpsd, &krd.i, nrd);
771		break;
772	case fpsub32:
773		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
774		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
775		for (i = 0; i <= 1; i++) {
776			lrd.i[i] = lrs1.i[i] - lrs2.i[i];
777		}
778		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
779		break;
780	case fpsub32s:
781		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
782		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
783		krd.i = krs1.i - krs2.i;
784		_fp_pack_word(pfpsd, &krd.i, nrd);
785		break;
786	}
787	return (ftt_none);
788}
789
790/*
791 * Simulator for fcmp* instruction.
792 */
793static enum ftt_type
794vis_fcmp(
795	fp_simd_type	*pfpsd,	/* FPU simulator data. */
796	vis_inst_type	inst,	/* FPU instruction to simulate. */
797	struct regs	*pregs,	/* Pointer to PCB image of registers. */
798	void		*prw)	/* Pointer to locals and ins. */
799{
800	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
801	union {
802		uint64_t	ll;
803		uint32_t	i[2];
804		uint16_t	s[4];
805	} krs1, krs2, krd;
806	enum ftt_type ftt;
807	short sr1, sr2;
808	int i, ir1, ir2;
809
810	nrs1 = inst.rs1;
811	nrs2 = inst.rs2;
812	nrd = inst.rd;
813	krd.ll = 0;
814	if ((nrs1 & 1) == 1) 	/* fix register encoding */
815		nrs1 = (nrs1 & 0x1e) | 0x20;
816	if ((nrs2 & 1) == 1)
817		nrs2 = (nrs2 & 0x1e) | 0x20;
818
819	_fp_unpack_extword(pfpsd, &krs1.ll, nrs1);
820	_fp_unpack_extword(pfpsd, &krs2.ll, nrs2);
821	switch (inst.opf) {
822	case fcmple16:
823		VISINFO_KSTAT(vis_fcmple16);
824		for (i = 0; i <= 3; i++) {
825			sr1 = (short)krs1.s[i];
826			sr2 = (short)krs2.s[i];
827			if (sr1 <= sr2)
828				krd.ll += (0x8 >> i);
829		}
830		break;
831	case fcmpne16:
832		VISINFO_KSTAT(vis_fcmpne16);
833		for (i = 0; i <= 3; i++) {
834			sr1 = (short)krs1.s[i];
835			sr2 = (short)krs2.s[i];
836			if (sr1 != sr2)
837				krd.ll += (0x8 >> i);
838		}
839		break;
840	case fcmpgt16:
841		VISINFO_KSTAT(vis_fcmpgt16);
842		for (i = 0; i <= 3; i++) {
843			sr1 = (short)krs1.s[i];
844			sr2 = (short)krs2.s[i];
845			if (sr1 > sr2)
846				krd.ll += (0x8 >> i);
847		}
848		break;
849	case fcmpeq16:
850		VISINFO_KSTAT(vis_fcmpeq16);
851		for (i = 0; i <= 3; i++) {
852			sr1 = (short)krs1.s[i];
853			sr2 = (short)krs2.s[i];
854			if (sr1 == sr2)
855				krd.ll += (0x8 >> i);
856		}
857		break;
858	case fcmple32:
859		VISINFO_KSTAT(vis_fcmple32);
860		for (i = 0; i <= 1; i++) {
861			ir1 = (int)krs1.i[i];
862			ir2 = (int)krs2.i[i];
863			if (ir1 <= ir2)
864				krd.ll += (0x2 >> i);
865		}
866		break;
867	case fcmpne32:
868		VISINFO_KSTAT(vis_fcmpne32);
869		for (i = 0; i <= 1; i++) {
870			ir1 = (int)krs1.i[i];
871			ir2 = (int)krs2.i[i];
872			if (ir1 != ir2)
873				krd.ll += (0x2 >> i);
874		}
875		break;
876	case fcmpgt32:
877		VISINFO_KSTAT(vis_fcmpgt32);
878		for (i = 0; i <= 1; i++) {
879			ir1 = (int)krs1.i[i];
880			ir2 = (int)krs2.i[i];
881			if (ir1 > ir2)
882				krd.ll += (0x2 >> i);
883		}
884		break;
885	case fcmpeq32:
886		VISINFO_KSTAT(vis_fcmpeq32);
887		for (i = 0; i <= 1; i++) {
888			ir1 = (int)krs1.i[i];
889			ir2 = (int)krs2.i[i];
890			if (ir1 == ir2)
891				krd.ll += (0x2 >> i);
892		}
893		break;
894	}
895	ftt = write_iureg(pfpsd, nrd, pregs, prw, &krd.ll);
896	return (ftt);
897}
898
899/*
900 * Simulator for fmul* instruction.
901 */
902static enum ftt_type
903vis_fmul(
904	fp_simd_type	*pfpsd,	/* FPU simulator data. */
905	vis_inst_type	inst)	/* FPU instruction to simulate. */
906{
907	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
908	union {
909		uint64_t	ll;
910		uint32_t	i[2];
911		uint16_t	s[4];
912		uint8_t		c[8];
913	} lrs1, lrs2, lrd;
914	union {
915		uint32_t	i;
916		uint16_t	s[2];
917		uint8_t		c[4];
918	} krs1, krs2, kres;
919	short s1, s2, sres;
920	ushort_t us1;
921	char c1;
922	int i;
923
924	nrs1 = inst.rs1;
925	nrs2 = inst.rs2;
926	nrd = inst.rd;
927	if ((inst.opf & 1) == 0) {	/* double precision */
928		if ((nrd & 1) == 1) 	/* fix register encoding */
929			nrd = (nrd & 0x1e) | 0x20;
930	}
931
932	switch (inst.opf) {
933	case fmul8x16:
934		VISINFO_KSTAT(vis_fmul8x16);
935		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
936		if ((nrs2 & 1) == 1)
937			nrs2 = (nrs2 & 0x1e) | 0x20;
938		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
939		for (i = 0; i <= 3; i++) {
940			us1 = (ushort_t)krs1.c[i];
941			s2 = (short)lrs2.s[i];
942			kres.i = us1 * s2;
943			sres = (short)((kres.c[1] << 8) | kres.c[2]);
944			if (kres.c[3] >= 0x80)
945				sres++;
946			lrd.s[i] = sres;
947		}
948		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
949		break;
950	case fmul8x16au:
951		VISINFO_KSTAT(vis_fmul8x16au);
952		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
953		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
954		for (i = 0; i <= 3; i++) {
955			us1 = (ushort_t)krs1.c[i];
956			s2 = (short)krs2.s[0];
957			kres.i = us1 * s2;
958			sres = (short)((kres.c[1] << 8) | kres.c[2]);
959			if (kres.c[3] >= 0x80)
960				sres++;
961			lrd.s[i] = sres;
962		}
963		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
964		break;
965	case fmul8x16al:
966		VISINFO_KSTAT(vis_fmul8x16al);
967		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
968		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
969		for (i = 0; i <= 3; i++) {
970			us1 = (ushort_t)krs1.c[i];
971			s2 = (short)krs2.s[1];
972			kres.i = us1 * s2;
973			sres = (short)((kres.c[1] << 8) | kres.c[2]);
974			if (kres.c[3] >= 0x80)
975				sres++;
976			lrd.s[i] = sres;
977		}
978		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
979		break;
980	case fmul8sux16:
981		VISINFO_KSTAT(vis_fmul8sux16);
982		if ((nrs1 & 1) == 1) 	/* fix register encoding */
983			nrs1 = (nrs1 & 0x1e) | 0x20;
984		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
985		if ((nrs2 & 1) == 1)
986			nrs2 = (nrs2 & 0x1e) | 0x20;
987		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
988		for (i = 0; i <= 3; i++) {
989			c1 = lrs1.c[(i*2)];
990			s1 = (short)c1;		/* keeps the sign alive */
991			s2 = (short)lrs2.s[i];
992			kres.i = s1 * s2;
993			sres = (short)((kres.c[1] << 8) | kres.c[2]);
994			if (kres.c[3] >= 0x80)
995				sres++;
996			if (sres < 0)
997				lrd.s[i] = (sres & 0xFFFF);
998			else
999				lrd.s[i] = sres;
1000		}
1001		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1002		break;
1003	case fmul8ulx16:
1004		VISINFO_KSTAT(vis_fmul8ulx16);
1005		if ((nrs1 & 1) == 1) 	/* fix register encoding */
1006			nrs1 = (nrs1 & 0x1e) | 0x20;
1007		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1008		if ((nrs2 & 1) == 1)
1009			nrs2 = (nrs2 & 0x1e) | 0x20;
1010		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1011		for (i = 0; i <= 3; i++) {
1012			us1 = (ushort_t)lrs1.c[(i*2)+1];
1013			s2 = (short)lrs2.s[i];
1014			kres.i = us1 * s2;
1015			sres = (short)kres.s[0];
1016			if (kres.s[1] >= 0x8000)
1017				sres++;
1018			lrd.s[i] = sres;
1019		}
1020		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1021		break;
1022	case fmuld8sux16:
1023		VISINFO_KSTAT(vis_fmuld8sux16);
1024		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1025		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1026		for (i = 0; i <= 1; i++) {
1027			c1 = krs1.c[(i*2)];
1028			s1 = (short)c1;		/* keeps the sign alive */
1029			s2 = (short)krs2.s[i];
1030			kres.i = s1 * s2;
1031			lrd.i[i] = kres.i << 8;
1032		}
1033		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1034		break;
1035	case fmuld8ulx16:
1036		VISINFO_KSTAT(vis_fmuld8ulx16);
1037		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1038		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1039		for (i = 0; i <= 1; i++) {
1040			us1 = (ushort_t)krs1.c[(i*2)+1];
1041			s2 = (short)krs2.s[i];
1042			lrd.i[i] = us1 * s2;
1043		}
1044		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1045		break;
1046	}
1047	return (ftt_none);
1048}
1049
1050/*
1051 * Simulator for fpixel formatting instructions.
1052 */
1053static enum ftt_type
1054vis_fpixel(
1055	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1056	vis_inst_type	inst,	/* FPU instruction to simulate. */
1057	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1058{
1059	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1060	int	i, j, k, sf;
1061	union {
1062		uint64_t	ll;
1063		uint32_t	i[2];
1064		uint16_t	s[4];
1065		uint8_t		c[8];
1066	} lrs1, lrs2, lrd;
1067	union {
1068		uint32_t	i;
1069		uint16_t	s[2];
1070		uint8_t		c[4];
1071	} krs1, krs2, krd;
1072	uint64_t r;
1073	int64_t l, m;
1074	short s;
1075	uchar_t uc;
1076
1077	nrs1 = inst.rs1;
1078	nrs2 = inst.rs2;
1079	nrd = inst.rd;
1080	if ((inst.opf != fpack16) && (inst.opf != fpackfix)) {
1081		if ((nrd & 1) == 1) 	/* fix register encoding */
1082			nrd = (nrd & 0x1e) | 0x20;
1083	}
1084
1085	switch (inst.opf) {
1086	case fpack16:
1087		VISINFO_KSTAT(vis_fpack16);
1088		if ((nrs2 & 1) == 1) 	/* fix register encoding */
1089			nrs2 = (nrs2 & 0x1e) | 0x20;
1090		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1091		r = pfpsd->fp_current_read_gsr(fp);
1092		/* fpack16 ignores GSR.scale msb */
1093		sf = (int)(GSR_SCALE(r) & 0xf);
1094		for (i = 0; i <= 3; i++) {
1095			s = (short)lrs2.s[i];	/* preserve the sign */
1096			j = ((int)s << sf);
1097			k = j >> 7;
1098			if (k < 0) {
1099				uc = 0;
1100			} else if (k > 255) {
1101				uc = 255;
1102			} else {
1103				uc = (uchar_t)k;
1104			}
1105			krd.c[i] = uc;
1106		}
1107		_fp_pack_word(pfpsd, &krd.i, nrd);
1108		break;
1109	case fpack32:
1110		VISINFO_KSTAT(vis_fpack32);
1111		if ((nrs1 & 1) == 1) 	/* fix register encoding */
1112			nrs1 = (nrs1 & 0x1e) | 0x20;
1113		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1114		if ((nrs2 & 1) == 1)
1115			nrs2 = (nrs2 & 0x1e) | 0x20;
1116		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1117
1118		r = pfpsd->fp_current_read_gsr(fp);
1119		sf = (int)GSR_SCALE(r);
1120		lrd.ll = lrs1.ll << 8;
1121		for (i = 0, k = 3; i <= 1; i++, k += 4) {
1122			j = (int)lrs2.i[i];	/* preserve the sign */
1123			l = ((int64_t)j << sf);
1124			m = l >> 23;
1125			if (m < 0) {
1126				uc = 0;
1127			} else if (m > 255) {
1128				uc = 255;
1129			} else {
1130				uc = (uchar_t)m;
1131			}
1132			lrd.c[k] = uc;
1133		}
1134		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1135		break;
1136	case fpackfix:
1137		VISINFO_KSTAT(vis_fpackfix);
1138		if ((nrs2 & 1) == 1)
1139			nrs2 = (nrs2 & 0x1e) | 0x20;
1140		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1141
1142		r = pfpsd->fp_current_read_gsr(fp);
1143		sf = (int)GSR_SCALE(r);
1144		for (i = 0; i <= 1; i++) {
1145			j = (int)lrs2.i[i];	/* preserve the sign */
1146			l = ((int64_t)j << sf);
1147			m = l >> 16;
1148			if (m < -32768) {
1149				s = -32768;
1150			} else if (m > 32767) {
1151				s = 32767;
1152			} else {
1153				s = (short)m;
1154			}
1155			krd.s[i] = s;
1156		}
1157		_fp_pack_word(pfpsd, &krd.i, nrd);
1158		break;
1159	case fexpand:
1160		VISINFO_KSTAT(vis_fexpand);
1161		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1162		for (i = 0; i <= 3; i++) {
1163			uc = krs2.c[i];
1164			lrd.s[i] = (ushort_t)(uc << 4);
1165		}
1166		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1167		break;
1168	case fpmerge:
1169		VISINFO_KSTAT(vis_fpmerge);
1170		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1171		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1172		for (i = 0, j = 0; i <= 3; i++, j += 2) {
1173			lrd.c[j] = krs1.c[i];
1174			lrd.c[j+1] = krs2.c[i];
1175		}
1176		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1177		break;
1178	}
1179	return (ftt_none);
1180}
1181
1182/*
1183 * Simulator for pdist instruction.
1184 */
1185enum ftt_type
1186vis_pdist(
1187	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1188	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1189	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1190	void		*prw,	/* Pointer to locals and ins. */
1191	uint_t		op)	/* Opcode pdist or pdistn */
1192{
1193	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1194	int	i;
1195	short	s;
1196	union {
1197		uint64_t	ll;
1198		uint8_t		c[8];
1199	} lrs1, lrs2, lrd;
1200
1201	nrs1 = pinst.rs1;
1202	nrs2 = pinst.rs2;
1203	nrd = pinst.rd;
1204
1205	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1206		nrs1 = (nrs1 & 0x1e) | 0x20;
1207	if ((nrs2 & 1) == 1)
1208		nrs2 = (nrs2 & 0x1e) | 0x20;
1209	if ((nrd & 1) == 1)
1210		nrd = (nrd & 0x1e) | 0x20;
1211
1212	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1213	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1214
1215	if (op == pdist) {
1216		VISINFO_KSTAT(vis_pdist);
1217		_fp_unpack_extword(pfpsd, &lrd.ll, nrd);
1218	} else {
1219		/* pdistn */
1220		VISINFO_KSTAT(vis_pdistn);
1221		lrd.ll = 0;
1222	}
1223
1224	for (i = 0; i <= 7; i++) {
1225		s = (short)(lrs1.c[i] - lrs2.c[i]);
1226		if (s < 0)
1227			s = ~s + 1;
1228		lrd.ll += s;
1229	}
1230
1231	if (op == pdist)
1232		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1233	else
1234		/* pdistn */
1235		(void) write_iureg(pfpsd, nrd, pregs, prw, &lrd.ll);
1236	return (ftt_none);
1237}
1238
1239/*
1240 * Simulator for faligndata instruction.
1241 */
1242static enum ftt_type
1243vis_faligndata(
1244	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1245	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1246	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1247{
1248	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1249	int	i, j, k, ao;
1250	union {
1251		uint64_t	ll;
1252		uint8_t		c[8];
1253	} lrs1, lrs2, lrd;
1254	uint64_t r;
1255
1256	nrs1 = pinst.rs1;
1257	nrs2 = pinst.rs2;
1258	nrd = pinst.rd;
1259	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1260		nrs1 = (nrs1 & 0x1e) | 0x20;
1261	if ((nrs2 & 1) == 1)
1262		nrs2 = (nrs2 & 0x1e) | 0x20;
1263	if ((nrd & 1) == 1)
1264		nrd = (nrd & 0x1e) | 0x20;
1265
1266	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1267	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1268
1269	r = pfpsd->fp_current_read_gsr(fp);
1270	ao = (int)GSR_ALIGN(r);
1271
1272	for (i = 0, j = ao, k = 0; i <= 7; i++)
1273		if (j <= 7) {
1274			lrd.c[i] = lrs1.c[j++];
1275		} else {
1276			lrd.c[i] = lrs2.c[k++];
1277		}
1278	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1279
1280	return (ftt_none);
1281}
1282
1283/*
1284 * Simulator for bshuffle instruction.
1285 */
1286static enum ftt_type
1287vis_bshuffle(
1288	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1289	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1290	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1291{
1292	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1293	int	i, j, ao;
1294	union {
1295		uint64_t	ll;
1296		uint8_t		c[8];
1297	} lrs1, lrs2, lrd;
1298	uint64_t r;
1299
1300	VISINFO_KSTAT(vis_bshuffle);
1301	nrs1 = pinst.rs1;
1302	nrs2 = pinst.rs2;
1303	nrd = pinst.rd;
1304	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1305		nrs1 = (nrs1 & 0x1e) | 0x20;
1306	if ((nrs2 & 1) == 1)
1307		nrs2 = (nrs2 & 0x1e) | 0x20;
1308	if ((nrd & 1) == 1)
1309		nrd = (nrd & 0x1e) | 0x20;
1310
1311	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1312	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1313
1314	r = pfpsd->fp_current_read_gsr(fp);
1315	ao = (int)GSR_MASK(r);
1316
1317	/*
1318	 * BSHUFFLE Destination Byte Selection
1319	 * rd Byte	Source
1320	 * 0		rs byte[GSR.mask<31..28>]
1321	 * 1		rs byte[GSR.mask<27..24>]
1322	 * 2		rs byte[GSR.mask<23..20>]
1323	 * 3		rs byte[GSR.mask<19..16>]
1324	 * 4		rs byte[GSR.mask<15..12>]
1325	 * 5		rs byte[GSR.mask<11..8>]
1326	 * 6		rs byte[GSR.mask<7..4>]
1327	 * 7		rs byte[GSR.mask<3..0>]
1328	 * P.S. rs1 is the upper half and rs2 is the lower half
1329	 * Bytes in the source value are numbered from most to
1330	 * least significant
1331	 */
1332	for (i = 7; i >= 0; i--, ao = (ao >> 4)) {
1333		j = ao & 0xf;		/* get byte number */
1334		if (j < 8) {
1335			lrd.c[i] = lrs1.c[j];
1336		} else {
1337			lrd.c[i] = lrs2.c[j - 8];
1338		}
1339	}
1340	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1341
1342	return (ftt_none);
1343}
1344
1345/*
1346 * Simulator for siam instruction.
1347 */
1348static enum ftt_type
1349vis_siam(
1350	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1351	vis_inst_type	inst,	/* FPU instruction to simulate. */
1352	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1353{
1354	uint_t	nrs2;			/* Register number fields. */
1355	uint64_t g, r;
1356	nrs2 = inst.rs2;
1357
1358	g = pfpsd->fp_current_read_gsr(fp);
1359	g &= ~(GSR_IM_IRND_MASK);	/* zero the IM and IRND fields */
1360	r = nrs2 & 0x7;			/* get mode(3 bit) */
1361	g |= (r << GSR_IRND_SHIFT);
1362	pfpsd->fp_current_write_gsr(g, fp);
1363	return (ftt_none);
1364}
1365
1366/*
1367 * Simulator for VIS loads and stores between floating-point unit and memory.
1368 */
1369enum ftt_type
1370vis_fldst(
1371	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1372	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1373	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1374	void		*prw,	/* Pointer to locals and ins. */
1375	uint_t		asi)	/* asi to emulate! */
1376{
1377	union {
1378		vis_inst_type	inst;
1379		fp_inst_type	pinst;
1380	} i;
1381
1382	ASSERT(USERMODE(pregs->r_tstate));
1383	i.pinst = pinst;
1384	switch (asi) {
1385		case ASI_PST8_P:
1386		case ASI_PST8_S:
1387		case ASI_PST16_P:
1388		case ASI_PST16_S:
1389		case ASI_PST32_P:
1390		case ASI_PST32_S:
1391		case ASI_PST8_PL:
1392		case ASI_PST8_SL:
1393		case ASI_PST16_PL:
1394		case ASI_PST16_SL:
1395		case ASI_PST32_PL:
1396		case ASI_PST32_SL:
1397			return (vis_prtl_fst(pfpsd, i.inst, pregs,
1398			    prw, asi));
1399		case ASI_FL8_P:
1400		case ASI_FL8_S:
1401		case ASI_FL8_PL:
1402		case ASI_FL8_SL:
1403		case ASI_FL16_P:
1404		case ASI_FL16_S:
1405		case ASI_FL16_PL:
1406		case ASI_FL16_SL:
1407			return (vis_short_fls(pfpsd, i.inst, pregs,
1408			    prw, asi));
1409		case ASI_BLK_AIUP:
1410		case ASI_BLK_AIUS:
1411		case ASI_BLK_AIUPL:
1412		case ASI_BLK_AIUSL:
1413		case ASI_BLK_P:
1414		case ASI_BLK_S:
1415		case ASI_BLK_PL:
1416		case ASI_BLK_SL:
1417		case ASI_BLK_COMMIT_P:
1418		case ASI_BLK_COMMIT_S:
1419			return (vis_blk_fldst(pfpsd, i.inst, pregs,
1420			    prw, asi));
1421		default:
1422			return (ftt_unimplemented);
1423	}
1424}
1425
1426/*
1427 * Simulator for partial stores between floating-point unit and memory.
1428 */
1429static enum ftt_type
1430vis_prtl_fst(
1431	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1432	vis_inst_type	inst,	/* ISE instruction to simulate. */
1433	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1434	void		*prw,	/* Pointer to locals and ins. */
1435	uint_t		asi)	/* asi to emulate! */
1436{
1437	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1438	uint_t	opf, msk;
1439	int	h, i, j;
1440	uint64_t ea, tmsk;
1441	union {
1442		freg_type	f;
1443		uint64_t	ll;
1444		uint32_t	i[2];
1445		uint16_t	s[4];
1446		uint8_t		c[8];
1447	} k, l, res;
1448	enum ftt_type   ftt;
1449
1450	nrs1 = inst.rs1;
1451	nrs2 = inst.rs2;
1452	nrd = inst.rd;
1453	if ((nrd & 1) == 1) 		/* fix register encoding */
1454		nrd = (nrd & 0x1e) | 0x20;
1455	opf = inst.opf;
1456	res.ll = 0;
1457	if ((opf & 0x100) == 0) {	/* effective address = rs1  */
1458		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1459		if (ftt != ftt_none)
1460			return (ftt);
1461		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tmsk);
1462		if (ftt != ftt_none)
1463			return (ftt);
1464		msk = (uint_t)tmsk;
1465	} else {
1466		pfpsd->fp_trapaddr = (caddr_t)pregs->r_pc;
1467		return (ftt_unimplemented);
1468	}
1469
1470	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1471	if ((ea & 0x3) != 0)
1472		return (ftt_alignment);	/* Require 32 bit-alignment. */
1473
1474	switch (asi) {
1475	case ASI_PST8_P:
1476	case ASI_PST8_S:
1477		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1478		if (ftt != ftt_none)
1479			return (ftt);
1480		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1481		for (i = 0, j = 0x80; i <= 7; i++, j >>= 1) {
1482			if ((msk & j) == j)
1483				res.c[i] = k.c[i];
1484			else
1485				res.c[i] = l.c[i];
1486		}
1487		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1488		if (ftt != ftt_none)
1489			return (ftt);
1490		break;
1491	case ASI_PST8_PL:	/* little-endian */
1492	case ASI_PST8_SL:
1493		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1494		if (ftt != ftt_none)
1495			return (ftt);
1496		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1497		for (h = 7, i = 0, j = 1; i <= 7; h--, i++, j <<= 1) {
1498			if ((msk & j) == j)
1499				res.c[i] = k.c[h];
1500			else
1501				res.c[i] = l.c[i];
1502		}
1503		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1504		if (ftt != ftt_none)
1505			return (ftt);
1506		break;
1507	case ASI_PST16_P:
1508	case ASI_PST16_S:
1509		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1510		if (ftt != ftt_none)
1511			return (ftt);
1512		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1513		for (i = 0, j = 0x8; i <= 3; i++, j >>= 1) {
1514			if ((msk & j) == j)
1515				res.s[i] = k.s[i];
1516			else
1517				res.s[i] = l.s[i];
1518		}
1519		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1520		if (ftt != ftt_none)
1521			return (ftt);
1522		break;
1523	case ASI_PST16_PL:
1524	case ASI_PST16_SL:
1525		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1526		if (ftt != ftt_none)
1527			return (ftt);
1528		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1529		for (h = 7, i = 0, j = 1; i <= 6; h -= 2, i += 2, j <<= 1) {
1530			if ((msk & j) == j) {
1531				res.c[i] = k.c[h];
1532				res.c[i+1] = k.c[h-1];
1533			} else {
1534				res.c[i] = l.c[i];
1535				res.c[i+1] = l.c[i+1];
1536			}
1537		}
1538		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1539		if (ftt != ftt_none)
1540			return (ftt);
1541		break;
1542	case ASI_PST32_P:
1543	case ASI_PST32_S:
1544		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1545		if (ftt != ftt_none)
1546			return (ftt);
1547		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1548		for (i = 0, j = 0x2; i <= 1; i++, j >>= 1) {
1549			if ((msk & j) == j)
1550				res.i[i] = k.i[i];
1551			else
1552				res.i[i] = l.i[i];
1553		}
1554		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1555		if (ftt != ftt_none)
1556			return (ftt);
1557		break;
1558	case ASI_PST32_PL:
1559	case ASI_PST32_SL:
1560		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1561		if (ftt != ftt_none)
1562			return (ftt);
1563		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1564		for (h = 7, i = 0, j = 1; i <= 4; h -= 4, i += 4, j <<= 1) {
1565			if ((msk & j) == j) {
1566				res.c[i] = k.c[h];
1567				res.c[i+1] = k.c[h-1];
1568				res.c[i+2] = k.c[h-2];
1569				res.c[i+3] = k.c[h-3];
1570			} else {
1571				res.c[i] = l.c[i];
1572				res.c[i+1] = l.c[i+1];
1573				res.c[i+2] = l.c[i+2];
1574				res.c[i+3] = l.c[i+3];
1575			}
1576		}
1577		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1578		if (ftt != ftt_none)
1579			return (ftt);
1580		break;
1581	}
1582
1583	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1584	pregs->r_npc += 4;
1585	return (ftt_none);
1586}
1587
1588/*
1589 * Simulator for short load/stores between floating-point unit and memory.
1590 */
1591static enum ftt_type
1592vis_short_fls(
1593	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1594	vis_inst_type	inst,	/* ISE instruction to simulate. */
1595	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1596	void		*prw,	/* Pointer to locals and ins. */
1597	uint_t		asi)	/* asi to emulate! */
1598{
1599	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1600	uint_t	opf;
1601	uint64_t ea, tea;
1602	union {
1603		freg_type	f;
1604		uint64_t	ll;
1605		uint32_t	i[2];
1606		uint16_t	s[4];
1607		uint8_t		c[8];
1608	} k;
1609	union {
1610		vis_inst_type	inst;
1611		int		i;
1612	} fp;
1613	enum ftt_type   ftt = ftt_none;
1614	ushort_t us;
1615	uchar_t uc;
1616
1617	nrs1 = inst.rs1;
1618	nrs2 = inst.rs2;
1619	nrd = inst.rd;
1620	if ((nrd & 1) == 1) 		/* fix register encoding */
1621		nrd = (nrd & 0x1e) | 0x20;
1622	opf = inst.opf;
1623	fp.inst = inst;
1624	if ((opf & 0x100) == 0) { /* effective address = rs1 + rs2 */
1625		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1626		if (ftt != ftt_none)
1627			return (ftt);
1628		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
1629		if (ftt != ftt_none)
1630			return (ftt);
1631		ea += tea;
1632	} else {	/* effective address = rs1 + imm13 */
1633		fp.inst = inst;
1634		ea = (fp.i << 19) >> 19;	/* Extract simm13 field. */
1635		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &tea);
1636		if (ftt != ftt_none)
1637			return (ftt);
1638		ea += tea;
1639	}
1640	if (get_udatamodel() == DATAMODEL_ILP32)
1641		ea = (uint64_t)(caddr32_t)ea;
1642
1643	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1644	switch (asi) {
1645	case ASI_FL8_P:
1646	case ASI_FL8_S:
1647	case ASI_FL8_PL:		/* little-endian */
1648	case ASI_FL8_SL:
1649		if ((inst.op3 & 7) == 3) {	/* load byte */
1650			if (fuword8((void *)ea, &uc) == -1)
1651				return (ftt_fault);
1652			k.ll = 0;
1653			k.c[7] = uc;
1654			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1655		} else {			/* store byte */
1656			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1657			uc = k.c[7];
1658			if (subyte((caddr_t)ea, uc) == -1)
1659				return (ftt_fault);
1660		}
1661		break;
1662	case ASI_FL16_P:
1663	case ASI_FL16_S:
1664		if ((ea & 1) == 1)
1665			return (ftt_alignment);
1666		if ((inst.op3 & 7) == 3) {	/* load short */
1667			if (fuword16((void *)ea, &us) == -1)
1668				return (ftt_fault);
1669			k.ll = 0;
1670			k.s[3] = us;
1671			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1672		} else {			/* store short */
1673			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1674			us = k.s[3];
1675			if (suword16((caddr_t)ea, us) == -1)
1676				return (ftt_fault);
1677		}
1678		break;
1679	case ASI_FL16_PL:		/* little-endian */
1680	case ASI_FL16_SL:
1681		if ((ea & 1) == 1)
1682			return (ftt_alignment);
1683		if ((inst.op3 & 7) == 3) {	/* load short */
1684			if (fuword16((void *)ea, &us) == -1)
1685				return (ftt_fault);
1686			k.ll = 0;
1687			k.c[6] = (uchar_t)us;
1688			k.c[7] = (uchar_t)((us & 0xff00) >> 8);
1689			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1690		} else {			/* store short */
1691			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1692			uc = k.c[7];
1693			us = (ushort_t)((uc << 8) | k.c[6]);
1694			if (suword16((void *)ea, us) == -1)
1695				return (ftt_fault);
1696		}
1697		break;
1698	}
1699
1700	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1701	pregs->r_npc += 4;
1702	return (ftt_none);
1703}
1704
1705/*
1706 * Simulator for block loads and stores between floating-point unit and memory.
1707 * We pass the addrees of ea to sync_data_memory() to flush the Ecache.
1708 * Sync_data_memory() calls platform dependent code to flush the Ecache.
1709 */
1710static enum ftt_type
1711vis_blk_fldst(
1712	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1713	vis_inst_type	inst,	/* ISE instruction to simulate. */
1714	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1715	void		*prw,	/* Pointer to locals and ins. */
1716	uint_t		asi)	/* asi to emulate! */
1717{
1718	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1719	uint_t	opf, h, i, j;
1720	uint64_t ea, tea;
1721	union {
1722		freg_type	f;
1723		uint64_t	ll;
1724		uint8_t		c[8];
1725	} k, l;
1726	union {
1727		vis_inst_type	inst;
1728		int32_t		i;
1729	} fp;
1730	enum ftt_type   ftt;
1731	boolean_t little_endian = B_FALSE;
1732
1733	nrs1 = inst.rs1;
1734	nrs2 = inst.rs2;
1735	nrd = inst.rd;
1736	if ((nrd & 1) == 1) 		/* fix register encoding */
1737		nrd = (nrd & 0x1e) | 0x20;
1738
1739	/* ensure register is 8-double precision aligned */
1740	if ((nrd & 0xf) != 0)
1741		return (ftt_unimplemented);
1742
1743	opf = inst.opf;
1744	if ((opf & 0x100) == 0) { 	/* effective address = rs1 + rs2 */
1745		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1746		if (ftt != ftt_none)
1747			return (ftt);
1748		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
1749		if (ftt != ftt_none)
1750			return (ftt);
1751		ea += tea;
1752	} else {			/* effective address = rs1 + imm13 */
1753		fp.inst = inst;
1754		ea = (fp.i << 19) >> 19;	/* Extract simm13 field. */
1755		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &tea);
1756		if (ftt != ftt_none)
1757			return (ftt);
1758		ea += tea;
1759	}
1760	if ((ea & 0x3F) != 0)		/* Require 64 byte-alignment. */
1761		return (ftt_alignment);
1762
1763	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1764	switch (asi) {
1765	case ASI_BLK_AIUPL:
1766	case ASI_BLK_AIUSL:
1767	case ASI_BLK_PL:
1768	case ASI_BLK_SL:
1769		little_endian = B_TRUE;
1770		/* FALLTHROUGH */
1771	case ASI_BLK_AIUP:
1772	case ASI_BLK_AIUS:
1773	case ASI_BLK_P:
1774	case ASI_BLK_S:
1775	case ASI_BLK_COMMIT_P:
1776	case ASI_BLK_COMMIT_S:
1777		if ((inst.op3 & 7) == 3) {	/* lddf */
1778			for (i = 0; i < 8; i++, nrd += 2) {
1779				ftt = _fp_read_extword((uint64_t *)ea, &k.ll,
1780				    pfpsd);
1781				if (ftt != ftt_none)
1782					return (ftt);
1783				if (little_endian) {
1784					for (j = 0, h = 7; j < 8; j++, h--)
1785						l.c[h] = k.c[j];
1786					k.ll = l.ll;
1787				}
1788				_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD,
1789				    nrd);
1790				ea += 8;
1791			}
1792		} else {			/* stdf */
1793			for (i = 0; i < 8; i++, nrd += 2) {
1794				_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD,
1795				    nrd);
1796				if (little_endian) {
1797					for (j = 0, h = 7; j < 8; j++, h--)
1798						l.c[h] = k.c[j];
1799					k.ll = l.ll;
1800				}
1801				ftt = _fp_write_extword((uint64_t *)ea, k.ll,
1802				    pfpsd);
1803				if (ftt != ftt_none)
1804					return (ftt);
1805				ea += 8;
1806			}
1807		}
1808		if ((asi == ASI_BLK_COMMIT_P) || (asi == ASI_BLK_COMMIT_S))
1809			sync_data_memory((caddr_t)(ea - 64), 64);
1810		break;
1811	default:
1812		/* addr of unimp inst */
1813		pfpsd->fp_trapaddr = (caddr_t)pregs->r_pc;
1814		return (ftt_unimplemented);
1815	}
1816
1817	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1818	pregs->r_npc += 4;
1819	return (ftt_none);
1820}
1821
1822/*
1823 * Simulator for rd %gsr instruction.
1824 */
1825enum ftt_type
1826vis_rdgsr(
1827	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1828	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1829	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1830	void		*prw,	/* Pointer to locals and ins. */
1831	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1832{
1833	uint_t nrd;
1834	uint64_t r;
1835	enum ftt_type ftt = ftt_none;
1836
1837	nrd = pinst.rd;
1838
1839	r = pfpsd->fp_current_read_gsr(fp);
1840	ftt = write_iureg(pfpsd, nrd, pregs, prw, &r);
1841	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1842	pregs->r_npc += 4;
1843	return (ftt);
1844}
1845
1846/*
1847 * Simulator for wr %gsr instruction.
1848 */
1849enum ftt_type
1850vis_wrgsr(
1851	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1852	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1853	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1854	void		*prw,	/* Pointer to locals and ins. */
1855	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1856{
1857	uint_t nrs1;
1858	uint64_t r, r1, r2;
1859	enum ftt_type ftt = ftt_none;
1860
1861	nrs1 = pinst.rs1;
1862	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &r1);
1863	if (ftt != ftt_none)
1864		return (ftt);
1865	if (pinst.ibit == 0) {	/* copy the value in r[rs2] */
1866		uint_t nrs2;
1867
1868		nrs2 = pinst.rs2;
1869		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &r2);
1870		if (ftt != ftt_none)
1871			return (ftt);
1872	} else {	/* use sign_ext(simm13) */
1873		union {
1874			fp_inst_type	inst;
1875			uint32_t	i;
1876		} fp;
1877
1878		fp.inst = pinst;		/* Extract simm13 field */
1879		r2 = (fp.i << 19) >> 19;
1880	}
1881	r = r1 ^ r2;
1882	pfpsd->fp_current_write_gsr(r, fp);
1883	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1884	pregs->r_npc += 4;
1885	return (ftt);
1886}
1887
1888/*
1889 * This is the loadable module wrapper.
1890 */
1891#include <sys/errno.h>
1892#include <sys/modctl.h>
1893
1894/*
1895 * Module linkage information for the kernel.
1896 */
1897extern struct mod_ops mod_miscops;
1898
1899static struct modlmisc modlmisc = {
1900	&mod_miscops,
1901	"vis fp simulation",
1902};
1903
1904static struct modlinkage modlinkage = {
1905	MODREV_1, (void *)&modlmisc, NULL
1906};
1907
1908int
1909_init(void)
1910{
1911	return (mod_install(&modlinkage));
1912}
1913
1914int
1915_info(struct modinfo *modinfop)
1916{
1917	return (mod_info(&modlinkage, modinfop));
1918}
1919