xref: /illumos-gate/usr/src/uts/sun4/os/visinstr.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /* VIS floating point instruction simulator for Sparc FPU simulator. */
30 
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/fpu/fpusystm.h>
34 #include <sys/fpu/fpu_simulator.h>
35 #include <sys/vis_simulator.h>
36 #include <sys/fpu/globals.h>
37 #include <sys/privregs.h>
38 #include <sys/sun4asi.h>
39 #include <sys/machasi.h>
40 #include <sys/debug.h>
41 #include <sys/cpu_module.h>
42 #include <sys/systm.h>
43 #include <sys/machsystm.h>
44 
#define	FPU_REG_FIELD uint32_reg	/* Coordinate with FPU_REGS_TYPE. */
#define	FPU_DREG_FIELD uint64_reg	/* Coordinate with FPU_DREGS_TYPE. */
#define	FPU_FSR_FIELD uint64_reg	/* Coordinate with V9_FPU_FSR_TYPE. */

/*
 * Forward declarations of the file-local simulators.  Each one handles a
 * group of VIS opcodes and returns an ftt_type status.  Routines that read
 * or write integer registers take the saved register image (pregs) and the
 * window pointer (prw); routines that read or update the GSR take the
 * kfpu_t pointer.
 */
static enum ftt_type vis_array(fp_simd_type *, vis_inst_type, struct regs *,
				void *);
static enum ftt_type vis_alignaddr(fp_simd_type *, vis_inst_type,
				struct regs *, void *, kfpu_t *);
static enum ftt_type vis_edge(fp_simd_type *, vis_inst_type, struct regs *,
				void *);
static enum ftt_type vis_faligndata(fp_simd_type *, fp_inst_type,
				kfpu_t *);
static enum ftt_type vis_bmask(fp_simd_type *, vis_inst_type, struct regs *,
				void *, kfpu_t *);
static enum ftt_type vis_bshuffle(fp_simd_type *, fp_inst_type,
				kfpu_t *);
static enum ftt_type vis_siam(fp_simd_type *, vis_inst_type, kfpu_t *);
static enum ftt_type vis_fcmp(fp_simd_type *, vis_inst_type, struct regs *,
				void *);
static enum ftt_type vis_fmul(fp_simd_type *, vis_inst_type);
static enum ftt_type vis_fpixel(fp_simd_type *, vis_inst_type, kfpu_t *);
static enum ftt_type vis_fpaddsub(fp_simd_type *, vis_inst_type);
static enum ftt_type vis_pdist(fp_simd_type *, fp_inst_type);
static enum ftt_type vis_prtl_fst(fp_simd_type *, vis_inst_type, struct regs *,
				void *, uint_t);
static enum ftt_type vis_short_fls(fp_simd_type *, vis_inst_type,
				struct regs *, void *, uint_t);
static enum ftt_type vis_blk_fldst(fp_simd_type *, vis_inst_type,
				struct regs *, void *, uint_t);
74 
75 /*
76  * Simulator for VIS instructions with op3 == 0x36 that get fp_disabled
77  * traps.
78  */
79 enum ftt_type
80 vis_fpu_simulator(
81 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
82 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
83 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
84 	void		*prw,	/* Pointer to locals and ins. */
85 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
86 {
87 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
88 	uint_t	us1, us2, usr;
89 	uint64_t lus1, lus2, lusr;
90 	enum ftt_type ftt = ftt_none;
91 	union {
92 		vis_inst_type	inst;
93 		fp_inst_type	pinst;
94 	} f;
95 
96 	ASSERT(USERMODE(pregs->r_tstate));
97 	nrs1 = pinst.rs1;
98 	nrs2 = pinst.rs2;
99 	nrd = pinst.rd;
100 	f.pinst = pinst;
101 	if ((f.inst.opf & 1) == 0) {		/* double precision */
102 		if ((nrs1 & 1) == 1) 		/* fix register encoding */
103 			nrs1 = (nrs1 & 0x1e) | 0x20;
104 		if ((nrs2 & 1) == 1)
105 			nrs2 = (nrs2 & 0x1e) | 0x20;
106 		if ((nrd & 1) == 1)
107 			nrd = (nrd & 0x1e) | 0x20;
108 	}
109 
110 	switch (f.inst.opf) {
111 		/* these instr's do not use fp regs */
112 	case edge8:
113 	case edge8l:
114 	case edge8n:
115 	case edge8ln:
116 	case edge16:
117 	case edge16l:
118 	case edge16n:
119 	case edge16ln:
120 	case edge32:
121 	case edge32l:
122 	case edge32n:
123 	case edge32ln:
124 		ftt = vis_edge(pfpsd, f.inst, pregs, prw);
125 		break;
126 	case array8:
127 	case array16:
128 	case array32:
129 		ftt = vis_array(pfpsd, f.inst, pregs, prw);
130 		break;
131 	case alignaddr:
132 	case alignaddrl:
133 		ftt = vis_alignaddr(pfpsd, f.inst, pregs, prw, fp);
134 		break;
135 	case bmask:
136 		ftt = vis_bmask(pfpsd, f.inst, pregs, prw, fp);
137 		break;
138 	case fcmple16:
139 	case fcmpne16:
140 	case fcmpgt16:
141 	case fcmpeq16:
142 	case fcmple32:
143 	case fcmpne32:
144 	case fcmpgt32:
145 	case fcmpeq32:
146 		ftt = vis_fcmp(pfpsd, f.inst, pregs, prw);
147 		break;
148 	case fmul8x16:
149 	case fmul8x16au:
150 	case fmul8x16al:
151 	case fmul8sux16:
152 	case fmul8ulx16:
153 	case fmuld8sux16:
154 	case fmuld8ulx16:
155 		ftt = vis_fmul(pfpsd, f.inst);
156 		break;
157 	case fpack16:
158 	case fpack32:
159 	case fpackfix:
160 	case fexpand:
161 	case fpmerge:
162 		ftt = vis_fpixel(pfpsd, f.inst, fp);
163 		break;
164 	case pdist:
165 		ftt = vis_pdist(pfpsd, pinst);
166 		break;
167 	case faligndata:
168 		ftt = vis_faligndata(pfpsd, pinst, fp);
169 		break;
170 	case bshuffle:
171 		ftt = vis_bshuffle(pfpsd, pinst, fp);
172 		break;
173 	case fpadd16:
174 	case fpadd16s:
175 	case fpadd32:
176 	case fpadd32s:
177 	case fpsub16:
178 	case fpsub16s:
179 	case fpsub32:
180 	case fpsub32s:
181 		ftt = vis_fpaddsub(pfpsd, f.inst);
182 		break;
183 	case fzero:
184 		lusr = 0;
185 		_fp_pack_extword(pfpsd, &lusr, nrd);
186 		break;
187 	case fzeros:
188 		usr = 0;
189 		_fp_pack_word(pfpsd, &usr, nrd);
190 		break;
191 	case fnor:
192 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
193 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
194 		lusr = ~(lus1 | lus2);
195 		_fp_pack_extword(pfpsd, &lusr, nrd);
196 		break;
197 	case fnors:
198 		_fp_unpack_word(pfpsd, &us1, nrs1);
199 		_fp_unpack_word(pfpsd, &us2, nrs2);
200 		usr = ~(us1 | us2);
201 		_fp_pack_word(pfpsd, &usr, nrd);
202 		break;
203 	case fandnot2:
204 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
205 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
206 		lusr = (lus1 & ~lus2);
207 		_fp_pack_extword(pfpsd, &lusr, nrd);
208 		break;
209 	case fandnot2s:
210 		_fp_unpack_word(pfpsd, &us1, nrs1);
211 		_fp_unpack_word(pfpsd, &us2, nrs2);
212 		usr = (us1 & ~us2);
213 		_fp_pack_word(pfpsd, &usr, nrd);
214 		break;
215 	case fnot2:
216 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
217 		lusr = ~lus2;
218 		_fp_pack_extword(pfpsd, &lusr, nrd);
219 		break;
220 	case fnot2s:
221 		_fp_unpack_word(pfpsd, &us2, nrs2);
222 		usr = ~us2;
223 		_fp_pack_word(pfpsd, &usr, nrd);
224 		break;
225 	case fandnot1:
226 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
227 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
228 		lusr = (~lus1 & lus2);
229 		_fp_pack_extword(pfpsd, &lusr, nrd);
230 		break;
231 	case fandnot1s:
232 		_fp_unpack_word(pfpsd, &us1, nrs1);
233 		_fp_unpack_word(pfpsd, &us2, nrs2);
234 		usr = (~us1 & us2);
235 		_fp_pack_word(pfpsd, &usr, nrd);
236 		break;
237 	case fnot1:
238 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
239 		lusr = ~lus1;
240 		_fp_pack_extword(pfpsd, &lusr, nrd);
241 		break;
242 	case fnot1s:
243 		_fp_unpack_word(pfpsd, &us1, nrs1);
244 		usr = ~us1;
245 		_fp_pack_word(pfpsd, &usr, nrd);
246 		break;
247 	case fxor:
248 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
249 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
250 		lusr = (lus1 ^ lus2);
251 		_fp_pack_extword(pfpsd, &lusr, nrd);
252 		break;
253 	case fxors:
254 		_fp_unpack_word(pfpsd, &us1, nrs1);
255 		_fp_unpack_word(pfpsd, &us2, nrs2);
256 		usr = (us1 ^ us2);
257 		_fp_pack_word(pfpsd, &usr, nrd);
258 		break;
259 	case fnand:
260 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
261 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
262 		lusr = ~(lus1 & lus2);
263 		_fp_pack_extword(pfpsd, &lusr, nrd);
264 		break;
265 	case fnands:
266 		_fp_unpack_word(pfpsd, &us1, nrs1);
267 		_fp_unpack_word(pfpsd, &us2, nrs2);
268 		usr = ~(us1 & us2);
269 		_fp_pack_word(pfpsd, &usr, nrd);
270 		break;
271 	case fand:
272 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
273 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
274 		lusr = (lus1 & lus2);
275 		_fp_pack_extword(pfpsd, &lusr, nrd);
276 		break;
277 	case fands:
278 		_fp_unpack_word(pfpsd, &us1, nrs1);
279 		_fp_unpack_word(pfpsd, &us2, nrs2);
280 		usr = (us1 & us2);
281 		_fp_pack_word(pfpsd, &usr, nrd);
282 		break;
283 	case fxnor:
284 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
285 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
286 		lusr = ~(lus1 ^ lus2);
287 		_fp_pack_extword(pfpsd, &lusr, nrd);
288 		break;
289 	case fxnors:
290 		_fp_unpack_word(pfpsd, &us1, nrs1);
291 		_fp_unpack_word(pfpsd, &us2, nrs2);
292 		usr = ~(us1 ^ us2);
293 		_fp_pack_word(pfpsd, &usr, nrd);
294 		break;
295 	case fsrc1:
296 		_fp_unpack_extword(pfpsd, &lusr, nrs1);
297 		_fp_pack_extword(pfpsd, &lusr, nrd);
298 		break;
299 	case fsrc1s:
300 		_fp_unpack_word(pfpsd, &usr, nrs1);
301 		_fp_pack_word(pfpsd, &usr, nrd);
302 		break;
303 	case fornot2:
304 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
305 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
306 		lusr = (lus1 | ~lus2);
307 		_fp_pack_extword(pfpsd, &lusr, nrd);
308 		break;
309 	case fornot2s:
310 		_fp_unpack_word(pfpsd, &us1, nrs1);
311 		_fp_unpack_word(pfpsd, &us2, nrs2);
312 		usr = (us1 | ~us2);
313 		_fp_pack_word(pfpsd, &usr, nrd);
314 		break;
315 	case fsrc2:
316 		_fp_unpack_extword(pfpsd, &lusr, nrs2);
317 		_fp_pack_extword(pfpsd, &lusr, nrd);
318 		break;
319 	case fsrc2s:
320 		_fp_unpack_word(pfpsd, &usr, nrs2);
321 		_fp_pack_word(pfpsd, &usr, nrd);
322 		break;
323 	case fornot1:
324 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
325 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
326 		lusr = (~lus1 | lus2);
327 		_fp_pack_extword(pfpsd, &lusr, nrd);
328 		break;
329 	case fornot1s:
330 		_fp_unpack_word(pfpsd, &us1, nrs1);
331 		_fp_unpack_word(pfpsd, &us2, nrs2);
332 		usr = (~us1 | us2);
333 		_fp_pack_word(pfpsd, &usr, nrd);
334 		break;
335 	case for_op:
336 		_fp_unpack_extword(pfpsd, &lus1, nrs1);
337 		_fp_unpack_extword(pfpsd, &lus2, nrs2);
338 		lusr = (lus1 | lus2);
339 		_fp_pack_extword(pfpsd, &lusr, nrd);
340 		break;
341 	case fors_op:
342 		_fp_unpack_word(pfpsd, &us1, nrs1);
343 		_fp_unpack_word(pfpsd, &us2, nrs2);
344 		usr = (us1 | us2);
345 		_fp_pack_word(pfpsd, &usr, nrd);
346 		break;
347 	case fone:
348 		lusr = 0xffffffffffffffff;
349 		_fp_pack_extword(pfpsd, &lusr, nrd);
350 		break;
351 	case fones:
352 		usr = 0xffffffffUL;
353 		_fp_pack_word(pfpsd, &usr, nrd);
354 		break;
355 	case siam:
356 		ftt = vis_siam(pfpsd, f.inst, fp);
357 		break;
358 	default:
359 		return (ftt_unimplemented);
360 	}
361 
362 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
363 	pregs->r_npc += 4;
364 	return (ftt);
365 }
366 
367 /*
368  * Simulator for edge instructions
369  */
370 static enum ftt_type
371 vis_edge(
372 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
373 	vis_inst_type	inst,	/* FPU instruction to simulate. */
374 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
375 	void		*prw)	/* Pointer to locals and ins. */
376 
377 {
378 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
379 	enum ftt_type ftt;
380 	uint64_t addrl, addrr, mask;
381 	uint64_t ah61l, ah61r;		/* Higher 61 bits of address */
382 	int al3l, al3r;			/* Lower 3 bits of address */
383 	int am32;			/* Whether PSTATE.AM == 1 */
384 	uint_t ccr;
385 
386 	nrs1 = inst.rs1;
387 	nrs2 = inst.rs2;
388 	nrd = inst.rd;
389 
390 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &addrl);
391 	if (ftt != ftt_none)
392 		return (ftt);
393 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &addrr);
394 	if (ftt != ftt_none)
395 		return (ftt);
396 
397 	/* Get PSTATE.AM to determine 32-bit vs 64-bit addressing */
398 	am32 =  get_pstate() & 0x8;
399 	if (am32 == 1) {
400 		ah61l = addrl & ~0x7 & 0xffffffff;
401 		ah61r = addrr & ~0x7 & 0xffffffff;
402 	} else {
403 		ah61l = addrl & ~0x7;
404 		ah61r = addrr & ~0x7;
405 	}
406 
407 
408 	switch (inst.opf) {
409 	case edge8:
410 	case edge8n:
411 	case edge8l:
412 	case edge8ln:
413 		al3l = addrl & 0x7;
414 		switch (inst.opf) {
415 		case edge8:
416 		case edge8n:
417 			if (inst.opf == edge8) {
418 				VISINFO_KSTAT(vis_edge8);
419 			} else {
420 				VISINFO_KSTAT(vis_edge8n);
421 			}
422 			switch (al3l) {
423 			case 0:
424 				mask = 0xff;
425 				break;
426 			case 1:
427 				mask = 0x7f;
428 				break;
429 			case 2:
430 				mask = 0x3f;
431 				break;
432 			case 3:
433 				mask = 0x1f;
434 				break;
435 			case 4:
436 				mask = 0x0f;
437 				break;
438 			case 5:
439 				mask = 0x07;
440 				break;
441 			case 6:
442 				mask = 0x03;
443 				break;
444 			case 7:
445 				mask = 0x01;
446 				break;
447 			}
448 			if (ah61l == ah61r) {
449 				al3r = addrr & 0x7;
450 				switch (al3r) {
451 
452 				case 0:
453 					mask &= 0x80;
454 					break;
455 				case 1:
456 					mask &= 0xc0;
457 					break;
458 				case 2:
459 					mask &= 0xe0;
460 					break;
461 				case 3:
462 					mask &= 0xf0;
463 					break;
464 				case 4:
465 					mask &= 0xf8;
466 					break;
467 				case 5:
468 					mask &= 0xfc;
469 					break;
470 				case 6:
471 					mask &= 0xfe;
472 					break;
473 				case 7:
474 					mask &= 0xff;
475 					break;
476 				}
477 			}
478 			break;
479 		case edge8l:
480 		case edge8ln:
481 			if (inst.opf == edge8l) {
482 				VISINFO_KSTAT(vis_edge8l);
483 			} else {
484 				VISINFO_KSTAT(vis_edge8ln);
485 			}
486 
487 			switch (al3l) {
488 			case 0:
489 				mask = 0xff;
490 				break;
491 			case 1:
492 				mask = 0xfe;
493 				break;
494 			case 2:
495 				mask = 0xfc;
496 				break;
497 			case 3:
498 				mask = 0xf8;
499 				break;
500 			case 4:
501 				mask = 0xf0;
502 				break;
503 			case 5:
504 				mask = 0xe0;
505 				break;
506 			case 6:
507 				mask = 0xc0;
508 				break;
509 			case 7:
510 				mask = 0x80;
511 				break;
512 			}
513 			if (ah61l == ah61r) {
514 				al3r = addrr & 0x7;
515 				switch (al3r) {
516 				case 0:
517 					mask &= 0x01;
518 					break;
519 				case 1:
520 					mask &= 0x03;
521 					break;
522 				case 2:
523 					mask &= 0x07;
524 					break;
525 				case 3:
526 					mask &= 0x0f;
527 					break;
528 				case 4:
529 					mask &= 0x1f;
530 					break;
531 				case 5:
532 					mask &= 0x3f;
533 					break;
534 				case 6:
535 					mask &= 0x7f;
536 					break;
537 				case 7:
538 					mask &= 0xff;
539 					break;
540 				}
541 			}
542 			break;
543 		}
544 		break;
545 	case edge16:
546 	case edge16l:
547 	case edge16n:
548 	case edge16ln:
549 		al3l = addrl & 0x6;
550 		switch (inst.opf) {
551 		case edge16:
552 		case edge16n:
553 			if (inst.opf == edge16) {
554 				VISINFO_KSTAT(vis_edge16);
555 
556 			} else {
557 				VISINFO_KSTAT(vis_edge16n);
558 			}
559 			switch (al3l) {
560 			case 0:
561 				mask = 0xf;
562 				break;
563 			case 2:
564 				mask = 0x7;
565 				break;
566 			case 4:
567 				mask = 0x3;
568 				break;
569 			case 6:
570 				mask = 0x1;
571 				break;
572 			}
573 			if (ah61l == ah61r) {
574 				al3r = addrr & 0x6;
575 				switch (al3r) {
576 				case 0:
577 					mask &= 0x8;
578 					break;
579 				case 2:
580 					mask &= 0xc;
581 					break;
582 				case 4:
583 					mask &= 0xe;
584 					break;
585 				case 6:
586 					mask &= 0xf;
587 				}
588 			}
589 			break;
590 		case edge16l:
591 		case edge16ln:
592 			if (inst.opf == edge16l) {
593 				VISINFO_KSTAT(vis_edge16l);
594 
595 			} else {
596 				VISINFO_KSTAT(vis_edge16ln);
597 			}
598 			switch (al3l) {
599 			case 0:
600 				mask = 0xf;
601 				break;
602 			case 2:
603 				mask = 0xe;
604 				break;
605 			case 4:
606 				mask = 0xc;
607 				break;
608 			case 6:
609 				mask = 0x8;
610 				break;
611 			}
612 			if (ah61l == ah61r) {
613 				al3r = addrr & 0x6;
614 				switch (al3r) {
615 				case 0:
616 					mask &= 0x1;
617 					break;
618 				case 2:
619 					mask &= 0x3;
620 					break;
621 				case 4:
622 					mask &= 0x7;
623 					break;
624 				case 6:
625 					mask &= 0xf;
626 					break;
627 				}
628 			}
629 			break;
630 		}
631 		break;
632 	case edge32:
633 	case edge32l:
634 	case edge32n:
635 	case edge32ln:
636 		al3l = addrl & 0x4;
637 		switch (inst.opf) {
638 		case edge32:
639 		case edge32n:
640 			if (inst.opf == edge32) {
641 				VISINFO_KSTAT(vis_edge32);
642 
643 			} else {
644 				VISINFO_KSTAT(vis_edge32n);
645 			}
646 			switch (al3l) {
647 			case 0:
648 				mask = 0x3;
649 				break;
650 			case 4:
651 				mask = 0x1;
652 				break;
653 			}
654 			if (ah61l == ah61r) {
655 				al3r = addrr & 0x4;
656 				switch (al3r) {
657 				case 0:
658 					mask &= 0x2;
659 					break;
660 				case 4:
661 					mask &= 0x3;
662 					break;
663 				}
664 			}
665 			break;
666 		case edge32l:
667 		case edge32ln:
668 			if (inst.opf == edge32l) {
669 				VISINFO_KSTAT(vis_edge32l);
670 
671 			} else {
672 				VISINFO_KSTAT(vis_edge32ln);
673 			}
674 			switch (al3l) {
675 			case 0:
676 				mask = 0x3;
677 				break;
678 			case 4:
679 				mask = 0x2;
680 				break;
681 			}
682 			if (ah61l == ah61r) {
683 				al3r = addrr & 0x4;
684 				switch (al3r) {
685 				case 0:
686 					mask &= 0x1;
687 					break;
688 				case 4:
689 					mask &= 0x3;
690 					break;
691 			}
692 			break;
693 		}
694 		break;
695 		}
696 	}
697 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &mask);
698 
699 	switch (inst.opf) {
700 	case edge8:
701 	case edge8l:
702 	case edge16:
703 	case edge16l:
704 	case edge32:
705 	case edge32l:
706 
707 		/* We need to set the CCR if we have a carry overflow */
708 		/* If this is a 64 bit app, we need to CCR.xcc.v */
709 		/* This is the same as the SUBcc instruction */
710 		ccr = get_ccr();
711 		if (addrl > addrr) {
712 			if (am32 == 1) {
713 				ccr |= 0x2;
714 				set_ccr(ccr);
715 			} else {
716 			    ccr |= 0x20;
717 				set_ccr(ccr);
718 			}
719 		}
720 		break;
721 	}
722 	return (ftt);
723 }
724 /*
725  * Simulator for three dimentional array addressing instructions.
726  */
727 static enum ftt_type
728 vis_array(
729 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
730 	vis_inst_type	inst,	/* FPU instruction to simulate. */
731 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
732 	void		*prw)	/* Pointer to locals and ins. */
733 
734 {
735 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
736 	enum ftt_type ftt;
737 	uint64_t laddr, bsize, baddr;
738 	uint64_t nbit;
739 	int oy, oz;
740 
741 	nrs1 = inst.rs1;
742 	nrs2 = inst.rs2;
743 	nrd = inst.rd;
744 
745 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &laddr);
746 	if (ftt != ftt_none)
747 		return (ftt);
748 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &bsize);
749 	if (ftt != ftt_none)
750 		return (ftt);
751 
752 	if (bsize > 5) {
753 		bsize = 5;
754 	}
755 	nbit = (1 << bsize) - 1;	/* Number of bits for XY<6+n-1:6> */
756 	oy = 17 + bsize;		/* Offset of Y<6+n-1:6> */
757 	oz = 17 + 2 * bsize;		/* Offset of Z<8:5> */
758 
759 	baddr = 0;
760 	baddr |= (laddr >> (11 -  0)) & (0x03 <<  0);	/* X_integer<1:0> */
761 	baddr |= (laddr >> (33 -  2)) & (0x03 <<  2);	/* Y_integer<1:0> */
762 	baddr |= (laddr >> (55 -  4)) & (0x01 <<  4);	/* Z_integer<0>   */
763 	baddr |= (laddr >> (13 -  5)) & (0x0f <<  5);	/* X_integer<5:2> */
764 	baddr |= (laddr >> (35 -  9)) & (0x0f <<  9);	/* Y_integer<5:2> */
765 	baddr |= (laddr >> (56 - 13)) & (0x0f << 13);	/* Z_integer<4:1> */
766 	baddr |= (laddr >> (17 - 17)) & (nbit << 17);	/* X_integer<6+n-1:6> */
767 	baddr |= (laddr >> (39 - oy)) & (nbit << oy);	/* Y_integer<6+n-1:6> */
768 	baddr |= (laddr >> (60 - oz)) & (0x0f << oz);	/* Z_integer<8:5> */
769 
770 	switch (inst.opf) {
771 	case array8:
772 		VISINFO_KSTAT(vis_array8);
773 		break;
774 	case array16:
775 		VISINFO_KSTAT(vis_array16);
776 		baddr <<= 1;
777 		break;
778 	case array32:
779 		VISINFO_KSTAT(vis_array32);
780 		baddr <<= 2;
781 		break;
782 	}
783 
784 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &baddr);
785 
786 	return (ftt);
787 }
788 
789 /*
790  * Simulator for alignaddr and alignaddrl instructions.
791  */
792 static enum ftt_type
793 vis_alignaddr(
794 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
795 	vis_inst_type	inst,	/* FPU instruction to simulate. */
796 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
797 	void		*prw,	/* Pointer to locals and ins. */
798 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
799 {
800 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
801 	enum ftt_type ftt;
802 	uint64_t ea, tea, g, r;
803 	short s;
804 
805 	nrs1 = inst.rs1;
806 	nrs2 = inst.rs2;
807 	nrd = inst.rd;
808 
809 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
810 	if (ftt != ftt_none)
811 		return (ftt);
812 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
813 	if (ftt != ftt_none)
814 		return (ftt);
815 	ea += tea;
816 	r = ea & ~0x7;	/* zero least 3 significant bits */
817 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &r);
818 
819 
820 	g = pfpsd->get_gsr(fp);
821 	g &= ~(GSR_ALIGN_MASK);		/* zero the align offset */
822 	r = ea & 0x7;
823 	if (inst.opf == alignaddrl) {
824 		s = (short)(~r);	/* 2's complement for alignaddrl */
825 		if (s < 0)
826 			r = (uint64_t)((s + 1) & 0x7);
827 		else
828 			r = (uint64_t)(s & 0x7);
829 	}
830 	g |= (r << GSR_ALIGN_SHIFT) & GSR_ALIGN_MASK;
831 	pfpsd->set_gsr(g, fp);
832 
833 	return (ftt);
834 }
835 
836 /*
837  * Simulator for bmask instruction.
838  */
839 static enum ftt_type
840 vis_bmask(
841 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
842 	vis_inst_type	inst,	/* FPU instruction to simulate. */
843 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
844 	void		*prw,	/* Pointer to locals and ins. */
845 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
846 {
847 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
848 	enum ftt_type ftt;
849 	uint64_t ea, tea, g;
850 
851 	VISINFO_KSTAT(vis_bmask);
852 	nrs1 = inst.rs1;
853 	nrs2 = inst.rs2;
854 	nrd = inst.rd;
855 
856 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
857 	if (ftt != ftt_none)
858 		return (ftt);
859 	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
860 	if (ftt != ftt_none)
861 		return (ftt);
862 	ea += tea;
863 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &ea);
864 
865 	g = pfpsd->get_gsr(fp);
866 	g &= ~(GSR_MASK_MASK);		/* zero the mask offset */
867 
868 	/* Put the least significant 32 bits of ea in GSR.mask */
869 	g |= (ea << GSR_MASK_SHIFT) & GSR_MASK_MASK;
870 	pfpsd->set_gsr(g, fp);
871 	return (ftt);
872 }
873 
874 /*
875  * Simulator for fp[add|sub]* instruction.
876  */
877 static enum ftt_type
878 vis_fpaddsub(
879 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
880 	vis_inst_type	inst)	/* FPU instruction to simulate. */
881 {
882 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
883 	union {
884 		uint64_t	ll;
885 		uint32_t	i[2];
886 		uint16_t	s[4];
887 	} lrs1, lrs2, lrd;
888 	union {
889 		uint32_t	i;
890 		uint16_t	s[2];
891 	} krs1, krs2, krd;
892 	int i;
893 
894 	nrs1 = inst.rs1;
895 	nrs2 = inst.rs2;
896 	nrd = inst.rd;
897 	if ((inst.opf & 1) == 0) {	/* double precision */
898 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
899 			nrs1 = (nrs1 & 0x1e) | 0x20;
900 		if ((nrs2 & 1) == 1)
901 			nrs2 = (nrs2 & 0x1e) | 0x20;
902 		if ((nrd & 1) == 1)
903 			nrd = (nrd & 0x1e) | 0x20;
904 	}
905 	switch (inst.opf) {
906 	case fpadd16:
907 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
908 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
909 		for (i = 0; i <= 3; i++) {
910 			lrd.s[i] = lrs1.s[i] + lrs2.s[i];
911 		}
912 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
913 		break;
914 	case fpadd16s:
915 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
916 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
917 		for (i = 0; i <= 1; i++) {
918 			krd.s[i] = krs1.s[i] + krs2.s[i];
919 		}
920 		_fp_pack_word(pfpsd, &krd.i, nrd);
921 		break;
922 	case fpadd32:
923 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
924 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
925 		for (i = 0; i <= 1; i++) {
926 			lrd.i[i] = lrs1.i[i] + lrs2.i[i];
927 		}
928 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
929 		break;
930 	case fpadd32s:
931 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
932 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
933 		krd.i = krs1.i + krs2.i;
934 		_fp_pack_word(pfpsd, &krd.i, nrd);
935 		break;
936 	case fpsub16:
937 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
938 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
939 		for (i = 0; i <= 3; i++) {
940 			lrd.s[i] = lrs1.s[i] - lrs2.s[i];
941 		}
942 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
943 		break;
944 	case fpsub16s:
945 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
946 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
947 		for (i = 0; i <= 1; i++) {
948 			krd.s[i] = krs1.s[i] - krs2.s[i];
949 		}
950 		_fp_pack_word(pfpsd, &krd.i, nrd);
951 		break;
952 	case fpsub32:
953 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
954 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
955 		for (i = 0; i <= 1; i++) {
956 			lrd.i[i] = lrs1.i[i] - lrs2.i[i];
957 		}
958 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
959 		break;
960 	case fpsub32s:
961 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
962 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
963 		krd.i = krs1.i - krs2.i;
964 		_fp_pack_word(pfpsd, &krd.i, nrd);
965 		break;
966 	}
967 	return (ftt_none);
968 }
969 
970 /*
971  * Simulator for fcmp* instruction.
972  */
973 static enum ftt_type
974 vis_fcmp(
975 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
976 	vis_inst_type	inst,	/* FPU instruction to simulate. */
977 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
978 	void		*prw)	/* Pointer to locals and ins. */
979 {
980 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
981 	union {
982 		uint64_t	ll;
983 		uint32_t	i[2];
984 		uint16_t	s[4];
985 	} krs1, krs2, krd;
986 	enum ftt_type ftt;
987 	short sr1, sr2;
988 	int i, ir1, ir2;
989 
990 	nrs1 = inst.rs1;
991 	nrs2 = inst.rs2;
992 	nrd = inst.rd;
993 	krd.ll = 0;
994 	if ((nrs1 & 1) == 1) 	/* fix register encoding */
995 		nrs1 = (nrs1 & 0x1e) | 0x20;
996 	if ((nrs2 & 1) == 1)
997 		nrs2 = (nrs2 & 0x1e) | 0x20;
998 
999 	_fp_unpack_extword(pfpsd, &krs1.ll, nrs1);
1000 	_fp_unpack_extword(pfpsd, &krs2.ll, nrs2);
1001 	switch (inst.opf) {
1002 	case fcmple16:
1003 		VISINFO_KSTAT(vis_fcmple16);
1004 		for (i = 0; i <= 3; i++) {
1005 			sr1 = (short)krs1.s[i];
1006 			sr2 = (short)krs2.s[i];
1007 			if (sr1 <= sr2)
1008 				krd.ll += (0x8 >> i);
1009 		}
1010 		break;
1011 	case fcmpne16:
1012 		VISINFO_KSTAT(vis_fcmpne16);
1013 		for (i = 0; i <= 3; i++) {
1014 			sr1 = (short)krs1.s[i];
1015 			sr2 = (short)krs2.s[i];
1016 			if (sr1 != sr2)
1017 				krd.ll += (0x8 >> i);
1018 		}
1019 		break;
1020 	case fcmpgt16:
1021 		VISINFO_KSTAT(vis_fcmpgt16);
1022 		for (i = 0; i <= 3; i++) {
1023 			sr1 = (short)krs1.s[i];
1024 			sr2 = (short)krs2.s[i];
1025 			if (sr1 > sr2)
1026 				krd.ll += (0x8 >> i);
1027 		}
1028 		break;
1029 	case fcmpeq16:
1030 		VISINFO_KSTAT(vis_fcmpeq16);
1031 		for (i = 0; i <= 3; i++) {
1032 			sr1 = (short)krs1.s[i];
1033 			sr2 = (short)krs2.s[i];
1034 			if (sr1 == sr2)
1035 				krd.ll += (0x8 >> i);
1036 		}
1037 		break;
1038 	case fcmple32:
1039 		VISINFO_KSTAT(vis_fcmple32);
1040 		for (i = 0; i <= 1; i++) {
1041 			ir1 = (int)krs1.i[i];
1042 			ir2 = (int)krs2.i[i];
1043 			if (ir1 <= ir2)
1044 				krd.ll += (0x2 >> i);
1045 		}
1046 		break;
1047 	case fcmpne32:
1048 		VISINFO_KSTAT(vis_fcmpne32);
1049 		for (i = 0; i <= 1; i++) {
1050 			ir1 = (int)krs1.i[i];
1051 			ir2 = (int)krs2.i[i];
1052 			if (ir1 != ir2)
1053 				krd.ll += (0x2 >> i);
1054 		}
1055 		break;
1056 	case fcmpgt32:
1057 		VISINFO_KSTAT(vis_fcmpgt32);
1058 		for (i = 0; i <= 1; i++) {
1059 			ir1 = (int)krs1.i[i];
1060 			ir2 = (int)krs2.i[i];
1061 			if (ir1 > ir2)
1062 				krd.ll += (0x2 >> i);
1063 		}
1064 		break;
1065 	case fcmpeq32:
1066 		VISINFO_KSTAT(vis_fcmpeq32);
1067 		for (i = 0; i <= 1; i++) {
1068 			ir1 = (int)krs1.i[i];
1069 			ir2 = (int)krs2.i[i];
1070 			if (ir1 == ir2)
1071 				krd.ll += (0x2 >> i);
1072 		}
1073 		break;
1074 	}
1075 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &krd.ll);
1076 	return (ftt);
1077 }
1078 
1079 /*
1080  * Simulator for fmul* instruction.
1081  */
1082 static enum ftt_type
1083 vis_fmul(
1084 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1085 	vis_inst_type	inst)	/* FPU instruction to simulate. */
1086 {
1087 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1088 	union {
1089 		uint64_t	ll;
1090 		uint32_t	i[2];
1091 		uint16_t	s[4];
1092 		uint8_t		c[8];
1093 	} lrs1, lrs2, lrd;
1094 	union {
1095 		uint32_t	i;
1096 		uint16_t	s[2];
1097 		uint8_t		c[4];
1098 	} krs1, krs2, kres;
1099 	short s1, s2, sres;
1100 	ushort_t us1;
1101 	char c1;
1102 	int i;
1103 
1104 	nrs1 = inst.rs1;
1105 	nrs2 = inst.rs2;
1106 	nrd = inst.rd;
1107 	if ((inst.opf & 1) == 0) {	/* double precision */
1108 		if ((nrd & 1) == 1) 	/* fix register encoding */
1109 			nrd = (nrd & 0x1e) | 0x20;
1110 	}
1111 
1112 	switch (inst.opf) {
1113 	case fmul8x16:
1114 		VISINFO_KSTAT(vis_fmul8x16);
1115 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1116 		if ((nrs2 & 1) == 1)
1117 			nrs2 = (nrs2 & 0x1e) | 0x20;
1118 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1119 		for (i = 0; i <= 3; i++) {
1120 			us1 = (ushort_t)krs1.c[i];
1121 			s2 = (short)lrs2.s[i];
1122 			kres.i = us1 * s2;
1123 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
1124 			if (kres.c[3] >= 0x80)
1125 				sres++;
1126 			lrd.s[i] = sres;
1127 		}
1128 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1129 		break;
1130 	case fmul8x16au:
1131 		VISINFO_KSTAT(vis_fmul8x16au);
1132 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1133 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1134 		for (i = 0; i <= 3; i++) {
1135 			us1 = (ushort_t)krs1.c[i];
1136 			s2 = (short)krs2.s[0];
1137 			kres.i = us1 * s2;
1138 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
1139 			if (kres.c[3] >= 0x80)
1140 				sres++;
1141 			lrd.s[i] = sres;
1142 		}
1143 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1144 		break;
1145 	case fmul8x16al:
1146 		VISINFO_KSTAT(vis_fmul8x16al);
1147 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1148 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1149 		for (i = 0; i <= 3; i++) {
1150 			us1 = (ushort_t)krs1.c[i];
1151 			s2 = (short)krs2.s[1];
1152 			kres.i = us1 * s2;
1153 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
1154 			if (kres.c[3] >= 0x80)
1155 				sres++;
1156 			lrd.s[i] = sres;
1157 		}
1158 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1159 		break;
1160 	case fmul8sux16:
1161 		VISINFO_KSTAT(vis_fmul8sux16);
1162 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
1163 			nrs1 = (nrs1 & 0x1e) | 0x20;
1164 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1165 		if ((nrs2 & 1) == 1)
1166 			nrs2 = (nrs2 & 0x1e) | 0x20;
1167 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1168 		for (i = 0; i <= 3; i++) {
1169 			c1 = lrs1.c[(i*2)];
1170 			s1 = (short)c1;		/* keeps the sign alive */
1171 			s2 = (short)lrs2.s[i];
1172 			kres.i = s1 * s2;
1173 			sres = (short)((kres.c[1] << 8) | kres.c[2]);
1174 			if (kres.c[3] >= 0x80)
1175 				sres++;
1176 			if (sres < 0)
1177 				lrd.s[i] = (sres & 0xFFFF);
1178 			else
1179 				lrd.s[i] = sres;
1180 		}
1181 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1182 		break;
1183 	case fmul8ulx16:
1184 		VISINFO_KSTAT(vis_fmul8ulx16);
1185 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
1186 			nrs1 = (nrs1 & 0x1e) | 0x20;
1187 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1188 		if ((nrs2 & 1) == 1)
1189 			nrs2 = (nrs2 & 0x1e) | 0x20;
1190 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1191 		for (i = 0; i <= 3; i++) {
1192 			us1 = (ushort_t)lrs1.c[(i*2)+1];
1193 			s2 = (short)lrs2.s[i];
1194 			kres.i = us1 * s2;
1195 			sres = (short)kres.s[0];
1196 			if (kres.s[1] >= 0x8000)
1197 				sres++;
1198 			lrd.s[i] = sres;
1199 		}
1200 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1201 		break;
1202 	case fmuld8sux16:
1203 		VISINFO_KSTAT(vis_fmuld8sux16);
1204 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1205 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1206 		for (i = 0; i <= 1; i++) {
1207 			c1 = krs1.c[(i*2)];
1208 			s1 = (short)c1;		/* keeps the sign alive */
1209 			s2 = (short)krs2.s[i];
1210 			kres.i = s1 * s2;
1211 			lrd.i[i] = kres.i << 8;
1212 		}
1213 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1214 		break;
1215 	case fmuld8ulx16:
1216 		VISINFO_KSTAT(vis_fmuld8ulx16);
1217 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1218 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1219 		for (i = 0; i <= 1; i++) {
1220 			us1 = (ushort_t)krs1.c[(i*2)+1];
1221 			s2 = (short)krs2.s[i];
1222 			lrd.i[i] = us1 * s2;
1223 		}
1224 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1225 		break;
1226 	}
1227 	return (ftt_none);
1228 }
1229 
1230 /*
1231  * Simulator for fpixel formatting instructions.
1232  */
1233 static enum ftt_type
1234 vis_fpixel(
1235 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1236 	vis_inst_type	inst,	/* FPU instruction to simulate. */
1237 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1238 {
1239 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1240 	int	i, j, k, sf;
1241 	union {
1242 		uint64_t	ll;
1243 		uint32_t	i[2];
1244 		uint16_t	s[4];
1245 		uint8_t		c[8];
1246 	} lrs1, lrs2, lrd;
1247 	union {
1248 		uint32_t	i;
1249 		uint16_t	s[2];
1250 		uint8_t		c[4];
1251 	} krs1, krs2, krd;
1252 	uint64_t r;
1253 	int64_t l, m;
1254 	short s;
1255 	uchar_t uc;
1256 
1257 	nrs1 = inst.rs1;
1258 	nrs2 = inst.rs2;
1259 	nrd = inst.rd;
1260 	if ((inst.opf != fpack16) && (inst.opf != fpackfix)) {
1261 		if ((nrd & 1) == 1) 	/* fix register encoding */
1262 			nrd = (nrd & 0x1e) | 0x20;
1263 	}
1264 
1265 	switch (inst.opf) {
1266 	case fpack16:
1267 		VISINFO_KSTAT(vis_fpack16);
1268 		if ((nrs2 & 1) == 1) 	/* fix register encoding */
1269 			nrs2 = (nrs2 & 0x1e) | 0x20;
1270 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1271 		r = pfpsd->get_gsr(fp);
1272 		/* fpack16 ignores GSR.scale msb */
1273 		sf = (int)(GSR_SCALE(r) & 0xf);
1274 		for (i = 0; i <= 3; i++) {
1275 			s = (short)lrs2.s[i];	/* preserve the sign */
1276 			j = ((int)s << sf);
1277 			k = j >> 7;
1278 			if (k < 0) {
1279 				uc = 0;
1280 			} else if (k > 255) {
1281 				uc = 255;
1282 			} else {
1283 				uc = (uchar_t)k;
1284 			}
1285 			krd.c[i] = uc;
1286 		}
1287 		_fp_pack_word(pfpsd, &krd.i, nrd);
1288 		break;
1289 	case fpack32:
1290 		VISINFO_KSTAT(vis_fpack32);
1291 		if ((nrs1 & 1) == 1) 	/* fix register encoding */
1292 			nrs1 = (nrs1 & 0x1e) | 0x20;
1293 		_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1294 		if ((nrs2 & 1) == 1)
1295 			nrs2 = (nrs2 & 0x1e) | 0x20;
1296 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1297 
1298 		r = pfpsd->get_gsr(fp);
1299 		sf = (int)GSR_SCALE(r);
1300 		lrd.ll = lrs1.ll << 8;
1301 		for (i = 0, k = 3; i <= 1; i++, k += 4) {
1302 			j = (int)lrs2.i[i];	/* preserve the sign */
1303 			l = ((int64_t)j << sf);
1304 			m = l >> 23;
1305 			if (m < 0) {
1306 				uc = 0;
1307 			} else if (m > 255) {
1308 				uc = 255;
1309 			} else {
1310 				uc = (uchar_t)m;
1311 			}
1312 			lrd.c[k] = uc;
1313 		}
1314 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1315 		break;
1316 	case fpackfix:
1317 		VISINFO_KSTAT(vis_fpackfix);
1318 		if ((nrs2 & 1) == 1)
1319 			nrs2 = (nrs2 & 0x1e) | 0x20;
1320 		_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1321 
1322 		r = pfpsd->get_gsr(fp);
1323 		sf = (int)GSR_SCALE(r);
1324 		for (i = 0; i <= 1; i++) {
1325 			j = (int)lrs2.i[i];	/* preserve the sign */
1326 			l = ((int64_t)j << sf);
1327 			m = l >> 16;
1328 			if (m < -32768) {
1329 				s = -32768;
1330 			} else if (m > 32767) {
1331 				s = 32767;
1332 			} else {
1333 				s = (short)m;
1334 			}
1335 			krd.s[i] = s;
1336 		}
1337 		_fp_pack_word(pfpsd, &krd.i, nrd);
1338 		break;
1339 	case fexpand:
1340 		VISINFO_KSTAT(vis_fexpand);
1341 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1342 		for (i = 0; i <= 3; i++) {
1343 			uc = krs2.c[i];
1344 			lrd.s[i] = (ushort_t)(uc << 4);
1345 		}
1346 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1347 		break;
1348 	case fpmerge:
1349 		VISINFO_KSTAT(vis_fpmerge);
1350 		_fp_unpack_word(pfpsd, &krs1.i, nrs1);
1351 		_fp_unpack_word(pfpsd, &krs2.i, nrs2);
1352 		for (i = 0, j = 0; i <= 3; i++, j += 2) {
1353 			lrd.c[j] = krs1.c[i];
1354 			lrd.c[j+1] = krs2.c[i];
1355 		}
1356 		_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1357 		break;
1358 	}
1359 	return (ftt_none);
1360 }
1361 
1362 /*
1363  * Simulator for pdist instruction.
1364  */
1365 enum ftt_type
1366 vis_pdist(
1367 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1368 	fp_inst_type	pinst)	/* FPU instruction to simulate. */
1369 {
1370 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1371 	int	i;
1372 	short	s;
1373 	union {
1374 		uint64_t	ll;
1375 		uint8_t		c[8];
1376 	} lrs1, lrs2, lrd;
1377 
1378 	nrs1 = pinst.rs1;
1379 	nrs2 = pinst.rs2;
1380 	nrd = pinst.rd;
1381 	VISINFO_KSTAT(vis_pdist);
1382 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1383 		nrs1 = (nrs1 & 0x1e) | 0x20;
1384 	if ((nrs2 & 1) == 1)
1385 		nrs2 = (nrs2 & 0x1e) | 0x20;
1386 	if ((nrd & 1) == 1)
1387 		nrd = (nrd & 0x1e) | 0x20;
1388 
1389 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1390 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1391 	_fp_unpack_extword(pfpsd, &lrd.ll, nrd);
1392 
1393 	for (i = 0; i <= 7; i++) {
1394 		s = (short)(lrs1.c[i] - lrs2.c[i]);
1395 		if (s < 0)
1396 			s = ~s + 1;
1397 		lrd.ll += s;
1398 	}
1399 
1400 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1401 	return (ftt_none);
1402 }
1403 
1404 /*
1405  * Simulator for faligndata instruction.
1406  */
1407 static enum ftt_type
1408 vis_faligndata(
1409 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1410 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1411 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1412 {
1413 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1414 	int	i, j, k, ao;
1415 	union {
1416 		uint64_t	ll;
1417 		uint8_t		c[8];
1418 	} lrs1, lrs2, lrd;
1419 	uint64_t r;
1420 
1421 	nrs1 = pinst.rs1;
1422 	nrs2 = pinst.rs2;
1423 	nrd = pinst.rd;
1424 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1425 		nrs1 = (nrs1 & 0x1e) | 0x20;
1426 	if ((nrs2 & 1) == 1)
1427 		nrs2 = (nrs2 & 0x1e) | 0x20;
1428 	if ((nrd & 1) == 1)
1429 		nrd = (nrd & 0x1e) | 0x20;
1430 
1431 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1432 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1433 
1434 	r = pfpsd->get_gsr(fp);
1435 	ao = (int)GSR_ALIGN(r);
1436 
1437 	for (i = 0, j = ao, k = 0; i <= 7; i++)
1438 		if (j <= 7) {
1439 			lrd.c[i] = lrs1.c[j++];
1440 		} else {
1441 			lrd.c[i] = lrs2.c[k++];
1442 		}
1443 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1444 
1445 	return (ftt_none);
1446 }
1447 
1448 /*
1449  * Simulator for bshuffle instruction.
1450  */
1451 static enum ftt_type
1452 vis_bshuffle(
1453 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1454 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1455 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1456 {
1457 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1458 	int	i, j, ao;
1459 	union {
1460 		uint64_t	ll;
1461 		uint8_t		c[8];
1462 	} lrs1, lrs2, lrd;
1463 	uint64_t r;
1464 
1465 	VISINFO_KSTAT(vis_bshuffle);
1466 	nrs1 = pinst.rs1;
1467 	nrs2 = pinst.rs2;
1468 	nrd = pinst.rd;
1469 	if ((nrs1 & 1) == 1) 		/* fix register encoding */
1470 		nrs1 = (nrs1 & 0x1e) | 0x20;
1471 	if ((nrs2 & 1) == 1)
1472 		nrs2 = (nrs2 & 0x1e) | 0x20;
1473 	if ((nrd & 1) == 1)
1474 		nrd = (nrd & 0x1e) | 0x20;
1475 
1476 	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
1477 	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);
1478 
1479 	/* r = get_gsr(fp); */
1480 	r = pfpsd->get_gsr(fp);
1481 	ao = (int)GSR_MASK(r);
1482 
1483 	/*
1484 	 * BSHUFFLE Destination Byte Selection
1485 	 * rd Byte	Source
1486 	 * 0		rs byte[GSR.mask<31..28>]
1487 	 * 1		rs byte[GSR.mask<27..24>]
1488 	 * 2		rs byte[GSR.mask<23..20>]
1489 	 * 3		rs byte[GSR.mask<19..16>]
1490 	 * 4		rs byte[GSR.mask<15..12>]
1491 	 * 5		rs byte[GSR.mask<11..8>]
1492 	 * 6		rs byte[GSR.mask<7..4>]
1493 	 * 7		rs byte[GSR.mask<3..0>]
1494 	 * P.S. rs1 is the upper half and rs2 is the lower half
1495 	 * Bytes in the source value are numbered from most to
1496 	 * least significant
1497 	 */
1498 	for (i = 7; i >= 0; i--, ao = (ao >> 4)) {
1499 		j = ao & 0xf;		/* get byte number */
1500 		if (j < 8) {
1501 			lrd.c[i] = lrs1.c[j];
1502 		} else {
1503 			lrd.c[i] = lrs2.c[j - 8];
1504 		}
1505 	}
1506 	_fp_pack_extword(pfpsd, &lrd.ll, nrd);
1507 
1508 	return (ftt_none);
1509 }
1510 
1511 /*
1512  * Simulator for siam instruction.
1513  */
1514 static enum ftt_type
1515 vis_siam(
1516 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1517 	vis_inst_type	inst,	/* FPU instruction to simulate. */
1518 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1519 {
1520 	uint_t	nrs2;			/* Register number fields. */
1521 	uint64_t g, r;
1522 	nrs2 = inst.rs2;
1523 
1524 	g = pfpsd->get_gsr(fp);
1525 	g &= ~(GSR_IM_IRND_MASK);	/* zero the IM and IRND fields */
1526 	r = nrs2 & 0x7;			/* get mode(3 bit) */
1527 	g |= (r << GSR_IRND_SHIFT);
1528 	pfpsd->set_gsr(g, fp);
1529 	return (ftt_none);
1530 }
1531 
1532 /*
1533  * Simulator for VIS loads and stores between floating-point unit and memory.
1534  */
1535 enum ftt_type
1536 vis_fldst(
1537 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1538 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1539 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1540 	void		*prw,	/* Pointer to locals and ins. */
1541 	uint_t		asi)	/* asi to emulate! */
1542 {
1543 	union {
1544 		vis_inst_type	inst;
1545 		fp_inst_type	pinst;
1546 	} i;
1547 
1548 	ASSERT(USERMODE(pregs->r_tstate));
1549 	i.pinst = pinst;
1550 	switch (asi) {
1551 		case ASI_PST8_P:
1552 		case ASI_PST8_S:
1553 		case ASI_PST16_P:
1554 		case ASI_PST16_S:
1555 		case ASI_PST32_P:
1556 		case ASI_PST32_S:
1557 		case ASI_PST8_PL:
1558 		case ASI_PST8_SL:
1559 		case ASI_PST16_PL:
1560 		case ASI_PST16_SL:
1561 		case ASI_PST32_PL:
1562 		case ASI_PST32_SL:
1563 			return (vis_prtl_fst(pfpsd, i.inst, pregs,
1564 				prw, asi));
1565 		case ASI_FL8_P:
1566 		case ASI_FL8_S:
1567 		case ASI_FL8_PL:
1568 		case ASI_FL8_SL:
1569 		case ASI_FL16_P:
1570 		case ASI_FL16_S:
1571 		case ASI_FL16_PL:
1572 		case ASI_FL16_SL:
1573 			return (vis_short_fls(pfpsd, i.inst, pregs,
1574 				prw, asi));
1575 		case ASI_BLK_AIUP:
1576 		case ASI_BLK_AIUS:
1577 		case ASI_BLK_AIUPL:
1578 		case ASI_BLK_AIUSL:
1579 		case ASI_BLK_P:
1580 		case ASI_BLK_S:
1581 		case ASI_BLK_PL:
1582 		case ASI_BLK_SL:
1583 		case ASI_BLK_COMMIT_P:
1584 		case ASI_BLK_COMMIT_S:
1585 			return (vis_blk_fldst(pfpsd, i.inst, pregs,
1586 				prw, asi));
1587 		default:
1588 			return (ftt_unimplemented);
1589 	}
1590 }
1591 
1592 /*
1593  * Simulator for partial stores between floating-point unit and memory.
1594  */
1595 static enum ftt_type
1596 vis_prtl_fst(
1597 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1598 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1599 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1600 	void		*prw,	/* Pointer to locals and ins. */
1601 	uint_t		asi)	/* asi to emulate! */
1602 {
1603 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1604 	uint_t	opf, msk;
1605 	int	h, i, j;
1606 	uint64_t ea, tmsk;
1607 	union {
1608 		freg_type	f;
1609 		uint64_t	ll;
1610 		uint32_t	i[2];
1611 		uint16_t	s[4];
1612 		uint8_t		c[8];
1613 	} k, l, res;
1614 	enum ftt_type   ftt;
1615 
1616 	nrs1 = inst.rs1;
1617 	nrs2 = inst.rs2;
1618 	nrd = inst.rd;
1619 	if ((nrd & 1) == 1) 		/* fix register encoding */
1620 		nrd = (nrd & 0x1e) | 0x20;
1621 	opf = inst.opf;
1622 	res.ll = 0;
1623 	if ((opf & 0x100) == 0) {	/* effective address = rs1  */
1624 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1625 		if (ftt != ftt_none)
1626 			return (ftt);
1627 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tmsk);
1628 		if (ftt != ftt_none)
1629 			return (ftt);
1630 		msk = (uint_t)tmsk;
1631 	} else {
1632 		pfpsd->fp_trapaddr = (caddr_t)pregs->r_pc;
1633 		return (ftt_unimplemented);
1634 	}
1635 
1636 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1637 	if ((ea & 0x3) != 0)
1638 		return (ftt_alignment);	/* Require 32 bit-alignment. */
1639 
1640 	switch (asi) {
1641 	case ASI_PST8_P:
1642 	case ASI_PST8_S:
1643 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1644 		if (ftt != ftt_none)
1645 			return (ftt);
1646 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1647 		for (i = 0, j = 0x80; i <= 7; i++, j >>= 1) {
1648 			if ((msk & j) == j)
1649 				res.c[i] = k.c[i];
1650 			else
1651 				res.c[i] = l.c[i];
1652 		}
1653 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1654 		if (ftt != ftt_none)
1655 			return (ftt);
1656 		break;
1657 	case ASI_PST8_PL:	/* little-endian */
1658 	case ASI_PST8_SL:
1659 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1660 		if (ftt != ftt_none)
1661 			return (ftt);
1662 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1663 		for (h = 7, i = 0, j = 0x80; i <= 7; h--, i++, j >>= 1) {
1664 			if ((msk & j) == j)
1665 				res.c[h] = k.c[i];
1666 			else
1667 				res.c[h] = l.c[i];
1668 		}
1669 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1670 		if (ftt != ftt_none)
1671 			return (ftt);
1672 		break;
1673 	case ASI_PST16_P:
1674 	case ASI_PST16_S:
1675 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1676 		if (ftt != ftt_none)
1677 			return (ftt);
1678 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1679 		for (i = 0, j = 0x8; i <= 3; i++, j >>= 1) {
1680 			if ((msk & j) == j)
1681 				res.s[i] = k.s[i];
1682 			else
1683 				res.s[i] = l.s[i];
1684 		}
1685 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1686 		if (ftt != ftt_none)
1687 			return (ftt);
1688 		break;
1689 	case ASI_PST16_PL:
1690 	case ASI_PST16_SL:
1691 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1692 		if (ftt != ftt_none)
1693 			return (ftt);
1694 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1695 		for (h = 7, i = 0, j = 0x8; i <= 6; h -= 2, i += 2, j >>= 1) {
1696 			if ((msk & j) == j) {
1697 				res.c[h] = k.c[i];
1698 				res.c[h-1] = k.c[i+1];
1699 			} else {
1700 				res.c[h] = l.c[i];
1701 				res.c[h-1] = l.c[i+1];
1702 			}
1703 		}
1704 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1705 		if (ftt != ftt_none)
1706 			return (ftt);
1707 		break;
1708 	case ASI_PST32_P:
1709 	case ASI_PST32_S:
1710 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1711 		if (ftt != ftt_none)
1712 			return (ftt);
1713 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1714 		for (i = 0, j = 0x2; i <= 1; i++, j >>= 1) {
1715 			if ((msk & j) == j)
1716 				res.i[i] = k.i[i];
1717 			else
1718 				res.i[i] = l.i[i];
1719 		}
1720 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1721 		if (ftt != ftt_none)
1722 			return (ftt);
1723 		break;
1724 	case ASI_PST32_PL:
1725 	case ASI_PST32_SL:
1726 		ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd);
1727 		if (ftt != ftt_none)
1728 			return (ftt);
1729 		_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1730 		for (h = 7, i = 0, j = 0x2; i <= 4; h -= 4, i += 4, j >>= 1) {
1731 			if ((msk & j) == j) {
1732 				res.c[h] = k.c[i];
1733 				res.c[h-1] = k.c[i+1];
1734 				res.c[h-2] = k.c[i+2];
1735 				res.c[h-3] = k.c[i+3];
1736 			} else {
1737 				res.c[h] = l.c[i];
1738 				res.c[h-1] = l.c[i+1];
1739 				res.c[h-2] = l.c[i+2];
1740 				res.c[h-3] = l.c[i+3];
1741 			}
1742 		}
1743 		ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd);
1744 		if (ftt != ftt_none)
1745 			return (ftt);
1746 		break;
1747 	}
1748 
1749 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1750 	pregs->r_npc += 4;
1751 	return (ftt_none);
1752 }
1753 
1754 /*
1755  * Simulator for short load/stores between floating-point unit and memory.
1756  */
1757 static enum ftt_type
1758 vis_short_fls(
1759 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1760 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1761 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1762 	void		*prw,	/* Pointer to locals and ins. */
1763 	uint_t		asi)	/* asi to emulate! */
1764 {
1765 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1766 	uint_t	opf;
1767 	uint64_t ea, tea;
1768 	union {
1769 		freg_type	f;
1770 		uint64_t	ll;
1771 		uint32_t	i[2];
1772 		uint16_t	s[4];
1773 		uint8_t		c[8];
1774 	} k;
1775 	union {
1776 		vis_inst_type	inst;
1777 		int		i;
1778 	} fp;
1779 	enum ftt_type   ftt = ftt_none;
1780 	ushort_t us;
1781 	uchar_t uc;
1782 
1783 	nrs1 = inst.rs1;
1784 	nrs2 = inst.rs2;
1785 	nrd = inst.rd;
1786 	if ((nrd & 1) == 1) 		/* fix register encoding */
1787 		nrd = (nrd & 0x1e) | 0x20;
1788 	opf = inst.opf;
1789 	fp.inst = inst;
1790 	if ((opf & 0x100) == 0) { /* effective address = rs1 + rs2 */
1791 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1792 		if (ftt != ftt_none)
1793 			return (ftt);
1794 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
1795 		if (ftt != ftt_none)
1796 			return (ftt);
1797 		ea += tea;
1798 	} else {	/* effective address = rs1 + imm13 */
1799 		fp.inst = inst;
1800 		ea = (fp.i << 19) >> 19;	/* Extract simm13 field. */
1801 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &tea);
1802 		if (ftt != ftt_none)
1803 			return (ftt);
1804 		ea += tea;
1805 	}
1806 	if (get_udatamodel() == DATAMODEL_ILP32)
1807 		ea = (uint64_t)(caddr32_t)ea;
1808 
1809 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1810 	switch (asi) {
1811 	case ASI_FL8_P:
1812 	case ASI_FL8_S:
1813 	case ASI_FL8_PL:		/* little-endian */
1814 	case ASI_FL8_SL:
1815 		if ((inst.op3 & 7) == 3) {	/* load byte */
1816 			if (fuword8((void *)ea, &uc) == -1)
1817 				return (ftt_fault);
1818 			k.ll = 0;
1819 			k.c[7] = uc;
1820 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1821 		} else {			/* store byte */
1822 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1823 			uc = k.c[7];
1824 			if (subyte((caddr_t)ea, uc) == -1)
1825 				return (ftt_fault);
1826 		}
1827 		break;
1828 	case ASI_FL16_P:
1829 	case ASI_FL16_S:
1830 		if ((ea & 1) == 1)
1831 			return (ftt_alignment);
1832 		if ((inst.op3 & 7) == 3) {	/* load short */
1833 			if (fuword16((void *)ea, &us) == -1)
1834 				return (ftt_fault);
1835 			k.ll = 0;
1836 			k.s[3] = us;
1837 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1838 		} else {			/* store short */
1839 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1840 			us = k.s[3];
1841 			if (suword16((caddr_t)ea, us) == -1)
1842 				return (ftt_fault);
1843 		}
1844 		break;
1845 	case ASI_FL16_PL:		/* little-endian */
1846 	case ASI_FL16_SL:
1847 		if ((ea & 1) == 1)
1848 			return (ftt_alignment);
1849 		if ((inst.op3 & 7) == 3) {	/* load short */
1850 			if (fuword16((void *)ea, &us) == -1)
1851 				return (ftt_fault);
1852 			k.ll = 0;
1853 			k.c[6] = (uchar_t)us;
1854 			k.c[7] = (uchar_t)((us & 0xff00) >> 8);
1855 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1856 		} else {			/* store short */
1857 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1858 			uc = k.c[7];
1859 			us = (ushort_t)((uc << 8) | k.c[6]);
1860 			if (suword16((void *)ea, us) == -1)
1861 				return (ftt_fault);
1862 		}
1863 		break;
1864 	}
1865 
1866 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1867 	pregs->r_npc += 4;
1868 	return (ftt_none);
1869 }
1870 
1871 /*
1872  * Simulator for block loads and stores between floating-point unit and memory.
1873  * XXX - OK, so it is really gross to flush the whole Ecache for a block commit
1874  *	 store - but the circumstances under which this code actually gets
1875  *	 used in real life are so obscure that you can live with it!
1876  */
1877 static enum ftt_type
1878 vis_blk_fldst(
1879 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1880 	vis_inst_type	inst,	/* ISE instruction to simulate. */
1881 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1882 	void		*prw,	/* Pointer to locals and ins. */
1883 	uint_t		asi)	/* asi to emulate! */
1884 {
1885 	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
1886 	uint_t	opf, h, i, j;
1887 	uint64_t ea, tea;
1888 	union {
1889 		freg_type	f;
1890 		uint64_t	ll;
1891 		uint8_t		c[8];
1892 	} k, l;
1893 	union {
1894 		vis_inst_type	inst;
1895 		int32_t		i;
1896 	} fp;
1897 	enum ftt_type   ftt;
1898 	boolean_t little_endian = B_FALSE;
1899 
1900 	nrs1 = inst.rs1;
1901 	nrs2 = inst.rs2;
1902 	nrd = inst.rd;
1903 	if ((nrd & 1) == 1) 		/* fix register encoding */
1904 		nrd = (nrd & 0x1e) | 0x20;
1905 
1906 	/* ensure register is 8-double precision aligned */
1907 	if ((nrd & 0xf) != 0)
1908 		return (ftt_unimplemented);
1909 
1910 	opf = inst.opf;
1911 	if ((opf & 0x100) == 0) { 	/* effective address = rs1 + rs2 */
1912 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea);
1913 		if (ftt != ftt_none)
1914 			return (ftt);
1915 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea);
1916 		if (ftt != ftt_none)
1917 			return (ftt);
1918 		ea += tea;
1919 	} else {			/* effective address = rs1 + imm13 */
1920 		fp.inst = inst;
1921 		ea = (fp.i << 19) >> 19;	/* Extract simm13 field. */
1922 		ftt = read_iureg(pfpsd, nrs1, pregs, prw, &tea);
1923 		if (ftt != ftt_none)
1924 			return (ftt);
1925 		ea += tea;
1926 	}
1927 	if ((ea & 0x3F) != 0)		/* Require 64 byte-alignment. */
1928 		return (ftt_alignment);
1929 
1930 	pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */
1931 	switch (asi) {
1932 	case ASI_BLK_AIUPL:
1933 	case ASI_BLK_AIUSL:
1934 	case ASI_BLK_PL:
1935 	case ASI_BLK_SL:
1936 		little_endian = B_TRUE;
1937 		/* FALLTHROUGH */
1938 	case ASI_BLK_AIUP:
1939 	case ASI_BLK_AIUS:
1940 	case ASI_BLK_P:
1941 	case ASI_BLK_S:
1942 	case ASI_BLK_COMMIT_P:
1943 	case ASI_BLK_COMMIT_S:
1944 		if ((inst.op3 & 7) == 3) {	/* lddf */
1945 		    for (i = 0; i < 8; i++, ea += 8, nrd += 2) {
1946 			ftt = _fp_read_extword((uint64_t *)ea, &k.ll, pfpsd);
1947 			if (ftt != ftt_none)
1948 				return (ftt);
1949 			if (little_endian) {
1950 				for (j = 0, h = 7; j < 8; j++, h--)
1951 					l.c[h] = k.c[j];
1952 				k.ll = l.ll;
1953 			}
1954 			_fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1955 		    }
1956 		} else {			/* stdf */
1957 		    for (i = 0; i < 8; i++, ea += 8, nrd += 2) {
1958 			_fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd);
1959 			if (little_endian) {
1960 				for (j = 0, h = 7; j < 8; j++, h--)
1961 					l.c[h] = k.c[j];
1962 				k.ll = l.ll;
1963 			}
1964 			ftt = _fp_write_extword((uint64_t *)ea, k.ll, pfpsd);
1965 			if (ftt != ftt_none)
1966 				return (ftt);
1967 		    }
1968 		}
1969 		if ((asi == ASI_BLK_COMMIT_P) || (asi == ASI_BLK_COMMIT_S))
1970 			cpu_flush_ecache();
1971 		break;
1972 	default:
1973 		/* addr of unimp inst */
1974 		pfpsd->fp_trapaddr = (caddr_t)pregs->r_pc;
1975 		return (ftt_unimplemented);
1976 	}
1977 
1978 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
1979 	pregs->r_npc += 4;
1980 	return (ftt_none);
1981 }
1982 
1983 /*
1984  * Simulator for rd %gsr instruction.
1985  */
1986 enum ftt_type
1987 vis_rdgsr(
1988 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
1989 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
1990 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
1991 	void		*prw,	/* Pointer to locals and ins. */
1992 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
1993 {
1994 	uint_t nrd;
1995 	uint64_t r;
1996 	enum ftt_type ftt = ftt_none;
1997 
1998 	nrd = pinst.rd;
1999 
2000 	r = pfpsd->get_gsr(fp);
2001 	ftt = write_iureg(pfpsd, nrd, pregs, prw, &r);
2002 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
2003 	pregs->r_npc += 4;
2004 	return (ftt);
2005 }
2006 
2007 /*
2008  * Simulator for wr %gsr instruction.
2009  */
2010 enum ftt_type
2011 vis_wrgsr(
2012 	fp_simd_type	*pfpsd,	/* FPU simulator data. */
2013 	fp_inst_type	pinst,	/* FPU instruction to simulate. */
2014 	struct regs	*pregs,	/* Pointer to PCB image of registers. */
2015 	void		*prw,	/* Pointer to locals and ins. */
2016 	kfpu_t		*fp)	/* Need to fp to access gsr reg */
2017 {
2018 	uint_t nrs1;
2019 	uint64_t r, r1, r2;
2020 	enum ftt_type ftt = ftt_none;
2021 
2022 	nrs1 = pinst.rs1;
2023 	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &r1);
2024 	if (ftt != ftt_none)
2025 		return (ftt);
2026 	if (pinst.ibit == 0) {	/* copy the value in r[rs2] */
2027 		uint_t nrs2;
2028 
2029 		nrs2 = pinst.rs2;
2030 		ftt = read_iureg(pfpsd, nrs2, pregs, prw, &r2);
2031 		if (ftt != ftt_none)
2032 			return (ftt);
2033 	} else {	/* use sign_ext(simm13) */
2034 		union {
2035 			fp_inst_type	inst;
2036 			uint32_t	i;
2037 		} fp;
2038 
2039 		fp.inst = pinst;		/* Extract simm13 field */
2040 		r2 = (fp.i << 19) >> 19;
2041 	}
2042 	r = r1 ^ r2;
2043 	pfpsd->set_gsr(r, fp);
2044 	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
2045 	pregs->r_npc += 4;
2046 	return (ftt);
2047 }
2048 
2049 /*
2050  * This is the loadable module wrapper.
2051  */
2052 #include <sys/errno.h>
2053 #include <sys/modctl.h>
2054 
2055 /*
2056  * Module linkage information for the kernel.
2057  */
2058 extern struct mod_ops mod_miscops;
2059 
2060 static struct modlmisc modlmisc = {
2061 	&mod_miscops,
2062 	"vis fp simulation",
2063 };
2064 
2065 static struct modlinkage modlinkage = {
2066 	MODREV_1, (void *)&modlmisc, NULL
2067 };
2068 
2069 int
2070 _init(void)
2071 {
2072 	return (mod_install(&modlinkage));
2073 }
2074 
2075 int
2076 _info(struct modinfo *modinfop)
2077 {
2078 	return (mod_info(&modlinkage, modinfop));
2079 }
2080