/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* VIS floating point instruction simulator for Sparc FPU simulator. */ #include #include #include #include #include #include #include #include #include #include #include #include #define FPU_REG_FIELD uint32_reg /* Coordinate with FPU_REGS_TYPE. */ #define FPU_DREG_FIELD uint64_reg /* Coordinate with FPU_DREGS_TYPE. */ #define FPU_FSR_FIELD uint64_reg /* Coordinate with V9_FPU_FSR_TYPE. 
*/

/*
 * Condition-code helper; it is only declared here (no definition is visible
 * in this file). vis_edge() passes it the two source addresses and stores
 * the returned value into the CCR field of TSTATE.
 */
extern uint_t get_subcc_ccr(uint64_t, uint64_t);

/*
 * Forward declarations of the file-local simulators that
 * vis_fpu_simulator() and vis_fldst() dispatch to.
 */
static enum ftt_type vis_array(fp_simd_type *, vis_inst_type,
    struct regs *, void *);
static enum ftt_type vis_alignaddr(fp_simd_type *, vis_inst_type,
    struct regs *, void *, kfpu_t *);
static enum ftt_type vis_edge(fp_simd_type *, vis_inst_type,
    struct regs *, void *);
static enum ftt_type vis_faligndata(fp_simd_type *, fp_inst_type,
    kfpu_t *);
static enum ftt_type vis_bmask(fp_simd_type *, vis_inst_type,
    struct regs *, void *, kfpu_t *);
static enum ftt_type vis_bshuffle(fp_simd_type *, fp_inst_type,
    kfpu_t *);
static enum ftt_type vis_siam(fp_simd_type *, vis_inst_type,
    kfpu_t *);
static enum ftt_type vis_fcmp(fp_simd_type *, vis_inst_type,
    struct regs *, void *);
static enum ftt_type vis_fmul(fp_simd_type *, vis_inst_type);
static enum ftt_type vis_fpixel(fp_simd_type *, vis_inst_type,
    kfpu_t *);
static enum ftt_type vis_fpaddsub(fp_simd_type *, vis_inst_type);
static enum ftt_type vis_pdist(fp_simd_type *, fp_inst_type,
    struct regs *, void *, uint_t);
static enum ftt_type vis_prtl_fst(fp_simd_type *, vis_inst_type,
    struct regs *, void *, uint_t);
static enum ftt_type vis_short_fls(fp_simd_type *, vis_inst_type,
    struct regs *, void *, uint_t);
static enum ftt_type vis_blk_fldst(fp_simd_type *, vis_inst_type,
    struct regs *, void *, uint_t);

/*
 * Simulator for VIS instructions with op3 == 0x36 that get fp_disabled
 * traps.
 *
 * Dispatches on the opf field of the instruction. For every recognized opf
 * the saved PC/nPC are advanced past the instruction and the fault type
 * produced by the handler is returned; an unrecognized opf returns
 * ftt_unimplemented and leaves PC/nPC untouched.
 */
enum ftt_type
vis_fpu_simulator(
	fp_simd_type	*pfpsd,	/* FPU simulator data. */
	fp_inst_type	pinst,	/* FPU instruction to simulate. */
	struct regs	*pregs,	/* Pointer to PCB image of registers. */
	void		*prw,	/* Pointer to locals and ins. */
	kfpu_t		*fp)	/* Needed to access the GSR register. */
{
	uint_t	nrs1, nrs2, nrd;	/* Register number fields.
*/
	uint_t	us1, us2, usr;		/* 32-bit operands and result */
	uint64_t lus1, lus2, lusr;	/* 64-bit operands and result */
	enum ftt_type ftt = ftt_none;
	union {				/* two views of the same instruction */
		vis_inst_type	inst;
		fp_inst_type	pinst;
	} f;

	ASSERT(USERMODE(pregs->r_tstate));
	nrs1 = pinst.rs1;
	nrs2 = pinst.rs2;
	nrd = pinst.rd;
	f.pinst = pinst;
	/*
	 * An even opf is treated as a double precision operation: an odd
	 * 5-bit register number is remapped to (n & 0x1e) | 0x20. These
	 * remapped numbers are only used by the logical ops handled inline
	 * below; the helper functions decode the register fields themselves.
	 */
	if ((f.inst.opf & 1) == 0) {		/* double precision */
		if ((nrs1 & 1) == 1)		/* fix register encoding */
			nrs1 = (nrs1 & 0x1e) | 0x20;
		if ((nrs2 & 1) == 1)
			nrs2 = (nrs2 & 0x1e) | 0x20;
		if ((nrd & 1) == 1)
			nrd = (nrd & 0x1e) | 0x20;
	}

	switch (f.inst.opf) {
		/* these instr's do not use fp regs */
	case edge8:
	case edge8l:
	case edge8n:
	case edge8ln:
	case edge16:
	case edge16l:
	case edge16n:
	case edge16ln:
	case edge32:
	case edge32l:
	case edge32n:
	case edge32ln:
		ftt = vis_edge(pfpsd, f.inst, pregs, prw);
		break;
	case array8:
	case array16:
	case array32:
		ftt = vis_array(pfpsd, f.inst, pregs, prw);
		break;
	case alignaddr:
	case alignaddrl:
		ftt = vis_alignaddr(pfpsd, f.inst, pregs, prw, fp);
		break;
	case bmask:
		ftt = vis_bmask(pfpsd, f.inst, pregs, prw, fp);
		break;
	case fcmple16:
	case fcmpne16:
	case fcmpgt16:
	case fcmpeq16:
	case fcmple32:
	case fcmpne32:
	case fcmpgt32:
	case fcmpeq32:
		ftt = vis_fcmp(pfpsd, f.inst, pregs, prw);
		break;
	case fmul8x16:
	case fmul8x16au:
	case fmul8x16al:
	case fmul8sux16:
	case fmul8ulx16:
	case fmuld8sux16:
	case fmuld8ulx16:
		ftt = vis_fmul(pfpsd, f.inst);
		break;
	case fpack16:
	case fpack32:
	case fpackfix:
	case fexpand:
	case fpmerge:
		ftt = vis_fpixel(pfpsd, f.inst, fp);
		break;
	case pdist:
	case pdistn:
		ftt = vis_pdist(pfpsd, pinst, pregs, prw, f.inst.opf);
		break;
	case faligndata:
		ftt = vis_faligndata(pfpsd, pinst, fp);
		break;
	case bshuffle:
		ftt = vis_bshuffle(pfpsd, pinst, fp);
		break;
	case fpadd16:
	case fpadd16s:
	case fpadd32:
	case fpadd32s:
	case fpsub16:
	case fpsub16s:
	case fpsub32:
	case fpsub32s:
		ftt = vis_fpaddsub(pfpsd, f.inst);
		break;
	/*
	 * Bitwise logical operations, simulated inline. Each comes in a
	 * 64-bit form (extword pack/unpack) and a 32-bit "s" form (word
	 * pack/unpack).
	 */
	case fzero:
		lusr = 0;
		_fp_pack_extword(pfpsd, &lusr, nrd);
		break;
	case fzeros:
		usr = 0;
		_fp_pack_word(pfpsd, &usr, nrd);
		break;
	case fnor:
		_fp_unpack_extword(pfpsd, &lus1, nrs1);
		_fp_unpack_extword(pfpsd, &lus2, nrs2);
		lusr = ~(lus1 | lus2);
		_fp_pack_extword(pfpsd, &lusr, nrd);
		break;
	case fnors:
		_fp_unpack_word(pfpsd, &us1, nrs1);
		_fp_unpack_word(pfpsd, &us2, nrs2);
		usr = ~(us1 | us2);
		_fp_pack_word(pfpsd, &usr, nrd);
		break;
	case fandnot2:
		_fp_unpack_extword(pfpsd, &lus1, nrs1);
		_fp_unpack_extword(pfpsd, &lus2, nrs2);
		lusr = (lus1 & ~lus2);
		_fp_pack_extword(pfpsd, &lusr, nrd);
		break;
	case fandnot2s:
		_fp_unpack_word(pfpsd, &us1, nrs1);
		_fp_unpack_word(pfpsd, &us2, nrs2);
		usr = (us1 & ~us2);
		_fp_pack_word(pfpsd, &usr, nrd);
		break;
	case fnot2:
		_fp_unpack_extword(pfpsd, &lus2, nrs2);
		lusr = ~lus2;
		_fp_pack_extword(pfpsd, &lusr, nrd);
		break;
	case fnot2s:
		_fp_unpack_word(pfpsd, &us2, nrs2);
		usr = ~us2;
		_fp_pack_word(pfpsd, &usr, nrd);
		break;
	case fandnot1:
		_fp_unpack_extword(pfpsd, &lus1, nrs1);
		_fp_unpack_extword(pfpsd, &lus2, nrs2);
		lusr = (~lus1 & lus2);
		_fp_pack_extword(pfpsd, &lusr, nrd);
		break;
	case fandnot1s:
		_fp_unpack_word(pfpsd, &us1, nrs1);
		_fp_unpack_word(pfpsd, &us2, nrs2);
		usr = (~us1 & us2);
		_fp_pack_word(pfpsd, &usr, nrd);
		break;
	case fnot1:
		_fp_unpack_extword(pfpsd, &lus1, nrs1);
		lusr = ~lus1;
		_fp_pack_extword(pfpsd, &lusr, nrd);
		break;
	case fnot1s:
		_fp_unpack_word(pfpsd, &us1, nrs1);
		usr = ~us1;
		_fp_pack_word(pfpsd, &usr, nrd);
		break;
	case fxor:
		_fp_unpack_extword(pfpsd, &lus1, nrs1);
		_fp_unpack_extword(pfpsd, &lus2, nrs2);
		lusr = (lus1 ^ lus2);
		_fp_pack_extword(pfpsd, &lusr, nrd);
		break;
	case fxors:
		_fp_unpack_word(pfpsd, &us1, nrs1);
		_fp_unpack_word(pfpsd, &us2, nrs2);
		usr = (us1 ^ us2);
		_fp_pack_word(pfpsd, &usr, nrd);
		break;
	case fnand:
		_fp_unpack_extword(pfpsd, &lus1, nrs1);
		_fp_unpack_extword(pfpsd, &lus2, nrs2);
		lusr = ~(lus1 & lus2);
		_fp_pack_extword(pfpsd, &lusr, nrd);
		break;
	case fnands:
		_fp_unpack_word(pfpsd, &us1, nrs1);
		_fp_unpack_word(pfpsd, &us2, nrs2);
		usr = ~(us1 & us2);
		_fp_pack_word(pfpsd, &usr, nrd);
		break;
	case fand:
		_fp_unpack_extword(pfpsd, &lus1, nrs1);
		_fp_unpack_extword(pfpsd, &lus2, nrs2);
		lusr = (lus1 & lus2);
		_fp_pack_extword(pfpsd, &lusr, nrd);
		break;
	case fands:
		_fp_unpack_word(pfpsd, &us1, nrs1);
		_fp_unpack_word(pfpsd, &us2, nrs2);
		usr = (us1 & us2);
		_fp_pack_word(pfpsd, &usr, nrd);
		break;
	case fxnor:
		_fp_unpack_extword(pfpsd, &lus1, nrs1);
		_fp_unpack_extword(pfpsd, &lus2, nrs2);
		lusr = ~(lus1 ^ lus2);
		_fp_pack_extword(pfpsd, &lusr, nrd);
		break;
	case fxnors:
		_fp_unpack_word(pfpsd, &us1, nrs1);
		_fp_unpack_word(pfpsd, &us2, nrs2);
		usr = ~(us1 ^ us2);
		_fp_pack_word(pfpsd, &usr, nrd);
		break;
	case fsrc1:
		_fp_unpack_extword(pfpsd, &lusr, nrs1);
		_fp_pack_extword(pfpsd, &lusr, nrd);
		break;
	case fsrc1s:
		_fp_unpack_word(pfpsd, &usr, nrs1);
		_fp_pack_word(pfpsd, &usr, nrd);
		break;
	case fornot2:
		_fp_unpack_extword(pfpsd, &lus1, nrs1);
		_fp_unpack_extword(pfpsd, &lus2, nrs2);
		lusr = (lus1 | ~lus2);
		_fp_pack_extword(pfpsd, &lusr, nrd);
		break;
	case fornot2s:
		_fp_unpack_word(pfpsd, &us1, nrs1);
		_fp_unpack_word(pfpsd, &us2, nrs2);
		usr = (us1 | ~us2);
		_fp_pack_word(pfpsd, &usr, nrd);
		break;
	case fsrc2:
		_fp_unpack_extword(pfpsd, &lusr, nrs2);
		_fp_pack_extword(pfpsd, &lusr, nrd);
		break;
	case fsrc2s:
		_fp_unpack_word(pfpsd, &usr, nrs2);
		_fp_pack_word(pfpsd, &usr, nrd);
		break;
	case fornot1:
		_fp_unpack_extword(pfpsd, &lus1, nrs1);
		_fp_unpack_extword(pfpsd, &lus2, nrs2);
		lusr = (~lus1 | lus2);
		_fp_pack_extword(pfpsd, &lusr, nrd);
		break;
	case fornot1s:
		_fp_unpack_word(pfpsd, &us1, nrs1);
		_fp_unpack_word(pfpsd, &us2, nrs2);
		usr = (~us1 | us2);
		_fp_pack_word(pfpsd, &usr, nrd);
		break;
	case for_op:
		_fp_unpack_extword(pfpsd, &lus1, nrs1);
		_fp_unpack_extword(pfpsd, &lus2, nrs2);
		lusr = (lus1 | lus2);
		_fp_pack_extword(pfpsd, &lusr, nrd);
		break;
	case fors_op:
		_fp_unpack_word(pfpsd, &us1, nrs1);
		_fp_unpack_word(pfpsd, &us2, nrs2);
		usr = (us1 | us2);
		_fp_pack_word(pfpsd, &usr, nrd);
		break;
	case fone:
		lusr = 0xffffffffffffffff;
		_fp_pack_extword(pfpsd, &lusr, nrd);
		break;
	case fones:
		usr = 0xffffffffUL;
		_fp_pack_word(pfpsd, &usr, nrd);
		break;
	case siam:
		ftt = vis_siam(pfpsd, f.inst, fp);
		break;
	default:
		/* Unknown opf: PC/nPC are deliberately not advanced. */
		return (ftt_unimplemented);
	}

	/*
	 * Reached for every recognized opf, whatever ftt the handler
	 * produced: step the saved PC/nPC past the emulated instruction.
	 */
	pregs->r_pc = pregs->r_npc;	/* Do not retry emulated instruction. */
	pregs->r_npc += 4;
	return (ftt);
}

/*
 * Simulator for edge instructions.
 *
 * Reads two integer registers (rs1 = left address, rs2 = right address),
 * computes an edge mask of 8, 4 or 2 bits for the 8-, 16- and 32-bit
 * variants, and writes it to integer register rd. The variants whose opf
 * does not end in "n" also replace the CCR field of TSTATE with the value
 * returned by get_subcc_ccr(rs1, rs2).
 *
 * Returns the fault type from read_iureg()/write_iureg().
 */
static enum ftt_type
vis_edge(
	fp_simd_type	*pfpsd,	/* FPU simulator data. */
	vis_inst_type	inst,	/* FPU instruction to simulate. */
	struct regs	*pregs,	/* Pointer to PCB image of registers. */
	void		*prw)	/* Pointer to locals and ins. */
{
	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
	enum ftt_type ftt;
	uint64_t addrl, addrr, mask;
	uint64_t ah61l, ah61r;		/* Higher 61 bits of address */
	int al3l, al3r;			/* Lower 3 bits of address */
	uint_t	ccr;

	nrs1 = inst.rs1;
	nrs2 = inst.rs2;
	nrd = inst.rd;

	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &addrl);
	if (ftt != ftt_none)
		return (ftt);
	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &addrr);
	if (ftt != ftt_none)
		return (ftt);

	/* Test PSTATE.AM to determine 32-bit vs 64-bit addressing */
	if ((pregs->r_tstate & TSTATE_AM) != 0) {
		/* Only the low 32 bits take part in the comparison. */
		ah61l = addrl & 0xfffffff8;
		ah61r = addrr & 0xfffffff8;
	} else {
		ah61l = addrl & ~0x7;
		ah61r = addrr & ~0x7;
	}

	/*
	 * In each group the left address alone selects the initial mask;
	 * the right address trims it only when both addresses share the
	 * same upper (8-byte aligned) part. The "l" variants build the
	 * mask in the opposite bit order.
	 */
	switch (inst.opf) {
	case edge8:
	case edge8n:
	case edge8l:
	case edge8ln:
		al3l = addrl & 0x7;
		switch (inst.opf) {
		case edge8:
		case edge8n:
			if (inst.opf == edge8) {
				VISINFO_KSTAT(vis_edge8);
			} else {
				VISINFO_KSTAT(vis_edge8n);
			}
			mask = 0xff >> al3l;
			if (ah61l == ah61r) {
				al3r = addrr & 0x7;
				mask &= (0xff << (0x7 - al3r)) & 0xff;
			}
			break;
		case edge8l:
		case edge8ln:
			if (inst.opf == edge8l) {
				VISINFO_KSTAT(vis_edge8l);
			} else {
				VISINFO_KSTAT(vis_edge8ln);
			}
			mask = (0xff << al3l) & 0xff;
			if (ah61l == ah61r) {
				al3r = addrr & 0x7;
				mask &= 0xff >> (0x7 - al3r);
			}
			break;
		}
		break;
	case edge16:
	case edge16l:
	case edge16n:
	case edge16ln:
		/* Halfword index within the 8-byte block: bits <2:1>. */
		al3l = addrl & 0x6;
		al3l >>= 0x1;
		switch (inst.opf) {
		case edge16:
		case edge16n:
			if (inst.opf == edge16) {
				VISINFO_KSTAT(vis_edge16);
			} else {
				VISINFO_KSTAT(vis_edge16n);
			}
			mask = 0xf >> al3l;
			if (ah61l == ah61r) {
				al3r = addrr & 0x6;
				al3r >>= 0x1;
				mask &= (0xf << (0x3 - al3r)) & 0xf;
			}
			break;
		case edge16l:
		case edge16ln:
			if (inst.opf == edge16l) {
				VISINFO_KSTAT(vis_edge16l);
			} else {
				VISINFO_KSTAT(vis_edge16ln);
			}
			mask = (0xf << al3l) & 0xf;
			if (ah61l == ah61r) {
				al3r = addrr & 0x6;
				al3r >>= 0x1;
				mask &= 0xf >> (0x3 - al3r);
			}
			break;
		}
		break;
	case edge32:
	case edge32l:
	case edge32n:
	case edge32ln:
		/* Word index within the 8-byte block: bit <2>. */
		al3l = addrl & 0x4;
		al3l >>= 0x2;
		switch (inst.opf) {
		case edge32:
		case edge32n:
			if (inst.opf == edge32) {
				VISINFO_KSTAT(vis_edge32);
			} else {
				VISINFO_KSTAT(vis_edge32n);
			}
			mask = 0x3 >> al3l;
			if (ah61l == ah61r) {
				al3r = addrr & 0x4;
				al3r >>= 0x2;
				mask &= (0x3 << (0x1 - al3r)) & 0x3;
			}
			break;
		case edge32l:
		case edge32ln:
			if (inst.opf == edge32l) {
				VISINFO_KSTAT(vis_edge32l);
			} else {
				VISINFO_KSTAT(vis_edge32ln);
			}
			mask = (0x3 << al3l) & 0x3;
			if (ah61l == ah61r) {
				al3r = addrr & 0x4;
				al3r >>= 0x2;
				mask &= 0x3 >> (0x1 - al3r);
			}
			break;
		}
		break;
	}

	ftt = write_iureg(pfpsd, nrd, pregs, prw, &mask);

	/* The CCR update below happens regardless of the write result. */
	switch (inst.opf) {
	case edge8:
	case edge8l:
	case edge16:
	case edge16l:
	case edge32:
	case edge32l:
		/* Update flags per SUBcc outcome */
		pregs->r_tstate &= ~((uint64_t)TSTATE_CCR_MASK
		    << TSTATE_CCR_SHIFT);
		ccr = get_subcc_ccr(addrl, addrr);  /* get subcc cond. codes */
		pregs->r_tstate |= ((uint64_t)ccr << TSTATE_CCR_SHIFT);
		break;
	}
	return (ftt);
}

/*
 * Simulator for three-dimensional array addressing instructions.
 */
static enum ftt_type
vis_array(
	fp_simd_type	*pfpsd,	/* FPU simulator data. */
	vis_inst_type	inst,	/* FPU instruction to simulate. */
	struct regs	*pregs,	/* Pointer to PCB image of registers. */
	void		*prw)	/* Pointer to locals and ins. */
{
	uint_t	nrs1, nrs2, nrd;	/* Register number fields.
*/
	enum ftt_type ftt;
	uint64_t laddr, bsize, baddr;
	uint64_t nbit;
	int oy, oz;

	nrs1 = inst.rs1;
	nrs2 = inst.rs2;
	nrd = inst.rd;

	/* rs1 holds the packed source coordinates, rs2 the size parameter. */
	ftt = read_iureg(pfpsd, nrs1, pregs, prw, &laddr);
	if (ftt != ftt_none)
		return (ftt);
	ftt = read_iureg(pfpsd, nrs2, pregs, prw, &bsize);
	if (ftt != ftt_none)
		return (ftt);

	/* Values of rs2 above 5 are clamped to 5. */
	if (bsize > 5) {
		bsize = 5;
	}
	nbit = (1 << bsize) - 1;	/* Number of bits for XY<6+n-1:6> */
	oy = 17 + bsize;		/* Offset of Y<6+n-1:6> */
	oz = 17 + 2 * bsize;		/* Offset of Z<8:5> */

	/*
	 * Gather bit fields of the source into the result. In each line the
	 * shift is (source bit position - destination bit position) and the
	 * mask selects the destination field.
	 */
	baddr = 0;
	baddr |= (laddr >> (11 -  0)) & (0x03 <<  0);	/* X_integer<1:0> */
	baddr |= (laddr >> (33 -  2)) & (0x03 <<  2);	/* Y_integer<1:0> */
	baddr |= (laddr >> (55 -  4)) & (0x01 <<  4);	/* Z_integer<0>   */
	baddr |= (laddr >> (13 -  5)) & (0x0f <<  5);	/* X_integer<5:2> */
	baddr |= (laddr >> (35 -  9)) & (0x0f <<  9);	/* Y_integer<5:2> */
	baddr |= (laddr >> (56 - 13)) & (0x0f << 13);	/* Z_integer<4:1> */
	baddr |= (laddr >> (17 - 17)) & (nbit << 17);	/* X_integer<6+n-1:6> */
	baddr |= (laddr >> (39 - oy)) & (nbit << oy);	/* Y_integer<6+n-1:6> */
	baddr |= (laddr >> (60 - oz)) & (0x0f << oz);	/* Z_integer<8:5> */

	/* array16 and array32 scale the result by 2 and 4 respectively. */
	switch (inst.opf) {
	case array8:
		VISINFO_KSTAT(vis_array8);
		break;
	case array16:
		VISINFO_KSTAT(vis_array16);
		baddr <<= 1;
		break;
	case array32:
		VISINFO_KSTAT(vis_array32);
		baddr <<= 2;
		break;
	}

	ftt = write_iureg(pfpsd, nrd, pregs, prw, &baddr);

	return (ftt);
}

/*
 * Simulator for alignaddr and alignaddrl instructions.
 */
static enum ftt_type
vis_alignaddr(
	fp_simd_type	*pfpsd,	/* FPU simulator data. */
	vis_inst_type	inst,	/* FPU instruction to simulate. */
	struct regs	*pregs,	/* Pointer to PCB image of registers. */
	void		*prw,	/* Pointer to locals and ins. */
	kfpu_t		*fp)	/* Needed to access the GSR register. */
{
	uint_t	nrs1, nrs2, nrd;	/* Register number fields.
*/ enum ftt_type ftt; uint64_t ea, tea, g, r; short s; nrs1 = inst.rs1; nrs2 = inst.rs2; nrd = inst.rd; ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea); if (ftt != ftt_none) return (ftt); ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea); if (ftt != ftt_none) return (ftt); ea += tea; r = ea & ~0x7; /* zero least 3 significant bits */ ftt = write_iureg(pfpsd, nrd, pregs, prw, &r); g = pfpsd->fp_current_read_gsr(fp); g &= ~(GSR_ALIGN_MASK); /* zero the align offset */ r = ea & 0x7; if (inst.opf == alignaddrl) { s = (short)(~r); /* 2's complement for alignaddrl */ if (s < 0) r = (uint64_t)((s + 1) & 0x7); else r = (uint64_t)(s & 0x7); } g |= (r << GSR_ALIGN_SHIFT) & GSR_ALIGN_MASK; pfpsd->fp_current_write_gsr(g, fp); return (ftt); } /* * Simulator for bmask instruction. */ static enum ftt_type vis_bmask( fp_simd_type *pfpsd, /* FPU simulator data. */ vis_inst_type inst, /* FPU instruction to simulate. */ struct regs *pregs, /* Pointer to PCB image of registers. */ void *prw, /* Pointer to locals and ins. */ kfpu_t *fp) /* Need to fp to access gsr reg */ { uint_t nrs1, nrs2, nrd; /* Register number fields. */ enum ftt_type ftt; uint64_t ea, tea, g; VISINFO_KSTAT(vis_bmask); nrs1 = inst.rs1; nrs2 = inst.rs2; nrd = inst.rd; ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea); if (ftt != ftt_none) return (ftt); ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea); if (ftt != ftt_none) return (ftt); ea += tea; ftt = write_iureg(pfpsd, nrd, pregs, prw, &ea); g = pfpsd->fp_current_read_gsr(fp); g &= ~(GSR_MASK_MASK); /* zero the mask offset */ /* Put the least significant 32 bits of ea in GSR.mask */ g |= (ea << GSR_MASK_SHIFT) & GSR_MASK_MASK; pfpsd->fp_current_write_gsr(g, fp); return (ftt); } /* * Simulator for fp[add|sub]* instruction. */ static enum ftt_type vis_fpaddsub( fp_simd_type *pfpsd, /* FPU simulator data. */ vis_inst_type inst) /* FPU instruction to simulate. */ { uint_t nrs1, nrs2, nrd; /* Register number fields. 
*/ union { uint64_t ll; uint32_t i[2]; uint16_t s[4]; } lrs1, lrs2, lrd; union { uint32_t i; uint16_t s[2]; } krs1, krs2, krd; int i; nrs1 = inst.rs1; nrs2 = inst.rs2; nrd = inst.rd; if ((inst.opf & 1) == 0) { /* double precision */ if ((nrs1 & 1) == 1) /* fix register encoding */ nrs1 = (nrs1 & 0x1e) | 0x20; if ((nrs2 & 1) == 1) nrs2 = (nrs2 & 0x1e) | 0x20; if ((nrd & 1) == 1) nrd = (nrd & 0x1e) | 0x20; } switch (inst.opf) { case fpadd16: _fp_unpack_extword(pfpsd, &lrs1.ll, nrs1); _fp_unpack_extword(pfpsd, &lrs2.ll, nrs2); for (i = 0; i <= 3; i++) { lrd.s[i] = lrs1.s[i] + lrs2.s[i]; } _fp_pack_extword(pfpsd, &lrd.ll, nrd); break; case fpadd16s: _fp_unpack_word(pfpsd, &krs1.i, nrs1); _fp_unpack_word(pfpsd, &krs2.i, nrs2); for (i = 0; i <= 1; i++) { krd.s[i] = krs1.s[i] + krs2.s[i]; } _fp_pack_word(pfpsd, &krd.i, nrd); break; case fpadd32: _fp_unpack_extword(pfpsd, &lrs1.ll, nrs1); _fp_unpack_extword(pfpsd, &lrs2.ll, nrs2); for (i = 0; i <= 1; i++) { lrd.i[i] = lrs1.i[i] + lrs2.i[i]; } _fp_pack_extword(pfpsd, &lrd.ll, nrd); break; case fpadd32s: _fp_unpack_word(pfpsd, &krs1.i, nrs1); _fp_unpack_word(pfpsd, &krs2.i, nrs2); krd.i = krs1.i + krs2.i; _fp_pack_word(pfpsd, &krd.i, nrd); break; case fpsub16: _fp_unpack_extword(pfpsd, &lrs1.ll, nrs1); _fp_unpack_extword(pfpsd, &lrs2.ll, nrs2); for (i = 0; i <= 3; i++) { lrd.s[i] = lrs1.s[i] - lrs2.s[i]; } _fp_pack_extword(pfpsd, &lrd.ll, nrd); break; case fpsub16s: _fp_unpack_word(pfpsd, &krs1.i, nrs1); _fp_unpack_word(pfpsd, &krs2.i, nrs2); for (i = 0; i <= 1; i++) { krd.s[i] = krs1.s[i] - krs2.s[i]; } _fp_pack_word(pfpsd, &krd.i, nrd); break; case fpsub32: _fp_unpack_extword(pfpsd, &lrs1.ll, nrs1); _fp_unpack_extword(pfpsd, &lrs2.ll, nrs2); for (i = 0; i <= 1; i++) { lrd.i[i] = lrs1.i[i] - lrs2.i[i]; } _fp_pack_extword(pfpsd, &lrd.ll, nrd); break; case fpsub32s: _fp_unpack_word(pfpsd, &krs1.i, nrs1); _fp_unpack_word(pfpsd, &krs2.i, nrs2); krd.i = krs1.i - krs2.i; _fp_pack_word(pfpsd, &krd.i, nrd); break; } return 
(ftt_none); } /* * Simulator for fcmp* instruction. */ static enum ftt_type vis_fcmp( fp_simd_type *pfpsd, /* FPU simulator data. */ vis_inst_type inst, /* FPU instruction to simulate. */ struct regs *pregs, /* Pointer to PCB image of registers. */ void *prw) /* Pointer to locals and ins. */ { uint_t nrs1, nrs2, nrd; /* Register number fields. */ union { uint64_t ll; uint32_t i[2]; uint16_t s[4]; } krs1, krs2, krd; enum ftt_type ftt; short sr1, sr2; int i, ir1, ir2; nrs1 = inst.rs1; nrs2 = inst.rs2; nrd = inst.rd; krd.ll = 0; if ((nrs1 & 1) == 1) /* fix register encoding */ nrs1 = (nrs1 & 0x1e) | 0x20; if ((nrs2 & 1) == 1) nrs2 = (nrs2 & 0x1e) | 0x20; _fp_unpack_extword(pfpsd, &krs1.ll, nrs1); _fp_unpack_extword(pfpsd, &krs2.ll, nrs2); switch (inst.opf) { case fcmple16: VISINFO_KSTAT(vis_fcmple16); for (i = 0; i <= 3; i++) { sr1 = (short)krs1.s[i]; sr2 = (short)krs2.s[i]; if (sr1 <= sr2) krd.ll += (0x8 >> i); } break; case fcmpne16: VISINFO_KSTAT(vis_fcmpne16); for (i = 0; i <= 3; i++) { sr1 = (short)krs1.s[i]; sr2 = (short)krs2.s[i]; if (sr1 != sr2) krd.ll += (0x8 >> i); } break; case fcmpgt16: VISINFO_KSTAT(vis_fcmpgt16); for (i = 0; i <= 3; i++) { sr1 = (short)krs1.s[i]; sr2 = (short)krs2.s[i]; if (sr1 > sr2) krd.ll += (0x8 >> i); } break; case fcmpeq16: VISINFO_KSTAT(vis_fcmpeq16); for (i = 0; i <= 3; i++) { sr1 = (short)krs1.s[i]; sr2 = (short)krs2.s[i]; if (sr1 == sr2) krd.ll += (0x8 >> i); } break; case fcmple32: VISINFO_KSTAT(vis_fcmple32); for (i = 0; i <= 1; i++) { ir1 = (int)krs1.i[i]; ir2 = (int)krs2.i[i]; if (ir1 <= ir2) krd.ll += (0x2 >> i); } break; case fcmpne32: VISINFO_KSTAT(vis_fcmpne32); for (i = 0; i <= 1; i++) { ir1 = (int)krs1.i[i]; ir2 = (int)krs2.i[i]; if (ir1 != ir2) krd.ll += (0x2 >> i); } break; case fcmpgt32: VISINFO_KSTAT(vis_fcmpgt32); for (i = 0; i <= 1; i++) { ir1 = (int)krs1.i[i]; ir2 = (int)krs2.i[i]; if (ir1 > ir2) krd.ll += (0x2 >> i); } break; case fcmpeq32: VISINFO_KSTAT(vis_fcmpeq32); for (i = 0; i <= 1; i++) { ir1 = 
(int)krs1.i[i]; ir2 = (int)krs2.i[i]; if (ir1 == ir2) krd.ll += (0x2 >> i); } break; } ftt = write_iureg(pfpsd, nrd, pregs, prw, &krd.ll); return (ftt); } /* * Simulator for fmul* instruction. */ static enum ftt_type vis_fmul( fp_simd_type *pfpsd, /* FPU simulator data. */ vis_inst_type inst) /* FPU instruction to simulate. */ { uint_t nrs1, nrs2, nrd; /* Register number fields. */ union { uint64_t ll; uint32_t i[2]; uint16_t s[4]; uint8_t c[8]; } lrs1, lrs2, lrd; union { uint32_t i; uint16_t s[2]; uint8_t c[4]; } krs1, krs2, kres; short s1, s2, sres; ushort_t us1; char c1; int i; nrs1 = inst.rs1; nrs2 = inst.rs2; nrd = inst.rd; if ((inst.opf & 1) == 0) { /* double precision */ if ((nrd & 1) == 1) /* fix register encoding */ nrd = (nrd & 0x1e) | 0x20; } switch (inst.opf) { case fmul8x16: VISINFO_KSTAT(vis_fmul8x16); _fp_unpack_word(pfpsd, &krs1.i, nrs1); if ((nrs2 & 1) == 1) nrs2 = (nrs2 & 0x1e) | 0x20; _fp_unpack_extword(pfpsd, &lrs2.ll, nrs2); for (i = 0; i <= 3; i++) { us1 = (ushort_t)krs1.c[i]; s2 = (short)lrs2.s[i]; kres.i = us1 * s2; sres = (short)((kres.c[1] << 8) | kres.c[2]); if (kres.c[3] >= 0x80) sres++; lrd.s[i] = sres; } _fp_pack_extword(pfpsd, &lrd.ll, nrd); break; case fmul8x16au: VISINFO_KSTAT(vis_fmul8x16au); _fp_unpack_word(pfpsd, &krs1.i, nrs1); _fp_unpack_word(pfpsd, &krs2.i, nrs2); for (i = 0; i <= 3; i++) { us1 = (ushort_t)krs1.c[i]; s2 = (short)krs2.s[0]; kres.i = us1 * s2; sres = (short)((kres.c[1] << 8) | kres.c[2]); if (kres.c[3] >= 0x80) sres++; lrd.s[i] = sres; } _fp_pack_extword(pfpsd, &lrd.ll, nrd); break; case fmul8x16al: VISINFO_KSTAT(vis_fmul8x16al); _fp_unpack_word(pfpsd, &krs1.i, nrs1); _fp_unpack_word(pfpsd, &krs2.i, nrs2); for (i = 0; i <= 3; i++) { us1 = (ushort_t)krs1.c[i]; s2 = (short)krs2.s[1]; kres.i = us1 * s2; sres = (short)((kres.c[1] << 8) | kres.c[2]); if (kres.c[3] >= 0x80) sres++; lrd.s[i] = sres; } _fp_pack_extword(pfpsd, &lrd.ll, nrd); break; case fmul8sux16: VISINFO_KSTAT(vis_fmul8sux16); if ((nrs1 & 1) == 1) /* 
fix register encoding */ nrs1 = (nrs1 & 0x1e) | 0x20; _fp_unpack_extword(pfpsd, &lrs1.ll, nrs1); if ((nrs2 & 1) == 1) nrs2 = (nrs2 & 0x1e) | 0x20; _fp_unpack_extword(pfpsd, &lrs2.ll, nrs2); for (i = 0; i <= 3; i++) { c1 = lrs1.c[(i*2)]; s1 = (short)c1; /* keeps the sign alive */ s2 = (short)lrs2.s[i]; kres.i = s1 * s2; sres = (short)((kres.c[1] << 8) | kres.c[2]); if (kres.c[3] >= 0x80) sres++; if (sres < 0) lrd.s[i] = (sres & 0xFFFF); else lrd.s[i] = sres; } _fp_pack_extword(pfpsd, &lrd.ll, nrd); break; case fmul8ulx16: VISINFO_KSTAT(vis_fmul8ulx16); if ((nrs1 & 1) == 1) /* fix register encoding */ nrs1 = (nrs1 & 0x1e) | 0x20; _fp_unpack_extword(pfpsd, &lrs1.ll, nrs1); if ((nrs2 & 1) == 1) nrs2 = (nrs2 & 0x1e) | 0x20; _fp_unpack_extword(pfpsd, &lrs2.ll, nrs2); for (i = 0; i <= 3; i++) { us1 = (ushort_t)lrs1.c[(i*2)+1]; s2 = (short)lrs2.s[i]; kres.i = us1 * s2; sres = (short)kres.s[0]; if (kres.s[1] >= 0x8000) sres++; lrd.s[i] = sres; } _fp_pack_extword(pfpsd, &lrd.ll, nrd); break; case fmuld8sux16: VISINFO_KSTAT(vis_fmuld8sux16); _fp_unpack_word(pfpsd, &krs1.i, nrs1); _fp_unpack_word(pfpsd, &krs2.i, nrs2); for (i = 0; i <= 1; i++) { c1 = krs1.c[(i*2)]; s1 = (short)c1; /* keeps the sign alive */ s2 = (short)krs2.s[i]; kres.i = s1 * s2; lrd.i[i] = kres.i << 8; } _fp_pack_extword(pfpsd, &lrd.ll, nrd); break; case fmuld8ulx16: VISINFO_KSTAT(vis_fmuld8ulx16); _fp_unpack_word(pfpsd, &krs1.i, nrs1); _fp_unpack_word(pfpsd, &krs2.i, nrs2); for (i = 0; i <= 1; i++) { us1 = (ushort_t)krs1.c[(i*2)+1]; s2 = (short)krs2.s[i]; lrd.i[i] = us1 * s2; } _fp_pack_extword(pfpsd, &lrd.ll, nrd); break; } return (ftt_none); } /* * Simulator for fpixel formatting instructions. */ static enum ftt_type vis_fpixel( fp_simd_type *pfpsd, /* FPU simulator data. */ vis_inst_type inst, /* FPU instruction to simulate. */ kfpu_t *fp) /* Need to fp to access gsr reg */ { uint_t nrs1, nrs2, nrd; /* Register number fields. 
*/ int i, j, k, sf; union { uint64_t ll; uint32_t i[2]; uint16_t s[4]; uint8_t c[8]; } lrs1, lrs2, lrd; union { uint32_t i; uint16_t s[2]; uint8_t c[4]; } krs1, krs2, krd; uint64_t r; int64_t l, m; short s; uchar_t uc; nrs1 = inst.rs1; nrs2 = inst.rs2; nrd = inst.rd; if ((inst.opf != fpack16) && (inst.opf != fpackfix)) { if ((nrd & 1) == 1) /* fix register encoding */ nrd = (nrd & 0x1e) | 0x20; } switch (inst.opf) { case fpack16: VISINFO_KSTAT(vis_fpack16); if ((nrs2 & 1) == 1) /* fix register encoding */ nrs2 = (nrs2 & 0x1e) | 0x20; _fp_unpack_extword(pfpsd, &lrs2.ll, nrs2); r = pfpsd->fp_current_read_gsr(fp); /* fpack16 ignores GSR.scale msb */ sf = (int)(GSR_SCALE(r) & 0xf); for (i = 0; i <= 3; i++) { s = (short)lrs2.s[i]; /* preserve the sign */ j = ((int)s << sf); k = j >> 7; if (k < 0) { uc = 0; } else if (k > 255) { uc = 255; } else { uc = (uchar_t)k; } krd.c[i] = uc; } _fp_pack_word(pfpsd, &krd.i, nrd); break; case fpack32: VISINFO_KSTAT(vis_fpack32); if ((nrs1 & 1) == 1) /* fix register encoding */ nrs1 = (nrs1 & 0x1e) | 0x20; _fp_unpack_extword(pfpsd, &lrs1.ll, nrs1); if ((nrs2 & 1) == 1) nrs2 = (nrs2 & 0x1e) | 0x20; _fp_unpack_extword(pfpsd, &lrs2.ll, nrs2); r = pfpsd->fp_current_read_gsr(fp); sf = (int)GSR_SCALE(r); lrd.ll = lrs1.ll << 8; for (i = 0, k = 3; i <= 1; i++, k += 4) { j = (int)lrs2.i[i]; /* preserve the sign */ l = ((int64_t)j << sf); m = l >> 23; if (m < 0) { uc = 0; } else if (m > 255) { uc = 255; } else { uc = (uchar_t)m; } lrd.c[k] = uc; } _fp_pack_extword(pfpsd, &lrd.ll, nrd); break; case fpackfix: VISINFO_KSTAT(vis_fpackfix); if ((nrs2 & 1) == 1) nrs2 = (nrs2 & 0x1e) | 0x20; _fp_unpack_extword(pfpsd, &lrs2.ll, nrs2); r = pfpsd->fp_current_read_gsr(fp); sf = (int)GSR_SCALE(r); for (i = 0; i <= 1; i++) { j = (int)lrs2.i[i]; /* preserve the sign */ l = ((int64_t)j << sf); m = l >> 16; if (m < -32768) { s = -32768; } else if (m > 32767) { s = 32767; } else { s = (short)m; } krd.s[i] = s; } _fp_pack_word(pfpsd, &krd.i, nrd); break; case 
fexpand: VISINFO_KSTAT(vis_fexpand); _fp_unpack_word(pfpsd, &krs2.i, nrs2); for (i = 0; i <= 3; i++) { uc = krs2.c[i]; lrd.s[i] = (ushort_t)(uc << 4); } _fp_pack_extword(pfpsd, &lrd.ll, nrd); break; case fpmerge: VISINFO_KSTAT(vis_fpmerge); _fp_unpack_word(pfpsd, &krs1.i, nrs1); _fp_unpack_word(pfpsd, &krs2.i, nrs2); for (i = 0, j = 0; i <= 3; i++, j += 2) { lrd.c[j] = krs1.c[i]; lrd.c[j+1] = krs2.c[i]; } _fp_pack_extword(pfpsd, &lrd.ll, nrd); break; } return (ftt_none); } /* * Simulator for pdist instruction. */ enum ftt_type vis_pdist( fp_simd_type *pfpsd, /* FPU simulator data. */ fp_inst_type pinst, /* FPU instruction to simulate. */ struct regs *pregs, /* Pointer to PCB image of registers. */ void *prw, /* Pointer to locals and ins. */ uint_t op) /* Opcode pdist or pdistn */ { uint_t nrs1, nrs2, nrd; /* Register number fields. */ int i; short s; union { uint64_t ll; uint8_t c[8]; } lrs1, lrs2, lrd; nrs1 = pinst.rs1; nrs2 = pinst.rs2; nrd = pinst.rd; if ((nrs1 & 1) == 1) /* fix register encoding */ nrs1 = (nrs1 & 0x1e) | 0x20; if ((nrs2 & 1) == 1) nrs2 = (nrs2 & 0x1e) | 0x20; if ((nrd & 1) == 1) nrd = (nrd & 0x1e) | 0x20; _fp_unpack_extword(pfpsd, &lrs1.ll, nrs1); _fp_unpack_extword(pfpsd, &lrs2.ll, nrs2); if (op == pdist) { VISINFO_KSTAT(vis_pdist); _fp_unpack_extword(pfpsd, &lrd.ll, nrd); } else { /* pdistn */ VISINFO_KSTAT(vis_pdistn); lrd.ll = 0; } for (i = 0; i <= 7; i++) { s = (short)(lrs1.c[i] - lrs2.c[i]); if (s < 0) s = ~s + 1; lrd.ll += s; } if (op == pdist) _fp_pack_extword(pfpsd, &lrd.ll, nrd); else /* pdistn */ (void) write_iureg(pfpsd, nrd, pregs, prw, &lrd.ll); return (ftt_none); } /* * Simulator for faligndata instruction. */ static enum ftt_type vis_faligndata( fp_simd_type *pfpsd, /* FPU simulator data. */ fp_inst_type pinst, /* FPU instruction to simulate. */ kfpu_t *fp) /* Need to fp to access gsr reg */ { uint_t nrs1, nrs2, nrd; /* Register number fields. 
*/
	int i, j, k, ao;
	union {
		uint64_t	ll;
		uint8_t		c[8];
	} lrs1, lrs2, lrd;
	uint64_t r;

	nrs1 = pinst.rs1;
	nrs2 = pinst.rs2;
	nrd = pinst.rd;
	if ((nrs1 & 1) == 1)		/* fix register encoding */
		nrs1 = (nrs1 & 0x1e) | 0x20;
	if ((nrs2 & 1) == 1)
		nrs2 = (nrs2 & 0x1e) | 0x20;
	if ((nrd & 1) == 1)
		nrd = (nrd & 0x1e) | 0x20;

	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);

	r = pfpsd->fp_current_read_gsr(fp);
	ao = (int)GSR_ALIGN(r);

	/*
	 * Result = eight consecutive bytes of the rs1:rs2 concatenation,
	 * starting at byte offset ao within rs1 and continuing from the
	 * start of rs2 once rs1 is exhausted.
	 */
	for (i = 0, j = ao, k = 0; i <= 7; i++)
		if (j <= 7) {
			lrd.c[i] = lrs1.c[j++];
		} else {
			lrd.c[i] = lrs2.c[k++];
		}
	_fp_pack_extword(pfpsd, &lrd.ll, nrd);

	return (ftt_none);
}

/*
 * Simulator for bshuffle instruction.
 *
 * Builds double register rd byte by byte from the 16 bytes of rs1:rs2;
 * each 4-bit nibble of the GSR mask field selects the source byte for one
 * destination byte. Always returns ftt_none.
 */
static enum ftt_type
vis_bshuffle(
	fp_simd_type	*pfpsd,	/* FPU simulator data. */
	fp_inst_type	pinst,	/* FPU instruction to simulate. */
	kfpu_t		*fp)	/* Needed to access the GSR register. */
{
	uint_t	nrs1, nrs2, nrd;	/* Register number fields. */
	int i, j, ao;
	union {
		uint64_t	ll;
		uint8_t		c[8];
	} lrs1, lrs2, lrd;
	uint64_t r;

	VISINFO_KSTAT(vis_bshuffle);
	nrs1 = pinst.rs1;
	nrs2 = pinst.rs2;
	nrd = pinst.rd;
	if ((nrs1 & 1) == 1)		/* fix register encoding */
		nrs1 = (nrs1 & 0x1e) | 0x20;
	if ((nrs2 & 1) == 1)
		nrs2 = (nrs2 & 0x1e) | 0x20;
	if ((nrd & 1) == 1)
		nrd = (nrd & 0x1e) | 0x20;

	_fp_unpack_extword(pfpsd, &lrs1.ll, nrs1);
	_fp_unpack_extword(pfpsd, &lrs2.ll, nrs2);

	r = pfpsd->fp_current_read_gsr(fp);
	ao = (int)GSR_MASK(r);

	/*
	 * BSHUFFLE Destination Byte Selection
	 * rd Byte	Source
	 * 0		rs byte[GSR.mask<31..28>]
	 * 1		rs byte[GSR.mask<27..24>]
	 * 2		rs byte[GSR.mask<23..20>]
	 * 3		rs byte[GSR.mask<19..16>]
	 * 4		rs byte[GSR.mask<15..12>]
	 * 5		rs byte[GSR.mask<11..8>]
	 * 6		rs byte[GSR.mask<7..4>]
	 * 7		rs byte[GSR.mask<3..0>]
	 * P.S. rs1 is the upper half and rs2 is the lower half
	 * Bytes in the source value are numbered from most to
	 * least significant
	 */
	/* Walk the mask from its lowest nibble, filling rd from byte 7 up. */
	for (i = 7; i >= 0; i--, ao = (ao >> 4)) {
		j = ao & 0xf;		/* get byte number */
		if (j < 8) {
			lrd.c[i] = lrs1.c[j];
		} else {
			lrd.c[i] = lrs2.c[j - 8];
		}
	}
	_fp_pack_extword(pfpsd, &lrd.ll, nrd);

	return (ftt_none);
}

/*
 * Simulator for siam instruction.
 *
 * Clears the GSR IM/IRND fields and stores the low 3 bits of the
 * instruction's rs2 field there (shifted by GSR_IRND_SHIFT). No register
 * is read or written. Always returns ftt_none.
 */
static enum ftt_type
vis_siam(
	fp_simd_type	*pfpsd,	/* FPU simulator data. */
	vis_inst_type	inst,	/* FPU instruction to simulate. */
	kfpu_t		*fp)	/* Needed to access the GSR register. */
{
	uint_t	nrs2;			/* Register number fields. */
	uint64_t g, r;

	/* The rs2 field carries an immediate mode, not a register number. */
	nrs2 = inst.rs2;
	g = pfpsd->fp_current_read_gsr(fp);
	g &= ~(GSR_IM_IRND_MASK);	/* zero the IM and IRND fields */
	r = nrs2 & 0x7;			/* get mode(3 bit) */
	g |= (r << GSR_IRND_SHIFT);
	pfpsd->fp_current_write_gsr(g, fp);
	return (ftt_none);
}

/*
 * Simulator for VIS loads and stores between floating-point unit and memory.
 *
 * Dispatches on the ASI: partial-store ASIs go to vis_prtl_fst(), short
 * load/store ASIs to vis_short_fls(), block ASIs to vis_blk_fldst(). Any
 * other ASI returns ftt_unimplemented.
 */
enum ftt_type
vis_fldst(
	fp_simd_type	*pfpsd,	/* FPU simulator data. */
	fp_inst_type	pinst,	/* FPU instruction to simulate. */
	struct regs	*pregs,	/* Pointer to PCB image of registers. */
	void		*prw,	/* Pointer to locals and ins. */
	uint_t		asi)	/* asi to emulate! */
{
	union {				/* two views of the same instruction */
		vis_inst_type	inst;
		fp_inst_type	pinst;
	} i;

	ASSERT(USERMODE(pregs->r_tstate));
	i.pinst = pinst;
	switch (asi) {
	case ASI_PST8_P:
	case ASI_PST8_S:
	case ASI_PST16_P:
	case ASI_PST16_S:
	case ASI_PST32_P:
	case ASI_PST32_S:
	case ASI_PST8_PL:
	case ASI_PST8_SL:
	case ASI_PST16_PL:
	case ASI_PST16_SL:
	case ASI_PST32_PL:
	case ASI_PST32_SL:
		return (vis_prtl_fst(pfpsd, i.inst, pregs, prw, asi));
	case ASI_FL8_P:
	case ASI_FL8_S:
	case ASI_FL8_PL:
	case ASI_FL8_SL:
	case ASI_FL16_P:
	case ASI_FL16_S:
	case ASI_FL16_PL:
	case ASI_FL16_SL:
		return (vis_short_fls(pfpsd, i.inst, pregs, prw, asi));
	case ASI_BLK_AIUP:
	case ASI_BLK_AIUS:
	case ASI_BLK_AIUPL:
	case ASI_BLK_AIUSL:
	case ASI_BLK_P:
	case ASI_BLK_S:
	case ASI_BLK_PL:
	case ASI_BLK_SL:
	case ASI_BLK_COMMIT_P:
	case ASI_BLK_COMMIT_S:
		return (vis_blk_fldst(pfpsd, i.inst, pregs, prw, asi));
	default:
		return (ftt_unimplemented);
	}
}

/*
 * Simulator for partial stores between floating-point unit and memory.
 */
static enum ftt_type
vis_prtl_fst(
	fp_simd_type	*pfpsd,	/* FPU simulator data. */
	vis_inst_type	inst,	/* ISE instruction to simulate. */
	struct regs	*pregs,	/* Pointer to PCB image of registers. */
	void		*prw,	/* Pointer to locals and ins. */
	uint_t		asi)	/* asi to emulate! */
{
	uint_t	nrs1, nrs2, nrd;	/* Register number fields.
*/ uint_t opf, msk; int h, i, j; uint64_t ea, tmsk; union { freg_type f; uint64_t ll; uint32_t i[2]; uint16_t s[4]; uint8_t c[8]; } k, l, res; enum ftt_type ftt; nrs1 = inst.rs1; nrs2 = inst.rs2; nrd = inst.rd; if ((nrd & 1) == 1) /* fix register encoding */ nrd = (nrd & 0x1e) | 0x20; opf = inst.opf; res.ll = 0; if ((opf & 0x100) == 0) { /* effective address = rs1 */ ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea); if (ftt != ftt_none) return (ftt); ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tmsk); if (ftt != ftt_none) return (ftt); msk = (uint_t)tmsk; } else { pfpsd->fp_trapaddr = (caddr_t)pregs->r_pc; return (ftt_unimplemented); } pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */ if ((ea & 0x3) != 0) return (ftt_alignment); /* Require 32 bit-alignment. */ switch (asi) { case ASI_PST8_P: case ASI_PST8_S: ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd); if (ftt != ftt_none) return (ftt); _fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd); for (i = 0, j = 0x80; i <= 7; i++, j >>= 1) { if ((msk & j) == j) res.c[i] = k.c[i]; else res.c[i] = l.c[i]; } ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd); if (ftt != ftt_none) return (ftt); break; case ASI_PST8_PL: /* little-endian */ case ASI_PST8_SL: ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd); if (ftt != ftt_none) return (ftt); _fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd); for (h = 7, i = 0, j = 1; i <= 7; h--, i++, j <<= 1) { if ((msk & j) == j) res.c[i] = k.c[h]; else res.c[i] = l.c[i]; } ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd); if (ftt != ftt_none) return (ftt); break; case ASI_PST16_P: case ASI_PST16_S: ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd); if (ftt != ftt_none) return (ftt); _fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd); for (i = 0, j = 0x8; i <= 3; i++, j >>= 1) { if ((msk & j) == j) res.s[i] = k.s[i]; else res.s[i] = l.s[i]; } ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd); if (ftt != ftt_none) return (ftt); break; case 
ASI_PST16_PL: case ASI_PST16_SL: ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd); if (ftt != ftt_none) return (ftt); _fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd); for (h = 7, i = 0, j = 1; i <= 6; h -= 2, i += 2, j <<= 1) { if ((msk & j) == j) { res.c[i] = k.c[h]; res.c[i+1] = k.c[h-1]; } else { res.c[i] = l.c[i]; res.c[i+1] = l.c[i+1]; } } ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd); if (ftt != ftt_none) return (ftt); break; case ASI_PST32_P: case ASI_PST32_S: ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd); if (ftt != ftt_none) return (ftt); _fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd); for (i = 0, j = 0x2; i <= 1; i++, j >>= 1) { if ((msk & j) == j) res.i[i] = k.i[i]; else res.i[i] = l.i[i]; } ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd); if (ftt != ftt_none) return (ftt); break; case ASI_PST32_PL: case ASI_PST32_SL: ftt = _fp_read_extword((uint64_t *)ea, &l.ll, pfpsd); if (ftt != ftt_none) return (ftt); _fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd); for (h = 7, i = 0, j = 1; i <= 4; h -= 4, i += 4, j <<= 1) { if ((msk & j) == j) { res.c[i] = k.c[h]; res.c[i+1] = k.c[h-1]; res.c[i+2] = k.c[h-2]; res.c[i+3] = k.c[h-3]; } else { res.c[i] = l.c[i]; res.c[i+1] = l.c[i+1]; res.c[i+2] = l.c[i+2]; res.c[i+3] = l.c[i+3]; } } ftt = _fp_write_extword((uint64_t *)ea, res.ll, pfpsd); if (ftt != ftt_none) return (ftt); break; } pregs->r_pc = pregs->r_npc; /* Do not retry emulated instruction. */ pregs->r_npc += 4; return (ftt_none); } /* * Simulator for short load/stores between floating-point unit and memory. */ static enum ftt_type vis_short_fls( fp_simd_type *pfpsd, /* FPU simulator data. */ vis_inst_type inst, /* ISE instruction to simulate. */ struct regs *pregs, /* Pointer to PCB image of registers. */ void *prw, /* Pointer to locals and ins. */ uint_t asi) /* asi to emulate! */ { uint_t nrs1, nrs2, nrd; /* Register number fields. 
*/ uint_t opf; uint64_t ea, tea; union { freg_type f; uint64_t ll; uint32_t i[2]; uint16_t s[4]; uint8_t c[8]; } k; union { vis_inst_type inst; int i; } fp; enum ftt_type ftt = ftt_none; ushort_t us; uchar_t uc; nrs1 = inst.rs1; nrs2 = inst.rs2; nrd = inst.rd; if ((nrd & 1) == 1) /* fix register encoding */ nrd = (nrd & 0x1e) | 0x20; opf = inst.opf; fp.inst = inst; if ((opf & 0x100) == 0) { /* effective address = rs1 + rs2 */ ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea); if (ftt != ftt_none) return (ftt); ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea); if (ftt != ftt_none) return (ftt); ea += tea; } else { /* effective address = rs1 + imm13 */ fp.inst = inst; ea = (fp.i << 19) >> 19; /* Extract simm13 field. */ ftt = read_iureg(pfpsd, nrs1, pregs, prw, &tea); if (ftt != ftt_none) return (ftt); ea += tea; } if (get_udatamodel() == DATAMODEL_ILP32) ea = (uint64_t)(caddr32_t)ea; pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */ switch (asi) { case ASI_FL8_P: case ASI_FL8_S: case ASI_FL8_PL: /* little-endian */ case ASI_FL8_SL: if ((inst.op3 & 7) == 3) { /* load byte */ if (fuword8((void *)ea, &uc) == -1) return (ftt_fault); k.ll = 0; k.c[7] = uc; _fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd); } else { /* store byte */ _fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd); uc = k.c[7]; if (subyte((caddr_t)ea, uc) == -1) return (ftt_fault); } break; case ASI_FL16_P: case ASI_FL16_S: if ((ea & 1) == 1) return (ftt_alignment); if ((inst.op3 & 7) == 3) { /* load short */ if (fuword16((void *)ea, &us) == -1) return (ftt_fault); k.ll = 0; k.s[3] = us; _fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd); } else { /* store short */ _fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd); us = k.s[3]; if (suword16((caddr_t)ea, us) == -1) return (ftt_fault); } break; case ASI_FL16_PL: /* little-endian */ case ASI_FL16_SL: if ((ea & 1) == 1) return (ftt_alignment); if ((inst.op3 & 7) == 3) { /* load short */ if (fuword16((void *)ea, &us) == -1) return 
(ftt_fault); k.ll = 0; k.c[6] = (uchar_t)us; k.c[7] = (uchar_t)((us & 0xff00) >> 8); _fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd); } else { /* store short */ _fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd); uc = k.c[7]; us = (ushort_t)((uc << 8) | k.c[6]); if (suword16((void *)ea, us) == -1) return (ftt_fault); } break; } pregs->r_pc = pregs->r_npc; /* Do not retry emulated instruction. */ pregs->r_npc += 4; return (ftt_none); } /* * Simulator for block loads and stores between floating-point unit and memory. * We pass the addrees of ea to sync_data_memory() to flush the Ecache. * Sync_data_memory() calls platform dependent code to flush the Ecache. */ static enum ftt_type vis_blk_fldst( fp_simd_type *pfpsd, /* FPU simulator data. */ vis_inst_type inst, /* ISE instruction to simulate. */ struct regs *pregs, /* Pointer to PCB image of registers. */ void *prw, /* Pointer to locals and ins. */ uint_t asi) /* asi to emulate! */ { uint_t nrs1, nrs2, nrd; /* Register number fields. */ uint_t opf, h, i, j; uint64_t ea, tea; union { freg_type f; uint64_t ll; uint8_t c[8]; } k, l; union { vis_inst_type inst; int32_t i; } fp; enum ftt_type ftt; boolean_t little_endian = B_FALSE; nrs1 = inst.rs1; nrs2 = inst.rs2; nrd = inst.rd; if ((nrd & 1) == 1) /* fix register encoding */ nrd = (nrd & 0x1e) | 0x20; /* ensure register is 8-double precision aligned */ if ((nrd & 0xf) != 0) return (ftt_unimplemented); opf = inst.opf; if ((opf & 0x100) == 0) { /* effective address = rs1 + rs2 */ ftt = read_iureg(pfpsd, nrs1, pregs, prw, &ea); if (ftt != ftt_none) return (ftt); ftt = read_iureg(pfpsd, nrs2, pregs, prw, &tea); if (ftt != ftt_none) return (ftt); ea += tea; } else { /* effective address = rs1 + imm13 */ fp.inst = inst; ea = (fp.i << 19) >> 19; /* Extract simm13 field. */ ftt = read_iureg(pfpsd, nrs1, pregs, prw, &tea); if (ftt != ftt_none) return (ftt); ea += tea; } if ((ea & 0x3F) != 0) /* Require 64 byte-alignment. 
*/ return (ftt_alignment); pfpsd->fp_trapaddr = (caddr_t)ea; /* setup bad addr in case we trap */ switch (asi) { case ASI_BLK_AIUPL: case ASI_BLK_AIUSL: case ASI_BLK_PL: case ASI_BLK_SL: little_endian = B_TRUE; /* FALLTHROUGH */ case ASI_BLK_AIUP: case ASI_BLK_AIUS: case ASI_BLK_P: case ASI_BLK_S: case ASI_BLK_COMMIT_P: case ASI_BLK_COMMIT_S: if ((inst.op3 & 7) == 3) { /* lddf */ for (i = 0; i < 8; i++, nrd += 2) { ftt = _fp_read_extword((uint64_t *)ea, &k.ll, pfpsd); if (ftt != ftt_none) return (ftt); if (little_endian) { for (j = 0, h = 7; j < 8; j++, h--) l.c[h] = k.c[j]; k.ll = l.ll; } _fp_pack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd); ea += 8; } } else { /* stdf */ for (i = 0; i < 8; i++, nrd += 2) { _fp_unpack_extword(pfpsd, &k.f.FPU_DREG_FIELD, nrd); if (little_endian) { for (j = 0, h = 7; j < 8; j++, h--) l.c[h] = k.c[j]; k.ll = l.ll; } ftt = _fp_write_extword((uint64_t *)ea, k.ll, pfpsd); if (ftt != ftt_none) return (ftt); ea += 8; } } if ((asi == ASI_BLK_COMMIT_P) || (asi == ASI_BLK_COMMIT_S)) sync_data_memory((caddr_t)(ea - 64), 64); break; default: /* addr of unimp inst */ pfpsd->fp_trapaddr = (caddr_t)pregs->r_pc; return (ftt_unimplemented); } pregs->r_pc = pregs->r_npc; /* Do not retry emulated instruction. */ pregs->r_npc += 4; return (ftt_none); } /* * Simulator for rd %gsr instruction. */ enum ftt_type vis_rdgsr( fp_simd_type *pfpsd, /* FPU simulator data. */ fp_inst_type pinst, /* FPU instruction to simulate. */ struct regs *pregs, /* Pointer to PCB image of registers. */ void *prw, /* Pointer to locals and ins. */ kfpu_t *fp) /* Need to fp to access gsr reg */ { uint_t nrd; uint64_t r; enum ftt_type ftt = ftt_none; nrd = pinst.rd; r = pfpsd->fp_current_read_gsr(fp); ftt = write_iureg(pfpsd, nrd, pregs, prw, &r); pregs->r_pc = pregs->r_npc; /* Do not retry emulated instruction. */ pregs->r_npc += 4; return (ftt); } /* * Simulator for wr %gsr instruction. */ enum ftt_type vis_wrgsr( fp_simd_type *pfpsd, /* FPU simulator data. 
*/ fp_inst_type pinst, /* FPU instruction to simulate. */ struct regs *pregs, /* Pointer to PCB image of registers. */ void *prw, /* Pointer to locals and ins. */ kfpu_t *fp) /* Need to fp to access gsr reg */ { uint_t nrs1; uint64_t r, r1, r2; enum ftt_type ftt = ftt_none; nrs1 = pinst.rs1; ftt = read_iureg(pfpsd, nrs1, pregs, prw, &r1); if (ftt != ftt_none) return (ftt); if (pinst.ibit == 0) { /* copy the value in r[rs2] */ uint_t nrs2; nrs2 = pinst.rs2; ftt = read_iureg(pfpsd, nrs2, pregs, prw, &r2); if (ftt != ftt_none) return (ftt); } else { /* use sign_ext(simm13) */ union { fp_inst_type inst; uint32_t i; } fp; fp.inst = pinst; /* Extract simm13 field */ r2 = (fp.i << 19) >> 19; } r = r1 ^ r2; pfpsd->fp_current_write_gsr(r, fp); pregs->r_pc = pregs->r_npc; /* Do not retry emulated instruction. */ pregs->r_npc += 4; return (ftt); } /* * This is the loadable module wrapper. */ #include #include /* * Module linkage information for the kernel. */ extern struct mod_ops mod_miscops; static struct modlmisc modlmisc = { &mod_miscops, "vis fp simulation", }; static struct modlinkage modlinkage = { MODREV_1, (void *)&modlmisc, NULL }; int _init(void) { return (mod_install(&modlinkage)); } int _info(struct modinfo *modinfop) { return (mod_info(&modlinkage, modinfop)); }