xref: /illumos-gate/usr/src/uts/sparc/v9/fpu/fpu.c (revision bc0e9132)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/signal.h>
30 #include <sys/trap.h>
31 #include <sys/machtrap.h>
32 #include <sys/fault.h>
33 #include <sys/systm.h>
34 #include <sys/user.h>
35 #include <sys/file.h>
36 #include <sys/proc.h>
37 #include <sys/core.h>
38 #include <sys/pcb.h>
39 #include <sys/cpuvar.h>
40 #include <sys/thread.h>
41 #include <sys/disp.h>
42 #include <sys/stack.h>
43 #include <sys/cmn_err.h>
44 #include <sys/privregs.h>
45 #include <sys/debug.h>
46 
47 #include <sys/fpu/fpu_simulator.h>
48 #include <sys/fpu/globals.h>
49 #include <sys/fpu/fpusystm.h>
50 
/*
 * Debug message switch.  When nonzero, DEBUG builds of fp_prsave(),
 * fp_enable() and fp_disabled() report unexpected FPU-enable states
 * through cmn_err(CE_NOTE, ...).  Defaults to 0 (silent).
 */
51 int fpdispr = 0;
52 
53 /*
54  * For use by procfs to save the floating point context of the thread.
55  * Note the if (ttolwp(lwp) == curthread) in prstop, which calls
56  * this function, ensures that it is safe to read the fprs here.
57  */
58 void
fp_prsave(kfpu_t * fp)59 fp_prsave(kfpu_t *fp)
60 {
61 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))  {
62 		kpreempt_disable();
63 		if (fpu_exists) {
64 			fp->fpu_fprs = _fp_read_fprs();
65 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
66 				uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
67 
68 				_fp_write_fprs(fprs);
69 				fp->fpu_fprs = fprs;
70 #ifdef DEBUG
71 				if (fpdispr)
72 					cmn_err(CE_NOTE,
73 					    "fp_prsave with fp disabled!");
74 #endif
75 			}
76 			fp_fksave(fp);
77 		}
78 		kpreempt_enable();
79 	}
80 }
81 
82 /*
83  * Copy the floating point context of the forked thread.
84  */
85 void
fp_fork(klwp_t * lwp,klwp_t * clwp)86 fp_fork(klwp_t *lwp, klwp_t *clwp)
87 {
88 	kfpu_t *cfp, *pfp;
89 	int i;
90 
91 	cfp = lwptofpu(clwp);
92 	pfp = lwptofpu(lwp);
93 
94 	/*
95 	 * copy the parents fpq
96 	 */
97 	cfp->fpu_qcnt = pfp->fpu_qcnt;
98 	for (i = 0; i < pfp->fpu_qcnt; i++)
99 		cfp->fpu_q[i] = pfp->fpu_q[i];
100 
101 	/*
102 	 * save the context of the parent into the childs fpu structure
103 	 */
104 	cfp->fpu_fprs = pfp->fpu_fprs;
105 	if (ttolwp(curthread) == lwp && fpu_exists) {
106 		fp_fksave(cfp);
107 	} else {
108 		for (i = 0; i < 32; i++)
109 			cfp->fpu_fr.fpu_regs[i] = pfp->fpu_fr.fpu_regs[i];
110 		for (i = 16; i < 32; i++)
111 			cfp->fpu_fr.fpu_dregs[i] = pfp->fpu_fr.fpu_dregs[i];
112 	}
113 	cfp->fpu_en = 1;
114 }
115 
116 /*
117  * Free any state associated with floating point context.
118  * Fp_free can be called in two cases:
119  * 1) from reaper -> thread_free -> lwp_freeregs -> fp_free
120  *	fp context belongs to a thread on deathrow
121  *	nothing to do,  thread will never be resumed
122  *	thread calling ctxfree is reaper
123  *
124  * 2) from exec -> lwp_freeregs -> fp_free
125  *	fp context belongs to the current thread
126  *	must disable fpu, thread calling ctxfree is curthread
127  */
128 /*ARGSUSED1*/
129 void
fp_free(kfpu_t * fp,int isexec)130 fp_free(kfpu_t *fp, int isexec)
131 {
132 	int s;
133 	uint32_t fprs = 0;
134 
135 	if (curthread->t_lwp != NULL && lwptofpu(curthread->t_lwp) == fp) {
136 		fp->fpu_en = 0;
137 		fp->fpu_fprs = fprs;
138 		s = splhigh();
139 		_fp_write_fprs(fprs);
140 		splx(s);
141 	}
142 }
143 
144 
/*
 * State for the SF_ERRATA_30 workaround applied in fp_disabled():
 * spitfire_call_bug is defined outside this file and gates the check;
 * ill_fpcalls counts the fp_disabled traps that were found to be sitting
 * on a call instruction and were handed to trap() as T_UNIMP_INSTR.
 */
145 #ifdef SF_ERRATA_30 /* call causes fp-disabled */
146 extern int spitfire_call_bug;
147 int ill_fpcalls;
148 #endif
149 
150 void
fp_enable(void)151 fp_enable(void)
152 {
153 	klwp_id_t lwp;
154 	kfpu_t *fp;
155 
156 	lwp = ttolwp(curthread);
157 	ASSERT(lwp != NULL);
158 	fp = lwptofpu(lwp);
159 
160 	if (fpu_exists) {
161 		if (fp->fpu_en) {
162 #ifdef DEBUG
163 			if (fpdispr)
164 				cmn_err(CE_NOTE,
165 				    "fpu disabled, but already enabled\n");
166 #endif
167 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
168 				fp->fpu_fprs = FPRS_FEF;
169 #ifdef DEBUG
170 				if (fpdispr)
171 					cmn_err(CE_NOTE,
172 					"fpu disabled, saved fprs disabled\n");
173 #endif
174 			}
175 			_fp_write_fprs(FPRS_FEF);
176 			fp_restore(fp);
177 		} else {
178 			fp->fpu_en = 1;
179 			fp->fpu_fsr = 0;
180 			fp->fpu_fprs = FPRS_FEF;
181 			_fp_write_fprs(FPRS_FEF);
182 			fp_clearregs(fp);
183 		}
184 	} else {
185 		int i;
186 
187 		if (!fp->fpu_en) {
188 			fp->fpu_en = 1;
189 			fp->fpu_fsr = 0;
190 			for (i = 0; i < 32; i++)
191 				fp->fpu_fr.fpu_regs[i] = (uint_t)-1; /* NaN */
192 			for (i = 16; i < 32; i++)		/* NaN */
193 				fp->fpu_fr.fpu_dregs[i] = (uint64_t)-1;
194 		}
195 	}
196 }
197 
198 /*
199  * fp_disabled normally occurs when the first floating point in a non-threaded
200  * program causes an fp_disabled trap. For threaded programs, the ILP32 threads
201  * library calls the .setpsr fasttrap, which has been modified to also set the
202  * appropriate bits in fpu_en and fpu_fprs, as well as to enable the %fprs,
203  * as before. The LP64 threads library will write to the %fprs directly,
204  * so fpu_en will never get updated for LP64 threaded programs,
205  * although fpu_fprs will, via resume.
206  */
207 void
fp_disabled(struct regs * rp)208 fp_disabled(struct regs *rp)
209 {
210 	klwp_id_t lwp;
211 	kfpu_t *fp;
212 	int ftt;
213 
214 #ifdef SF_ERRATA_30 /* call causes fp-disabled */
215 	/*
216 	 * This code is here because sometimes the call instruction
217 	 * generates an fp_disabled trap when the call offset is large.
218 	 */
219 	if (spitfire_call_bug) {
220 		uint_t instr = 0;
221 		extern void trap(struct regs *rp, caddr_t addr, uint32_t type,
222 		    uint32_t mmu_fsr);
223 
224 		if (USERMODE(rp->r_tstate)) {
225 			(void) fuword32((void *)rp->r_pc, &instr);
226 		} else {
227 			instr = *(uint_t *)(rp->r_pc);
228 		}
229 		if ((instr & 0xc0000000) == 0x40000000) {
230 			ill_fpcalls++;
231 			trap(rp, NULL, T_UNIMP_INSTR, 0);
232 			return;
233 		}
234 	}
235 #endif /* SF_ERRATA_30 - call causes fp-disabled */
236 
237 #ifdef CHEETAH_ERRATUM_109 /* interrupts not taken during fpops */
238 	/*
239 	 * UltraSPARC III will report spurious fp-disabled exceptions when
240 	 * the pipe is full of fpops and an interrupt is triggered.  By the
241 	 * time we get here the interrupt has been taken and we just need
242 	 * to return to where we came from and try again.
243 	 */
244 	if (fpu_exists && _fp_read_fprs() & FPRS_FEF)
245 		return;
246 #endif /* CHEETAH_ERRATUM_109 */
247 
248 	lwp = ttolwp(curthread);
249 	ASSERT(lwp != NULL);
250 	fp = lwptofpu(lwp);
251 	if (fpu_exists) {
252 		kpreempt_disable();
253 		if (fp->fpu_en) {
254 #ifdef DEBUG
255 			if (fpdispr)
256 				cmn_err(CE_NOTE,
257 				    "fpu disabled, but already enabled\n");
258 #endif
259 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
260 				fp->fpu_fprs = FPRS_FEF;
261 #ifdef DEBUG
262 				if (fpdispr)
263 					cmn_err(CE_NOTE,
264 					"fpu disabled, saved fprs disabled\n");
265 #endif
266 			}
267 			_fp_write_fprs(FPRS_FEF);
268 			fp_restore(fp);
269 		} else {
270 			fp->fpu_en = 1;
271 			fp->fpu_fsr = 0;
272 			fp->fpu_fprs = FPRS_FEF;
273 			_fp_write_fprs(FPRS_FEF);
274 			fp_clearregs(fp);
275 		}
276 		kpreempt_enable();
277 	} else {
278 		fp_simd_type fpsd;
279 		int i;
280 
281 		(void) flush_user_windows_to_stack(NULL);
282 		if (!fp->fpu_en) {
283 			fp->fpu_en = 1;
284 			fp->fpu_fsr = 0;
285 			for (i = 0; i < 32; i++)
286 				fp->fpu_fr.fpu_regs[i] = (uint_t)-1; /* NaN */
287 			for (i = 16; i < 32; i++)		/* NaN */
288 				fp->fpu_fr.fpu_dregs[i] = (uint64_t)-1;
289 		}
290 		if (ftt = fp_emulator(&fpsd, (fp_inst_type *)rp->r_pc,
291 		    rp, (ulong_t *)rp->r_sp, fp)) {
292 			fp->fpu_q_entrysize = sizeof (struct _fpq);
293 			fp_traps(&fpsd, ftt, rp);
294 		}
295 	}
296 }
297 
298 /*
299  * Process the floating point queue in lwp->lwp_pcb.
300  *
301  * Each entry in the floating point queue is processed in turn.
302  * If processing an entry results in an exception fp_traps() is called to
303  * handle the exception - this usually results in the generation of a signal
304  * to be delivered to the user. There are 2 possible outcomes to this (note
305  * that hardware generated signals cannot be held!):
306  *
307  *   1. If the signal is being ignored we continue to process the rest
308  *	of the entries in the queue.
309  *
310  *   2. If arrangements have been made for return to a user signal handler,
311  *	sendsig() will have copied the floating point queue onto the user's
312  *	signal stack and zero'ed the queue count in the u_pcb. Note that
313  *	this has the side effect of terminating fp_runq's processing loop.
314  *	We will re-run the floating point queue on return from the user
315  *	signal handler if necessary as part of normal setcontext processing.
316  */
317 void
fp_runq(struct regs * rp)318 fp_runq(struct regs *rp)
319 {
320 	kfpu_t *fp = lwptofpu(curthread->t_lwp);
321 	struct _fq *fqp = fp->fpu_q;
322 	fp_simd_type fpsd;
323 	uint64_t gsr = get_gsr(fp);
324 
325 	/*
326 	 * don't preempt while manipulating the queue
327 	 */
328 	kpreempt_disable();
329 
330 	while (fp->fpu_qcnt) {
331 		int fptrap;
332 
333 		fptrap = fpu_simulator((fp_simd_type *)&fpsd,
334 		    (fp_inst_type *)fqp->FQu.fpq.fpq_addr,
335 		    (fsr_type *)&fp->fpu_fsr, gsr,
336 		    fqp->FQu.fpq.fpq_instr);
337 		if (fptrap) {
338 			/*
339 			 * Instruction could not be simulated so we will
340 			 * attempt to deliver a signal.
341 			 * We may be called again upon signal exit (setcontext)
342 			 * and can continue to process the queue then.
343 			 */
344 			if (fqp != fp->fpu_q) {
345 				int i;
346 				struct _fq *fqdp;
347 
348 				/*
349 				 * We need to normalize the floating queue so
350 				 * the excepting instruction is at the head,
351 				 * so that the queue may be copied onto the
352 				 * user signal stack by sendsig().
353 				 */
354 				fqdp = fp->fpu_q;
355 				for (i = fp->fpu_qcnt; i; i--) {
356 					*fqdp++ = *fqp++;
357 				}
358 				fqp = fp->fpu_q;
359 			}
360 			fp->fpu_q_entrysize = sizeof (struct _fpq);
361 
362 			/*
363 			 * fpu_simulator uses the fp registers directly but it
364 			 * uses the software copy of the fsr. We need to write
365 			 * that back to fpu so that fpu's state is current for
366 			 * ucontext.
367 			 */
368 			if (fpu_exists)
369 				_fp_write_pfsr(&fp->fpu_fsr);
370 
371 			/* post signal */
372 			fp_traps(&fpsd, fptrap, rp);
373 
374 			/*
375 			 * Break from loop to allow signal to be sent.
376 			 * If there are other instructions in the fp queue
377 			 * they will be processed when/if the user retuns
378 			 * from the signal handler with a non-empty queue.
379 			 */
380 			break;
381 		}
382 		fp->fpu_qcnt--;
383 		fqp++;
384 	}
385 
386 	/*
387 	 * fpu_simulator uses the fp registers directly, so we have
388 	 * to update the pcb copies to keep current, but it uses the
389 	 * software copy of the fsr, so we write that back to fpu
390 	 */
391 	if (fpu_exists) {
392 		int i;
393 
394 		for (i = 0; i < 32; i++)
395 			_fp_read_pfreg(&fp->fpu_fr.fpu_regs[i], i);
396 		for (i = 16; i < 32; i++)
397 			_fp_read_pdreg(&fp->fpu_fr.fpu_dregs[i], i);
398 		_fp_write_pfsr(&fp->fpu_fsr);
399 	}
400 
401 	kpreempt_enable();
402 }
403 
404 /*
405  * Get the precise trapped V9 floating point instruction.
406  * Fake up a queue to process. If getting the instruction results
407  * in an exception fp_traps() is called to handle the exception - this
408  * usually results in the generation of a signal to be delivered to the user.
409  */
410 
411 void
fp_precise(struct regs * rp)412 fp_precise(struct regs *rp)
413 {
414 	fp_simd_type	fpsd;
415 	int		inst_ftt;
416 
417 	union {
418 		uint_t		i;
419 		fp_inst_type	inst;
420 	} kluge;
421 
422 	klwp_t *lwp = ttolwp(curthread);
423 	kfpu_t *fp = lwptofpu(lwp);
424 	uint64_t gsr;
425 	int mstate;
426 	if (fpu_exists)
427 		save_gsr(fp);
428 	gsr = get_gsr(fp);
429 
430 	/*
431 	 * Get the instruction to be emulated from the pc saved by the trap.
432 	 * Note that the kernel is NOT prepared to handle a kernel fp
433 	 * exception if it can't pass successfully through the fp simulator.
434 	 *
435 	 * If the trap occurred in user mode, set lwp_state to LWP_SYS for the
436 	 * purposes of clock accounting and switch to the LMS_TRAP microstate.
437 	 */
438 	if (USERMODE(rp->r_tstate)) {
439 		inst_ftt = _fp_read_inst((uint32_t *)rp->r_pc, &kluge.i, &fpsd);
440 		mstate = new_mstate(curthread, LMS_TRAP);
441 		lwp->lwp_state = LWP_SYS;
442 	} else {
443 		kluge.i = *(uint_t *)rp->r_pc;
444 		inst_ftt = ftt_none;
445 	}
446 
447 	if (inst_ftt != ftt_none) {
448 		/*
449 		 * Save the bad address and post the signal.
450 		 * It can only be an ftt_alignment or ftt_fault trap.
451 		 * XXX - How can this work w/mainsail and do_unaligned?
452 		 */
453 		fpsd.fp_trapaddr = (caddr_t)rp->r_pc;
454 		fp_traps(&fpsd, inst_ftt, rp);
455 	} else {
456 		/*
457 		 * Conjure up a floating point queue and advance the pc/npc
458 		 * to fake a deferred fp trap. We now run the fp simulator
459 		 * in fp_precise, while allowing setfpregs to call fp_runq,
460 		 * because this allows us to do the ugly machinations to
461 		 * inc/dec the pc depending on the trap type, as per
462 		 * bugid 1210159. fp_runq is still going to have the
463 		 * generic "how do I connect the "fp queue to the pc/npc"
464 		 * problem alluded to in bugid 1192883, which is only a
465 		 * problem for a restorecontext of a v8 fp queue on a
466 		 * v9 system, which seems like the .000000001% case (on v9)!
467 		 */
468 		struct _fpq *pfpq = &fp->fpu_q->FQu.fpq;
469 		fp_simd_type	fpsd;
470 		int fptrap;
471 
472 		pfpq->fpq_addr = (uint_t *)rp->r_pc;
473 		pfpq->fpq_instr = kluge.i;
474 		fp->fpu_qcnt = 1;
475 		fp->fpu_q_entrysize = sizeof (struct _fpq);
476 
477 		kpreempt_disable();
478 		(void) flush_user_windows_to_stack(NULL);
479 		fptrap = fpu_vis_sim((fp_simd_type *)&fpsd,
480 		    (fp_inst_type *)pfpq->fpq_addr, rp,
481 		    (fsr_type *)&fp->fpu_fsr, gsr, kluge.i);
482 
483 		/* update the hardware fp fsr state for sake of ucontext */
484 		if (fpu_exists)
485 			_fp_write_pfsr(&fp->fpu_fsr);
486 
487 		if (fptrap) {
488 			/* back up the pc if the signal needs to be precise */
489 			if (fptrap != ftt_ieee) {
490 				fp->fpu_qcnt = 0;
491 			}
492 			/* post signal */
493 			fp_traps(&fpsd, fptrap, rp);
494 
495 			/* decrement queue count for ieee exceptions */
496 			if (fptrap == ftt_ieee) {
497 				fp->fpu_qcnt = 0;
498 			}
499 		} else {
500 			fp->fpu_qcnt = 0;
501 		}
502 		/* update the software pcb copies of hardware fp registers */
503 		if (fpu_exists) {
504 			fp_save(fp);
505 		}
506 		kpreempt_enable();
507 	}
508 
509 	/*
510 	 * Reset lwp_state to LWP_USER for the purposes of clock accounting,
511 	 * and restore the previously saved microstate.
512 	 */
513 	if (USERMODE(rp->r_tstate)) {
514 		(void) new_mstate(curthread, mstate);
515 		lwp->lwp_state = LWP_USER;
516 	}
517 }
518 
519 /*
520  * Handle floating point traps generated by simulation/emulation.
521  */
522 void
fp_traps(fp_simd_type * pfpsd,enum ftt_type ftt,struct regs * rp)523 fp_traps(
524 	fp_simd_type *pfpsd,	/* Pointer to simulator data */
525 	enum ftt_type ftt,	/* trap type */
526 	struct regs *rp)	/* ptr to regs fro trap */
527 {
528 	/*
529 	 * If we take a user's exception in kernel mode, we want to trap
530 	 * with the user's registers.
531 	 */
532 	switch (ftt) {
533 	case ftt_ieee:
534 		fpu_trap(rp, pfpsd->fp_trapaddr, T_FP_EXCEPTION_IEEE,
535 		    pfpsd->fp_trapcode);
536 		break;
537 	case ftt_fault:
538 		fpu_trap(rp, pfpsd->fp_trapaddr, T_DATA_EXCEPTION, 0);
539 		break;
540 	case ftt_alignment:
541 		fpu_trap(rp, pfpsd->fp_trapaddr, T_ALIGNMENT, 0);
542 		break;
543 	case ftt_unimplemented:
544 		fpu_trap(rp, pfpsd->fp_trapaddr, T_UNIMP_INSTR, 0);
545 		break;
546 	default:
547 		/*
548 		 * We don't expect any of the other types here.
549 		 */
550 		cmn_err(CE_PANIC, "fp_traps: bad ftt");
551 	}
552 }
553