17c478bd9Sstevel@tonic-gate/* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 57c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 67c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 77c478bd9Sstevel@tonic-gate * with the License. 87c478bd9Sstevel@tonic-gate * 97c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 107c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 117c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 127c478bd9Sstevel@tonic-gate * and limitations under the License. 137c478bd9Sstevel@tonic-gate * 147c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 157c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 167c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 177c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 187c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 197c478bd9Sstevel@tonic-gate * 207c478bd9Sstevel@tonic-gate * CDDL HEADER END 217c478bd9Sstevel@tonic-gate */ 227c478bd9Sstevel@tonic-gate/* 237c478bd9Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 247c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 277c478bd9Sstevel@tonic-gate#include <sys/param.h> 287c478bd9Sstevel@tonic-gate#include <sys/errno.h> 297c478bd9Sstevel@tonic-gate#include <sys/asm_linkage.h> 307c478bd9Sstevel@tonic-gate#include <sys/vtrace.h> 317c478bd9Sstevel@tonic-gate#include <sys/machthread.h> 327c478bd9Sstevel@tonic-gate#include <sys/clock.h> 337c478bd9Sstevel@tonic-gate#include <sys/asi.h> 347c478bd9Sstevel@tonic-gate#include <sys/fsr.h> 357c478bd9Sstevel@tonic-gate#include <sys/privregs.h> 367c478bd9Sstevel@tonic-gate#include <sys/fpras_impl.h> 377c478bd9Sstevel@tonic-gate 387c478bd9Sstevel@tonic-gate#include "assym.h" 397c478bd9Sstevel@tonic-gate 407c478bd9Sstevel@tonic-gate/* 417c478bd9Sstevel@tonic-gate * Pseudo-code to aid in understanding the control flow of the 427c478bd9Sstevel@tonic-gate * bcopy/copyin/copyout routines. 437c478bd9Sstevel@tonic-gate * 447c478bd9Sstevel@tonic-gate * On entry: 457c478bd9Sstevel@tonic-gate * 467c478bd9Sstevel@tonic-gate * ! Determine whether to use the FP register version 477c478bd9Sstevel@tonic-gate * ! or the leaf routine version depending on size 487c478bd9Sstevel@tonic-gate * ! of copy and flags. Set up error handling accordingly. 497c478bd9Sstevel@tonic-gate * ! The transition point depends on whether the src and 507c478bd9Sstevel@tonic-gate * ! dst addresses can be aligned to long word, word, 517c478bd9Sstevel@tonic-gate * ! half word, or byte boundaries. 527c478bd9Sstevel@tonic-gate * ! 537c478bd9Sstevel@tonic-gate * ! WARNING: <Register usage convention> 547c478bd9Sstevel@tonic-gate * ! For FP version, %l6 holds previous error handling and 557c478bd9Sstevel@tonic-gate * ! a flag: TRAMP_FLAG (low bits) 567c478bd9Sstevel@tonic-gate * ! for leaf routine version, %o4 holds those values. 577c478bd9Sstevel@tonic-gate * ! So either %l6 or %o4 is reserved and not available for 587c478bd9Sstevel@tonic-gate * ! any other use. 
597c478bd9Sstevel@tonic-gate * 607c478bd9Sstevel@tonic-gate * if (length <= VIS_COPY_THRESHOLD) ! start with a quick test 617c478bd9Sstevel@tonic-gate * go to small_copy; ! to speed short copies 62*5d9d9091SRichard Lowe * 637c478bd9Sstevel@tonic-gate * if (src,dst long word alignable) { 647c478bd9Sstevel@tonic-gate * if (hw_copy_limit_8 == 0) ! hw_copy disabled 657c478bd9Sstevel@tonic-gate * go to small_copy; 667c478bd9Sstevel@tonic-gate * if (length <= hw_copy_limit_8) 677c478bd9Sstevel@tonic-gate * go to small_copy; 687c478bd9Sstevel@tonic-gate * go to FPBLK_copy; 697c478bd9Sstevel@tonic-gate * } 707c478bd9Sstevel@tonic-gate * if (src,dst not alignable) { 717c478bd9Sstevel@tonic-gate * if (hw_copy_limit_1 == 0) ! hw_copy disabled 727c478bd9Sstevel@tonic-gate * go to small_copy; 737c478bd9Sstevel@tonic-gate * if (length <= hw_copy_limit_1) 747c478bd9Sstevel@tonic-gate * go to small_copy; 757c478bd9Sstevel@tonic-gate * go to FPBLK_copy; 767c478bd9Sstevel@tonic-gate * } 777c478bd9Sstevel@tonic-gate * if (src,dst halfword alignable) { 787c478bd9Sstevel@tonic-gate * if (hw_copy_limit_2 == 0) ! hw_copy disabled 797c478bd9Sstevel@tonic-gate * go to small_copy; 807c478bd9Sstevel@tonic-gate * if (length <= hw_copy_limit_2) 817c478bd9Sstevel@tonic-gate * go to small_copy; 827c478bd9Sstevel@tonic-gate * go to FPBLK_copy; 837c478bd9Sstevel@tonic-gate * } 847c478bd9Sstevel@tonic-gate * if (src,dst word alignable) { 857c478bd9Sstevel@tonic-gate * if (hw_copy_limit_4 == 0) ! hw_copy disabled 867c478bd9Sstevel@tonic-gate * go to small_copy; 877c478bd9Sstevel@tonic-gate * if (length <= hw_copy_limit_4) 887c478bd9Sstevel@tonic-gate * go to small_copy; 897c478bd9Sstevel@tonic-gate * go to FPBLK_copy; 907c478bd9Sstevel@tonic-gate * } 917c478bd9Sstevel@tonic-gate * 927c478bd9Sstevel@tonic-gate * small_copy: 937c478bd9Sstevel@tonic-gate * Setup_leaf_rtn_error_handler; ! diffs for each entry point 94*5d9d9091SRichard Lowe * 957c478bd9Sstevel@tonic-gate * if (count <= 3) ! 
fast path for tiny copies 967c478bd9Sstevel@tonic-gate * go to sm_left; ! special finish up code 977c478bd9Sstevel@tonic-gate * else 987c478bd9Sstevel@tonic-gate * if (count > CHKSIZE) ! medium sized copies 997c478bd9Sstevel@tonic-gate * go to sm_med ! tuned by alignment 1007c478bd9Sstevel@tonic-gate * if(src&dst not both word aligned) { 1017c478bd9Sstevel@tonic-gate * sm_movebytes: 1027c478bd9Sstevel@tonic-gate * move byte by byte in 4-way unrolled loop 1037c478bd9Sstevel@tonic-gate * fall into sm_left; 1047c478bd9Sstevel@tonic-gate * sm_left: 1057c478bd9Sstevel@tonic-gate * move 0-3 bytes byte at a time as needed. 1067c478bd9Sstevel@tonic-gate * restore error handler and exit. 1077c478bd9Sstevel@tonic-gate * 1087c478bd9Sstevel@tonic-gate * } else { ! src&dst are word aligned 1097c478bd9Sstevel@tonic-gate * check for at least 8 bytes left, 1107c478bd9Sstevel@tonic-gate * move word at a time, unrolled by 2 1117c478bd9Sstevel@tonic-gate * when fewer than 8 bytes left, 1127c478bd9Sstevel@tonic-gate * sm_half: move half word at a time while 2 or more bytes left 1137c478bd9Sstevel@tonic-gate * sm_byte: move final byte if necessary 1147c478bd9Sstevel@tonic-gate * sm_exit: 1157c478bd9Sstevel@tonic-gate * restore error handler and exit. 1167c478bd9Sstevel@tonic-gate * } 1177c478bd9Sstevel@tonic-gate * 1187c478bd9Sstevel@tonic-gate * ! Medium length cases with at least CHKSIZE bytes available 1197c478bd9Sstevel@tonic-gate * ! method: line up src and dst as best possible, then 1207c478bd9Sstevel@tonic-gate * ! move data in 4-way unrolled loops. 1217c478bd9Sstevel@tonic-gate * 1227c478bd9Sstevel@tonic-gate * sm_med: 1237c478bd9Sstevel@tonic-gate * if(src&dst unalignable) 1247c478bd9Sstevel@tonic-gate * go to sm_movebytes 1257c478bd9Sstevel@tonic-gate * if(src&dst halfword alignable) 1267c478bd9Sstevel@tonic-gate * go to sm_movehalf 1277c478bd9Sstevel@tonic-gate * if(src&dst word alignable) 1287c478bd9Sstevel@tonic-gate * go to sm_moveword 1297c478bd9Sstevel@tonic-gate * ! 
fall into long word movement 1307c478bd9Sstevel@tonic-gate * move bytes until src is word aligned 1317c478bd9Sstevel@tonic-gate * if not long word aligned, move a word 1327c478bd9Sstevel@tonic-gate * move long words in 4-way unrolled loop until < 32 bytes left 1337c478bd9Sstevel@tonic-gate * move long words in 1-way unrolled loop until < 8 bytes left 1347c478bd9Sstevel@tonic-gate * if zero bytes left, goto sm_exit 1357c478bd9Sstevel@tonic-gate * if one byte left, go to sm_byte 1367c478bd9Sstevel@tonic-gate * else go to sm_half 1377c478bd9Sstevel@tonic-gate * 1387c478bd9Sstevel@tonic-gate * sm_moveword: 1397c478bd9Sstevel@tonic-gate * move bytes until src is word aligned 1407c478bd9Sstevel@tonic-gate * move words in 4-way unrolled loop until < 16 bytes left 1417c478bd9Sstevel@tonic-gate * move words in 1-way unrolled loop until < 4 bytes left 1427c478bd9Sstevel@tonic-gate * if zero bytes left, goto sm_exit 1437c478bd9Sstevel@tonic-gate * if one byte left, go to sm_byte 1447c478bd9Sstevel@tonic-gate * else go to sm_half 1457c478bd9Sstevel@tonic-gate * 1467c478bd9Sstevel@tonic-gate * sm_movehalf: 1477c478bd9Sstevel@tonic-gate * move a byte if needed to align src on halfword 1487c478bd9Sstevel@tonic-gate * move halfwords in 4-way unrolled loop until < 8 bytes left 1497c478bd9Sstevel@tonic-gate * if zero bytes left, goto sm_exit 1507c478bd9Sstevel@tonic-gate * if one byte left, go to sm_byte 1517c478bd9Sstevel@tonic-gate * else go to sm_half 1527c478bd9Sstevel@tonic-gate * 1537c478bd9Sstevel@tonic-gate * 1547c478bd9Sstevel@tonic-gate * FPBLK_copy: 1557c478bd9Sstevel@tonic-gate * %l6 = curthread->t_lofault; 1567c478bd9Sstevel@tonic-gate * if (%l6 != NULL) { 1577c478bd9Sstevel@tonic-gate * membar #Sync 1587c478bd9Sstevel@tonic-gate * curthread->t_lofault = .copyerr; 1597c478bd9Sstevel@tonic-gate * caller_error_handler = TRUE ! %l6 |= 2 1607c478bd9Sstevel@tonic-gate * } 1617c478bd9Sstevel@tonic-gate * 1627c478bd9Sstevel@tonic-gate * ! 
for FPU testing we must not migrate cpus 1637c478bd9Sstevel@tonic-gate * if (curthread->t_lwp == NULL) { 1647c478bd9Sstevel@tonic-gate * ! Kernel threads do not have pcb's in which to store 1657c478bd9Sstevel@tonic-gate * ! the floating point state, so disallow preemption during 1667c478bd9Sstevel@tonic-gate * ! the copy. This also prevents cpu migration. 1677c478bd9Sstevel@tonic-gate * kpreempt_disable(curthread); 1687c478bd9Sstevel@tonic-gate * } else { 1697c478bd9Sstevel@tonic-gate * thread_nomigrate(); 1707c478bd9Sstevel@tonic-gate * } 1717c478bd9Sstevel@tonic-gate * 1727c478bd9Sstevel@tonic-gate * old_fprs = %fprs; 1737c478bd9Sstevel@tonic-gate * old_gsr = %gsr; 1747c478bd9Sstevel@tonic-gate * if (%fprs.fef) { 1757c478bd9Sstevel@tonic-gate * %fprs.fef = 1; 1767c478bd9Sstevel@tonic-gate * save current fpregs on stack using blockstore 1777c478bd9Sstevel@tonic-gate * } else { 1787c478bd9Sstevel@tonic-gate * %fprs.fef = 1; 1797c478bd9Sstevel@tonic-gate * } 1807c478bd9Sstevel@tonic-gate * 1817c478bd9Sstevel@tonic-gate * 1827c478bd9Sstevel@tonic-gate * do_blockcopy_here; 1837c478bd9Sstevel@tonic-gate * 1847c478bd9Sstevel@tonic-gate * In lofault handler: 1857c478bd9Sstevel@tonic-gate * curthread->t_lofault = .copyerr2; 1867c478bd9Sstevel@tonic-gate * Continue on with the normal exit handler 1877c478bd9Sstevel@tonic-gate * 1887c478bd9Sstevel@tonic-gate * On normal exit: 1897c478bd9Sstevel@tonic-gate * %gsr = old_gsr; 1907c478bd9Sstevel@tonic-gate * if (old_fprs & FPRS_FEF) 1917c478bd9Sstevel@tonic-gate * restore fpregs from stack using blockload 1927c478bd9Sstevel@tonic-gate * else 1937c478bd9Sstevel@tonic-gate * zero fpregs 1947c478bd9Sstevel@tonic-gate * %fprs = old_fprs; 1957c478bd9Sstevel@tonic-gate * membar #Sync 1967c478bd9Sstevel@tonic-gate * curthread->t_lofault = (%l6 & ~3); 1977c478bd9Sstevel@tonic-gate * ! following test omitted from copyin/copyout as they 1987c478bd9Sstevel@tonic-gate * ! 
will always have a current thread 1997c478bd9Sstevel@tonic-gate * if (curthread->t_lwp == NULL) 2007c478bd9Sstevel@tonic-gate * kpreempt_enable(curthread); 2017c478bd9Sstevel@tonic-gate * else 2027c478bd9Sstevel@tonic-gate * thread_allowmigrate(); 2037c478bd9Sstevel@tonic-gate * return (0) 2047c478bd9Sstevel@tonic-gate * 2057c478bd9Sstevel@tonic-gate * In second lofault handler (.copyerr2): 2067c478bd9Sstevel@tonic-gate * We've tried to restore fp state from the stack and failed. To 2077c478bd9Sstevel@tonic-gate * prevent returning with a corrupted fp state, we will panic. 2087c478bd9Sstevel@tonic-gate */ 2097c478bd9Sstevel@tonic-gate 2107c478bd9Sstevel@tonic-gate/* 2117c478bd9Sstevel@tonic-gate * Comments about optimization choices 2127c478bd9Sstevel@tonic-gate * 2137c478bd9Sstevel@tonic-gate * The initial optimization decision in this code is to determine 2147c478bd9Sstevel@tonic-gate * whether to use the FP registers for a copy or not. If we don't 2157c478bd9Sstevel@tonic-gate * use the FP registers, we can execute the copy as a leaf routine, 2167c478bd9Sstevel@tonic-gate * saving a register save and restore. Also, less elaborate setup 2177c478bd9Sstevel@tonic-gate * is required, allowing short copies to be completed more quickly. 2187c478bd9Sstevel@tonic-gate * For longer copies, especially unaligned ones (where the src and 2197c478bd9Sstevel@tonic-gate * dst do not align to allow simple ldx,stx operation), the FP 2207c478bd9Sstevel@tonic-gate * registers allow much faster copy operations. 2217c478bd9Sstevel@tonic-gate * 2227c478bd9Sstevel@tonic-gate * The estimated extra cost of the FP path will vary depending on 2237c478bd9Sstevel@tonic-gate * src/dst alignment, dst offset from the next 64 byte FPblock store 2247c478bd9Sstevel@tonic-gate * boundary, remaining src data after the last full dst cache line is 2257c478bd9Sstevel@tonic-gate * moved, whether the FP registers need to be saved, and some other 2267c478bd9Sstevel@tonic-gate * minor issues. 
The average additional overhead is estimated to be 2277c478bd9Sstevel@tonic-gate * 400 clocks. Since each non-repeated/predicted tst and branch costs 228*5d9d9091SRichard Lowe * around 10 clocks, elaborate calculation would slow down all 2297c478bd9Sstevel@tonic-gate * longer copies and only benefit a small portion of medium sized 2307c478bd9Sstevel@tonic-gate * copies. Rather than incur such cost, we chose fixed transition 2317c478bd9Sstevel@tonic-gate * points for each of the alignment choices. 2327c478bd9Sstevel@tonic-gate * 2337c478bd9Sstevel@tonic-gate * For the inner loop, here is a comparison of the per cache line 2347c478bd9Sstevel@tonic-gate * costs for each alignment when src&dst are in cache: 235*5d9d9091SRichard Lowe * 2367c478bd9Sstevel@tonic-gate * byte aligned: 108 clocks slower for non-FPBLK 2377c478bd9Sstevel@tonic-gate * half aligned: 44 clocks slower for non-FPBLK 2387c478bd9Sstevel@tonic-gate * word aligned: 12 clocks slower for non-FPBLK 2397c478bd9Sstevel@tonic-gate * long aligned: 4 clocks >>faster<< for non-FPBLK 2407c478bd9Sstevel@tonic-gate * 2417c478bd9Sstevel@tonic-gate * The long aligned loop runs faster because it does no prefetching. 2427c478bd9Sstevel@tonic-gate * That wins if the data is already in cache or there is too little 2437c478bd9Sstevel@tonic-gate * data to gain much benefit from prefetching. But when there 2447c478bd9Sstevel@tonic-gate * is more data and that data is not in cache, failing to prefetch 2457c478bd9Sstevel@tonic-gate * can run much slower. In addition, there is a 2 Kbyte store queue 2467c478bd9Sstevel@tonic-gate * which will cause the non-FPBLK inner loop to slow for larger copies. 2477c478bd9Sstevel@tonic-gate * The exact tradeoff is strongly load and application dependent, with 2487c478bd9Sstevel@tonic-gate * increasing risk of a customer visible performance regression if the 2497c478bd9Sstevel@tonic-gate * non-FPBLK code is used for larger copies. 
Studies of synthetic in-cache 2507c478bd9Sstevel@tonic-gate * vs out-of-cache copy tests in user space suggest 1024 bytes as a safe 2517c478bd9Sstevel@tonic-gate * upper limit for the non-FPBLK code. To minimize performance regression 252*5d9d9091SRichard Lowe * risk while still gaining the primary benefits of the improvements to 2537c478bd9Sstevel@tonic-gate * the non-FPBLK code, we set an upper bound of 1024 bytes for the various 254*5d9d9091SRichard Lowe * hw_copy_limit_*. Later experimental studies using different values 255*5d9d9091SRichard Lowe * of hw_copy_limit_* can be used to make further adjustments if 2567c478bd9Sstevel@tonic-gate * appropriate. 2577c478bd9Sstevel@tonic-gate * 2587c478bd9Sstevel@tonic-gate * hw_copy_limit_1 = src and dst are byte aligned but not halfword aligned 2597c478bd9Sstevel@tonic-gate * hw_copy_limit_2 = src and dst are halfword aligned but not word aligned 2607c478bd9Sstevel@tonic-gate * hw_copy_limit_4 = src and dst are word aligned but not longword aligned 2617c478bd9Sstevel@tonic-gate * hw_copy_limit_8 = src and dst are longword aligned 2627c478bd9Sstevel@tonic-gate * 2637c478bd9Sstevel@tonic-gate * To say that src and dst are word aligned means that after 2647c478bd9Sstevel@tonic-gate * some initial alignment activity of moving 0 to 3 bytes, 2657c478bd9Sstevel@tonic-gate * both the src and dst will be on word boundaries so that 2667c478bd9Sstevel@tonic-gate * word loads and stores may be used. 
2677c478bd9Sstevel@tonic-gate * 2687c478bd9Sstevel@tonic-gate * Recommended initial values as of Mar 2004, includes testing 2697c478bd9Sstevel@tonic-gate * on Cheetah+ (900MHz), Cheetah++ (1200MHz), and Jaguar(1050MHz): 2707c478bd9Sstevel@tonic-gate * hw_copy_limit_1 = 256 2717c478bd9Sstevel@tonic-gate * hw_copy_limit_2 = 512 2727c478bd9Sstevel@tonic-gate * hw_copy_limit_4 = 1024 2737c478bd9Sstevel@tonic-gate * hw_copy_limit_8 = 1024 (or 1536 on some systems) 2747c478bd9Sstevel@tonic-gate * 2757c478bd9Sstevel@tonic-gate * 2767c478bd9Sstevel@tonic-gate * If hw_copy_limit_? is set to zero, then use of FPBLK copy is 2777c478bd9Sstevel@tonic-gate * disabled for that alignment choice. 2787c478bd9Sstevel@tonic-gate * If hw_copy_limit_? is set to a value between 1 and VIS_COPY_THRESHOLD (256) 2797c478bd9Sstevel@tonic-gate * the value of VIS_COPY_THRESHOLD is used. 2807c478bd9Sstevel@tonic-gate * It is not envisioned that hw_copy_limit_? will be changed in the field 2817c478bd9Sstevel@tonic-gate * It is provided to allow for disabling FPBLK copies and to allow 2827c478bd9Sstevel@tonic-gate * easy testing of alternate values on future HW implementations 2837c478bd9Sstevel@tonic-gate * that might have different cache sizes, clock rates or instruction 2847c478bd9Sstevel@tonic-gate * timing rules. 2857c478bd9Sstevel@tonic-gate * 2867c478bd9Sstevel@tonic-gate * Our first test for FPBLK copies vs non-FPBLK copies checks a minimum 2877c478bd9Sstevel@tonic-gate * threshold to speedup all shorter copies (less than 256). That 2887c478bd9Sstevel@tonic-gate * saves an alignment test, memory reference, and enabling test 2897c478bd9Sstevel@tonic-gate * for all short copies, or an estimated 24 clocks. 2907c478bd9Sstevel@tonic-gate * 2917c478bd9Sstevel@tonic-gate * The order in which these limits are checked does matter since each 2927c478bd9Sstevel@tonic-gate * non-predicted tst and branch costs around 10 clocks. 
2937c478bd9Sstevel@tonic-gate * If src and dst are randomly selected addresses, 2947c478bd9Sstevel@tonic-gate * 4 of 8 will not be alignable. 2957c478bd9Sstevel@tonic-gate * 2 of 8 will be half word alignable. 2967c478bd9Sstevel@tonic-gate * 1 of 8 will be word alignable. 2977c478bd9Sstevel@tonic-gate * 1 of 8 will be long word alignable. 2987c478bd9Sstevel@tonic-gate * But, tests on running kernels show that src and dst to copy code 2997c478bd9Sstevel@tonic-gate * are typically not on random alignments. Structure copies and 3007c478bd9Sstevel@tonic-gate * copies of larger data sizes are often on long word boundaries. 3017c478bd9Sstevel@tonic-gate * So we test the long word alignment case first, then 3027c478bd9Sstevel@tonic-gate * the byte alignment, then halfword, then word alignment. 3037c478bd9Sstevel@tonic-gate * 3047c478bd9Sstevel@tonic-gate * Several times, tests for length are made to split the code 3057c478bd9Sstevel@tonic-gate * into subcases. These tests often allow later tests to be 306*5d9d9091SRichard Lowe * avoided. For example, within the non-FPBLK copy, we first 3077c478bd9Sstevel@tonic-gate * check for tiny copies of 3 bytes or less. That allows us 3087c478bd9Sstevel@tonic-gate * to use a 4-way unrolled loop for the general byte copy case 3097c478bd9Sstevel@tonic-gate * without a test on loop entry. 3107c478bd9Sstevel@tonic-gate * We subdivide the non-FPBLK case further into CHKSIZE bytes and less 3117c478bd9Sstevel@tonic-gate * vs longer cases. For the really short case, we don't attempt 3127c478bd9Sstevel@tonic-gate * to align src and dst. We try to minimize special case tests in 3137c478bd9Sstevel@tonic-gate * the shortest loops as each test adds a significant percentage 3147c478bd9Sstevel@tonic-gate * to the total time. 
3157c478bd9Sstevel@tonic-gate * 3167c478bd9Sstevel@tonic-gate * For the medium sized cases, we allow ourselves to adjust the 3177c478bd9Sstevel@tonic-gate * src and dst alignment and provide special cases for each of 3187c478bd9Sstevel@tonic-gate * the four adjusted alignment cases. The CHKSIZE that was used 3197c478bd9Sstevel@tonic-gate * to decide between short and medium size was chosen to be 39 3207c478bd9Sstevel@tonic-gate * as that allows for the worst case of 7 bytes of alignment 3217c478bd9Sstevel@tonic-gate * shift and 4 times 8 bytes for the first long word unrolling. 3227c478bd9Sstevel@tonic-gate * That knowledge saves an initial test for length on entry into 3237c478bd9Sstevel@tonic-gate * the medium cases. If the general loop unrolling factor were 3247c478bd9Sstevel@tonic-gate * to be increased, this number would also need to be adjusted. 3257c478bd9Sstevel@tonic-gate * 3267c478bd9Sstevel@tonic-gate * For all cases in the non-FPBLK code where it is known that at 3277c478bd9Sstevel@tonic-gate * least 4 chunks of data are available for movement, the 3287c478bd9Sstevel@tonic-gate * loop is unrolled by four. This 4-way loop runs in 8 clocks 3297c478bd9Sstevel@tonic-gate * or 2 clocks per data element. Due to limitations of the 3307c478bd9Sstevel@tonic-gate * branch instruction on Cheetah, Jaguar, and Panther, the 3317c478bd9Sstevel@tonic-gate * minimum time for a small, tight loop is 3 clocks. So 3327c478bd9Sstevel@tonic-gate * the 4-way loop runs 50% faster than the fastest non-unrolled 3337c478bd9Sstevel@tonic-gate * loop. 3347c478bd9Sstevel@tonic-gate * 3357c478bd9Sstevel@tonic-gate * Instruction alignment is forced by use of .align 16 directives 3367c478bd9Sstevel@tonic-gate * and nops which are not executed in the code. 
This 3377c478bd9Sstevel@tonic-gate * combination of operations shifts the alignment of following 3387c478bd9Sstevel@tonic-gate * loops to ensure that loops are aligned so that their instructions 339*5d9d9091SRichard Lowe * fall within the minimum number of 4 instruction fetch groups. 340*5d9d9091SRichard Lowe * If instructions are inserted or removed between the .align 3417c478bd9Sstevel@tonic-gate * instruction and the unrolled loops, then the alignment needs 3427c478bd9Sstevel@tonic-gate * to be readjusted. Misaligned loops can add a clock per loop 3437c478bd9Sstevel@tonic-gate * iteration to the loop timing. 3447c478bd9Sstevel@tonic-gate * 3457c478bd9Sstevel@tonic-gate * In a few cases, code is duplicated to avoid a branch. Since 3467c478bd9Sstevel@tonic-gate * a non-predicted tst and branch takes 10 clocks, this savings 3477c478bd9Sstevel@tonic-gate * is judged an appropriate time-space tradeoff. 3487c478bd9Sstevel@tonic-gate * 3497c478bd9Sstevel@tonic-gate * Within the FPBLK-code, the prefetch method in the inner 350*5d9d9091SRichard Lowe * loop needs to be explained as it is not standard. Two 3517c478bd9Sstevel@tonic-gate * prefetches are issued for each cache line instead of one. 3527c478bd9Sstevel@tonic-gate * The primary one is at the maximum reach of 8 cache lines. 3537c478bd9Sstevel@tonic-gate * Most of the time, that maximum prefetch reach gives the 3547c478bd9Sstevel@tonic-gate * cache line more time to reach the processor for systems with 3557c478bd9Sstevel@tonic-gate * higher processor clocks. But, sometimes memory interference 3567c478bd9Sstevel@tonic-gate * can cause that prefetch to be dropped. Putting a second 3577c478bd9Sstevel@tonic-gate * prefetch at a reach of 5 cache lines catches the drops 3587c478bd9Sstevel@tonic-gate * three iterations later and shows a measured improvement 3597c478bd9Sstevel@tonic-gate * in performance over any similar loop with a single prefetch. 
360*5d9d9091SRichard Lowe * The prefetches are placed in the loop so they overlap with 361*5d9d9091SRichard Lowe * non-memory instructions, so that there is no extra cost 3627c478bd9Sstevel@tonic-gate * when the data is already in-cache. 3637c478bd9Sstevel@tonic-gate * 3647c478bd9Sstevel@tonic-gate */ 3657c478bd9Sstevel@tonic-gate 3667c478bd9Sstevel@tonic-gate/* 3677c478bd9Sstevel@tonic-gate * Notes on preserving existing fp state and on membars. 3687c478bd9Sstevel@tonic-gate * 3697c478bd9Sstevel@tonic-gate * When a copyOP decides to use fp we may have to preserve existing 3707c478bd9Sstevel@tonic-gate * floating point state. It is not the caller's state that we need to 3717c478bd9Sstevel@tonic-gate * preserve - the rest of the kernel does not use fp and, anyway, fp 3727c478bd9Sstevel@tonic-gate * registers are volatile across a call. Some examples: 3737c478bd9Sstevel@tonic-gate * 374*5d9d9091SRichard Lowe * - userland has fp state and is interrupted (device interrupt 3757c478bd9Sstevel@tonic-gate * or trap) and within the interrupt/trap handling we use 3767c478bd9Sstevel@tonic-gate * bcopy() 3777c478bd9Sstevel@tonic-gate * - another (higher level) interrupt or trap handler uses bcopy 3787c478bd9Sstevel@tonic-gate * while a bcopy from an earlier interrupt is still active 3797c478bd9Sstevel@tonic-gate * - an asynchronous error trap occurs while fp state exists (in 3807c478bd9Sstevel@tonic-gate * userland or in kernel copy) and the tl0 component of the handling 3817c478bd9Sstevel@tonic-gate * uses bcopy 3827c478bd9Sstevel@tonic-gate * - a user process with fp state incurs a copy-on-write fault and 3837c478bd9Sstevel@tonic-gate * hwblkpagecopy always uses fp 3847c478bd9Sstevel@tonic-gate * 3857c478bd9Sstevel@tonic-gate * We therefore need a per-call place in which to preserve fp state - 3867c478bd9Sstevel@tonic-gate * using our stack is ideal (and since fp copy cannot be leaf optimized 3877c478bd9Sstevel@tonic-gate * because of calls it makes, this is no hardship). 
3887c478bd9Sstevel@tonic-gate * 3897c478bd9Sstevel@tonic-gate * The following membar BLD/BST discussion is Cheetah pipeline specific. 3907c478bd9Sstevel@tonic-gate * In Cheetah BLD is blocking, #LoadLoad/#LoadStore/#StoreStore are 3917c478bd9Sstevel@tonic-gate * nops (those semantics always apply) and #StoreLoad is implemented 3927c478bd9Sstevel@tonic-gate * as a membar #Sync. 3937c478bd9Sstevel@tonic-gate * 3947c478bd9Sstevel@tonic-gate * It is possible that the owner of the fp state has a block load or 3957c478bd9Sstevel@tonic-gate * block store still "in flight" at the time we come to preserve that 3967c478bd9Sstevel@tonic-gate * state. Block loads are blocking in Cheetah pipelines so we do not 3977c478bd9Sstevel@tonic-gate * need to sync with them. In preserving fp regs we will use block stores 3987c478bd9Sstevel@tonic-gate * (which are not blocking in Cheetah pipelines) so we require a membar #Sync 3997c478bd9Sstevel@tonic-gate * after storing state (so that our subsequent use of those registers 4007c478bd9Sstevel@tonic-gate * does not modify them before the block stores complete); this membar 4017c478bd9Sstevel@tonic-gate * also serves to sync with block stores the owner of the fp state has 4027c478bd9Sstevel@tonic-gate * initiated. 4037c478bd9Sstevel@tonic-gate * 4047c478bd9Sstevel@tonic-gate * When we have finished fp copy (with its repeated block stores) 4057c478bd9Sstevel@tonic-gate * we must membar #Sync so that our block stores may complete before 4067c478bd9Sstevel@tonic-gate * we either restore the original fp state into the fp registers or 4077c478bd9Sstevel@tonic-gate * return to a caller which may initiate other fp operations that could 4087c478bd9Sstevel@tonic-gate * modify the fp regs we used before the block stores complete. 
4097c478bd9Sstevel@tonic-gate * 4107c478bd9Sstevel@tonic-gate * Synchronous faults (eg, unresolvable DMMU miss) that occur while 4117c478bd9Sstevel@tonic-gate * t_lofault is not NULL will not panic but will instead trampoline 4127c478bd9Sstevel@tonic-gate * to the registered lofault handler. There is no need for any 4137c478bd9Sstevel@tonic-gate * membars for these - eg, our store to t_lofault will always be visible to 4147c478bd9Sstevel@tonic-gate * ourselves and it is our cpu which will take any trap. 4157c478bd9Sstevel@tonic-gate * 4167c478bd9Sstevel@tonic-gate * Asynchronous faults (eg, uncorrectable ECC error from memory) that occur 4177c478bd9Sstevel@tonic-gate * while t_lofault is not NULL will also not panic. Since we're copying 4187c478bd9Sstevel@tonic-gate * to or from userland the extent of the damage is known - the destination 4197c478bd9Sstevel@tonic-gate * buffer is incomplete. So trap handlers will trampoline to the lofault 4207c478bd9Sstevel@tonic-gate * handler in this case which should take some form of error action to 4217c478bd9Sstevel@tonic-gate * avoid using the incomplete buffer. The trap handler also flags the 4227c478bd9Sstevel@tonic-gate * fault so that later return-from-trap handling (for the trap that brought 4237c478bd9Sstevel@tonic-gate * this thread into the kernel in the first place) can notify the process 4247c478bd9Sstevel@tonic-gate * and reboot the system (or restart the service with Greenline/Contracts). 4257c478bd9Sstevel@tonic-gate * 4267c478bd9Sstevel@tonic-gate * Asynchronous faults (eg, uncorrectable ECC error from memory) can 4277c478bd9Sstevel@tonic-gate * result in deferred error traps - the trap is taken sometime after 4287c478bd9Sstevel@tonic-gate * the event and the trap PC may not be the PC of the faulting access. 4297c478bd9Sstevel@tonic-gate * Delivery of such pending traps can be forced by a membar #Sync, acting 4307c478bd9Sstevel@tonic-gate * as an "error barrier" in this role. 
To accurately apply the user/kernel 4317c478bd9Sstevel@tonic-gate * separation described in the preceding paragraph we must force delivery 4327c478bd9Sstevel@tonic-gate * of deferred traps affecting kernel state before we install a lofault 4337c478bd9Sstevel@tonic-gate * handler (if we interpose a new lofault handler on an existing one there 4347c478bd9Sstevel@tonic-gate * is no need to repeat this), and we must force delivery of deferred 4357c478bd9Sstevel@tonic-gate * errors affecting the lofault-protected region before we clear t_lofault. 4367c478bd9Sstevel@tonic-gate * Failure to do so results in lost kernel state being interpreted as 4377c478bd9Sstevel@tonic-gate * affecting a copyin/copyout only, or of an error that really only 4387c478bd9Sstevel@tonic-gate * affects copy data being interpreted as losing kernel state. 4397c478bd9Sstevel@tonic-gate * 4407c478bd9Sstevel@tonic-gate * Since the copy operations may preserve and later restore floating 4417c478bd9Sstevel@tonic-gate * point state that does not belong to the caller (see examples above), 4427c478bd9Sstevel@tonic-gate * we must be careful in how we do this in order to prevent corruption 4437c478bd9Sstevel@tonic-gate * of another program. 4447c478bd9Sstevel@tonic-gate * 4457c478bd9Sstevel@tonic-gate * To make sure that floating point state is always saved and restored 4467c478bd9Sstevel@tonic-gate * correctly, the following "big rules" must be followed when the floating 4477c478bd9Sstevel@tonic-gate * point registers will be used: 4487c478bd9Sstevel@tonic-gate * 4497c478bd9Sstevel@tonic-gate * 1. %l6 always holds the caller's lofault handler. Also in this register, 4507c478bd9Sstevel@tonic-gate * Bit 1 (FPUSED_FLAG) indicates that the floating point registers are in 4517c478bd9Sstevel@tonic-gate * use. Bit 2 (TRAMP_FLAG) indicates that the call was to bcopy, and a 4527c478bd9Sstevel@tonic-gate * lofault handler was set coming in. 4537c478bd9Sstevel@tonic-gate * 4547c478bd9Sstevel@tonic-gate * 2. 
The FPUSED flag indicates that all FP state has been successfully stored 4557c478bd9Sstevel@tonic-gate * on the stack. It should not be set until this save has been completed. 4567c478bd9Sstevel@tonic-gate * 4577c478bd9Sstevel@tonic-gate * 3. The FPUSED flag should not be cleared on exit until all FP state has 4587c478bd9Sstevel@tonic-gate * been restored from the stack. If an error occurs while restoring 4597c478bd9Sstevel@tonic-gate * data from the stack, the error handler can check this flag to see if 4607c478bd9Sstevel@tonic-gate * a restore is necessary. 4617c478bd9Sstevel@tonic-gate * 4627c478bd9Sstevel@tonic-gate * 4. Code run under the new lofault handler must be kept to a minimum. In 4637c478bd9Sstevel@tonic-gate * particular, any calls to FP_ALLOWMIGRATE, which could result in a call 4647c478bd9Sstevel@tonic-gate * to kpreempt(), should not be made until after the lofault handler has 4657c478bd9Sstevel@tonic-gate * been restored. 4667c478bd9Sstevel@tonic-gate */ 4677c478bd9Sstevel@tonic-gate 4687c478bd9Sstevel@tonic-gate/* 4697c478bd9Sstevel@tonic-gate * VIS_COPY_THRESHOLD indicates the minimum number of bytes needed 4707c478bd9Sstevel@tonic-gate * to "break even" using FP/VIS-accelerated memory operations. 4717c478bd9Sstevel@tonic-gate * The FPBLK code assumes a minimum number of bytes are available 472*5d9d9091SRichard Lowe * to be moved on entry. Check that code carefully before 4737c478bd9Sstevel@tonic-gate * reducing VIS_COPY_THRESHOLD below 256. 4747c478bd9Sstevel@tonic-gate */ 4757c478bd9Sstevel@tonic-gate/* 4767c478bd9Sstevel@tonic-gate * This shadows sys/machsystm.h which can't be included due to the lack of 4777c478bd9Sstevel@tonic-gate * _ASM guards in include files it references. Change it here, change it there. 
*/
#define	VIS_COPY_THRESHOLD 256

/*
 * TEST for very short copies
 * Be aware that the maximum unroll for the short unaligned case
 * is SHORTCOPY+1
 */
#define	SHORTCOPY 3
#define	CHKSIZE  39

/*
 * Flag bits kept in the low bits of the saved t_lofault value
 * (%l6 for the FP version, %o4 for the leaf routine version).
 *
 * TRAMP_FLAG indicates that we're to trampoline to the error handler.
 * Entry points bcopy, copyin_noerr, and copyout_noerr use this flag.
 * kcopy, copyout, xcopyout, copyin, and xcopyin do not set this flag.
 *
 * FPUSED_FLAG indicates that the floating point registers are in use
 * and that their previous state has been saved on the stack.
 *
 * MASK_FLAGS covers both bits and is used to strip the flags before the
 * saved value is written back to t_lofault.
 */
#define	FPUSED_FLAG	1
#define	TRAMP_FLAG	2
#define	MASK_FLAGS	3	/* FPUSED_FLAG | TRAMP_FLAG */

/*
 * Number of outstanding prefetches.
 * Testing with 1200 MHz Cheetah+ and Jaguar gives best results with
 * two prefetches, one with a reach of 8*BLOCK_SIZE+8 and one with a
 * reach of 5*BLOCK_SIZE.  The double prefetch gives a typical improvement
 * of 5% for large copies as compared to a single prefetch.  The reason
 * for the improvement is that with Cheetah and Jaguar, some prefetches
 * are dropped due to the prefetch queue being full.  The second prefetch
 * reduces the number of cache lines that are dropped.
 * Do not remove the double prefetch or change either CHEETAH_PREFETCH
 * or CHEETAH_2ND_PREFETCH without extensive performance tests to prove
 * there is no loss of performance.
 */
#define	CHEETAH_PREFETCH	8
#define	CHEETAH_2ND_PREFETCH	5

#define	VIS_BLOCKSIZE		64

/*
 * Size of stack frame in order to accommodate a 64-byte aligned
 * floating-point register save area and 2 64-bit temp locations.
 * All copy functions use two quadrants of fp registers; to assure a
 * block-aligned two block buffer in which to save we must reserve
 * three blocks on stack.  Not all functions preserve %fprs on stack
 * or need to preserve %gsr but we use HWCOPYFRAMESIZE for all.
 *
 *    _______________________________________ <-- %fp + STACK_BIAS
 *    | We may need to preserve 2 quadrants |
 *    | of fp regs, but since we do so with |
 *    | BST/BLD we need room in which to    |
 *    | align to VIS_BLOCKSIZE bytes.  So   |
 *    | this area is 3 * VIS_BLOCKSIZE.     | <--  - SAVED_FPREGS_OFFSET
 *    |-------------------------------------|
 *    | 8 bytes to save %fprs               | <--  - SAVED_FPRS_OFFSET
 *    |-------------------------------------|
 *    | 8 bytes to save %gsr                | <--  - SAVED_GSR_OFFSET
 *    ---------------------------------------
 *
 * SAVED_FPREGS_ADJUST is what the block save/restore macros subtract from
 * %fp + STACK_BIAS before rounding down to a VIS_BLOCKSIZE boundary; the
 * result is the base of the two-block save buffer.
 */
#define	HWCOPYFRAMESIZE		((VIS_BLOCKSIZE * (2 + 1)) + (2 * 8))
#define	SAVED_FPREGS_OFFSET	(VIS_BLOCKSIZE * 3)
#define	SAVED_FPREGS_ADJUST	((VIS_BLOCKSIZE * 2) - 1)
#define	SAVED_FPRS_OFFSET	(SAVED_FPREGS_OFFSET + 8)
#define	SAVED_GSR_OFFSET	(SAVED_FPRS_OFFSET + 8)

/*
 * Common macros used by the various versions of the block copy
 * routines in this file.
 */

/*
 * In FP copies if we do not have preserved data to restore over
 * the fp regs we used then we must zero those regs to avoid
 * exposing portions of the data to later threads (data security).
 *
 * Copy functions use either quadrants 1 and 3 or 2 and 4.
 *
 * FZEROQ1Q3: Zero quadrants 1 and 3, ie %f0 - %f15 and %f32 - %f47
 * FZEROQ2Q4: Zero quadrants 2 and 4, ie %f16 - %f31 and %f48 - %f63
 *
 * The instructions below are quicker than repeated fzero instructions
 * since they can dispatch down two fp pipelines.
*/
#define	FZEROQ1Q3			\
	fzero	%f0			;\
	fzero	%f2			;\
	faddd	%f0, %f2, %f4		;\
	fmuld	%f0, %f2, %f6		;\
	faddd	%f0, %f2, %f8		;\
	fmuld	%f0, %f2, %f10		;\
	faddd	%f0, %f2, %f12		;\
	fmuld	%f0, %f2, %f14		;\
	faddd	%f0, %f2, %f32		;\
	fmuld	%f0, %f2, %f34		;\
	faddd	%f0, %f2, %f36		;\
	fmuld	%f0, %f2, %f38		;\
	faddd	%f0, %f2, %f40		;\
	fmuld	%f0, %f2, %f42		;\
	faddd	%f0, %f2, %f44		;\
	fmuld	%f0, %f2, %f46

#define	FZEROQ2Q4			\
	fzero	%f16			;\
	fzero	%f18			;\
	faddd	%f16, %f18, %f20	;\
	fmuld	%f16, %f18, %f22	;\
	faddd	%f16, %f18, %f24	;\
	fmuld	%f16, %f18, %f26	;\
	faddd	%f16, %f18, %f28	;\
	fmuld	%f16, %f18, %f30	;\
	faddd	%f16, %f18, %f48	;\
	fmuld	%f16, %f18, %f50	;\
	faddd	%f16, %f18, %f52	;\
	fmuld	%f16, %f18, %f54	;\
	faddd	%f16, %f18, %f56	;\
	fmuld	%f16, %f18, %f58	;\
	faddd	%f16, %f18, %f60	;\
	fmuld	%f16, %f18, %f62

/*
 * Macros to save and restore quadrants 1 and 3 or 2 and 4 to/from the stack.
 * Used to save and restore in-use fp registers when we want to use FP
 * and find fp already in use and copy size still large enough to justify
 * the additional overhead of this save and restore.
 *
 * A membar #Sync is needed before save to sync fp ops initiated before
 * the call to the copy function (by whoever has fp in use); for example
 * an earlier block load to the quadrant we are about to save may still be
 * "in flight".  A membar #Sync is required at the end of the save to
 * sync our block store (the copy code is about to begin ldd's to the
 * first quadrant).  Note, however, that since Cheetah pipeline block load
 * is blocking we can omit the initial membar before saving fp state (they're
 * commented below in case of future porting to a chip that does not block
 * on block load).
 *
 * Similarly: a membar #Sync before restore allows the block stores of
 * the copy operation to complete before we fill the quadrants with their
 * original data, and a membar #Sync after restore lets the block loads
 * of the restore complete before we return to whoever has the fp regs
 * in use.  To avoid repeated membar #Sync we make it the responsibility
 * of the copy code to membar #Sync immediately after copy is complete
 * and before using the BLD_*_FROMSTACK macro.
 *
 * Each macro takes one scratch register (tmp1), which is overwritten.
 * The save buffer base is %fp + STACK_BIAS - SAVED_FPREGS_ADJUST rounded
 * down to a VIS_BLOCKSIZE boundary; the first quadrant goes in the first
 * block and the second quadrant in the block after it.
 */
#define	BST_FPQ1Q3_TOSTACK(tmp1)				\
	/* membar #Sync	*/					;\
	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
	stda	%f0, [tmp1]ASI_BLK_P				;\
	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
	stda	%f32, [tmp1]ASI_BLK_P				;\
	membar	#Sync

#define	BLD_FPQ1Q3_FROMSTACK(tmp1)				\
	/* membar #Sync - provided at copy completion */	;\
	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
	ldda	[tmp1]ASI_BLK_P, %f0				;\
	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
	ldda	[tmp1]ASI_BLK_P, %f32				;\
	membar	#Sync

#define	BST_FPQ2Q4_TOSTACK(tmp1)				\
	/* membar #Sync */					;\
	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
	stda	%f16, [tmp1]ASI_BLK_P				;\
	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
	stda	%f48, [tmp1]ASI_BLK_P				;\
	membar	#Sync

#define	BLD_FPQ2Q4_FROMSTACK(tmp1)				\
	/* membar #Sync - provided at copy completion */	;\
	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
	ldda	[tmp1]ASI_BLK_P, %f16				;\
	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
	ldda	[tmp1]ASI_BLK_P, %f48				;\
	membar	#Sync

/*
 * FP_NOMIGRATE and FP_ALLOWMIGRATE.  Prevent migration (or, stronger,
 * prevent preemption if there is no t_lwp to save FP state to on context
 * switch) before commencing a FP copy, and reallow it on completion or
 * in error trampoline paths when we were using FP copy.
 *
 * Both macros may call other functions, so be aware that all outputs are
 * forfeit after using these macros.  For this reason we do not pass registers
 * to use - we just use any outputs we want.
 *
 * For fpRAS we need to perform the fpRAS mechanism test on the same
 * CPU as we use for the copy operation, both so that we validate the
 * CPU we perform the copy on and so that we know which CPU failed
 * if a failure is detected.  Hence we need to be bound to "our" CPU.
 * This could be achieved through disabling preemption (and we have to do it
 * that way for threads with no t_lwp) but for larger copies this may hold
 * higher priority threads off of cpu for too long (eg, realtime).  So we
 * make use of the lightweight t_nomigrate mechanism where we can (ie, when
 * we have a t_lwp).
*
 * Pseudo code:
 *
 * FP_NOMIGRATE:
 *
 * if (curthread->t_lwp) {
 *	thread_nomigrate();
 * } else {
 *	kpreempt_disable();
 * }
 *
 * FP_ALLOWMIGRATE:
 *
 * if (curthread->t_lwp) {
 *	thread_allowmigrate();
 * } else {
 *	kpreempt_enable();
 * }
 */

/*
 * Both macros take two numeric local labels (used as "label:" and as the
 * forward reference "label##f"), so each expansion in a routine needs a
 * pair that does not collide with nearby numeric labels.  Both use %o0 and
 * %o1 as scratch and may make a call.
 *
 * The t_preempt load sits in the annulled delay slot of the brz,a and so
 * only executes on the no-t_lwp path.  On that path FP_ALLOWMIGRATE
 * decrements t_preempt and, if it reaches zero with cpu_kprunrun set,
 * calls kpreempt() with the current %pil as its argument.
 */
#define	FP_NOMIGRATE(label1, label2)				\
	ldn	[THREAD_REG + T_LWP], %o0			;\
	brz,a,pn %o0, label1##f					;\
	  ldsb	[THREAD_REG + T_PREEMPT], %o1			;\
	call	thread_nomigrate				;\
	  nop							;\
	ba	label2##f					;\
	  nop							;\
label1:								;\
	inc	%o1						;\
	stb	%o1, [THREAD_REG + T_PREEMPT]			;\
label2:

#define	FP_ALLOWMIGRATE(label1, label2)			\
	ldn	[THREAD_REG + T_LWP], %o0			;\
	brz,a,pn %o0, label1##f					;\
	  ldsb	[THREAD_REG + T_PREEMPT], %o1			;\
	call	thread_allowmigrate				;\
	  nop							;\
	ba	label2##f					;\
	  nop							;\
label1:								;\
	dec	%o1						;\
	brnz,pn	%o1, label2##f					;\
	  stb	%o1, [THREAD_REG + T_PREEMPT]			;\
	ldn	[THREAD_REG + T_CPU], %o0			;\
	ldub	[%o0 + CPU_KPRUNRUN], %o0			;\
	brz,pt	%o0, label2##f					;\
	  nop							;\
	call	kpreempt					;\
	  rdpr	%pil, %o0					;\
label2:

/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns errno value on pagefault error, 0 if all ok
 *
 * On entry:	%o0 = source address
 *		%o1 = destination address
 *		%o2 = byte count
 * On return:	%o0 = 0, or the errno value the fault path left in %g1
 *
 * The entry code only chooses a path.  Copies of VIS_COPY_THRESHOLD
 * bytes or fewer always take the leaf routine path (.kcopy_small).
 * Larger copies are classified by the best mutual alignment of source
 * and destination (8, 4, 2 or 1 byte) and compared against the matching
 * hw_copy_limit_N tunable; a limit of zero disables the FP copy for that
 * class.  kcopy always installs its own t_lofault handler and saves the
 * previous one in %o4 (leaf path) or %l6 (FP path) without TRAMP_FLAG, so
 * a fault returns the error code rather than invoking the old handler.
 */

	.seg	".text"
	.align	4

	ENTRY(kcopy)

	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
	bleu,pt	%ncc, .kcopy_small		! at or below threshold: leaf rtn
	  xor	%o0, %o1, %o3			! are src, dst alignable?
	btst	7, %o3				!
	bz,pt	%ncc, .kcopy_8			! check for longword alignment
	  nop
	btst	1, %o3				!
	bz,pt	%ncc, .kcopy_2			! check for half-word
	  nop
	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	tst	%o3
	bz,pn	%icc, .kcopy_small		! if zero, disable HW copy
	  cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .kcopy_small		! go to small copy
	  nop
	ba,pt	%ncc, .kcopy_more		! otherwise go to large copy
	  nop
.kcopy_2:
	btst	3, %o3				!
	bz,pt	%ncc, .kcopy_4			! check for word alignment
	  nop
	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
	tst	%o3
	bz,pn	%icc, .kcopy_small		! if zero, disable HW copy
	  cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .kcopy_small		! go to small copy
	  nop
	ba,pt	%ncc, .kcopy_more		! otherwise go to large copy
	  nop
.kcopy_4:
	! already checked longword, must be word aligned
	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
	tst	%o3
	bz,pn	%icc, .kcopy_small		! if zero, disable HW copy
	  cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .kcopy_small		! go to small copy
	  nop
	ba,pt	%ncc, .kcopy_more		! otherwise go to large copy
	  nop
.kcopy_8:
	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
	tst	%o3
	bz,pn	%icc, .kcopy_small		! if zero, disable HW copy
	  cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .kcopy_small		! go to small copy
	  nop
	ba,pt	%ncc, .kcopy_more		! otherwise go to large copy
	  nop

.kcopy_small:
	sethi	%hi(.sm_copyerr), %o5		! sm_copyerr is lofault value
	or	%o5, %lo(.sm_copyerr), %o5
	ldn	[THREAD_REG + T_LOFAULT], %o4	! save existing handler
	membar	#Sync				! sync error barrier
	ba,pt	%ncc, .sm_do_copy		! common code
	  stn	%o5, [THREAD_REG + T_LOFAULT]	! set t_lofault

.kcopy_more:
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	sethi	%hi(.copyerr), %l7		! copyerr is lofault value
	or	%l7, %lo(.copyerr), %l7
	ldn	[THREAD_REG + T_LOFAULT], %l6	! save existing handler
	membar	#Sync				! sync error barrier
	ba,pt	%ncc, .do_copy			! common code
	  stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault


/*
 * We got here because of a fault during bcopy_more, called from kcopy or bcopy.
 * Errno value is in %g1.  bcopy_more uses fp quadrants 1 and 3.
 *
 * While fp state is being restored t_lofault points at .copyerr2, so a
 * second fault here panics.  %l0 keeps the TRAMP_FLAG bit from %l6 across
 * FP_ALLOWMIGRATE, which may clobber the output registers.
 */
.copyerr:
	set	.copyerr2, %l0
	membar	#Sync				! sync error barrier
	stn	%l0, [THREAD_REG + T_LOFAULT]	! set t_lofault
	btst	FPUSED_FLAG, %l6
	bz	%ncc, 1f
	  and	%l6, TRAMP_FLAG, %l0		! copy trampoline flag to %l0

	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
	wr	%o2, 0, %gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz,pt	%icc, 4f			! fp was not enabled: zero regs
	  nop

	BLD_FPQ1Q3_FROMSTACK(%o2)

	ba,pt	%ncc, 1f
	  wr	%o3, 0, %fprs		! restore fprs

4:
	FZEROQ1Q3
	wr	%o3, 0, %fprs		! restore fprs

	!
	! Need to cater for the different expectations of kcopy
	! and bcopy. kcopy will *always* set a t_lofault handler
	! If it fires, we're expected to just return the error code
	! and *not* to invoke any existing error handler. As far as
	! bcopy is concerned, we only set t_lofault if there was an
	! existing lofault handler. In that case we're expected to
	! invoke the previously existing handler after resetting the
	! t_lofault value.
	!
1:
	andn	%l6, MASK_FLAGS, %l6		! turn trampoline flag off
	membar	#Sync				! sync error barrier
	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	FP_ALLOWMIGRATE(5, 6)

	btst	TRAMP_FLAG, %l0
	bnz,pn	%ncc, 3f
	  nop
	ret
	  restore	%g1, 0, %o0		! return errno from %g1

3:
	!
	! We're here via bcopy. There *must* have been an error handler
	! in place otherwise we would have died a nasty death already.
	!
	jmp	%l6				! goto real handler
	  restore	%g0, 0, %o0		! dispose of copy window

/*
 * We got here because of a fault in .copyerr.  We can't safely restore fp
 * state, so we panic.
 */
fp_panic_msg:
	.asciz	"Unable to restore fp state after copy operation"

	.align	4
.copyerr2:
	set	fp_panic_msg, %o0
	call	panic
	  nop

/*
 * We got here because of a fault during a small kcopy or bcopy.
 * No floating point registers are used by the small copies.
 * Errno value is in %g1.
 *
 * %o4 holds the previous t_lofault value, with TRAMP_FLAG set when the
 * previous handler is to be invoked rather than returning the error.
 */
.sm_copyerr:
1:
	btst	TRAMP_FLAG, %o4
	membar	#Sync
	andn	%o4, TRAMP_FLAG, %o4
	bnz,pn	%ncc, 3f
	  stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	  mov	%g1, %o0			! return errno from %g1
3:
	jmp	%o4				! goto real handler
	  mov	%g0, %o0			!

	SET_SIZE(kcopy)


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * Registers: l6 - saved t_lofault
 * (for short copies, o4 - saved t_lofault)
 *
 * Copy a page of memory.
 * Assumes double word alignment and a count >= 256.
 */

	ENTRY(bcopy)

	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
	bleu,pt	%ncc, .bcopy_small		! go to larger cases
	  xor	%o0, %o1, %o3			! are src, dst alignable?
	btst	7, %o3				!
	bz,pt	%ncc, .bcopy_8			! check for longword alignment
	  nop
	btst	1, %o3				!
	bz,pt	%ncc, .bcopy_2			! check for half-word
	  nop
	sethi	%hi(hw_copy_limit_1), %o3	!
Check copy limit 9247c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_1)], %o3 9257c478bd9Sstevel@tonic-gate tst %o3 9267c478bd9Sstevel@tonic-gate bz,pn %icc, .bcopy_small ! if zero, disable HW copy 9277c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 9287c478bd9Sstevel@tonic-gate bleu,pt %ncc, .bcopy_small ! go to small copy 9297c478bd9Sstevel@tonic-gate nop 9307c478bd9Sstevel@tonic-gate ba,pt %ncc, .bcopy_more ! otherwise go to large copy 9317c478bd9Sstevel@tonic-gate nop 9327c478bd9Sstevel@tonic-gate.bcopy_2: 9337c478bd9Sstevel@tonic-gate btst 3, %o3 ! 9347c478bd9Sstevel@tonic-gate bz,pt %ncc, .bcopy_4 ! check for word alignment 9357c478bd9Sstevel@tonic-gate nop 9367c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit 9377c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_2)], %o3 9387c478bd9Sstevel@tonic-gate tst %o3 9397c478bd9Sstevel@tonic-gate bz,pn %icc, .bcopy_small ! if zero, disable HW copy 9407c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 9417c478bd9Sstevel@tonic-gate bleu,pt %ncc, .bcopy_small ! go to small copy 9427c478bd9Sstevel@tonic-gate nop 9437c478bd9Sstevel@tonic-gate ba,pt %ncc, .bcopy_more ! otherwise go to large copy 9447c478bd9Sstevel@tonic-gate nop 9457c478bd9Sstevel@tonic-gate.bcopy_4: 9467c478bd9Sstevel@tonic-gate ! already checked longword, must be word aligned 9477c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit 9487c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_4)], %o3 9497c478bd9Sstevel@tonic-gate tst %o3 9507c478bd9Sstevel@tonic-gate bz,pn %icc, .bcopy_small ! if zero, disable HW copy 9517c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 9527c478bd9Sstevel@tonic-gate bleu,pt %ncc, .bcopy_small ! go to small copy 9537c478bd9Sstevel@tonic-gate nop 9547c478bd9Sstevel@tonic-gate ba,pt %ncc, .bcopy_more ! 
otherwise go to large copy 9557c478bd9Sstevel@tonic-gate nop 9567c478bd9Sstevel@tonic-gate.bcopy_8: 9577c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit 9587c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_8)], %o3 9597c478bd9Sstevel@tonic-gate tst %o3 9607c478bd9Sstevel@tonic-gate bz,pn %icc, .bcopy_small ! if zero, disable HW copy 9617c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 9627c478bd9Sstevel@tonic-gate bleu,pt %ncc, .bcopy_small ! go to small copy 9637c478bd9Sstevel@tonic-gate nop 9647c478bd9Sstevel@tonic-gate ba,pt %ncc, .bcopy_more ! otherwise go to large copy 9657c478bd9Sstevel@tonic-gate nop 9667c478bd9Sstevel@tonic-gate 9677c478bd9Sstevel@tonic-gate .align 16 9687c478bd9Sstevel@tonic-gate.bcopy_small: 9697c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o4 ! save t_lofault 9707c478bd9Sstevel@tonic-gate tst %o4 9717c478bd9Sstevel@tonic-gate bz,pt %icc, .sm_do_copy 9727c478bd9Sstevel@tonic-gate nop 9737c478bd9Sstevel@tonic-gate sethi %hi(.sm_copyerr), %o5 9747c478bd9Sstevel@tonic-gate or %o5, %lo(.sm_copyerr), %o5 9757c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 9767c478bd9Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] ! install new vector 9777c478bd9Sstevel@tonic-gate or %o4, TRAMP_FLAG, %o4 ! error should trampoline 9787c478bd9Sstevel@tonic-gate.sm_do_copy: 9797c478bd9Sstevel@tonic-gate cmp %o2, SHORTCOPY ! check for really short case 9807c478bd9Sstevel@tonic-gate bleu,pt %ncc, .bc_sm_left ! 9817c478bd9Sstevel@tonic-gate cmp %o2, CHKSIZE ! check for medium length cases 9827c478bd9Sstevel@tonic-gate bgu,pn %ncc, .bc_med ! 9837c478bd9Sstevel@tonic-gate or %o0, %o1, %o3 ! prepare alignment check 9847c478bd9Sstevel@tonic-gate andcc %o3, 0x3, %g0 ! test for alignment 9857c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_word ! branch to word aligned case 9867c478bd9Sstevel@tonic-gate.bc_sm_movebytes: 9877c478bd9Sstevel@tonic-gate sub %o2, 3, %o2 ! 
adjust count to allow cc zero test 9887c478bd9Sstevel@tonic-gate.bc_sm_notalign4: 9897c478bd9Sstevel@tonic-gate ldub [%o0], %o3 ! read byte 9907c478bd9Sstevel@tonic-gate stb %o3, [%o1] ! write byte 9917c478bd9Sstevel@tonic-gate subcc %o2, 4, %o2 ! reduce count by 4 9927c478bd9Sstevel@tonic-gate ldub [%o0 + 1], %o3 ! repeat for a total of 4 bytes 9937c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 9947c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 1] 9957c478bd9Sstevel@tonic-gate ldub [%o0 - 2], %o3 9967c478bd9Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 9977c478bd9Sstevel@tonic-gate stb %o3, [%o1 - 2] 9987c478bd9Sstevel@tonic-gate ldub [%o0 - 1], %o3 9997c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_sm_notalign4 ! loop til 3 or fewer bytes remain 10007c478bd9Sstevel@tonic-gate stb %o3, [%o1 - 1] 10017c478bd9Sstevel@tonic-gate add %o2, 3, %o2 ! restore count 10027c478bd9Sstevel@tonic-gate.bc_sm_left: 10037c478bd9Sstevel@tonic-gate tst %o2 10047c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit ! check for zero length 10057c478bd9Sstevel@tonic-gate deccc %o2 ! reduce count for cc test 10067c478bd9Sstevel@tonic-gate ldub [%o0], %o3 ! move one byte 10077c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 10087c478bd9Sstevel@tonic-gate stb %o3, [%o1] 10097c478bd9Sstevel@tonic-gate ldub [%o0 + 1], %o3 ! move another byte 10107c478bd9Sstevel@tonic-gate deccc %o2 ! check for more 10117c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 10127c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 1] 10137c478bd9Sstevel@tonic-gate ldub [%o0 + 2], %o3 ! move final byte 10147c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 2] 10157c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 10167c478bd9Sstevel@tonic-gate andn %o4, TRAMP_FLAG, %o4 10177c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 10187c478bd9Sstevel@tonic-gate retl 10197c478bd9Sstevel@tonic-gate mov %g0, %o0 ! 
return 0 10207c478bd9Sstevel@tonic-gate .align 16 10217c478bd9Sstevel@tonic-gate nop ! instruction alignment 10227c478bd9Sstevel@tonic-gate ! see discussion at start of file 10237c478bd9Sstevel@tonic-gate.bc_sm_words: 10247c478bd9Sstevel@tonic-gate lduw [%o0], %o3 ! read word 10257c478bd9Sstevel@tonic-gate.bc_sm_wordx: 10267c478bd9Sstevel@tonic-gate subcc %o2, 8, %o2 ! update count 10277c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! write word 10287c478bd9Sstevel@tonic-gate add %o0, 8, %o0 ! update SRC 10297c478bd9Sstevel@tonic-gate lduw [%o0 - 4], %o3 ! read word 10307c478bd9Sstevel@tonic-gate add %o1, 8, %o1 ! update DST 10317c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_sm_words ! loop til done 10327c478bd9Sstevel@tonic-gate stw %o3, [%o1 - 4] ! write word 10337c478bd9Sstevel@tonic-gate addcc %o2, 7, %o2 ! restore count 10347c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 10357c478bd9Sstevel@tonic-gate deccc %o2 10367c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_byte 10377c478bd9Sstevel@tonic-gate.bc_sm_half: 10387c478bd9Sstevel@tonic-gate subcc %o2, 2, %o2 ! reduce count by 2 10397c478bd9Sstevel@tonic-gate add %o0, 2, %o0 ! advance SRC by 2 10407c478bd9Sstevel@tonic-gate lduh [%o0 - 2], %o3 ! read half word 10417c478bd9Sstevel@tonic-gate add %o1, 2, %o1 ! advance DST by 2 10427c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_sm_half ! loop til done 10437c478bd9Sstevel@tonic-gate sth %o3, [%o1 - 2] ! write half word 10447c478bd9Sstevel@tonic-gate addcc %o2, 1, %o2 ! restore count 10457c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 10467c478bd9Sstevel@tonic-gate nop 10477c478bd9Sstevel@tonic-gate.bc_sm_byte: 10487c478bd9Sstevel@tonic-gate ldub [%o0], %o3 10497c478bd9Sstevel@tonic-gate stb %o3, [%o1] 10507c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 10517c478bd9Sstevel@tonic-gate andn %o4, TRAMP_FLAG, %o4 10527c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! 
restore old t_lofault 10537c478bd9Sstevel@tonic-gate retl 10547c478bd9Sstevel@tonic-gate mov %g0, %o0 ! return 0 10557c478bd9Sstevel@tonic-gate 10567c478bd9Sstevel@tonic-gate.bc_sm_word: 10577c478bd9Sstevel@tonic-gate subcc %o2, 4, %o2 ! update count 10587c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_sm_wordx 10597c478bd9Sstevel@tonic-gate lduw [%o0], %o3 ! read word 10607c478bd9Sstevel@tonic-gate addcc %o2, 3, %o2 ! restore count 10617c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 10627c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! write word 10637c478bd9Sstevel@tonic-gate deccc %o2 ! reduce count for cc test 10647c478bd9Sstevel@tonic-gate ldub [%o0 + 4], %o3 ! load one byte 10657c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 10667c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 4] ! store one byte 10677c478bd9Sstevel@tonic-gate ldub [%o0 + 5], %o3 ! load second byte 10687c478bd9Sstevel@tonic-gate deccc %o2 10697c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit 10707c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 5] ! store second byte 10717c478bd9Sstevel@tonic-gate ldub [%o0 + 6], %o3 ! load third byte 10727c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 6] ! store third byte 10737c478bd9Sstevel@tonic-gate.bc_sm_exit: 10747c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 10757c478bd9Sstevel@tonic-gate andn %o4, TRAMP_FLAG, %o4 10767c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 10777c478bd9Sstevel@tonic-gate retl 10787c478bd9Sstevel@tonic-gate mov %g0, %o0 ! return 0 10797c478bd9Sstevel@tonic-gate 10807c478bd9Sstevel@tonic-gate .align 16 10817c478bd9Sstevel@tonic-gate.bc_med: 10827c478bd9Sstevel@tonic-gate xor %o0, %o1, %o3 ! setup alignment check 10837c478bd9Sstevel@tonic-gate btst 1, %o3 10847c478bd9Sstevel@tonic-gate bnz,pt %ncc, .bc_sm_movebytes ! unaligned 10857c478bd9Sstevel@tonic-gate nop 10867c478bd9Sstevel@tonic-gate btst 3, %o3 10877c478bd9Sstevel@tonic-gate bnz,pt %ncc, .bc_med_half ! 
halfword aligned 10887c478bd9Sstevel@tonic-gate nop 10897c478bd9Sstevel@tonic-gate btst 7, %o3 10907c478bd9Sstevel@tonic-gate bnz,pt %ncc, .bc_med_word ! word aligned 10917c478bd9Sstevel@tonic-gate nop 10927c478bd9Sstevel@tonic-gate.bc_med_long: 10937c478bd9Sstevel@tonic-gate btst 3, %o0 ! check for 10947c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_med_long1 ! word alignment 10957c478bd9Sstevel@tonic-gate nop 10967c478bd9Sstevel@tonic-gate.bc_med_long0: 10977c478bd9Sstevel@tonic-gate ldub [%o0], %o3 ! load one byte 10987c478bd9Sstevel@tonic-gate inc %o0 10997c478bd9Sstevel@tonic-gate stb %o3,[%o1] ! store byte 11007c478bd9Sstevel@tonic-gate inc %o1 11017c478bd9Sstevel@tonic-gate btst 3, %o0 11027c478bd9Sstevel@tonic-gate bnz,pt %ncc, .bc_med_long0 11037c478bd9Sstevel@tonic-gate dec %o2 11047c478bd9Sstevel@tonic-gate.bc_med_long1: ! word aligned 11057c478bd9Sstevel@tonic-gate btst 7, %o0 ! check for long word 11067c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_med_long2 11077c478bd9Sstevel@tonic-gate nop 11087c478bd9Sstevel@tonic-gate lduw [%o0], %o3 ! load word 11097c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 11107c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! store word 11117c478bd9Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 11127c478bd9Sstevel@tonic-gate sub %o2, 4, %o2 ! reduce count by 4 11137c478bd9Sstevel@tonic-gate! 11147c478bd9Sstevel@tonic-gate! Now long word aligned and have at least 32 bytes to move 11157c478bd9Sstevel@tonic-gate! 11167c478bd9Sstevel@tonic-gate.bc_med_long2: 11177c478bd9Sstevel@tonic-gate sub %o2, 31, %o2 ! adjust count to allow cc zero test 11187c478bd9Sstevel@tonic-gate.bc_med_lmove: 11197c478bd9Sstevel@tonic-gate ldx [%o0], %o3 ! read long word 11207c478bd9Sstevel@tonic-gate stx %o3, [%o1] ! write long word 11217c478bd9Sstevel@tonic-gate subcc %o2, 32, %o2 ! reduce count by 32 11227c478bd9Sstevel@tonic-gate ldx [%o0 + 8], %o3 ! repeat for a total for 4 long words 11237c478bd9Sstevel@tonic-gate add %o0, 32, %o0 ! 
advance SRC by 32 11247c478bd9Sstevel@tonic-gate stx %o3, [%o1 + 8] 11257c478bd9Sstevel@tonic-gate ldx [%o0 - 16], %o3 11267c478bd9Sstevel@tonic-gate add %o1, 32, %o1 ! advance DST by 32 11277c478bd9Sstevel@tonic-gate stx %o3, [%o1 - 16] 11287c478bd9Sstevel@tonic-gate ldx [%o0 - 8], %o3 11297c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_med_lmove ! loop til 31 or fewer bytes left 11307c478bd9Sstevel@tonic-gate stx %o3, [%o1 - 8] 11317c478bd9Sstevel@tonic-gate addcc %o2, 24, %o2 ! restore count to long word offset 11327c478bd9Sstevel@tonic-gate ble,pt %ncc, .bc_med_lextra ! check for more long words to move 11337c478bd9Sstevel@tonic-gate nop 11347c478bd9Sstevel@tonic-gate.bc_med_lword: 11357c478bd9Sstevel@tonic-gate ldx [%o0], %o3 ! read long word 11367c478bd9Sstevel@tonic-gate subcc %o2, 8, %o2 ! reduce count by 8 11377c478bd9Sstevel@tonic-gate stx %o3, [%o1] ! write long word 11387c478bd9Sstevel@tonic-gate add %o0, 8, %o0 ! advance SRC by 8 11397c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_med_lword ! loop til 7 or fewer bytes left 11407c478bd9Sstevel@tonic-gate add %o1, 8, %o1 ! advance DST by 8 11417c478bd9Sstevel@tonic-gate.bc_med_lextra: 11427c478bd9Sstevel@tonic-gate addcc %o2, 7, %o2 ! restore rest of count 11437c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit ! if zero, then done 11447c478bd9Sstevel@tonic-gate deccc %o2 11457c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_byte 11467c478bd9Sstevel@tonic-gate nop 11477c478bd9Sstevel@tonic-gate ba,pt %ncc, .bc_sm_half 11487c478bd9Sstevel@tonic-gate nop 11497c478bd9Sstevel@tonic-gate 11507c478bd9Sstevel@tonic-gate .align 16 11517c478bd9Sstevel@tonic-gate.bc_med_word: 11527c478bd9Sstevel@tonic-gate btst 3, %o0 ! check for 11537c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_med_word1 ! word alignment 11547c478bd9Sstevel@tonic-gate nop 11557c478bd9Sstevel@tonic-gate.bc_med_word0: 11567c478bd9Sstevel@tonic-gate ldub [%o0], %o3 ! load one byte 11577c478bd9Sstevel@tonic-gate inc %o0 11587c478bd9Sstevel@tonic-gate stb %o3,[%o1] ! 
store byte 11597c478bd9Sstevel@tonic-gate inc %o1 11607c478bd9Sstevel@tonic-gate btst 3, %o0 11617c478bd9Sstevel@tonic-gate bnz,pt %ncc, .bc_med_word0 11627c478bd9Sstevel@tonic-gate dec %o2 11637c478bd9Sstevel@tonic-gate! 11647c478bd9Sstevel@tonic-gate! Now word aligned and have at least 36 bytes to move 11657c478bd9Sstevel@tonic-gate! 11667c478bd9Sstevel@tonic-gate.bc_med_word1: 11677c478bd9Sstevel@tonic-gate sub %o2, 15, %o2 ! adjust count to allow cc zero test 11687c478bd9Sstevel@tonic-gate.bc_med_wmove: 11697c478bd9Sstevel@tonic-gate lduw [%o0], %o3 ! read word 11707c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! write word 11717c478bd9Sstevel@tonic-gate subcc %o2, 16, %o2 ! reduce count by 16 11727c478bd9Sstevel@tonic-gate lduw [%o0 + 4], %o3 ! repeat for a total for 4 words 11737c478bd9Sstevel@tonic-gate add %o0, 16, %o0 ! advance SRC by 16 11747c478bd9Sstevel@tonic-gate stw %o3, [%o1 + 4] 11757c478bd9Sstevel@tonic-gate lduw [%o0 - 8], %o3 11767c478bd9Sstevel@tonic-gate add %o1, 16, %o1 ! advance DST by 16 11777c478bd9Sstevel@tonic-gate stw %o3, [%o1 - 8] 11787c478bd9Sstevel@tonic-gate lduw [%o0 - 4], %o3 11797c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_med_wmove ! loop til 15 or fewer bytes left 11807c478bd9Sstevel@tonic-gate stw %o3, [%o1 - 4] 11817c478bd9Sstevel@tonic-gate addcc %o2, 12, %o2 ! restore count to word offset 11827c478bd9Sstevel@tonic-gate ble,pt %ncc, .bc_med_wextra ! check for more words to move 11837c478bd9Sstevel@tonic-gate nop 11847c478bd9Sstevel@tonic-gate.bc_med_word2: 11857c478bd9Sstevel@tonic-gate lduw [%o0], %o3 ! read word 11867c478bd9Sstevel@tonic-gate subcc %o2, 4, %o2 ! reduce count by 4 11877c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! write word 11887c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 11897c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_med_word2 ! loop til 3 or fewer bytes left 11907c478bd9Sstevel@tonic-gate add %o1, 4, %o1 ! 
advance DST by 4 11917c478bd9Sstevel@tonic-gate.bc_med_wextra: 11927c478bd9Sstevel@tonic-gate addcc %o2, 3, %o2 ! restore rest of count 11937c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_exit ! if zero, then done 11947c478bd9Sstevel@tonic-gate deccc %o2 11957c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_sm_byte 11967c478bd9Sstevel@tonic-gate nop 11977c478bd9Sstevel@tonic-gate ba,pt %ncc, .bc_sm_half 11987c478bd9Sstevel@tonic-gate nop 11997c478bd9Sstevel@tonic-gate 12007c478bd9Sstevel@tonic-gate .align 16 12017c478bd9Sstevel@tonic-gate.bc_med_half: 12027c478bd9Sstevel@tonic-gate btst 1, %o0 ! check for 12037c478bd9Sstevel@tonic-gate bz,pt %ncc, .bc_med_half1 ! half word alignment 12047c478bd9Sstevel@tonic-gate nop 12057c478bd9Sstevel@tonic-gate ldub [%o0], %o3 ! load one byte 12067c478bd9Sstevel@tonic-gate inc %o0 12077c478bd9Sstevel@tonic-gate stb %o3,[%o1] ! store byte 12087c478bd9Sstevel@tonic-gate inc %o1 12097c478bd9Sstevel@tonic-gate dec %o2 12107c478bd9Sstevel@tonic-gate! 12117c478bd9Sstevel@tonic-gate! Now half word aligned and have at least 38 bytes to move 12127c478bd9Sstevel@tonic-gate! 12137c478bd9Sstevel@tonic-gate.bc_med_half1: 12147c478bd9Sstevel@tonic-gate sub %o2, 7, %o2 ! adjust count to allow cc zero test 12157c478bd9Sstevel@tonic-gate.bc_med_hmove: 12167c478bd9Sstevel@tonic-gate lduh [%o0], %o3 ! read half word 12177c478bd9Sstevel@tonic-gate sth %o3, [%o1] ! write half word 12187c478bd9Sstevel@tonic-gate subcc %o2, 8, %o2 ! reduce count by 8 12197c478bd9Sstevel@tonic-gate lduh [%o0 + 2], %o3 ! repeat for a total for 4 halfwords 12207c478bd9Sstevel@tonic-gate add %o0, 8, %o0 ! advance SRC by 8 12217c478bd9Sstevel@tonic-gate sth %o3, [%o1 + 2] 12227c478bd9Sstevel@tonic-gate lduh [%o0 - 4], %o3 12237c478bd9Sstevel@tonic-gate add %o1, 8, %o1 ! advance DST by 8 12247c478bd9Sstevel@tonic-gate sth %o3, [%o1 - 4] 12257c478bd9Sstevel@tonic-gate lduh [%o0 - 2], %o3 12267c478bd9Sstevel@tonic-gate bgt,pt %ncc, .bc_med_hmove ! 
! loop til 7 or fewer bytes left
	  sth	%o3, [%o1 - 2]
	addcc	%o2, 7, %o2		! restore count
	bz,pt	%ncc, .bc_sm_exit
	  deccc	%o2
	bz,pt	%ncc, .bc_sm_byte
	  nop
	ba,pt	%ncc, .bc_sm_half
	  nop

	SET_SIZE(bcopy)

/*
 * The _more entry points are not intended to be used directly by
 * any caller from outside this file.  They are provided to allow
 * profiling and dtrace of the portions of the copy code that use
 * the floating point registers.
 * This entry is particularly important as DTRACE (at least as of
 * 4/2004) does not support leaf functions.
 */

/*
 * bcopy_more
 *
 * Large-copy half of bcopy: moves the data in VIS_BLOCKSIZE units
 * through the FP registers using block stores.  Reached by a branch
 * to the local label .bcopy_more once the size/limit checks in bcopy
 * have selected the "large copy" path.
 *
 * Register usage after the save (see the #defines below):
 *	%i0  REALSRC	true (possibly unaligned) source address
 *	%i1  DST	destination address
 *	%i2  CNT	bytes still to copy
 *	%i3  SRC	REALSRC rounded down to 8 bytes (also a byte temp)
 *	%i5  TMP	scratch
 *	%l6		t_lofault on entry, with TRAMP_FLAG/FPUSED_FLAG
 *			or-ed in; reserved for the whole routine
 *
 * Outline:
 *   1. If a t_lofault handler was installed on entry, replace it with
 *      .copyerr and remember that with TRAMP_FLAG.
 *   2. Save %fprs and %gsr in the frame; if the FPU was already
 *      enabled also save the FP registers we are about to use.
 *   3. Byte-copy until DST is VIS_BLOCKSIZE aligned.
 *   4. Pipelined loop: eight ldd, faligndata into %f32-%f46, one
 *      block store per VIS_BLOCKSIZE bytes.
 *   5. Drain the pipeline, byte-copy whatever is left.
 *   6. Restore %gsr, the FP state, %fprs and t_lofault; return 0.
 */

	ENTRY(bcopy_more)
.bcopy_more:
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	ldn	[THREAD_REG + T_LOFAULT], %l6	! save t_lofault
	tst	%l6
	bz,pt	%ncc, .do_copy			! no handler: nothing to install
	  nop
	sethi	%hi(.copyerr), %o2
	or	%o2, %lo(.copyerr), %o2
	membar	#Sync				! sync error barrier
	stn	%o2, [THREAD_REG + T_LOFAULT]	! install new vector
	!
	! We've already captured whether t_lofault was zero on entry.
	! We need to mark ourselves as being from bcopy since both
	! kcopy and bcopy use the same code path. If TRAMP_FLAG is set
	! and the saved lofault was zero, we won't reset lofault on
	! returning.
	!
	or	%l6, TRAMP_FLAG, %l6

/*
 * Copies that reach here are larger than VIS_COPY_THRESHOLD bytes
 * Also, use of FP registers has been tested to be enabled
 */
.do_copy:
	FP_NOMIGRATE(6, 7)

	rd	%fprs, %o2		! check for unused fp
	st	%o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
	btst	FPRS_FEF, %o2
	! Annulled delay slot: the wr runs only when the branch is taken,
	! i.e. FEF was clear, so the FPU is enabled and the save skipped.
	bz,a,pt	%icc, .do_blockcopy
	  wr	%g0, FPRS_FEF, %fprs

	BST_FPQ1Q3_TOSTACK(%o2)

.do_blockcopy:
	rd	%gsr, %o2
	stx	%o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET]	! save gsr
	or	%l6, FPUSED_FLAG, %l6

#define	REALSRC	%i0
#define	DST	%i1
#define	CNT	%i2
#define	SRC	%i3
#define	TMP	%i5

	!
	! Step 3: bring DST up to a VIS_BLOCKSIZE boundary.
	! TMP = (-DST) mod VIS_BLOCKSIZE; zero means already aligned.
	!
	andcc	DST, VIS_BLOCKSIZE - 1, TMP
	bz,pt	%ncc, 2f
	  neg	TMP
	add	TMP, VIS_BLOCKSIZE, TMP

	! TMP = bytes required to align DST on FP_BLOCK boundary
	! Using SRC as a tmp here
	cmp	TMP, 3
	bleu,pt	%ncc, 1f		! 3 or fewer: single-byte loop only
	  sub	CNT,TMP,CNT		! adjust main count
	sub	TMP, 3, TMP		! adjust for end of loop test
.bc_blkalign:
	ldub	[REALSRC], SRC		! move 4 bytes per loop iteration
	stb	SRC, [DST]
	subcc	TMP, 4, TMP
	ldub	[REALSRC + 1], SRC
	add	REALSRC, 4, REALSRC
	stb	SRC, [DST + 1]
	ldub	[REALSRC - 2], SRC
	add	DST, 4, DST
	stb	SRC, [DST - 2]
	ldub	[REALSRC - 1], SRC
	bgu,pt	%ncc, .bc_blkalign
	  stb	SRC, [DST - 1]

	addcc	TMP, 3, TMP		! restore count adjustment
	bz,pt	%ncc, 2f		! no bytes left?
	  nop
1:	ldub	[REALSRC], SRC		! one byte per iteration
	inc	REALSRC
	inc	DST
	deccc	TMP
	bgu	%ncc, 1b
	  stb	SRC, [DST - 1]

2:
	!
	! Step 4 prologue: SRC is REALSRC rounded down to 8 bytes and
	! alignaddr records the byte offset of REALSRC in %gsr so that
	! faligndata can extract the unaligned doublewords.
	!
	andn	REALSRC, 0x7, SRC
	alignaddr REALSRC, %g0, %g0

	! SRC - 8-byte aligned
	! DST - 64-byte aligned
	prefetch [SRC], #one_read
	prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read
	prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read
	prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read
	ldd	[SRC], %f0
#if CHEETAH_PREFETCH > 4
	prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
#endif
	ldd	[SRC + 0x08], %f2
#if CHEETAH_PREFETCH > 5
	prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read
#endif
	ldd	[SRC + 0x10], %f4
#if CHEETAH_PREFETCH > 6
	prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read
#endif
	faligndata %f0, %f2, %f32
	ldd	[SRC + 0x18], %f6
#if CHEETAH_PREFETCH > 7
	prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read
#endif
	faligndata %f2, %f4, %f34
	ldd	[SRC + 0x20], %f8
	faligndata %f4, %f6, %f36
	ldd	[SRC + 0x28], %f10
	faligndata %f6, %f8, %f38
	ldd	[SRC + 0x30], %f12
	faligndata %f8, %f10, %f40
	ldd	[SRC + 0x38], %f14
	faligndata %f10, %f12, %f42
	ldd	[SRC + VIS_BLOCKSIZE], %f0
	sub	CNT, VIS_BLOCKSIZE, CNT
	add	SRC, VIS_BLOCKSIZE, SRC
	add	REALSRC, VIS_BLOCKSIZE, REALSRC
	ba,a,pt	%ncc, 1f		! annulled: the nop is padding only
	  nop
	.align	16
1:
	!
	! Main loop.  On entry %f32-%f42 hold the first six aligned
	! doublewords of the pending block and %f12/%f14/%f0 the raw
	! data needed to finish it.  Each pass completes and stores
	! that block while loading and aligning the next one.
	!
	ldd	[SRC + 0x08], %f2
	faligndata %f12, %f14, %f44
	ldd	[SRC + 0x10], %f4
	faligndata %f14, %f0, %f46
	stda	%f32, [DST]ASI_BLK_P
	ldd	[SRC + 0x18], %f6
	faligndata %f0, %f2, %f32
	ldd	[SRC + 0x20], %f8
	faligndata %f2, %f4, %f34
	ldd	[SRC + 0x28], %f10
	faligndata %f4, %f6, %f36
	ldd	[SRC + 0x30], %f12
	faligndata %f6, %f8, %f38
	ldd	[SRC + 0x38], %f14
	faligndata %f8, %f10, %f40
	sub	CNT, VIS_BLOCKSIZE, CNT
	ldd	[SRC + VIS_BLOCKSIZE], %f0
	faligndata %f10, %f12, %f42
	prefetch [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read
	add	DST, VIS_BLOCKSIZE, DST
	prefetch [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
	add	REALSRC, VIS_BLOCKSIZE, REALSRC
	cmp	CNT, VIS_BLOCKSIZE + 8
	bgu,pt	%ncc, 1b		! loop while CNT > VIS_BLOCKSIZE + 8
	  add	SRC, VIS_BLOCKSIZE, SRC

	!
	! Step 5.  If exactly one block remains and the source is
	! 8-byte aligned, take the straight-copy path at 2: below.
	! Otherwise flush the pending block at 3: and finish bytewise.
	!
	! only if REALSRC & 0x7 is 0
	cmp	CNT, VIS_BLOCKSIZE
	bne	%ncc, 3f
	  andcc	REALSRC, 0x7, %g0	! runs on both paths; cc used by bz
	bz,pt	%ncc, 2f
	  nop
3:
	faligndata %f12, %f14, %f44
	faligndata %f14, %f0, %f46
	stda	%f32, [DST]ASI_BLK_P	! store the pending block
	add	DST, VIS_BLOCKSIZE, DST
	ba,pt	%ncc, 3f		! on to the byte tail
	  nop
2:
	! Aligned source, one block left: store the pending block, then
	! move the last block with fsrc1 (no realignment) and store it.
	ldd	[SRC + 0x08], %f2
	fsrc1	%f12, %f44
	ldd	[SRC + 0x10], %f4
	fsrc1	%f14, %f46
	stda	%f32, [DST]ASI_BLK_P
	ldd	[SRC + 0x18], %f6
	fsrc1	%f0, %f32
	ldd	[SRC + 0x20], %f8
	fsrc1	%f2, %f34
	ldd	[SRC + 0x28], %f10
	fsrc1	%f4, %f36
	ldd	[SRC + 0x30], %f12
	fsrc1	%f6, %f38
	ldd	[SRC + 0x38], %f14
	fsrc1	%f8, %f40
	sub	CNT, VIS_BLOCKSIZE, CNT
	add	DST, VIS_BLOCKSIZE, DST
	add	SRC, VIS_BLOCKSIZE, SRC
	add	REALSRC, VIS_BLOCKSIZE, REALSRC
	fsrc1	%f10, %f42
	fsrc1	%f12, %f44
	fsrc1	%f14, %f46
	stda	%f32, [DST]ASI_BLK_P
	add	DST, VIS_BLOCKSIZE, DST
	ba,a,pt	%ncc, .bcb_exit		! annulled: the nop is padding only
	  nop

3:	tst	CNT			! any tail bytes?
	bz,a,pt	%ncc, .bcb_exit
	  nop

5:	ldub	[REALSRC], TMP		! byte-at-a-time tail copy
	inc	REALSRC
	inc	DST
	deccc	CNT
	bgu	%ncc, 5b
	  stb	TMP, [DST - 1]
.bcb_exit:
	membar	#Sync

	FPRAS_INTERVAL(FPRAS_BCOPY, 0, %l5, %o2, %o3, %o4, %o5, 8)
	FPRAS_REWRITE_TYPE2Q1(0, %l5, %o2, %o3, 8, 9)
	FPRAS_CHECK(FPRAS_BCOPY, %l5, 9)	! outputs lost

	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
	wr	%o2, 0, %gsr

	! Step 6: if the FPU was in use on entry, reload the saved FP
	! registers; otherwise take the FZEROQ1Q3 path.  Either way the
	! original %fprs value is written back.
	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3
	bz,pt	%icc, 4f
	  nop

	BLD_FPQ1Q3_FROMSTACK(%o2)

	ba,pt	%ncc, 2f
	  wr	%o3, 0, %fprs		! restore fprs
4:
	FZEROQ1Q3
	wr	%o3, 0, %fprs		! restore fprs
2:
	membar	#Sync				! sync error barrier
	andn	%l6, MASK_FLAGS, %l6		! strip flag bits from saved value
	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	FP_ALLOWMIGRATE(5, 6)
	ret
	  restore	%g0, 0, %o0		! return 0

	SET_SIZE(bcopy_more)

/*
 * Block copy with possibly overlapped operands.
 *
 * ovbcopy
 *
 * In:	%o0 = from address
 *	%o1 = to address
 *	%o2 = byte count
 *
 * A zero count returns immediately.  When the count does not exceed
 * the distance between the two addresses the regions cannot overlap
 * and the work is handed to bcopy by a direct branch (bcopy returns
 * to our caller).  Otherwise the bytes are moved one at a time,
 * backwards when from < to and forwards when from > to, so that no
 * source byte is overwritten before it has been read.
 *
 * Leaf routine: uses only %o0-%o3 and the condition codes.  The
 * byte loops here do not load a return value into %o0.
 */

	ENTRY(ovbcopy)
	tst	%o2			! check count
	bgu,a	%ncc, 1f		! count != 0: there is work to do
	  subcc	%o0, %o1, %o3		! difference of from and to address

	retl				! zero count: nothing to do
	  nop
1:
	bneg,a	%ncc, 2f
	  neg	%o3			! if < 0, make it positive
2:	cmp	%o2, %o3		! cmp size and abs(from - to)
	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
	  .empty				!   no overlap
	  cmp	%o0, %o1		! compare from and to addresses
	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
	  nop
	!
	! Copy forwards.
	!
.ov_fwd:
	ldub	[%o0], %o3		! read from address
	inc	%o0			! inc from address
	stb	%o3, [%o1]		! write to address
	deccc	%o2			! dec count
	bgu	%ncc, .ov_fwd		! loop till done
	  inc	%o1			! inc to address

	retl				! return
	  nop
	!
	! Copy backwards.
	!
	! %o2 doubles as the index: it is decremented first, so the
	! loop touches offsets count-1 down to 0.
	!
.ov_bkwd:
	deccc	%o2			! dec count
	ldub	[%o0 + %o2], %o3	! get byte at end of src
	bgu	%ncc, .ov_bkwd		! loop till done
	  stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst

	retl				! return
	  nop

	SET_SIZE(ovbcopy)


/*
 * hwblkpagecopy()
 *
 * Copies exactly one page.  This routine assumes the caller (ppcopy)
 * has already disabled kernel preemption and has checked
 * use_hw_bcopy.  Preventing preemption also prevents cpu migration.
 *
 * The copy is the same software-pipelined ldd / block-store loop as
 * in bcopy_more, but it uses fsrc1 instead of faligndata and has no
 * alignment or byte-tail handling: CNT starts at PAGESIZE and is
 * stepped in VIS_BLOCKSIZE units only.  No t_lofault handler is
 * installed here.  Returns 0.
 */
	ENTRY(hwblkpagecopy)
	! get another window w/space for three aligned blocks of saved fpregs
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp

	! %i0 - source address (arg)
	! %i1 - destination address (arg)
	! %i2 - length of region (not arg)
	! %l0 - saved fprs
	! %l1 - pointer to saved fpregs

	rd	%fprs, %l0		! check for unused fp
	btst	FPRS_FEF, %l0
	! Annulled delay slot: the wr runs only when FEF was clear, in
	! which case the FPU is enabled and the register save is skipped.
	bz,a,pt	%icc, 1f
	  wr	%g0, FPRS_FEF, %fprs

	BST_FPQ1Q3_TOSTACK(%l1)

1:	set	PAGESIZE, CNT
	mov	REALSRC, SRC

	prefetch [SRC], #one_read
	prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read
	prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read
	prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read
	ldd	[SRC], %f0
#if CHEETAH_PREFETCH > 4
	prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
#endif
	ldd	[SRC + 0x08], %f2
#if CHEETAH_PREFETCH > 5
	prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read
#endif
	ldd	[SRC + 0x10], %f4
#if CHEETAH_PREFETCH > 6
	prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read
#endif
	fsrc1	%f0, %f32
	ldd	[SRC + 0x18], %f6
#if CHEETAH_PREFETCH > 7
	prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read
#endif
	fsrc1	%f2, %f34
	ldd	[SRC + 0x20], %f8
	fsrc1	%f4, %f36
	ldd	[SRC + 0x28], %f10
	fsrc1	%f6, %f38
	ldd	[SRC + 0x30], %f12
	fsrc1	%f8, %f40
	ldd	[SRC + 0x38], %f14
	fsrc1	%f10, %f42
	ldd	[SRC + VIS_BLOCKSIZE], %f0
	sub	CNT, VIS_BLOCKSIZE, CNT
	add	SRC, VIS_BLOCKSIZE, SRC
	ba,a,pt	%ncc, 2f		! annulled: the nop is padding only
	  nop
	.align	16
2:
	! Main loop: finish and store the pending block while loading
	! the next one; runs while CNT > VIS_BLOCKSIZE + 8.
	ldd	[SRC + 0x08], %f2
	fsrc1	%f12, %f44
	ldd	[SRC + 0x10], %f4
	fsrc1	%f14, %f46
	stda	%f32, [DST]ASI_BLK_P
	ldd	[SRC + 0x18], %f6
	fsrc1	%f0, %f32
	ldd	[SRC + 0x20], %f8
	fsrc1	%f2, %f34
	ldd	[SRC + 0x28], %f10
	fsrc1	%f4, %f36
	ldd	[SRC + 0x30], %f12
	fsrc1	%f6, %f38
	ldd	[SRC + 0x38], %f14
	fsrc1	%f8, %f40
	ldd	[SRC + VIS_BLOCKSIZE], %f0
	fsrc1	%f10, %f42
	prefetch [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read
	sub	CNT, VIS_BLOCKSIZE, CNT
	add	DST, VIS_BLOCKSIZE, DST
	cmp	CNT, VIS_BLOCKSIZE + 8
	prefetch [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
	bgu,pt	%ncc, 2b
	  add	SRC, VIS_BLOCKSIZE, SRC

	! trailing block
	! (stores the pending block, then loads and stores the last one
	! without reading past it)
	ldd	[SRC + 0x08], %f2
	fsrc1	%f12, %f44
	ldd	[SRC + 0x10], %f4
	fsrc1	%f14, %f46
	stda	%f32, [DST]ASI_BLK_P
	ldd	[SRC + 0x18], %f6
	fsrc1	%f0, %f32
	ldd	[SRC + 0x20], %f8
	fsrc1	%f2, %f34
	ldd	[SRC + 0x28], %f10
	fsrc1	%f4, %f36
	ldd	[SRC + 0x30], %f12
	fsrc1	%f6, %f38
	ldd	[SRC + 0x38], %f14
	fsrc1	%f8, %f40
	sub	CNT, VIS_BLOCKSIZE, CNT
	add	DST, VIS_BLOCKSIZE, DST
	add	SRC, VIS_BLOCKSIZE, SRC
	fsrc1	%f10, %f42
	fsrc1	%f12, %f44
	fsrc1	%f14, %f46
	stda	%f32, [DST]ASI_BLK_P

	membar	#Sync

	FPRAS_INTERVAL(FPRAS_PGCOPY, 1, %l5, %o2, %o3, %o4, %o5, 8)
	FPRAS_REWRITE_TYPE1(1, %l5, %f32, %o2, 9)
	FPRAS_CHECK(FPRAS_PGCOPY, %l5, 9)	! lose outputs

	btst	FPRS_FEF, %l0		! was the FPU in use on entry?
	bz,pt	%icc, 2f
	  nop

	! NOTE(review): the save above passes %l1 to BST_FPQ1Q3_TOSTACK
	! while the restore passes %l3.  Presumably the argument is only
	! a scratch register that each macro loads itself; confirm
	! against the macro definitions.
	BLD_FPQ1Q3_FROMSTACK(%l3)
	ba	3f
	  nop

2:	FZEROQ1Q3

3:	wr	%l0, 0, %fprs		! restore fprs
	ret
	  restore	%g0, 0, %o0		! return 0

	SET_SIZE(hwblkpagecopy)


/*
 * Transfer data to and from user space -
 * Note that these routines can cause faults
 * It is assumed that the kernel has nothing at
 * less than KERNELBASE in the virtual address space.
 *
 * Note that copyin(9F) and copyout(9F) are part of the
 * DDI/DKI which specifies that they return '-1' on "errors."
 *
 * Sigh.
 *
 * So there's two extremely similar routines - xcopyin() and xcopyout()
 * which return the errno that we've faithfully computed.  This
 * allows other callers (e.g. uiomove(9F)) to work correctly.
 * Given that these are used pretty heavily, we expand the calling
 * sequences inline for all flavours (rather than making wrappers).
 *
 * There are also stub routines for xcopyout_little and xcopyin_little,
 * which currently are intended to handle requests of <= 16 bytes from
 * do_unaligned. Future enhancement to make them handle 8k pages efficiently
 * is left as an exercise...
 */

/*
 * Copying data between user and kernel space (copyOP/xcopyOP/copyOP_noerr)
 *
 * General theory of operation:
 *
 * The only difference between copy{in,out} and
 * xcopy{in,out} is in the error handling routine they invoke
 * when a memory access error occurs. xcopyOP returns the errno
 * while copyOP returns -1 (see above). copy{in,out}_noerr set
 * a special flag (by OR-ing the TRAMP_FLAG into the fault handler address)
 * if they are called with a fault handler already in place. That flag
 * causes the default handlers to trampoline to the previous handler
 * upon an error.
 *
 * None of the copyops routines grab a window until it's decided that
 * we need to do a HW block copy operation. This saves a window
 * spill/fill when we're called during socket ops. The typical IO
 * path won't cause spill/fill traps.
 *
 * This code uses a set of 4 limits (hw_copy_limit_1, hw_copy_limit_2,
 * hw_copy_limit_4 and hw_copy_limit_8) for the maximum size that will
 * be copied by the plain copy loops given a particular input/output
 * address alignment; copies longer than the limit use FPBLK.
 * If the value for a particular limit is zero, the copy will be performed
 * by the plain copy loops rather than FPBLK, whatever its length.
 *
 * See the description of bcopy above for more details of the
 * data copying algorithm and the default limits.
 *
 */

/*
 * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
 */

/*
 * We save the arguments in the following registers in case of a fault:
 *	kaddr - %l1
 *	uaddr - %l2
 *	count - %l3
 *
 * These are local registers, so they are only used by the paths that
 * have taken a register window (copyout_more).  copyio_fault copies them
 * back into %i0-%i2 before jumping to REAL_LOFAULT.
 */
#define SAVE_SRC	%l1
#define SAVE_DST	%l2
#define SAVE_COUNT	%l3

/*
 * Leaf-routine (small copy) equivalents of the registers above.  The small
 * copy path stashes the original %o0-%o2 here (.sm_do_copyout) and its
 * fault handler (.sm_copyout_err) moves them back to %o0-%o2.
 *
 * ERRNO is where copyio_fault keeps the errno it receives in %g1.
 */
#define SM_SAVE_SRC		%g4
#define SM_SAVE_DST		%g5
#define SM_SAVE_COUNT		%o5
#define ERRNO		%l5


/*
 * Register holding the address of the flavor-specific fault handler.
 * The windowed entry points load it (copyout_more uses .copyout_err) and
 * copyio_fault finishes with "jmp REAL_LOFAULT".
 */
#define REAL_LOFAULT	%l4
/*
 * Generic copyio fault handler.  This is the first line of defense when a
 * fault occurs in (x)copyin/(x)copyout.  In order for this to function
 * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
 * This allows us to share common code for all the flavors of the copy
 * operations, including the _noerr versions.
 *
 * Note that this function will restore the original input parameters before
 * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
 * member of the t_copyop structure, if needed.
 *
 * Register state expected on entry:
 *	%g1		errno describing the fault (copied into ERRNO)
 *	%l6		t_lofault value that was in place before the copy,
 *			with FPUSED_FLAG or'ed in once the FP state has
 *			been saved on the stack
 *	SAVE_SRC, SAVE_DST, SAVE_COUNT
 *			original arguments of the interrupted copy
 *	REAL_LOFAULT	address this routine jumps to when it is done
 *
 * If FPUSED_FLAG is set, %gsr and %fprs are reloaded from their save slots
 * in the current frame.  The FP registers are reloaded from the stack when
 * the saved %fprs had FPRS_FEF set, and handed to FZEROQ2Q4 otherwise.
 */
	ENTRY(copyio_fault)
	membar	#Sync
	mov	%g1,ERRNO			! save errno in ERRNO
	btst	FPUSED_FLAG, %l6		! was the FP state saved?
	bz	%ncc, 1f			! no: nothing to restore
	  nop

	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
	wr	%o2, 0, %gsr			! restore gsr

	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
	btst	FPRS_FEF, %o3			! was the FPU enabled by our caller?
	bz,pt	%icc, 4f			! no: nothing of the caller's to reload
	  nop

	BLD_FPQ2Q4_FROMSTACK(%o2)		! reload the caller's FP registers

	ba,pt	%ncc, 1f
	  wr	%o3, 0, %fprs			! restore fprs

4:
	FZEROQ2Q4				! presumably clears the FP regs we used
						! -- macro is defined outside this view
	wr	%o3, 0, %fprs			! restore fprs

1:
	andn	%l6, FPUSED_FLAG, %l6		! strip our flag to recover the address
	membar	#Sync
	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	FP_ALLOWMIGRATE(5, 6)			! NOTE(review): presumably pairs with the
						! FP_NOMIGRATE in the copy entry -- confirm

	mov	SAVE_SRC, %i0			! hand the original arguments back
	mov	SAVE_DST, %i1
	jmp	REAL_LOFAULT
	  mov	SAVE_COUNT, %i2			! (delay slot) third argument

	SET_SIZE(copyio_fault)


/*
 * copyout
 *
 * Copy %o2 bytes from the kernel address in %o0 to the user address in
 * %o1 (all stores in the leaf path go through ASI_USER).
 *
 * Returns 0 in %o0 on success.  If a fault is taken in the leaf path,
 * .sm_copyout_err restores the original arguments and either jumps to the
 * CP_COPYOUT member of the thread's T_COPYOPS (when one is installed) or
 * returns -1.
 *
 * Dispatch on entry:
 *   - length <= VIS_COPY_THRESHOLD: leaf routine (.copyout_small).
 *   - otherwise %o3 = src ^ dst selects one of hw_copy_limit_{1,2,4,8}
 *     according to how many low-order address bits the two pointers share.
 *     If that limit is zero or length <= limit, use the leaf routine,
 *     else go to .copyout_more (FP block copy; takes a register window).
 *
 * Leaf path register use:
 *	%o0	SRC (advances)		%o3	data scratch
 *	%o1	DST (advances)		%o4	saved t_lofault
 *	%o2	remaining count, often held biased so that a single
 *		subcc can drive the loop branch (see each loop)
 *	SM_SAVE_SRC/SM_SAVE_DST/SM_SAVE_COUNT	original arguments
 *
 * Instructions indented by two extra spaces sit in a branch delay slot.
 */
	ENTRY(copyout)

	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
	bleu,pt	%ncc, .copyout_small		! short enough: use leaf routine
	  xor	%o0, %o1, %o3			! are src, dst alignable?
	btst	7, %o3				! low 3 bits identical?
	bz,pt	%ncc, .copyout_8		! yes: longword alignable
	  nop
	btst	1, %o3				! bit 0 identical?
	bz,pt	%ncc, .copyout_2		! yes: at least half-word alignable
	  nop
	! only byte alignment is possible
	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_small		! if zero, disable HW copy
	  cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .copyout_small		! go to small copy
	  nop
	ba,pt	%ncc, .copyout_more		! otherwise go to large copy
	  nop
.copyout_2:
	btst	3, %o3				! low 2 bits identical?
	bz,pt	%ncc, .copyout_4		! yes: word alignable
	  nop
	! half-word alignment is the best we can do
	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_small		! if zero, disable HW copy
	  cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .copyout_small		! go to small copy
	  nop
	ba,pt	%ncc, .copyout_more		! otherwise go to large copy
	  nop
.copyout_4:
	! already checked longword, must be word aligned
	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_small		! if zero, disable HW copy
	  cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .copyout_small		! go to small copy
	  nop
	ba,pt	%ncc, .copyout_more		! otherwise go to large copy
	  nop
.copyout_8:
	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_small		! if zero, disable HW copy
	  cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .copyout_small		! go to small copy
	  nop
	ba,pt	%ncc, .copyout_more		! otherwise go to large copy
	  nop

	.align	16
	nop				! instruction alignment
					! see discussion at start of file
!
! Leaf routine: install .sm_copyout_err as t_lofault, keeping the previous
! handler in %o4 for the duration of the copy.
!
.copyout_small:
	sethi	%hi(.sm_copyout_err), %o5	! .sm_copyout_err is lofault
	or	%o5, %lo(.sm_copyout_err), %o5
	ldn	[THREAD_REG + T_LOFAULT], %o4	! save existing handler
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! set t_lofault
.sm_do_copyout:
	mov	%o0, SM_SAVE_SRC		! keep original args for the
	mov	%o1, SM_SAVE_DST		! fault handler
	cmp	%o2, SHORTCOPY		! check for really short case
	bleu,pt	%ncc, .co_sm_left	!
	  mov	%o2, SM_SAVE_COUNT	! (delay slot) runs on both paths
	cmp	%o2, CHKSIZE		! check for medium length cases
	bgu,pn	%ncc, .co_med		!
	  or	%o0, %o1, %o3		! prepare alignment check
	andcc	%o3, 0x3, %g0		! test for alignment
	bz,pt	%ncc, .co_sm_word	! branch to word aligned case
.co_sm_movebytes:
	  sub	%o2, 3, %o2		! adjust count to allow cc zero test
					! (also the delay slot of the bz above,
					! so .co_sm_word sees count - 3 too)
!
! Byte loop, 4 bytes per pass.  %o2 holds (remaining - 3) here.
!
.co_sm_notalign4:
	ldub	[%o0], %o3		! read byte
	subcc	%o2, 4, %o2		! reduce count by 4
	stba	%o3, [%o1]ASI_USER	! write byte
	inc	%o1			! advance DST by 1
	ldub	[%o0 + 1], %o3		! repeat for a total of 4 bytes
	add	%o0, 4, %o0		! advance SRC by 4
	stba	%o3, [%o1]ASI_USER
	inc	%o1			! advance DST by 1
	ldub	[%o0 - 2], %o3
	stba	%o3, [%o1]ASI_USER
	inc	%o1			! advance DST by 1
	ldub	[%o0 - 1], %o3
	stba	%o3, [%o1]ASI_USER
	bgt,pt	%ncc, .co_sm_notalign4	! loop til 3 or fewer bytes remain
	  inc	%o1			! advance DST by 1
	add	%o2, 3, %o2		! restore count
!
! Copy the last 0 to 3 bytes; %o2 is the true remaining count.
!
.co_sm_left:
	tst	%o2
	bz,pt	%ncc, .co_sm_exit	! check for zero length
	  nop
	ldub	[%o0], %o3		! load one byte
	deccc	%o2			! reduce count for cc test
	bz,pt	%ncc, .co_sm_exit
	  stba	%o3,[%o1]ASI_USER	! store one byte
	ldub	[%o0 + 1], %o3		! load second byte
	deccc	%o2
	inc	%o1
	bz,pt	%ncc, .co_sm_exit
	  stba	%o3,[%o1]ASI_USER	! store second byte
	ldub	[%o0 + 2], %o3		! load third byte
	inc	%o1
	stba	%o3,[%o1]ASI_USER	! store third byte
	membar	#Sync				! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	  mov	%g0, %o0		! return 0
	.align	16
!
! Word loop, 8 bytes per pass.  Reached from .co_sm_word with the first
! word already in %o3 (entry at .co_sm_wordx) and %o2 = remaining - 7.
!
.co_sm_words:
	lduw	[%o0], %o3		! read word
.co_sm_wordx:
	subcc	%o2, 8, %o2		! update count
	stwa	%o3, [%o1]ASI_USER	! write word
	add	%o0, 8, %o0		! update SRC
	lduw	[%o0 - 4], %o3		! read word
	add	%o1, 4, %o1		! update DST
	stwa	%o3, [%o1]ASI_USER	! write word
	bgt,pt	%ncc, .co_sm_words	! loop til done
	  add	%o1, 4, %o1		! update DST
	addcc	%o2, 7, %o2		! restore count
	bz,pt	%ncc, .co_sm_exit
	  nop
	deccc	%o2			! exactly one byte left?
	bz,pt	%ncc, .co_sm_byte
!
! Half-word loop for the tail.  Entered with %o2 = remaining - 1; the
! subcc below is also the delay slot of the bz above.
!
.co_sm_half:
	  subcc	%o2, 2, %o2		! reduce count by 2
	lduh	[%o0], %o3		! read half word
	add	%o0, 2, %o0		! advance SRC by 2
	stha	%o3, [%o1]ASI_USER	! write half word
	bgt,pt	%ncc, .co_sm_half	! loop til done
	  add	%o1, 2, %o1		! advance DST by 2
	addcc	%o2, 1, %o2		! restore count
	bz,pt	%ncc, .co_sm_exit
	  nop
.co_sm_byte:
	ldub	[%o0], %o3		! copy the single trailing byte
	stba	%o3, [%o1]ASI_USER
	membar	#Sync				! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	  mov	%g0, %o0		! return 0
	.align	16
!
! Both pointers word aligned.  %o2 arrives as count - 3 (see the delay
! slot at .co_sm_movebytes).
!
.co_sm_word:
	subcc	%o2, 4, %o2		! update count
	bgt,pt	%ncc, .co_sm_wordx	! 8 or more bytes: use the word loop
	  lduw	[%o0], %o3		! read word
	addcc	%o2, 3, %o2		! restore count
	bz,pt	%ncc, .co_sm_exit
	  stwa	%o3, [%o1]ASI_USER	! write word
	deccc	%o2			! reduce count for cc test
	ldub	[%o0 + 4], %o3		! load one byte
	add	%o1, 4, %o1
	bz,pt	%ncc, .co_sm_exit
	  stba	%o3, [%o1]ASI_USER	! store one byte
	ldub	[%o0 + 5], %o3		! load second byte
	deccc	%o2
	inc	%o1
	bz,pt	%ncc, .co_sm_exit
	  stba	%o3, [%o1]ASI_USER	! store second byte
	ldub	[%o0 + 6], %o3		! load third byte
	inc	%o1
	stba	%o3, [%o1]ASI_USER	! store third byte
.co_sm_exit:
	membar	#Sync				! sync error barrier
	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	  mov	%g0, %o0		! return 0

	.align 16
!
! Medium length copy (count > CHKSIZE).  Pick the widest access size that
! src and dst can both be aligned to, based on src ^ dst.
!
.co_med:
	xor	%o0, %o1, %o3		! setup alignment check
	btst	1, %o3
	bnz,pt	%ncc, .co_sm_movebytes	! unaligned
	  nop
	btst	3, %o3
	bnz,pt	%ncc, .co_med_half	! halfword aligned
	  nop
	btst	7, %o3
	bnz,pt	%ncc, .co_med_word	! word aligned
	  nop
.co_med_long:
	btst	3, %o0			! check for
	bz,pt	%ncc, .co_med_long1	! word alignment
	  nop
.co_med_long0:
	ldub	[%o0], %o3		! load one byte
	inc	%o0
	stba	%o3,[%o1]ASI_USER	! store byte
	inc	%o1
	btst	3, %o0
	bnz,pt	%ncc, .co_med_long0
	  dec	%o2			! (delay slot) one byte copied per pass
.co_med_long1:			! word aligned
	btst	7, %o0			! check for long word
	bz,pt	%ncc, .co_med_long2
	  nop
	lduw	[%o0], %o3		! load word
	add	%o0, 4, %o0		! advance SRC by 4
	stwa	%o3, [%o1]ASI_USER	! store word
	add	%o1, 4, %o1		! advance DST by 4
	sub	%o2, 4, %o2		! reduce count by 4
!
!  Now long word aligned and have at least 32 bytes to move
!
.co_med_long2:
	sub	%o2, 31, %o2		! adjust count to allow cc zero test
	sub	%o1, 8, %o1		! adjust pointer to allow store in
					! branch delay slot instead of add
.co_med_lmove:
	add	%o1, 8, %o1		! advance DST by 8
	ldx	[%o0], %o3		! read long word
	subcc	%o2, 32, %o2		! reduce count by 32
	stxa	%o3, [%o1]ASI_USER	! write long word
	add	%o1, 8, %o1		! advance DST by 8
	ldx	[%o0 + 8], %o3		! repeat for a total for 4 long words
	add	%o0, 32, %o0		! advance SRC by 32
	stxa	%o3, [%o1]ASI_USER
	ldx	[%o0 - 16], %o3
	add	%o1, 8, %o1		! advance DST by 8
	stxa	%o3, [%o1]ASI_USER
	ldx	[%o0 - 8], %o3
	add	%o1, 8, %o1		! advance DST by 8
	bgt,pt	%ncc, .co_med_lmove	! loop til 31 or fewer bytes left
	  stxa	%o3, [%o1]ASI_USER
	add	%o1, 8, %o1		! advance DST by 8
	addcc	%o2, 24, %o2		! restore count to long word offset
					! (%o2 is now remaining - 7)
	ble,pt	%ncc, .co_med_lextra	! check for more long words to move
	  nop
.co_med_lword:
	ldx	[%o0], %o3		! read long word
	subcc	%o2, 8, %o2		! reduce count by 8
	stxa	%o3, [%o1]ASI_USER	! write long word
	add	%o0, 8, %o0		! advance SRC by 8
	bgt,pt	%ncc, .co_med_lword	! loop til 7 or fewer bytes left
	  add	%o1, 8, %o1		! advance DST by 8
.co_med_lextra:
	addcc	%o2, 7, %o2		! restore rest of count
	bz,pt	%ncc, .co_sm_exit	! if zero, then done
	  deccc	%o2			! (delay slot) sets cc for the next bz
	bz,pt	%ncc, .co_sm_byte	! exactly one byte left
	  nop
	ba,pt	%ncc, .co_sm_half	! else finish by half words
	  nop

	.align 16
	nop				! instruction alignment
					! see discussion at start of file
.co_med_word:
	btst	3, %o0			! check for
	bz,pt	%ncc, .co_med_word1	! word alignment
	  nop
.co_med_word0:
	ldub	[%o0], %o3		! load one byte
	inc	%o0
	stba	%o3,[%o1]ASI_USER	! store byte
	inc	%o1
	btst	3, %o0
	bnz,pt	%ncc, .co_med_word0
	  dec	%o2			! (delay slot) one byte copied per pass
!
!  Now word aligned and have at least 36 bytes to move
!
.co_med_word1:
	sub	%o2, 15, %o2		! adjust count to allow cc zero test
.co_med_wmove:
	lduw	[%o0], %o3		! read word
	subcc	%o2, 16, %o2		! reduce count by 16
	stwa	%o3, [%o1]ASI_USER	! write word
	add	%o1, 4, %o1		! advance DST by 4
	lduw	[%o0 + 4], %o3		! repeat for a total for 4 words
	add	%o0, 16, %o0		! advance SRC by 16
	stwa	%o3, [%o1]ASI_USER
	add	%o1, 4, %o1		! advance DST by 4
	lduw	[%o0 - 8], %o3
	stwa	%o3, [%o1]ASI_USER
	add	%o1, 4, %o1		! advance DST by 4
	lduw	[%o0 - 4], %o3
	stwa	%o3, [%o1]ASI_USER
	bgt,pt	%ncc, .co_med_wmove	! loop til 15 or fewer bytes left
	  add	%o1, 4, %o1		! advance DST by 4
	addcc	%o2, 12, %o2		! restore count to word offset
					! (%o2 is now remaining - 3)
	ble,pt	%ncc, .co_med_wextra	! check for more words to move
	  nop
.co_med_word2:
	lduw	[%o0], %o3		! read word
	subcc	%o2, 4, %o2		! reduce count by 4
	stwa	%o3, [%o1]ASI_USER	! write word
	add	%o0, 4, %o0		! advance SRC by 4
	bgt,pt	%ncc, .co_med_word2	! loop til 3 or fewer bytes left
	  add	%o1, 4, %o1		! advance DST by 4
.co_med_wextra:
	addcc	%o2, 3, %o2		! restore rest of count
	bz,pt	%ncc, .co_sm_exit	! if zero, then done
	  deccc	%o2			! (delay slot) sets cc for the next bz
	bz,pt	%ncc, .co_sm_byte	! exactly one byte left
	  nop
	ba,pt	%ncc, .co_sm_half	! else finish by half words
	  nop

	.align 16
	nop				! instruction alignment
	nop				! see discussion at start of file
	nop
.co_med_half:
	btst	1, %o0			! check for
	bz,pt	%ncc, .co_med_half1	! half word alignment
	  nop
	ldub	[%o0], %o3		! load one byte
	inc	%o0
	stba	%o3,[%o1]ASI_USER	! store byte
	inc	%o1
	dec	%o2
!
!  Now half word aligned and have at least 38 bytes to move
!
.co_med_half1:
	sub	%o2, 7, %o2		! adjust count to allow cc zero test
.co_med_hmove:
	lduh	[%o0], %o3		! read half word
	subcc	%o2, 8, %o2		! reduce count by 8
	stha	%o3, [%o1]ASI_USER	! write half word
	add	%o1, 2, %o1		! advance DST by 2
	lduh	[%o0 + 2], %o3		! repeat for a total for 4 halfwords
	add	%o0, 8, %o0		! advance SRC by 8
	stha	%o3, [%o1]ASI_USER
	add	%o1, 2, %o1		! advance DST by 2
	lduh	[%o0 - 4], %o3
	stha	%o3, [%o1]ASI_USER
	add	%o1, 2, %o1		! advance DST by 2
	lduh	[%o0 - 2], %o3
	stha	%o3, [%o1]ASI_USER
	bgt,pt	%ncc, .co_med_hmove	! loop til 7 or fewer bytes left
	  add	%o1, 2, %o1		! advance DST by 2
	addcc	%o2, 7, %o2		! restore count
	bz,pt	%ncc, .co_sm_exit
	  deccc	%o2			! (delay slot) sets cc for the next bz
	bz,pt	%ncc, .co_sm_byte	! exactly one byte left
	  nop
	ba,pt	%ncc, .co_sm_half	! else finish by half words
	  nop

/*
 * We got here because of a fault during short copyout.
 * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh).
 *
 * NOTE(review): this leaf path installs .sm_copyout_err directly as
 * t_lofault and never runs copyio_fault, which is the only visible code
 * that writes ERRNO, so the errno is presumably still in %g1 here --
 * confirm.  It is not used below either way.
 *
 * The original arguments are put back in %o0-%o2 so that, when the thread
 * has a T_COPYOPS vector, its CP_COPYOUT routine can be entered as if it
 * had been called directly.
 */
.sm_copyout_err:
	membar	#Sync
	stn	%o4, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
	mov	SM_SAVE_SRC, %o0
	mov	SM_SAVE_DST, %o1
	mov	SM_SAVE_COUNT, %o2
	ldn	[THREAD_REG + T_COPYOPS], %o3	! check for copyop handler
	tst	%o3
	bz,pt	%ncc, 3f			! if not, return error
	  nop
	ldn	[%o3 + CP_COPYOUT], %o5		! if handler, invoke it with
	jmp	%o5				! original arguments
	  nop
3:
	retl
	  or	%g0, -1, %o0		! return error value

	SET_SIZE(copyout)

/*
 * The _more entry points are not intended to be used directly by
 * any caller from outside this file.  They are provided to allow
 * profiling and dtrace of the portions of the copy code that uses
 * the floating point registers.
 * This entry is particularly important as DTRACE (at least as of
 * 4/2004) does not support leaf functions.
 */

	ENTRY(copyout_more)
.copyout_more:
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	set	.copyout_err, REAL_LOFAULT

/*
 * Copy outs that reach here are larger than VIS_COPY_THRESHOLD bytes
 */
.do_copyout:
	set	copyio_fault, %l7		!
.copyio_fault is lofault val 21777c478bd9Sstevel@tonic-gate 21787c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %l6 ! save existing handler 21797c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 21807c478bd9Sstevel@tonic-gate stn %l7, [THREAD_REG + T_LOFAULT] ! set t_lofault 21817c478bd9Sstevel@tonic-gate 21827c478bd9Sstevel@tonic-gate mov %i0, SAVE_SRC 21837c478bd9Sstevel@tonic-gate mov %i1, SAVE_DST 21847c478bd9Sstevel@tonic-gate mov %i2, SAVE_COUNT 21857c478bd9Sstevel@tonic-gate 21867c478bd9Sstevel@tonic-gate FP_NOMIGRATE(6, 7) 21877c478bd9Sstevel@tonic-gate 21887c478bd9Sstevel@tonic-gate rd %fprs, %o2 ! check for unused fp 21897c478bd9Sstevel@tonic-gate st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs 21907c478bd9Sstevel@tonic-gate btst FPRS_FEF, %o2 21917c478bd9Sstevel@tonic-gate bz,a,pt %icc, .do_blockcopyout 21927c478bd9Sstevel@tonic-gate wr %g0, FPRS_FEF, %fprs 21937c478bd9Sstevel@tonic-gate 21947c478bd9Sstevel@tonic-gate BST_FPQ2Q4_TOSTACK(%o2) 21957c478bd9Sstevel@tonic-gate 21967c478bd9Sstevel@tonic-gate.do_blockcopyout: 21977c478bd9Sstevel@tonic-gate rd %gsr, %o2 21987c478bd9Sstevel@tonic-gate stx %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! save gsr 21997c478bd9Sstevel@tonic-gate or %l6, FPUSED_FLAG, %l6 22007c478bd9Sstevel@tonic-gate 22017c478bd9Sstevel@tonic-gate andcc DST, VIS_BLOCKSIZE - 1, TMP 22027c478bd9Sstevel@tonic-gate mov ASI_USER, %asi 22037c478bd9Sstevel@tonic-gate bz,pt %ncc, 2f 22047c478bd9Sstevel@tonic-gate neg TMP 22057c478bd9Sstevel@tonic-gate add TMP, VIS_BLOCKSIZE, TMP 22067c478bd9Sstevel@tonic-gate 22077c478bd9Sstevel@tonic-gate ! TMP = bytes required to align DST on FP_BLOCK boundary 22087c478bd9Sstevel@tonic-gate ! Using SRC as a tmp here 22097c478bd9Sstevel@tonic-gate cmp TMP, 3 22107c478bd9Sstevel@tonic-gate bleu,pt %ncc, 1f 22117c478bd9Sstevel@tonic-gate sub CNT,TMP,CNT ! adjust main count 22127c478bd9Sstevel@tonic-gate sub TMP, 3, TMP ! 
adjust for end of loop test 22137c478bd9Sstevel@tonic-gate.co_blkalign: 22147c478bd9Sstevel@tonic-gate ldub [REALSRC], SRC ! move 4 bytes per loop iteration 22157c478bd9Sstevel@tonic-gate stba SRC, [DST]%asi 22167c478bd9Sstevel@tonic-gate subcc TMP, 4, TMP 22177c478bd9Sstevel@tonic-gate ldub [REALSRC + 1], SRC 22187c478bd9Sstevel@tonic-gate add REALSRC, 4, REALSRC 22197c478bd9Sstevel@tonic-gate stba SRC, [DST + 1]%asi 22207c478bd9Sstevel@tonic-gate ldub [REALSRC - 2], SRC 22217c478bd9Sstevel@tonic-gate add DST, 4, DST 22227c478bd9Sstevel@tonic-gate stba SRC, [DST - 2]%asi 22237c478bd9Sstevel@tonic-gate ldub [REALSRC - 1], SRC 22247c478bd9Sstevel@tonic-gate bgu,pt %ncc, .co_blkalign 22257c478bd9Sstevel@tonic-gate stba SRC, [DST - 1]%asi 22267c478bd9Sstevel@tonic-gate 22277c478bd9Sstevel@tonic-gate addcc TMP, 3, TMP ! restore count adjustment 22287c478bd9Sstevel@tonic-gate bz,pt %ncc, 2f ! no bytes left? 22297c478bd9Sstevel@tonic-gate nop 22307c478bd9Sstevel@tonic-gate1: ldub [REALSRC], SRC 22317c478bd9Sstevel@tonic-gate inc REALSRC 22327c478bd9Sstevel@tonic-gate inc DST 22337c478bd9Sstevel@tonic-gate deccc TMP 22347c478bd9Sstevel@tonic-gate bgu %ncc, 1b 22357c478bd9Sstevel@tonic-gate stba SRC, [DST - 1]%asi 22367c478bd9Sstevel@tonic-gate 22377c478bd9Sstevel@tonic-gate2: 22387c478bd9Sstevel@tonic-gate andn REALSRC, 0x7, SRC 22397c478bd9Sstevel@tonic-gate alignaddr REALSRC, %g0, %g0 22407c478bd9Sstevel@tonic-gate 22417c478bd9Sstevel@tonic-gate ! SRC - 8-byte aligned 22427c478bd9Sstevel@tonic-gate ! 
DST - 64-byte aligned 22437c478bd9Sstevel@tonic-gate prefetch [SRC], #one_read 22447c478bd9Sstevel@tonic-gate prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read 22457c478bd9Sstevel@tonic-gate prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read 22467c478bd9Sstevel@tonic-gate prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read 22477c478bd9Sstevel@tonic-gate ldd [SRC], %f16 22487c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 4 22497c478bd9Sstevel@tonic-gate prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read 22507c478bd9Sstevel@tonic-gate#endif 22517c478bd9Sstevel@tonic-gate ldd [SRC + 0x08], %f18 22527c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 5 22537c478bd9Sstevel@tonic-gate prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read 22547c478bd9Sstevel@tonic-gate#endif 22557c478bd9Sstevel@tonic-gate ldd [SRC + 0x10], %f20 22567c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 6 22577c478bd9Sstevel@tonic-gate prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read 22587c478bd9Sstevel@tonic-gate#endif 22597c478bd9Sstevel@tonic-gate faligndata %f16, %f18, %f48 22607c478bd9Sstevel@tonic-gate ldd [SRC + 0x18], %f22 22617c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 7 22627c478bd9Sstevel@tonic-gate prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read 22637c478bd9Sstevel@tonic-gate#endif 22647c478bd9Sstevel@tonic-gate faligndata %f18, %f20, %f50 22657c478bd9Sstevel@tonic-gate ldd [SRC + 0x20], %f24 22667c478bd9Sstevel@tonic-gate faligndata %f20, %f22, %f52 22677c478bd9Sstevel@tonic-gate ldd [SRC + 0x28], %f26 22687c478bd9Sstevel@tonic-gate faligndata %f22, %f24, %f54 22697c478bd9Sstevel@tonic-gate ldd [SRC + 0x30], %f28 22707c478bd9Sstevel@tonic-gate faligndata %f24, %f26, %f56 22717c478bd9Sstevel@tonic-gate ldd [SRC + 0x38], %f30 22727c478bd9Sstevel@tonic-gate faligndata %f26, %f28, %f58 22737c478bd9Sstevel@tonic-gate ldd [SRC + VIS_BLOCKSIZE], %f16 22747c478bd9Sstevel@tonic-gate sub CNT, VIS_BLOCKSIZE, CNT 22757c478bd9Sstevel@tonic-gate add SRC, VIS_BLOCKSIZE, SRC 22767c478bd9Sstevel@tonic-gate add 
REALSRC, VIS_BLOCKSIZE, REALSRC 22777c478bd9Sstevel@tonic-gate ba,a,pt %ncc, 1f 22787c478bd9Sstevel@tonic-gate nop 22797c478bd9Sstevel@tonic-gate .align 16 22807c478bd9Sstevel@tonic-gate1: 22817c478bd9Sstevel@tonic-gate ldd [SRC + 0x08], %f18 22827c478bd9Sstevel@tonic-gate faligndata %f28, %f30, %f60 22837c478bd9Sstevel@tonic-gate ldd [SRC + 0x10], %f20 22847c478bd9Sstevel@tonic-gate faligndata %f30, %f16, %f62 22857c478bd9Sstevel@tonic-gate stda %f48, [DST]ASI_BLK_AIUS 22867c478bd9Sstevel@tonic-gate ldd [SRC + 0x18], %f22 22877c478bd9Sstevel@tonic-gate faligndata %f16, %f18, %f48 22887c478bd9Sstevel@tonic-gate ldd [SRC + 0x20], %f24 22897c478bd9Sstevel@tonic-gate faligndata %f18, %f20, %f50 22907c478bd9Sstevel@tonic-gate ldd [SRC + 0x28], %f26 22917c478bd9Sstevel@tonic-gate faligndata %f20, %f22, %f52 22927c478bd9Sstevel@tonic-gate ldd [SRC + 0x30], %f28 22937c478bd9Sstevel@tonic-gate faligndata %f22, %f24, %f54 22947c478bd9Sstevel@tonic-gate ldd [SRC + 0x38], %f30 22957c478bd9Sstevel@tonic-gate faligndata %f24, %f26, %f56 22967c478bd9Sstevel@tonic-gate sub CNT, VIS_BLOCKSIZE, CNT 22977c478bd9Sstevel@tonic-gate ldd [SRC + VIS_BLOCKSIZE], %f16 22987c478bd9Sstevel@tonic-gate faligndata %f26, %f28, %f58 22997c478bd9Sstevel@tonic-gate prefetch [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read 23007c478bd9Sstevel@tonic-gate add DST, VIS_BLOCKSIZE, DST 23017c478bd9Sstevel@tonic-gate prefetch [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read 23027c478bd9Sstevel@tonic-gate add REALSRC, VIS_BLOCKSIZE, REALSRC 23037c478bd9Sstevel@tonic-gate cmp CNT, VIS_BLOCKSIZE + 8 23047c478bd9Sstevel@tonic-gate bgu,pt %ncc, 1b 23057c478bd9Sstevel@tonic-gate add SRC, VIS_BLOCKSIZE, SRC 23067c478bd9Sstevel@tonic-gate 23077c478bd9Sstevel@tonic-gate ! 
only if REALSRC & 0x7 is 0 23087c478bd9Sstevel@tonic-gate cmp CNT, VIS_BLOCKSIZE 23097c478bd9Sstevel@tonic-gate bne %ncc, 3f 23107c478bd9Sstevel@tonic-gate andcc REALSRC, 0x7, %g0 23117c478bd9Sstevel@tonic-gate bz,pt %ncc, 2f 23127c478bd9Sstevel@tonic-gate nop 2313*5d9d9091SRichard Lowe3: 23147c478bd9Sstevel@tonic-gate faligndata %f28, %f30, %f60 23157c478bd9Sstevel@tonic-gate faligndata %f30, %f16, %f62 23167c478bd9Sstevel@tonic-gate stda %f48, [DST]ASI_BLK_AIUS 23177c478bd9Sstevel@tonic-gate add DST, VIS_BLOCKSIZE, DST 23187c478bd9Sstevel@tonic-gate ba,pt %ncc, 3f 23197c478bd9Sstevel@tonic-gate nop 23207c478bd9Sstevel@tonic-gate2: 23217c478bd9Sstevel@tonic-gate ldd [SRC + 0x08], %f18 23227c478bd9Sstevel@tonic-gate fsrc1 %f28, %f60 23237c478bd9Sstevel@tonic-gate ldd [SRC + 0x10], %f20 23247c478bd9Sstevel@tonic-gate fsrc1 %f30, %f62 23257c478bd9Sstevel@tonic-gate stda %f48, [DST]ASI_BLK_AIUS 23267c478bd9Sstevel@tonic-gate ldd [SRC + 0x18], %f22 23277c478bd9Sstevel@tonic-gate fsrc1 %f16, %f48 23287c478bd9Sstevel@tonic-gate ldd [SRC + 0x20], %f24 23297c478bd9Sstevel@tonic-gate fsrc1 %f18, %f50 23307c478bd9Sstevel@tonic-gate ldd [SRC + 0x28], %f26 23317c478bd9Sstevel@tonic-gate fsrc1 %f20, %f52 23327c478bd9Sstevel@tonic-gate ldd [SRC + 0x30], %f28 23337c478bd9Sstevel@tonic-gate fsrc1 %f22, %f54 23347c478bd9Sstevel@tonic-gate ldd [SRC + 0x38], %f30 23357c478bd9Sstevel@tonic-gate fsrc1 %f24, %f56 23367c478bd9Sstevel@tonic-gate sub CNT, VIS_BLOCKSIZE, CNT 23377c478bd9Sstevel@tonic-gate add DST, VIS_BLOCKSIZE, DST 23387c478bd9Sstevel@tonic-gate add SRC, VIS_BLOCKSIZE, SRC 23397c478bd9Sstevel@tonic-gate add REALSRC, VIS_BLOCKSIZE, REALSRC 23407c478bd9Sstevel@tonic-gate fsrc1 %f26, %f58 23417c478bd9Sstevel@tonic-gate fsrc1 %f28, %f60 23427c478bd9Sstevel@tonic-gate fsrc1 %f30, %f62 23437c478bd9Sstevel@tonic-gate stda %f48, [DST]ASI_BLK_AIUS 23447c478bd9Sstevel@tonic-gate add DST, VIS_BLOCKSIZE, DST 23457c478bd9Sstevel@tonic-gate ba,a,pt %ncc, 4f 
23467c478bd9Sstevel@tonic-gate nop 23477c478bd9Sstevel@tonic-gate 23487c478bd9Sstevel@tonic-gate3: tst CNT 23497c478bd9Sstevel@tonic-gate bz,a %ncc, 4f 23507c478bd9Sstevel@tonic-gate nop 23517c478bd9Sstevel@tonic-gate 23527c478bd9Sstevel@tonic-gate5: ldub [REALSRC], TMP 23537c478bd9Sstevel@tonic-gate inc REALSRC 23547c478bd9Sstevel@tonic-gate inc DST 23557c478bd9Sstevel@tonic-gate deccc CNT 23567c478bd9Sstevel@tonic-gate bgu %ncc, 5b 23577c478bd9Sstevel@tonic-gate stba TMP, [DST - 1]%asi 23587c478bd9Sstevel@tonic-gate4: 23597c478bd9Sstevel@tonic-gate 23607c478bd9Sstevel@tonic-gate.copyout_exit: 23617c478bd9Sstevel@tonic-gate membar #Sync 23627c478bd9Sstevel@tonic-gate 23637c478bd9Sstevel@tonic-gate FPRAS_INTERVAL(FPRAS_COPYOUT, 0, %l5, %o2, %o3, %o4, %o5, 8) 23647c478bd9Sstevel@tonic-gate FPRAS_REWRITE_TYPE2Q2(0, %l5, %o2, %o3, 8, 9) 23657c478bd9Sstevel@tonic-gate FPRAS_CHECK(FPRAS_COPYOUT, %l5, 9) ! lose outputs 23667c478bd9Sstevel@tonic-gate 23677c478bd9Sstevel@tonic-gate ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 23687c478bd9Sstevel@tonic-gate wr %o2, 0, %gsr ! restore gsr 23697c478bd9Sstevel@tonic-gate 23707c478bd9Sstevel@tonic-gate ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 23717c478bd9Sstevel@tonic-gate btst FPRS_FEF, %o3 23727c478bd9Sstevel@tonic-gate bz,pt %icc, 4f 23737c478bd9Sstevel@tonic-gate nop 23747c478bd9Sstevel@tonic-gate 23757c478bd9Sstevel@tonic-gate BLD_FPQ2Q4_FROMSTACK(%o2) 23767c478bd9Sstevel@tonic-gate 23777c478bd9Sstevel@tonic-gate ba,pt %ncc, 1f 23787c478bd9Sstevel@tonic-gate wr %o3, 0, %fprs ! restore fprs 23797c478bd9Sstevel@tonic-gate 23807c478bd9Sstevel@tonic-gate4: 23817c478bd9Sstevel@tonic-gate FZEROQ2Q4 23827c478bd9Sstevel@tonic-gate wr %o3, 0, %fprs ! restore fprs 23837c478bd9Sstevel@tonic-gate 23847c478bd9Sstevel@tonic-gate1: 23857c478bd9Sstevel@tonic-gate membar #Sync 23867c478bd9Sstevel@tonic-gate andn %l6, FPUSED_FLAG, %l6 23877c478bd9Sstevel@tonic-gate stn %l6, [THREAD_REG + T_LOFAULT] ! 
restore old t_lofault 23887c478bd9Sstevel@tonic-gate FP_ALLOWMIGRATE(5, 6) 23897c478bd9Sstevel@tonic-gate ret 23907c478bd9Sstevel@tonic-gate restore %g0, 0, %o0 23917c478bd9Sstevel@tonic-gate 23927c478bd9Sstevel@tonic-gate/* 23937c478bd9Sstevel@tonic-gate * We got here because of a fault during copyout. 23947c478bd9Sstevel@tonic-gate * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh). 23957c478bd9Sstevel@tonic-gate */ 23967c478bd9Sstevel@tonic-gate.copyout_err: 23977c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler 23987c478bd9Sstevel@tonic-gate tst %o4 23997c478bd9Sstevel@tonic-gate bz,pt %ncc, 2f ! if not, return error 24007c478bd9Sstevel@tonic-gate nop 24017c478bd9Sstevel@tonic-gate ldn [%o4 + CP_COPYOUT], %g2 ! if handler, invoke it with 24027c478bd9Sstevel@tonic-gate jmp %g2 ! original arguments 24037c478bd9Sstevel@tonic-gate restore %g0, 0, %g0 ! dispose of copy window 24047c478bd9Sstevel@tonic-gate2: 24057c478bd9Sstevel@tonic-gate ret 24067c478bd9Sstevel@tonic-gate restore %g0, -1, %o0 ! return error value 24077c478bd9Sstevel@tonic-gate 24087c478bd9Sstevel@tonic-gate 24097c478bd9Sstevel@tonic-gate SET_SIZE(copyout_more) 24107c478bd9Sstevel@tonic-gate 24117c478bd9Sstevel@tonic-gate 24127c478bd9Sstevel@tonic-gate ENTRY(xcopyout) 24137c478bd9Sstevel@tonic-gate cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case 24147c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyout_small ! go to larger cases 24157c478bd9Sstevel@tonic-gate xor %o0, %o1, %o3 ! are src, dst alignable? 24167c478bd9Sstevel@tonic-gate btst 7, %o3 ! 24177c478bd9Sstevel@tonic-gate bz,pt %ncc, .xcopyout_8 ! 24187c478bd9Sstevel@tonic-gate nop 2419*5d9d9091SRichard Lowe btst 1, %o3 ! 24207c478bd9Sstevel@tonic-gate bz,pt %ncc, .xcopyout_2 ! check for half-word 24217c478bd9Sstevel@tonic-gate nop 24227c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_1), %o3 ! 
Check copy limit 24237c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_1)], %o3 24247c478bd9Sstevel@tonic-gate tst %o3 24257c478bd9Sstevel@tonic-gate bz,pn %icc, .xcopyout_small ! if zero, disable HW copy 24267c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 24277c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyout_small ! go to small copy 24287c478bd9Sstevel@tonic-gate nop 24297c478bd9Sstevel@tonic-gate ba,pt %ncc, .xcopyout_more ! otherwise go to large copy 24307c478bd9Sstevel@tonic-gate nop 24317c478bd9Sstevel@tonic-gate.xcopyout_2: 24327c478bd9Sstevel@tonic-gate btst 3, %o3 ! 24337c478bd9Sstevel@tonic-gate bz,pt %ncc, .xcopyout_4 ! check for word alignment 24347c478bd9Sstevel@tonic-gate nop 24357c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit 24367c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_2)], %o3 24377c478bd9Sstevel@tonic-gate tst %o3 24387c478bd9Sstevel@tonic-gate bz,pn %icc, .xcopyout_small ! if zero, disable HW copy 24397c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 24407c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyout_small ! go to small copy 24417c478bd9Sstevel@tonic-gate nop 24427c478bd9Sstevel@tonic-gate ba,pt %ncc, .xcopyout_more ! otherwise go to large copy 24437c478bd9Sstevel@tonic-gate nop 24447c478bd9Sstevel@tonic-gate.xcopyout_4: 24457c478bd9Sstevel@tonic-gate ! already checked longword, must be word aligned 24467c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit 24477c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_4)], %o3 24487c478bd9Sstevel@tonic-gate tst %o3 24497c478bd9Sstevel@tonic-gate bz,pn %icc, .xcopyout_small ! if zero, disable HW copy 24507c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 24517c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyout_small ! go to small copy 24527c478bd9Sstevel@tonic-gate nop 24537c478bd9Sstevel@tonic-gate ba,pt %ncc, .xcopyout_more ! 
otherwise go to large copy 24547c478bd9Sstevel@tonic-gate nop 24557c478bd9Sstevel@tonic-gate.xcopyout_8: 24567c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit 24577c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_8)], %o3 24587c478bd9Sstevel@tonic-gate tst %o3 24597c478bd9Sstevel@tonic-gate bz,pn %icc, .xcopyout_small ! if zero, disable HW copy 24607c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 24617c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyout_small ! go to small copy 24627c478bd9Sstevel@tonic-gate nop 24637c478bd9Sstevel@tonic-gate ba,pt %ncc, .xcopyout_more ! otherwise go to large copy 24647c478bd9Sstevel@tonic-gate nop 24657c478bd9Sstevel@tonic-gate 24667c478bd9Sstevel@tonic-gate.xcopyout_small: 24677c478bd9Sstevel@tonic-gate sethi %hi(.sm_xcopyout_err), %o5 ! .sm_xcopyout_err is lofault 24687c478bd9Sstevel@tonic-gate or %o5, %lo(.sm_xcopyout_err), %o5 24697c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o4 ! save existing handler 24707c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 24717c478bd9Sstevel@tonic-gate ba,pt %ncc, .sm_do_copyout ! common code 24727c478bd9Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] ! set t_lofault 24737c478bd9Sstevel@tonic-gate 24747c478bd9Sstevel@tonic-gate.xcopyout_more: 24757c478bd9Sstevel@tonic-gate save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 24767c478bd9Sstevel@tonic-gate sethi %hi(.xcopyout_err), REAL_LOFAULT 24777c478bd9Sstevel@tonic-gate ba,pt %ncc, .do_copyout ! common code 24787c478bd9Sstevel@tonic-gate or REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT 24797c478bd9Sstevel@tonic-gate 24807c478bd9Sstevel@tonic-gate/* 24817c478bd9Sstevel@tonic-gate * We got here because of fault during xcopyout 24827c478bd9Sstevel@tonic-gate * Errno value is in ERRNO 24837c478bd9Sstevel@tonic-gate */ 24847c478bd9Sstevel@tonic-gate.xcopyout_err: 24857c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o4 ! 
check for copyop handler 24867c478bd9Sstevel@tonic-gate tst %o4 24877c478bd9Sstevel@tonic-gate bz,pt %ncc, 2f ! if not, return error 24887c478bd9Sstevel@tonic-gate nop 24897c478bd9Sstevel@tonic-gate ldn [%o4 + CP_XCOPYOUT], %g2 ! if handler, invoke it with 24907c478bd9Sstevel@tonic-gate jmp %g2 ! original arguments 24917c478bd9Sstevel@tonic-gate restore %g0, 0, %g0 ! dispose of copy window 24927c478bd9Sstevel@tonic-gate2: 24937c478bd9Sstevel@tonic-gate ret 24947c478bd9Sstevel@tonic-gate restore ERRNO, 0, %o0 ! return errno value 24957c478bd9Sstevel@tonic-gate 24967c478bd9Sstevel@tonic-gate.sm_xcopyout_err: 24977c478bd9Sstevel@tonic-gate 24987c478bd9Sstevel@tonic-gate membar #Sync 24997c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 25007c478bd9Sstevel@tonic-gate mov SM_SAVE_SRC, %o0 25017c478bd9Sstevel@tonic-gate mov SM_SAVE_DST, %o1 25027c478bd9Sstevel@tonic-gate mov SM_SAVE_COUNT, %o2 25037c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler 25047c478bd9Sstevel@tonic-gate tst %o3 25057c478bd9Sstevel@tonic-gate bz,pt %ncc, 3f ! if not, return error 25067c478bd9Sstevel@tonic-gate nop 25077c478bd9Sstevel@tonic-gate ldn [%o3 + CP_XCOPYOUT], %o5 ! if handler, invoke it with 25087c478bd9Sstevel@tonic-gate jmp %o5 ! original arguments 25097c478bd9Sstevel@tonic-gate nop 25107c478bd9Sstevel@tonic-gate3: 25117c478bd9Sstevel@tonic-gate retl 25127c478bd9Sstevel@tonic-gate or %g1, 0, %o0 ! return errno value 25137c478bd9Sstevel@tonic-gate 25147c478bd9Sstevel@tonic-gate SET_SIZE(xcopyout) 25157c478bd9Sstevel@tonic-gate 25167c478bd9Sstevel@tonic-gate ENTRY(xcopyout_little) 25177c478bd9Sstevel@tonic-gate sethi %hi(.xcopyio_err), %o5 25187c478bd9Sstevel@tonic-gate or %o5, %lo(.xcopyio_err), %o5 25197c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o4 25207c478bd9Sstevel@tonic-gate membar #Sync ! 
sync error barrier 25217c478bd9Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] 25227c478bd9Sstevel@tonic-gate mov %o4, %o5 25237c478bd9Sstevel@tonic-gate 25247c478bd9Sstevel@tonic-gate subcc %g0, %o2, %o3 25257c478bd9Sstevel@tonic-gate add %o0, %o2, %o0 25267c478bd9Sstevel@tonic-gate bz,pn %ncc, 2f ! check for zero bytes 25277c478bd9Sstevel@tonic-gate sub %o2, 1, %o4 25287c478bd9Sstevel@tonic-gate add %o0, %o4, %o0 ! start w/last byte 25297c478bd9Sstevel@tonic-gate add %o1, %o2, %o1 25307c478bd9Sstevel@tonic-gate ldub [%o0 + %o3], %o4 25317c478bd9Sstevel@tonic-gate 25327c478bd9Sstevel@tonic-gate1: stba %o4, [%o1 + %o3]ASI_AIUSL 25337c478bd9Sstevel@tonic-gate inccc %o3 25347c478bd9Sstevel@tonic-gate sub %o0, 2, %o0 ! get next byte 25357c478bd9Sstevel@tonic-gate bcc,a,pt %ncc, 1b 25367c478bd9Sstevel@tonic-gate ldub [%o0 + %o3], %o4 25377c478bd9Sstevel@tonic-gate 25387c478bd9Sstevel@tonic-gate2: 25397c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 25407c478bd9Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 25417c478bd9Sstevel@tonic-gate retl 25427c478bd9Sstevel@tonic-gate mov %g0, %o0 ! return (0) 25437c478bd9Sstevel@tonic-gate 25447c478bd9Sstevel@tonic-gate SET_SIZE(xcopyout_little) 25457c478bd9Sstevel@tonic-gate 25467c478bd9Sstevel@tonic-gate/* 25477c478bd9Sstevel@tonic-gate * Copy user data to kernel space (copyin/xcopyin/xcopyin_little) 25487c478bd9Sstevel@tonic-gate */ 25497c478bd9Sstevel@tonic-gate 25507c478bd9Sstevel@tonic-gate ENTRY(copyin) 25517c478bd9Sstevel@tonic-gate cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case 25527c478bd9Sstevel@tonic-gate bleu,pt %ncc, .copyin_small ! go to larger cases 25537c478bd9Sstevel@tonic-gate xor %o0, %o1, %o3 ! are src, dst alignable? 25547c478bd9Sstevel@tonic-gate btst 7, %o3 ! 25557c478bd9Sstevel@tonic-gate bz,pt %ncc, .copyin_8 ! check for longword alignment 25567c478bd9Sstevel@tonic-gate nop 2557*5d9d9091SRichard Lowe btst 1, %o3 ! 
25587c478bd9Sstevel@tonic-gate bz,pt %ncc, .copyin_2 ! check for half-word 25597c478bd9Sstevel@tonic-gate nop 25607c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit 25617c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_1)], %o3 25627c478bd9Sstevel@tonic-gate tst %o3 25637c478bd9Sstevel@tonic-gate bz,pn %icc, .copyin_small ! if zero, disable HW copy 25647c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 25657c478bd9Sstevel@tonic-gate bleu,pt %ncc, .copyin_small ! go to small copy 25667c478bd9Sstevel@tonic-gate nop 25677c478bd9Sstevel@tonic-gate ba,pt %ncc, .copyin_more ! otherwise go to large copy 25687c478bd9Sstevel@tonic-gate nop 25697c478bd9Sstevel@tonic-gate.copyin_2: 25707c478bd9Sstevel@tonic-gate btst 3, %o3 ! 25717c478bd9Sstevel@tonic-gate bz,pt %ncc, .copyin_4 ! check for word alignment 25727c478bd9Sstevel@tonic-gate nop 25737c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit 25747c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_2)], %o3 25757c478bd9Sstevel@tonic-gate tst %o3 25767c478bd9Sstevel@tonic-gate bz,pn %icc, .copyin_small ! if zero, disable HW copy 25777c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 25787c478bd9Sstevel@tonic-gate bleu,pt %ncc, .copyin_small ! go to small copy 25797c478bd9Sstevel@tonic-gate nop 25807c478bd9Sstevel@tonic-gate ba,pt %ncc, .copyin_more ! otherwise go to large copy 25817c478bd9Sstevel@tonic-gate nop 25827c478bd9Sstevel@tonic-gate.copyin_4: 25837c478bd9Sstevel@tonic-gate ! already checked longword, must be word aligned 25847c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit 25857c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_4)], %o3 25867c478bd9Sstevel@tonic-gate tst %o3 25877c478bd9Sstevel@tonic-gate bz,pn %icc, .copyin_small ! if zero, disable HW copy 25887c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 25897c478bd9Sstevel@tonic-gate bleu,pt %ncc, .copyin_small ! 
go to small copy 25907c478bd9Sstevel@tonic-gate nop 25917c478bd9Sstevel@tonic-gate ba,pt %ncc, .copyin_more ! otherwise go to large copy 25927c478bd9Sstevel@tonic-gate nop 25937c478bd9Sstevel@tonic-gate.copyin_8: 25947c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit 25957c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_8)], %o3 25967c478bd9Sstevel@tonic-gate tst %o3 25977c478bd9Sstevel@tonic-gate bz,pn %icc, .copyin_small ! if zero, disable HW copy 25987c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 25997c478bd9Sstevel@tonic-gate bleu,pt %ncc, .copyin_small ! go to small copy 26007c478bd9Sstevel@tonic-gate nop 26017c478bd9Sstevel@tonic-gate ba,pt %ncc, .copyin_more ! otherwise go to large copy 26027c478bd9Sstevel@tonic-gate nop 26037c478bd9Sstevel@tonic-gate 26047c478bd9Sstevel@tonic-gate .align 16 26057c478bd9Sstevel@tonic-gate nop ! instruction alignment 26067c478bd9Sstevel@tonic-gate ! see discussion at start of file 26077c478bd9Sstevel@tonic-gate.copyin_small: 2608*5d9d9091SRichard Lowe sethi %hi(.sm_copyin_err), %o5 ! .sm_copyin_err is lofault 26097c478bd9Sstevel@tonic-gate or %o5, %lo(.sm_copyin_err), %o5 26107c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o4 ! set/save t_lofault, no tramp 26117c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 26127c478bd9Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] 26137c478bd9Sstevel@tonic-gate.sm_do_copyin: 26147c478bd9Sstevel@tonic-gate mov %o0, SM_SAVE_SRC 26157c478bd9Sstevel@tonic-gate mov %o1, SM_SAVE_DST 26167c478bd9Sstevel@tonic-gate cmp %o2, SHORTCOPY ! check for really short case 26177c478bd9Sstevel@tonic-gate bleu,pt %ncc, .ci_sm_left ! 26187c478bd9Sstevel@tonic-gate mov %o2, SM_SAVE_COUNT 26197c478bd9Sstevel@tonic-gate cmp %o2, CHKSIZE ! check for medium length cases 26207c478bd9Sstevel@tonic-gate bgu,pn %ncc, .ci_med ! 26217c478bd9Sstevel@tonic-gate or %o0, %o1, %o3 ! 
prepare alignment check 26227c478bd9Sstevel@tonic-gate andcc %o3, 0x3, %g0 ! test for alignment 26237c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_word ! branch to word aligned case 26247c478bd9Sstevel@tonic-gate.ci_sm_movebytes: 26257c478bd9Sstevel@tonic-gate sub %o2, 3, %o2 ! adjust count to allow cc zero test 26267c478bd9Sstevel@tonic-gate.ci_sm_notalign4: 26277c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! read byte 26287c478bd9Sstevel@tonic-gate subcc %o2, 4, %o2 ! reduce count by 4 26297c478bd9Sstevel@tonic-gate stb %o3, [%o1] ! write byte 26307c478bd9Sstevel@tonic-gate add %o0, 1, %o0 ! advance SRC by 1 26317c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! repeat for a total of 4 bytes 26327c478bd9Sstevel@tonic-gate add %o0, 1, %o0 ! advance SRC by 1 26337c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 1] 26347c478bd9Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 26357c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 26367c478bd9Sstevel@tonic-gate add %o0, 1, %o0 ! advance SRC by 1 26377c478bd9Sstevel@tonic-gate stb %o3, [%o1 - 2] 26387c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 26397c478bd9Sstevel@tonic-gate add %o0, 1, %o0 ! advance SRC by 1 26407c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_sm_notalign4 ! loop til 3 or fewer bytes remain 26417c478bd9Sstevel@tonic-gate stb %o3, [%o1 - 1] 26427c478bd9Sstevel@tonic-gate add %o2, 3, %o2 ! restore count 26437c478bd9Sstevel@tonic-gate.ci_sm_left: 26447c478bd9Sstevel@tonic-gate tst %o2 26457c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 26467c478bd9Sstevel@tonic-gate nop 26477c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load one byte 26487c478bd9Sstevel@tonic-gate deccc %o2 ! reduce count for cc test 26497c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 26507c478bd9Sstevel@tonic-gate stb %o3,[%o1] ! store one byte 26517c478bd9Sstevel@tonic-gate inc %o0 26527c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! 
load second byte 26537c478bd9Sstevel@tonic-gate deccc %o2 26547c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 26557c478bd9Sstevel@tonic-gate stb %o3,[%o1 + 1] ! store second byte 26567c478bd9Sstevel@tonic-gate inc %o0 26577c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load third byte 26587c478bd9Sstevel@tonic-gate stb %o3,[%o1 + 2] ! store third byte 26597c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 26607c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 26617c478bd9Sstevel@tonic-gate retl 26627c478bd9Sstevel@tonic-gate mov %g0, %o0 ! return 0 26637c478bd9Sstevel@tonic-gate .align 16 26647c478bd9Sstevel@tonic-gate.ci_sm_words: 26657c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 ! read word 26667c478bd9Sstevel@tonic-gate.ci_sm_wordx: 26677c478bd9Sstevel@tonic-gate subcc %o2, 8, %o2 ! update count 26687c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! write word 26697c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! update SRC 26707c478bd9Sstevel@tonic-gate add %o1, 8, %o1 ! update DST 26717c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 ! read word 26727c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! update SRC 26737c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_sm_words ! loop til done 26747c478bd9Sstevel@tonic-gate stw %o3, [%o1 - 4] ! write word 26757c478bd9Sstevel@tonic-gate addcc %o2, 7, %o2 ! restore count 26767c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 26777c478bd9Sstevel@tonic-gate nop 26787c478bd9Sstevel@tonic-gate deccc %o2 26797c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_byte 26807c478bd9Sstevel@tonic-gate.ci_sm_half: 26817c478bd9Sstevel@tonic-gate subcc %o2, 2, %o2 ! reduce count by 2 26827c478bd9Sstevel@tonic-gate lduha [%o0]ASI_USER, %o3 ! read half word 26837c478bd9Sstevel@tonic-gate add %o0, 2, %o0 ! advance SRC by 2 26847c478bd9Sstevel@tonic-gate add %o1, 2, %o1 ! advance DST by 2 26857c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_sm_half ! 
loop til done 26867c478bd9Sstevel@tonic-gate sth %o3, [%o1 - 2] ! write half word 26877c478bd9Sstevel@tonic-gate addcc %o2, 1, %o2 ! restore count 26887c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 26897c478bd9Sstevel@tonic-gate nop 26907c478bd9Sstevel@tonic-gate.ci_sm_byte: 26917c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 26927c478bd9Sstevel@tonic-gate stb %o3, [%o1] 26937c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 26947c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 26957c478bd9Sstevel@tonic-gate retl 26967c478bd9Sstevel@tonic-gate mov %g0, %o0 ! return 0 26977c478bd9Sstevel@tonic-gate .align 16 26987c478bd9Sstevel@tonic-gate.ci_sm_word: 26997c478bd9Sstevel@tonic-gate subcc %o2, 4, %o2 ! update count 27007c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_sm_wordx 27017c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 ! read word 27027c478bd9Sstevel@tonic-gate addcc %o2, 3, %o2 ! restore count 27037c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 27047c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! write word 27057c478bd9Sstevel@tonic-gate deccc %o2 ! reduce count for cc test 27067c478bd9Sstevel@tonic-gate add %o0, 4, %o0 27077c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load one byte 27087c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 27097c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 4] ! store one byte 27107c478bd9Sstevel@tonic-gate inc %o0 27117c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load second byte 27127c478bd9Sstevel@tonic-gate deccc %o2 27137c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 27147c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 5] ! store second byte 27157c478bd9Sstevel@tonic-gate inc %o0 27167c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load third byte 27177c478bd9Sstevel@tonic-gate stb %o3, [%o1 + 6] ! store third byte 27187c478bd9Sstevel@tonic-gate.ci_sm_exit: 27197c478bd9Sstevel@tonic-gate membar #Sync ! 
sync error barrier 27207c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 27217c478bd9Sstevel@tonic-gate retl 27227c478bd9Sstevel@tonic-gate mov %g0, %o0 ! return 0 27237c478bd9Sstevel@tonic-gate 27247c478bd9Sstevel@tonic-gate .align 16 27257c478bd9Sstevel@tonic-gate.ci_med: 27267c478bd9Sstevel@tonic-gate xor %o0, %o1, %o3 ! setup alignment check 27277c478bd9Sstevel@tonic-gate btst 1, %o3 27287c478bd9Sstevel@tonic-gate bnz,pt %ncc, .ci_sm_movebytes ! unaligned 27297c478bd9Sstevel@tonic-gate nop 27307c478bd9Sstevel@tonic-gate btst 3, %o3 27317c478bd9Sstevel@tonic-gate bnz,pt %ncc, .ci_med_half ! halfword aligned 27327c478bd9Sstevel@tonic-gate nop 27337c478bd9Sstevel@tonic-gate btst 7, %o3 27347c478bd9Sstevel@tonic-gate bnz,pt %ncc, .ci_med_word ! word aligned 27357c478bd9Sstevel@tonic-gate nop 27367c478bd9Sstevel@tonic-gate.ci_med_long: 27377c478bd9Sstevel@tonic-gate btst 3, %o0 ! check for 27387c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_med_long1 ! word alignment 27397c478bd9Sstevel@tonic-gate nop 27407c478bd9Sstevel@tonic-gate.ci_med_long0: 27417c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load one byte 27427c478bd9Sstevel@tonic-gate inc %o0 27437c478bd9Sstevel@tonic-gate stb %o3,[%o1] ! store byte 27447c478bd9Sstevel@tonic-gate inc %o1 27457c478bd9Sstevel@tonic-gate btst 3, %o0 27467c478bd9Sstevel@tonic-gate bnz,pt %ncc, .ci_med_long0 27477c478bd9Sstevel@tonic-gate dec %o2 27487c478bd9Sstevel@tonic-gate.ci_med_long1: ! word aligned 27497c478bd9Sstevel@tonic-gate btst 7, %o0 ! check for long word 27507c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_med_long2 27517c478bd9Sstevel@tonic-gate nop 27527c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 ! load word 27537c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 27547c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! store word 27557c478bd9Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 27567c478bd9Sstevel@tonic-gate sub %o2, 4, %o2 ! 
reduce count by 4 27577c478bd9Sstevel@tonic-gate! 27587c478bd9Sstevel@tonic-gate! Now long word aligned and have at least 32 bytes to move 27597c478bd9Sstevel@tonic-gate! 27607c478bd9Sstevel@tonic-gate.ci_med_long2: 27617c478bd9Sstevel@tonic-gate sub %o2, 31, %o2 ! adjust count to allow cc zero test 27627c478bd9Sstevel@tonic-gate.ci_med_lmove: 27637c478bd9Sstevel@tonic-gate ldxa [%o0]ASI_USER, %o3 ! read long word 27647c478bd9Sstevel@tonic-gate subcc %o2, 32, %o2 ! reduce count by 32 27657c478bd9Sstevel@tonic-gate stx %o3, [%o1] ! write long word 27667c478bd9Sstevel@tonic-gate add %o0, 8, %o0 ! advance SRC by 8 27677c478bd9Sstevel@tonic-gate ldxa [%o0]ASI_USER, %o3 ! repeat for a total for 4 long words 27687c478bd9Sstevel@tonic-gate add %o0, 8, %o0 ! advance SRC by 8 27697c478bd9Sstevel@tonic-gate stx %o3, [%o1 + 8] 27707c478bd9Sstevel@tonic-gate add %o1, 32, %o1 ! advance DST by 32 27717c478bd9Sstevel@tonic-gate ldxa [%o0]ASI_USER, %o3 27727c478bd9Sstevel@tonic-gate add %o0, 8, %o0 ! advance SRC by 8 27737c478bd9Sstevel@tonic-gate stx %o3, [%o1 - 16] 27747c478bd9Sstevel@tonic-gate ldxa [%o0]ASI_USER, %o3 27757c478bd9Sstevel@tonic-gate add %o0, 8, %o0 ! advance SRC by 8 27767c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_med_lmove ! loop til 31 or fewer bytes left 27777c478bd9Sstevel@tonic-gate stx %o3, [%o1 - 8] 27787c478bd9Sstevel@tonic-gate addcc %o2, 24, %o2 ! restore count to long word offset 27797c478bd9Sstevel@tonic-gate ble,pt %ncc, .ci_med_lextra ! check for more long words to move 27807c478bd9Sstevel@tonic-gate nop 27817c478bd9Sstevel@tonic-gate.ci_med_lword: 27827c478bd9Sstevel@tonic-gate ldxa [%o0]ASI_USER, %o3 ! read long word 27837c478bd9Sstevel@tonic-gate subcc %o2, 8, %o2 ! reduce count by 8 27847c478bd9Sstevel@tonic-gate stx %o3, [%o1] ! write long word 27857c478bd9Sstevel@tonic-gate add %o0, 8, %o0 ! advance SRC by 8 27867c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_med_lword ! 
loop til 7 or fewer bytes left 27877c478bd9Sstevel@tonic-gate add %o1, 8, %o1 ! advance DST by 8 27887c478bd9Sstevel@tonic-gate.ci_med_lextra: 27897c478bd9Sstevel@tonic-gate addcc %o2, 7, %o2 ! restore rest of count 27907c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit ! if zero, then done 27917c478bd9Sstevel@tonic-gate deccc %o2 27927c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_byte 27937c478bd9Sstevel@tonic-gate nop 27947c478bd9Sstevel@tonic-gate ba,pt %ncc, .ci_sm_half 27957c478bd9Sstevel@tonic-gate nop 27967c478bd9Sstevel@tonic-gate 27977c478bd9Sstevel@tonic-gate .align 16 27987c478bd9Sstevel@tonic-gate nop ! instruction alignment 27997c478bd9Sstevel@tonic-gate ! see discussion at start of file 28007c478bd9Sstevel@tonic-gate.ci_med_word: 28017c478bd9Sstevel@tonic-gate btst 3, %o0 ! check for 28027c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_med_word1 ! word alignment 28037c478bd9Sstevel@tonic-gate nop 28047c478bd9Sstevel@tonic-gate.ci_med_word0: 28057c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load one byte 28067c478bd9Sstevel@tonic-gate inc %o0 28077c478bd9Sstevel@tonic-gate stb %o3,[%o1] ! store byte 28087c478bd9Sstevel@tonic-gate inc %o1 28097c478bd9Sstevel@tonic-gate btst 3, %o0 28107c478bd9Sstevel@tonic-gate bnz,pt %ncc, .ci_med_word0 28117c478bd9Sstevel@tonic-gate dec %o2 28127c478bd9Sstevel@tonic-gate! 28137c478bd9Sstevel@tonic-gate! Now word aligned and have at least 36 bytes to move 28147c478bd9Sstevel@tonic-gate! 28157c478bd9Sstevel@tonic-gate.ci_med_word1: 28167c478bd9Sstevel@tonic-gate sub %o2, 15, %o2 ! adjust count to allow cc zero test 28177c478bd9Sstevel@tonic-gate.ci_med_wmove: 28187c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 ! read word 28197c478bd9Sstevel@tonic-gate subcc %o2, 16, %o2 ! reduce count by 16 28207c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! write word 28217c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 28227c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 ! 
repeat for a total for 4 words 28237c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 28247c478bd9Sstevel@tonic-gate stw %o3, [%o1 + 4] 28257c478bd9Sstevel@tonic-gate add %o1, 16, %o1 ! advance DST by 16 28267c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 28277c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 28287c478bd9Sstevel@tonic-gate stw %o3, [%o1 - 8] 28297c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 28307c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 28317c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_med_wmove ! loop til 15 or fewer bytes left 28327c478bd9Sstevel@tonic-gate stw %o3, [%o1 - 4] 28337c478bd9Sstevel@tonic-gate addcc %o2, 12, %o2 ! restore count to word offset 28347c478bd9Sstevel@tonic-gate ble,pt %ncc, .ci_med_wextra ! check for more words to move 28357c478bd9Sstevel@tonic-gate nop 28367c478bd9Sstevel@tonic-gate.ci_med_word2: 28377c478bd9Sstevel@tonic-gate lduwa [%o0]ASI_USER, %o3 ! read word 28387c478bd9Sstevel@tonic-gate subcc %o2, 4, %o2 ! reduce count by 4 28397c478bd9Sstevel@tonic-gate stw %o3, [%o1] ! write word 28407c478bd9Sstevel@tonic-gate add %o0, 4, %o0 ! advance SRC by 4 28417c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_med_word2 ! loop til 3 or fewer bytes left 28427c478bd9Sstevel@tonic-gate add %o1, 4, %o1 ! advance DST by 4 28437c478bd9Sstevel@tonic-gate.ci_med_wextra: 28447c478bd9Sstevel@tonic-gate addcc %o2, 3, %o2 ! restore rest of count 28457c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit ! if zero, then done 28467c478bd9Sstevel@tonic-gate deccc %o2 28477c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_byte 28487c478bd9Sstevel@tonic-gate nop 28497c478bd9Sstevel@tonic-gate ba,pt %ncc, .ci_sm_half 28507c478bd9Sstevel@tonic-gate nop 28517c478bd9Sstevel@tonic-gate 28527c478bd9Sstevel@tonic-gate .align 16 28537c478bd9Sstevel@tonic-gate nop ! instruction alignment 28547c478bd9Sstevel@tonic-gate ! 
see discussion at start of file 28557c478bd9Sstevel@tonic-gate.ci_med_half: 28567c478bd9Sstevel@tonic-gate btst 1, %o0 ! check for 28577c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_med_half1 ! half word alignment 28587c478bd9Sstevel@tonic-gate nop 28597c478bd9Sstevel@tonic-gate lduba [%o0]ASI_USER, %o3 ! load one byte 28607c478bd9Sstevel@tonic-gate inc %o0 28617c478bd9Sstevel@tonic-gate stb %o3,[%o1] ! store byte 28627c478bd9Sstevel@tonic-gate inc %o1 28637c478bd9Sstevel@tonic-gate dec %o2 28647c478bd9Sstevel@tonic-gate! 28657c478bd9Sstevel@tonic-gate! Now half word aligned and have at least 38 bytes to move 28667c478bd9Sstevel@tonic-gate! 28677c478bd9Sstevel@tonic-gate.ci_med_half1: 28687c478bd9Sstevel@tonic-gate sub %o2, 7, %o2 ! adjust count to allow cc zero test 28697c478bd9Sstevel@tonic-gate.ci_med_hmove: 28707c478bd9Sstevel@tonic-gate lduha [%o0]ASI_USER, %o3 ! read half word 28717c478bd9Sstevel@tonic-gate subcc %o2, 8, %o2 ! reduce count by 8 28727c478bd9Sstevel@tonic-gate sth %o3, [%o1] ! write half word 28737c478bd9Sstevel@tonic-gate add %o0, 2, %o0 ! advance SRC by 2 28747c478bd9Sstevel@tonic-gate lduha [%o0]ASI_USER, %o3 ! repeat for a total for 4 halfwords 28757c478bd9Sstevel@tonic-gate add %o0, 2, %o0 ! advance SRC by 2 28767c478bd9Sstevel@tonic-gate sth %o3, [%o1 + 2] 28777c478bd9Sstevel@tonic-gate add %o1, 8, %o1 ! advance DST by 8 28787c478bd9Sstevel@tonic-gate lduha [%o0]ASI_USER, %o3 28797c478bd9Sstevel@tonic-gate add %o0, 2, %o0 ! advance SRC by 2 28807c478bd9Sstevel@tonic-gate sth %o3, [%o1 - 4] 28817c478bd9Sstevel@tonic-gate lduha [%o0]ASI_USER, %o3 28827c478bd9Sstevel@tonic-gate add %o0, 2, %o0 ! advance SRC by 2 28837c478bd9Sstevel@tonic-gate bgt,pt %ncc, .ci_med_hmove ! loop til 7 or fewer bytes left 28847c478bd9Sstevel@tonic-gate sth %o3, [%o1 - 2] 28857c478bd9Sstevel@tonic-gate addcc %o2, 7, %o2 ! 
restore count 28867c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_exit 28877c478bd9Sstevel@tonic-gate deccc %o2 28887c478bd9Sstevel@tonic-gate bz,pt %ncc, .ci_sm_byte 28897c478bd9Sstevel@tonic-gate nop 28907c478bd9Sstevel@tonic-gate ba,pt %ncc, .ci_sm_half 28917c478bd9Sstevel@tonic-gate nop 28927c478bd9Sstevel@tonic-gate 28937c478bd9Sstevel@tonic-gate.sm_copyin_err: 28947c478bd9Sstevel@tonic-gate membar #Sync 28957c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 28967c478bd9Sstevel@tonic-gate mov SM_SAVE_SRC, %o0 28977c478bd9Sstevel@tonic-gate mov SM_SAVE_DST, %o1 28987c478bd9Sstevel@tonic-gate mov SM_SAVE_COUNT, %o2 28997c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler 29007c478bd9Sstevel@tonic-gate tst %o3 29017c478bd9Sstevel@tonic-gate bz,pt %ncc, 3f ! if not, return error 29027c478bd9Sstevel@tonic-gate nop 29037c478bd9Sstevel@tonic-gate ldn [%o3 + CP_COPYIN], %o5 ! if handler, invoke it with 29047c478bd9Sstevel@tonic-gate jmp %o5 ! original arguments 29057c478bd9Sstevel@tonic-gate nop 29067c478bd9Sstevel@tonic-gate3: 29077c478bd9Sstevel@tonic-gate retl 29087c478bd9Sstevel@tonic-gate or %g0, -1, %o0 ! return errno value 29097c478bd9Sstevel@tonic-gate 29107c478bd9Sstevel@tonic-gate SET_SIZE(copyin) 29117c478bd9Sstevel@tonic-gate 29127c478bd9Sstevel@tonic-gate 29137c478bd9Sstevel@tonic-gate/* 29147c478bd9Sstevel@tonic-gate * The _more entry points are not intended to be used directly by 29157c478bd9Sstevel@tonic-gate * any caller from outside this file. They are provided to allow 29167c478bd9Sstevel@tonic-gate * profiling and dtrace of the portions of the copy code that uses 29177c478bd9Sstevel@tonic-gate * the floating point registers. 29187c478bd9Sstevel@tonic-gate * This entry is particularly important as DTRACE (at least as of 29197c478bd9Sstevel@tonic-gate * 4/2004) does not support leaf functions. 
29207c478bd9Sstevel@tonic-gate */ 29217c478bd9Sstevel@tonic-gate 29227c478bd9Sstevel@tonic-gate ENTRY(copyin_more) 29237c478bd9Sstevel@tonic-gate.copyin_more: 29247c478bd9Sstevel@tonic-gate save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 29257c478bd9Sstevel@tonic-gate set .copyin_err, REAL_LOFAULT 29267c478bd9Sstevel@tonic-gate 29277c478bd9Sstevel@tonic-gate/* 29287c478bd9Sstevel@tonic-gate * Copy ins that reach here are larger than VIS_COPY_THRESHOLD bytes 29297c478bd9Sstevel@tonic-gate */ 29307c478bd9Sstevel@tonic-gate.do_copyin: 29317c478bd9Sstevel@tonic-gate set copyio_fault, %l7 ! .copyio_fault is lofault val 29327c478bd9Sstevel@tonic-gate 29337c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %l6 ! save existing handler 29347c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 29357c478bd9Sstevel@tonic-gate stn %l7, [THREAD_REG + T_LOFAULT] ! set t_lofault 29367c478bd9Sstevel@tonic-gate 29377c478bd9Sstevel@tonic-gate mov %i0, SAVE_SRC 29387c478bd9Sstevel@tonic-gate mov %i1, SAVE_DST 29397c478bd9Sstevel@tonic-gate mov %i2, SAVE_COUNT 29407c478bd9Sstevel@tonic-gate 29417c478bd9Sstevel@tonic-gate FP_NOMIGRATE(6, 7) 29427c478bd9Sstevel@tonic-gate 29437c478bd9Sstevel@tonic-gate rd %fprs, %o2 ! check for unused fp 29447c478bd9Sstevel@tonic-gate st %o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs 29457c478bd9Sstevel@tonic-gate btst FPRS_FEF, %o2 29467c478bd9Sstevel@tonic-gate bz,a,pt %icc, .do_blockcopyin 29477c478bd9Sstevel@tonic-gate wr %g0, FPRS_FEF, %fprs 29487c478bd9Sstevel@tonic-gate 29497c478bd9Sstevel@tonic-gate BST_FPQ2Q4_TOSTACK(%o2) 29507c478bd9Sstevel@tonic-gate 29517c478bd9Sstevel@tonic-gate.do_blockcopyin: 29527c478bd9Sstevel@tonic-gate rd %gsr, %o2 29537c478bd9Sstevel@tonic-gate stx %o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET] ! 
save gsr 29547c478bd9Sstevel@tonic-gate or %l6, FPUSED_FLAG, %l6 29557c478bd9Sstevel@tonic-gate 29567c478bd9Sstevel@tonic-gate andcc DST, VIS_BLOCKSIZE - 1, TMP 29577c478bd9Sstevel@tonic-gate mov ASI_USER, %asi 29587c478bd9Sstevel@tonic-gate bz,pt %ncc, 2f 29597c478bd9Sstevel@tonic-gate neg TMP 29607c478bd9Sstevel@tonic-gate add TMP, VIS_BLOCKSIZE, TMP 29617c478bd9Sstevel@tonic-gate 29627c478bd9Sstevel@tonic-gate ! TMP = bytes required to align DST on FP_BLOCK boundary 29637c478bd9Sstevel@tonic-gate ! Using SRC as a tmp here 29647c478bd9Sstevel@tonic-gate cmp TMP, 3 29657c478bd9Sstevel@tonic-gate bleu,pt %ncc, 1f 29667c478bd9Sstevel@tonic-gate sub CNT,TMP,CNT ! adjust main count 29677c478bd9Sstevel@tonic-gate sub TMP, 3, TMP ! adjust for end of loop test 29687c478bd9Sstevel@tonic-gate.ci_blkalign: 29697c478bd9Sstevel@tonic-gate lduba [REALSRC]%asi, SRC ! move 4 bytes per loop iteration 29707c478bd9Sstevel@tonic-gate stb SRC, [DST] 29717c478bd9Sstevel@tonic-gate subcc TMP, 4, TMP 29727c478bd9Sstevel@tonic-gate lduba [REALSRC + 1]%asi, SRC 29737c478bd9Sstevel@tonic-gate add REALSRC, 4, REALSRC 29747c478bd9Sstevel@tonic-gate stb SRC, [DST + 1] 29757c478bd9Sstevel@tonic-gate lduba [REALSRC - 2]%asi, SRC 29767c478bd9Sstevel@tonic-gate add DST, 4, DST 29777c478bd9Sstevel@tonic-gate stb SRC, [DST - 2] 29787c478bd9Sstevel@tonic-gate lduba [REALSRC - 1]%asi, SRC 29797c478bd9Sstevel@tonic-gate bgu,pt %ncc, .ci_blkalign 29807c478bd9Sstevel@tonic-gate stb SRC, [DST - 1] 29817c478bd9Sstevel@tonic-gate 29827c478bd9Sstevel@tonic-gate addcc TMP, 3, TMP ! restore count adjustment 29837c478bd9Sstevel@tonic-gate bz,pt %ncc, 2f ! no bytes left? 
29847c478bd9Sstevel@tonic-gate nop 29857c478bd9Sstevel@tonic-gate1: lduba [REALSRC]%asi, SRC 29867c478bd9Sstevel@tonic-gate inc REALSRC 29877c478bd9Sstevel@tonic-gate inc DST 29887c478bd9Sstevel@tonic-gate deccc TMP 29897c478bd9Sstevel@tonic-gate bgu %ncc, 1b 29907c478bd9Sstevel@tonic-gate stb SRC, [DST - 1] 29917c478bd9Sstevel@tonic-gate 29927c478bd9Sstevel@tonic-gate2: 29937c478bd9Sstevel@tonic-gate andn REALSRC, 0x7, SRC 29947c478bd9Sstevel@tonic-gate alignaddr REALSRC, %g0, %g0 29957c478bd9Sstevel@tonic-gate 29967c478bd9Sstevel@tonic-gate ! SRC - 8-byte aligned 29977c478bd9Sstevel@tonic-gate ! DST - 64-byte aligned 29987c478bd9Sstevel@tonic-gate prefetcha [SRC]%asi, #one_read 29997c478bd9Sstevel@tonic-gate prefetcha [SRC + (1 * VIS_BLOCKSIZE)]%asi, #one_read 30007c478bd9Sstevel@tonic-gate prefetcha [SRC + (2 * VIS_BLOCKSIZE)]%asi, #one_read 30017c478bd9Sstevel@tonic-gate prefetcha [SRC + (3 * VIS_BLOCKSIZE)]%asi, #one_read 30027c478bd9Sstevel@tonic-gate ldda [SRC]%asi, %f16 30037c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 4 30047c478bd9Sstevel@tonic-gate prefetcha [SRC + (4 * VIS_BLOCKSIZE)]%asi, #one_read 30057c478bd9Sstevel@tonic-gate#endif 30067c478bd9Sstevel@tonic-gate ldda [SRC + 0x08]%asi, %f18 30077c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 5 30087c478bd9Sstevel@tonic-gate prefetcha [SRC + (5 * VIS_BLOCKSIZE)]%asi, #one_read 30097c478bd9Sstevel@tonic-gate#endif 30107c478bd9Sstevel@tonic-gate ldda [SRC + 0x10]%asi, %f20 30117c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 6 30127c478bd9Sstevel@tonic-gate prefetcha [SRC + (6 * VIS_BLOCKSIZE)]%asi, #one_read 30137c478bd9Sstevel@tonic-gate#endif 30147c478bd9Sstevel@tonic-gate faligndata %f16, %f18, %f48 30157c478bd9Sstevel@tonic-gate ldda [SRC + 0x18]%asi, %f22 30167c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 7 30177c478bd9Sstevel@tonic-gate prefetcha [SRC + (7 * VIS_BLOCKSIZE)]%asi, #one_read 30187c478bd9Sstevel@tonic-gate#endif 30197c478bd9Sstevel@tonic-gate faligndata %f18, %f20, %f50 
30207c478bd9Sstevel@tonic-gate ldda [SRC + 0x20]%asi, %f24 30217c478bd9Sstevel@tonic-gate faligndata %f20, %f22, %f52 30227c478bd9Sstevel@tonic-gate ldda [SRC + 0x28]%asi, %f26 30237c478bd9Sstevel@tonic-gate faligndata %f22, %f24, %f54 30247c478bd9Sstevel@tonic-gate ldda [SRC + 0x30]%asi, %f28 30257c478bd9Sstevel@tonic-gate faligndata %f24, %f26, %f56 30267c478bd9Sstevel@tonic-gate ldda [SRC + 0x38]%asi, %f30 30277c478bd9Sstevel@tonic-gate faligndata %f26, %f28, %f58 30287c478bd9Sstevel@tonic-gate ldda [SRC + VIS_BLOCKSIZE]%asi, %f16 30297c478bd9Sstevel@tonic-gate sub CNT, VIS_BLOCKSIZE, CNT 30307c478bd9Sstevel@tonic-gate add SRC, VIS_BLOCKSIZE, SRC 30317c478bd9Sstevel@tonic-gate add REALSRC, VIS_BLOCKSIZE, REALSRC 30327c478bd9Sstevel@tonic-gate ba,a,pt %ncc, 1f 30337c478bd9Sstevel@tonic-gate nop 30347c478bd9Sstevel@tonic-gate .align 16 30357c478bd9Sstevel@tonic-gate1: 30367c478bd9Sstevel@tonic-gate ldda [SRC + 0x08]%asi, %f18 30377c478bd9Sstevel@tonic-gate faligndata %f28, %f30, %f60 30387c478bd9Sstevel@tonic-gate ldda [SRC + 0x10]%asi, %f20 30397c478bd9Sstevel@tonic-gate faligndata %f30, %f16, %f62 30407c478bd9Sstevel@tonic-gate stda %f48, [DST]ASI_BLK_P 30417c478bd9Sstevel@tonic-gate ldda [SRC + 0x18]%asi, %f22 30427c478bd9Sstevel@tonic-gate faligndata %f16, %f18, %f48 30437c478bd9Sstevel@tonic-gate ldda [SRC + 0x20]%asi, %f24 30447c478bd9Sstevel@tonic-gate faligndata %f18, %f20, %f50 30457c478bd9Sstevel@tonic-gate ldda [SRC + 0x28]%asi, %f26 30467c478bd9Sstevel@tonic-gate faligndata %f20, %f22, %f52 30477c478bd9Sstevel@tonic-gate ldda [SRC + 0x30]%asi, %f28 30487c478bd9Sstevel@tonic-gate faligndata %f22, %f24, %f54 30497c478bd9Sstevel@tonic-gate ldda [SRC + 0x38]%asi, %f30 30507c478bd9Sstevel@tonic-gate faligndata %f24, %f26, %f56 30517c478bd9Sstevel@tonic-gate sub CNT, VIS_BLOCKSIZE, CNT 30527c478bd9Sstevel@tonic-gate ldda [SRC + VIS_BLOCKSIZE]%asi, %f16 30537c478bd9Sstevel@tonic-gate faligndata %f26, %f28, %f58 30547c478bd9Sstevel@tonic-gate prefetcha [SRC + 
((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8]%asi, #one_read 30557c478bd9Sstevel@tonic-gate add DST, VIS_BLOCKSIZE, DST 30567c478bd9Sstevel@tonic-gate prefetcha [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)]%asi, #one_read 30577c478bd9Sstevel@tonic-gate add REALSRC, VIS_BLOCKSIZE, REALSRC 30587c478bd9Sstevel@tonic-gate cmp CNT, VIS_BLOCKSIZE + 8 30597c478bd9Sstevel@tonic-gate bgu,pt %ncc, 1b 30607c478bd9Sstevel@tonic-gate add SRC, VIS_BLOCKSIZE, SRC 30617c478bd9Sstevel@tonic-gate 30627c478bd9Sstevel@tonic-gate ! only if REALSRC & 0x7 is 0 30637c478bd9Sstevel@tonic-gate cmp CNT, VIS_BLOCKSIZE 30647c478bd9Sstevel@tonic-gate bne %ncc, 3f 30657c478bd9Sstevel@tonic-gate andcc REALSRC, 0x7, %g0 30667c478bd9Sstevel@tonic-gate bz,pt %ncc, 2f 30677c478bd9Sstevel@tonic-gate nop 3068*5d9d9091SRichard Lowe3: 30697c478bd9Sstevel@tonic-gate faligndata %f28, %f30, %f60 30707c478bd9Sstevel@tonic-gate faligndata %f30, %f16, %f62 30717c478bd9Sstevel@tonic-gate stda %f48, [DST]ASI_BLK_P 30727c478bd9Sstevel@tonic-gate add DST, VIS_BLOCKSIZE, DST 30737c478bd9Sstevel@tonic-gate ba,pt %ncc, 3f 30747c478bd9Sstevel@tonic-gate nop 30757c478bd9Sstevel@tonic-gate2: 30767c478bd9Sstevel@tonic-gate ldda [SRC + 0x08]%asi, %f18 30777c478bd9Sstevel@tonic-gate fsrc1 %f28, %f60 30787c478bd9Sstevel@tonic-gate ldda [SRC + 0x10]%asi, %f20 30797c478bd9Sstevel@tonic-gate fsrc1 %f30, %f62 30807c478bd9Sstevel@tonic-gate stda %f48, [DST]ASI_BLK_P 30817c478bd9Sstevel@tonic-gate ldda [SRC + 0x18]%asi, %f22 30827c478bd9Sstevel@tonic-gate fsrc1 %f16, %f48 30837c478bd9Sstevel@tonic-gate ldda [SRC + 0x20]%asi, %f24 30847c478bd9Sstevel@tonic-gate fsrc1 %f18, %f50 30857c478bd9Sstevel@tonic-gate ldda [SRC + 0x28]%asi, %f26 30867c478bd9Sstevel@tonic-gate fsrc1 %f20, %f52 30877c478bd9Sstevel@tonic-gate ldda [SRC + 0x30]%asi, %f28 30887c478bd9Sstevel@tonic-gate fsrc1 %f22, %f54 30897c478bd9Sstevel@tonic-gate ldda [SRC + 0x38]%asi, %f30 30907c478bd9Sstevel@tonic-gate fsrc1 %f24, %f56 30917c478bd9Sstevel@tonic-gate sub CNT, 
VIS_BLOCKSIZE, CNT 30927c478bd9Sstevel@tonic-gate add DST, VIS_BLOCKSIZE, DST 30937c478bd9Sstevel@tonic-gate add SRC, VIS_BLOCKSIZE, SRC 30947c478bd9Sstevel@tonic-gate add REALSRC, VIS_BLOCKSIZE, REALSRC 30957c478bd9Sstevel@tonic-gate fsrc1 %f26, %f58 30967c478bd9Sstevel@tonic-gate fsrc1 %f28, %f60 30977c478bd9Sstevel@tonic-gate fsrc1 %f30, %f62 30987c478bd9Sstevel@tonic-gate stda %f48, [DST]ASI_BLK_P 30997c478bd9Sstevel@tonic-gate add DST, VIS_BLOCKSIZE, DST 31007c478bd9Sstevel@tonic-gate ba,a,pt %ncc, 4f 31017c478bd9Sstevel@tonic-gate nop 31027c478bd9Sstevel@tonic-gate 31037c478bd9Sstevel@tonic-gate3: tst CNT 31047c478bd9Sstevel@tonic-gate bz,a %ncc, 4f 31057c478bd9Sstevel@tonic-gate nop 31067c478bd9Sstevel@tonic-gate 31077c478bd9Sstevel@tonic-gate5: lduba [REALSRC]ASI_USER, TMP 31087c478bd9Sstevel@tonic-gate inc REALSRC 31097c478bd9Sstevel@tonic-gate inc DST 31107c478bd9Sstevel@tonic-gate deccc CNT 31117c478bd9Sstevel@tonic-gate bgu %ncc, 5b 31127c478bd9Sstevel@tonic-gate stb TMP, [DST - 1] 31137c478bd9Sstevel@tonic-gate4: 31147c478bd9Sstevel@tonic-gate 31157c478bd9Sstevel@tonic-gate.copyin_exit: 31167c478bd9Sstevel@tonic-gate membar #Sync 31177c478bd9Sstevel@tonic-gate 31187c478bd9Sstevel@tonic-gate FPRAS_INTERVAL(FPRAS_COPYIN, 1, %l5, %o2, %o3, %o4, %o5, 8) 31197c478bd9Sstevel@tonic-gate FPRAS_REWRITE_TYPE1(1, %l5, %f48, %o2, 9) 31207c478bd9Sstevel@tonic-gate FPRAS_CHECK(FPRAS_COPYIN, %l5, 9) ! lose outputs 31217c478bd9Sstevel@tonic-gate 31227c478bd9Sstevel@tonic-gate ldx [%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2 ! 
restore gsr 31237c478bd9Sstevel@tonic-gate wr %o2, 0, %gsr 31247c478bd9Sstevel@tonic-gate 31257c478bd9Sstevel@tonic-gate ld [%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3 31267c478bd9Sstevel@tonic-gate btst FPRS_FEF, %o3 31277c478bd9Sstevel@tonic-gate bz,pt %icc, 4f 31287c478bd9Sstevel@tonic-gate nop 31297c478bd9Sstevel@tonic-gate 31307c478bd9Sstevel@tonic-gate BLD_FPQ2Q4_FROMSTACK(%o2) 31317c478bd9Sstevel@tonic-gate 31327c478bd9Sstevel@tonic-gate ba,pt %ncc, 1f 31337c478bd9Sstevel@tonic-gate wr %o3, 0, %fprs ! restore fprs 31347c478bd9Sstevel@tonic-gate 31357c478bd9Sstevel@tonic-gate4: 31367c478bd9Sstevel@tonic-gate FZEROQ2Q4 31377c478bd9Sstevel@tonic-gate wr %o3, 0, %fprs ! restore fprs 31387c478bd9Sstevel@tonic-gate 31397c478bd9Sstevel@tonic-gate1: 31407c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 31417c478bd9Sstevel@tonic-gate andn %l6, FPUSED_FLAG, %l6 31427c478bd9Sstevel@tonic-gate stn %l6, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 31437c478bd9Sstevel@tonic-gate FP_ALLOWMIGRATE(5, 6) 31447c478bd9Sstevel@tonic-gate ret 31457c478bd9Sstevel@tonic-gate restore %g0, 0, %o0 31467c478bd9Sstevel@tonic-gate/* 31477c478bd9Sstevel@tonic-gate * We got here because of a fault during copyin 31487c478bd9Sstevel@tonic-gate * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh). 31497c478bd9Sstevel@tonic-gate */ 31507c478bd9Sstevel@tonic-gate.copyin_err: 31517c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler 31527c478bd9Sstevel@tonic-gate tst %o4 31537c478bd9Sstevel@tonic-gate bz,pt %ncc, 2f ! if not, return error 31547c478bd9Sstevel@tonic-gate nop 31557c478bd9Sstevel@tonic-gate ldn [%o4 + CP_COPYIN], %g2 ! if handler, invoke it with 31567c478bd9Sstevel@tonic-gate jmp %g2 ! original arguments 31577c478bd9Sstevel@tonic-gate restore %g0, 0, %g0 ! dispose of copy window 31587c478bd9Sstevel@tonic-gate2: 31597c478bd9Sstevel@tonic-gate ret 31607c478bd9Sstevel@tonic-gate restore %g0, -1, %o0 ! 
return error value 31617c478bd9Sstevel@tonic-gate 31627c478bd9Sstevel@tonic-gate 31637c478bd9Sstevel@tonic-gate SET_SIZE(copyin_more) 31647c478bd9Sstevel@tonic-gate 31657c478bd9Sstevel@tonic-gate ENTRY(xcopyin) 31667c478bd9Sstevel@tonic-gate 31677c478bd9Sstevel@tonic-gate cmp %o2, VIS_COPY_THRESHOLD ! check for leaf rtn case 31687c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyin_small ! go to larger cases 31697c478bd9Sstevel@tonic-gate xor %o0, %o1, %o3 ! are src, dst alignable? 31707c478bd9Sstevel@tonic-gate btst 7, %o3 ! 31717c478bd9Sstevel@tonic-gate bz,pt %ncc, .xcopyin_8 ! check for longword alignment 31727c478bd9Sstevel@tonic-gate nop 3173*5d9d9091SRichard Lowe btst 1, %o3 ! 31747c478bd9Sstevel@tonic-gate bz,pt %ncc, .xcopyin_2 ! check for half-word 31757c478bd9Sstevel@tonic-gate nop 31767c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_1), %o3 ! Check copy limit 31777c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_1)], %o3 31787c478bd9Sstevel@tonic-gate tst %o3 31797c478bd9Sstevel@tonic-gate bz,pn %icc, .xcopyin_small ! if zero, disable HW copy 31807c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 31817c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyin_small ! go to small copy 31827c478bd9Sstevel@tonic-gate nop 31837c478bd9Sstevel@tonic-gate ba,pt %ncc, .xcopyin_more ! otherwise go to large copy 31847c478bd9Sstevel@tonic-gate nop 31857c478bd9Sstevel@tonic-gate.xcopyin_2: 31867c478bd9Sstevel@tonic-gate btst 3, %o3 ! 31877c478bd9Sstevel@tonic-gate bz,pt %ncc, .xcopyin_4 ! check for word alignment 31887c478bd9Sstevel@tonic-gate nop 31897c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_2), %o3 ! Check copy limit 31907c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_2)], %o3 31917c478bd9Sstevel@tonic-gate tst %o3 31927c478bd9Sstevel@tonic-gate bz,pn %icc, .xcopyin_small ! if zero, disable HW copy 31937c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 31947c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyin_small ! 
go to small copy 31957c478bd9Sstevel@tonic-gate nop 31967c478bd9Sstevel@tonic-gate ba,pt %ncc, .xcopyin_more ! otherwise go to large copy 31977c478bd9Sstevel@tonic-gate nop 31987c478bd9Sstevel@tonic-gate.xcopyin_4: 31997c478bd9Sstevel@tonic-gate ! already checked longword, must be word aligned 32007c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_4), %o3 ! Check copy limit 32017c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_4)], %o3 32027c478bd9Sstevel@tonic-gate tst %o3 32037c478bd9Sstevel@tonic-gate bz,pn %icc, .xcopyin_small ! if zero, disable HW copy 32047c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 32057c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyin_small ! go to small copy 32067c478bd9Sstevel@tonic-gate nop 32077c478bd9Sstevel@tonic-gate ba,pt %ncc, .xcopyin_more ! otherwise go to large copy 32087c478bd9Sstevel@tonic-gate nop 32097c478bd9Sstevel@tonic-gate.xcopyin_8: 32107c478bd9Sstevel@tonic-gate sethi %hi(hw_copy_limit_8), %o3 ! Check copy limit 32117c478bd9Sstevel@tonic-gate ld [%o3 + %lo(hw_copy_limit_8)], %o3 32127c478bd9Sstevel@tonic-gate tst %o3 32137c478bd9Sstevel@tonic-gate bz,pn %icc, .xcopyin_small ! if zero, disable HW copy 32147c478bd9Sstevel@tonic-gate cmp %o2, %o3 ! if length <= limit 32157c478bd9Sstevel@tonic-gate bleu,pt %ncc, .xcopyin_small ! go to small copy 32167c478bd9Sstevel@tonic-gate nop 32177c478bd9Sstevel@tonic-gate ba,pt %ncc, .xcopyin_more ! otherwise go to large copy 32187c478bd9Sstevel@tonic-gate nop 32197c478bd9Sstevel@tonic-gate 32207c478bd9Sstevel@tonic-gate.xcopyin_small: 32217c478bd9Sstevel@tonic-gate sethi %hi(.sm_xcopyin_err), %o5 ! .sm_xcopyin_err is lofault value 32227c478bd9Sstevel@tonic-gate or %o5, %lo(.sm_xcopyin_err), %o5 32237c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_LOFAULT], %o4 ! set/save t_lofaul 32247c478bd9Sstevel@tonic-gate membar #Sync ! sync error barrier 32257c478bd9Sstevel@tonic-gate ba,pt %ncc, .sm_do_copyin ! 
common code 32267c478bd9Sstevel@tonic-gate stn %o5, [THREAD_REG + T_LOFAULT] 3227*5d9d9091SRichard Lowe 32287c478bd9Sstevel@tonic-gate.xcopyin_more: 32297c478bd9Sstevel@tonic-gate save %sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp 32307c478bd9Sstevel@tonic-gate sethi %hi(.xcopyin_err), REAL_LOFAULT ! .xcopyin_err is lofault value 32317c478bd9Sstevel@tonic-gate ba,pt %ncc, .do_copyin 32327c478bd9Sstevel@tonic-gate or REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT 32337c478bd9Sstevel@tonic-gate 32347c478bd9Sstevel@tonic-gate/* 32357c478bd9Sstevel@tonic-gate * We got here because of fault during xcopyin 32367c478bd9Sstevel@tonic-gate * Errno value is in ERRNO 32377c478bd9Sstevel@tonic-gate */ 32387c478bd9Sstevel@tonic-gate.xcopyin_err: 32397c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o4 ! check for copyop handler 32407c478bd9Sstevel@tonic-gate tst %o4 32417c478bd9Sstevel@tonic-gate bz,pt %ncc, 2f ! if not, return error 32427c478bd9Sstevel@tonic-gate nop 32437c478bd9Sstevel@tonic-gate ldn [%o4 + CP_XCOPYIN], %g2 ! if handler, invoke it with 32447c478bd9Sstevel@tonic-gate jmp %g2 ! original arguments 32457c478bd9Sstevel@tonic-gate restore %g0, 0, %g0 ! dispose of copy window 32467c478bd9Sstevel@tonic-gate2: 32477c478bd9Sstevel@tonic-gate ret 32487c478bd9Sstevel@tonic-gate restore ERRNO, 0, %o0 ! return errno value 32497c478bd9Sstevel@tonic-gate 32507c478bd9Sstevel@tonic-gate.sm_xcopyin_err: 32517c478bd9Sstevel@tonic-gate 32527c478bd9Sstevel@tonic-gate membar #Sync 32537c478bd9Sstevel@tonic-gate stn %o4, [THREAD_REG + T_LOFAULT] ! restore old t_lofault 32547c478bd9Sstevel@tonic-gate mov SM_SAVE_SRC, %o0 32557c478bd9Sstevel@tonic-gate mov SM_SAVE_DST, %o1 32567c478bd9Sstevel@tonic-gate mov SM_SAVE_COUNT, %o2 32577c478bd9Sstevel@tonic-gate ldn [THREAD_REG + T_COPYOPS], %o3 ! check for copyop handler 32587c478bd9Sstevel@tonic-gate tst %o3 32597c478bd9Sstevel@tonic-gate bz,pt %ncc, 3f ! 
! if not, return error
	nop
	ldn	[%o3 + CP_XCOPYIN], %o5		! if handler, invoke it with
	jmp	%o5				! original arguments
	nop
3:
	retl
	or	%g1, 0, %o0		! return errno value

	SET_SIZE(xcopyin)

/*
 * xcopyin_little - byte-reversing copy using ASI_AIUSL loads.
 *
 *	%o0 - source address (read with lduba ... ASI_AIUSL)
 *	%o1 - destination address (written with plain stb)
 *	%o2 - byte count
 *
 * The source is walked from its last byte down to its first while the
 * destination is filled from its first byte up, i.e.
 *	dst[i] = src[count - 1 - i]	for 0 <= i < count.
 *
 * .xcopyio_err is installed in t_lofault for the duration of the copy and
 * the previous t_lofault value is kept in %o5 and put back on both exits.
 * Returns 0 on success; .xcopyio_err returns whatever is in %g1
 * (NOTE(review): presumably the errno supplied by the fault path - confirm).
 */
	ENTRY(xcopyin_little)
	sethi	%hi(.xcopyio_err), %o5
	or	%o5, %lo(.xcopyio_err), %o5
	ldn	[THREAD_REG + T_LOFAULT], %o4	! %o4 = previous t_lofault
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! install .xcopyio_err
	mov	%o4, %o5			! %o5 = previous t_lofault

	subcc	%g0, %o2, %o3		! %o3 = -count (index counts up to 0)
	add	%o0, %o2, %o0
	bz,pn	%ncc, 2f		! check for zero bytes
	sub	%o2, 1, %o4		! (delay slot) %o4 = count - 1
	add	%o0, %o4, %o0		! start w/last byte
	add	%o1, %o2, %o1		! %o1 = dst + count
	lduba	[%o0 + %o3]ASI_AIUSL, %o4	! fetch src[count - 1]

1:	stb	%o4, [%o1 + %o3]
	inccc	%o3			! carry is set when the index reaches 0
	sub	%o0, 2, %o0		! get next byte
	bcc,a,pt %ncc, 1b
	lduba	[%o0 + %o3]ASI_AIUSL, %o4	! (annulled when the loop exits)

2:
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g0, %o0		! return (0)

.xcopyio_err:
	membar	#Sync				! sync error barrier
	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	retl
	mov	%g1, %o0		! return the value left in %g1

	SET_SIZE(xcopyin_little)


/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 *
 *	%o0 - source, %o1 - destination, %o2 - length
 *
 * This entry point only selects a copy path:
 *   - length <= VIS_COPY_THRESHOLD always takes the small (leaf) path;
 *   - otherwise the low bits of (src ^ dst) pick one of
 *     hw_copy_limit_{8,4,2,1}; a limit of zero, or a length <= the limit,
 *     takes the small path, anything longer goes to .copyin_noerr_more.
 * The copy itself is done at .sm_do_copyin / .do_copyin, which are
 * defined elsewhere in this file.
 */
	ENTRY(copyin_noerr)

	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
	bleu,pt	%ncc, .copyin_ne_small		! len <= threshold: small copy
	xor	%o0, %o1, %o3		! are src, dst alignable?
	btst	7, %o3				!
	bz,pt	%ncc, .copyin_ne_8		! check for longword alignment
	nop
	btst	1, %o3				!
	bz,pt	%ncc, .copyin_ne_2		! check for half-word
	nop
	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	tst	%o3
	bz,pn	%icc, .copyin_ne_small		! if zero, disable HW copy
	cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .copyin_ne_small		! go to small copy
	nop
	ba,pt	%ncc, .copyin_noerr_more	! otherwise go to large copy
	nop
.copyin_ne_2:
	btst	3, %o3				!
	bz,pt	%ncc, .copyin_ne_4		! check for word alignment
	nop
	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
	tst	%o3
	bz,pn	%icc, .copyin_ne_small		! if zero, disable HW copy
	cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .copyin_ne_small		! go to small copy
	nop
	ba,pt	%ncc, .copyin_noerr_more	! otherwise go to large copy
	nop
.copyin_ne_4:
	! already checked longword, must be word aligned
	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
	tst	%o3
	bz,pn	%icc, .copyin_ne_small		! if zero, disable HW copy
	cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .copyin_ne_small		! go to small copy
	nop
	ba,pt	%ncc, .copyin_noerr_more	! otherwise go to large copy
	nop
.copyin_ne_8:
	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
	tst	%o3
	bz,pn	%icc, .copyin_ne_small		! if zero, disable HW copy
	cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .copyin_ne_small		! go to small copy
	nop
	ba,pt	%ncc, .copyin_noerr_more	! otherwise go to large copy
	nop

	! Small path: if the caller already has a t_lofault handler, keep it
	! in %o4 and substitute .sm_copyio_noerr; with no handler set, go
	! straight to the copy.
.copyin_ne_small:
	ldn	[THREAD_REG + T_LOFAULT], %o4
	tst	%o4
	bz,pn	%ncc, .sm_do_copyin
	nop
	sethi	%hi(.sm_copyio_noerr), %o5
	or	%o5, %lo(.sm_copyio_noerr), %o5
	membar	#Sync				! sync error barrier
	ba,pt	%ncc, .sm_do_copyin
	stn	%o5, [THREAD_REG + T_LOFAULT]	! set/save t_lofault

	! Large path: take a register window and hand .copyio_noerr to
	! .do_copyin in REAL_LOFAULT.
.copyin_noerr_more:
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	ba,pt	%ncc, .do_copyin
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT

	! Shared by copyin_noerr and copyout_noerr (large path): jump to the
	! address in %l6 (per the register convention at the top of this
	! file, the previous error handler) and pop the window in the delay
	! slot.
.copyio_noerr:
	jmp	%l6
	restore	%g0,0,%g0

	! Shared by copyin_noerr and copyout_noerr (small path): put the
	! saved handler back in t_lofault and jump to it.
.sm_copyio_noerr:
	membar	#Sync
	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore t_lofault
	jmp	%o4
	nop

	SET_SIZE(copyin_noerr)

/*
 * Copy a block of storage - must not overlap (from + len <= to).
 * No fault handler installed (to be called under on_fault())
 *
 *	%o0 - source, %o1 - destination, %o2 - length
 *
 * Path selection is identical to copyin_noerr; the copy itself is done at
 * .sm_do_copyout / .do_copyout, and the .copyio_noerr / .sm_copyio_noerr
 * stubs defined in copyin_noerr are reused.
 */

	ENTRY(copyout_noerr)

	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
	bleu,pt	%ncc, .copyout_ne_small		! len <= threshold: small copy
	xor	%o0, %o1, %o3		! are src, dst alignable?
	btst	7, %o3				!
	bz,pt	%ncc, .copyout_ne_8		! check for longword alignment
	nop
	btst	1, %o3				!
	bz,pt	%ncc, .copyout_ne_2		! check for half-word
	nop
	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_ne_small		! if zero, disable HW copy
	cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .copyout_ne_small		! go to small copy
	nop
	ba,pt	%ncc, .copyout_noerr_more	! otherwise go to large copy
	nop
.copyout_ne_2:
	btst	3, %o3				!
	bz,pt	%ncc, .copyout_ne_4		! check for word alignment
	nop
	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_ne_small		! if zero, disable HW copy
	cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .copyout_ne_small		! go to small copy
	nop
	ba,pt	%ncc, .copyout_noerr_more	! otherwise go to large copy
	nop
.copyout_ne_4:
	! already checked longword, must be word aligned
	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_ne_small		! if zero, disable HW copy
	cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .copyout_ne_small		! go to small copy
	nop
	ba,pt	%ncc, .copyout_noerr_more	! otherwise go to large copy
	nop
.copyout_ne_8:
	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
	tst	%o3
	bz,pn	%icc, .copyout_ne_small		! if zero, disable HW copy
	cmp	%o2, %o3			! if length <= limit
	bleu,pt	%ncc, .copyout_ne_small		! go to small copy
	nop
	ba,pt	%ncc, .copyout_noerr_more	! otherwise go to large copy
	nop

	! Small path: keep any existing t_lofault handler in %o4 and
	! substitute .sm_copyio_noerr; with no handler set, go straight to
	! the copy.
.copyout_ne_small:
	ldn	[THREAD_REG + T_LOFAULT], %o4
	tst	%o4
	bz,pn	%ncc, .sm_do_copyout
	nop
	sethi	%hi(.sm_copyio_noerr), %o5
	or	%o5, %lo(.sm_copyio_noerr), %o5
	membar	#Sync				! sync error barrier
	ba,pt	%ncc, .sm_do_copyout
	stn	%o5, [THREAD_REG + T_LOFAULT]	! set/save t_lofault

	! Large path: take a register window and hand .copyio_noerr to
	! .do_copyout in REAL_LOFAULT.
.copyout_noerr_more:
	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
	sethi	%hi(.copyio_noerr), REAL_LOFAULT
	ba,pt	%ncc, .do_copyout
	or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT

	SET_SIZE(copyout_noerr)


/*
 * hwblkclr - clears block-aligned, block-multiple-sized regions that are
 * at least 256 bytes in length using spitfire's block stores.  If
 * the criteria for using this routine are not met then it calls bzero
 * and returns 1.  Otherwise 0 is returned indicating success.
 * Caller is responsible for ensuring use_hw_bzero is true and that
 * kpreempt_disable() has been called.
 */
	! %i0 - start address
	! %i1 - length of region (multiple of 64)
	! %l0 - saved fprs
	! %l1 - pointer to saved %d0 block
	! (%l2 was documented here as "saved curthread->t_lwp", but this
	!  routine never references %l2)

	ENTRY(hwblkclr)
	! get another window w/space for one aligned block of saved fpregs
	save	%sp, -SA(MINFRAME + 2*VIS_BLOCKSIZE), %sp

	! Must be block-aligned
	andcc	%i0, (VIS_BLOCKSIZE-1), %g0
	bnz,pn	%ncc, 1f
	nop

	! ... and must be 256 bytes or more
	cmp	%i1, 256
	blu,pn	%ncc, 1f
	nop

	! ... and length must be a multiple of VIS_BLOCKSIZE
	andcc	%i1, (VIS_BLOCKSIZE-1), %g0
	bz,pn	%ncc, 2f
	nop

1:	! punt, call bzero but notify the caller that bzero was used
	mov	%i0, %o0
	call	bzero
	mov	%i1, %o1
	ret
	restore	%g0, 1, %o0 ! return (1) - did not use block operations

2:	rd	%fprs, %l0		! check for unused fp
	btst	FPRS_FEF, %l0
	bz,pt	%icc, 1f		! FEF clear: nothing to save
	nop

	! save in-use fpregs on stack
	membar	#Sync
	add	%fp, STACK_BIAS - 65, %l1
	and	%l1, -VIS_BLOCKSIZE, %l1	! round down to a block boundary
	stda	%d0, [%l1]ASI_BLK_P

1:	membar	#StoreStore|#StoreLoad|#LoadStore
	wr	%g0, FPRS_FEF, %fprs
	wr	%g0, ASI_BLK_P, %asi

	! Clear block
	fzero	%d0
	fzero	%d2
	fzero	%d4
	fzero	%d6
	fzero	%d8
	fzero	%d10
	fzero	%d12
	fzero	%d14

	mov	256, %i3		! bytes cleared per full pass
	ba,pt	%ncc, .pz_doblock
	nop

	! NOTE: the three stda instructions below must stay contiguous and
	! end exactly at .pz_zinst - the tail code computes a jump into the
	! middle of this sequence.
.pz_blkstart:
      ! stda	%d0, [%i0 + 192]%asi  ! in dly slot of branch that got us here
	stda	%d0, [%i0 + 128]%asi
	stda	%d0, [%i0 + 64]%asi
	stda	%d0, [%i0]%asi
.pz_zinst:
	add	%i0, %i3, %i0		! advance by the bytes just cleared
	sub	%i1, %i3, %i1
.pz_doblock:
	cmp	%i1, 256
	bgeu,a	%ncc, .pz_blkstart
	stda	%d0, [%i0 + 192]%asi	! (annulled unless >= 256 bytes remain)

	cmp	%i1, 64
	blu	%ncc, .pz_finish

	andn	%i1, (64-1), %i3	! (delay slot) whole-block bytes left
	srl	%i3, 4, %i2		! using blocks, 1 instr / 16 words
	set	.pz_zinst, %i4
	sub	%i4, %i2, %i4		! back up one stda per remaining block
	jmp	%i4
	nop

.pz_finish:
	membar	#Sync
	btst	FPRS_FEF, %l0
	bz,a	.pz_finished		! fp was not in use: nothing to reload
	wr	%l0, 0, %fprs		! restore fprs

	! restore fpregs from stack
	ldda	[%l1]ASI_BLK_P, %d0
	membar	#Sync
	wr	%l0, 0, %fprs		! restore fprs

.pz_finished:
	ret
	restore	%g0, 0, %o0		! return (0) - block stores were used

	SET_SIZE(hwblkclr)

	/*
	 * Copy 32 bytes of data from src (%o0) to dst (%o1)
	 * using physical addresses.
	 *
	 * PSTATE_IE is cleared for the duration of the copy and the
	 * original %pstate is written back in the retl delay slot.
	 * All four doublewords are loaded through ASI_MEM before any is
	 * stored.  Clobbers %g1, %g2, %o0-%o5.
	 */
	ENTRY_NP(hw_pa_bcopy32)
	rdpr    %pstate, %g1		! %g1 = original pstate
	andn    %g1, PSTATE_IE, %g2
	wrpr    %g0, %g2, %pstate	! interrupts off

	rdpr    %pstate, %g0		! result discarded
					! NOTE(review): presumably forces the
					! wrpr above to complete - confirm
	ldxa    [%o0]ASI_MEM, %o2
	add     %o0, 8, %o0
	ldxa    [%o0]ASI_MEM, %o3
	add     %o0, 8, %o0
	ldxa    [%o0]ASI_MEM, %o4
	add     %o0, 8, %o0
	ldxa    [%o0]ASI_MEM, %o5

	stxa	%g0, [%o1]ASI_DC_INVAL	! NOTE(review): by its name, drops
					! the D-cache line for dst - confirm
	membar	#Sync

	stxa	%o2, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o3, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o4, [%o1]ASI_MEM
	add	%o1, 8, %o1
	stxa	%o5, [%o1]ASI_MEM

	retl
	  wrpr	  %g0, %g1, %pstate	! (delay slot) restore original pstate

	SET_SIZE(hw_pa_bcopy32)

	! Tunables.  A hw_copy_limit_N of zero makes copyin_noerr and
	! copyout_noerr take the small-copy path for that alignment class;
	! otherwise only lengths greater than the limit take the large path.
	DGDEF(use_hw_bcopy)
	.word	1
	DGDEF(use_hw_bzero)
	.word	1
	DGDEF(hw_copy_limit_1)
	.word	0
	DGDEF(hw_copy_limit_2)
	.word	0
	DGDEF(hw_copy_limit_4)
	.word	0
	DGDEF(hw_copy_limit_8)
	.word	0
36307c478bd9Sstevel@tonic-gate 36317c478bd9Sstevel@tonic-gate .align 64 36327c478bd9Sstevel@tonic-gate .section ".text" 3633