/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef	_MD5_BYTESWAP_H
#define	_MD5_BYTESWAP_H

/*
 * Definitions of the LOAD_LITTLE_32() family: macros and inline functions
 * that fetch a 32-bit word stored in little-endian byte order.
 *
 * This file has special definitions for UltraSPARC architectures,
 * which have a special address space identifier for loading 32 and 16 bit
 * integers in little-endian byte order.
 */

#include <sys/types.h>
#if defined(__sparc)
#include <v9/sys/asi.h>
#elif defined(_LITTLE_ENDIAN)
#include <sys/byteorder.h>
#endif

#ifdef	__cplusplus
extern "C" {
#endif

#if defined(_LITTLE_ENDIAN)

/*
 * Little-endian optimization: memory order already matches the order we
 * want, so a plain 32-bit load is enough.  Some little-endian machines need
 * an alignment check before such a load; that is signalled below.
 */

#if !defined(__i386) && !defined(__amd64)
/*
 * i386 and amd64 don't require aligned 4-byte loads.  The symbol
 * _MD5_CHECK_ALIGNMENT indicates below whether the MD5Transform function
 * requires alignment checking.
 */
#define	_MD5_CHECK_ALIGNMENT
#endif /* !__i386 && !__amd64 */

/* Load the 32-bit word at addr directly; addr may be any pointer type. */
#define	LOAD_LITTLE_32(addr)	(*(uint32_t *)(void *)(addr))

#else	/* !_LITTLE_ENDIAN */

/*
 * sparc v9/v8plus optimization:
 *
 * on the sparc v9/v8plus, we can load data little endian.  however, since
 * the compiler doesn't have direct support for little endian, we
 * link to an assembly-language routine `load_little_32' to do
 * the magic.  note that special care must be taken to ensure the
 * address is 32-bit aligned -- in the interest of speed, we don't
 * check to make sure, since careful programming can guarantee this
 * for us.
 */
#if defined(sun4u)

/* Define alignment check because we can 4-byte load as little endian. */
#define	_MD5_CHECK_ALIGNMENT
#define	LOAD_LITTLE_32(addr)	load_little_32((uint32_t *)(void *)(addr))

#if !defined(__lint) && defined(__GNUC__)

/*
 * Return the 32-bit word at addr, loaded with `lduwa' through the ASI_PL
 * address space identifier.  addr must be 32-bit aligned (see above).
 */
static __inline__ uint32_t
load_little_32(uint32_t *addr)
{
	uint32_t value;

	__asm__(
	    "lduwa [%1] %2, %0\n\t"
	    : "=r" (value)
	    : "r" (addr), "i" (ASI_PL));

	return (value);
}
#endif	/* !__lint && __GNUC__ */

#if !defined(__GNUC__)
/* Non-GNU compilers get load_little_32() from outside this header. */
extern	uint32_t load_little_32(uint32_t *);
#endif	/* !__GNUC__ */

/* Placate lint */
#if defined(__lint)
uint32_t
load_little_32(uint32_t *addr)
{
	return (*addr);
}
#endif	/* __lint */

#else	/* !sun4u */

/*
 * Generic byte-at-a-time load: correct on any byte order, but slow.
 * Since we do byte operations, we don't have to check for alignment.
 *
 * Each byte is widened to uint32_t before it is shifted.  A uint8_t operand
 * would otherwise be promoted to (signed) int, and shifting a byte >= 0x80
 * left by 24 overflows int, which is undefined behaviour.
 *
 * addr is evaluated four times, so it must not have side effects.
 */
#define	LOAD_LITTLE_32(addr)	\
	((uint32_t)(addr)[0] | ((uint32_t)(addr)[1] << 8) |	\
	((uint32_t)(addr)[2] << 16) | ((uint32_t)(addr)[3] << 24))
#endif	/* sun4u */

#if defined(sun4v)

/*
 * For N1 want to minimize number of arithmetic operations. This is best
 * achieved by using the %asi register to specify ASI for the lduwa operations.
 * Also, have a separate inline template for each word, so can utilize the
 * immediate offset in lduwa, without relying on the compiler to do the right
 * thing.
 *
 * Moving to 64-bit loads might also be beneficial.
 *
 * LOAD_LITTLE_32_<n>(addr) loads the word at byte offset (0x<n> << 2)
 * from addr, using whatever ASI is currently held in %asi.
 */
#define	LOAD_LITTLE_32_0(addr)	load_little_32_0((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_1(addr)	load_little_32_1((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_2(addr)	load_little_32_2((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_3(addr)	load_little_32_3((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_4(addr)	load_little_32_4((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_5(addr)	load_little_32_5((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_6(addr)	load_little_32_6((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_7(addr)	load_little_32_7((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_8(addr)	load_little_32_8((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_9(addr)	load_little_32_9((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_a(addr)	load_little_32_a((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_b(addr)	load_little_32_b((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_c(addr)	load_little_32_c((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_d(addr)	load_little_32_d((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_e(addr)	load_little_32_e((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_f(addr)	load_little_32_f((uint32_t *)(void *)(addr))

#if !defined(__lint) && defined(__GNUC__)

/*
 * This actually sets the ASI register, not necessarily to ASI_PL.
 */
static __inline__ void
set_little(uint8_t asi)
{
	__asm__ __volatile__(
	    "wr %%g0, %0, %%asi\n\t"
	    : /* Nothing */
	    : "r" (asi));
}

/*
 * Read back the current contents of the ASI register.
 */
static __inline__ uint8_t
get_little(void)
{
	uint8_t asi;

	__asm__ __volatile__(
	    "rd %%asi, %0\n\t"
	    : "=r" (asi));

	return (asi);
}

/*
 * We have 16 functions which differ only in the offset from which they
 * load.  Use this preprocessor template to simplify maintenance.  Its
 * argument is the offset in hex, without the 0x.
 */
#define	LL_TEMPLATE(__off)			\
static __inline__ uint32_t			\
load_little_32_##__off(uint32_t *addr)		\
{						\
	uint32_t value;				\
	__asm__(				\
	    "lduwa [%1 + %2]%%asi, %0\n\t"	\
	    : "=r" (value)			\
	    : "r" (addr), "i" ((0x##__off) << 2));	\
	return (value);				\
}

/* BEGIN CSTYLED */
LL_TEMPLATE(0)
LL_TEMPLATE(1)
LL_TEMPLATE(2)
LL_TEMPLATE(3)
LL_TEMPLATE(4)
LL_TEMPLATE(5)
LL_TEMPLATE(6)
LL_TEMPLATE(7)
LL_TEMPLATE(8)
LL_TEMPLATE(9)
LL_TEMPLATE(a)
LL_TEMPLATE(b)
LL_TEMPLATE(c)
LL_TEMPLATE(d)
LL_TEMPLATE(e)
LL_TEMPLATE(f)
/* END CSTYLED */
#undef	LL_TEMPLATE

#endif	/* !__lint && __GNUC__ */

#if !defined(__GNUC__)
/*
 * Using the %asi register to achieve little endian loads - register
 * is set using an inline template.
 *
 * Saves a few arithmetic ops as can now use an immediate offset with the
 * lduwa instructions.
 *
 * NOTE(review): the GNU C inline set_little()/get_little() above use
 * uint8_t for the ASI value, whereas these prototypes use uint32_t --
 * confirm which width the inline templates expect.
 */
extern void set_little(uint32_t);
extern uint32_t get_little(void);

extern uint32_t load_little_32_0(uint32_t *);
extern uint32_t load_little_32_1(uint32_t *);
extern uint32_t load_little_32_2(uint32_t *);
extern uint32_t load_little_32_3(uint32_t *);
extern uint32_t load_little_32_4(uint32_t *);
extern uint32_t load_little_32_5(uint32_t *);
extern uint32_t load_little_32_6(uint32_t *);
extern uint32_t load_little_32_7(uint32_t *);
extern uint32_t load_little_32_8(uint32_t *);
extern uint32_t load_little_32_9(uint32_t *);
extern uint32_t load_little_32_a(uint32_t *);
extern uint32_t load_little_32_b(uint32_t *);
extern uint32_t load_little_32_c(uint32_t *);
extern uint32_t load_little_32_d(uint32_t *);
extern uint32_t load_little_32_e(uint32_t *);
extern uint32_t load_little_32_f(uint32_t *);
#endif	/* !__GNUC__ */
#endif	/* sun4v */

#endif	/* !_LITTLE_ENDIAN */

#ifdef	__cplusplus
}
#endif

#endif	/* !_MD5_BYTESWAP_H */