1afd1ac7bSwesolows /*
2afd1ac7bSwesolows  * CDDL HEADER START
3afd1ac7bSwesolows  *
4afd1ac7bSwesolows  * The contents of this file are subject to the terms of the
5afd1ac7bSwesolows  * Common Development and Distribution License (the "License").
6afd1ac7bSwesolows  * You may not use this file except in compliance with the License.
7afd1ac7bSwesolows  *
8afd1ac7bSwesolows  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9afd1ac7bSwesolows  * or http://www.opensolaris.org/os/licensing.
10afd1ac7bSwesolows  * See the License for the specific language governing permissions
11afd1ac7bSwesolows  * and limitations under the License.
12afd1ac7bSwesolows  *
13afd1ac7bSwesolows  * When distributing Covered Code, include this CDDL HEADER in each
14afd1ac7bSwesolows  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15afd1ac7bSwesolows  * If applicable, add the following below this CDDL HEADER, with the
16afd1ac7bSwesolows  * fields enclosed by brackets "[]" replaced with your own identifying
17afd1ac7bSwesolows  * information: Portions Copyright [yyyy] [name of copyright owner]
18afd1ac7bSwesolows  *
19afd1ac7bSwesolows  * CDDL HEADER END
20afd1ac7bSwesolows  */
21afd1ac7bSwesolows 
22afd1ac7bSwesolows /*
238de5c4f4SDan OpenSolaris Anderson  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24afd1ac7bSwesolows  * Use is subject to license terms.
25afd1ac7bSwesolows  */
26afd1ac7bSwesolows 
27afd1ac7bSwesolows #ifndef	_MD5_BYTESWAP_H
28afd1ac7bSwesolows #define	_MD5_BYTESWAP_H
29afd1ac7bSwesolows 
30afd1ac7bSwesolows /*
31afd1ac7bSwesolows  * definitions for inline functions for little-endian loads.
32afd1ac7bSwesolows  *
33afd1ac7bSwesolows  * This file has special definitions for UltraSPARC architectures,
34afd1ac7bSwesolows  * which have a special address space identifier for loading 32 and 16 bit
35afd1ac7bSwesolows  * integers in little-endian byte order.
36afd1ac7bSwesolows  */
37afd1ac7bSwesolows 
384b56a003SDaniel Anderson #include <sys/types.h>
39afd1ac7bSwesolows #if defined(__sparc)
40afd1ac7bSwesolows #include <v9/sys/asi.h>
414b56a003SDaniel Anderson #elif defined(_LITTLE_ENDIAN)
424b56a003SDaniel Anderson #include <sys/byteorder.h>
43afd1ac7bSwesolows #endif
44afd1ac7bSwesolows 
45afd1ac7bSwesolows #ifdef	__cplusplus
46afd1ac7bSwesolows extern "C" {
47afd1ac7bSwesolows #endif
48afd1ac7bSwesolows 
49afd1ac7bSwesolows #if defined(_LITTLE_ENDIAN)
50afd1ac7bSwesolows 
51afd1ac7bSwesolows /*
52afd1ac7bSwesolows  * Little-endian optimization:  I don't need to do any weirdness.   On
53afd1ac7bSwesolows  * some little-endian boxen, I'll have to do alignment checks, but I can do
54afd1ac7bSwesolows  * that below.
55afd1ac7bSwesolows  */
56afd1ac7bSwesolows 
/*
 * i386 and amd64 tolerate unaligned 4-byte loads.  Every other
 * little-endian target gets _MD5_CHECK_ALIGNMENT defined, which tells the
 * MD5Transform function that it has to check alignment.
 */
#if !(defined(__i386) || defined(__amd64))
#define	_MD5_CHECK_ALIGNMENT
#endif /* !(__i386 || __amd64) */

/* Native byte order is already little-endian: a plain 32-bit load will do. */
#define	LOAD_LITTLE_32(addr)	(*(uint32_t *)(void *)(addr))
67afd1ac7bSwesolows 
68afd1ac7bSwesolows #else	/* !_LITTLE_ENDIAN */
69afd1ac7bSwesolows 
70afd1ac7bSwesolows /*
71afd1ac7bSwesolows  * sparc v9/v8plus optimization:
72afd1ac7bSwesolows  *
73afd1ac7bSwesolows  * on the sparc v9/v8plus, we can load data little endian.  however, since
74afd1ac7bSwesolows  * the compiler doesn't have direct support for little endian, we
75afd1ac7bSwesolows  * link to an assembly-language routine `load_little_32' to do
76afd1ac7bSwesolows  * the magic.  note that special care must be taken to ensure the
77afd1ac7bSwesolows  * address is 32-bit aligned -- in the interest of speed, we don't
78afd1ac7bSwesolows  * check to make sure, since careful programming can guarantee this
79afd1ac7bSwesolows  * for us.
80afd1ac7bSwesolows  */
81afd1ac7bSwesolows #if defined(sun4u)
82afd1ac7bSwesolows 
83afd1ac7bSwesolows /* Define alignment check because we can 4-byte load as little endian. */
84afd1ac7bSwesolows #define	_MD5_CHECK_ALIGNMENT
858de5c4f4SDan OpenSolaris Anderson #define	LOAD_LITTLE_32(addr)    load_little_32((uint32_t *)(void *)(addr))
86afd1ac7bSwesolows 
87afd1ac7bSwesolows #if !defined(__lint) && defined(__GNUC__)
88afd1ac7bSwesolows 
89afd1ac7bSwesolows static __inline__ uint32_t
90afd1ac7bSwesolows load_little_32(uint32_t *addr)
91afd1ac7bSwesolows {
92afd1ac7bSwesolows 	uint32_t value;
93afd1ac7bSwesolows 
94afd1ac7bSwesolows 	__asm__(
95afd1ac7bSwesolows 	    "lduwa	[%1] %2, %0\n\t"
964b56a003SDaniel Anderson 	    : "=r" (value)
974b56a003SDaniel Anderson 	    : "r" (addr), "i" (ASI_PL));
98afd1ac7bSwesolows 
99afd1ac7bSwesolows 	return (value);
100afd1ac7bSwesolows }
101afd1ac7bSwesolows #endif	/* !__lint && __GNUC__ */
102afd1ac7bSwesolows 
103afd1ac7bSwesolows #if !defined(__GNUC__)
104afd1ac7bSwesolows extern	uint32_t load_little_32(uint32_t *);
105afd1ac7bSwesolows #endif	/* !__GNUC__ */
106afd1ac7bSwesolows 
107734b6a94Sdarrenm /* Placate lint */
108734b6a94Sdarrenm #if defined(__lint)
/*
 * lint-only stand-in for load_little_32(): a plain dereference of addr,
 * with no byte swapping.
 */
uint32_t
load_little_32(uint32_t *addr)
{
	uint32_t word = addr[0];

	return (word);
}
114734b6a94Sdarrenm #endif	/* __lint */
115734b6a94Sdarrenm 
1164b56a003SDaniel Anderson #elif defined(_LITTLE_ENDIAN)
/*
 * NOTE: this branch sits inside the !_LITTLE_ENDIAN half of the header, so
 * it is never selected.  It is kept in step with the little-endian
 * definition earlier in this file: load the word that addr points to,
 * rather than byte-swapping the address itself.
 */
#define	LOAD_LITTLE_32(addr)	(*(uint32_t *)(void *)(addr))
118734b6a94Sdarrenm 
1194b56a003SDaniel Anderson #else
/*
 * Portable byte-at-a-time load: correct on any byte order, though slower
 * than a single word load on a little-endian machine.  Only single bytes
 * are read, so addr needs no particular alignment.
 *
 * Each byte is read as a uint8_t and widened to uint32_t before it is
 * shifted.  Without that, the bytes are promoted to (signed) int, so
 * shifting a byte >= 0x80 left by 24 overflows int, and the result
 * sign-extends when it is stored in a wider type.
 */
#define	LOAD_LITTLE_32(addr)						\
	((uint32_t)((const uint8_t *)(const void *)(addr))[0] |		\
	((uint32_t)((const uint8_t *)(const void *)(addr))[1] << 8) |	\
	((uint32_t)((const uint8_t *)(const void *)(addr))[2] << 16) |	\
	((uint32_t)((const uint8_t *)(const void *)(addr))[3] << 24))
124734b6a94Sdarrenm #endif	/* sun4u */
125734b6a94Sdarrenm 
126afd1ac7bSwesolows #if defined(sun4v)
127afd1ac7bSwesolows 
128afd1ac7bSwesolows /*
129afd1ac7bSwesolows  * For N1 want to minimize number of arithmetic operations. This is best
130afd1ac7bSwesolows  * achieved by using the %asi register to specify ASI for the lduwa operations.
131afd1ac7bSwesolows  * Also, have a separate inline template for each word, so can utilize the
132afd1ac7bSwesolows  * immediate offset in lduwa, without relying on the compiler to do the right
133afd1ac7bSwesolows  * thing.
134afd1ac7bSwesolows  *
135afd1ac7bSwesolows  * Moving to 64-bit loads might also be beneficial.
136afd1ac7bSwesolows  */
/*
 * LOAD_LITTLE_32_<n>(addr) forwards to load_little_32_<n>(), with one
 * wrapper per hex digit 0 through f.  The pointer is converted through
 * void * first, matching the LOAD_LITTLE_32() definitions in this header.
 */
#define	LOAD_LITTLE_32_0(addr)	load_little_32_0((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_1(addr)	load_little_32_1((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_2(addr)	load_little_32_2((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_3(addr)	load_little_32_3((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_4(addr)	load_little_32_4((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_5(addr)	load_little_32_5((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_6(addr)	load_little_32_6((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_7(addr)	load_little_32_7((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_8(addr)	load_little_32_8((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_9(addr)	load_little_32_9((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_a(addr)	load_little_32_a((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_b(addr)	load_little_32_b((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_c(addr)	load_little_32_c((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_d(addr)	load_little_32_d((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_e(addr)	load_little_32_e((uint32_t *)(void *)(addr))
#define	LOAD_LITTLE_32_f(addr)	load_little_32_f((uint32_t *)(void *)(addr))
153afd1ac7bSwesolows 
154afd1ac7bSwesolows #if !defined(__lint) && defined(__GNUC__)
155afd1ac7bSwesolows 
/*
 * Write the given value into the %asi register.  Despite the name, the
 * value is whatever the caller passes in, not necessarily ASI_PL.
 */
static __inline__ void
set_little(uint8_t asi)
{
	__asm__ __volatile__(
	    "wr	%%g0, %[new_asi], %%asi\n\t"
	    : /* no outputs */
	    : [new_asi] "r" (asi));
}
167afd1ac7bSwesolows 
/*
 * Read back the current contents of the %asi register.
 */
static __inline__ uint8_t
get_little(void)
{
	uint8_t current;

	__asm__ __volatile__(
	    "rd	%%asi, %[cur]\n\t"
	    : [cur] "=r" (current));

	return (current);
}
179afd1ac7bSwesolows 
/*
 * We have 16 functions which differ only in the offset from which they
 * load.  Use this preprocessor template to simplify maintenance.  Its
 * argument is the word index in hex, without the 0x; the byte offset used
 * by the load is that index shifted left by 2.
 *
 * The "m" operand is not referenced by the template.  It only tells the
 * compiler which word the asm reads, so that this non-volatile asm is not
 * merged with an earlier identical one or moved across a store to that
 * word.
 *
 * NOTE(review): the loads also depend on the current %asi value, and
 * nothing in the asm statement expresses that dependency -- confirm that
 * callers keep the %asi update and these loads ordered.
 */
#define	LL_TEMPLATE(__off)			\
static __inline__ uint32_t			\
load_little_32_##__off(uint32_t *addr)		\
{						\
	uint32_t value;				\
	__asm__(				\
		"lduwa	[%1 + %2]%%asi, %0\n\t"	\
	: "=r" (value)				\
	: "r" (addr), "i" ((0x##__off) << 2),	\
	    "m" (addr[0x##__off]));		\
	return (value);				\
}

/* BEGIN CSTYLED */
LL_TEMPLATE(0)
LL_TEMPLATE(1)
LL_TEMPLATE(2)
LL_TEMPLATE(3)
LL_TEMPLATE(4)
LL_TEMPLATE(5)
LL_TEMPLATE(6)
LL_TEMPLATE(7)
LL_TEMPLATE(8)
LL_TEMPLATE(9)
LL_TEMPLATE(a)
LL_TEMPLATE(b)
LL_TEMPLATE(c)
LL_TEMPLATE(d)
LL_TEMPLATE(e)
LL_TEMPLATE(f)
/* END CSTYLED */
#undef	LL_TEMPLATE
216afd1ac7bSwesolows 
217afd1ac7bSwesolows #endif	/* !__lint && __GNUC__ */
218afd1ac7bSwesolows 
219afd1ac7bSwesolows #if !defined(__GNUC__)
/*
 * Non-GNU compilers: the %asi register is set using an inline template,
 * and the little-endian loads declared below go through it.  This saves a
 * few arithmetic ops, because lduwa can then take an immediate offset.
 */
extern void set_little(uint32_t asi);
extern uint32_t get_little(void);

extern	uint32_t load_little_32_0(uint32_t *addr);
extern	uint32_t load_little_32_1(uint32_t *addr);
extern	uint32_t load_little_32_2(uint32_t *addr);
extern	uint32_t load_little_32_3(uint32_t *addr);
extern	uint32_t load_little_32_4(uint32_t *addr);
extern	uint32_t load_little_32_5(uint32_t *addr);
extern	uint32_t load_little_32_6(uint32_t *addr);
extern	uint32_t load_little_32_7(uint32_t *addr);
extern	uint32_t load_little_32_8(uint32_t *addr);
extern	uint32_t load_little_32_9(uint32_t *addr);
extern	uint32_t load_little_32_a(uint32_t *addr);
extern	uint32_t load_little_32_b(uint32_t *addr);
extern	uint32_t load_little_32_c(uint32_t *addr);
extern	uint32_t load_little_32_d(uint32_t *addr);
extern	uint32_t load_little_32_e(uint32_t *addr);
extern	uint32_t load_little_32_f(uint32_t *addr);
246afd1ac7bSwesolows #endif	/* !__GNUC__ */
247afd1ac7bSwesolows #endif	/* sun4v */
248afd1ac7bSwesolows 
249afd1ac7bSwesolows #endif	/* _LITTLE_ENDIAN */
250afd1ac7bSwesolows 
251afd1ac7bSwesolows #ifdef	__cplusplus
252afd1ac7bSwesolows }
253afd1ac7bSwesolows #endif
254afd1ac7bSwesolows 
255afd1ac7bSwesolows #endif	/* !_MD5_BYTESWAP_H */
256