avx512fintrin.h revision d111c7844ec26448764ced627e153f406d730c5f
1/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11#endif
12
13#ifndef __AVX512FINTRIN_H
14#define __AVX512FINTRIN_H
15
/* Signed-integer and floating-point element views of a 64-byte vector. */
typedef char      __v64qi __attribute__((__vector_size__(64)));
typedef short     __v32hi __attribute__((__vector_size__(64)));
typedef int       __v16si __attribute__((__vector_size__(64)));
typedef long long __v8di  __attribute__((__vector_size__(64)));
typedef float     __v16sf __attribute__((__vector_size__(64)));
typedef double    __v8df  __attribute__((__vector_size__(64)));
22
/* Unsigned element views of a 64-byte vector. */
typedef unsigned char      __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short     __v32hu __attribute__((__vector_size__(64)));
typedef unsigned int       __v16su __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du  __attribute__((__vector_size__(64)));
28
/* Public 512-bit vector types, aligned to 64 bytes. */
typedef float     __m512  __attribute__((__vector_size__(64), __aligned__(64)));
typedef double    __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));

/* The same vector types declared with 1-byte alignment. */
typedef float     __m512_u  __attribute__((__vector_size__(64), __aligned__(1)));
typedef double    __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));
36
/* Mask types: one unsigned char and one unsigned short. */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
39
/* Rounding mode macros (values 0x00 through 0x04).  */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF     0x01
#define _MM_FROUND_TO_POS_INF     0x02
#define _MM_FROUND_TO_ZERO        0x03
#define _MM_FROUND_CUR_DIRECTION  0x04
46
/* Constants for integer comparison predicates */
typedef enum {
    _MM_CMPINT_EQ     = 0,  /* Equal */
    _MM_CMPINT_LT     = 1,  /* Less than */
    _MM_CMPINT_LE     = 2,  /* Less than or Equal */
    _MM_CMPINT_UNUSED = 3,
    _MM_CMPINT_NE     = 4,  /* Not Equal */
    _MM_CMPINT_NLT    = 5,  /* Not Less than */
    _MM_CMPINT_NLE    = 6   /* Not Less than or Equal */
} _MM_CMPINT_ENUM;

/* Alternate spellings of the two "not less" predicates. */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
59
/* Named 8-bit constants 0x00..0xFF.  The four letters A..D stand for the
   2-bit fields 0..3, written from the most significant field to the least
   significant one; each row below holds one value of the upper three fields. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, _MM_PERM_AAAD = 0x03,
  _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07,
  _MM_PERM_AACA = 0x08, _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, _MM_PERM_AADD = 0x0F,
  _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13,
  _MM_PERM_ABBA = 0x14, _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, _MM_PERM_ABCD = 0x1B,
  _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F,
  _MM_PERM_ACAA = 0x20, _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, _MM_PERM_ACBD = 0x27,
  _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B,
  _MM_PERM_ACDA = 0x2C, _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, _MM_PERM_ADAD = 0x33,
  _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37,
  _MM_PERM_ADCA = 0x38, _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, _MM_PERM_ADDD = 0x3F,
  _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43,
  _MM_PERM_BABA = 0x44, _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, _MM_PERM_BACD = 0x4B,
  _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F,
  _MM_PERM_BBAA = 0x50, _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, _MM_PERM_BBBD = 0x57,
  _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B,
  _MM_PERM_BBDA = 0x5C, _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, _MM_PERM_BCAD = 0x63,
  _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67,
  _MM_PERM_BCCA = 0x68, _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, _MM_PERM_BCDD = 0x6F,
  _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73,
  _MM_PERM_BDBA = 0x74, _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, _MM_PERM_BDCD = 0x7B,
  _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F,
  _MM_PERM_CAAA = 0x80, _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, _MM_PERM_CABD = 0x87,
  _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B,
  _MM_PERM_CADA = 0x8C, _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, _MM_PERM_CBAD = 0x93,
  _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97,
  _MM_PERM_CBCA = 0x98, _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, _MM_PERM_CBDD = 0x9F,
  _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3,
  _MM_PERM_CCBA = 0xA4, _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, _MM_PERM_CCCD = 0xAB,
  _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF,
  _MM_PERM_CDAA = 0xB0, _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, _MM_PERM_CDBD = 0xB7,
  _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB,
  _MM_PERM_CDDA = 0xBC, _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, _MM_PERM_DAAD = 0xC3,
  _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7,
  _MM_PERM_DACA = 0xC8, _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, _MM_PERM_DADD = 0xCF,
  _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3,
  _MM_PERM_DBBA = 0xD4, _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, _MM_PERM_DBCD = 0xDB,
  _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF,
  _MM_PERM_DCAA = 0xE0, _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, _MM_PERM_DCBD = 0xE7,
  _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB,
  _MM_PERM_DCDA = 0xEC, _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, _MM_PERM_DDAD = 0xF3,
  _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7,
  _MM_PERM_DDCA = 0xF8, _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;
149
/* Mantissa normalization interval selectors (values 0..3). */
typedef enum
{
  _MM_MANT_NORM_1_2     = 0,  /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2    = 1,  /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1    = 2,  /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5 = 3   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;
157
/* Mantissa sign-handling selectors (values 0..2). */
typedef enum
{
  _MM_MANT_SIGN_src  = 0,  /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero = 1,  /* sign = 0             */
  _MM_MANT_SIGN_nan  = 2   /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;
164
/* Default attribute sets for the functions in this file.  All three request
   always-inline, nodebug and the "avx512f" target; the 512 and 128 variants
   additionally set __min_vector_width__ to 512 and 128 respectively. */
#define __DEFAULT_FN_ATTRS512 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512f"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512f"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
169
170/* Create vectors with repeated elements */
171
172static  __inline __m512i __DEFAULT_FN_ATTRS512
173_mm512_setzero_si512(void)
174{
175  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
176}
177
178#define _mm512_setzero_epi32 _mm512_setzero_si512
179
180static __inline__ __m512d __DEFAULT_FN_ATTRS512
181_mm512_undefined_pd(void)
182{
183  return (__m512d)__builtin_ia32_undef512();
184}
185
186static __inline__ __m512 __DEFAULT_FN_ATTRS512
187_mm512_undefined(void)
188{
189  return (__m512)__builtin_ia32_undef512();
190}
191
192static __inline__ __m512 __DEFAULT_FN_ATTRS512
193_mm512_undefined_ps(void)
194{
195  return (__m512)__builtin_ia32_undef512();
196}
197
198static __inline__ __m512i __DEFAULT_FN_ATTRS512
199_mm512_undefined_epi32(void)
200{
201  return (__m512i)__builtin_ia32_undef512();
202}
203
204static __inline__ __m512i __DEFAULT_FN_ATTRS512
205_mm512_broadcastd_epi32 (__m128i __A)
206{
207  return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
208                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
209}
210
211static __inline__ __m512i __DEFAULT_FN_ATTRS512
212_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
213{
214  return (__m512i)__builtin_ia32_selectd_512(__M,
215                                             (__v16si) _mm512_broadcastd_epi32(__A),
216                                             (__v16si) __O);
217}
218
219static __inline__ __m512i __DEFAULT_FN_ATTRS512
220_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
221{
222  return (__m512i)__builtin_ia32_selectd_512(__M,
223                                             (__v16si) _mm512_broadcastd_epi32(__A),
224                                             (__v16si) _mm512_setzero_si512());
225}
226
227static __inline__ __m512i __DEFAULT_FN_ATTRS512
228_mm512_broadcastq_epi64 (__m128i __A)
229{
230  return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
231                                          0, 0, 0, 0, 0, 0, 0, 0);
232}
233
234static __inline__ __m512i __DEFAULT_FN_ATTRS512
235_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
236{
237  return (__m512i)__builtin_ia32_selectq_512(__M,
238                                             (__v8di) _mm512_broadcastq_epi64(__A),
239                                             (__v8di) __O);
240
241}
242
243static __inline__ __m512i __DEFAULT_FN_ATTRS512
244_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
245{
246  return (__m512i)__builtin_ia32_selectq_512(__M,
247                                             (__v8di) _mm512_broadcastq_epi64(__A),
248                                             (__v8di) _mm512_setzero_si512());
249}
250
251
252static __inline __m512 __DEFAULT_FN_ATTRS512
253_mm512_setzero_ps(void)
254{
255  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
256                                 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
257}
258
259#define _mm512_setzero _mm512_setzero_ps
260
261static  __inline __m512d __DEFAULT_FN_ATTRS512
262_mm512_setzero_pd(void)
263{
264  return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
265}
266
267static __inline __m512 __DEFAULT_FN_ATTRS512
268_mm512_set1_ps(float __w)
269{
270  return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
271                                 __w, __w, __w, __w, __w, __w, __w, __w  };
272}
273
274static __inline __m512d __DEFAULT_FN_ATTRS512
275_mm512_set1_pd(double __w)
276{
277  return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
278}
279
280static __inline __m512i __DEFAULT_FN_ATTRS512
281_mm512_set1_epi8(char __w)
282{
283  return __extension__ (__m512i)(__v64qi){
284    __w, __w, __w, __w, __w, __w, __w, __w,
285    __w, __w, __w, __w, __w, __w, __w, __w,
286    __w, __w, __w, __w, __w, __w, __w, __w,
287    __w, __w, __w, __w, __w, __w, __w, __w,
288    __w, __w, __w, __w, __w, __w, __w, __w,
289    __w, __w, __w, __w, __w, __w, __w, __w,
290    __w, __w, __w, __w, __w, __w, __w, __w,
291    __w, __w, __w, __w, __w, __w, __w, __w  };
292}
293
294static __inline __m512i __DEFAULT_FN_ATTRS512
295_mm512_set1_epi16(short __w)
296{
297  return __extension__ (__m512i)(__v32hi){
298    __w, __w, __w, __w, __w, __w, __w, __w,
299    __w, __w, __w, __w, __w, __w, __w, __w,
300    __w, __w, __w, __w, __w, __w, __w, __w,
301    __w, __w, __w, __w, __w, __w, __w, __w };
302}
303
304static __inline __m512i __DEFAULT_FN_ATTRS512
305_mm512_set1_epi32(int __s)
306{
307  return __extension__ (__m512i)(__v16si){
308    __s, __s, __s, __s, __s, __s, __s, __s,
309    __s, __s, __s, __s, __s, __s, __s, __s };
310}
311
312static __inline __m512i __DEFAULT_FN_ATTRS512
313_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
314{
315  return (__m512i)__builtin_ia32_selectd_512(__M,
316                                             (__v16si)_mm512_set1_epi32(__A),
317                                             (__v16si)_mm512_setzero_si512());
318}
319
320static __inline __m512i __DEFAULT_FN_ATTRS512
321_mm512_set1_epi64(long long __d)
322{
323  return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
324}
325
326static __inline __m512i __DEFAULT_FN_ATTRS512
327_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
328{
329  return (__m512i)__builtin_ia32_selectq_512(__M,
330                                             (__v8di)_mm512_set1_epi64(__A),
331                                             (__v8di)_mm512_setzero_si512());
332}
333
334static __inline__ __m512 __DEFAULT_FN_ATTRS512
335_mm512_broadcastss_ps(__m128 __A)
336{
337  return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
338                                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
339}
340
341static __inline __m512i __DEFAULT_FN_ATTRS512
342_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
343{
344  return __extension__ (__m512i)(__v16si)
345   { __D, __C, __B, __A, __D, __C, __B, __A,
346     __D, __C, __B, __A, __D, __C, __B, __A };
347}
348
349static __inline __m512i __DEFAULT_FN_ATTRS512
350_mm512_set4_epi64 (long long __A, long long __B, long long __C,
351       long long __D)
352{
353  return __extension__ (__m512i) (__v8di)
354   { __D, __C, __B, __A, __D, __C, __B, __A };
355}
356
357static __inline __m512d __DEFAULT_FN_ATTRS512
358_mm512_set4_pd (double __A, double __B, double __C, double __D)
359{
360  return __extension__ (__m512d)
361   { __D, __C, __B, __A, __D, __C, __B, __A };
362}
363
364static __inline __m512 __DEFAULT_FN_ATTRS512
365_mm512_set4_ps (float __A, float __B, float __C, float __D)
366{
367  return __extension__ (__m512)
368   { __D, __C, __B, __A, __D, __C, __B, __A,
369     __D, __C, __B, __A, __D, __C, __B, __A };
370}
371
/* Reversed-argument forms: each forwards to the matching _mm512_set4_*
   with its four arguments passed in the opposite order. */
#define _mm512_setr4_epi32(e0, e1, e2, e3) \
  _mm512_set4_epi32((e3), (e2), (e1), (e0))

#define _mm512_setr4_epi64(e0, e1, e2, e3) \
  _mm512_set4_epi64((e3), (e2), (e1), (e0))

#define _mm512_setr4_pd(e0, e1, e2, e3) \
  _mm512_set4_pd((e3), (e2), (e1), (e0))

#define _mm512_setr4_ps(e0, e1, e2, e3) \
  _mm512_set4_ps((e3), (e2), (e1), (e0))
383
384static __inline__ __m512d __DEFAULT_FN_ATTRS512
385_mm512_broadcastsd_pd(__m128d __A)
386{
387  return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
388                                          0, 0, 0, 0, 0, 0, 0, 0);
389}
390
391/* Cast between vector types */
392
393static __inline __m512d __DEFAULT_FN_ATTRS512
394_mm512_castpd256_pd512(__m256d __a)
395{
396  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
397}
398
399static __inline __m512 __DEFAULT_FN_ATTRS512
400_mm512_castps256_ps512(__m256 __a)
401{
402  return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
403                                          -1, -1, -1, -1, -1, -1, -1, -1);
404}
405
406static __inline __m128d __DEFAULT_FN_ATTRS512
407_mm512_castpd512_pd128(__m512d __a)
408{
409  return __builtin_shufflevector(__a, __a, 0, 1);
410}
411
412static __inline __m256d __DEFAULT_FN_ATTRS512
413_mm512_castpd512_pd256 (__m512d __A)
414{
415  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
416}
417
418static __inline __m128 __DEFAULT_FN_ATTRS512
419_mm512_castps512_ps128(__m512 __a)
420{
421  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
422}
423
424static __inline __m256 __DEFAULT_FN_ATTRS512
425_mm512_castps512_ps256 (__m512 __A)
426{
427  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
428}
429
430static __inline __m512 __DEFAULT_FN_ATTRS512
431_mm512_castpd_ps (__m512d __A)
432{
433  return (__m512) (__A);
434}
435
436static __inline __m512i __DEFAULT_FN_ATTRS512
437_mm512_castpd_si512 (__m512d __A)
438{
439  return (__m512i) (__A);
440}
441
442static __inline__ __m512d __DEFAULT_FN_ATTRS512
443_mm512_castpd128_pd512 (__m128d __A)
444{
445  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
446}
447
448static __inline __m512d __DEFAULT_FN_ATTRS512
449_mm512_castps_pd (__m512 __A)
450{
451  return (__m512d) (__A);
452}
453
454static __inline __m512i __DEFAULT_FN_ATTRS512
455_mm512_castps_si512 (__m512 __A)
456{
457  return (__m512i) (__A);
458}
459
460static __inline__ __m512 __DEFAULT_FN_ATTRS512
461_mm512_castps128_ps512 (__m128 __A)
462{
463    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
464}
465
466static __inline__ __m512i __DEFAULT_FN_ATTRS512
467_mm512_castsi128_si512 (__m128i __A)
468{
469   return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
470}
471
472static __inline__ __m512i __DEFAULT_FN_ATTRS512
473_mm512_castsi256_si512 (__m256i __A)
474{
475   return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
476}
477
478static __inline __m512 __DEFAULT_FN_ATTRS512
479_mm512_castsi512_ps (__m512i __A)
480{
481  return (__m512) (__A);
482}
483
484static __inline __m512d __DEFAULT_FN_ATTRS512
485_mm512_castsi512_pd (__m512i __A)
486{
487  return (__m512d) (__A);
488}
489
490static __inline __m128i __DEFAULT_FN_ATTRS512
491_mm512_castsi512_si128 (__m512i __A)
492{
493  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
494}
495
496static __inline __m256i __DEFAULT_FN_ATTRS512
497_mm512_castsi512_si256 (__m512i __A)
498{
499  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
500}
501
502static __inline__ __mmask16 __DEFAULT_FN_ATTRS
503_mm512_int2mask(int __a)
504{
505  return (__mmask16)__a;
506}
507
508static __inline__ int __DEFAULT_FN_ATTRS
509_mm512_mask2int(__mmask16 __a)
510{
511  return (int)__a;
512}
513
514/// Constructs a 512-bit floating-point vector of [8 x double] from a
515///    128-bit floating-point vector of [2 x double]. The lower 128 bits
516///    contain the value of the source vector. The upper 384 bits are set
517///    to zero.
518///
519/// \headerfile <x86intrin.h>
520///
521/// This intrinsic has no corresponding instruction.
522///
523/// \param __a
524///    A 128-bit vector of [2 x double].
525/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
526///    contain the value of the parameter. The upper 384 bits are set to zero.
527static __inline __m512d __DEFAULT_FN_ATTRS512
528_mm512_zextpd128_pd512(__m128d __a)
529{
530  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
531}
532
533/// Constructs a 512-bit floating-point vector of [8 x double] from a
534///    256-bit floating-point vector of [4 x double]. The lower 256 bits
535///    contain the value of the source vector. The upper 256 bits are set
536///    to zero.
537///
538/// \headerfile <x86intrin.h>
539///
540/// This intrinsic has no corresponding instruction.
541///
542/// \param __a
543///    A 256-bit vector of [4 x double].
544/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
545///    contain the value of the parameter. The upper 256 bits are set to zero.
546static __inline __m512d __DEFAULT_FN_ATTRS512
547_mm512_zextpd256_pd512(__m256d __a)
548{
549  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
550}
551
552/// Constructs a 512-bit floating-point vector of [16 x float] from a
553///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
554///    the value of the source vector. The upper 384 bits are set to zero.
555///
556/// \headerfile <x86intrin.h>
557///
558/// This intrinsic has no corresponding instruction.
559///
560/// \param __a
561///    A 128-bit vector of [4 x float].
562/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
563///    contain the value of the parameter. The upper 384 bits are set to zero.
564static __inline __m512 __DEFAULT_FN_ATTRS512
565_mm512_zextps128_ps512(__m128 __a)
566{
567  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
568}
569
570/// Constructs a 512-bit floating-point vector of [16 x float] from a
571///    256-bit floating-point vector of [8 x float]. The lower 256 bits contain
572///    the value of the source vector. The upper 256 bits are set to zero.
573///
574/// \headerfile <x86intrin.h>
575///
576/// This intrinsic has no corresponding instruction.
577///
578/// \param __a
579///    A 256-bit vector of [8 x float].
580/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
581///    contain the value of the parameter. The upper 256 bits are set to zero.
582static __inline __m512 __DEFAULT_FN_ATTRS512
583_mm512_zextps256_ps512(__m256 __a)
584{
585  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
586}
587
588/// Constructs a 512-bit integer vector from a 128-bit integer vector.
589///    The lower 128 bits contain the value of the source vector. The upper
590///    384 bits are set to zero.
591///
592/// \headerfile <x86intrin.h>
593///
594/// This intrinsic has no corresponding instruction.
595///
596/// \param __a
597///    A 128-bit integer vector.
598/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
599///    the parameter. The upper 384 bits are set to zero.
600static __inline __m512i __DEFAULT_FN_ATTRS512
601_mm512_zextsi128_si512(__m128i __a)
602{
603  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
604}
605
606/// Constructs a 512-bit integer vector from a 256-bit integer vector.
607///    The lower 256 bits contain the value of the source vector. The upper
608///    256 bits are set to zero.
609///
610/// \headerfile <x86intrin.h>
611///
612/// This intrinsic has no corresponding instruction.
613///
614/// \param __a
615///    A 256-bit integer vector.
616/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
617///    the parameter. The upper 256 bits are set to zero.
618static __inline __m512i __DEFAULT_FN_ATTRS512
619_mm512_zextsi256_si512(__m256i __a)
620{
621  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
622}
623
624/* Bitwise operators */
625static __inline__ __m512i __DEFAULT_FN_ATTRS512
626_mm512_and_epi32(__m512i __a, __m512i __b)
627{
628  return (__m512i)((__v16su)__a & (__v16su)__b);
629}
630
631static __inline__ __m512i __DEFAULT_FN_ATTRS512
632_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
633{
634  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
635                (__v16si) _mm512_and_epi32(__a, __b),
636                (__v16si) __src);
637}
638
639static __inline__ __m512i __DEFAULT_FN_ATTRS512
640_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
641{
642  return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
643                                         __k, __a, __b);
644}
645
646static __inline__ __m512i __DEFAULT_FN_ATTRS512
647_mm512_and_epi64(__m512i __a, __m512i __b)
648{
649  return (__m512i)((__v8du)__a & (__v8du)__b);
650}
651
652static __inline__ __m512i __DEFAULT_FN_ATTRS512
653_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
654{
655    return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
656                (__v8di) _mm512_and_epi64(__a, __b),
657                (__v8di) __src);
658}
659
660static __inline__ __m512i __DEFAULT_FN_ATTRS512
661_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
662{
663  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
664                                         __k, __a, __b);
665}
666
667static __inline__ __m512i __DEFAULT_FN_ATTRS512
668_mm512_andnot_si512 (__m512i __A, __m512i __B)
669{
670  return (__m512i)(~(__v8du)__A & (__v8du)__B);
671}
672
673static __inline__ __m512i __DEFAULT_FN_ATTRS512
674_mm512_andnot_epi32 (__m512i __A, __m512i __B)
675{
676  return (__m512i)(~(__v16su)__A & (__v16su)__B);
677}
678
679static __inline__ __m512i __DEFAULT_FN_ATTRS512
680_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
681{
682  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
683                                         (__v16si)_mm512_andnot_epi32(__A, __B),
684                                         (__v16si)__W);
685}
686
687static __inline__ __m512i __DEFAULT_FN_ATTRS512
688_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
689{
690  return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
691                                           __U, __A, __B);
692}
693
694static __inline__ __m512i __DEFAULT_FN_ATTRS512
695_mm512_andnot_epi64(__m512i __A, __m512i __B)
696{
697  return (__m512i)(~(__v8du)__A & (__v8du)__B);
698}
699
700static __inline__ __m512i __DEFAULT_FN_ATTRS512
701_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
702{
703  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
704                                          (__v8di)_mm512_andnot_epi64(__A, __B),
705                                          (__v8di)__W);
706}
707
708static __inline__ __m512i __DEFAULT_FN_ATTRS512
709_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
710{
711  return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
712                                           __U, __A, __B);
713}
714
715static __inline__ __m512i __DEFAULT_FN_ATTRS512
716_mm512_or_epi32(__m512i __a, __m512i __b)
717{
718  return (__m512i)((__v16su)__a | (__v16su)__b);
719}
720
721static __inline__ __m512i __DEFAULT_FN_ATTRS512
722_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
723{
724  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
725                                             (__v16si)_mm512_or_epi32(__a, __b),
726                                             (__v16si)__src);
727}
728
729static __inline__ __m512i __DEFAULT_FN_ATTRS512
730_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
731{
732  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
733}
734
735static __inline__ __m512i __DEFAULT_FN_ATTRS512
736_mm512_or_epi64(__m512i __a, __m512i __b)
737{
738  return (__m512i)((__v8du)__a | (__v8du)__b);
739}
740
741static __inline__ __m512i __DEFAULT_FN_ATTRS512
742_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
743{
744  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
745                                             (__v8di)_mm512_or_epi64(__a, __b),
746                                             (__v8di)__src);
747}
748
749static __inline__ __m512i __DEFAULT_FN_ATTRS512
750_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
751{
752  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
753}
754
755static __inline__ __m512i __DEFAULT_FN_ATTRS512
756_mm512_xor_epi32(__m512i __a, __m512i __b)
757{
758  return (__m512i)((__v16su)__a ^ (__v16su)__b);
759}
760
761static __inline__ __m512i __DEFAULT_FN_ATTRS512
762_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
763{
764  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
765                                            (__v16si)_mm512_xor_epi32(__a, __b),
766                                            (__v16si)__src);
767}
768
769static __inline__ __m512i __DEFAULT_FN_ATTRS512
770_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
771{
772  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
773}
774
775static __inline__ __m512i __DEFAULT_FN_ATTRS512
776_mm512_xor_epi64(__m512i __a, __m512i __b)
777{
778  return (__m512i)((__v8du)__a ^ (__v8du)__b);
779}
780
781static __inline__ __m512i __DEFAULT_FN_ATTRS512
782_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
783{
784  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
785                                             (__v8di)_mm512_xor_epi64(__a, __b),
786                                             (__v8di)__src);
787}
788
789static __inline__ __m512i __DEFAULT_FN_ATTRS512
790_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
791{
792  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
793}
794
795static __inline__ __m512i __DEFAULT_FN_ATTRS512
796_mm512_and_si512(__m512i __a, __m512i __b)
797{
798  return (__m512i)((__v8du)__a & (__v8du)__b);
799}
800
801static __inline__ __m512i __DEFAULT_FN_ATTRS512
802_mm512_or_si512(__m512i __a, __m512i __b)
803{
804  return (__m512i)((__v8du)__a | (__v8du)__b);
805}
806
807static __inline__ __m512i __DEFAULT_FN_ATTRS512
808_mm512_xor_si512(__m512i __a, __m512i __b)
809{
810  return (__m512i)((__v8du)__a ^ (__v8du)__b);
811}
812
813/* Arithmetic */
814
815static __inline __m512d __DEFAULT_FN_ATTRS512
816_mm512_add_pd(__m512d __a, __m512d __b)
817{
818  return (__m512d)((__v8df)__a + (__v8df)__b);
819}
820
821static __inline __m512 __DEFAULT_FN_ATTRS512
822_mm512_add_ps(__m512 __a, __m512 __b)
823{
824  return (__m512)((__v16sf)__a + (__v16sf)__b);
825}
826
827static __inline __m512d __DEFAULT_FN_ATTRS512
828_mm512_mul_pd(__m512d __a, __m512d __b)
829{
830  return (__m512d)((__v8df)__a * (__v8df)__b);
831}
832
833static __inline __m512 __DEFAULT_FN_ATTRS512
834_mm512_mul_ps(__m512 __a, __m512 __b)
835{
836  return (__m512)((__v16sf)__a * (__v16sf)__b);
837}
838
839static __inline __m512d __DEFAULT_FN_ATTRS512
840_mm512_sub_pd(__m512d __a, __m512d __b)
841{
842  return (__m512d)((__v8df)__a - (__v8df)__b);
843}
844
845static __inline __m512 __DEFAULT_FN_ATTRS512
846_mm512_sub_ps(__m512 __a, __m512 __b)
847{
848  return (__m512)((__v16sf)__a - (__v16sf)__b);
849}
850
851static __inline__ __m512i __DEFAULT_FN_ATTRS512
852_mm512_add_epi64 (__m512i __A, __m512i __B)
853{
854  return (__m512i) ((__v8du) __A + (__v8du) __B);
855}
856
857static __inline__ __m512i __DEFAULT_FN_ATTRS512
858_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
859{
860  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
861                                             (__v8di)_mm512_add_epi64(__A, __B),
862                                             (__v8di)__W);
863}
864
865static __inline__ __m512i __DEFAULT_FN_ATTRS512
866_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
867{
868  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
869                                             (__v8di)_mm512_add_epi64(__A, __B),
870                                             (__v8di)_mm512_setzero_si512());
871}
872
873static __inline__ __m512i __DEFAULT_FN_ATTRS512
874_mm512_sub_epi64 (__m512i __A, __m512i __B)
875{
876  return (__m512i) ((__v8du) __A - (__v8du) __B);
877}
878
879static __inline__ __m512i __DEFAULT_FN_ATTRS512
880_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
881{
882  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
883                                             (__v8di)_mm512_sub_epi64(__A, __B),
884                                             (__v8di)__W);
885}
886
887static __inline__ __m512i __DEFAULT_FN_ATTRS512
888_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
889{
890  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
891                                             (__v8di)_mm512_sub_epi64(__A, __B),
892                                             (__v8di)_mm512_setzero_si512());
893}
894
895static __inline__ __m512i __DEFAULT_FN_ATTRS512
896_mm512_add_epi32 (__m512i __A, __m512i __B)
897{
898  return (__m512i) ((__v16su) __A + (__v16su) __B);
899}
900
901static __inline__ __m512i __DEFAULT_FN_ATTRS512
902_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
903{
904  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
905                                             (__v16si)_mm512_add_epi32(__A, __B),
906                                             (__v16si)__W);
907}
908
909static __inline__ __m512i __DEFAULT_FN_ATTRS512
910_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
911{
912  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
913                                             (__v16si)_mm512_add_epi32(__A, __B),
914                                             (__v16si)_mm512_setzero_si512());
915}
916
917static __inline__ __m512i __DEFAULT_FN_ATTRS512
918_mm512_sub_epi32 (__m512i __A, __m512i __B)
919{
920  return (__m512i) ((__v16su) __A - (__v16su) __B);
921}
922
923static __inline__ __m512i __DEFAULT_FN_ATTRS512
924_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
925{
926  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
927                                             (__v16si)_mm512_sub_epi32(__A, __B),
928                                             (__v16si)__W);
929}
930
931static __inline__ __m512i __DEFAULT_FN_ATTRS512
932_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
933{
934  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
935                                             (__v16si)_mm512_sub_epi32(__A, __B),
936                                             (__v16si)_mm512_setzero_si512());
937}
938
/* Calls __builtin_ia32_maxpd512 on A and B (as 8 x double) with R, cast to
   int, as the rounding argument.  The whole expansion is parenthesised so
   that a postfix operator applied to the macro (e.g. a subscript) binds to
   the cast result rather than to the builtin call. */
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))
942
/* Masked form of _mm512_max_round_pd: __builtin_ia32_selectpd_512 picks,
   per lane and under mask U, between the max result and W.  The expansion
   is fully parenthesised so it behaves as a single primary expression. */
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                   (__v8df)(W)))
947
/* Masked form of _mm512_max_round_pd with a zero fallback:
   __builtin_ia32_selectpd_512 picks, under mask U, between the max result
   and an all-zero vector.  The expansion is fully parenthesised so it
   behaves as a single primary expression. */
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
952
953static  __inline__ __m512d __DEFAULT_FN_ATTRS512
954_mm512_max_pd(__m512d __A, __m512d __B)
955{
956  return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
957                                           _MM_FROUND_CUR_DIRECTION);
958}
959
960static __inline__ __m512d __DEFAULT_FN_ATTRS512
961_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
962{
963  return (__m512d)__builtin_ia32_selectpd_512(__U,
964                                              (__v8df)_mm512_max_pd(__A, __B),
965                                              (__v8df)__W);
966}
967
968static __inline__ __m512d __DEFAULT_FN_ATTRS512
969_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
970{
971  return (__m512d)__builtin_ia32_selectpd_512(__U,
972                                              (__v8df)_mm512_max_pd(__A, __B),
973                                              (__v8df)_mm512_setzero_pd());
974}
975
/* Calls __builtin_ia32_maxps512 on A and B (as 16 x float) with R, cast to
   int, as the rounding argument.  The whole expansion is parenthesised so
   that a postfix operator applied to the macro binds to the cast result
   rather than to the builtin call. */
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))
979
/* Masked form of _mm512_max_round_ps: __builtin_ia32_selectps_512 picks,
   per lane and under mask U, between the max result and W.  The expansion
   is fully parenthesised so it behaves as a single primary expression. */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                  (__v16sf)(W)))
984
/* Masked form of _mm512_max_round_ps with a zero fallback:
   __builtin_ia32_selectps_512 picks, under mask U, between the max result
   and an all-zero vector.  The expansion is fully parenthesised so it
   behaves as a single primary expression. */
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
989
990static  __inline__ __m512 __DEFAULT_FN_ATTRS512
991_mm512_max_ps(__m512 __A, __m512 __B)
992{
993  return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
994                                          _MM_FROUND_CUR_DIRECTION);
995}
996
997static __inline__ __m512 __DEFAULT_FN_ATTRS512
998_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
999{
1000  return (__m512)__builtin_ia32_selectps_512(__U,
1001                                             (__v16sf)_mm512_max_ps(__A, __B),
1002                                             (__v16sf)__W);
1003}
1004
1005static __inline__ __m512 __DEFAULT_FN_ATTRS512
1006_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1007{
1008  return (__m512)__builtin_ia32_selectps_512(__U,
1009                                             (__v16sf)_mm512_max_ps(__A, __B),
1010                                             (__v16sf)_mm512_setzero_ps());
1011}
1012
1013static __inline__ __m128 __DEFAULT_FN_ATTRS128
1014_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1015  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1016                (__v4sf) __B,
1017                (__v4sf) __W,
1018                (__mmask8) __U,
1019                _MM_FROUND_CUR_DIRECTION);
1020}
1021
1022static __inline__ __m128 __DEFAULT_FN_ATTRS128
1023_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1024  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1025                (__v4sf) __B,
1026                (__v4sf)  _mm_setzero_ps (),
1027                (__mmask8) __U,
1028                _MM_FROUND_CUR_DIRECTION);
1029}
1030
/* Calls __builtin_ia32_maxss_round_mask on A and B with an all-zero source
   operand, an all-ones mask and R (cast to int) as the rounding argument.
   The whole expansion is parenthesised so that a postfix operator applied
   to the macro binds to the cast result rather than to the builtin call. */
#define _mm_max_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
1036
/* Calls __builtin_ia32_maxss_round_mask on A and B with W as the source
   operand, mask U and R (cast to int) as the rounding argument.  The
   expansion is fully parenthesised so it behaves as a single primary
   expression. */
#define _mm_mask_max_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U), (int)(R)))
1042
/* Calls __builtin_ia32_maxss_round_mask on A and B with an all-zero source
   operand, mask U and R (cast to int) as the rounding argument.  The
   expansion is fully parenthesised so it behaves as a single primary
   expression. */
#define _mm_maskz_max_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1048
1049static __inline__ __m128d __DEFAULT_FN_ATTRS128
1050_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1051  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1052                (__v2df) __B,
1053                (__v2df) __W,
1054                (__mmask8) __U,
1055                _MM_FROUND_CUR_DIRECTION);
1056}
1057
1058static __inline__ __m128d __DEFAULT_FN_ATTRS128
1059_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1060  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1061                (__v2df) __B,
1062                (__v2df)  _mm_setzero_pd (),
1063                (__mmask8) __U,
1064                _MM_FROUND_CUR_DIRECTION);
1065}
1066
/* Calls __builtin_ia32_maxsd_round_mask on A and B with an all-zero source
   operand, an all-ones mask and R (cast to int) as the rounding argument.
   The whole expansion is parenthesised so that a postfix operator applied
   to the macro binds to the cast result rather than to the builtin call. */
#define _mm_max_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
1072
/* Calls __builtin_ia32_maxsd_round_mask on A and B with W as the source
   operand, mask U and R (cast to int) as the rounding argument.  The
   expansion is fully parenthesised so it behaves as a single primary
   expression. */
#define _mm_mask_max_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
1078
/* Calls __builtin_ia32_maxsd_round_mask on A and B with an all-zero source
   operand, mask U and R (cast to int) as the rounding argument.  The
   expansion is fully parenthesised so it behaves as a single primary
   expression. */
#define _mm_maskz_max_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1084
1085static __inline __m512i
1086__DEFAULT_FN_ATTRS512
1087_mm512_max_epi32(__m512i __A, __m512i __B)
1088{
1089  return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
1090}
1091
1092static __inline__ __m512i __DEFAULT_FN_ATTRS512
1093_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1094{
1095  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1096                                            (__v16si)_mm512_max_epi32(__A, __B),
1097                                            (__v16si)__W);
1098}
1099
1100static __inline__ __m512i __DEFAULT_FN_ATTRS512
1101_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1102{
1103  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1104                                            (__v16si)_mm512_max_epi32(__A, __B),
1105                                            (__v16si)_mm512_setzero_si512());
1106}
1107
1108static __inline __m512i __DEFAULT_FN_ATTRS512
1109_mm512_max_epu32(__m512i __A, __m512i __B)
1110{
1111  return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
1112}
1113
1114static __inline__ __m512i __DEFAULT_FN_ATTRS512
1115_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1116{
1117  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1118                                            (__v16si)_mm512_max_epu32(__A, __B),
1119                                            (__v16si)__W);
1120}
1121
1122static __inline__ __m512i __DEFAULT_FN_ATTRS512
1123_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1124{
1125  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1126                                            (__v16si)_mm512_max_epu32(__A, __B),
1127                                            (__v16si)_mm512_setzero_si512());
1128}
1129
1130static __inline __m512i __DEFAULT_FN_ATTRS512
1131_mm512_max_epi64(__m512i __A, __m512i __B)
1132{
1133  return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
1134}
1135
1136static __inline__ __m512i __DEFAULT_FN_ATTRS512
1137_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1138{
1139  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1140                                             (__v8di)_mm512_max_epi64(__A, __B),
1141                                             (__v8di)__W);
1142}
1143
1144static __inline__ __m512i __DEFAULT_FN_ATTRS512
1145_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1146{
1147  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1148                                             (__v8di)_mm512_max_epi64(__A, __B),
1149                                             (__v8di)_mm512_setzero_si512());
1150}
1151
1152static __inline __m512i __DEFAULT_FN_ATTRS512
1153_mm512_max_epu64(__m512i __A, __m512i __B)
1154{
1155  return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
1156}
1157
1158static __inline__ __m512i __DEFAULT_FN_ATTRS512
1159_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1160{
1161  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1162                                             (__v8di)_mm512_max_epu64(__A, __B),
1163                                             (__v8di)__W);
1164}
1165
1166static __inline__ __m512i __DEFAULT_FN_ATTRS512
1167_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1168{
1169  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1170                                             (__v8di)_mm512_max_epu64(__A, __B),
1171                                             (__v8di)_mm512_setzero_si512());
1172}
1173
/* Calls __builtin_ia32_minpd512 on A and B (as 8 x double) with R, cast to
   int, as the rounding argument.  The whole expansion is parenthesised so
   that a postfix operator applied to the macro (e.g. a subscript) binds to
   the cast result rather than to the builtin call. */
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))
1177
/* Masked form of _mm512_min_round_pd: __builtin_ia32_selectpd_512 picks,
   per lane and under mask U, between the min result and W.  The expansion
   is fully parenthesised so it behaves as a single primary expression. */
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                   (__v8df)(W)))
1182
/* Masked form of _mm512_min_round_pd with a zero fallback:
   __builtin_ia32_selectpd_512 picks, under mask U, between the min result
   and an all-zero vector.  The expansion is fully parenthesised so it
   behaves as a single primary expression. */
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
1187
1188static  __inline__ __m512d __DEFAULT_FN_ATTRS512
1189_mm512_min_pd(__m512d __A, __m512d __B)
1190{
1191  return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1192                                           _MM_FROUND_CUR_DIRECTION);
1193}
1194
1195static __inline__ __m512d __DEFAULT_FN_ATTRS512
1196_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1197{
1198  return (__m512d)__builtin_ia32_selectpd_512(__U,
1199                                              (__v8df)_mm512_min_pd(__A, __B),
1200                                              (__v8df)__W);
1201}
1202
1203static __inline__ __m512d __DEFAULT_FN_ATTRS512
1204_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1205{
1206  return (__m512d)__builtin_ia32_selectpd_512(__U,
1207                                              (__v8df)_mm512_min_pd(__A, __B),
1208                                              (__v8df)_mm512_setzero_pd());
1209}
1210
/* Calls __builtin_ia32_minps512 on A and B (as 16 x float) with R, cast to
   int, as the rounding argument.  The whole expansion is parenthesised so
   that a postfix operator applied to the macro binds to the cast result
   rather than to the builtin call. */
#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))
1214
/* Masked form of _mm512_min_round_ps: __builtin_ia32_selectps_512 picks,
   per lane and under mask U, between the min result and W.  The expansion
   is fully parenthesised so it behaves as a single primary expression. */
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                  (__v16sf)(W)))
1219
/* Masked form of _mm512_min_round_ps with a zero fallback:
   __builtin_ia32_selectps_512 picks, under mask U, between the min result
   and an all-zero vector.  The expansion is fully parenthesised so it
   behaves as a single primary expression. */
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
1224
1225static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1226_mm512_min_ps(__m512 __A, __m512 __B)
1227{
1228  return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1229                                          _MM_FROUND_CUR_DIRECTION);
1230}
1231
1232static __inline__ __m512 __DEFAULT_FN_ATTRS512
1233_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1234{
1235  return (__m512)__builtin_ia32_selectps_512(__U,
1236                                             (__v16sf)_mm512_min_ps(__A, __B),
1237                                             (__v16sf)__W);
1238}
1239
1240static __inline__ __m512 __DEFAULT_FN_ATTRS512
1241_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1242{
1243  return (__m512)__builtin_ia32_selectps_512(__U,
1244                                             (__v16sf)_mm512_min_ps(__A, __B),
1245                                             (__v16sf)_mm512_setzero_ps());
1246}
1247
1248static __inline__ __m128 __DEFAULT_FN_ATTRS128
1249_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1250  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1251                (__v4sf) __B,
1252                (__v4sf) __W,
1253                (__mmask8) __U,
1254                _MM_FROUND_CUR_DIRECTION);
1255}
1256
1257static __inline__ __m128 __DEFAULT_FN_ATTRS128
1258_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1259  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1260                (__v4sf) __B,
1261                (__v4sf)  _mm_setzero_ps (),
1262                (__mmask8) __U,
1263                _MM_FROUND_CUR_DIRECTION);
1264}
1265
/* Calls __builtin_ia32_minss_round_mask on A and B with an all-zero source
   operand, an all-ones mask and R (cast to int) as the rounding argument.
   The whole expansion is parenthesised so that a postfix operator applied
   to the macro binds to the cast result rather than to the builtin call. */
#define _mm_min_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
1271
/* Calls __builtin_ia32_minss_round_mask on A and B with W as the source
   operand, mask U and R (cast to int) as the rounding argument.  The
   expansion is fully parenthesised so it behaves as a single primary
   expression. */
#define _mm_mask_min_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U), (int)(R)))
1277
/* Calls __builtin_ia32_minss_round_mask on A and B with an all-zero source
   operand, mask U and R (cast to int) as the rounding argument.  The
   expansion is fully parenthesised so it behaves as a single primary
   expression. */
#define _mm_maskz_min_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1283
1284static __inline__ __m128d __DEFAULT_FN_ATTRS128
1285_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1286  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1287                (__v2df) __B,
1288                (__v2df) __W,
1289                (__mmask8) __U,
1290                _MM_FROUND_CUR_DIRECTION);
1291}
1292
1293static __inline__ __m128d __DEFAULT_FN_ATTRS128
1294_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1295  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1296                (__v2df) __B,
1297                (__v2df)  _mm_setzero_pd (),
1298                (__mmask8) __U,
1299                _MM_FROUND_CUR_DIRECTION);
1300}
1301
/* Calls __builtin_ia32_minsd_round_mask on A and B with an all-zero source
   operand, an all-ones mask and R (cast to int) as the rounding argument.
   The whole expansion is parenthesised so that a postfix operator applied
   to the macro binds to the cast result rather than to the builtin call. */
#define _mm_min_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
1307
/* Calls __builtin_ia32_minsd_round_mask on A and B with W as the source
   operand, mask U and R (cast to int) as the rounding argument.  The
   expansion is fully parenthesised so it behaves as a single primary
   expression. */
#define _mm_mask_min_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
1313
/* Calls __builtin_ia32_minsd_round_mask on A and B with an all-zero source
   operand, mask U and R (cast to int) as the rounding argument.  The
   expansion is fully parenthesised so it behaves as a single primary
   expression. */
#define _mm_maskz_min_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1319
1320static __inline __m512i
1321__DEFAULT_FN_ATTRS512
1322_mm512_min_epi32(__m512i __A, __m512i __B)
1323{
1324  return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
1325}
1326
1327static __inline__ __m512i __DEFAULT_FN_ATTRS512
1328_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1329{
1330  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1331                                            (__v16si)_mm512_min_epi32(__A, __B),
1332                                            (__v16si)__W);
1333}
1334
1335static __inline__ __m512i __DEFAULT_FN_ATTRS512
1336_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1337{
1338  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1339                                            (__v16si)_mm512_min_epi32(__A, __B),
1340                                            (__v16si)_mm512_setzero_si512());
1341}
1342
1343static __inline __m512i __DEFAULT_FN_ATTRS512
1344_mm512_min_epu32(__m512i __A, __m512i __B)
1345{
1346  return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
1347}
1348
1349static __inline__ __m512i __DEFAULT_FN_ATTRS512
1350_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1351{
1352  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1353                                            (__v16si)_mm512_min_epu32(__A, __B),
1354                                            (__v16si)__W);
1355}
1356
1357static __inline__ __m512i __DEFAULT_FN_ATTRS512
1358_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1359{
1360  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1361                                            (__v16si)_mm512_min_epu32(__A, __B),
1362                                            (__v16si)_mm512_setzero_si512());
1363}
1364
1365static __inline __m512i __DEFAULT_FN_ATTRS512
1366_mm512_min_epi64(__m512i __A, __m512i __B)
1367{
1368  return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
1369}
1370
1371static __inline__ __m512i __DEFAULT_FN_ATTRS512
1372_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1373{
1374  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1375                                             (__v8di)_mm512_min_epi64(__A, __B),
1376                                             (__v8di)__W);
1377}
1378
1379static __inline__ __m512i __DEFAULT_FN_ATTRS512
1380_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1381{
1382  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1383                                             (__v8di)_mm512_min_epi64(__A, __B),
1384                                             (__v8di)_mm512_setzero_si512());
1385}
1386
1387static __inline __m512i __DEFAULT_FN_ATTRS512
1388_mm512_min_epu64(__m512i __A, __m512i __B)
1389{
1390  return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
1391}
1392
1393static __inline__ __m512i __DEFAULT_FN_ATTRS512
1394_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1395{
1396  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1397                                             (__v8di)_mm512_min_epu64(__A, __B),
1398                                             (__v8di)__W);
1399}
1400
1401static __inline__ __m512i __DEFAULT_FN_ATTRS512
1402_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1403{
1404  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1405                                             (__v8di)_mm512_min_epu64(__A, __B),
1406                                             (__v8di)_mm512_setzero_si512());
1407}
1408
1409static __inline __m512i __DEFAULT_FN_ATTRS512
1410_mm512_mul_epi32(__m512i __X, __m512i __Y)
1411{
1412  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1413}
1414
1415static __inline __m512i __DEFAULT_FN_ATTRS512
1416_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1417{
1418  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1419                                             (__v8di)_mm512_mul_epi32(__X, __Y),
1420                                             (__v8di)__W);
1421}
1422
1423static __inline __m512i __DEFAULT_FN_ATTRS512
1424_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1425{
1426  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1427                                             (__v8di)_mm512_mul_epi32(__X, __Y),
1428                                             (__v8di)_mm512_setzero_si512 ());
1429}
1430
1431static __inline __m512i __DEFAULT_FN_ATTRS512
1432_mm512_mul_epu32(__m512i __X, __m512i __Y)
1433{
1434  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1435}
1436
1437static __inline __m512i __DEFAULT_FN_ATTRS512
1438_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1439{
1440  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1441                                             (__v8di)_mm512_mul_epu32(__X, __Y),
1442                                             (__v8di)__W);
1443}
1444
1445static __inline __m512i __DEFAULT_FN_ATTRS512
1446_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1447{
1448  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1449                                             (__v8di)_mm512_mul_epu32(__X, __Y),
1450                                             (__v8di)_mm512_setzero_si512 ());
1451}
1452
1453static __inline __m512i __DEFAULT_FN_ATTRS512
1454_mm512_mullo_epi32 (__m512i __A, __m512i __B)
1455{
1456  return (__m512i) ((__v16su) __A * (__v16su) __B);
1457}
1458
1459static __inline __m512i __DEFAULT_FN_ATTRS512
1460_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
1461{
1462  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1463                                             (__v16si)_mm512_mullo_epi32(__A, __B),
1464                                             (__v16si)_mm512_setzero_si512());
1465}
1466
1467static __inline __m512i __DEFAULT_FN_ATTRS512
1468_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1469{
1470  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1471                                             (__v16si)_mm512_mullo_epi32(__A, __B),
1472                                             (__v16si)__W);
1473}
1474
1475static __inline__ __m512i __DEFAULT_FN_ATTRS512
1476_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
1477  return (__m512i) ((__v8du) __A * (__v8du) __B);
1478}
1479
1480static __inline__ __m512i __DEFAULT_FN_ATTRS512
1481_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1482  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1483                                             (__v8di)_mm512_mullox_epi64(__A, __B),
1484                                             (__v8di)__W);
1485}
1486
/* Calls __builtin_ia32_sqrtpd512 on A (as 8 x double) with R, cast to int,
   as the rounding argument.  The whole expansion is parenthesised so that
   a postfix operator applied to the macro (e.g. a subscript) binds to the
   cast result rather than to the builtin call. */
#define _mm512_sqrt_round_pd(A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))
1489
/* Masked form of _mm512_sqrt_round_pd: __builtin_ia32_selectpd_512 picks,
   per lane and under mask U, between the sqrt result and W.  The expansion
   is fully parenthesised so it behaves as a single primary expression. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)(__m512d)(W)))
1494
/* Masked form of _mm512_sqrt_round_pd with a zero fallback:
   __builtin_ia32_selectpd_512 picks, under mask U, between the sqrt result
   and an all-zero vector.  The expansion is fully parenthesised so it
   behaves as a single primary expression. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)_mm512_setzero_pd()))
1499
1500static  __inline__ __m512d __DEFAULT_FN_ATTRS512
1501_mm512_sqrt_pd(__m512d __A)
1502{
1503  return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1504                                           _MM_FROUND_CUR_DIRECTION);
1505}
1506
1507static __inline__ __m512d __DEFAULT_FN_ATTRS512
1508_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1509{
1510  return (__m512d)__builtin_ia32_selectpd_512(__U,
1511                                              (__v8df)_mm512_sqrt_pd(__A),
1512                                              (__v8df)__W);
1513}
1514
1515static __inline__ __m512d __DEFAULT_FN_ATTRS512
1516_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1517{
1518  return (__m512d)__builtin_ia32_selectpd_512(__U,
1519                                              (__v8df)_mm512_sqrt_pd(__A),
1520                                              (__v8df)_mm512_setzero_pd());
1521}
1522
/* Calls __builtin_ia32_sqrtps512 on A (as 16 x float) with R, cast to int,
   as the rounding argument.  The whole expansion is parenthesised so that
   a postfix operator applied to the macro binds to the cast result rather
   than to the builtin call. */
#define _mm512_sqrt_round_ps(A, R) \
  ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))
1525
/* Masked form of _mm512_sqrt_round_ps: __builtin_ia32_selectps_512 picks,
   per lane and under mask U, between the sqrt result and W.  The expansion
   is fully parenthesised so it behaves as a single primary expression. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)(__m512)(W)))
1530
/* Masked form of _mm512_sqrt_round_ps with a zero fallback:
   __builtin_ia32_selectps_512 picks, under mask U, between the sqrt result
   and an all-zero vector.  The expansion is fully parenthesised so it
   behaves as a single primary expression. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)_mm512_setzero_ps()))
1535
1536static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1537_mm512_sqrt_ps(__m512 __A)
1538{
1539  return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1540                                          _MM_FROUND_CUR_DIRECTION);
1541}
1542
1543static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1544_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1545{
1546  return (__m512)__builtin_ia32_selectps_512(__U,
1547                                             (__v16sf)_mm512_sqrt_ps(__A),
1548                                             (__v16sf)__W);
1549}
1550
1551static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1552_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1553{
1554  return (__m512)__builtin_ia32_selectps_512(__U,
1555                                             (__v16sf)_mm512_sqrt_ps(__A),
1556                                             (__v16sf)_mm512_setzero_ps());
1557}
1558
1559static  __inline__ __m512d __DEFAULT_FN_ATTRS512
1560_mm512_rsqrt14_pd(__m512d __A)
1561{
1562  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1563                 (__v8df)
1564                 _mm512_setzero_pd (),
1565                 (__mmask8) -1);}
1566
1567static __inline__ __m512d __DEFAULT_FN_ATTRS512
1568_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1569{
1570  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1571                  (__v8df) __W,
1572                  (__mmask8) __U);
1573}
1574
1575static __inline__ __m512d __DEFAULT_FN_ATTRS512
1576_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1577{
1578  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1579                  (__v8df)
1580                  _mm512_setzero_pd (),
1581                  (__mmask8) __U);
1582}
1583
1584static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1585_mm512_rsqrt14_ps(__m512 __A)
1586{
1587  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1588                (__v16sf)
1589                _mm512_setzero_ps (),
1590                (__mmask16) -1);
1591}
1592
1593static __inline__ __m512 __DEFAULT_FN_ATTRS512
1594_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1595{
1596  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1597                 (__v16sf) __W,
1598                 (__mmask16) __U);
1599}
1600
1601static __inline__ __m512 __DEFAULT_FN_ATTRS512
1602_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1603{
1604  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1605                 (__v16sf)
1606                 _mm512_setzero_ps (),
1607                 (__mmask16) __U);
1608}
1609
1610static  __inline__ __m128 __DEFAULT_FN_ATTRS128
1611_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1612{
1613  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1614             (__v4sf) __B,
1615             (__v4sf)
1616             _mm_setzero_ps (),
1617             (__mmask8) -1);
1618}
1619
1620static __inline__ __m128 __DEFAULT_FN_ATTRS128
1621_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1622{
1623 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1624          (__v4sf) __B,
1625          (__v4sf) __W,
1626          (__mmask8) __U);
1627}
1628
1629static __inline__ __m128 __DEFAULT_FN_ATTRS128
1630_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1631{
1632 return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1633          (__v4sf) __B,
1634          (__v4sf) _mm_setzero_ps (),
1635          (__mmask8) __U);
1636}
1637
1638static  __inline__ __m128d __DEFAULT_FN_ATTRS128
1639_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1640{
1641  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1642              (__v2df) __B,
1643              (__v2df)
1644              _mm_setzero_pd (),
1645              (__mmask8) -1);
1646}
1647
1648static __inline__ __m128d __DEFAULT_FN_ATTRS128
1649_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1650{
1651 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1652          (__v2df) __B,
1653          (__v2df) __W,
1654          (__mmask8) __U);
1655}
1656
1657static __inline__ __m128d __DEFAULT_FN_ATTRS128
1658_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1659{
1660 return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1661          (__v2df) __B,
1662          (__v2df) _mm_setzero_pd (),
1663          (__mmask8) __U);
1664}
1665
1666static  __inline__ __m512d __DEFAULT_FN_ATTRS512
1667_mm512_rcp14_pd(__m512d __A)
1668{
1669  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1670               (__v8df)
1671               _mm512_setzero_pd (),
1672               (__mmask8) -1);
1673}
1674
1675static __inline__ __m512d __DEFAULT_FN_ATTRS512
1676_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1677{
1678  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1679                (__v8df) __W,
1680                (__mmask8) __U);
1681}
1682
1683static __inline__ __m512d __DEFAULT_FN_ATTRS512
1684_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1685{
1686  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1687                (__v8df)
1688                _mm512_setzero_pd (),
1689                (__mmask8) __U);
1690}
1691
1692static  __inline__ __m512 __DEFAULT_FN_ATTRS512
1693_mm512_rcp14_ps(__m512 __A)
1694{
1695  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1696              (__v16sf)
1697              _mm512_setzero_ps (),
1698              (__mmask16) -1);
1699}
1700
1701static __inline__ __m512 __DEFAULT_FN_ATTRS512
1702_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1703{
1704  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1705                   (__v16sf) __W,
1706                   (__mmask16) __U);
1707}
1708
1709static __inline__ __m512 __DEFAULT_FN_ATTRS512
1710_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1711{
1712  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1713                   (__v16sf)
1714                   _mm512_setzero_ps (),
1715                   (__mmask16) __U);
1716}
1717
1718static  __inline__ __m128 __DEFAULT_FN_ATTRS128
1719_mm_rcp14_ss(__m128 __A, __m128 __B)
1720{
1721  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1722                 (__v4sf) __B,
1723                 (__v4sf)
1724                 _mm_setzero_ps (),
1725                 (__mmask8) -1);
1726}
1727
1728static __inline__ __m128 __DEFAULT_FN_ATTRS128
1729_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1730{
1731 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1732          (__v4sf) __B,
1733          (__v4sf) __W,
1734          (__mmask8) __U);
1735}
1736
1737static __inline__ __m128 __DEFAULT_FN_ATTRS128
1738_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1739{
1740 return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1741          (__v4sf) __B,
1742          (__v4sf) _mm_setzero_ps (),
1743          (__mmask8) __U);
1744}
1745
1746static  __inline__ __m128d __DEFAULT_FN_ATTRS128
1747_mm_rcp14_sd(__m128d __A, __m128d __B)
1748{
1749  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1750            (__v2df) __B,
1751            (__v2df)
1752            _mm_setzero_pd (),
1753            (__mmask8) -1);
1754}
1755
1756static __inline__ __m128d __DEFAULT_FN_ATTRS128
1757_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1758{
1759 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1760          (__v2df) __B,
1761          (__v2df) __W,
1762          (__mmask8) __U);
1763}
1764
1765static __inline__ __m128d __DEFAULT_FN_ATTRS128
1766_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1767{
1768 return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1769          (__v2df) __B,
1770          (__v2df) _mm_setzero_pd (),
1771          (__mmask8) __U);
1772}
1773
1774static __inline __m512 __DEFAULT_FN_ATTRS512
1775_mm512_floor_ps(__m512 __A)
1776{
1777  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1778                                                  _MM_FROUND_FLOOR,
1779                                                  (__v16sf) __A, -1,
1780                                                  _MM_FROUND_CUR_DIRECTION);
1781}
1782
1783static __inline__ __m512 __DEFAULT_FN_ATTRS512
1784_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1785{
1786  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1787                   _MM_FROUND_FLOOR,
1788                   (__v16sf) __W, __U,
1789                   _MM_FROUND_CUR_DIRECTION);
1790}
1791
1792static __inline __m512d __DEFAULT_FN_ATTRS512
1793_mm512_floor_pd(__m512d __A)
1794{
1795  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1796                                                   _MM_FROUND_FLOOR,
1797                                                   (__v8df) __A, -1,
1798                                                   _MM_FROUND_CUR_DIRECTION);
1799}
1800
1801static __inline__ __m512d __DEFAULT_FN_ATTRS512
1802_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1803{
1804  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1805                _MM_FROUND_FLOOR,
1806                (__v8df) __W, __U,
1807                _MM_FROUND_CUR_DIRECTION);
1808}
1809
1810static __inline__ __m512 __DEFAULT_FN_ATTRS512
1811_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1812{
1813  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1814                   _MM_FROUND_CEIL,
1815                   (__v16sf) __W, __U,
1816                   _MM_FROUND_CUR_DIRECTION);
1817}
1818
1819static __inline __m512 __DEFAULT_FN_ATTRS512
1820_mm512_ceil_ps(__m512 __A)
1821{
1822  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1823                                                  _MM_FROUND_CEIL,
1824                                                  (__v16sf) __A, -1,
1825                                                  _MM_FROUND_CUR_DIRECTION);
1826}
1827
1828static __inline __m512d __DEFAULT_FN_ATTRS512
1829_mm512_ceil_pd(__m512d __A)
1830{
1831  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1832                                                   _MM_FROUND_CEIL,
1833                                                   (__v8df) __A, -1,
1834                                                   _MM_FROUND_CUR_DIRECTION);
1835}
1836
1837static __inline__ __m512d __DEFAULT_FN_ATTRS512
1838_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1839{
1840  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1841                _MM_FROUND_CEIL,
1842                (__v8df) __W, __U,
1843                _MM_FROUND_CUR_DIRECTION);
1844}
1845
1846static __inline __m512i __DEFAULT_FN_ATTRS512
1847_mm512_abs_epi64(__m512i __A)
1848{
1849  return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
1850}
1851
1852static __inline__ __m512i __DEFAULT_FN_ATTRS512
1853_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1854{
1855  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1856                                             (__v8di)_mm512_abs_epi64(__A),
1857                                             (__v8di)__W);
1858}
1859
1860static __inline__ __m512i __DEFAULT_FN_ATTRS512
1861_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1862{
1863  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1864                                             (__v8di)_mm512_abs_epi64(__A),
1865                                             (__v8di)_mm512_setzero_si512());
1866}
1867
1868static __inline __m512i __DEFAULT_FN_ATTRS512
1869_mm512_abs_epi32(__m512i __A)
1870{
1871  return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
1872}
1873
1874static __inline__ __m512i __DEFAULT_FN_ATTRS512
1875_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1876{
1877  return (__m512i)__builtin_ia32_selectd_512(__U,
1878                                             (__v16si)_mm512_abs_epi32(__A),
1879                                             (__v16si)__W);
1880}
1881
1882static __inline__ __m512i __DEFAULT_FN_ATTRS512
1883_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
1884{
1885  return (__m512i)__builtin_ia32_selectd_512(__U,
1886                                             (__v16si)_mm512_abs_epi32(__A),
1887                                             (__v16si)_mm512_setzero_si512());
1888}
1889
1890static __inline__ __m128 __DEFAULT_FN_ATTRS128
1891_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1892  __A = _mm_add_ss(__A, __B);
1893  return __builtin_ia32_selectss_128(__U, __A, __W);
1894}
1895
1896static __inline__ __m128 __DEFAULT_FN_ATTRS128
1897_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1898  __A = _mm_add_ss(__A, __B);
1899  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1900}
1901
/* Unmasked scalar float add with rounding argument R: addss_round_mask with a
 * zero pass-through vector and an all-ones mask.  Fully parenthesized so the
 * cast covers the whole call when a postfix operator follows the macro. */
#define _mm_add_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
1907
/* Merge-masked scalar float add with rounding argument R: addss_round_mask
 * with W as pass-through and U as mask.  Fully parenthesized so the cast
 * covers the whole call when a postfix operator follows the macro. */
#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))
1913
/* Zero-masked scalar float add with rounding argument R: addss_round_mask
 * with a zero pass-through vector and U as mask.  Fully parenthesized so the
 * cast covers the whole call when a postfix operator follows the macro. */
#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1919
1920static __inline__ __m128d __DEFAULT_FN_ATTRS128
1921_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1922  __A = _mm_add_sd(__A, __B);
1923  return __builtin_ia32_selectsd_128(__U, __A, __W);
1924}
1925
1926static __inline__ __m128d __DEFAULT_FN_ATTRS128
1927_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1928  __A = _mm_add_sd(__A, __B);
1929  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1930}
/* Unmasked scalar double add with rounding argument R: addsd_round_mask with
 * a zero pass-through vector and an all-ones mask.  Fully parenthesized so the
 * cast covers the whole call when a postfix operator follows the macro. */
#define _mm_add_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
1936
/* Merge-masked scalar double add with rounding argument R: addsd_round_mask
 * with W as pass-through and U as mask.  Fully parenthesized so the cast
 * covers the whole call when a postfix operator follows the macro. */
#define _mm_mask_add_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
1942
/* Zero-masked scalar double add with rounding argument R: addsd_round_mask
 * with a zero pass-through vector and U as mask.  Fully parenthesized so the
 * cast covers the whole call when a postfix operator follows the macro. */
#define _mm_maskz_add_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1948
1949static __inline__ __m512d __DEFAULT_FN_ATTRS512
1950_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1951  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1952                                              (__v8df)_mm512_add_pd(__A, __B),
1953                                              (__v8df)__W);
1954}
1955
1956static __inline__ __m512d __DEFAULT_FN_ATTRS512
1957_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1958  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1959                                              (__v8df)_mm512_add_pd(__A, __B),
1960                                              (__v8df)_mm512_setzero_pd());
1961}
1962
1963static __inline__ __m512 __DEFAULT_FN_ATTRS512
1964_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1965  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1966                                             (__v16sf)_mm512_add_ps(__A, __B),
1967                                             (__v16sf)__W);
1968}
1969
1970static __inline__ __m512 __DEFAULT_FN_ATTRS512
1971_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1972  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1973                                             (__v16sf)_mm512_add_ps(__A, __B),
1974                                             (__v16sf)_mm512_setzero_ps());
1975}
1976
/* Unmasked packed double add with rounding argument R via the addpd512
 * builtin.  Fully parenthesized so the cast covers the whole call when a
 * postfix operator follows the macro. */
#define _mm512_add_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))
1980
/* Merge-masked packed double add with rounding argument R: selectpd_512
 * combines _mm512_add_round_pd(A, B, R) with W under mask U.  Fully
 * parenthesized so the cast covers the whole call. */
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))
1985
/* Zero-masked packed double add with rounding argument R: selectpd_512
 * combines _mm512_add_round_pd(A, B, R) with a zero vector under mask U.
 * Fully parenthesized so the cast covers the whole call. */
#define _mm512_maskz_add_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
1990
/* Unmasked packed float add with rounding argument R via the addps512
 * builtin.  Fully parenthesized so the cast covers the whole call when a
 * postfix operator follows the macro. */
#define _mm512_add_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))
1994
/* Merge-masked packed float add with rounding argument R: selectps_512
 * combines _mm512_add_round_ps(A, B, R) with W under mask U.  Fully
 * parenthesized so the cast covers the whole call. */
#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))
1999
/* Zero-masked packed float add with rounding argument R: selectps_512
 * combines _mm512_add_round_ps(A, B, R) with a zero vector under mask U.
 * Fully parenthesized so the cast covers the whole call. */
#define _mm512_maskz_add_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2004
2005static __inline__ __m128 __DEFAULT_FN_ATTRS128
2006_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2007  __A = _mm_sub_ss(__A, __B);
2008  return __builtin_ia32_selectss_128(__U, __A, __W);
2009}
2010
2011static __inline__ __m128 __DEFAULT_FN_ATTRS128
2012_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2013  __A = _mm_sub_ss(__A, __B);
2014  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2015}
/* Unmasked scalar float subtract with rounding argument R: subss_round_mask
 * with a zero pass-through vector and an all-ones mask.  Fully parenthesized
 * so the cast covers the whole call when a postfix operator follows. */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
2021
/* Merge-masked scalar float subtract with rounding argument R:
 * subss_round_mask with W as pass-through and U as mask.  Fully parenthesized
 * so the cast covers the whole call when a postfix operator follows. */
#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))
2027
/* Zero-masked scalar float subtract with rounding argument R:
 * subss_round_mask with a zero pass-through vector and U as mask.  Fully
 * parenthesized so the cast covers the whole call. */
#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2033
2034static __inline__ __m128d __DEFAULT_FN_ATTRS128
2035_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2036  __A = _mm_sub_sd(__A, __B);
2037  return __builtin_ia32_selectsd_128(__U, __A, __W);
2038}
2039
2040static __inline__ __m128d __DEFAULT_FN_ATTRS128
2041_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2042  __A = _mm_sub_sd(__A, __B);
2043  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2044}
2045
/* Unmasked scalar double subtract with rounding argument R: subsd_round_mask
 * with a zero pass-through vector and an all-ones mask.  Fully parenthesized
 * so the cast covers the whole call when a postfix operator follows. */
#define _mm_sub_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
2051
/* Merge-masked scalar double subtract with rounding argument R:
 * subsd_round_mask with W as pass-through and U as mask.  Fully parenthesized
 * so the cast covers the whole call when a postfix operator follows. */
#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
2057
/* Zero-masked scalar double subtract with rounding argument R:
 * subsd_round_mask with a zero pass-through vector and U as mask.  Fully
 * parenthesized so the cast covers the whole call. */
#define _mm_maskz_sub_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2063
2064static __inline__ __m512d __DEFAULT_FN_ATTRS512
2065_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2066  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2067                                              (__v8df)_mm512_sub_pd(__A, __B),
2068                                              (__v8df)__W);
2069}
2070
2071static __inline__ __m512d __DEFAULT_FN_ATTRS512
2072_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2073  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2074                                              (__v8df)_mm512_sub_pd(__A, __B),
2075                                              (__v8df)_mm512_setzero_pd());
2076}
2077
2078static __inline__ __m512 __DEFAULT_FN_ATTRS512
2079_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2080  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2081                                             (__v16sf)_mm512_sub_ps(__A, __B),
2082                                             (__v16sf)__W);
2083}
2084
2085static __inline__ __m512 __DEFAULT_FN_ATTRS512
2086_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2087  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2088                                             (__v16sf)_mm512_sub_ps(__A, __B),
2089                                             (__v16sf)_mm512_setzero_ps());
2090}
2091
/* Unmasked packed double subtract with rounding argument R via the subpd512
 * builtin.  Fully parenthesized so the cast covers the whole call when a
 * postfix operator follows the macro. */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))
2095
/* Merge-masked packed double subtract with rounding argument R: selectpd_512
 * combines _mm512_sub_round_pd(A, B, R) with W under mask U.  Fully
 * parenthesized so the cast covers the whole call. */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))
2100
/* Zero-masked packed double subtract with rounding argument R: selectpd_512
 * combines _mm512_sub_round_pd(A, B, R) with a zero vector under mask U.
 * Fully parenthesized so the cast covers the whole call. */
#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
2105
/* Unmasked packed float subtract with rounding argument R via the subps512
 * builtin.  Fully parenthesized so the cast covers the whole call when a
 * postfix operator follows the macro. */
#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))
2109
/* Merge-masked packed float subtract with rounding argument R: selectps_512
 * combines _mm512_sub_round_ps(A, B, R) with W under mask U.  Fully
 * parenthesized so the cast covers the whole call. */
#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))
2114
/* Zero-masked packed float subtract with rounding argument R: selectps_512
 * combines _mm512_sub_round_ps(A, B, R) with a zero vector under mask U.
 * Fully parenthesized so the cast covers the whole call. */
#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2119
2120static __inline__ __m128 __DEFAULT_FN_ATTRS128
2121_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2122  __A = _mm_mul_ss(__A, __B);
2123  return __builtin_ia32_selectss_128(__U, __A, __W);
2124}
2125
2126static __inline__ __m128 __DEFAULT_FN_ATTRS128
2127_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2128  __A = _mm_mul_ss(__A, __B);
2129  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2130}
/* Unmasked scalar float multiply with rounding argument R: mulss_round_mask
 * with a zero pass-through vector and an all-ones mask.  Fully parenthesized
 * so the cast covers the whole call when a postfix operator follows. */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
2136
/* Merge-masked scalar float multiply with rounding argument R:
 * mulss_round_mask with W as pass-through and U as mask.  Fully parenthesized
 * so the cast covers the whole call when a postfix operator follows. */
#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))
2142
/* Zero-masked scalar float multiply with rounding argument R:
 * mulss_round_mask with a zero pass-through vector and U as mask.  Fully
 * parenthesized so the cast covers the whole call. */
#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2148
2149static __inline__ __m128d __DEFAULT_FN_ATTRS128
2150_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2151  __A = _mm_mul_sd(__A, __B);
2152  return __builtin_ia32_selectsd_128(__U, __A, __W);
2153}
2154
2155static __inline__ __m128d __DEFAULT_FN_ATTRS128
2156_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2157  __A = _mm_mul_sd(__A, __B);
2158  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2159}
2160
/* Unmasked scalar double multiply with rounding argument R: mulsd_round_mask
 * with a zero pass-through vector and an all-ones mask.  Fully parenthesized
 * so the cast covers the whole call when a postfix operator follows. */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
2166
/* Merge-masked scalar double multiply with rounding argument R:
 * mulsd_round_mask with W as pass-through and U as mask.  Fully parenthesized
 * so the cast covers the whole call when a postfix operator follows. */
#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
2172
/* Zero-masked scalar double multiply with rounding argument R:
 * mulsd_round_mask with a zero pass-through vector and U as mask.  Fully
 * parenthesized so the cast covers the whole call. */
#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2178
2179static __inline__ __m512d __DEFAULT_FN_ATTRS512
2180_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2181  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2182                                              (__v8df)_mm512_mul_pd(__A, __B),
2183                                              (__v8df)__W);
2184}
2185
2186static __inline__ __m512d __DEFAULT_FN_ATTRS512
2187_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2188  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2189                                              (__v8df)_mm512_mul_pd(__A, __B),
2190                                              (__v8df)_mm512_setzero_pd());
2191}
2192
2193static __inline__ __m512 __DEFAULT_FN_ATTRS512
2194_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2195  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2196                                             (__v16sf)_mm512_mul_ps(__A, __B),
2197                                             (__v16sf)__W);
2198}
2199
2200static __inline__ __m512 __DEFAULT_FN_ATTRS512
2201_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2202  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2203                                             (__v16sf)_mm512_mul_ps(__A, __B),
2204                                             (__v16sf)_mm512_setzero_ps());
2205}
2206
/* Unmasked packed double multiply with rounding argument R via the mulpd512
 * builtin.  Fully parenthesized so the cast covers the whole call when a
 * postfix operator follows the macro. */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))
2210
/* Merge-masked packed double multiply with rounding argument R: selectpd_512
 * combines _mm512_mul_round_pd(A, B, R) with W under mask U.  Fully
 * parenthesized so the cast covers the whole call. */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))
2215
/* Zero-masked packed double multiply with rounding argument R: selectpd_512
 * combines _mm512_mul_round_pd(A, B, R) with a zero vector under mask U.
 * Fully parenthesized so the cast covers the whole call. */
#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
2220
/* Unmasked packed float multiply with rounding argument R via the mulps512
 * builtin.  Fully parenthesized so the cast covers the whole call when a
 * postfix operator follows the macro. */
#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))
2224
/* Merge-masked packed float multiply with rounding argument R: selectps_512
 * combines _mm512_mul_round_ps(A, B, R) with W under mask U.  Fully
 * parenthesized so the cast covers the whole call. */
#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))
2229
/* Zero-masked packed float multiply with rounding argument R: selectps_512
 * combines _mm512_mul_round_ps(A, B, R) with a zero vector under mask U.
 * Fully parenthesized so the cast covers the whole call. */
#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2234
2235static __inline__ __m128 __DEFAULT_FN_ATTRS128
2236_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2237  __A = _mm_div_ss(__A, __B);
2238  return __builtin_ia32_selectss_128(__U, __A, __W);
2239}
2240
2241static __inline__ __m128 __DEFAULT_FN_ATTRS128
2242_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2243  __A = _mm_div_ss(__A, __B);
2244  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2245}
2246
/* Unmasked scalar float divide with rounding argument R: divss_round_mask
 * with a zero pass-through vector and an all-ones mask.  Fully parenthesized
 * so the cast covers the whole call when a postfix operator follows. */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
2252
/* Merge-masked scalar float divide with rounding argument R:
 * divss_round_mask with W as pass-through and U as mask.  Fully parenthesized
 * so the cast covers the whole call when a postfix operator follows. */
#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))
2258
/* Zero-masked scalar float divide with rounding argument R:
 * divss_round_mask with a zero pass-through vector and U as mask.  Fully
 * parenthesized so the cast covers the whole call. */
#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2264
2265static __inline__ __m128d __DEFAULT_FN_ATTRS128
2266_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2267  __A = _mm_div_sd(__A, __B);
2268  return __builtin_ia32_selectsd_128(__U, __A, __W);
2269}
2270
2271static __inline__ __m128d __DEFAULT_FN_ATTRS128
2272_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2273  __A = _mm_div_sd(__A, __B);
2274  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2275}
2276
/* Unmasked scalar double divide with rounding argument R: divsd_round_mask
 * with a zero pass-through vector and an all-ones mask.  Fully parenthesized
 * so the cast covers the whole call when a postfix operator follows. */
#define _mm_div_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
2282
/* Merge-masked scalar double divide with rounding argument R:
 * divsd_round_mask with W as pass-through and U as mask.  Fully parenthesized
 * so the cast covers the whole call when a postfix operator follows. */
#define _mm_mask_div_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
2288
/* Zero-masked scalar double divide with rounding argument R:
 * divsd_round_mask with a zero pass-through vector and U as mask.  Fully
 * parenthesized so the cast covers the whole call. */
#define _mm_maskz_div_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_divsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2294
2295static __inline __m512d __DEFAULT_FN_ATTRS512
2296_mm512_div_pd(__m512d __a, __m512d __b)
2297{
2298  return (__m512d)((__v8df)__a/(__v8df)__b);
2299}
2300
2301static __inline__ __m512d __DEFAULT_FN_ATTRS512
2302_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2303  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2304                                              (__v8df)_mm512_div_pd(__A, __B),
2305                                              (__v8df)__W);
2306}
2307
2308static __inline__ __m512d __DEFAULT_FN_ATTRS512
2309_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2310  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2311                                              (__v8df)_mm512_div_pd(__A, __B),
2312                                              (__v8df)_mm512_setzero_pd());
2313}
2314
2315static __inline __m512 __DEFAULT_FN_ATTRS512
2316_mm512_div_ps(__m512 __a, __m512 __b)
2317{
2318  return (__m512)((__v16sf)__a/(__v16sf)__b);
2319}
2320
2321static __inline__ __m512 __DEFAULT_FN_ATTRS512
2322_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2323  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2324                                             (__v16sf)_mm512_div_ps(__A, __B),
2325                                             (__v16sf)__W);
2326}
2327
2328static __inline__ __m512 __DEFAULT_FN_ATTRS512
2329_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2330  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2331                                             (__v16sf)_mm512_div_ps(__A, __B),
2332                                             (__v16sf)_mm512_setzero_ps());
2333}
2334
/* Forwards A and B (as __v8df) to __builtin_ia32_divpd512 with R as the
 * rounding argument.  The expansion is fully parenthesized so that postfix
 * operators written after the macro apply to the cast result. */
#define _mm512_div_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_divpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))
2338
/* Evaluates _mm512_div_round_pd(A, B, R) and passes the result, together
 * with W, to the 512-bit double select builtin under mask U.  The expansion
 * is fully parenthesized so that postfix operators written after the macro
 * apply to the cast result. */
#define _mm512_mask_div_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))
2343
/* Evaluates _mm512_div_round_pd(A, B, R) and passes the result, together
 * with an all-zero vector, to the 512-bit double select builtin under mask
 * U.  The expansion is fully parenthesized so that postfix operators written
 * after the macro apply to the cast result. */
#define _mm512_maskz_div_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_div_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
2348
/* Forwards A and B (as __v16sf) to __builtin_ia32_divps512 with R as the
 * rounding argument.  The expansion is fully parenthesized so that postfix
 * operators written after the macro apply to the cast result. */
#define _mm512_div_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))
2352
/* Evaluates _mm512_div_round_ps(A, B, R) and passes the result, together
 * with W, to the 512-bit float select builtin under mask U.  The expansion
 * is fully parenthesized so that postfix operators written after the macro
 * apply to the cast result. */
#define _mm512_mask_div_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))
2357
/* Evaluates _mm512_div_round_ps(A, B, R) and passes the result, together
 * with an all-zero vector, to the 512-bit float select builtin under mask
 * U.  The expansion is fully parenthesized so that postfix operators written
 * after the macro apply to the cast result. */
#define _mm512_maskz_div_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_div_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2362
/* Forwards A (as __v16sf) and the integer control B to
 * __builtin_ia32_rndscaleps_mask with an undefined third vector, an all-ones
 * mask and _MM_FROUND_CUR_DIRECTION.  The expansion is fully parenthesized
 * so that postfix operators written after the macro apply to the cast
 * result. */
#define _mm512_roundscale_ps(A, B) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))
2368
/* Note the argument roles: C is the vector fed to the builtin as its first
 * operand, imm the integer control, A the vector passed alongside the mask
 * and B the mask itself.  Uses _MM_FROUND_CUR_DIRECTION.  The expansion is
 * fully parenthesized so that postfix operators written after the macro
 * apply to the cast result. */
#define _mm512_mask_roundscale_ps(A, B, C, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          _MM_FROUND_CUR_DIRECTION))
2373
/* Note the argument roles: A is the mask, B the vector fed to the builtin
 * as its first operand and imm the integer control; an all-zero vector is
 * passed alongside the mask.  Uses _MM_FROUND_CUR_DIRECTION.  The expansion
 * is fully parenthesized so that postfix operators written after the macro
 * apply to the cast result. */
#define _mm512_maskz_roundscale_ps(A, B, imm) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), \
                                          _MM_FROUND_CUR_DIRECTION))
2379
/* Note the argument roles: C is the vector fed to the builtin as its first
 * operand, imm the integer control, A the vector passed alongside the mask,
 * B the mask itself and R the rounding argument.  The expansion is fully
 * parenthesized so that postfix operators written after the macro apply to
 * the cast result. */
#define _mm512_mask_roundscale_round_ps(A, B, C, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(C), (int)(imm), \
                                          (__v16sf)(__m512)(A), (__mmask16)(B), \
                                          (int)(R)))
2384
/* Note the argument roles: A is the mask, B the vector fed to the builtin
 * as its first operand, imm the integer control and R the rounding
 * argument; an all-zero vector is passed alongside the mask.  The expansion
 * is fully parenthesized so that postfix operators written after the macro
 * apply to the cast result. */
#define _mm512_maskz_roundscale_round_ps(A, B, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(B), (int)(imm), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(A), (int)(R)))
2389
/* Forwards A (as __v16sf) and the integer control imm to
 * __builtin_ia32_rndscaleps_mask with an undefined third vector, an all-ones
 * mask and R as the rounding argument.  The expansion is fully parenthesized
 * so that postfix operators written after the macro apply to the cast
 * result. */
#define _mm512_roundscale_round_ps(A, imm, R) \
  ((__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(imm), \
                                          (__v16sf)_mm512_undefined_ps(), \
                                          (__mmask16)-1, (int)(R)))
2394
/* Forwards A (as __v8df) and the integer control B to
 * __builtin_ia32_rndscalepd_mask with an undefined third vector, an all-ones
 * mask and _MM_FROUND_CUR_DIRECTION.  The expansion is fully parenthesized
 * so that postfix operators written after the macro apply to the cast
 * result. */
#define _mm512_roundscale_pd(A, B) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(B), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))
2400
/* Note the argument roles: C is the vector fed to the builtin as its first
 * operand, imm the integer control, A the vector passed alongside the mask
 * and B the mask itself.  Uses _MM_FROUND_CUR_DIRECTION.  The expansion is
 * fully parenthesized so that postfix operators written after the macro
 * apply to the cast result. */
#define _mm512_mask_roundscale_pd(A, B, C, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           _MM_FROUND_CUR_DIRECTION))
2405
/* Note the argument roles: A is the mask, B the vector fed to the builtin
 * as its first operand and imm the integer control; an all-zero vector is
 * passed alongside the mask.  Uses _MM_FROUND_CUR_DIRECTION.  The expansion
 * is fully parenthesized so that postfix operators written after the macro
 * apply to the cast result. */
#define _mm512_maskz_roundscale_pd(A, B, imm) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), \
                                           _MM_FROUND_CUR_DIRECTION))
2411
/* Note the argument roles: C is the vector fed to the builtin as its first
 * operand, imm the integer control, A the vector passed alongside the mask,
 * B the mask itself and R the rounding argument.  The expansion is fully
 * parenthesized so that postfix operators written after the macro apply to
 * the cast result. */
#define _mm512_mask_roundscale_round_pd(A, B, C, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(C), (int)(imm), \
                                           (__v8df)(__m512d)(A), (__mmask8)(B), \
                                           (int)(R)))
2416
/* Note the argument roles: A is the mask, B the vector fed to the builtin
 * as its first operand, imm the integer control and R the rounding
 * argument; an all-zero vector is passed alongside the mask.  The expansion
 * is fully parenthesized so that postfix operators written after the macro
 * apply to the cast result. */
#define _mm512_maskz_roundscale_round_pd(A, B, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(B), (int)(imm), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(A), (int)(R)))
2421
/* Forwards A (as __v8df) and the integer control imm to
 * __builtin_ia32_rndscalepd_mask with an undefined third vector, an all-ones
 * mask and R as the rounding argument.  The expansion is fully parenthesized
 * so that postfix operators written after the macro apply to the cast
 * result. */
#define _mm512_roundscale_round_pd(A, imm, R) \
  ((__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(__m512d)(A), (int)(imm), \
                                           (__v8df)_mm512_undefined_pd(), \
                                           (__mmask8)-1, (int)(R)))
2426
/* Forwards A, B and C (as __v8df) to __builtin_ia32_vfmaddpd512_mask with an
 * all-ones mask and R as the rounding argument.  The expansion is fully
 * parenthesized so that postfix operators written after the macro apply to
 * the cast result. */
#define _mm512_fmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))
2432
2433
/* Forwards A, B and C (as __v8df) to __builtin_ia32_vfmaddpd512_mask with
 * mask U and R as the rounding argument.  The expansion is fully
 * parenthesized so that postfix operators written after the macro apply to
 * the cast result. */
#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))
2439
2440
/* Forwards A, B and C (as __v8df) to __builtin_ia32_vfmaddpd512_mask3 with
 * mask U and R as the rounding argument.  The expansion is fully
 * parenthesized so that postfix operators written after the macro apply to
 * the cast result. */
#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2446
2447
/* Forwards A, B and C (as __v8df) to __builtin_ia32_vfmaddpd512_maskz with
 * mask U and R as the rounding argument.  The expansion is fully
 * parenthesized so that postfix operators written after the macro apply to
 * the cast result. */
#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2453
2454
/* Expressed through the add builtin: forwards A, B and the negation of C to
 * __builtin_ia32_vfmaddpd512_mask with an all-ones mask and R as the
 * rounding argument.  The expansion is fully parenthesized so that postfix
 * operators written after the macro apply to the cast result. */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))
2460
2461
/* Expressed through the add builtin: forwards A, B and the negation of C to
 * __builtin_ia32_vfmaddpd512_mask with mask U and R as the rounding
 * argument.  The expansion is fully parenthesized so that postfix operators
 * written after the macro apply to the cast result. */
#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))
2467
2468
/* Expressed through the add builtin: forwards A, B and the negation of C to
 * __builtin_ia32_vfmaddpd512_maskz with mask U and R as the rounding
 * argument.  The expansion is fully parenthesized so that postfix operators
 * written after the macro apply to the cast result. */
#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2474
2475
/* Expressed through the add builtin: forwards the negation of A, then B and
 * C, to __builtin_ia32_vfmaddpd512_mask with an all-ones mask and R as the
 * rounding argument.  The expansion is fully parenthesized so that postfix
 * operators written after the macro apply to the cast result. */
#define _mm512_fnmadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))
2481
2482
/* Expressed through the add builtin: forwards the negation of A, then B and
 * C, to __builtin_ia32_vfmaddpd512_mask3 with mask U and R as the rounding
 * argument.  The expansion is fully parenthesized so that postfix operators
 * written after the macro apply to the cast result. */
#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2488
2489
/* Expressed through the add builtin: forwards the negation of A, then B and
 * C, to __builtin_ia32_vfmaddpd512_maskz with mask U and R as the rounding
 * argument.  The expansion is fully parenthesized so that postfix operators
 * written after the macro apply to the cast result. */
#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2495
2496
/* Expressed through the add builtin: forwards the negation of A, then B,
 * then the negation of C, to __builtin_ia32_vfmaddpd512_mask with an
 * all-ones mask and R as the rounding argument.  The expansion is fully
 * parenthesized so that postfix operators written after the macro apply to
 * the cast result. */
#define _mm512_fnmsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask(-(__v8df)(__m512d)(A), \
                                            (__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)-1, (int)(R)))
2502
2503
/* Expressed through the add builtin: forwards the negation of A, then B,
 * then the negation of C, to __builtin_ia32_vfmaddpd512_maskz with mask U
 * and R as the rounding argument.  The expansion is fully parenthesized so
 * that postfix operators written after the macro apply to the cast result. */
#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_maskz(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             -(__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
2509
2510
2511static __inline__ __m512d __DEFAULT_FN_ATTRS512
2512_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2513{
2514  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2515                                                    (__v8df) __B,
2516                                                    (__v8df) __C,
2517                                                    (__mmask8) -1,
2518                                                    _MM_FROUND_CUR_DIRECTION);
2519}
2520
2521static __inline__ __m512d __DEFAULT_FN_ATTRS512
2522_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2523{
2524  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2525                                                    (__v8df) __B,
2526                                                    (__v8df) __C,
2527                                                    (__mmask8) __U,
2528                                                    _MM_FROUND_CUR_DIRECTION);
2529}
2530
2531static __inline__ __m512d __DEFAULT_FN_ATTRS512
2532_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2533{
2534  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
2535                                                     (__v8df) __B,
2536                                                     (__v8df) __C,
2537                                                     (__mmask8) __U,
2538                                                     _MM_FROUND_CUR_DIRECTION);
2539}
2540
2541static __inline__ __m512d __DEFAULT_FN_ATTRS512
2542_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2543{
2544  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2545                                                     (__v8df) __B,
2546                                                     (__v8df) __C,
2547                                                     (__mmask8) __U,
2548                                                     _MM_FROUND_CUR_DIRECTION);
2549}
2550
2551static __inline__ __m512d __DEFAULT_FN_ATTRS512
2552_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2553{
2554  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2555                                                    (__v8df) __B,
2556                                                    -(__v8df) __C,
2557                                                    (__mmask8) -1,
2558                                                    _MM_FROUND_CUR_DIRECTION);
2559}
2560
2561static __inline__ __m512d __DEFAULT_FN_ATTRS512
2562_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2563{
2564  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2565                                                    (__v8df) __B,
2566                                                    -(__v8df) __C,
2567                                                    (__mmask8) __U,
2568                                                    _MM_FROUND_CUR_DIRECTION);
2569}
2570
2571static __inline__ __m512d __DEFAULT_FN_ATTRS512
2572_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2573{
2574  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
2575                                                     (__v8df) __B,
2576                                                     -(__v8df) __C,
2577                                                     (__mmask8) __U,
2578                                                     _MM_FROUND_CUR_DIRECTION);
2579}
2580
2581static __inline__ __m512d __DEFAULT_FN_ATTRS512
2582_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
2583{
2584  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2585                                                    -(__v8df) __B,
2586                                                    (__v8df) __C,
2587                                                    (__mmask8) -1,
2588                                                    _MM_FROUND_CUR_DIRECTION);
2589}
2590
2591static __inline__ __m512d __DEFAULT_FN_ATTRS512
2592_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2593{
2594  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
2595                                                     (__v8df) __B,
2596                                                     (__v8df) __C,
2597                                                     (__mmask8) __U,
2598                                                     _MM_FROUND_CUR_DIRECTION);
2599}
2600
2601static __inline__ __m512d __DEFAULT_FN_ATTRS512
2602_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2603{
2604  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2605                                                     (__v8df) __B,
2606                                                     (__v8df) __C,
2607                                                     (__mmask8) __U,
2608                                                     _MM_FROUND_CUR_DIRECTION);
2609}
2610
2611static __inline__ __m512d __DEFAULT_FN_ATTRS512
2612_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
2613{
2614  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
2615                                                    -(__v8df) __B,
2616                                                    -(__v8df) __C,
2617                                                    (__mmask8) -1,
2618                                                    _MM_FROUND_CUR_DIRECTION);
2619}
2620
2621static __inline__ __m512d __DEFAULT_FN_ATTRS512
2622_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2623{
2624  return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
2625                                                     (__v8df) __B,
2626                                                     -(__v8df) __C,
2627                                                     (__mmask8) __U,
2628                                                     _MM_FROUND_CUR_DIRECTION);
2629}
2630
/* Forwards A, B and C (as __v16sf) to __builtin_ia32_vfmaddps512_mask with
 * an all-ones mask and R as the rounding argument.  The expansion is fully
 * parenthesized so that postfix operators written after the macro apply to
 * the cast result. */
#define _mm512_fmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))
2636
2637
/* Forwards A, B and C (as __v16sf) to __builtin_ia32_vfmaddps512_mask with
 * mask U and R as the rounding argument.  The expansion is fully
 * parenthesized so that postfix operators written after the macro apply to
 * the cast result. */
#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))
2643
2644
/* Forwards A, B and C (as __v16sf) to __builtin_ia32_vfmaddps512_mask3 with
 * mask U and R as the rounding argument.  The expansion is fully
 * parenthesized so that postfix operators written after the macro apply to
 * the cast result. */
#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2650
2651
/* Forwards A, B and C (as __v16sf) to __builtin_ia32_vfmaddps512_maskz with
 * mask U and R as the rounding argument.  The expansion is fully
 * parenthesized so that postfix operators written after the macro apply to
 * the cast result. */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2657
2658
/* Expressed through the add builtin: forwards A, B and the negation of C to
 * __builtin_ia32_vfmaddps512_mask with an all-ones mask and R as the
 * rounding argument.  The expansion is fully parenthesized so that postfix
 * operators written after the macro apply to the cast result. */
#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))
2664
2665
/* Expressed through the add builtin: forwards A, B and the negation of C to
 * __builtin_ia32_vfmaddps512_mask with mask U and R as the rounding
 * argument.  The expansion is fully parenthesized so that postfix operators
 * written after the macro apply to the cast result. */
#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))
2671
2672
/* Expressed through the add builtin: forwards A, B and the negation of C to
 * __builtin_ia32_vfmaddps512_maskz with mask U and R as the rounding
 * argument.  The expansion is fully parenthesized so that postfix operators
 * written after the macro apply to the cast result. */
#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2678
2679
/* Expressed through the add builtin: forwards A, the negation of B, and C
 * to __builtin_ia32_vfmaddps512_mask with an all-ones mask and R as the
 * rounding argument.  The expansion is fully parenthesized so that postfix
 * operators written after the macro apply to the cast result. */
#define _mm512_fnmadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))
2685
2686
/* Expressed through the add builtin: forwards the negation of A, then B and
 * C, to __builtin_ia32_vfmaddps512_mask3 with mask U and R as the rounding
 * argument.  The expansion is fully parenthesized so that postfix operators
 * written after the macro apply to the cast result. */
#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2692
2693
/* Expressed through the add builtin: forwards the negation of A, then B and
 * C, to __builtin_ia32_vfmaddps512_maskz with mask U and R as the rounding
 * argument.  The expansion is fully parenthesized so that postfix operators
 * written after the macro apply to the cast result. */
#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2699
2700
/* Expressed through the add builtin: forwards A, the negation of B and the
 * negation of C to __builtin_ia32_vfmaddps512_mask with an all-ones mask and
 * R as the rounding argument.  The expansion is fully parenthesized so that
 * postfix operators written after the macro apply to the cast result. */
#define _mm512_fnmsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)-1, (int)(R)))
2706
2707
/* Expressed through the add builtin: forwards the negation of A, then B,
 * then the negation of C, to __builtin_ia32_vfmaddps512_maskz with mask U
 * and R as the rounding argument.  The expansion is fully parenthesized so
 * that postfix operators written after the macro apply to the cast result. */
#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_maskz(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            -(__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
2713
2714
2715static __inline__ __m512 __DEFAULT_FN_ATTRS512
2716_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2717{
2718  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2719                                                   (__v16sf) __B,
2720                                                   (__v16sf) __C,
2721                                                   (__mmask16) -1,
2722                                                   _MM_FROUND_CUR_DIRECTION);
2723}
2724
2725static __inline__ __m512 __DEFAULT_FN_ATTRS512
2726_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2727{
2728  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2729                                                   (__v16sf) __B,
2730                                                   (__v16sf) __C,
2731                                                   (__mmask16) __U,
2732                                                   _MM_FROUND_CUR_DIRECTION);
2733}
2734
2735static __inline__ __m512 __DEFAULT_FN_ATTRS512
2736_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2737{
2738  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
2739                                                    (__v16sf) __B,
2740                                                    (__v16sf) __C,
2741                                                    (__mmask16) __U,
2742                                                    _MM_FROUND_CUR_DIRECTION);
2743}
2744
2745static __inline__ __m512 __DEFAULT_FN_ATTRS512
2746_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2747{
2748  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2749                                                    (__v16sf) __B,
2750                                                    (__v16sf) __C,
2751                                                    (__mmask16) __U,
2752                                                    _MM_FROUND_CUR_DIRECTION);
2753}
2754
2755static __inline__ __m512 __DEFAULT_FN_ATTRS512
2756_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2757{
2758  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2759                                                   (__v16sf) __B,
2760                                                   -(__v16sf) __C,
2761                                                   (__mmask16) -1,
2762                                                   _MM_FROUND_CUR_DIRECTION);
2763}
2764
2765static __inline__ __m512 __DEFAULT_FN_ATTRS512
2766_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
2767{
2768  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2769                                                   (__v16sf) __B,
2770                                                   -(__v16sf) __C,
2771                                                   (__mmask16) __U,
2772                                                   _MM_FROUND_CUR_DIRECTION);
2773}
2774
2775static __inline__ __m512 __DEFAULT_FN_ATTRS512
2776_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2777{
2778  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
2779                                                    (__v16sf) __B,
2780                                                    -(__v16sf) __C,
2781                                                    (__mmask16) __U,
2782                                                    _MM_FROUND_CUR_DIRECTION);
2783}
2784
2785static __inline__ __m512 __DEFAULT_FN_ATTRS512
2786_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
2787{
2788  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2789                                                   -(__v16sf) __B,
2790                                                   (__v16sf) __C,
2791                                                   (__mmask16) -1,
2792                                                   _MM_FROUND_CUR_DIRECTION);
2793}
2794
2795static __inline__ __m512 __DEFAULT_FN_ATTRS512
2796_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
2797{
2798  return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
2799                                                    (__v16sf) __B,
2800                                                    (__v16sf) __C,
2801                                                    (__mmask16) __U,
2802                                                    _MM_FROUND_CUR_DIRECTION);
2803}
2804
2805static __inline__ __m512 __DEFAULT_FN_ATTRS512
2806_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2807{
2808  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2809                                                    (__v16sf) __B,
2810                                                    (__v16sf) __C,
2811                                                    (__mmask16) __U,
2812                                                    _MM_FROUND_CUR_DIRECTION);
2813}
2814
2815static __inline__ __m512 __DEFAULT_FN_ATTRS512
2816_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
2817{
2818  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
2819                                                   -(__v16sf) __B,
2820                                                   -(__v16sf) __C,
2821                                                   (__mmask16) -1,
2822                                                   _MM_FROUND_CUR_DIRECTION);
2823}
2824
2825static __inline__ __m512 __DEFAULT_FN_ATTRS512
2826_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
2827{
2828  return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
2829                                                    (__v16sf) __B,
2830                                                    -(__v16sf) __C,
2831                                                    (__mmask16) __U,
2832                                                    _MM_FROUND_CUR_DIRECTION);
2833}
2834
/* Forwards A, B and C (as __v8df) to __builtin_ia32_vfmaddsubpd512_mask with
 * an all-ones mask and R as the rounding argument.  The expansion is fully
 * parenthesized so that postfix operators written after the macro apply to
 * the cast result. */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))
2840
2841
/* Forwards A, B and C (as __v8df) to __builtin_ia32_vfmaddsubpd512_mask with
 * mask U and R as the rounding argument.  The expansion is fully
 * parenthesized so that postfix operators written after the macro apply to
 * the cast result. */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               (__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))
2847
2848
/* Forwards A, B and C (as __v8df) to __builtin_ia32_vfmaddsubpd512_mask3
 * with mask U and R as the rounding argument.  The expansion is fully
 * parenthesized so that postfix operators written after the macro apply to
 * the cast result. */
#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2854
2855
/* Forwards A, B and C (as __v8df) to __builtin_ia32_vfmaddsubpd512_maskz
 * with mask U and R as the rounding argument.  The expansion is fully
 * parenthesized so that postfix operators written after the macro apply to
 * the cast result. */
#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2861
2862
/* Expressed through the fmaddsub builtin: forwards A, B and the negation of
 * C to __builtin_ia32_vfmaddsubpd512_mask with an all-ones mask and R as the
 * rounding argument.  The expansion is fully parenthesized so that postfix
 * operators written after the macro apply to the cast result. */
#define _mm512_fmsubadd_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)-1, (int)(R)))
2868
2869
/* Expressed through the fmaddsub builtin: forwards A, B and the negation of
 * C to __builtin_ia32_vfmaddsubpd512_mask with mask U and R as the rounding
 * argument.  The expansion is fully parenthesized so that postfix operators
 * written after the macro apply to the cast result. */
#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_mask((__v8df)(__m512d)(A), \
                                               (__v8df)(__m512d)(B), \
                                               -(__v8df)(__m512d)(C), \
                                               (__mmask8)(U), (int)(R)))
2875
2876
/* Expressed through the fmaddsub builtin: forwards A, B and the negation of
 * C to __builtin_ia32_vfmaddsubpd512_maskz with mask U and R as the rounding
 * argument.  The expansion is fully parenthesized so that postfix operators
 * written after the macro apply to the cast result. */
#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddsubpd512_maskz((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                -(__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
2882
2883
2884static __inline__ __m512d __DEFAULT_FN_ATTRS512
2885_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
2886{
2887  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2888                                                      (__v8df) __B,
2889                                                      (__v8df) __C,
2890                                                      (__mmask8) -1,
2891                                                      _MM_FROUND_CUR_DIRECTION);
2892}
2893
2894static __inline__ __m512d __DEFAULT_FN_ATTRS512
2895_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2896{
2897  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2898                                                      (__v8df) __B,
2899                                                      (__v8df) __C,
2900                                                      (__mmask8) __U,
2901                                                      _MM_FROUND_CUR_DIRECTION);
2902}
2903
2904static __inline__ __m512d __DEFAULT_FN_ATTRS512
2905_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
2906{
2907  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
2908                                                       (__v8df) __B,
2909                                                       (__v8df) __C,
2910                                                       (__mmask8) __U,
2911                                                       _MM_FROUND_CUR_DIRECTION);
2912}
2913
2914static __inline__ __m512d __DEFAULT_FN_ATTRS512
2915_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2916{
2917  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2918                                                       (__v8df) __B,
2919                                                       (__v8df) __C,
2920                                                       (__mmask8) __U,
2921                                                       _MM_FROUND_CUR_DIRECTION);
2922}
2923
2924static __inline__ __m512d __DEFAULT_FN_ATTRS512
2925_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
2926{
2927  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2928                                                       (__v8df) __B,
2929                                                       -(__v8df) __C,
2930                                                       (__mmask8) -1,
2931                                                       _MM_FROUND_CUR_DIRECTION);
2932}
2933
2934static __inline__ __m512d __DEFAULT_FN_ATTRS512
2935_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
2936{
2937  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
2938                                                       (__v8df) __B,
2939                                                       -(__v8df) __C,
2940                                                       (__mmask8) __U,
2941                                                       _MM_FROUND_CUR_DIRECTION);
2942}
2943
2944static __inline__ __m512d __DEFAULT_FN_ATTRS512
2945_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
2946{
2947  return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
2948                                                        (__v8df) __B,
2949                                                        -(__v8df) __C,
2950                                                        (__mmask8) __U,
2951                                                        _MM_FROUND_CUR_DIRECTION);
2952}
2953
/* Single-precision fmaddsub / fmsubadd with an explicit rounding argument R.
 *
 * A, B, C are __m512 operands, U is a __mmask16 and R is passed to the
 * builtin as an int.  The unmasked forms pass an all-ones mask.  The fmsubadd
 * forms reuse the fmaddsub builtins with the third operand (C) negated.
 *
 * Every expansion is wrapped in an outer pair of parentheses so that the
 * leading cast cannot be split from the call by a postfix operator written
 * after the macro invocation. */
#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              (__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))

#define _mm512_fmsubadd_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_mask((__v16sf)(__m512)(A), \
                                              (__v16sf)(__m512)(B), \
                                              -(__v16sf)(__m512)(C), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddsubps512_maskz((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               -(__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
3001
3002
3003static __inline__ __m512 __DEFAULT_FN_ATTRS512
3004_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
3005{
3006  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3007                                                      (__v16sf) __B,
3008                                                      (__v16sf) __C,
3009                                                      (__mmask16) -1,
3010                                                      _MM_FROUND_CUR_DIRECTION);
3011}
3012
3013static __inline__ __m512 __DEFAULT_FN_ATTRS512
3014_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3015{
3016  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3017                                                      (__v16sf) __B,
3018                                                      (__v16sf) __C,
3019                                                      (__mmask16) __U,
3020                                                      _MM_FROUND_CUR_DIRECTION);
3021}
3022
3023static __inline__ __m512 __DEFAULT_FN_ATTRS512
3024_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3025{
3026  return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
3027                                                       (__v16sf) __B,
3028                                                       (__v16sf) __C,
3029                                                       (__mmask16) __U,
3030                                                       _MM_FROUND_CUR_DIRECTION);
3031}
3032
3033static __inline__ __m512 __DEFAULT_FN_ATTRS512
3034_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3035{
3036  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3037                                                       (__v16sf) __B,
3038                                                       (__v16sf) __C,
3039                                                       (__mmask16) __U,
3040                                                       _MM_FROUND_CUR_DIRECTION);
3041}
3042
3043static __inline__ __m512 __DEFAULT_FN_ATTRS512
3044_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
3045{
3046  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3047                                                      (__v16sf) __B,
3048                                                      -(__v16sf) __C,
3049                                                      (__mmask16) -1,
3050                                                      _MM_FROUND_CUR_DIRECTION);
3051}
3052
3053static __inline__ __m512 __DEFAULT_FN_ATTRS512
3054_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3055{
3056  return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
3057                                                      (__v16sf) __B,
3058                                                      -(__v16sf) __C,
3059                                                      (__mmask16) __U,
3060                                                      _MM_FROUND_CUR_DIRECTION);
3061}
3062
3063static __inline__ __m512 __DEFAULT_FN_ATTRS512
3064_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
3065{
3066  return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
3067                                                       (__v16sf) __B,
3068                                                       -(__v16sf) __C,
3069                                                       (__mmask16) __U,
3070                                                       _MM_FROUND_CUR_DIRECTION);
3071}
3072
/* Double-precision fmsub, "mask3" form, with explicit rounding argument R.
 * A, B, C are __m512d operands and U is a __mmask8.  The expansion is fully
 * parenthesized so the cast and the builtin call stay one expression when a
 * postfix operator follows the macro invocation. */
#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3((__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
3078
3079
3080static __inline__ __m512d __DEFAULT_FN_ATTRS512
3081_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3082{
3083  return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
3084                                                    (__v8df) __B,
3085                                                    (__v8df) __C,
3086                                                    (__mmask8) __U,
3087                                                    _MM_FROUND_CUR_DIRECTION);
3088}
3089
/* Single-precision fmsub, "mask3" form, with explicit rounding argument R.
 * A, B, C are __m512 operands and U is a __mmask16.  The expansion is fully
 * parenthesized so the cast and the builtin call stay one expression when a
 * postfix operator follows the macro invocation. */
#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3((__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
3095
3096static __inline__ __m512 __DEFAULT_FN_ATTRS512
3097_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3098{
3099  return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
3100                                                   (__v16sf) __B,
3101                                                   (__v16sf) __C,
3102                                                   (__mmask16) __U,
3103                                                   _MM_FROUND_CUR_DIRECTION);
3104}
3105
/* Double-precision fmsubadd, "mask3" form, with explicit rounding argument R.
 * A, B, C are __m512d operands and U is a __mmask8.  The expansion is fully
 * parenthesized so the cast and the builtin call stay one expression when a
 * postfix operator follows the macro invocation. */
#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubaddpd512_mask3((__v8df)(__m512d)(A), \
                                                (__v8df)(__m512d)(B), \
                                                (__v8df)(__m512d)(C), \
                                                (__mmask8)(U), (int)(R)))
3111
3112
3113static __inline__ __m512d __DEFAULT_FN_ATTRS512
3114_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3115{
3116  return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
3117                                                       (__v8df) __B,
3118                                                       (__v8df) __C,
3119                                                       (__mmask8) __U,
3120                                                       _MM_FROUND_CUR_DIRECTION);
3121}
3122
/* Single-precision fmsubadd, "mask3" form, with explicit rounding argument R.
 * A, B, C are __m512 operands and U is a __mmask16.  The expansion is fully
 * parenthesized so the cast and the builtin call stay one expression when a
 * postfix operator follows the macro invocation. */
#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubaddps512_mask3((__v16sf)(__m512)(A), \
                                               (__v16sf)(__m512)(B), \
                                               (__v16sf)(__m512)(C), \
                                               (__mmask16)(U), (int)(R)))
3128
3129
3130static __inline__ __m512 __DEFAULT_FN_ATTRS512
3131_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3132{
3133  return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
3134                                                      (__v16sf) __B,
3135                                                      (__v16sf) __C,
3136                                                      (__mmask16) __U,
3137                                                      _MM_FROUND_CUR_DIRECTION);
3138}
3139
/* Double-precision fnmadd under mask U with explicit rounding argument R.
 * Built on the fmadd "_mask" builtin with the second operand (B) negated.
 * The expansion is fully parenthesized so the cast and the builtin call stay
 * one expression when a postfix operator follows the macro invocation. */
#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            (__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))
3145
3146
3147static __inline__ __m512d __DEFAULT_FN_ATTRS512
3148_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3149{
3150  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3151                                                    -(__v8df) __B,
3152                                                    (__v8df) __C,
3153                                                    (__mmask8) __U,
3154                                                    _MM_FROUND_CUR_DIRECTION);
3155}
3156
/* Single-precision fnmadd under mask U with explicit rounding argument R.
 * Built on the fmadd "_mask" builtin with the second operand (B) negated.
 * The expansion is fully parenthesized so the cast and the builtin call stay
 * one expression when a postfix operator follows the macro invocation. */
#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           (__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))
3162
3163
3164static __inline__ __m512 __DEFAULT_FN_ATTRS512
3165_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3166{
3167  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3168                                                   -(__v16sf) __B,
3169                                                   (__v16sf) __C,
3170                                                   (__mmask16) __U,
3171                                                   _MM_FROUND_CUR_DIRECTION);
3172}
3173
/* Double-precision fnmsub with explicit rounding argument R.
 *
 *  - "_mask"  form: fmadd "_mask" builtin with both B and C negated.
 *  - "_mask3" form: fmsub "_mask3" builtin with A negated.
 *
 * Both expansions are fully parenthesized so the cast and the builtin call
 * stay one expression when a postfix operator follows the macro invocation. */
#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \
  ((__m512d)__builtin_ia32_vfmaddpd512_mask((__v8df)(__m512d)(A), \
                                            -(__v8df)(__m512d)(B), \
                                            -(__v8df)(__m512d)(C), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \
  ((__m512d)__builtin_ia32_vfmsubpd512_mask3(-(__v8df)(__m512d)(A), \
                                             (__v8df)(__m512d)(B), \
                                             (__v8df)(__m512d)(C), \
                                             (__mmask8)(U), (int)(R)))
3186
3187
3188static __inline__ __m512d __DEFAULT_FN_ATTRS512
3189_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
3190{
3191  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
3192                                                    -(__v8df) __B,
3193                                                    -(__v8df) __C,
3194                                                    (__mmask8) __U,
3195                                                    _MM_FROUND_CUR_DIRECTION);
3196}
3197
3198static __inline__ __m512d __DEFAULT_FN_ATTRS512
3199_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
3200{
3201  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
3202                                                     (__v8df) __B,
3203                                                     (__v8df) __C,
3204                                                     (__mmask8) __U,
3205                                                     _MM_FROUND_CUR_DIRECTION);
3206}
3207
/* Single-precision fnmsub with explicit rounding argument R.
 *
 *  - "_mask"  form: fmadd "_mask" builtin with both B and C negated.
 *  - "_mask3" form: fmsub "_mask3" builtin with A negated.
 *
 * Both expansions are fully parenthesized so the cast and the builtin call
 * stay one expression when a postfix operator follows the macro invocation. */
#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \
  ((__m512)__builtin_ia32_vfmaddps512_mask((__v16sf)(__m512)(A), \
                                           -(__v16sf)(__m512)(B), \
                                           -(__v16sf)(__m512)(C), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \
  ((__m512)__builtin_ia32_vfmsubps512_mask3(-(__v16sf)(__m512)(A), \
                                            (__v16sf)(__m512)(B), \
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R)))
3220
3221
3222static __inline__ __m512 __DEFAULT_FN_ATTRS512
3223_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
3224{
3225  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
3226                                                   -(__v16sf) __B,
3227                                                   -(__v16sf) __C,
3228                                                   (__mmask16) __U,
3229                                                   _MM_FROUND_CUR_DIRECTION);
3230}
3231
3232static __inline__ __m512 __DEFAULT_FN_ATTRS512
3233_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
3234{
3235  return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
3236                                                    (__v16sf) __B,
3237                                                    (__v16sf) __C,
3238                                                    (__mmask16) __U,
3239                                                    _MM_FROUND_CUR_DIRECTION);
3240}
3241
3242
3243
3244/* Vector permutations */
3245
3246static __inline __m512i __DEFAULT_FN_ATTRS512
3247_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
3248{
3249  return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
3250                                                (__v16si) __B);
3251}
3252
3253static __inline__ __m512i __DEFAULT_FN_ATTRS512
3254_mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
3255                               __m512i __B)
3256{
3257  return (__m512i)__builtin_ia32_selectd_512(__U,
3258                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3259                              (__v16si)__A);
3260}
3261
3262static __inline__ __m512i __DEFAULT_FN_ATTRS512
3263_mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
3264                                __m512i __B)
3265{
3266  return (__m512i)__builtin_ia32_selectd_512(__U,
3267                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3268                              (__v16si)__I);
3269}
3270
3271static __inline__ __m512i __DEFAULT_FN_ATTRS512
3272_mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
3273                                __m512i __B)
3274{
3275  return (__m512i)__builtin_ia32_selectd_512(__U,
3276                              (__v16si)_mm512_permutex2var_epi32(__A, __I, __B),
3277                              (__v16si)_mm512_setzero_si512());
3278}
3279
3280static __inline __m512i __DEFAULT_FN_ATTRS512
3281_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
3282{
3283  return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
3284                                                (__v8di) __B);
3285}
3286
3287static __inline__ __m512i __DEFAULT_FN_ATTRS512
3288_mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
3289                               __m512i __B)
3290{
3291  return (__m512i)__builtin_ia32_selectq_512(__U,
3292                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3293                               (__v8di)__A);
3294}
3295
3296static __inline__ __m512i __DEFAULT_FN_ATTRS512
3297_mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
3298                                __m512i __B)
3299{
3300  return (__m512i)__builtin_ia32_selectq_512(__U,
3301                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3302                               (__v8di)__I);
3303}
3304
3305static __inline__ __m512i __DEFAULT_FN_ATTRS512
3306_mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
3307                                __m512i __B)
3308{
3309  return (__m512i)__builtin_ia32_selectq_512(__U,
3310                               (__v8di)_mm512_permutex2var_epi64(__A, __I, __B),
3311                               (__v8di)_mm512_setzero_si512());
3312}
3313
/* Element-wise align-right macros for 64-bit (alignq512) and 32-bit
 * (alignd512) elements.  The shift count (I / imm) is passed to the builtin
 * as an int.
 *
 * The "_mask" forms combine the unmasked result with W under mask U through
 * the matching __builtin_ia32_select*_512; the "_maskz" forms combine it with
 * _mm512_setzero_si512() instead.
 *
 * Every expansion is wrapped in an outer pair of parentheses so that a
 * postfix operator written after the macro invocation applies to the whole
 * cast result rather than to the inner builtin call. */
#define _mm512_alignr_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
                                     (__v8di)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                  (__v8di)(__m512i)(W)))

#define _mm512_maskz_alignr_epi64(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_alignr_epi64((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()))

#define _mm512_alignr_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
                                     (__v16si)(__m512i)(B), (int)(I)))

#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W)))

#define _mm512_maskz_alignr_epi32(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_alignr_epi32((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()))
3341/* Vector Extract */
3342
/* Sub-vector extraction macros.
 *
 *  - extractf64x4: __m512d source, __m256d result.
 *  - extractf32x4: __m512 source, __m128 result.
 *
 * The selector (I / imm) is passed to the builtin as an int.  The unmasked
 * forms pass an undefined vector as the pass-through operand with an
 * all-ones mask; the "_mask" forms pass W and U; the "_maskz" forms pass a
 * zero vector and U.
 *
 * Every expansion is wrapped in an outer pair of parentheses so that a
 * postfix operator written after the macro invocation applies to the whole
 * cast result rather than to the inner builtin call. */
#define _mm512_extractf64x4_pd(A, I) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(I), \
                                             (__v4df)_mm256_undefined_pd(), \
                                             (__mmask8)-1))

#define _mm512_mask_extractf64x4_pd(W, U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)(__m256d)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extractf64x4_pd(U, A, imm) \
  ((__m256d)__builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), (int)(imm), \
                                             (__v4df)_mm256_setzero_pd(), \
                                             (__mmask8)(U)))

#define _mm512_extractf32x4_ps(A, I) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(I), \
                                            (__v4sf)_mm_undefined_ps(), \
                                            (__mmask8)-1))

#define _mm512_mask_extractf32x4_ps(W, U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)(__m128)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_extractf32x4_ps(U, A, imm) \
  ((__m128)__builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U)))
3372
3373/* Vector Blend */
3374
3375static __inline __m512d __DEFAULT_FN_ATTRS512
3376_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
3377{
3378  return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
3379                 (__v8df) __W,
3380                 (__v8df) __A);
3381}
3382
3383static __inline __m512 __DEFAULT_FN_ATTRS512
3384_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
3385{
3386  return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
3387                (__v16sf) __W,
3388                (__v16sf) __A);
3389}
3390
3391static __inline __m512i __DEFAULT_FN_ATTRS512
3392_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
3393{
3394  return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
3395                (__v8di) __W,
3396                (__v8di) __A);
3397}
3398
3399static __inline __m512i __DEFAULT_FN_ATTRS512
3400_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
3401{
3402  return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
3403                (__v16si) __W,
3404                (__v16si) __A);
3405}
3406
3407/* Compare */
3408
/* Single-precision compare producing a __mmask16.  P is the comparison
 * predicate and R the rounding/exception argument, both passed as int.  The
 * unmasked form supplies an all-ones mask; the "_mask" form supplies U.
 *
 * Both expansions are wrapped in an outer pair of parentheses so the cast and
 * the builtin call always form a single expression at the point of use. */
#define _mm512_cmp_round_ps_mask(A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) \
  ((__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \
                                           (__v16sf)(__m512)(B), (int)(P), \
                                           (__mmask16)(U), (int)(R)))
3418
/* Single-precision compare convenience macros.
 *
 * _mm512_cmp_ps_mask / _mm512_mask_cmp_ps_mask forward to the "_round"
 * variants with _MM_FROUND_CUR_DIRECTION.  The named comparisons below fix
 * the predicate P to the _CMP_* constant shown in each definition.
 *
 * Each replacement list is parenthesized so the macro expands to a single
 * primary expression regardless of how the macro it forwards to is written. */
#define _mm512_cmp_ps_mask(A, B, P) \
  (_mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_cmp_ps_mask(U, A, B, P) \
  (_mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION))

#define _mm512_cmpeq_ps_mask(A, B) \
  (_mm512_cmp_ps_mask((A), (B), _CMP_EQ_OQ))
#define _mm512_mask_cmpeq_ps_mask(k, A, B) \
  (_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_EQ_OQ))

#define _mm512_cmplt_ps_mask(A, B) \
  (_mm512_cmp_ps_mask((A), (B), _CMP_LT_OS))
#define _mm512_mask_cmplt_ps_mask(k, A, B) \
  (_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LT_OS))

#define _mm512_cmple_ps_mask(A, B) \
  (_mm512_cmp_ps_mask((A), (B), _CMP_LE_OS))
#define _mm512_mask_cmple_ps_mask(k, A, B) \
  (_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_LE_OS))

#define _mm512_cmpunord_ps_mask(A, B) \
  (_mm512_cmp_ps_mask((A), (B), _CMP_UNORD_Q))
#define _mm512_mask_cmpunord_ps_mask(k, A, B) \
  (_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_UNORD_Q))

#define _mm512_cmpneq_ps_mask(A, B) \
  (_mm512_cmp_ps_mask((A), (B), _CMP_NEQ_UQ))
#define _mm512_mask_cmpneq_ps_mask(k, A, B) \
  (_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NEQ_UQ))

#define _mm512_cmpnlt_ps_mask(A, B) \
  (_mm512_cmp_ps_mask((A), (B), _CMP_NLT_US))
#define _mm512_mask_cmpnlt_ps_mask(k, A, B) \
  (_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLT_US))

#define _mm512_cmpnle_ps_mask(A, B) \
  (_mm512_cmp_ps_mask((A), (B), _CMP_NLE_US))
#define _mm512_mask_cmpnle_ps_mask(k, A, B) \
  (_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_NLE_US))

#define _mm512_cmpord_ps_mask(A, B) \
  (_mm512_cmp_ps_mask((A), (B), _CMP_ORD_Q))
#define _mm512_mask_cmpord_ps_mask(k, A, B) \
  (_mm512_mask_cmp_ps_mask((k), (A), (B), _CMP_ORD_Q))
3463
/* Double-precision compare producing a __mmask8.  P is the comparison
 * predicate and R the rounding/exception argument, both passed as int.  The
 * unmasked form supplies an all-ones mask; the "_mask" form supplies U.
 *
 * Both expansions are wrapped in an outer pair of parentheses so the cast and
 * the builtin call always form a single expression at the point of use. */
#define _mm512_cmp_round_pd_mask(A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)-1, (int)(R)))

#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) \
  ((__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \
                                          (__v8df)(__m512d)(B), (int)(P), \
                                          (__mmask8)(U), (int)(R)))
3473
/* Double-precision compare convenience macros.
 *
 * _mm512_cmp_pd_mask / _mm512_mask_cmp_pd_mask forward to the "_round"
 * variants with _MM_FROUND_CUR_DIRECTION.  The named comparisons below fix
 * the predicate P to the _CMP_* constant shown in each definition.
 *
 * Each replacement list is parenthesized so the macro expands to a single
 * primary expression regardless of how the macro it forwards to is written. */
#define _mm512_cmp_pd_mask(A, B, P) \
  (_mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION))
#define _mm512_mask_cmp_pd_mask(U, A, B, P) \
  (_mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION))

#define _mm512_cmpeq_pd_mask(A, B) \
  (_mm512_cmp_pd_mask((A), (B), _CMP_EQ_OQ))
#define _mm512_mask_cmpeq_pd_mask(k, A, B) \
  (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_EQ_OQ))

#define _mm512_cmplt_pd_mask(A, B) \
  (_mm512_cmp_pd_mask((A), (B), _CMP_LT_OS))
#define _mm512_mask_cmplt_pd_mask(k, A, B) \
  (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LT_OS))

#define _mm512_cmple_pd_mask(A, B) \
  (_mm512_cmp_pd_mask((A), (B), _CMP_LE_OS))
#define _mm512_mask_cmple_pd_mask(k, A, B) \
  (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_LE_OS))

#define _mm512_cmpunord_pd_mask(A, B) \
  (_mm512_cmp_pd_mask((A), (B), _CMP_UNORD_Q))
#define _mm512_mask_cmpunord_pd_mask(k, A, B) \
  (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_UNORD_Q))

#define _mm512_cmpneq_pd_mask(A, B) \
  (_mm512_cmp_pd_mask((A), (B), _CMP_NEQ_UQ))
#define _mm512_mask_cmpneq_pd_mask(k, A, B) \
  (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NEQ_UQ))

#define _mm512_cmpnlt_pd_mask(A, B) \
  (_mm512_cmp_pd_mask((A), (B), _CMP_NLT_US))
#define _mm512_mask_cmpnlt_pd_mask(k, A, B) \
  (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLT_US))

#define _mm512_cmpnle_pd_mask(A, B) \
  (_mm512_cmp_pd_mask((A), (B), _CMP_NLE_US))
#define _mm512_mask_cmpnle_pd_mask(k, A, B) \
  (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_NLE_US))

#define _mm512_cmpord_pd_mask(A, B) \
  (_mm512_cmp_pd_mask((A), (B), _CMP_ORD_Q))
#define _mm512_mask_cmpord_pd_mask(k, A, B) \
  (_mm512_mask_cmp_pd_mask((k), (A), (B), _CMP_ORD_Q))
3518
3519/* Conversion */
3520
/* float -> unsigned 32-bit conversion macros built on
 * __builtin_ia32_cvttps2udq512_mask, with an explicit int argument R.
 *
 *  - unmasked: pass-through is _mm512_undefined_epi32(), mask is all ones.
 *  - "_mask":  pass-through is W, mask is U.
 *  - "_maskz": pass-through is _mm512_setzero_si512(), mask is U.
 *
 * Every expansion is wrapped in an outer pair of parentheses so that a
 * postfix operator written after the macro invocation applies to the whole
 * cast result rather than to the inner builtin call. */
#define _mm512_cvtt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_undefined_epi32(), \
                                              (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvtt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)(__m512i)(W), \
                                              (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvtt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2udq512_mask((__v16sf)(__m512)(A), \
                                              (__v16si)_mm512_setzero_si512(), \
                                              (__mmask16)(U), (int)(R)))
3535
3536
3537static __inline __m512i __DEFAULT_FN_ATTRS512
3538_mm512_cvttps_epu32(__m512 __A)
3539{
3540  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3541                  (__v16si)
3542                  _mm512_setzero_si512 (),
3543                  (__mmask16) -1,
3544                  _MM_FROUND_CUR_DIRECTION);
3545}
3546
3547static __inline__ __m512i __DEFAULT_FN_ATTRS512
3548_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
3549{
3550  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3551                   (__v16si) __W,
3552                   (__mmask16) __U,
3553                   _MM_FROUND_CUR_DIRECTION);
3554}
3555
3556static __inline__ __m512i __DEFAULT_FN_ATTRS512
3557_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
3558{
3559  return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
3560                   (__v16si) _mm512_setzero_si512 (),
3561                   (__mmask16) __U,
3562                   _MM_FROUND_CUR_DIRECTION);
3563}
3564
/* 32-bit integer -> float conversion macros with an explicit rounding
 * argument R.  The "epi32" forms use __builtin_ia32_cvtdq2ps512_mask and the
 * "epu32" forms use __builtin_ia32_cvtudq2ps512_mask.
 *
 *  - unmasked: pass-through is _mm512_setzero_ps(), mask is all ones.
 *  - "_mask":  pass-through is W, mask is U.
 *  - "_maskz": pass-through is _mm512_setzero_ps(), mask is U.
 *
 * Every expansion is wrapped in an outer pair of parentheses so that a
 * postfix operator written after the macro invocation applies to the whole
 * cast result rather than to the inner builtin call. */
#define _mm512_cvt_roundepi32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepi32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepi32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(__m512i)(A), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_cvt_roundepu32_ps(A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)-1, (int)(R)))

#define _mm512_mask_cvt_roundepu32_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))

#define _mm512_maskz_cvt_roundepu32_ps(U, A, R) \
  ((__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(__m512i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3594
3595static __inline__ __m512 __DEFAULT_FN_ATTRS512
3596_mm512_cvtepu32_ps (__m512i __A)
3597{
3598  return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
3599}
3600
3601static __inline__ __m512 __DEFAULT_FN_ATTRS512
3602_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3603{
3604  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3605                                             (__v16sf)_mm512_cvtepu32_ps(__A),
3606                                             (__v16sf)__W);
3607}
3608
3609static __inline__ __m512 __DEFAULT_FN_ATTRS512
3610_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
3611{
3612  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3613                                             (__v16sf)_mm512_cvtepu32_ps(__A),
3614                                             (__v16sf)_mm512_setzero_ps());
3615}
3616
3617static __inline __m512d __DEFAULT_FN_ATTRS512
3618_mm512_cvtepi32_pd(__m256i __A)
3619{
3620  return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
3621}
3622
3623static __inline__ __m512d __DEFAULT_FN_ATTRS512
3624_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3625{
3626  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3627                                              (__v8df)_mm512_cvtepi32_pd(__A),
3628                                              (__v8df)__W);
3629}
3630
3631static __inline__ __m512d __DEFAULT_FN_ATTRS512
3632_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
3633{
3634  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3635                                              (__v8df)_mm512_cvtepi32_pd(__A),
3636                                              (__v8df)_mm512_setzero_pd());
3637}
3638
3639static __inline__ __m512d __DEFAULT_FN_ATTRS512
3640_mm512_cvtepi32lo_pd(__m512i __A)
3641{
3642  return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
3643}
3644
3645static __inline__ __m512d __DEFAULT_FN_ATTRS512
3646_mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3647{
3648  return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
3649}
3650
3651static __inline__ __m512 __DEFAULT_FN_ATTRS512
3652_mm512_cvtepi32_ps (__m512i __A)
3653{
3654  return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
3655}
3656
3657static __inline__ __m512 __DEFAULT_FN_ATTRS512
3658_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
3659{
3660  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3661                                             (__v16sf)_mm512_cvtepi32_ps(__A),
3662                                             (__v16sf)__W);
3663}
3664
3665static __inline__ __m512 __DEFAULT_FN_ATTRS512
3666_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
3667{
3668  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
3669                                             (__v16sf)_mm512_cvtepi32_ps(__A),
3670                                             (__v16sf)_mm512_setzero_ps());
3671}
3672
3673static __inline __m512d __DEFAULT_FN_ATTRS512
3674_mm512_cvtepu32_pd(__m256i __A)
3675{
3676  return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
3677}
3678
3679static __inline__ __m512d __DEFAULT_FN_ATTRS512
3680_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
3681{
3682  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3683                                              (__v8df)_mm512_cvtepu32_pd(__A),
3684                                              (__v8df)__W);
3685}
3686
3687static __inline__ __m512d __DEFAULT_FN_ATTRS512
3688_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
3689{
3690  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
3691                                              (__v8df)_mm512_cvtepu32_pd(__A),
3692                                              (__v8df)_mm512_setzero_pd());
3693}
3694
3695static __inline__ __m512d __DEFAULT_FN_ATTRS512
3696_mm512_cvtepu32lo_pd(__m512i __A)
3697{
3698  return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
3699}
3700
3701static __inline__ __m512d __DEFAULT_FN_ATTRS512
3702_mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
3703{
3704  return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
3705}
3706
/* Unmasked form: forwards A to __builtin_ia32_cvtpd2ps512_mask with a zero
   pass-through vector, an all-ones mask and rounding control R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_cvt_roundpd_ps(A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
3711
/* Merge-masking form: forwards A to __builtin_ia32_cvtpd2ps512_mask with W
   as the pass-through vector, U as the write mask and rounding control R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_mask_cvt_roundpd_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)))
3716
/* Zero-masking form: forwards A to __builtin_ia32_cvtpd2ps512_mask with an
   all-zero pass-through vector, write mask U and rounding control R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_maskz_cvt_roundpd_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(__m512d)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
3721
3722static __inline__ __m256 __DEFAULT_FN_ATTRS512
3723_mm512_cvtpd_ps (__m512d __A)
3724{
3725  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3726                (__v8sf) _mm256_undefined_ps (),
3727                (__mmask8) -1,
3728                _MM_FROUND_CUR_DIRECTION);
3729}
3730
3731static __inline__ __m256 __DEFAULT_FN_ATTRS512
3732_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
3733{
3734  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3735                (__v8sf) __W,
3736                (__mmask8) __U,
3737                _MM_FROUND_CUR_DIRECTION);
3738}
3739
3740static __inline__ __m256 __DEFAULT_FN_ATTRS512
3741_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
3742{
3743  return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
3744                (__v8sf) _mm256_setzero_ps (),
3745                (__mmask8) __U,
3746                _MM_FROUND_CUR_DIRECTION);
3747}
3748
3749static __inline__ __m512 __DEFAULT_FN_ATTRS512
3750_mm512_cvtpd_pslo (__m512d __A)
3751{
3752  return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
3753                (__v8sf) _mm256_setzero_ps (),
3754                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3755}
3756
3757static __inline__ __m512 __DEFAULT_FN_ATTRS512
3758_mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
3759{
3760  return (__m512) __builtin_shufflevector (
3761                (__v8sf) _mm512_mask_cvtpd_ps (_mm512_castps512_ps256(__W),
3762                                               __U, __A),
3763                (__v8sf) _mm256_setzero_ps (),
3764                0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3765}
3766
/* Unmasked form: forwards A and the immediate I to
   __builtin_ia32_vcvtps2ph512_mask with an undefined pass-through vector and
   an all-ones mask.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_cvt_roundps_ph(A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_undefined_si256(), \
                                             (__mmask16)-1))
3771
/* Merge-masking form: forwards A and the immediate I to
   __builtin_ia32_vcvtps2ph512_mask with W as the pass-through vector and U as
   the write mask.  Argument order is unchanged (pass-through, mask, source,
   immediate); only the parameter names now follow the W/U convention used by
   the other masked macros in this file.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_mask_cvt_roundps_ph(W, U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)(__m256i)(W), \
                                             (__mmask16)(U)))
3776
/* Zero-masking form: forwards A and the immediate I to
   __builtin_ia32_vcvtps2ph512_mask with an all-zero pass-through vector and U
   as the write mask (the parameter is named U to match the other masked
   macros; argument order is unchanged).
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_maskz_cvt_roundps_ph(U, A, I) \
  ((__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \
                                             (__v16hi)_mm256_setzero_si256(), \
                                             (__mmask16)(U)))
3781
/* The non-"round" spellings are plain aliases: each name expands to the
   corresponding _cvt_roundps_ph macro and takes the same arguments. */
#define _mm512_cvtps_ph       _mm512_cvt_roundps_ph
#define _mm512_mask_cvtps_ph  _mm512_mask_cvt_roundps_ph
#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph
3785
/* Unmasked form: forwards A to __builtin_ia32_vcvtph2ps512_mask with an
   undefined pass-through vector, an all-ones mask and control argument R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_cvt_roundph_ps(A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_undefined_ps(), \
                                            (__mmask16)-1, (int)(R)))
3790
/* Merge-masking form: forwards A to __builtin_ia32_vcvtph2ps512_mask with W
   as the pass-through vector, U as the write mask and control argument R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_mask_cvt_roundph_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)(__m512)(W), \
                                            (__mmask16)(U), (int)(R)))
3795
/* Zero-masking form: forwards A to __builtin_ia32_vcvtph2ps512_mask with an
   all-zero pass-through vector, write mask U and control argument R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_maskz_cvt_roundph_ps(U, A, R) \
  ((__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R)))
3800
3801
3802static  __inline __m512 __DEFAULT_FN_ATTRS512
3803_mm512_cvtph_ps(__m256i __A)
3804{
3805  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3806                (__v16sf)
3807                _mm512_setzero_ps (),
3808                (__mmask16) -1,
3809                _MM_FROUND_CUR_DIRECTION);
3810}
3811
3812static __inline__ __m512 __DEFAULT_FN_ATTRS512
3813_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
3814{
3815  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3816                 (__v16sf) __W,
3817                 (__mmask16) __U,
3818                 _MM_FROUND_CUR_DIRECTION);
3819}
3820
3821static __inline__ __m512 __DEFAULT_FN_ATTRS512
3822_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
3823{
3824  return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
3825                 (__v16sf) _mm512_setzero_ps (),
3826                 (__mmask16) __U,
3827                 _MM_FROUND_CUR_DIRECTION);
3828}
3829
/* Unmasked form: forwards A to __builtin_ia32_cvttpd2dq512_mask with a zero
   pass-through vector, an all-ones mask and control argument R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_cvtt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))
3834
/* Merge-masking form: forwards A to __builtin_ia32_cvttpd2dq512_mask with W
   as the pass-through vector, U as the write mask and control argument R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))
3839
/* Zero-masking form: forwards A to __builtin_ia32_cvttpd2dq512_mask with an
   all-zero pass-through vector, write mask U and control argument R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_maskz_cvtt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
3844
3845static __inline __m256i __DEFAULT_FN_ATTRS512
3846_mm512_cvttpd_epi32(__m512d __a)
3847{
3848  return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
3849                                                   (__v8si)_mm256_setzero_si256(),
3850                                                   (__mmask8) -1,
3851                                                    _MM_FROUND_CUR_DIRECTION);
3852}
3853
3854static __inline__ __m256i __DEFAULT_FN_ATTRS512
3855_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3856{
3857  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3858                  (__v8si) __W,
3859                  (__mmask8) __U,
3860                  _MM_FROUND_CUR_DIRECTION);
3861}
3862
3863static __inline__ __m256i __DEFAULT_FN_ATTRS512
3864_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
3865{
3866  return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
3867                  (__v8si) _mm256_setzero_si256 (),
3868                  (__mmask8) __U,
3869                  _MM_FROUND_CUR_DIRECTION);
3870}
3871
/* Unmasked form: forwards A to __builtin_ia32_cvttps2dq512_mask with a zero
   pass-through vector, an all-ones mask and control argument R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_cvtt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))
3876
/* Merge-masking form: forwards A to __builtin_ia32_cvttps2dq512_mask with W
   as the pass-through vector, U as the write mask and control argument R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_mask_cvtt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))
3881
/* Zero-masking form: forwards A to __builtin_ia32_cvttps2dq512_mask with an
   all-zero pass-through vector, write mask U and control argument R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_maskz_cvtt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
3886
3887static __inline __m512i __DEFAULT_FN_ATTRS512
3888_mm512_cvttps_epi32(__m512 __a)
3889{
3890  return (__m512i)
3891    __builtin_ia32_cvttps2dq512_mask((__v16sf) __a,
3892                                     (__v16si) _mm512_setzero_si512 (),
3893                                     (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
3894}
3895
3896static __inline__ __m512i __DEFAULT_FN_ATTRS512
3897_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3898{
3899  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3900                  (__v16si) __W,
3901                  (__mmask16) __U,
3902                  _MM_FROUND_CUR_DIRECTION);
3903}
3904
3905static __inline__ __m512i __DEFAULT_FN_ATTRS512
3906_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
3907{
3908  return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
3909                  (__v16si) _mm512_setzero_si512 (),
3910                  (__mmask16) __U,
3911                  _MM_FROUND_CUR_DIRECTION);
3912}
3913
/* Unmasked form: forwards A to __builtin_ia32_cvtps2dq512_mask with a zero
   pass-through vector, an all-ones mask and rounding control R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_cvt_roundps_epi32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)-1, (int)(R)))
3918
/* Merge-masking form: forwards A to __builtin_ia32_cvtps2dq512_mask with W
   as the pass-through vector, U as the write mask and rounding control R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_mask_cvt_roundps_epi32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)(__m512i)(W), \
                                            (__mmask16)(U), (int)(R)))
3923
/* Zero-masking form: forwards A to __builtin_ia32_cvtps2dq512_mask with an
   all-zero pass-through vector, write mask U and rounding control R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_maskz_cvt_roundps_epi32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(__m512)(A), \
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R)))
3928
3929static __inline__ __m512i __DEFAULT_FN_ATTRS512
3930_mm512_cvtps_epi32 (__m512 __A)
3931{
3932  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3933                 (__v16si) _mm512_undefined_epi32 (),
3934                 (__mmask16) -1,
3935                 _MM_FROUND_CUR_DIRECTION);
3936}
3937
3938static __inline__ __m512i __DEFAULT_FN_ATTRS512
3939_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
3940{
3941  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3942                 (__v16si) __W,
3943                 (__mmask16) __U,
3944                 _MM_FROUND_CUR_DIRECTION);
3945}
3946
3947static __inline__ __m512i __DEFAULT_FN_ATTRS512
3948_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
3949{
3950  return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
3951                 (__v16si)
3952                 _mm512_setzero_si512 (),
3953                 (__mmask16) __U,
3954                 _MM_FROUND_CUR_DIRECTION);
3955}
3956
/* Unmasked form: forwards A to __builtin_ia32_cvtpd2dq512_mask with a zero
   pass-through vector, an all-ones mask and rounding control R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_cvt_roundpd_epi32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)-1, (int)(R)))
3961
/* Merge-masking form: forwards A to __builtin_ia32_cvtpd2dq512_mask with W
   as the pass-through vector, U as the write mask and rounding control R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_mask_cvt_roundpd_epi32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)(__m256i)(W), \
                                            (__mmask8)(U), (int)(R)))
3966
/* Zero-masking form: forwards A to __builtin_ia32_cvtpd2dq512_mask with an
   all-zero pass-through vector, write mask U and rounding control R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_maskz_cvt_roundpd_epi32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(__m512d)(A), \
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R)))
3971
3972static __inline__ __m256i __DEFAULT_FN_ATTRS512
3973_mm512_cvtpd_epi32 (__m512d __A)
3974{
3975  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3976                 (__v8si)
3977                 _mm256_undefined_si256 (),
3978                 (__mmask8) -1,
3979                 _MM_FROUND_CUR_DIRECTION);
3980}
3981
3982static __inline__ __m256i __DEFAULT_FN_ATTRS512
3983_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
3984{
3985  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3986                 (__v8si) __W,
3987                 (__mmask8) __U,
3988                 _MM_FROUND_CUR_DIRECTION);
3989}
3990
3991static __inline__ __m256i __DEFAULT_FN_ATTRS512
3992_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
3993{
3994  return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
3995                 (__v8si)
3996                 _mm256_setzero_si256 (),
3997                 (__mmask8) __U,
3998                 _MM_FROUND_CUR_DIRECTION);
3999}
4000
/* Unmasked form: forwards A to __builtin_ia32_cvtps2udq512_mask with a zero
   pass-through vector, an all-ones mask and rounding control R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_cvt_roundps_epu32(A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)-1, (int)(R)))
4005
/* Merge-masking form: forwards A to __builtin_ia32_cvtps2udq512_mask with W
   as the pass-through vector, U as the write mask and rounding control R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_mask_cvt_roundps_epu32(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)(__m512i)(W), \
                                             (__mmask16)(U), (int)(R)))
4010
/* Zero-masking form: forwards A to __builtin_ia32_cvtps2udq512_mask with an
   all-zero pass-through vector, write mask U and rounding control R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_maskz_cvt_roundps_epu32(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(__m512)(A), \
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R)))
4015
4016static __inline__ __m512i __DEFAULT_FN_ATTRS512
4017_mm512_cvtps_epu32 ( __m512 __A)
4018{
4019  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
4020                  (__v16si)\
4021                  _mm512_undefined_epi32 (),
4022                  (__mmask16) -1,\
4023                  _MM_FROUND_CUR_DIRECTION);
4024}
4025
4026static __inline__ __m512i __DEFAULT_FN_ATTRS512
4027_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
4028{
4029  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4030                  (__v16si) __W,
4031                  (__mmask16) __U,
4032                  _MM_FROUND_CUR_DIRECTION);
4033}
4034
4035static __inline__ __m512i __DEFAULT_FN_ATTRS512
4036_mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
4037{
4038  return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
4039                  (__v16si)
4040                  _mm512_setzero_si512 (),
4041                  (__mmask16) __U ,
4042                  _MM_FROUND_CUR_DIRECTION);
4043}
4044
/* Unmasked form: forwards A to __builtin_ia32_cvtpd2udq512_mask with a zero
   pass-through vector, an all-ones mask and rounding control R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_cvt_roundpd_epu32(A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)-1, (int)(R)))
4049
/* Merge-masking form: forwards A to __builtin_ia32_cvtpd2udq512_mask with W
   as the pass-through vector, U as the write mask and rounding control R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_mask_cvt_roundpd_epu32(W, U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U), (int)(R)))
4054
/* Zero-masking form: forwards A to __builtin_ia32_cvtpd2udq512_mask with an
   all-zero pass-through vector, write mask U and rounding control R.
   The expansion is wrapped in parentheses so that a postfix operator written
   after the macro applies to the cast result, not to the builtin call. */
#define _mm512_maskz_cvt_roundpd_epu32(U, A, R) \
  ((__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(__m512d)(A), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R)))
4059
4060static __inline__ __m256i __DEFAULT_FN_ATTRS512
4061_mm512_cvtpd_epu32 (__m512d __A)
4062{
4063  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4064                  (__v8si)
4065                  _mm256_undefined_si256 (),
4066                  (__mmask8) -1,
4067                  _MM_FROUND_CUR_DIRECTION);
4068}
4069
4070static __inline__ __m256i __DEFAULT_FN_ATTRS512
4071_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
4072{
4073  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4074                  (__v8si) __W,
4075                  (__mmask8) __U,
4076                  _MM_FROUND_CUR_DIRECTION);
4077}
4078
4079static __inline__ __m256i __DEFAULT_FN_ATTRS512
4080_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
4081{
4082  return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
4083                  (__v8si)
4084                  _mm256_setzero_si256 (),
4085                  (__mmask8) __U,
4086                  _MM_FROUND_CUR_DIRECTION);
4087}
4088
4089static __inline__ double __DEFAULT_FN_ATTRS512
4090_mm512_cvtsd_f64(__m512d __a)
4091{
4092  return __a[0];
4093}
4094
4095static __inline__ float __DEFAULT_FN_ATTRS512
4096_mm512_cvtss_f32(__m512 __a)
4097{
4098  return __a[0];
4099}
4100
4101/* Unpack and Interleave */
4102
4103static __inline __m512d __DEFAULT_FN_ATTRS512
4104_mm512_unpackhi_pd(__m512d __a, __m512d __b)
4105{
4106  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4107                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4108}
4109
4110static __inline__ __m512d __DEFAULT_FN_ATTRS512
4111_mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4112{
4113  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4114                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
4115                                           (__v8df)__W);
4116}
4117
4118static __inline__ __m512d __DEFAULT_FN_ATTRS512
4119_mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
4120{
4121  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4122                                           (__v8df)_mm512_unpackhi_pd(__A, __B),
4123                                           (__v8df)_mm512_setzero_pd());
4124}
4125
4126static __inline __m512d __DEFAULT_FN_ATTRS512
4127_mm512_unpacklo_pd(__m512d __a, __m512d __b)
4128{
4129  return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
4130                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4131}
4132
4133static __inline__ __m512d __DEFAULT_FN_ATTRS512
4134_mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
4135{
4136  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4137                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
4138                                           (__v8df)__W);
4139}
4140
4141static __inline__ __m512d __DEFAULT_FN_ATTRS512
4142_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
4143{
4144  return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
4145                                           (__v8df)_mm512_unpacklo_pd(__A, __B),
4146                                           (__v8df)_mm512_setzero_pd());
4147}
4148
4149static __inline __m512 __DEFAULT_FN_ATTRS512
4150_mm512_unpackhi_ps(__m512 __a, __m512 __b)
4151{
4152  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4153                                         2,    18,    3,    19,
4154                                         2+4,  18+4,  3+4,  19+4,
4155                                         2+8,  18+8,  3+8,  19+8,
4156                                         2+12, 18+12, 3+12, 19+12);
4157}
4158
4159static __inline__ __m512 __DEFAULT_FN_ATTRS512
4160_mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4161{
4162  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4163                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
4164                                          (__v16sf)__W);
4165}
4166
4167static __inline__ __m512 __DEFAULT_FN_ATTRS512
4168_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
4169{
4170  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4171                                          (__v16sf)_mm512_unpackhi_ps(__A, __B),
4172                                          (__v16sf)_mm512_setzero_ps());
4173}
4174
4175static __inline __m512 __DEFAULT_FN_ATTRS512
4176_mm512_unpacklo_ps(__m512 __a, __m512 __b)
4177{
4178  return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
4179                                         0,    16,    1,    17,
4180                                         0+4,  16+4,  1+4,  17+4,
4181                                         0+8,  16+8,  1+8,  17+8,
4182                                         0+12, 16+12, 1+12, 17+12);
4183}
4184
4185static __inline__ __m512 __DEFAULT_FN_ATTRS512
4186_mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
4187{
4188  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4189                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
4190                                          (__v16sf)__W);
4191}
4192
4193static __inline__ __m512 __DEFAULT_FN_ATTRS512
4194_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
4195{
4196  return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
4197                                          (__v16sf)_mm512_unpacklo_ps(__A, __B),
4198                                          (__v16sf)_mm512_setzero_ps());
4199}
4200
4201static __inline__ __m512i __DEFAULT_FN_ATTRS512
4202_mm512_unpackhi_epi32(__m512i __A, __m512i __B)
4203{
4204  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4205                                          2,    18,    3,    19,
4206                                          2+4,  18+4,  3+4,  19+4,
4207                                          2+8,  18+8,  3+8,  19+8,
4208                                          2+12, 18+12, 3+12, 19+12);
4209}
4210
4211static __inline__ __m512i __DEFAULT_FN_ATTRS512
4212_mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4213{
4214  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4215                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
4216                                       (__v16si)__W);
4217}
4218
4219static __inline__ __m512i __DEFAULT_FN_ATTRS512
4220_mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4221{
4222  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4223                                       (__v16si)_mm512_unpackhi_epi32(__A, __B),
4224                                       (__v16si)_mm512_setzero_si512());
4225}
4226
4227static __inline__ __m512i __DEFAULT_FN_ATTRS512
4228_mm512_unpacklo_epi32(__m512i __A, __m512i __B)
4229{
4230  return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
4231                                          0,    16,    1,    17,
4232                                          0+4,  16+4,  1+4,  17+4,
4233                                          0+8,  16+8,  1+8,  17+8,
4234                                          0+12, 16+12, 1+12, 17+12);
4235}
4236
4237static __inline__ __m512i __DEFAULT_FN_ATTRS512
4238_mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
4239{
4240  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4241                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
4242                                       (__v16si)__W);
4243}
4244
4245static __inline__ __m512i __DEFAULT_FN_ATTRS512
4246_mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
4247{
4248  return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
4249                                       (__v16si)_mm512_unpacklo_epi32(__A, __B),
4250                                       (__v16si)_mm512_setzero_si512());
4251}
4252
4253static __inline__ __m512i __DEFAULT_FN_ATTRS512
4254_mm512_unpackhi_epi64(__m512i __A, __m512i __B)
4255{
4256  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4257                                          1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
4258}
4259
4260static __inline__ __m512i __DEFAULT_FN_ATTRS512
4261_mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4262{
4263  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4264                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
4265                                        (__v8di)__W);
4266}
4267
4268static __inline__ __m512i __DEFAULT_FN_ATTRS512
4269_mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
4270{
4271  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4272                                        (__v8di)_mm512_unpackhi_epi64(__A, __B),
4273                                        (__v8di)_mm512_setzero_si512());
4274}
4275
4276static __inline__ __m512i __DEFAULT_FN_ATTRS512
4277_mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
4278{
4279  return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
4280                                          0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
4281}
4282
4283static __inline__ __m512i __DEFAULT_FN_ATTRS512
4284_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
4285{
4286  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4287                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
4288                                        (__v8di)__W);
4289}
4290
4291static __inline__ __m512i __DEFAULT_FN_ATTRS512
4292_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
4293{
4294  return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
4295                                        (__v8di)_mm512_unpacklo_epi64(__A, __B),
4296                                        (__v8di)_mm512_setzero_si512());
4297}
4298
4299
4300/* SIMD load ops */
4301
4302static __inline __m512i __DEFAULT_FN_ATTRS512
4303_mm512_loadu_si512 (void const *__P)
4304{
4305  struct __loadu_si512 {
4306    __m512i_u __v;
4307  } __attribute__((__packed__, __may_alias__));
4308  return ((struct __loadu_si512*)__P)->__v;
4309}
4310
4311static __inline __m512i __DEFAULT_FN_ATTRS512
4312_mm512_loadu_epi32 (void const *__P)
4313{
4314  struct __loadu_epi32 {
4315    __m512i_u __v;
4316  } __attribute__((__packed__, __may_alias__));
4317  return ((struct __loadu_epi32*)__P)->__v;
4318}
4319
4320static __inline __m512i __DEFAULT_FN_ATTRS512
4321_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
4322{
4323  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
4324                  (__v16si) __W,
4325                  (__mmask16) __U);
4326}
4327
4328
4329static __inline __m512i __DEFAULT_FN_ATTRS512
4330_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
4331{
4332  return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
4333                                                     (__v16si)
4334                                                     _mm512_setzero_si512 (),
4335                                                     (__mmask16) __U);
4336}
4337
4338static __inline __m512i __DEFAULT_FN_ATTRS512
4339_mm512_loadu_epi64 (void const *__P)
4340{
4341  struct __loadu_epi64 {
4342    __m512i_u __v;
4343  } __attribute__((__packed__, __may_alias__));
4344  return ((struct __loadu_epi64*)__P)->__v;
4345}
4346
4347static __inline __m512i __DEFAULT_FN_ATTRS512
4348_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
4349{
4350  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
4351                  (__v8di) __W,
4352                  (__mmask8) __U);
4353}
4354
4355static __inline __m512i __DEFAULT_FN_ATTRS512
4356_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
4357{
4358  return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
4359                                                     (__v8di)
4360                                                     _mm512_setzero_si512 (),
4361                                                     (__mmask8) __U);
4362}
4363
4364static __inline __m512 __DEFAULT_FN_ATTRS512
4365_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
4366{
4367  return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
4368                   (__v16sf) __W,
4369                   (__mmask16) __U);
4370}
4371
4372static __inline __m512 __DEFAULT_FN_ATTRS512
4373_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
4374{
4375  return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
4376                                                  (__v16sf)
4377                                                  _mm512_setzero_ps (),
4378                                                  (__mmask16) __U);
4379}
4380
4381static __inline __m512d __DEFAULT_FN_ATTRS512
4382_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
4383{
4384  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
4385                (__v8df) __W,
4386                (__mmask8) __U);
4387}
4388
4389static __inline __m512d __DEFAULT_FN_ATTRS512
4390_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
4391{
4392  return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
4393                                                   (__v8df)
4394                                                   _mm512_setzero_pd (),
4395                                                   (__mmask8) __U);
4396}
4397
4398static __inline __m512d __DEFAULT_FN_ATTRS512
4399_mm512_loadu_pd(void const *__p)
4400{
4401  struct __loadu_pd {
4402    __m512d_u __v;
4403  } __attribute__((__packed__, __may_alias__));
4404  return ((struct __loadu_pd*)__p)->__v;
4405}
4406
4407static __inline __m512 __DEFAULT_FN_ATTRS512
4408_mm512_loadu_ps(void const *__p)
4409{
4410  struct __loadu_ps {
4411    __m512_u __v;
4412  } __attribute__((__packed__, __may_alias__));
4413  return ((struct __loadu_ps*)__p)->__v;
4414}
4415
4416static __inline __m512 __DEFAULT_FN_ATTRS512
4417_mm512_load_ps(void const *__p)
4418{
4419  return *(__m512*)__p;
4420}
4421
4422static __inline __m512 __DEFAULT_FN_ATTRS512
4423_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
4424{
4425  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
4426                   (__v16sf) __W,
4427                   (__mmask16) __U);
4428}
4429
4430static __inline __m512 __DEFAULT_FN_ATTRS512
4431_mm512_maskz_load_ps(__mmask16 __U, void const *__P)
4432{
4433  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
4434                                                  (__v16sf)
4435                                                  _mm512_setzero_ps (),
4436                                                  (__mmask16) __U);
4437}
4438
4439static __inline __m512d __DEFAULT_FN_ATTRS512
4440_mm512_load_pd(void const *__p)
4441{
4442  return *(__m512d*)__p;
4443}
4444
4445static __inline __m512d __DEFAULT_FN_ATTRS512
4446_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
4447{
4448  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
4449                          (__v8df) __W,
4450                          (__mmask8) __U);
4451}
4452
4453static __inline __m512d __DEFAULT_FN_ATTRS512
4454_mm512_maskz_load_pd(__mmask8 __U, void const *__P)
4455{
4456  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
4457                                                   (__v8df)
4458                                                   _mm512_setzero_pd (),
4459                                                   (__mmask8) __U);
4460}
4461
4462static __inline __m512i __DEFAULT_FN_ATTRS512
4463_mm512_load_si512 (void const *__P)
4464{
4465  return *(__m512i *) __P;
4466}
4467
4468static __inline __m512i __DEFAULT_FN_ATTRS512
4469_mm512_load_epi32 (void const *__P)
4470{
4471  return *(__m512i *) __P;
4472}
4473
4474static __inline __m512i __DEFAULT_FN_ATTRS512
4475_mm512_load_epi64 (void const *__P)
4476{
4477  return *(__m512i *) __P;
4478}
4479
4480/* SIMD store ops */
4481
4482static __inline void __DEFAULT_FN_ATTRS512
4483_mm512_storeu_epi64 (void *__P, __m512i __A)
4484{
4485  struct __storeu_epi64 {
4486    __m512i_u __v;
4487  } __attribute__((__packed__, __may_alias__));
4488  ((struct __storeu_epi64*)__P)->__v = __A;
4489}
4490
4491static __inline void __DEFAULT_FN_ATTRS512
4492_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
4493{
4494  __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
4495                                     (__mmask8) __U);
4496}
4497
4498static __inline void __DEFAULT_FN_ATTRS512
4499_mm512_storeu_si512 (void *__P, __m512i __A)
4500{
4501  struct __storeu_si512 {
4502    __m512i_u __v;
4503  } __attribute__((__packed__, __may_alias__));
4504  ((struct __storeu_si512*)__P)->__v = __A;
4505}
4506
4507static __inline void __DEFAULT_FN_ATTRS512
4508_mm512_storeu_epi32 (void *__P, __m512i __A)
4509{
4510  struct __storeu_epi32 {
4511    __m512i_u __v;
4512  } __attribute__((__packed__, __may_alias__));
4513  ((struct __storeu_epi32*)__P)->__v = __A;
4514}
4515
4516static __inline void __DEFAULT_FN_ATTRS512
4517_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
4518{
4519  __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
4520                                     (__mmask16) __U);
4521}
4522
4523static __inline void __DEFAULT_FN_ATTRS512
4524_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
4525{
4526  __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
4527}
4528
4529static __inline void __DEFAULT_FN_ATTRS512
4530_mm512_storeu_pd(void *__P, __m512d __A)
4531{
4532  struct __storeu_pd {
4533    __m512d_u __v;
4534  } __attribute__((__packed__, __may_alias__));
4535  ((struct __storeu_pd*)__P)->__v = __A;
4536}
4537
4538static __inline void __DEFAULT_FN_ATTRS512
4539_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
4540{
4541  __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
4542                                   (__mmask16) __U);
4543}
4544
4545static __inline void __DEFAULT_FN_ATTRS512
4546_mm512_storeu_ps(void *__P, __m512 __A)
4547{
4548  struct __storeu_ps {
4549    __m512_u __v;
4550  } __attribute__((__packed__, __may_alias__));
4551  ((struct __storeu_ps*)__P)->__v = __A;
4552}
4553
4554static __inline void __DEFAULT_FN_ATTRS512
4555_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
4556{
4557  __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
4558}
4559
4560static __inline void __DEFAULT_FN_ATTRS512
4561_mm512_store_pd(void *__P, __m512d __A)
4562{
4563  *(__m512d*)__P = __A;
4564}
4565
4566static __inline void __DEFAULT_FN_ATTRS512
4567_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
4568{
4569  __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
4570                                   (__mmask16) __U);
4571}
4572
4573static __inline void __DEFAULT_FN_ATTRS512
4574_mm512_store_ps(void *__P, __m512 __A)
4575{
4576  *(__m512*)__P = __A;
4577}
4578
4579static __inline void __DEFAULT_FN_ATTRS512
4580_mm512_store_si512 (void *__P, __m512i __A)
4581{
4582  *(__m512i *) __P = __A;
4583}
4584
4585static __inline void __DEFAULT_FN_ATTRS512
4586_mm512_store_epi32 (void *__P, __m512i __A)
4587{
4588  *(__m512i *) __P = __A;
4589}
4590
4591static __inline void __DEFAULT_FN_ATTRS512
4592_mm512_store_epi64 (void *__P, __m512i __A)
4593{
4594  *(__m512i *) __P = __A;
4595}
4596
4597/* Mask ops */
4598
4599static __inline __mmask16 __DEFAULT_FN_ATTRS
4600_mm512_knot(__mmask16 __M)
4601{
4602  return __builtin_ia32_knothi(__M);
4603}
4604
4605/* Integer compare */
4606
/* epi32 comparison shorthands: each expands to _mm512_cmp_epi32_mask with the
   matching _MM_CMPINT_* predicate.  */
#define _mm512_cmpeq_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpneq_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_cmplt_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_cmple_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_cmpgt_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_cmpge_epi32_mask(A, B) \
  _mm512_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)

/* Masked forms: same predicates, expanding to _mm512_mask_cmp_epi32_mask with
   the extra mask operand k passed first.  */
#define _mm512_mask_cmpeq_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpneq_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmplt_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmple_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmpgt_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpge_epi32_mask(k, A, B) \
  _mm512_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
4631
/* epu32 comparison shorthands: each expands to _mm512_cmp_epu32_mask with the
   matching _MM_CMPINT_* predicate.  */
#define _mm512_cmpeq_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpneq_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_cmplt_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_cmple_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_cmpgt_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_cmpge_epu32_mask(A, B) \
  _mm512_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)

/* Masked forms: same predicates, expanding to _mm512_mask_cmp_epu32_mask with
   the extra mask operand k passed first.  */
#define _mm512_mask_cmpeq_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpneq_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmplt_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmple_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmpgt_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpge_epu32_mask(k, A, B) \
  _mm512_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
4656
/* epi64 comparison shorthands: each expands to _mm512_cmp_epi64_mask with the
   matching _MM_CMPINT_* predicate.  */
#define _mm512_cmpeq_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpneq_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_cmplt_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_cmple_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_cmpgt_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_cmpge_epi64_mask(A, B) \
  _mm512_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)

/* Masked forms: same predicates, expanding to _mm512_mask_cmp_epi64_mask with
   the extra mask operand k passed first.  */
#define _mm512_mask_cmpeq_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpneq_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmplt_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmple_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmpgt_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpge_epi64_mask(k, A, B) \
  _mm512_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
4681
/* epu64 comparison shorthands: each expands to _mm512_cmp_epu64_mask with the
   matching _MM_CMPINT_* predicate.  */
#define _mm512_cmpeq_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
#define _mm512_cmpneq_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
#define _mm512_cmplt_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
#define _mm512_cmple_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
#define _mm512_cmpgt_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
#define _mm512_cmpge_epu64_mask(A, B) \
  _mm512_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)

/* Masked forms: same predicates, expanding to _mm512_mask_cmp_epu64_mask with
   the extra mask operand k passed first.  */
#define _mm512_mask_cmpeq_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
#define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
#define _mm512_mask_cmplt_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
#define _mm512_mask_cmple_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
#define _mm512_mask_cmpgt_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
#define _mm512_mask_cmpge_epu64_mask(k, A, B) \
  _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
4706
4707static __inline__ __m512i __DEFAULT_FN_ATTRS512
4708_mm512_cvtepi8_epi32(__m128i __A)
4709{
4710  /* This function always performs a signed extension, but __v16qi is a char
4711     which may be signed or unsigned, so use __v16qs. */
4712  return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
4713}
4714
4715static __inline__ __m512i __DEFAULT_FN_ATTRS512
4716_mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
4717{
4718  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4719                                             (__v16si)_mm512_cvtepi8_epi32(__A),
4720                                             (__v16si)__W);
4721}
4722
4723static __inline__ __m512i __DEFAULT_FN_ATTRS512
4724_mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
4725{
4726  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
4727                                             (__v16si)_mm512_cvtepi8_epi32(__A),
4728                                             (__v16si)_mm512_setzero_si512());
4729}
4730
4731static __inline__ __m512i __DEFAULT_FN_ATTRS512
4732_mm512_cvtepi8_epi64(__m128i __A)
4733{
4734  /* This function always performs a signed extension, but __v16qi is a char
4735     which may be signed or unsigned, so use __v16qs. */
4736  return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
4737}
4738
4739static __inline__ __m512i __DEFAULT_FN_ATTRS512
4740_mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
4741{
4742  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4743                                             (__v8di)_mm512_cvtepi8_epi64(__A),
4744                                             (__v8di)__W);
4745}
4746
4747static __inline__ __m512i __DEFAULT_FN_ATTRS512
4748_mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
4749{
4750  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
4751                                             (__v8di)_mm512_cvtepi8_epi64(__A),
4752                                             (__v8di)_mm512_setzero_si512 ());
4753}
4754
4755static __inline__ __m512i __DEFAULT_FN_ATTRS512
<