/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
9d874057dim#ifndef __IMMINTRIN_H
10d874057dim#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
11d874057dim#endif
12d874057dim
13d874057dim#ifndef __AVX512FINTRIN_H
14d874057dim#define __AVX512FINTRIN_H
15d874057dim
/* 64-byte (512-bit) vector types with signed-integer and floating-point
   elements.  The element count is 64 / sizeof(element). */
typedef char __v64qi __attribute__((__vector_size__(64)));
typedef short __v32hi __attribute__((__vector_size__(64)));
typedef double __v8df __attribute__((__vector_size__(64)));
typedef float __v16sf __attribute__((__vector_size__(64)));
typedef long long __v8di __attribute__((__vector_size__(64)));
typedef int __v16si __attribute__((__vector_size__(64)));

/* Unsigned types: 64-byte (512-bit) vectors with unsigned-integer elements. */
typedef unsigned char __v64qu __attribute__((__vector_size__(64)));
typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
typedef unsigned int __v16su __attribute__((__vector_size__(64)));

/* 512-bit vector types used in intrinsic signatures.  These are declared
   with 64-byte alignment. */
typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));

/* Variants of the 512-bit vector types declared with 1-byte alignment, so
   objects of these types carry no alignment requirement beyond a byte. */
typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1)));
typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)));

/* Mask types: one bit per vector element (8 or 16 elements). */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;

/* Rounding mode macros.  Values 0x00-0x03 name an explicit rounding mode;
   0x04 is a separate flag value (current direction). */
#define _MM_FROUND_TO_NEAREST_INT   0x00
#define _MM_FROUND_TO_NEG_INF       0x01
#define _MM_FROUND_TO_POS_INF       0x02
#define _MM_FROUND_TO_ZERO          0x03
#define _MM_FROUND_CUR_DIRECTION    0x04

/* Constants for integer comparison predicates.  Enumerators are numbered
   implicitly from 0; _MM_CMPINT_GE and _MM_CMPINT_GT are macro aliases for
   _MM_CMPINT_NLT and _MM_CMPINT_NLE respectively. */
typedef enum {
    _MM_CMPINT_EQ,      /* 0: Equal */
    _MM_CMPINT_LT,      /* 1: Less than */
    _MM_CMPINT_LE,      /* 2: Less than or Equal */
    _MM_CMPINT_UNUSED,  /* 3: placeholder keeping the numbering below */
    _MM_CMPINT_NE,      /* 4: Not Equal */
    _MM_CMPINT_NLT,     /* 5: Not Less than */
#define _MM_CMPINT_GE   _MM_CMPINT_NLT  /* Greater than or Equal */
    _MM_CMPINT_NLE      /* 6: Not Less than or Equal */
#define _MM_CMPINT_GT   _MM_CMPINT_NLE  /* Greater than */
} _MM_CMPINT_ENUM;

/* Named 8-bit constants _MM_PERM_wxyz.  Each letter encodes a 2-bit value
   (A = 0, B = 1, C = 2, D = 3); reading the name left to right, the letters
   occupy bits 7:6, 5:4, 3:2 and 1:0, e.g. _MM_PERM_ABCD == 0x1B. */
typedef enum
{
  _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02,
  _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05,
  _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08,
  _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B,
  _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E,
  _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11,
  _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14,
  _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17,
  _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A,
  _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D,
  _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20,
  _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23,
  _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26,
  _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29,
  _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C,
  _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F,
  _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32,
  _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35,
  _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38,
  _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B,
  _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E,
  _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41,
  _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44,
  _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47,
  _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A,
  _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D,
  _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50,
  _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53,
  _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56,
  _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59,
  _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C,
  _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F,
  _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62,
  _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65,
  _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68,
  _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B,
  _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E,
  _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71,
  _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74,
  _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77,
  _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A,
  _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D,
  _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80,
  _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83,
  _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86,
  _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89,
  _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C,
  _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F,
  _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92,
  _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95,
  _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98,
  _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B,
  _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E,
  _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1,
  _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4,
  _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7,
  _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA,
  _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD,
  _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0,
  _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3,
  _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6,
  _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9,
  _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC,
  _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF,
  _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2,
  _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5,
  _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8,
  _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB,
  _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE,
  _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1,
  _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4,
  _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7,
  _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA,
  _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD,
  _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0,
  _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3,
  _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6,
  _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9,
  _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC,
  _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF,
  _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2,
  _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5,
  _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8,
  _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB,
  _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE,
  _MM_PERM_DDDD = 0xFF
} _MM_PERM_ENUM;

/* Mantissa normalization intervals.  Enumerators are numbered implicitly
   from 0 in the order listed. */
typedef enum
{
  _MM_MANT_NORM_1_2,    /* interval [1, 2)      */
  _MM_MANT_NORM_p5_2,   /* interval [0.5, 2)    */
  _MM_MANT_NORM_p5_1,   /* interval [0.5, 1)    */
  _MM_MANT_NORM_p75_1p5   /* interval [0.75, 1.5) */
} _MM_MANTISSA_NORM_ENUM;

/* Mantissa sign-handling selectors.  Enumerators are numbered implicitly
   from 0 in the order listed. */
typedef enum
{
  _MM_MANT_SIGN_src,    /* sign = sign(SRC)     */
  _MM_MANT_SIGN_zero,   /* sign = 0             */
  _MM_MANT_SIGN_nan   /* DEST = NaN if sign(SRC) = 1 */
} _MM_MANTISSA_SIGN_ENUM;

/* Define the default attributes for the functions in this file.
   All three force inlining, suppress debug info and enable the "avx512f"
   target feature; the 512 and 128 variants additionally declare a minimum
   vector width of 512 or 128 bits. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))

/* Create vectors with repeated elements */

172d874057dimstatic  __inline __m512i __DEFAULT_FN_ATTRS512
173d874057dim_mm512_setzero_si512(void)
174d874057dim{
175d874057dim  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
176d874057dim}
177d874057dim
/* _mm512_setzero_epi32 is an alias for _mm512_setzero_si512. */
#define _mm512_setzero_epi32 _mm512_setzero_si512

180d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
181d874057dim_mm512_undefined_pd(void)
182d874057dim{
183d874057dim  return (__m512d)__builtin_ia32_undef512();
184d874057dim}
185d874057dim
186d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
187d874057dim_mm512_undefined(void)
188d874057dim{
189d874057dim  return (__m512)__builtin_ia32_undef512();
190d874057dim}
191d874057dim
192d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
193d874057dim_mm512_undefined_ps(void)
194d874057dim{
195d874057dim  return (__m512)__builtin_ia32_undef512();
196d874057dim}
197d874057dim
198d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
199d874057dim_mm512_undefined_epi32(void)
200d874057dim{
201d874057dim  return (__m512i)__builtin_ia32_undef512();
202d874057dim}
203d874057dim
204d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
205d874057dim_mm512_broadcastd_epi32 (__m128i __A)
206d874057dim{
207d874057dim  return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
208d874057dim                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
209d874057dim}
210d874057dim
211d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
212d874057dim_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
213d874057dim{
214d874057dim  return (__m512i)__builtin_ia32_selectd_512(__M,
215d874057dim                                             (__v16si) _mm512_broadcastd_epi32(__A),
216d874057dim                                             (__v16si) __O);
217d874057dim}
218d874057dim
219d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
220d874057dim_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
221d874057dim{
222d874057dim  return (__m512i)__builtin_ia32_selectd_512(__M,
223d874057dim                                             (__v16si) _mm512_broadcastd_epi32(__A),
224d874057dim                                             (__v16si) _mm512_setzero_si512());
225d874057dim}
226d874057dim
227d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
228d874057dim_mm512_broadcastq_epi64 (__m128i __A)
229d874057dim{
230d874057dim  return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
231d874057dim                                          0, 0, 0, 0, 0, 0, 0, 0);
232d874057dim}
233d874057dim
234d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
235d874057dim_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
236d874057dim{
237d874057dim  return (__m512i)__builtin_ia32_selectq_512(__M,
238d874057dim                                             (__v8di) _mm512_broadcastq_epi64(__A),
239d874057dim                                             (__v8di) __O);
240d874057dim
241d874057dim}
242d874057dim
243d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
244d874057dim_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
245d874057dim{
246d874057dim  return (__m512i)__builtin_ia32_selectq_512(__M,
247d874057dim                                             (__v8di) _mm512_broadcastq_epi64(__A),
248d874057dim                                             (__v8di) _mm512_setzero_si512());
249d874057dim}
250d874057dim
251d874057dim
252d874057dimstatic __inline __m512 __DEFAULT_FN_ATTRS512
253d874057dim_mm512_setzero_ps(void)
254d874057dim{
255d874057dim  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
256d874057dim                                 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
257d874057dim}
258d874057dim
/* _mm512_setzero is an alias for _mm512_setzero_ps. */
#define _mm512_setzero _mm512_setzero_ps

261d874057dimstatic  __inline __m512d __DEFAULT_FN_ATTRS512
262d874057dim_mm512_setzero_pd(void)
263d874057dim{
264d874057dim  return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
265d874057dim}
266d874057dim
267d874057dimstatic __inline __m512 __DEFAULT_FN_ATTRS512
268d874057dim_mm512_set1_ps(float __w)
269d874057dim{
270d874057dim  return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
271d874057dim                                 __w, __w, __w, __w, __w, __w, __w, __w  };
272d874057dim}
273d874057dim
274d874057dimstatic __inline __m512d __DEFAULT_FN_ATTRS512
275d874057dim_mm512_set1_pd(double __w)
276d874057dim{
277d874057dim  return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
278d874057dim}
279d874057dim
280d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
281d874057dim_mm512_set1_epi8(char __w)
282d874057dim{
283d874057dim  return __extension__ (__m512i)(__v64qi){
284d874057dim    __w, __w, __w, __w, __w, __w, __w, __w,
285d874057dim    __w, __w, __w, __w, __w, __w, __w, __w,
286d874057dim    __w, __w, __w, __w, __w, __w, __w, __w,
287d874057dim    __w, __w, __w, __w, __w, __w, __w, __w,
288d874057dim    __w, __w, __w, __w, __w, __w, __w, __w,
289d874057dim    __w, __w, __w, __w, __w, __w, __w, __w,
290d874057dim    __w, __w, __w, __w, __w, __w, __w, __w,
291d874057dim    __w, __w, __w, __w, __w, __w, __w, __w  };
292d874057dim}
293d874057dim
294d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
295d874057dim_mm512_set1_epi16(short __w)
296d874057dim{
297d874057dim  return __extension__ (__m512i)(__v32hi){
298d874057dim    __w, __w, __w, __w, __w, __w, __w, __w,
299d874057dim    __w, __w, __w, __w, __w, __w, __w, __w,
300d874057dim    __w, __w, __w, __w, __w, __w, __w, __w,
301d874057dim    __w, __w, __w, __w, __w, __w, __w, __w };
302d874057dim}
303d874057dim
304d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
305d874057dim_mm512_set1_epi32(int __s)
306d874057dim{
307d874057dim  return __extension__ (__m512i)(__v16si){
308d874057dim    __s, __s, __s, __s, __s, __s, __s, __s,
309d874057dim    __s, __s, __s, __s, __s, __s, __s, __s };
310d874057dim}
311d874057dim
312d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
313d874057dim_mm512_maskz_set1_epi32(__mmask16 __M, int __A)
314d874057dim{
315d874057dim  return (__m512i)__builtin_ia32_selectd_512(__M,
316d874057dim                                             (__v16si)_mm512_set1_epi32(__A),
317d874057dim                                             (__v16si)_mm512_setzero_si512());
318d874057dim}
319d874057dim
320d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
321d874057dim_mm512_set1_epi64(long long __d)
322d874057dim{
323d874057dim  return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
324d874057dim}
325d874057dim
326d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
327d874057dim_mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
328d874057dim{
329d874057dim  return (__m512i)__builtin_ia32_selectq_512(__M,
330d874057dim                                             (__v8di)_mm512_set1_epi64(__A),
331d874057dim                                             (__v8di)_mm512_setzero_si512());
332d874057dim}
333d874057dim
334d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
335d874057dim_mm512_broadcastss_ps(__m128 __A)
336d874057dim{
337d874057dim  return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
338d874057dim                                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
339d874057dim}
340d874057dim
341d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
342d874057dim_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
343d874057dim{
344d874057dim  return __extension__ (__m512i)(__v16si)
345d874057dim   { __D, __C, __B, __A, __D, __C, __B, __A,
346d874057dim     __D, __C, __B, __A, __D, __C, __B, __A };
347d874057dim}
348d874057dim
349d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
350d874057dim_mm512_set4_epi64 (long long __A, long long __B, long long __C,
351d874057dim       long long __D)
352d874057dim{
353d874057dim  return __extension__ (__m512i) (__v8di)
354d874057dim   { __D, __C, __B, __A, __D, __C, __B, __A };
355d874057dim}
356d874057dim
357d874057dimstatic __inline __m512d __DEFAULT_FN_ATTRS512
358d874057dim_mm512_set4_pd (double __A, double __B, double __C, double __D)
359d874057dim{
360d874057dim  return __extension__ (__m512d)
361d874057dim   { __D, __C, __B, __A, __D, __C, __B, __A };
362d874057dim}
363d874057dim
364d874057dimstatic __inline __m512 __DEFAULT_FN_ATTRS512
365d874057dim_mm512_set4_ps (float __A, float __B, float __C, float __D)
366d874057dim{
367d874057dim  return __extension__ (__m512)
368d874057dim   { __D, __C, __B, __A, __D, __C, __B, __A,
369d874057dim     __D, __C, __B, __A, __D, __C, __B, __A };
370d874057dim}
371d874057dim
/* Reversed-argument form of _mm512_set4_epi32: e0 becomes the lowest
   element of each group of four, e3 the highest. */
#define _mm512_setr4_epi32(e0,e1,e2,e3)               \
  _mm512_set4_epi32((e3),(e2),(e1),(e0))

/* Reversed-argument form of _mm512_set4_epi64: e0 becomes the lowest
   element of each group of four, e3 the highest. */
#define _mm512_setr4_epi64(e0,e1,e2,e3)               \
  _mm512_set4_epi64((e3),(e2),(e1),(e0))

/* Reversed-argument form of _mm512_set4_pd: e0 becomes the lowest element
   of each group of four, e3 the highest. */
#define _mm512_setr4_pd(e0,e1,e2,e3)                \
  _mm512_set4_pd((e3),(e2),(e1),(e0))

/* Reversed-argument form of _mm512_set4_ps: e0 becomes the lowest element
   of each group of four, e3 the highest. */
#define _mm512_setr4_ps(e0,e1,e2,e3)                \
  _mm512_set4_ps((e3),(e2),(e1),(e0))

384d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
385d874057dim_mm512_broadcastsd_pd(__m128d __A)
386d874057dim{
387d874057dim  return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
388d874057dim                                          0, 0, 0, 0, 0, 0, 0, 0);
389d874057dim}
390d874057dim
/* Cast between vector types */

393d874057dimstatic __inline __m512d __DEFAULT_FN_ATTRS512
394d874057dim_mm512_castpd256_pd512(__m256d __a)
395d874057dim{
396d874057dim  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
397d874057dim}
398d874057dim
399d874057dimstatic __inline __m512 __DEFAULT_FN_ATTRS512
400d874057dim_mm512_castps256_ps512(__m256 __a)
401d874057dim{
402d874057dim  return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
403d874057dim                                          -1, -1, -1, -1, -1, -1, -1, -1);
404d874057dim}
405d874057dim
406d874057dimstatic __inline __m128d __DEFAULT_FN_ATTRS512
407d874057dim_mm512_castpd512_pd128(__m512d __a)
408d874057dim{
409d874057dim  return __builtin_shufflevector(__a, __a, 0, 1);
410d874057dim}
411d874057dim
412d874057dimstatic __inline __m256d __DEFAULT_FN_ATTRS512
413d874057dim_mm512_castpd512_pd256 (__m512d __A)
414d874057dim{
415d874057dim  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
416d874057dim}
417d874057dim
418d874057dimstatic __inline __m128 __DEFAULT_FN_ATTRS512
419d874057dim_mm512_castps512_ps128(__m512 __a)
420d874057dim{
421d874057dim  return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
422d874057dim}
423d874057dim
424d874057dimstatic __inline __m256 __DEFAULT_FN_ATTRS512
425d874057dim_mm512_castps512_ps256 (__m512 __A)
426d874057dim{
427d874057dim  return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
428d874057dim}
429d874057dim
430d874057dimstatic __inline __m512 __DEFAULT_FN_ATTRS512
431d874057dim_mm512_castpd_ps (__m512d __A)
432d874057dim{
433d874057dim  return (__m512) (__A);
434d874057dim}
435d874057dim
436d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
437d874057dim_mm512_castpd_si512 (__m512d __A)
438d874057dim{
439d874057dim  return (__m512i) (__A);
440d874057dim}
441d874057dim
442d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
443d874057dim_mm512_castpd128_pd512 (__m128d __A)
444d874057dim{
445d874057dim  return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
446d874057dim}
447d874057dim
448d874057dimstatic __inline __m512d __DEFAULT_FN_ATTRS512
449d874057dim_mm512_castps_pd (__m512 __A)
450d874057dim{
451d874057dim  return (__m512d) (__A);
452d874057dim}
453d874057dim
454d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
455d874057dim_mm512_castps_si512 (__m512 __A)
456d874057dim{
457d874057dim  return (__m512i) (__A);
458d874057dim}
459d874057dim
460d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
461d874057dim_mm512_castps128_ps512 (__m128 __A)
462d874057dim{
463d874057dim    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
464d874057dim}
465d874057dim
466d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
467d874057dim_mm512_castsi128_si512 (__m128i __A)
468d874057dim{
469d874057dim   return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
470d874057dim}
471d874057dim
472d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
473d874057dim_mm512_castsi256_si512 (__m256i __A)
474d874057dim{
475d874057dim   return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
476d874057dim}
477d874057dim
478d874057dimstatic __inline __m512 __DEFAULT_FN_ATTRS512
479d874057dim_mm512_castsi512_ps (__m512i __A)
480d874057dim{
481d874057dim  return (__m512) (__A);
482d874057dim}
483d874057dim
484d874057dimstatic __inline __m512d __DEFAULT_FN_ATTRS512
485d874057dim_mm512_castsi512_pd (__m512i __A)
486d874057dim{
487d874057dim  return (__m512d) (__A);
488d874057dim}
489d874057dim
490d874057dimstatic __inline __m128i __DEFAULT_FN_ATTRS512
491d874057dim_mm512_castsi512_si128 (__m512i __A)
492d874057dim{
493d874057dim  return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
494d874057dim}
495d874057dim
496d874057dimstatic __inline __m256i __DEFAULT_FN_ATTRS512
497d874057dim_mm512_castsi512_si256 (__m512i __A)
498d874057dim{
499d874057dim  return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
500d874057dim}
501d874057dim
502d874057dimstatic __inline__ __mmask16 __DEFAULT_FN_ATTRS
503d874057dim_mm512_int2mask(int __a)
504d874057dim{
505d874057dim  return (__mmask16)__a;
506d874057dim}
507d874057dim
508d874057dimstatic __inline__ int __DEFAULT_FN_ATTRS
509d874057dim_mm512_mask2int(__mmask16 __a)
510d874057dim{
511d874057dim  return (int)__a;
512d874057dim}
513d874057dim
514d874057dim/// Constructs a 512-bit floating-point vector of [8 x double] from a
515d874057dim///    128-bit floating-point vector of [2 x double]. The lower 128 bits
516d874057dim///    contain the value of the source vector. The upper 384 bits are set
517d874057dim///    to zero.
518d874057dim///
519d874057dim/// \headerfile <x86intrin.h>
520d874057dim///
521d874057dim/// This intrinsic has no corresponding instruction.
522d874057dim///
523d874057dim/// \param __a
524d874057dim///    A 128-bit vector of [2 x double].
525d874057dim/// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
526d874057dim///    contain the value of the parameter. The upper 384 bits are set to zero.
527d874057dimstatic __inline __m512d __DEFAULT_FN_ATTRS512
528d874057dim_mm512_zextpd128_pd512(__m128d __a)
529d874057dim{
530d874057dim  return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
531d874057dim}
532d874057dim
533d874057dim/// Constructs a 512-bit floating-point vector of [8 x double] from a
534d874057dim///    256-bit floating-point vector of [4 x double]. The lower 256 bits
535d874057dim///    contain the value of the source vector. The upper 256 bits are set
536d874057dim///    to zero.
537d874057dim///
538d874057dim/// \headerfile <x86intrin.h>
539d874057dim///
540d874057dim/// This intrinsic has no corresponding instruction.
541d874057dim///
542d874057dim/// \param __a
543d874057dim///    A 256-bit vector of [4 x double].
544d874057dim/// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
545d874057dim///    contain the value of the parameter. The upper 256 bits are set to zero.
546d874057dimstatic __inline __m512d __DEFAULT_FN_ATTRS512
547d874057dim_mm512_zextpd256_pd512(__m256d __a)
548d874057dim{
549d874057dim  return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
550d874057dim}
551d874057dim
552d874057dim/// Constructs a 512-bit floating-point vector of [16 x float] from a
553d874057dim///    128-bit floating-point vector of [4 x float]. The lower 128 bits contain
554d874057dim///    the value of the source vector. The upper 384 bits are set to zero.
555d874057dim///
556d874057dim/// \headerfile <x86intrin.h>
557d874057dim///
558d874057dim/// This intrinsic has no corresponding instruction.
559d874057dim///
560d874057dim/// \param __a
561d874057dim///    A 128-bit vector of [4 x float].
562d874057dim/// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
563d874057dim///    contain the value of the parameter. The upper 384 bits are set to zero.
564d874057dimstatic __inline __m512 __DEFAULT_FN_ATTRS512
565d874057dim_mm512_zextps128_ps512(__m128 __a)
566d874057dim{
567d874057dim  return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
568d874057dim}
569d874057dim
570d874057dim/// Constructs a 512-bit floating-point vector of [16 x float] from a
571d874057dim///    256-bit floating-point vector of [8 x float]. The lower 256 bits contain
572d874057dim///    the value of the source vector. The upper 256 bits are set to zero.
573d874057dim///
574d874057dim/// \headerfile <x86intrin.h>
575d874057dim///
576d874057dim/// This intrinsic has no corresponding instruction.
577d874057dim///
578d874057dim/// \param __a
579d874057dim///    A 256-bit vector of [8 x float].
580d874057dim/// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
581d874057dim///    contain the value of the parameter. The upper 256 bits are set to zero.
582d874057dimstatic __inline __m512 __DEFAULT_FN_ATTRS512
583d874057dim_mm512_zextps256_ps512(__m256 __a)
584d874057dim{
585d874057dim  return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
586d874057dim}
587d874057dim
588d874057dim/// Constructs a 512-bit integer vector from a 128-bit integer vector.
589d874057dim///    The lower 128 bits contain the value of the source vector. The upper
590d874057dim///    384 bits are set to zero.
591d874057dim///
592d874057dim/// \headerfile <x86intrin.h>
593d874057dim///
594d874057dim/// This intrinsic has no corresponding instruction.
595d874057dim///
596d874057dim/// \param __a
597d874057dim///    A 128-bit integer vector.
598d874057dim/// \returns A 512-bit integer vector. The lower 128 bits contain the value of
599d874057dim///    the parameter. The upper 384 bits are set to zero.
600d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
601d874057dim_mm512_zextsi128_si512(__m128i __a)
602d874057dim{
603d874057dim  return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
604d874057dim}
605d874057dim
606d874057dim/// Constructs a 512-bit integer vector from a 256-bit integer vector.
607d874057dim///    The lower 256 bits contain the value of the source vector. The upper
608d874057dim///    256 bits are set to zero.
609d874057dim///
610d874057dim/// \headerfile <x86intrin.h>
611d874057dim///
612d874057dim/// This intrinsic has no corresponding instruction.
613d874057dim///
614d874057dim/// \param __a
615d874057dim///    A 256-bit integer vector.
616d874057dim/// \returns A 512-bit integer vector. The lower 256 bits contain the value of
617d874057dim///    the parameter. The upper 256 bits are set to zero.
618d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
619d874057dim_mm512_zextsi256_si512(__m256i __a)
620d874057dim{
621d874057dim  return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
622d874057dim}
623d874057dim
/* Bitwise operators */
625d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
626d874057dim_mm512_and_epi32(__m512i __a, __m512i __b)
627d874057dim{
628d874057dim  return (__m512i)((__v16su)__a & (__v16su)__b);
629d874057dim}
630d874057dim
631d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
632d874057dim_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
633d874057dim{
634d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
635d874057dim                (__v16si) _mm512_and_epi32(__a, __b),
636d874057dim                (__v16si) __src);
637d874057dim}
638d874057dim
639d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
640d874057dim_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
641d874057dim{
642d874057dim  return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
643d874057dim                                         __k, __a, __b);
644d874057dim}
645d874057dim
646d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
647d874057dim_mm512_and_epi64(__m512i __a, __m512i __b)
648d874057dim{
649d874057dim  return (__m512i)((__v8du)__a & (__v8du)__b);
650d874057dim}
651d874057dim
652d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
653d874057dim_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
654d874057dim{
655d874057dim    return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
656d874057dim                (__v8di) _mm512_and_epi64(__a, __b),
657d874057dim                (__v8di) __src);
658d874057dim}
659d874057dim
660d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
661d874057dim_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
662d874057dim{
663d874057dim  return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
664d874057dim                                         __k, __a, __b);
665d874057dim}
666d874057dim
667d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
668d874057dim_mm512_andnot_si512 (__m512i __A, __m512i __B)
669d874057dim{
670d874057dim  return (__m512i)(~(__v8du)__A & (__v8du)__B);
671d874057dim}
672d874057dim
673d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
674d874057dim_mm512_andnot_epi32 (__m512i __A, __m512i __B)
675d874057dim{
676d874057dim  return (__m512i)(~(__v16su)__A & (__v16su)__B);
677d874057dim}
678d874057dim
679d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
680d874057dim_mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
681d874057dim{
682d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
683d874057dim                                         (__v16si)_mm512_andnot_epi32(__A, __B),
684d874057dim                                         (__v16si)__W);
685d874057dim}
686d874057dim
687d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
688d874057dim_mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
689d874057dim{
690d874057dim  return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
691d874057dim                                           __U, __A, __B);
692d874057dim}
693d874057dim
694d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
695d874057dim_mm512_andnot_epi64(__m512i __A, __m512i __B)
696d874057dim{
697d874057dim  return (__m512i)(~(__v8du)__A & (__v8du)__B);
698d874057dim}
699d874057dim
700d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
701d874057dim_mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
702d874057dim{
703d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
704d874057dim                                          (__v8di)_mm512_andnot_epi64(__A, __B),
705d874057dim                                          (__v8di)__W);
706d874057dim}
707d874057dim
708d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
709d874057dim_mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
710d874057dim{
711d874057dim  return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
712d874057dim                                           __U, __A, __B);
713d874057dim}
714d874057dim
715d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
716d874057dim_mm512_or_epi32(__m512i __a, __m512i __b)
717d874057dim{
718d874057dim  return (__m512i)((__v16su)__a | (__v16su)__b);
719d874057dim}
720d874057dim
721d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
722d874057dim_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
723d874057dim{
724d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
725d874057dim                                             (__v16si)_mm512_or_epi32(__a, __b),
726d874057dim                                             (__v16si)__src);
727d874057dim}
728d874057dim
729d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
730d874057dim_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
731d874057dim{
732d874057dim  return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
733d874057dim}
734d874057dim
735d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
736d874057dim_mm512_or_epi64(__m512i __a, __m512i __b)
737d874057dim{
738d874057dim  return (__m512i)((__v8du)__a | (__v8du)__b);
739d874057dim}
740d874057dim
741d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
742d874057dim_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
743d874057dim{
744d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
745d874057dim                                             (__v8di)_mm512_or_epi64(__a, __b),
746d874057dim                                             (__v8di)__src);
747d874057dim}
748d874057dim
749d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
750d874057dim_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
751d874057dim{
752d874057dim  return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
753d874057dim}
754d874057dim
755d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
756d874057dim_mm512_xor_epi32(__m512i __a, __m512i __b)
757d874057dim{
758d874057dim  return (__m512i)((__v16su)__a ^ (__v16su)__b);
759d874057dim}
760d874057dim
761d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
762d874057dim_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
763d874057dim{
764d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
765d874057dim                                            (__v16si)_mm512_xor_epi32(__a, __b),
766d874057dim                                            (__v16si)__src);
767d874057dim}
768d874057dim
769d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
770d874057dim_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
771d874057dim{
772d874057dim  return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
773d874057dim}
774d874057dim
775d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
776d874057dim_mm512_xor_epi64(__m512i __a, __m512i __b)
777d874057dim{
778d874057dim  return (__m512i)((__v8du)__a ^ (__v8du)__b);
779d874057dim}
780d874057dim
781d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
782d874057dim_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
783d874057dim{
784d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
785d874057dim                                             (__v8di)_mm512_xor_epi64(__a, __b),
786d874057dim                                             (__v8di)__src);
787d874057dim}
788d874057dim
789d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
790d874057dim_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
791d874057dim{
792d874057dim  return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
793d874057dim}
794d874057dim
795d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
796d874057dim_mm512_and_si512(__m512i __a, __m512i __b)
797d874057dim{
798d874057dim  return (__m512i)((__v8du)__a & (__v8du)__b);
799d874057dim}
800d874057dim
801d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
802d874057dim_mm512_or_si512(__m512i __a, __m512i __b)
803d874057dim{
804d874057dim  return (__m512i)((__v8du)__a | (__v8du)__b);
805d874057dim}
806d874057dim
807d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
808d874057dim_mm512_xor_si512(__m512i __a, __m512i __b)
809d874057dim{
810d874057dim  return (__m512i)((__v8du)__a ^ (__v8du)__b);
811d874057dim}
812d874057dim
813d874057dim/* Arithmetic */
814d874057dim
815d874057dimstatic __inline __m512d __DEFAULT_FN_ATTRS512
816d874057dim_mm512_add_pd(__m512d __a, __m512d __b)
817d874057dim{
818d874057dim  return (__m512d)((__v8df)__a + (__v8df)__b);
819d874057dim}
820d874057dim
821d874057dimstatic __inline __m512 __DEFAULT_FN_ATTRS512
822d874057dim_mm512_add_ps(__m512 __a, __m512 __b)
823d874057dim{
824d874057dim  return (__m512)((__v16sf)__a + (__v16sf)__b);
825d874057dim}
826d874057dim
827d874057dimstatic __inline __m512d __DEFAULT_FN_ATTRS512
828d874057dim_mm512_mul_pd(__m512d __a, __m512d __b)
829d874057dim{
830d874057dim  return (__m512d)((__v8df)__a * (__v8df)__b);
831d874057dim}
832d874057dim
833d874057dimstatic __inline __m512 __DEFAULT_FN_ATTRS512
834d874057dim_mm512_mul_ps(__m512 __a, __m512 __b)
835d874057dim{
836d874057dim  return (__m512)((__v16sf)__a * (__v16sf)__b);
837d874057dim}
838d874057dim
839d874057dimstatic __inline __m512d __DEFAULT_FN_ATTRS512
840d874057dim_mm512_sub_pd(__m512d __a, __m512d __b)
841d874057dim{
842d874057dim  return (__m512d)((__v8df)__a - (__v8df)__b);
843d874057dim}
844d874057dim
845d874057dimstatic __inline __m512 __DEFAULT_FN_ATTRS512
846d874057dim_mm512_sub_ps(__m512 __a, __m512 __b)
847d874057dim{
848d874057dim  return (__m512)((__v16sf)__a - (__v16sf)__b);
849d874057dim}
850d874057dim
851d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
852d874057dim_mm512_add_epi64 (__m512i __A, __m512i __B)
853d874057dim{
854d874057dim  return (__m512i) ((__v8du) __A + (__v8du) __B);
855d874057dim}
856d874057dim
857d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
858d874057dim_mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
859d874057dim{
860d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
861d874057dim                                             (__v8di)_mm512_add_epi64(__A, __B),
862d874057dim                                             (__v8di)__W);
863d874057dim}
864d874057dim
865d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
866d874057dim_mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
867d874057dim{
868d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
869d874057dim                                             (__v8di)_mm512_add_epi64(__A, __B),
870d874057dim                                             (__v8di)_mm512_setzero_si512());
871d874057dim}
872d874057dim
873d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
874d874057dim_mm512_sub_epi64 (__m512i __A, __m512i __B)
875d874057dim{
876d874057dim  return (__m512i) ((__v8du) __A - (__v8du) __B);
877d874057dim}
878d874057dim
879d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
880d874057dim_mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
881d874057dim{
882d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
883d874057dim                                             (__v8di)_mm512_sub_epi64(__A, __B),
884d874057dim                                             (__v8di)__W);
885d874057dim}
886d874057dim
887d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
888d874057dim_mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
889d874057dim{
890d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
891d874057dim                                             (__v8di)_mm512_sub_epi64(__A, __B),
892d874057dim                                             (__v8di)_mm512_setzero_si512());
893d874057dim}
894d874057dim
895d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
896d874057dim_mm512_add_epi32 (__m512i __A, __m512i __B)
897d874057dim{
898d874057dim  return (__m512i) ((__v16su) __A + (__v16su) __B);
899d874057dim}
900d874057dim
901d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
902d874057dim_mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
903d874057dim{
904d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
905d874057dim                                             (__v16si)_mm512_add_epi32(__A, __B),
906d874057dim                                             (__v16si)__W);
907d874057dim}
908d874057dim
909d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
910d874057dim_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
911d874057dim{
912d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
913d874057dim                                             (__v16si)_mm512_add_epi32(__A, __B),
914d874057dim                                             (__v16si)_mm512_setzero_si512());
915d874057dim}
916d874057dim
917d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
918d874057dim_mm512_sub_epi32 (__m512i __A, __m512i __B)
919d874057dim{
920d874057dim  return (__m512i) ((__v16su) __A - (__v16su) __B);
921d874057dim}
922d874057dim
923d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
924d874057dim_mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
925d874057dim{
926d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
927d874057dim                                             (__v16si)_mm512_sub_epi32(__A, __B),
928d874057dim                                             (__v16si)__W);
929d874057dim}
930d874057dim
931d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
932d874057dim_mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
933d874057dim{
934d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
935d874057dim                                             (__v16si)_mm512_sub_epi32(__A, __B),
936d874057dim                                             (__v16si)_mm512_setzero_si512());
937d874057dim}
938d874057dim
/// Per-lane maximum of the packed double-precision vectors \a A and \a B;
///    \a R is forwarded as the builtin's integer control operand. The whole
///    expansion is parenthesized so the cast cannot be split from the call
///    by a postfix operator applied to the macro result.
#define _mm512_max_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_maxpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/// Merge-masked form: lanes whose bit in \a U is set take the maximum,
///    the remaining lanes are taken from \a W.
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                   (__v8df)(W)))

/// Zero-masked form: lanes whose bit in \a U is set take the maximum,
///    the remaining lanes are zero.
#define _mm512_maskz_max_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
952d874057dim
953d874057dimstatic  __inline__ __m512d __DEFAULT_FN_ATTRS512
954d874057dim_mm512_max_pd(__m512d __A, __m512d __B)
955d874057dim{
956d874057dim  return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
957d874057dim                                           _MM_FROUND_CUR_DIRECTION);
958d874057dim}
959d874057dim
960d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
961d874057dim_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
962d874057dim{
963d874057dim  return (__m512d)__builtin_ia32_selectpd_512(__U,
964d874057dim                                              (__v8df)_mm512_max_pd(__A, __B),
965d874057dim                                              (__v8df)__W);
966d874057dim}
967d874057dim
968d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
969d874057dim_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
970d874057dim{
971d874057dim  return (__m512d)__builtin_ia32_selectpd_512(__U,
972d874057dim                                              (__v8df)_mm512_max_pd(__A, __B),
973d874057dim                                              (__v8df)_mm512_setzero_pd());
974d874057dim}
975d874057dim
/// Per-lane maximum of the packed single-precision vectors \a A and \a B;
///    \a R is forwarded as the builtin's integer control operand. The whole
///    expansion is parenthesized so the cast cannot be split from the call
///    by a postfix operator applied to the macro result.
#define _mm512_max_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_maxps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/// Merge-masked form: lanes whose bit in \a U is set take the maximum,
///    the remaining lanes are taken from \a W.
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                  (__v16sf)(W)))

/// Zero-masked form: lanes whose bit in \a U is set take the maximum,
///    the remaining lanes are zero.
#define _mm512_maskz_max_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
989d874057dim
990d874057dimstatic  __inline__ __m512 __DEFAULT_FN_ATTRS512
991d874057dim_mm512_max_ps(__m512 __A, __m512 __B)
992d874057dim{
993d874057dim  return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
994d874057dim                                          _MM_FROUND_CUR_DIRECTION);
995d874057dim}
996d874057dim
997d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
998d874057dim_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
999d874057dim{
1000d874057dim  return (__m512)__builtin_ia32_selectps_512(__U,
1001d874057dim                                             (__v16sf)_mm512_max_ps(__A, __B),
1002d874057dim                                             (__v16sf)__W);
1003d874057dim}
1004d874057dim
1005d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1006d874057dim_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
1007d874057dim{
1008d874057dim  return (__m512)__builtin_ia32_selectps_512(__U,
1009d874057dim                                             (__v16sf)_mm512_max_ps(__A, __B),
1010d874057dim                                             (__v16sf)_mm512_setzero_ps());
1011d874057dim}
1012d874057dim
1013d874057dimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1014d874057dim_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1015d874057dim  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1016d874057dim                (__v4sf) __B,
1017d874057dim                (__v4sf) __W,
1018d874057dim                (__mmask8) __U,
1019d874057dim                _MM_FROUND_CUR_DIRECTION);
1020d874057dim}
1021d874057dim
1022d874057dimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1023d874057dim_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1024d874057dim  return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
1025d874057dim                (__v4sf) __B,
1026d874057dim                (__v4sf)  _mm_setzero_ps (),
1027d874057dim                (__mmask8) __U,
1028d874057dim                _MM_FROUND_CUR_DIRECTION);
1029d874057dim}
1030d874057dim
/// Scalar single-precision maximum with an explicit control operand \a R.
///    Uses an all-ones mask and a zero pass-through vector. The whole
///    expansion is parenthesized so the cast cannot be split from the call
///    by a postfix operator applied to the macro result.
#define _mm_max_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/// Merge-masked form: \a W is the pass-through vector and \a U the mask.
#define _mm_mask_max_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/// Zero-masked form: a zero vector is the pass-through and \a U the mask.
#define _mm_maskz_max_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_maxss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1048d874057dim
1049d874057dimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1050d874057dim_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1051d874057dim  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1052d874057dim                (__v2df) __B,
1053d874057dim                (__v2df) __W,
1054d874057dim                (__mmask8) __U,
1055d874057dim                _MM_FROUND_CUR_DIRECTION);
1056d874057dim}
1057d874057dim
1058d874057dimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1059d874057dim_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1060d874057dim  return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
1061d874057dim                (__v2df) __B,
1062d874057dim                (__v2df)  _mm_setzero_pd (),
1063d874057dim                (__mmask8) __U,
1064d874057dim                _MM_FROUND_CUR_DIRECTION);
1065d874057dim}
1066d874057dim
/// Scalar double-precision maximum with an explicit control operand \a R.
///    Uses an all-ones mask and a zero pass-through vector. The whole
///    expansion is parenthesized so the cast cannot be split from the call
///    by a postfix operator applied to the macro result.
#define _mm_max_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

/// Merge-masked form: \a W is the pass-through vector and \a U the mask.
#define _mm_mask_max_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))

/// Zero-masked form: a zero vector is the pass-through and \a U the mask.
#define _mm_maskz_max_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_maxsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1084d874057dim
1085d874057dimstatic __inline __m512i
1086d874057dim__DEFAULT_FN_ATTRS512
1087d874057dim_mm512_max_epi32(__m512i __A, __m512i __B)
1088d874057dim{
1089d874057dim  return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
1090d874057dim}
1091d874057dim
1092d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1093d874057dim_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1094d874057dim{
1095d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1096d874057dim                                            (__v16si)_mm512_max_epi32(__A, __B),
1097d874057dim                                            (__v16si)__W);
1098d874057dim}
1099d874057dim
1100d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1101d874057dim_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1102d874057dim{
1103d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1104d874057dim                                            (__v16si)_mm512_max_epi32(__A, __B),
1105d874057dim                                            (__v16si)_mm512_setzero_si512());
1106d874057dim}
1107d874057dim
1108d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1109d874057dim_mm512_max_epu32(__m512i __A, __m512i __B)
1110d874057dim{
1111d874057dim  return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
1112d874057dim}
1113d874057dim
1114d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1115d874057dim_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1116d874057dim{
1117d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1118d874057dim                                            (__v16si)_mm512_max_epu32(__A, __B),
1119d874057dim                                            (__v16si)__W);
1120d874057dim}
1121d874057dim
1122d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1123d874057dim_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1124d874057dim{
1125d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1126d874057dim                                            (__v16si)_mm512_max_epu32(__A, __B),
1127d874057dim                                            (__v16si)_mm512_setzero_si512());
1128d874057dim}
1129d874057dim
1130d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1131d874057dim_mm512_max_epi64(__m512i __A, __m512i __B)
1132d874057dim{
1133d874057dim  return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
1134d874057dim}
1135d874057dim
1136d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1137d874057dim_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1138d874057dim{
1139d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1140d874057dim                                             (__v8di)_mm512_max_epi64(__A, __B),
1141d874057dim                                             (__v8di)__W);
1142d874057dim}
1143d874057dim
1144d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1145d874057dim_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1146d874057dim{
1147d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1148d874057dim                                             (__v8di)_mm512_max_epi64(__A, __B),
1149d874057dim                                             (__v8di)_mm512_setzero_si512());
1150d874057dim}
1151d874057dim
1152d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1153d874057dim_mm512_max_epu64(__m512i __A, __m512i __B)
1154d874057dim{
1155d874057dim  return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
1156d874057dim}
1157d874057dim
1158d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1159d874057dim_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1160d874057dim{
1161d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1162d874057dim                                             (__v8di)_mm512_max_epu64(__A, __B),
1163d874057dim                                             (__v8di)__W);
1164d874057dim}
1165d874057dim
1166d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1167d874057dim_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1168d874057dim{
1169d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1170d874057dim                                             (__v8di)_mm512_max_epu64(__A, __B),
1171d874057dim                                             (__v8di)_mm512_setzero_si512());
1172d874057dim}
1173d874057dim
/// Per-lane minimum of the packed double-precision vectors \a A and \a B;
///    \a R is forwarded as the builtin's integer control operand. The whole
///    expansion is parenthesized so the cast cannot be split from the call
///    by a postfix operator applied to the macro result.
#define _mm512_min_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_minpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))

/// Merge-masked form: lanes whose bit in \a U is set take the minimum,
///    the remaining lanes are taken from \a W.
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                   (__v8df)(W)))

/// Zero-masked form: lanes whose bit in \a U is set take the minimum,
///    the remaining lanes are zero.
#define _mm512_maskz_min_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
1187d874057dim
1188d874057dimstatic  __inline__ __m512d __DEFAULT_FN_ATTRS512
1189d874057dim_mm512_min_pd(__m512d __A, __m512d __B)
1190d874057dim{
1191d874057dim  return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
1192d874057dim                                           _MM_FROUND_CUR_DIRECTION);
1193d874057dim}
1194d874057dim
1195d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1196d874057dim_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
1197d874057dim{
1198d874057dim  return (__m512d)__builtin_ia32_selectpd_512(__U,
1199d874057dim                                              (__v8df)_mm512_min_pd(__A, __B),
1200d874057dim                                              (__v8df)__W);
1201d874057dim}
1202d874057dim
1203d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1204d874057dim_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
1205d874057dim{
1206d874057dim  return (__m512d)__builtin_ia32_selectpd_512(__U,
1207d874057dim                                              (__v8df)_mm512_min_pd(__A, __B),
1208d874057dim                                              (__v8df)_mm512_setzero_pd());
1209d874057dim}
1210d874057dim
/// Per-lane minimum of the packed single-precision vectors \a A and \a B;
///    \a R is forwarded as the builtin's integer control operand. The whole
///    expansion is parenthesized so the cast cannot be split from the call
///    by a postfix operator applied to the macro result.
#define _mm512_min_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_minps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))

/// Merge-masked form: lanes whose bit in \a U is set take the minimum,
///    the remaining lanes are taken from \a W.
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                  (__v16sf)(W)))

/// Zero-masked form: lanes whose bit in \a U is set take the minimum,
///    the remaining lanes are zero.
#define _mm512_maskz_min_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
1224d874057dim
1225d874057dimstatic  __inline__ __m512 __DEFAULT_FN_ATTRS512
1226d874057dim_mm512_min_ps(__m512 __A, __m512 __B)
1227d874057dim{
1228d874057dim  return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
1229d874057dim                                          _MM_FROUND_CUR_DIRECTION);
1230d874057dim}
1231d874057dim
1232d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1233d874057dim_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
1234d874057dim{
1235d874057dim  return (__m512)__builtin_ia32_selectps_512(__U,
1236d874057dim                                             (__v16sf)_mm512_min_ps(__A, __B),
1237d874057dim                                             (__v16sf)__W);
1238d874057dim}
1239d874057dim
1240d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1241d874057dim_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
1242d874057dim{
1243d874057dim  return (__m512)__builtin_ia32_selectps_512(__U,
1244d874057dim                                             (__v16sf)_mm512_min_ps(__A, __B),
1245d874057dim                                             (__v16sf)_mm512_setzero_ps());
1246d874057dim}
1247d874057dim
1248d874057dimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1249d874057dim_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1250d874057dim  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1251d874057dim                (__v4sf) __B,
1252d874057dim                (__v4sf) __W,
1253d874057dim                (__mmask8) __U,
1254d874057dim                _MM_FROUND_CUR_DIRECTION);
1255d874057dim}
1256d874057dim
1257d874057dimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1258d874057dim_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1259d874057dim  return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
1260d874057dim                (__v4sf) __B,
1261d874057dim                (__v4sf)  _mm_setzero_ps (),
1262d874057dim                (__mmask8) __U,
1263d874057dim                _MM_FROUND_CUR_DIRECTION);
1264d874057dim}
1265d874057dim
/// Scalar single-precision minimum with an explicit control operand \a R.
///    Uses an all-ones mask and a zero pass-through vector. The whole
///    expansion is parenthesized so the cast cannot be split from the call
///    by a postfix operator applied to the macro result.
#define _mm_min_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))

/// Merge-masked form: \a W is the pass-through vector and \a U the mask.
#define _mm_mask_min_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))

/// Zero-masked form: a zero vector is the pass-through and \a U the mask.
#define _mm_maskz_min_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_minss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1283d874057dim
1284d874057dimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1285d874057dim_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1286d874057dim  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1287d874057dim                (__v2df) __B,
1288d874057dim                (__v2df) __W,
1289d874057dim                (__mmask8) __U,
1290d874057dim                _MM_FROUND_CUR_DIRECTION);
1291d874057dim}
1292d874057dim
1293d874057dimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1294d874057dim_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1295d874057dim  return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
1296d874057dim                (__v2df) __B,
1297d874057dim                (__v2df)  _mm_setzero_pd (),
1298d874057dim                (__mmask8) __U,
1299d874057dim                _MM_FROUND_CUR_DIRECTION);
1300d874057dim}
1301d874057dim
/* Scalar double-precision minimum of A and B through
 * __builtin_ia32_minsd_round_mask, with an all-ones mask, a zeroed vector
 * operand and R (converted to int) as the rounding argument.
 * The whole expansion is parenthesized so that a postfix operator at the use
 * site applies to the cast result instead of binding to the builtin call
 * ahead of the cast. */
#define _mm_min_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
1307d874057dim
/* Masked scalar double-precision minimum of A and B through
 * __builtin_ia32_minsd_round_mask, with W as the vector operand, U as the
 * mask and R (converted to int) as the rounding argument.
 * The whole expansion is parenthesized so that a postfix operator at the use
 * site applies to the cast result instead of binding to the builtin call
 * ahead of the cast. */
#define _mm_mask_min_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
1313d874057dim
/* Zero-masked scalar double-precision minimum of A and B through
 * __builtin_ia32_minsd_round_mask, with a zeroed vector operand, U as the
 * mask and R (converted to int) as the rounding argument.
 * The whole expansion is parenthesized so that a postfix operator at the use
 * site applies to the cast result instead of binding to the builtin call
 * ahead of the cast. */
#define _mm_maskz_min_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_minsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1319d874057dim
1320d874057dimstatic __inline __m512i
1321d874057dim__DEFAULT_FN_ATTRS512
1322d874057dim_mm512_min_epi32(__m512i __A, __m512i __B)
1323d874057dim{
1324d874057dim  return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
1325d874057dim}
1326d874057dim
1327d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1328d874057dim_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1329d874057dim{
1330d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1331d874057dim                                            (__v16si)_mm512_min_epi32(__A, __B),
1332d874057dim                                            (__v16si)__W);
1333d874057dim}
1334d874057dim
1335d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1336d874057dim_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
1337d874057dim{
1338d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1339d874057dim                                            (__v16si)_mm512_min_epi32(__A, __B),
1340d874057dim                                            (__v16si)_mm512_setzero_si512());
1341d874057dim}
1342d874057dim
1343d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1344d874057dim_mm512_min_epu32(__m512i __A, __m512i __B)
1345d874057dim{
1346d874057dim  return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
1347d874057dim}
1348d874057dim
1349d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1350d874057dim_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1351d874057dim{
1352d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1353d874057dim                                            (__v16si)_mm512_min_epu32(__A, __B),
1354d874057dim                                            (__v16si)__W);
1355d874057dim}
1356d874057dim
1357d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1358d874057dim_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
1359d874057dim{
1360d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1361d874057dim                                            (__v16si)_mm512_min_epu32(__A, __B),
1362d874057dim                                            (__v16si)_mm512_setzero_si512());
1363d874057dim}
1364d874057dim
1365d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1366d874057dim_mm512_min_epi64(__m512i __A, __m512i __B)
1367d874057dim{
1368d874057dim  return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
1369d874057dim}
1370d874057dim
1371d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1372d874057dim_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1373d874057dim{
1374d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1375d874057dim                                             (__v8di)_mm512_min_epi64(__A, __B),
1376d874057dim                                             (__v8di)__W);
1377d874057dim}
1378d874057dim
1379d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1380d874057dim_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
1381d874057dim{
1382d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1383d874057dim                                             (__v8di)_mm512_min_epi64(__A, __B),
1384d874057dim                                             (__v8di)_mm512_setzero_si512());
1385d874057dim}
1386d874057dim
1387d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1388d874057dim_mm512_min_epu64(__m512i __A, __m512i __B)
1389d874057dim{
1390d874057dim  return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
1391d874057dim}
1392d874057dim
1393d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1394d874057dim_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
1395d874057dim{
1396d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1397d874057dim                                             (__v8di)_mm512_min_epu64(__A, __B),
1398d874057dim                                             (__v8di)__W);
1399d874057dim}
1400d874057dim
1401d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1402d874057dim_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
1403d874057dim{
1404d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1405d874057dim                                             (__v8di)_mm512_min_epu64(__A, __B),
1406d874057dim                                             (__v8di)_mm512_setzero_si512());
1407d874057dim}
1408d874057dim
1409d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1410d874057dim_mm512_mul_epi32(__m512i __X, __m512i __Y)
1411d874057dim{
1412d874057dim  return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
1413d874057dim}
1414d874057dim
1415d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1416d874057dim_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1417d874057dim{
1418d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1419d874057dim                                             (__v8di)_mm512_mul_epi32(__X, __Y),
1420d874057dim                                             (__v8di)__W);
1421d874057dim}
1422d874057dim
1423d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1424d874057dim_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
1425d874057dim{
1426d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1427d874057dim                                             (__v8di)_mm512_mul_epi32(__X, __Y),
1428d874057dim                                             (__v8di)_mm512_setzero_si512 ());
1429d874057dim}
1430d874057dim
1431d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1432d874057dim_mm512_mul_epu32(__m512i __X, __m512i __Y)
1433d874057dim{
1434d874057dim  return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
1435d874057dim}
1436d874057dim
1437d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1438d874057dim_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
1439d874057dim{
1440d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1441d874057dim                                             (__v8di)_mm512_mul_epu32(__X, __Y),
1442d874057dim                                             (__v8di)__W);
1443d874057dim}
1444d874057dim
1445d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1446d874057dim_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
1447d874057dim{
1448d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1449d874057dim                                             (__v8di)_mm512_mul_epu32(__X, __Y),
1450d874057dim                                             (__v8di)_mm512_setzero_si512 ());
1451d874057dim}
1452d874057dim
1453d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1454d874057dim_mm512_mullo_epi32 (__m512i __A, __m512i __B)
1455d874057dim{
1456d874057dim  return (__m512i) ((__v16su) __A * (__v16su) __B);
1457d874057dim}
1458d874057dim
1459d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1460d874057dim_mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
1461d874057dim{
1462d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1463d874057dim                                             (__v16si)_mm512_mullo_epi32(__A, __B),
1464d874057dim                                             (__v16si)_mm512_setzero_si512());
1465d874057dim}
1466d874057dim
1467d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1468d874057dim_mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
1469d874057dim{
1470d874057dim  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1471d874057dim                                             (__v16si)_mm512_mullo_epi32(__A, __B),
1472d874057dim                                             (__v16si)__W);
1473d874057dim}
1474d874057dim
1475d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1476d874057dim_mm512_mullox_epi64 (__m512i __A, __m512i __B) {
1477d874057dim  return (__m512i) ((__v8du) __A * (__v8du) __B);
1478d874057dim}
1479d874057dim
1480d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1481d874057dim_mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
1482d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1483d874057dim                                             (__v8di)_mm512_mullox_epi64(__A, __B),
1484d874057dim                                             (__v8di)__W);
1485d874057dim}
1486d874057dim
/* Square root of the double-precision vector A through
 * __builtin_ia32_sqrtpd512, with R (converted to int) as the rounding
 * argument.
 * The whole expansion is parenthesized so that a postfix operator at the use
 * site (for example a subscript) applies to the cast result instead of
 * binding to the builtin call ahead of the cast. */
#define _mm512_sqrt_round_pd(A, R) \
  ((__m512d)__builtin_ia32_sqrtpd512((__v8df)(__m512d)(A), (int)(R)))
1489d874057dim
/* Masked form of _mm512_sqrt_round_pd: the square root of A (rounding
 * argument R) and the vector W are combined by __builtin_ia32_selectpd_512
 * under mask U.
 * The whole expansion is parenthesized so that a postfix operator at the use
 * site applies to the cast result instead of binding to the builtin call
 * ahead of the cast. */
#define _mm512_mask_sqrt_round_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)(__m512d)(W)))
1494d874057dim
/* Zero-masked form of _mm512_sqrt_round_pd: the square root of A (rounding
 * argument R) and an all-zero vector are combined by
 * __builtin_ia32_selectpd_512 under mask U.
 * The whole expansion is parenthesized so that a postfix operator at the use
 * site applies to the cast result instead of binding to the builtin call
 * ahead of the cast. */
#define _mm512_maskz_sqrt_round_pd(U, A, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                       (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                       (__v8df)_mm512_setzero_pd()))
1499d874057dim
1500d874057dimstatic  __inline__ __m512d __DEFAULT_FN_ATTRS512
1501d874057dim_mm512_sqrt_pd(__m512d __A)
1502d874057dim{
1503d874057dim  return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
1504d874057dim                                           _MM_FROUND_CUR_DIRECTION);
1505d874057dim}
1506d874057dim
1507d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1508d874057dim_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
1509d874057dim{
1510d874057dim  return (__m512d)__builtin_ia32_selectpd_512(__U,
1511d874057dim                                              (__v8df)_mm512_sqrt_pd(__A),
1512d874057dim                                              (__v8df)__W);
1513d874057dim}
1514d874057dim
1515d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1516d874057dim_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
1517d874057dim{
1518d874057dim  return (__m512d)__builtin_ia32_selectpd_512(__U,
1519d874057dim                                              (__v8df)_mm512_sqrt_pd(__A),
1520d874057dim                                              (__v8df)_mm512_setzero_pd());
1521d874057dim}
1522d874057dim
/* Square root of the single-precision vector A through
 * __builtin_ia32_sqrtps512, with R (converted to int) as the rounding
 * argument.
 * The whole expansion is parenthesized so that a postfix operator at the use
 * site (for example a subscript) applies to the cast result instead of
 * binding to the builtin call ahead of the cast. */
#define _mm512_sqrt_round_ps(A, R) \
  ((__m512)__builtin_ia32_sqrtps512((__v16sf)(__m512)(A), (int)(R)))
1525d874057dim
/* Masked form of _mm512_sqrt_round_ps: the square root of A (rounding
 * argument R) and the vector W are combined by __builtin_ia32_selectps_512
 * under mask U.
 * The whole expansion is parenthesized so that a postfix operator at the use
 * site applies to the cast result instead of binding to the builtin call
 * ahead of the cast. */
#define _mm512_mask_sqrt_round_ps(W, U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)(__m512)(W)))
1530d874057dim
/* Zero-masked form of _mm512_sqrt_round_ps: the square root of A (rounding
 * argument R) and an all-zero vector are combined by
 * __builtin_ia32_selectps_512 under mask U.
 * The whole expansion is parenthesized so that a postfix operator at the use
 * site applies to the cast result instead of binding to the builtin call
 * ahead of the cast. */
#define _mm512_maskz_sqrt_round_ps(U, A, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                      (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                      (__v16sf)_mm512_setzero_ps()))
1535d874057dim
1536d874057dimstatic  __inline__ __m512 __DEFAULT_FN_ATTRS512
1537d874057dim_mm512_sqrt_ps(__m512 __A)
1538d874057dim{
1539d874057dim  return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
1540d874057dim                                          _MM_FROUND_CUR_DIRECTION);
1541d874057dim}
1542d874057dim
1543d874057dimstatic  __inline__ __m512 __DEFAULT_FN_ATTRS512
1544d874057dim_mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
1545d874057dim{
1546d874057dim  return (__m512)__builtin_ia32_selectps_512(__U,
1547d874057dim                                             (__v16sf)_mm512_sqrt_ps(__A),
1548d874057dim                                             (__v16sf)__W);
1549d874057dim}
1550d874057dim
1551d874057dimstatic  __inline__ __m512 __DEFAULT_FN_ATTRS512
1552d874057dim_mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
1553d874057dim{
1554d874057dim  return (__m512)__builtin_ia32_selectps_512(__U,
1555d874057dim                                             (__v16sf)_mm512_sqrt_ps(__A),
1556d874057dim                                             (__v16sf)_mm512_setzero_ps());
1557d874057dim}
1558d874057dim
1559d874057dimstatic  __inline__ __m512d __DEFAULT_FN_ATTRS512
1560d874057dim_mm512_rsqrt14_pd(__m512d __A)
1561d874057dim{
1562d874057dim  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1563d874057dim                 (__v8df)
1564d874057dim                 _mm512_setzero_pd (),
1565d874057dim                 (__mmask8) -1);}
1566d874057dim
1567d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1568d874057dim_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1569d874057dim{
1570d874057dim  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1571d874057dim                  (__v8df) __W,
1572d874057dim                  (__mmask8) __U);
1573d874057dim}
1574d874057dim
1575d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1576d874057dim_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
1577d874057dim{
1578d874057dim  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
1579d874057dim                  (__v8df)
1580d874057dim                  _mm512_setzero_pd (),
1581d874057dim                  (__mmask8) __U);
1582d874057dim}
1583d874057dim
1584d874057dimstatic  __inline__ __m512 __DEFAULT_FN_ATTRS512
1585d874057dim_mm512_rsqrt14_ps(__m512 __A)
1586d874057dim{
1587d874057dim  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1588d874057dim                (__v16sf)
1589d874057dim                _mm512_setzero_ps (),
1590d874057dim                (__mmask16) -1);
1591d874057dim}
1592d874057dim
1593d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1594d874057dim_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1595d874057dim{
1596d874057dim  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1597d874057dim                 (__v16sf) __W,
1598d874057dim                 (__mmask16) __U);
1599d874057dim}
1600d874057dim
1601d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1602d874057dim_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
1603d874057dim{
1604d874057dim  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
1605d874057dim                 (__v16sf)
1606d874057dim                 _mm512_setzero_ps (),
1607d874057dim                 (__mmask16) __U);
1608d874057dim}
1609d874057dim
1610d874057dimstatic  __inline__ __m128 __DEFAULT_FN_ATTRS128
1611d874057dim_mm_rsqrt14_ss(__m128 __A, __m128 __B)
1612d874057dim{
1613d874057dim  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1614d874057dim             (__v4sf) __B,
1615d874057dim             (__v4sf)
1616d874057dim             _mm_setzero_ps (),
1617d874057dim             (__mmask8) -1);
1618d874057dim}
1619d874057dim
1620d874057dimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1621d874057dim_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1622d874057dim{
1623d874057dim return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1624d874057dim          (__v4sf) __B,
1625d874057dim          (__v4sf) __W,
1626d874057dim          (__mmask8) __U);
1627d874057dim}
1628d874057dim
1629d874057dimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1630d874057dim_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1631d874057dim{
1632d874057dim return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
1633d874057dim          (__v4sf) __B,
1634d874057dim          (__v4sf) _mm_setzero_ps (),
1635d874057dim          (__mmask8) __U);
1636d874057dim}
1637d874057dim
1638d874057dimstatic  __inline__ __m128d __DEFAULT_FN_ATTRS128
1639d874057dim_mm_rsqrt14_sd(__m128d __A, __m128d __B)
1640d874057dim{
1641d874057dim  return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
1642d874057dim              (__v2df) __B,
1643d874057dim              (__v2df)
1644d874057dim              _mm_setzero_pd (),
1645d874057dim              (__mmask8) -1);
1646d874057dim}
1647d874057dim
1648d874057dimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1649d874057dim_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1650d874057dim{
1651d874057dim return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1652d874057dim          (__v2df) __B,
1653d874057dim          (__v2df) __W,
1654d874057dim          (__mmask8) __U);
1655d874057dim}
1656d874057dim
1657d874057dimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1658d874057dim_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1659d874057dim{
1660d874057dim return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
1661d874057dim          (__v2df) __B,
1662d874057dim          (__v2df) _mm_setzero_pd (),
1663d874057dim          (__mmask8) __U);
1664d874057dim}
1665d874057dim
1666d874057dimstatic  __inline__ __m512d __DEFAULT_FN_ATTRS512
1667d874057dim_mm512_rcp14_pd(__m512d __A)
1668d874057dim{
1669d874057dim  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1670d874057dim               (__v8df)
1671d874057dim               _mm512_setzero_pd (),
1672d874057dim               (__mmask8) -1);
1673d874057dim}
1674d874057dim
1675d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1676d874057dim_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
1677d874057dim{
1678d874057dim  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1679d874057dim                (__v8df) __W,
1680d874057dim                (__mmask8) __U);
1681d874057dim}
1682d874057dim
1683d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1684d874057dim_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
1685d874057dim{
1686d874057dim  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
1687d874057dim                (__v8df)
1688d874057dim                _mm512_setzero_pd (),
1689d874057dim                (__mmask8) __U);
1690d874057dim}
1691d874057dim
1692d874057dimstatic  __inline__ __m512 __DEFAULT_FN_ATTRS512
1693d874057dim_mm512_rcp14_ps(__m512 __A)
1694d874057dim{
1695d874057dim  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1696d874057dim              (__v16sf)
1697d874057dim              _mm512_setzero_ps (),
1698d874057dim              (__mmask16) -1);
1699d874057dim}
1700d874057dim
1701d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1702d874057dim_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
1703d874057dim{
1704d874057dim  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1705d874057dim                   (__v16sf) __W,
1706d874057dim                   (__mmask16) __U);
1707d874057dim}
1708d874057dim
1709d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1710d874057dim_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
1711d874057dim{
1712d874057dim  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
1713d874057dim                   (__v16sf)
1714d874057dim                   _mm512_setzero_ps (),
1715d874057dim                   (__mmask16) __U);
1716d874057dim}
1717d874057dim
1718d874057dimstatic  __inline__ __m128 __DEFAULT_FN_ATTRS128
1719d874057dim_mm_rcp14_ss(__m128 __A, __m128 __B)
1720d874057dim{
1721d874057dim  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1722d874057dim                 (__v4sf) __B,
1723d874057dim                 (__v4sf)
1724d874057dim                 _mm_setzero_ps (),
1725d874057dim                 (__mmask8) -1);
1726d874057dim}
1727d874057dim
1728d874057dimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1729d874057dim_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
1730d874057dim{
1731d874057dim return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1732d874057dim          (__v4sf) __B,
1733d874057dim          (__v4sf) __W,
1734d874057dim          (__mmask8) __U);
1735d874057dim}
1736d874057dim
1737d874057dimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1738d874057dim_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
1739d874057dim{
1740d874057dim return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
1741d874057dim          (__v4sf) __B,
1742d874057dim          (__v4sf) _mm_setzero_ps (),
1743d874057dim          (__mmask8) __U);
1744d874057dim}
1745d874057dim
1746d874057dimstatic  __inline__ __m128d __DEFAULT_FN_ATTRS128
1747d874057dim_mm_rcp14_sd(__m128d __A, __m128d __B)
1748d874057dim{
1749d874057dim  return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
1750d874057dim            (__v2df) __B,
1751d874057dim            (__v2df)
1752d874057dim            _mm_setzero_pd (),
1753d874057dim            (__mmask8) -1);
1754d874057dim}
1755d874057dim
1756d874057dimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1757d874057dim_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
1758d874057dim{
1759d874057dim return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1760d874057dim          (__v2df) __B,
1761d874057dim          (__v2df) __W,
1762d874057dim          (__mmask8) __U);
1763d874057dim}
1764d874057dim
1765d874057dimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1766d874057dim_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
1767d874057dim{
1768d874057dim return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
1769d874057dim          (__v2df) __B,
1770d874057dim          (__v2df) _mm_setzero_pd (),
1771d874057dim          (__mmask8) __U);
1772d874057dim}
1773d874057dim
1774d874057dimstatic __inline __m512 __DEFAULT_FN_ATTRS512
1775d874057dim_mm512_floor_ps(__m512 __A)
1776d874057dim{
1777d874057dim  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1778d874057dim                                                  _MM_FROUND_FLOOR,
1779d874057dim                                                  (__v16sf) __A, -1,
1780d874057dim                                                  _MM_FROUND_CUR_DIRECTION);
1781d874057dim}
1782d874057dim
1783d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1784d874057dim_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
1785d874057dim{
1786d874057dim  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1787d874057dim                   _MM_FROUND_FLOOR,
1788d874057dim                   (__v16sf) __W, __U,
1789d874057dim                   _MM_FROUND_CUR_DIRECTION);
1790d874057dim}
1791d874057dim
1792d874057dimstatic __inline __m512d __DEFAULT_FN_ATTRS512
1793d874057dim_mm512_floor_pd(__m512d __A)
1794d874057dim{
1795d874057dim  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1796d874057dim                                                   _MM_FROUND_FLOOR,
1797d874057dim                                                   (__v8df) __A, -1,
1798d874057dim                                                   _MM_FROUND_CUR_DIRECTION);
1799d874057dim}
1800d874057dim
1801d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1802d874057dim_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
1803d874057dim{
1804d874057dim  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1805d874057dim                _MM_FROUND_FLOOR,
1806d874057dim                (__v8df) __W, __U,
1807d874057dim                _MM_FROUND_CUR_DIRECTION);
1808d874057dim}
1809d874057dim
1810d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1811d874057dim_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
1812d874057dim{
1813d874057dim  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1814d874057dim                   _MM_FROUND_CEIL,
1815d874057dim                   (__v16sf) __W, __U,
1816d874057dim                   _MM_FROUND_CUR_DIRECTION);
1817d874057dim}
1818d874057dim
1819d874057dimstatic __inline __m512 __DEFAULT_FN_ATTRS512
1820d874057dim_mm512_ceil_ps(__m512 __A)
1821d874057dim{
1822d874057dim  return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
1823d874057dim                                                  _MM_FROUND_CEIL,
1824d874057dim                                                  (__v16sf) __A, -1,
1825d874057dim                                                  _MM_FROUND_CUR_DIRECTION);
1826d874057dim}
1827d874057dim
1828d874057dimstatic __inline __m512d __DEFAULT_FN_ATTRS512
1829d874057dim_mm512_ceil_pd(__m512d __A)
1830d874057dim{
1831d874057dim  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1832d874057dim                                                   _MM_FROUND_CEIL,
1833d874057dim                                                   (__v8df) __A, -1,
1834d874057dim                                                   _MM_FROUND_CUR_DIRECTION);
1835d874057dim}
1836d874057dim
1837d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1838d874057dim_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
1839d874057dim{
1840d874057dim  return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
1841d874057dim                _MM_FROUND_CEIL,
1842d874057dim                (__v8df) __W, __U,
1843d874057dim                _MM_FROUND_CUR_DIRECTION);
1844d874057dim}
1845d874057dim
1846d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1847d874057dim_mm512_abs_epi64(__m512i __A)
1848d874057dim{
1849d874057dim  return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
1850d874057dim}
1851d874057dim
1852d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1853d874057dim_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
1854d874057dim{
1855d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1856d874057dim                                             (__v8di)_mm512_abs_epi64(__A),
1857d874057dim                                             (__v8di)__W);
1858d874057dim}
1859d874057dim
1860d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1861d874057dim_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
1862d874057dim{
1863d874057dim  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
1864d874057dim                                             (__v8di)_mm512_abs_epi64(__A),
1865d874057dim                                             (__v8di)_mm512_setzero_si512());
1866d874057dim}
1867d874057dim
1868d874057dimstatic __inline __m512i __DEFAULT_FN_ATTRS512
1869d874057dim_mm512_abs_epi32(__m512i __A)
1870d874057dim{
1871d874057dim  return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
1872d874057dim}
1873d874057dim
1874d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1875d874057dim_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
1876d874057dim{
1877d874057dim  return (__m512i)__builtin_ia32_selectd_512(__U,
1878d874057dim                                             (__v16si)_mm512_abs_epi32(__A),
1879d874057dim                                             (__v16si)__W);
1880d874057dim}
1881d874057dim
1882d874057dimstatic __inline__ __m512i __DEFAULT_FN_ATTRS512
1883d874057dim_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
1884d874057dim{
1885d874057dim  return (__m512i)__builtin_ia32_selectd_512(__U,
1886d874057dim                                             (__v16si)_mm512_abs_epi32(__A),
1887d874057dim                                             (__v16si)_mm512_setzero_si512());
1888d874057dim}
1889d874057dim
1890d874057dimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1891d874057dim_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
1892d874057dim  __A = _mm_add_ss(__A, __B);
1893d874057dim  return __builtin_ia32_selectss_128(__U, __A, __W);
1894d874057dim}
1895d874057dim
1896d874057dimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
1897d874057dim_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
1898d874057dim  __A = _mm_add_ss(__A, __B);
1899d874057dim  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
1900d874057dim}
1901d874057dim
/* Scalar single-precision add of A and B through
 * __builtin_ia32_addss_round_mask, with an all-ones mask, a zeroed vector
 * operand and R (converted to int) as the rounding argument.
 * The whole expansion is parenthesized so that a postfix operator at the use
 * site (for example a subscript) applies to the cast result instead of
 * binding to the builtin call ahead of the cast. */
#define _mm_add_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
1907d874057dim
/* Masked scalar single-precision add of A and B through
 * __builtin_ia32_addss_round_mask, with W as the vector operand, U as the
 * mask and R (converted to int) as the rounding argument.
 * The whole expansion is parenthesized so that a postfix operator at the use
 * site applies to the cast result instead of binding to the builtin call
 * ahead of the cast. */
#define _mm_mask_add_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), \
                                           (__mmask8)(U), (int)(R)))
1913d874057dim
/* Zero-masked scalar single-precision add of A and B through
 * __builtin_ia32_addss_round_mask, with a zeroed vector operand, U as the
 * mask and R (converted to int) as the rounding argument.
 * The whole expansion is parenthesized so that a postfix operator at the use
 * site applies to the cast result instead of binding to the builtin call
 * ahead of the cast. */
#define _mm_maskz_add_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_addss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
1919d874057dim
1920d874057dimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1921d874057dim_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
1922d874057dim  __A = _mm_add_sd(__A, __B);
1923d874057dim  return __builtin_ia32_selectsd_128(__U, __A, __W);
1924d874057dim}
1925d874057dim
1926d874057dimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
1927d874057dim_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
1928d874057dim  __A = _mm_add_sd(__A, __B);
1929d874057dim  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
1930d874057dim}
/* Unmasked scalar double add with rounding argument R.
 * Expands to __builtin_ia32_addsd_round_mask with A and B cast to __v2df, a
 * zero pass-through vector, an all-ones mask ((__mmask8)-1) and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_add_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
1936d874057dim
/* Merge-masked scalar double add with rounding argument R.
 * Expands to __builtin_ia32_addsd_round_mask with A and B cast to __v2df,
 * W as the pass-through vector, U cast to __mmask8 and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_mask_add_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
1942d874057dim
/* Zero-masked scalar double add with rounding argument R.
 * Expands to __builtin_ia32_addsd_round_mask with A and B cast to __v2df, a
 * zero vector from _mm_setzero_pd(), U cast to __mmask8 and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_maskz_add_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_addsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
1948d874057dim
1949d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1950d874057dim_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
1951d874057dim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1952d874057dim                                              (__v8df)_mm512_add_pd(__A, __B),
1953d874057dim                                              (__v8df)__W);
1954d874057dim}
1955d874057dim
1956d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
1957d874057dim_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
1958d874057dim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
1959d874057dim                                              (__v8df)_mm512_add_pd(__A, __B),
1960d874057dim                                              (__v8df)_mm512_setzero_pd());
1961d874057dim}
1962d874057dim
1963d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1964d874057dim_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
1965d874057dim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1966d874057dim                                             (__v16sf)_mm512_add_ps(__A, __B),
1967d874057dim                                             (__v16sf)__W);
1968d874057dim}
1969d874057dim
1970d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
1971d874057dim_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
1972d874057dim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
1973d874057dim                                             (__v16sf)_mm512_add_ps(__A, __B),
1974d874057dim                                             (__v16sf)_mm512_setzero_ps());
1975d874057dim}
1976d874057dim
/* Unmasked packed double add with rounding argument R.
 * Expands to __builtin_ia32_addpd512 with A and B cast to __v8df and R cast
 * to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_add_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_addpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))
1980d874057dim
/* Merge-masked packed double add with rounding argument R.
 * Expands to __builtin_ia32_selectpd_512 over mask U, the result of
 * _mm512_add_round_pd(A, B, R), and the pass-through vector W.
 * NOTE(review): presumably lane i takes the sum when bit i of U is set.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_mask_add_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))
1985d874057dim
/* Zero-masked packed double add with rounding argument R.
 * Expands to __builtin_ia32_selectpd_512 over mask U, the result of
 * _mm512_add_round_pd(A, B, R), and a zero vector from _mm512_setzero_pd().
 * NOTE(review): presumably lane i takes the sum when bit i of U is set.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_maskz_add_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_add_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
1990d874057dim
/* Unmasked packed float add with rounding argument R.
 * Expands to __builtin_ia32_addps512 with A and B cast to __v16sf and R cast
 * to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_add_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))
1994d874057dim
/* Merge-masked packed float add with rounding argument R.
 * Expands to __builtin_ia32_selectps_512 over mask U, the result of
 * _mm512_add_round_ps(A, B, R), and the pass-through vector W.
 * NOTE(review): presumably lane i takes the sum when bit i of U is set.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_mask_add_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))
1999d874057dim
/* Zero-masked packed float add with rounding argument R.
 * Expands to __builtin_ia32_selectps_512 over mask U, the result of
 * _mm512_add_round_ps(A, B, R), and a zero vector from _mm512_setzero_ps().
 * NOTE(review): presumably lane i takes the sum when bit i of U is set.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_maskz_add_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2004d874057dim
2005d874057dimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
2006d874057dim_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2007d874057dim  __A = _mm_sub_ss(__A, __B);
2008d874057dim  return __builtin_ia32_selectss_128(__U, __A, __W);
2009d874057dim}
2010d874057dim
2011d874057dimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
2012d874057dim_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2013d874057dim  __A = _mm_sub_ss(__A, __B);
2014d874057dim  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2015d874057dim}
/* Unmasked scalar float subtract with rounding argument R.
 * Expands to __builtin_ia32_subss_round_mask with A and B cast to __v4sf, a
 * zero pass-through vector, an all-ones mask ((__mmask8)-1) and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_sub_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
2021d874057dim
/* Merge-masked scalar float subtract with rounding argument R.
 * Expands to __builtin_ia32_subss_round_mask with A and B cast to __v4sf,
 * W as the pass-through vector, U cast to __mmask8 and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_mask_sub_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))
2027d874057dim
/* Zero-masked scalar float subtract with rounding argument R.
 * Expands to __builtin_ia32_subss_round_mask with A and B cast to __v4sf, a
 * zero vector from _mm_setzero_ps(), U cast to __mmask8 and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_maskz_sub_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_subss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2033d874057dim
2034d874057dimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
2035d874057dim_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2036d874057dim  __A = _mm_sub_sd(__A, __B);
2037d874057dim  return __builtin_ia32_selectsd_128(__U, __A, __W);
2038d874057dim}
2039d874057dim
2040d874057dimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
2041d874057dim_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2042d874057dim  __A = _mm_sub_sd(__A, __B);
2043d874057dim  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2044d874057dim}
2045d874057dim
/* Unmasked scalar double subtract with rounding argument R.
 * Expands to __builtin_ia32_subsd_round_mask with A and B cast to __v2df, a
 * zero pass-through vector, an all-ones mask ((__mmask8)-1) and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_sub_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
2051d874057dim
/* Merge-masked scalar double subtract with rounding argument R.
 * Expands to __builtin_ia32_subsd_round_mask with A and B cast to __v2df,
 * W as the pass-through vector, U cast to __mmask8 and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_mask_sub_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
2057d874057dim
/* Zero-masked scalar double subtract with rounding argument R.
 * Expands to __builtin_ia32_subsd_round_mask with A and B cast to __v2df, a
 * zero vector from _mm_setzero_pd(), U cast to __mmask8 and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_maskz_sub_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_subsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2063d874057dim
2064d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2065d874057dim_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2066d874057dim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2067d874057dim                                              (__v8df)_mm512_sub_pd(__A, __B),
2068d874057dim                                              (__v8df)__W);
2069d874057dim}
2070d874057dim
2071d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2072d874057dim_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2073d874057dim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2074d874057dim                                              (__v8df)_mm512_sub_pd(__A, __B),
2075d874057dim                                              (__v8df)_mm512_setzero_pd());
2076d874057dim}
2077d874057dim
2078d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2079d874057dim_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2080d874057dim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2081d874057dim                                             (__v16sf)_mm512_sub_ps(__A, __B),
2082d874057dim                                             (__v16sf)__W);
2083d874057dim}
2084d874057dim
2085d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2086d874057dim_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2087d874057dim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2088d874057dim                                             (__v16sf)_mm512_sub_ps(__A, __B),
2089d874057dim                                             (__v16sf)_mm512_setzero_ps());
2090d874057dim}
2091d874057dim
/* Unmasked packed double subtract with rounding argument R.
 * Expands to __builtin_ia32_subpd512 with A and B cast to __v8df and R cast
 * to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_sub_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_subpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))
2095d874057dim
/* Merge-masked packed double subtract with rounding argument R.
 * Expands to __builtin_ia32_selectpd_512 over mask U, the result of
 * _mm512_sub_round_pd(A, B, R), and the pass-through vector W.
 * NOTE(review): presumably lane i takes the difference when bit i of U is set.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_mask_sub_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))
2100d874057dim
/* Zero-masked packed double subtract with rounding argument R.
 * Expands to __builtin_ia32_selectpd_512 over mask U, the result of
 * _mm512_sub_round_pd(A, B, R), and a zero vector from _mm512_setzero_pd().
 * NOTE(review): presumably lane i takes the difference when bit i of U is set.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_maskz_sub_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_sub_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
2105d874057dim
/* Unmasked packed float subtract with rounding argument R.
 * Expands to __builtin_ia32_subps512 with A and B cast to __v16sf and R cast
 * to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_sub_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))
2109d874057dim
/* Merge-masked packed float subtract with rounding argument R.
 * Expands to __builtin_ia32_selectps_512 over mask U, the result of
 * _mm512_sub_round_ps(A, B, R), and the pass-through vector W.
 * NOTE(review): presumably lane i takes the difference when bit i of U is set.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_mask_sub_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))
2114d874057dim
/* Zero-masked packed float subtract with rounding argument R.
 * Expands to __builtin_ia32_selectps_512 over mask U, the result of
 * _mm512_sub_round_ps(A, B, R), and a zero vector from _mm512_setzero_ps().
 * NOTE(review): presumably lane i takes the difference when bit i of U is set.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_maskz_sub_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2119d874057dim
2120d874057dimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
2121d874057dim_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2122d874057dim  __A = _mm_mul_ss(__A, __B);
2123d874057dim  return __builtin_ia32_selectss_128(__U, __A, __W);
2124d874057dim}
2125d874057dim
2126d874057dimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
2127d874057dim_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2128d874057dim  __A = _mm_mul_ss(__A, __B);
2129d874057dim  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2130d874057dim}
/* Unmasked scalar float multiply with rounding argument R.
 * Expands to __builtin_ia32_mulss_round_mask with A and B cast to __v4sf, a
 * zero pass-through vector, an all-ones mask ((__mmask8)-1) and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_mul_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
2136d874057dim
/* Merge-masked scalar float multiply with rounding argument R.
 * Expands to __builtin_ia32_mulss_round_mask with A and B cast to __v4sf,
 * W as the pass-through vector, U cast to __mmask8 and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_mask_mul_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))
2142d874057dim
/* Zero-masked scalar float multiply with rounding argument R.
 * Expands to __builtin_ia32_mulss_round_mask with A and B cast to __v4sf, a
 * zero vector from _mm_setzero_ps(), U cast to __mmask8 and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_maskz_mul_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_mulss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2148d874057dim
2149d874057dimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
2150d874057dim_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
2151d874057dim  __A = _mm_mul_sd(__A, __B);
2152d874057dim  return __builtin_ia32_selectsd_128(__U, __A, __W);
2153d874057dim}
2154d874057dim
2155d874057dimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
2156d874057dim_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
2157d874057dim  __A = _mm_mul_sd(__A, __B);
2158d874057dim  return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
2159d874057dim}
2160d874057dim
/* Unmasked scalar double multiply with rounding argument R.
 * Expands to __builtin_ia32_mulsd_round_mask with A and B cast to __v2df, a
 * zero pass-through vector, an all-ones mask ((__mmask8)-1) and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_mul_round_sd(A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
2166d874057dim
/* Merge-masked scalar double multiply with rounding argument R.
 * Expands to __builtin_ia32_mulsd_round_mask with A and B cast to __v2df,
 * W as the pass-through vector, U cast to __mmask8 and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_mask_mul_round_sd(W, U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)(__m128d)(W), \
                                            (__mmask8)(U), (int)(R)))
2172d874057dim
/* Zero-masked scalar double multiply with rounding argument R.
 * Expands to __builtin_ia32_mulsd_round_mask with A and B cast to __v2df, a
 * zero vector from _mm_setzero_pd(), U cast to __mmask8 and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_maskz_mul_round_sd(U, A, B, R) \
  ((__m128d)__builtin_ia32_mulsd_round_mask((__v2df)(__m128d)(A), \
                                            (__v2df)(__m128d)(B), \
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
2178d874057dim
2179d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2180d874057dim_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
2181d874057dim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2182d874057dim                                              (__v8df)_mm512_mul_pd(__A, __B),
2183d874057dim                                              (__v8df)__W);
2184d874057dim}
2185d874057dim
2186d874057dimstatic __inline__ __m512d __DEFAULT_FN_ATTRS512
2187d874057dim_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
2188d874057dim  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
2189d874057dim                                              (__v8df)_mm512_mul_pd(__A, __B),
2190d874057dim                                              (__v8df)_mm512_setzero_pd());
2191d874057dim}
2192d874057dim
2193d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2194d874057dim_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
2195d874057dim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2196d874057dim                                             (__v16sf)_mm512_mul_ps(__A, __B),
2197d874057dim                                             (__v16sf)__W);
2198d874057dim}
2199d874057dim
2200d874057dimstatic __inline__ __m512 __DEFAULT_FN_ATTRS512
2201d874057dim_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
2202d874057dim  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
2203d874057dim                                             (__v16sf)_mm512_mul_ps(__A, __B),
2204d874057dim                                             (__v16sf)_mm512_setzero_ps());
2205d874057dim}
2206d874057dim
/* Unmasked packed double multiply with rounding argument R.
 * Expands to __builtin_ia32_mulpd512 with A and B cast to __v8df and R cast
 * to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_mul_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_mulpd512((__v8df)(__m512d)(A), \
                                    (__v8df)(__m512d)(B), (int)(R)))
2210d874057dim
/* Merge-masked packed double multiply with rounding argument R.
 * Expands to __builtin_ia32_selectpd_512 over mask U, the result of
 * _mm512_mul_round_pd(A, B, R), and the pass-through vector W.
 * NOTE(review): presumably lane i takes the product when bit i of U is set.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_mask_mul_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                   (__v8df)(__m512d)(W)))
2215d874057dim
/* Zero-masked packed double multiply with rounding argument R.
 * Expands to __builtin_ia32_selectpd_512 over mask U, the result of
 * _mm512_mul_round_pd(A, B, R), and a zero vector from _mm512_setzero_pd().
 * NOTE(review): presumably lane i takes the product when bit i of U is set.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_maskz_mul_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                   (__v8df)_mm512_mul_round_pd((A), (B), (R)), \
                                   (__v8df)_mm512_setzero_pd()))
2220d874057dim
/* Unmasked packed float multiply with rounding argument R.
 * Expands to __builtin_ia32_mulps512 with A and B cast to __v16sf and R cast
 * to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_mul_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \
                                   (__v16sf)(__m512)(B), (int)(R)))
2224d874057dim
/* Merge-masked packed float multiply with rounding argument R.
 * Expands to __builtin_ia32_selectps_512 over mask U, the result of
 * _mm512_mul_round_ps(A, B, R), and the pass-through vector W.
 * NOTE(review): presumably lane i takes the product when bit i of U is set.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_mask_mul_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                  (__v16sf)(__m512)(W)))
2229d874057dim
/* Zero-masked packed float multiply with rounding argument R.
 * Expands to __builtin_ia32_selectps_512 over mask U, the result of
 * _mm512_mul_round_ps(A, B, R), and a zero vector from _mm512_setzero_ps().
 * NOTE(review): presumably lane i takes the product when bit i of U is set.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm512_maskz_mul_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                  (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                  (__v16sf)_mm512_setzero_ps()))
2234d874057dim
2235d874057dimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
2236d874057dim_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
2237d874057dim  __A = _mm_div_ss(__A, __B);
2238d874057dim  return __builtin_ia32_selectss_128(__U, __A, __W);
2239d874057dim}
2240d874057dim
2241d874057dimstatic __inline__ __m128 __DEFAULT_FN_ATTRS128
2242d874057dim_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
2243d874057dim  __A = _mm_div_ss(__A, __B);
2244d874057dim  return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
2245d874057dim}
2246d874057dim
/* Unmasked scalar float divide with rounding argument R.
 * Expands to __builtin_ia32_divss_round_mask with A and B cast to __v4sf, a
 * zero pass-through vector, an all-ones mask ((__mmask8)-1) and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_div_round_ss(A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
2252d874057dim
/* Merge-masked scalar float divide with rounding argument R.
 * Expands to __builtin_ia32_divss_round_mask with A and B cast to __v4sf,
 * W as the pass-through vector, U cast to __mmask8 and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_mask_div_round_ss(W, U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)(__m128)(W), (__mmask8)(U), \
                                           (int)(R)))
2258d874057dim
/* Zero-masked scalar float divide with rounding argument R.
 * Expands to __builtin_ia32_divss_round_mask with A and B cast to __v4sf, a
 * zero vector from _mm_setzero_ps(), U cast to __mmask8 and R cast to int.
 * NOTE(review): R is presumably a compile-time rounding-mode constant.
 * The whole expansion is parenthesized so postfix operators applied to the
 * macro call bind to the cast result, not to the builtin call.
 */
#define _mm_maskz_div_round_ss(U, A, B, R) \
  ((__m128)__builtin_ia32_divss_round_mask((__v4sf)(__m128)(A), \
                                           (__v4sf)(__m128)(B), \
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
2264d874057dim
2265d874057dimstatic __inline__ __m128d __DEFAULT_FN_ATTRS128
2266d874057dim_mm_mask_div_sd(__m128d __W, __mmask8 __U,