1#![allow(clippy::too_many_arguments)]
12
13#[cfg(feature = "dep_safe_arch")]
14use crate::_dep::safe_arch::*;
15
16#[doc = crate::TAG_NAMESPACE!()]
17pub struct Arch;
43
44impl_arch! {
45 #[doc = "# Functions not requiring any target feature.\n\n---"]
46 features = "dep_safe_arch", any_target_arch = "x86", "x86_64";
47 arch_fn! {
48 "Swap the bytes of the given 32-bit value.",
49 byte_swap_i32(i: i32) -> i32;
50 "Swap the bytes of the given 64-bit value.",
51 byte_swap_i64(i: i64) -> i64;
52 "Reads the CPU’s timestamp counter value.",
53 read_timestamp_counter() -> u64;
54 "Reads the CPU’s timestamp counter value and store the processor signature.",
55 read_timestamp_counter_p(aux: &mut u32) -> u64;
56 }
57}
58impl_arch! {
59 #[doc = "# Functions requiring the `adx` target feature.\n\n---"]
60 #[doc = "See: <https://en.wikipedia.org/wiki/Intel_ADX>"]
61 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "adx";
62 arch_fn! {
63 "Add two `u32` with a carry value.",
64 add_carry_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8;
65 "Add two `u64` with a carry value.",
66 add_carry_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8;
67 }
68}
69impl_arch! {
70 #[doc = "# Functions requiring the `aes` target feature.\n\n---"]
71 #[doc = "See: <https://en.wikipedia.org/wiki/AES_instruction_set>"]
72 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "aes";
73 arch_fn! {
74 "Perform the last round of an AES decryption flow on `a` using the `round_key`.",
75 aes_decrypt_last_m128i(a: m128i, round_key: m128i) -> m128i;
76 "Perform one round of an AES decryption flow on `a` using the `round_key`.",
77 aes_decrypt_m128i(a: m128i, round_key: m128i) -> m128i;
78 "Perform the last round of an AES encryption flow on `a` using the `round_key`.",
79 aes_encrypt_last_m128i(a: m128i, round_key: m128i) -> m128i;
80 "Perform one round of an AES encryption flow on `a` using the `round_key`.",
81 aes_encrypt_m128i(a: m128i, round_key: m128i) -> m128i;
82 "Perform the InvMixColumns transform on `a`.",
83 aes_inv_mix_columns_m128i(a: m128i) -> m128i;
84 "Assist in expanding an AES cipher key.",
85 aes_key_gen_assist_m128i<const IMM: i32>(a: m128i) -> m128i;
86 }
87}
88impl_arch! {
89 #[doc = "# Functions requiring the `avx` target feature.\n\n---"]
90 #[doc = "See: <https://en.wikipedia.org/wiki/Advanced_Vector_Extensions>"]
91 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "avx";
92 arch_fn! {
93 "Add adjacent `f32` lanes.",
94 add_horizontal_m256(a: m256, b: m256) -> m256;
95 "Add adjacent `f64` lanes.",
96 add_horizontal_m256d(a: m256d, b: m256d) -> m256d;
97 "Lanewise `a + b` with `f32` lanes.",
98 add_m256(a: m256, b: m256) -> m256;
99 "Lanewise `a + b` with `f64` lanes.",
100 add_m256d(a: m256d, b: m256d) -> m256d;
101 "Alternately, from the top, add `f32` then sub `f32`.",
102 addsub_m256(a: m256, b: m256) -> m256;
103 "Alternately, from the top, add `f64` then sub `f64`.",
104 addsub_m256d(a: m256d, b: m256d) -> m256d;
105 "Bitwise `a & b`.",
106 bitand_m256(a: m256, b: m256) -> m256;
107 "Bitwise `a & b`.",
108 bitand_m256d(a: m256d, b: m256d) -> m256d;
109 "Bitwise `(!a) & b`.",
110 bitandnot_m256(a: m256, b: m256) -> m256;
111 "Bitwise `(!a) & b`.",
112 bitandnot_m256d(a: m256d, b: m256d) -> m256d;
113 "Bitwise `a | b`.",
114 bitor_m256(a: m256, b: m256) -> m256;
115 "Bitwise `a | b`.",
116 bitor_m256d(a: m256d, b: m256d) -> m256d;
117 "Bitwise `a ^ b`.",
118 bitxor_m256(a: m256, b: m256) -> m256;
119 "Bitwise `a ^ b`.",
120 bitxor_m256d(a: m256d, b: m256d) -> m256d;
121 "Blends the `f32` lanes according to the immediate mask.",
122 blend_m256<const IMM: i32>(a: m256, b: m256) -> m256;
123 "Blends the `f64` lanes according to the immediate mask.",
124 blend_m256d<const IMM: i32>(a: m256d, b: m256d) -> m256d;
125 "Blend the lanes according to a runtime varying mask.",
126 blend_varying_m256(a: m256, b: m256, mask: m256) -> m256;
127 "Blend the lanes according to a runtime varying mask.",
128 blend_varying_m256d(a: m256d, b: m256d, mask: m256d) -> m256d;
129 "Bit-preserving cast to `m128` from `m256`.",
130 cast_to_m128_from_m256(a: m256) -> m128;
131 "Bit-preserving cast to `m128d` from `m256d`.",
132 cast_to_m128d_from_m256d(a: m256d) -> m128d;
133 "Bit-preserving cast to `m128i` from `m256i`.",
134 cast_to_m128i_from_m256i(a: m256i) -> m128i;
135 "Bit-preserving cast to `m256` from `m256d`.",
136 cast_to_m256_from_m256d(a: m256d) -> m256;
137 "Bit-preserving cast to `m256` from `m256i`.",
138 cast_to_m256_from_m256i(a: m256i) -> m256;
139 "Bit-preserving cast to `m256i` from `m256`.",
140 cast_to_m256d_from_m256(a: m256) -> m256d;
141 "Bit-preserving cast to `m256d` from `m256i`.",
142 cast_to_m256d_from_m256i(a: m256i) -> m256d;
143 "Bit-preserving cast to `m256i` from `m256`.",
144 cast_to_m256i_from_m256(a: m256) -> m256i;
145 "Bit-preserving cast to `m256i` from `m256d`.",
146 cast_to_m256i_from_m256d(a: m256d) -> m256i;
147 "Round `f32` lanes towards positive infinity.",
148 ceil_m256(a: m256) -> m256;
149 "Round `f64` lanes towards positive infinity.",
150 ceil_m256d(a: m256d) -> m256d;
151 "Compare `f32` lanes according to the operation specified, mask output.",
152 cmp_op_mask_m128<const OP: i32>(a: m128, b: m128) -> m128;
153 "Compare `f32` lanes according to the operation specified, mask output.",
154 cmp_op_mask_m128_s<const OP: i32>(a: m128, b: m128) -> m128;
155 "Compare `f64` lanes according to the operation specified, mask output.",
156 cmp_op_mask_m128d<const OP: i32>(a: m128d, b: m128d) -> m128d;
157 "Compare `f64` lanes according to the operation specified, mask output.",
158 cmp_op_mask_m128d_s<const OP: i32>(a: m128d, b: m128d) -> m128d;
159 "Compare `f32` lanes according to the operation specified, mask output.",
160 cmp_op_mask_m256<const OP: i32>(a: m256, b: m256) -> m256;
161 "Compare `f64` lanes according to the operation specified, mask output.",
162 cmp_op_mask_m256d<const OP: i32>(a: m256d, b: m256d) -> m256d;
163 "Convert the lowest `f32` lane to a single `f32`.",
164 convert_to_f32_from_m256_s(a: m256) -> f32;
165 "Convert the lowest `f64` lane to a single `f64`.",
166 convert_to_f64_from_m256d_s(a: m256d) -> f64;
167 "Convert the lowest `i32` lane to a single `i32`.",
168 convert_to_i32_from_m256i_s(a: m256i) -> i32;
169 "Convert `f64` lanes to be `i32` lanes.",
170 convert_to_i32_m128i_from_m256d(a: m256d) -> m128i;
171 "Convert `f32` lanes to be `i32` lanes.",
172 convert_to_i32_m256i_from_m256(a: m256) -> m256i;
173 "Convert `f64` lanes to be `f32` lanes.",
174 convert_to_m128_from_m256d(a: m256d) -> m128;
175 "Convert `i32` lanes to be `f32` lanes.",
176 convert_to_m256_from_i32_m256i(a: m256i) -> m256;
177 "Convert `i32` lanes to be `f64` lanes.",
178 convert_to_m256d_from_i32_m128i(a: m128i) -> m256d;
179 "Convert `f32` lanes to be `f64` lanes.",
180 convert_to_m256d_from_m128(a: m128) -> m256d;
181 "Convert `f64` lanes to `i32` lanes with truncation.",
182 convert_truncate_to_i32_m128i_from_m256d(a: m256d) -> m128i;
183 "Convert `f32` lanes to `i32` lanes with truncation.",
184 convert_truncate_to_i32_m256i_from_m256(a: m256) -> m256i;
185 "Lanewise `a / b` with `f32`.",
186 div_m256(a: m256, b: m256) -> m256;
187 "Lanewise `a / b` with `f64`.",
188 div_m256d(a: m256d, b: m256d) -> m256d;
189 "This works like `dot_product_m128`, but twice as wide.",
190 dot_product_m256<const IMM: i32>(a: m256, b: m256) -> m256;
191 "Duplicate the even-indexed lanes to the odd lanes.",
192 duplicate_even_lanes_m256(a: m256) -> m256;
193 "Duplicate the odd-indexed lanes to the even lanes.",
194 duplicate_odd_lanes_m256(a: m256) -> m256;
195 "Duplicate the odd-indexed lanes to the even lanes.",
196 duplicate_odd_lanes_m256d(a: m256d) -> m256d;
197 "Extracts an `i32` lane from `m256i`",
198 extract_i32_from_m256i<const IMM: i32>(a: m256i) -> i32;
199 "Extracts an `i64` lane from `m256i`",
200 extract_i64_from_m256i<const IMM: i32>(a: m256i) -> i64;
201 "Extracts an `m128` from `m256`",
202 extract_m128_from_m256<const IMM: i32>(a: m256) -> m128;
203 "Extracts an `m128d` from `m256d`",
204 extract_m128d_from_m256d<const IMM: i32>(a: m256d) -> m128d;
205 "Extracts an `m128i` from `m256i`",
206 extract_m128i_from_m256i<const IMM: i32>(a: m256i) -> m128i;
207 "Round `f32` lanes towards negative infinity.",
208 floor_m256(a: m256) -> m256;
209 "Round `f64` lanes towards negative infinity.",
210 floor_m256d(a: m256d) -> m256d;
211 "Inserts an `i16` to `m256i`",
212 insert_i16_to_m256i<const IMM: i32>(a: m256i, i: i16) -> m256i;
213 "Inserts an `i32` to `m256i`",
214 insert_i32_to_m256i<const IMM: i32>(a: m256i, i: i32) -> m256i;
215 "Inserts an `i64` to `m256i`",
216 insert_i64_to_m256i<const IMM: i32>(a: m256i, i: i64) -> m256i;
217 "Inserts an `i8` to `m256i`",
218 insert_i8_to_m256i<const IMM: i32>(a: m256i, i: i8) -> m256i;
219 "Inserts an `m128` to `m256`",
220 insert_m128_to_m256<const IMM: i32>(a: m256, b: m128) -> m256;
221 "Inserts an `m128d` to `m256d`",
222 insert_m128d_to_m256d<const IMM: i32>(a: m256d, b: m128d) -> m256d;
223 "Slowly inserts an `m128i` to `m256i`.",
224 insert_m128i_to_m256i_slow_avx<const IMM: i32>(a: m256i, b: m128i) -> m256i;
225 "Load an `f32` and splat it to all lanes of an `m256d`",
226 load_f32_splat_m256(a: &f32) -> m256;
227 "Load an `f64` and splat it to all lanes of an `m256d`",
228 load_f64_splat_m256d(a: &f64) -> m256d;
229 "Load an `m128` and splat it to the lower and upper half of an `m256`",
230 load_m128_splat_m256(a: &m128) -> m256;
231 "Load an `m128d` and splat it to the lower and upper half of an `m256d`",
232 load_m128d_splat_m256d(a: &m128d) -> m256d;
233 "Load data from memory into a register.",
234 load_m256(a: &m256) -> m256;
235 "Load data from memory into a register.",
236 load_m256d(a: &m256d) -> m256d;
237 "Load data from memory into a register.",
238 load_m256i(a: &m256i) -> m256i;
239 "Load data from memory into a register according to a mask.",
240 load_masked_m128(a: &m128, mask: m128i) -> m128;
241 "Load data from memory into a register according to a mask.",
242 load_masked_m128d(a: &m128d, mask: m128i) -> m128d;
243 "Load data from memory into a register according to a mask.",
244 load_masked_m256(a: &m256, mask: m256i) -> m256;
245 "Load data from memory into a register according to a mask.",
246 load_masked_m256d(a: &m256d, mask: m256i) -> m256d;
247 "Load data from memory into a register.",
248 load_unaligned_hi_lo_m256(a: &[f32; 4], b: &[f32; 4]) -> m256;
249 "Load data from memory into a register.",
250 load_unaligned_hi_lo_m256d(a: &[f64; 2], b: &[f64; 2]) -> m256d;
251 "Load data from memory into a register.",
252 load_unaligned_hi_lo_m256i(a: &[i8; 16], b: &[i8; 16]) -> m256i;
253 "Load data from memory into a register.",
254 load_unaligned_m256(a: &[f32; 8]) -> m256;
255 "Load data from memory into a register.",
256 load_unaligned_m256d(a: &[f64; 4]) -> m256d;
257 "Load data from memory into a register.",
258 load_unaligned_m256i(a: &[i8; 32]) -> m256i;
259 "Lanewise `max(a, b)`.",
260 max_m256(a: m256, b: m256) -> m256;
261 "Lanewise `max(a, b)`.",
262 max_m256d(a: m256d, b: m256d) -> m256d;
263 "Lanewise `min(a, b)`.",
264 min_m256(a: m256, b: m256) -> m256;
265 "Lanewise `min(a, b)`.",
266 min_m256d(a: m256d, b: m256d) -> m256d;
267 "Collects the sign bit of each lane into a 4-bit value.",
268 move_mask_m256(a: m256) -> i32;
269 "Collects the sign bit of each lane into a 4-bit value.",
270 move_mask_m256d(a: m256d) -> i32;
271 "Lanewise `a * b` with `f32` lanes.",
272 mul_m256(a: m256, b: m256) -> m256;
273 "Lanewise `a * b` with `f64` lanes.",
274 mul_m256d(a: m256d, b: m256d) -> m256d;
275 "Shuffle 128 bits of floating point data at a time from `$a` and `$b` using
276 an immediate control value.",
277 permute2z_m256<const MASK: i32>(a: m256, b: m256) -> m256;
278 "Shuffle 128 bits of floating point data at a time from `a` and `b` using an
279 immediate control value.",
280 permute2z_m256d<const MASK: i32>(a: m256d, b: m256d) -> m256d;
281 "<em>Slowly</em> swizzle 128 bits of integer data from `a` and `b` using an
282 immediate control value.",
283 permute2z_m256i<const MASK: i32>(a: m256i, b: m256i) -> m256i;
284 "Shuffle the `f32` lanes from `a` using an immediate control value.",
285 permute_m128<const MASK: i32>(a: m128) -> m128;
286 "Shuffle the `f64` lanes in `a` using an immediate control value.",
287 permute_m128d<const MASK: i32>(a: m128d) -> m128d;
288 "Shuffle the `f32` lanes in `a` using an immediate control value.",
289 permute_m256<const MASK: i32>(a: m256) -> m256;
290 "Shuffle the `f64` lanes from `a` together using an immediate control value.",
291 permute_m256d<const MASK: i32>(a: m256d) -> m256d;
292 "Reciprocal of `f32` lanes.",
293 reciprocal_m256(a: m256) -> m256;
294 "Reciprocal of `f32` lanes.",
295 reciprocal_sqrt_m256(a: m256) -> m256;
296 "Rounds each lane in the style specified.",
297 round_m256<const OP: i32>(a: m256) -> m256;
298 "Rounds each lane in the style specified.",
299 round_m256d<const OP: i32>(a: m256d) -> m256d;
300 "Set `i16` args into an `m256i` lane.",
301 set_i16_m256i(e15: i16, e14: i16, e13: i16, e12: i16, e11: i16, e10: i16, e9: i16, e8: i16,
302 e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16) -> m256i;
303 "Set `i32` args into an `m256i` lane.",
304 set_i32_m256i(e7: i32, e6: i32, e5: i32, e4: i32, e3: i32, e2: i32, e1: i32,
305 e0: i32) -> m256i;
306 "Set `i64` args into an `m256i` lane.",
307 set_i64_m256i(e3: i64, e2: i64, e1: i64, e0: i64) -> m256i;
308 "Set `i8` args into an `m256i` lane.",
309 set_i8_m256i(e31: i8, e30: i8, e29: i8, e28: i8, e27: i8, e26: i8, e25: i8, e24: i8,
310 e23: i8, e22: i8, e21: i8, e20: i8, e19: i8, e18: i8, e17: i8, e16: i8, e15: i8,
311 e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8, e7: i8, e6: i8,
312 e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> m256i;
313 "Set `m128` args into an `m256`.",
314 set_m128_m256(high: m128, low: m128) -> m256;
315 "Set `m128d` args into an `m256d`.",
316 set_m128d_m256d(high: m128d, low: m128d) -> m256d;
317 "Set `m128i` args into an `m256i`.",
318 set_m128i_m256i(hi: m128i, lo: m128i) -> m256i;
319 "Set `f32` args into an `m256` lane.",
320 set_m256(e7: f32, e6: f32, e5: f32, e4: f32, e3: f32, e2: f32, e1: f32, e0: f32) -> m256;
321 "Set `f64` args into an `m256d` lane.",
322 set_m256d(e3: f64, e2: f64, e1: f64, e0: f64) -> m256d;
323 "Set `i16` args into an `m256i` lane.",
324 set_reversed_i16_m256i(e15: i16, e14: i16, e13: i16, e12: i16, e11: i16, e10: i16,
325 e9: i16, e8: i16, e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16,
326 e0: i16) -> m256i;
327 "Set `i32` args into an `m256i` lane.",
328 set_reversed_i32_m256i(e7: i32, e6: i32, e5: i32, e4: i32, e3: i32, e2: i32, e1: i32,
329 e0: i32) -> m256i;
330 "Set `i64` args into an `m256i` lane.",
331 set_reversed_i64_m256i(e3: i64, e2: i64, e1: i64, e0: i64) -> m256i;
332 "Set `i8` args into an `m256i` lane.",
333 set_reversed_i8_m256i(e31: i8, e30: i8, e29: i8, e28: i8, e27: i8, e26: i8, e25: i8,
334 e24: i8, e23: i8, e22: i8, e21: i8, e20: i8, e19: i8, e18: i8, e17: i8, e16: i8,
335 e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8, e7: i8,
336 e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> m256i;
337 "Set `m128` args into an `m256`.",
338 set_reversed_m128_m256(hi: m128, lo: m128) -> m256;
339 "Set `m128d` args into an `m256d`.",
340 set_reversed_m128d_m256d(hi: m128d, lo: m128d) -> m256d;
341 "Set `m128i` args into an `m256i`.",
342 set_reversed_m128i_m256i(hi: m128i, lo: m128i) -> m256i;
343 "Set `f32` args into an `m256` lane.",
344 set_reversed_m256(e7: f32, e6: f32, e5: f32, e4: f32, e3: f32, e2: f32, e1: f32,
345 e0: f32) -> m256;
346 "Set `f64` args into an `m256d` lane.",
347 set_reversed_m256d(e3: f64, e2: f64, e1: f64, e0: f64) -> m256d;
348 "Splat an `i16` arg into an `m256i` lane.",
349 set_splat_i16_m256i(i: i16) -> m256i;
350 "Splat an `i32` arg into an `m256i` lane.",
351 set_splat_i32_m256i(i: i32) -> m256i;
352 "Splat an `i64` arg into an `m256i` lane.",
353 set_splat_i64_m256i(i: i64) -> m256i;
354 "Splat an `i8` arg into an `m256i` lane.",
355 set_splat_i8_m256i(i: i8) -> m256i;
356 "Splat an `f32` arg into an `m256` lane.",
357 set_splat_m256(f: f32) -> m256;
358 "Splat an `f64` arg into an `m256d` lane.",
359 set_splat_m256d(f: f64) -> m256d;
360 "Shuffle `f32` values in `a` using `i32` values in `v`.",
361 shuffle_av_f32_all_m128(a: m128, v: m128i) -> m128;
362 "Shuffle `f32` values in `a` using `i32` values in `v`.",
363 shuffle_av_f32_half_m256(a: m256, v: m256i) -> m256;
364 "Shuffle `f64` lanes in `a` using <strong>bit 1</strong> of the `i64` lanes in `v`",
365 shuffle_av_f64_all_m128d(a: m128d, v: m128i) -> m128d;
366 "Shuffle `f64` lanes in `a` using <strong>bit 1</strong> of the `i64` lanes in `v`.",
367 shuffle_av_f64_half_m256d(a: m256d, b: m256i) -> m256d;
368 "Shuffle the `f32` lanes from `a` and `b` together using an immediate control value.",
369 shuffle_m256<const IMM: i32>(a: m256, b: m256) -> m256;
370 "Shuffle the `f64` lanes from `a` and `b` together using an immediate control value.",
371 shuffle_m256d<const IMM: i32>(a: m256d, b: m256d) -> m256d;
372 "Lanewise `sqrt` on `f64` lanes.",
373 sqrt_m256(a: m256) -> m256;
374 "Lanewise `sqrt` on `f64` lanes.",
375 sqrt_m256d(a: m256d) -> m256d;
376 "Store data from a register into memory.",
377 store_m256(addr: &mut m256, a: m256);
378 "Store data from a register into memory.",
379 store_m256d(addr: &mut m256d, a: m256d);
380 "Store data from a register into memory.",
381 store_m256i(addr: &mut m256i, a: m256i);
382 "Store data from a register into memory according to a mask.",
383 store_masked_m128(addr: &mut m128, mask: m128i, a: m128);
384 "Store data from a register into memory according to a mask.",
385 store_masked_m128d(addr: &mut m128d, mask: m128i, a: m128d);
386 "Store data from a register into memory according to a mask.",
387 store_masked_m256(addr: &mut m256, mask: m256i, a: m256);
388 "Store data from a register into memory according to a mask.",
389 store_masked_m256d(addr: &mut m256d, mask: m256i, a: m256d);
390 "Store data from a register into memory.",
391 store_unaligned_hi_lo_m256(hi_addr: &mut [f32; 4], lo_addr: &mut [f32; 4], a: m256);
392 "Store data from a register into memory.",
393 store_unaligned_hi_lo_m256d(hi_addr: &mut [f64; 2], lo_addr: &mut [f64; 2], a: m256d);
394 "Store data from a register into memory.",
395 store_unaligned_hi_lo_m256i(hi_addr: &mut [i8; 16], lo_addr: &mut [i8; 16], a: m256i);
396 "Store data from a register into memory.",
397 store_unaligned_m256(addr: &mut [f32; 8], a: m256);
398 "Store data from a register into memory.",
399 store_unaligned_m256d(addr: &mut [f64; 4], a: m256d);
400 "Store data from a register into memory.",
401 store_unaligned_m256i(addr: &mut [i8; 32], a: m256i);
402 "Subtract adjacent `f32` lanes.",
403 sub_horizontal_m256(a: m256, b: m256) -> m256;
404 "Subtract adjacent `f64` lanes.",
405 sub_horizontal_m256d(a: m256d, b: m256d) -> m256d;
406 "Lanewise `a - b` with `f32` lanes.",
407 sub_m256(a: m256, b: m256) -> m256;
408 "Lanewise `a - b` with `f64` lanes.",
409 sub_m256d(a: m256d, b: m256d) -> m256d;
410 "Compute the bitwise of sign bit NOT of `a` and then AND with `b`,
411 returns 1 if the result is zero, otherwise 0.",
412 testc_m128(a: m128, b: m128) -> i32;
413 "Compute the bitwise of sign bit NOT of `a` and then AND with `b`,
414 returns 1 if the result is zero, otherwise 0.",
415 testc_m128d(a: m128d, b: m128d) -> i32;
416 "Compute the bitwise of sign bit NOT of `a` and then AND with `b`,
417 returns 1 if the result is zero, otherwise 0.",
418 testc_m256(a: m256, b: m256) -> i32;
419 "Compute the bitwise of sign bit NOT of `a` and then AND with `b`,
420 returns 1 if the result is zero, otherwise 0.",
421 testc_m256d(a: m256d, b: m256d) -> i32;
422 "Compute the bitwise NOT of `a` and then AND with `b`,
423 returns 1 if the result is zero, otherwise 0.",
424 testc_m256i(a: m256i, b: m256i) -> i32;
425 "Computes the bitwise AND of 256 bits in `a` and
426 `b`, returns 1 if the result is zero, otherwise 0.",
427 testz_m128(a: m128, b: m128) -> i32;
428 "Computes the bitwise of sign bitAND of 256 bits in `a` and
429 `b`, returns 1 if the result is zero, otherwise 0.",
430 testz_m128d(a: m128d, b: m128d) -> i32;
431 "Computes the bitwise AND of 256 bits in `a` and
432 `b`, returns 1 if the result is zero, otherwise 0.",
433 testz_m256(a: m256, b: m256) -> i32;
434 "Computes the bitwise of sign bit AND of 256 bits in `a` and
435 `b`, returns 1 if the result is zero, otherwise 0.",
436 testz_m256d(a: m256d, b: m256d) -> i32;
437 "Computes the bitwise of sign bit AND of 256 bits in `a` and
438 `b`, returns 1 if the result is zero, otherwise 0.",
439 testz_m256i(a: m256i, b: m256i) -> i32;
440 "Unpack and interleave the high lanes.",
441 unpack_hi_m256(a: m256, b: m256) -> m256;
442 "Unpack and interleave the high lanes.",
443 unpack_hi_m256d(a: m256d, b: m256d) -> m256d;
444 "Unpack and interleave the high lanes.",
445 unpack_lo_m256(a: m256, b: m256) -> m256;
446 "Unpack and interleave the high lanes.",
447 unpack_lo_m256d(a: m256d, b: m256d) -> m256d;
448 "Zero extend an `m128` to `m256`",
449 zero_extend_m128(a: m128) -> m256;
450 "Zero extend an `m128d` to `m256d`",
451 zero_extend_m128d(a: m128d) -> m256d;
452 "Zero extend an `m128i` to `m256i`",
453 zero_extend_m128i(a: m128i) -> m256i;
454 "A zeroed `m256`",
455 zeroed_m256() -> m256;
456 "A zeroed `m256d`",
457 zeroed_m256d() -> m256d;
458 "A zeroed `m256i`",
459 zeroed_m256i() -> m256i;
460 }
461}
462impl_arch! {
463 #[doc = "# Functions requiring the `avx2` target feature.\n\n---"]
464 #[doc = "See: <https://en.wikipedia.org/wiki/Advanced_Vector_Extensions>"]
465 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "avx2";
466 arch_fn! {
467 "Absolute value of `i16` lanes.",
468 abs_i16_m256i(a: m256i) -> m256i;
469 "Absolute value of `i32` lanes.",
470 abs_i32_m256i(a: m256i) -> m256i;
471 "Absolute value of `i8` lanes.",
472 abs_i8_m256i(a: m256i) -> m256i;
473 "Horizontal `a + b` with lanes as `i16`.",
474 add_horizontal_i16_m256i(a: m256i, b: m256i) -> m256i;
475 "Horizontal `a + b` with lanes as `i32`.",
476 add_horizontal_i32_m256i(a: m256i, b: m256i) -> m256i;
477 "Horizontal saturating `a + b` with lanes as `i16`.",
478 add_horizontal_saturating_i16_m256i(a: m256i, b: m256i) -> m256i;
479 "Lanewise `a + b` with lanes as `i16`.",
480 add_i16_m256i(a: m256i, b: m256i) -> m256i;
481 "Lanewise `a + b` with lanes as `i32`.",
482 add_i32_m256i(a: m256i, b: m256i) -> m256i;
483 "Lanewise `a + b` with lanes as `i64`.",
484 add_i64_m256i(a: m256i, b: m256i) -> m256i;
485 "Lanewise `a + b` with lanes as `i8`.",
486 add_i8_m256i(a: m256i, b: m256i) -> m256i;
487 "Lanewise saturating `a + b` with lanes as `i16`.",
488 add_saturating_i16_m256i(a: m256i, b: m256i) -> m256i;
489 "Lanewise saturating `a + b` with lanes as `i8`.",
490 add_saturating_i8_m256i(a: m256i, b: m256i) -> m256i;
491 "Lanewise saturating `a + b` with lanes as `u16`.",
492 add_saturating_u16_m256i(a: m256i, b: m256i) -> m256i;
493 "Lanewise saturating `a + b` with lanes as `u8`.",
494 add_saturating_u8_m256i(a: m256i, b: m256i) -> m256i;
495 "Average `u16` lanes.",
496 average_u16_m256i(a: m256i, b: m256i) -> m256i;
497 "Average `u8` lanes.",
498 average_u8_m256i(a: m256i, b: m256i) -> m256i;
499 "Bitwise `a & b`.",
500 bitand_m256i(a: m256i, b: m256i) -> m256i;
501 "Bitwise `(!a) & b`.",
502 bitandnot_m256i(a: m256i, b: m256i) -> m256i;
503 "Bitwise `a | b`",
504 bitor_m256i(a: m256i, b: m256i) -> m256i;
505 "Bitwise `a ^ b`.",
506 bitxor_m256i(a: m256i, b: m256i) -> m256i;
507 "Blends the `i16` lanes according to the immediate value.",
508 blend_imm_i16_m256i<const IMM: i32>(a: m256i, b: m256i) -> m256i;
509 "Blends the `i32` lanes in `a` and `b` into a single value.",
510 blend_imm_i32_m128i<const IMM: i32>(a: m128i, b: m128i) -> m128i;
511 "Blends the `i32` lanes according to the immediate value.",
512 blend_imm_i32_m256i<const IMM: i32>(a: m256i, b: m256i) -> m256i;
513 "Blend `i8` lanes according to a runtime varying mask.",
514 blend_varying_i8_m256i(a: m256i, b: m256i, mask: m256i) -> m256i;
515 "Shifts each `u128` lane left by a number of <strong>bytes</strong>.",
516 byte_shl_imm_u128_m256i<const IMM: i32>(a: m256i) -> m256i;
517 "Shifts each `u128` lane right by a number of <strong>bytes</strong>.",
518 byte_shr_imm_u128_m256i<const IMM: i32>(a: m256i) -> m256i;
519 "Compare `i16` lanes for equality, mask output.",
520 cmp_eq_mask_i16_m256i(a: m256i, b: m256i) -> m256i;
521 "Compare `i32` lanes for equality, mask output.",
522 cmp_eq_mask_i32_m256i(a: m256i, b: m256i) -> m256i;
523 "Compare `i64` lanes for equality, mask output.",
524 cmp_eq_mask_i64_m256i(a: m256i, b: m256i) -> m256i;
525 "Compare `i8` lanes for equality, mask output.",
526 cmp_eq_mask_i8_m256i(a: m256i, b: m256i) -> m256i;
527 "Compare `i16` lanes for `a > b`, mask output.",
528 cmp_gt_mask_i16_m256i(a: m256i, b: m256i) -> m256i;
529 "Compare `i32` lanes for `a > b`, mask output.",
530 cmp_gt_mask_i32_m256i(a: m256i, b: m256i) -> m256i;
531 "Compare `i64` lanes for `a > b`, mask output.",
532 cmp_gt_mask_i64_m256i(a: m256i, b: m256i) -> m256i;
533 "Compare `i8` lanes for `a > b`, mask output.",
534 cmp_gt_mask_i8_m256i(a: m256i, b: m256i) -> m256i;
535 "Works like `combined_byte_shr_imm_m128i`, but twice as wide.",
536 combined_byte_shr_imm_m256i<const IMM: i32>(a: m256i, b: m256i) -> m256i;
537 "Convert `i8` values to `i16` values.",
538 convert_to_i16_m256i_from_i8_m128i(a: m128i) -> m256i;
539 "Convert lower 4 `u8` values to `i16` values.",
540 convert_to_i16_m256i_from_lower4_u8_m128i(a: m128i) -> m256i;
541 "Convert lower 8 `u8` values to `i16` values.",
542 convert_to_i16_m256i_from_lower8_u8_m128i(a: m128i) -> m256i;
543 "Convert `u8` values to `i16` values.",
544 convert_to_i16_m256i_from_u8_m128i(a: m128i) -> m256i;
545 "Convert `i16` values to `i32` values.",
546 convert_to_i32_m256i_from_i16_m128i(a: m128i) -> m256i;
547 "Convert the lower 8 `i8` values to `i32` values.",
548 convert_to_i32_m256i_from_lower8_i8_m128i(a: m128i) -> m256i;
549 "Convert `u16` values to `i32` values.",
550 convert_to_i32_m256i_from_u16_m128i(a: m128i) -> m256i;
551 "Convert `i32` values to `i64` values.",
552 convert_to_i64_m256i_from_i32_m128i(a: m128i) -> m256i;
553 "Convert `i16` values to `i64` values.",
554 convert_to_i64_m256i_from_lower4_i16_m128i(a: m128i) -> m256i;
555 "Convert the lower 4 `i8` values to `i64` values.",
556 convert_to_i64_m256i_from_lower4_i8_m128i(a: m128i) -> m256i;
557 "Convert `u16` values to `i64` values.",
558 convert_to_i64_m256i_from_lower4_u16_m128i(a: m128i) -> m256i;
559 "Convert `u32` values to `i64` values.",
560 convert_to_i64_m256i_from_u32_m128i(a: m128i) -> m256i;
561 "Gets an `i16` value out of an `m256i`, returns as `i32`.",
562 extract_i16_as_i32_m256i<const LANE: i32>(a: m256i) -> i32;
563 "Gets an `i8` value out of an `m256i`, returns as `i32`.",
564 extract_i8_as_i32_m256i<const LANE: i32>(a: m256i) -> i32;
565 "Gets an `m128i` value out of an `m256i`.",
566 extract_m128i_m256i<const LANE: i32>(a: m256i) -> m128i;
567 "Inserts an `m128i` to an `m256i` at the high or low position.",
568 insert_m128i_to_m256i<const LANE: i32>(a: m256i, b: m128i) -> m256i;
569 "Loads the reference given and zeroes any `i32` lanes not in the mask.",
570 load_masked_i32_m128i(a: &m128i, mask: m128i) -> m128i;
571 "Loads the reference given and zeroes any `i32` lanes not in the mask.",
572 load_masked_i32_m256i(a: &m256i, mask: m256i) -> m256i;
573 "Loads the reference given and zeroes any `i64` lanes not in the mask.",
574 load_masked_i64_m128i(a: &m128i, mask: m128i) -> m128i;
575 "Loads the reference given and zeroes any `i64` lanes not in the mask.",
576 load_masked_i64_m256i(a: &m256i, mask: m256i) -> m256i;
577 "Lanewise `max(a, b)` with lanes as `i16`.",
578 max_i16_m256i(a: m256i, b: m256i) -> m256i;
579 "Lanewise `max(a, b)` with lanes as `i32`.",
580 max_i32_m256i(a: m256i, b: m256i) -> m256i;
581 "Lanewise `max(a, b)` with lanes as `i8`.",
582 max_i8_m256i(a: m256i, b: m256i) -> m256i;
583 "Lanewise `max(a, b)` with lanes as `u16`.",
584 max_u16_m256i(a: m256i, b: m256i) -> m256i;
585 "Lanewise `max(a, b)` with lanes as `u32`.",
586 max_u32_m256i(a: m256i, b: m256i) -> m256i;
587 "Lanewise `max(a, b)` with lanes as `u8`.",
588 max_u8_m256i(a: m256i, b: m256i) -> m256i;
589 "Lanewise `min(a, b)` with lanes as `i16`.",
590 min_i16_m256i(a: m256i, b: m256i) -> m256i;
591 "Lanewise `min(a, b)` with lanes as `i32`.",
592 min_i32_m256i(a: m256i, b: m256i) -> m256i;
593 "Lanewise `min(a, b)` with lanes as `i8`.",
594 min_i8_m256i(a: m256i, b: m256i) -> m256i;
595 "Lanewise `min(a, b)` with lanes as `u16`.",
596 min_u16_m256i(a: m256i, b: m256i) -> m256i;
597 "Lanewise `min(a, b)` with lanes as `u32`.",
598 min_u32_m256i(a: m256i, b: m256i) -> m256i;
599 "Lanewise `min(a, b)` with lanes as `u8`.",
600 min_u8_m256i(a: m256i, b: m256i) -> m256i;
601 "Create an `i32` mask of each sign bit in the `i8` lanes.",
602 move_mask_i8_m256i(a: m256i) -> i32;
603 "Multiply `i16` lanes producing `i32` values, horizontal add pairs of `i32`
604 values to produce the final output.",
605 mul_i16_horizontal_add_m256i(a: m256i, b: m256i) -> m256i;
606 "Multiply the `i16` lanes and keep the high half of each 32-bit output.",
607 mul_i16_keep_high_m256i(a: m256i, b: m256i) -> m256i;
608 "Multiply the `i16` lanes and keep the low half of each 32-bit output.",
609 mul_i16_keep_low_m256i(a: m256i, b: m256i) -> m256i;
610 "Multiply `i16` lanes into `i32` intermediates, keep the high 18 bits, round
611 by adding 1, right shift by 1.",
612 mul_i16_scale_round_m256i(a: m256i, b: m256i) -> m256i;
613 "Multiply the `i32` lanes and keep the low half of each 64-bit output.",
614 mul_i32_keep_low_m256i(a: m256i, b: m256i) -> m256i;
615 "Multiply the lower `i32` within each `i64` lane, `i64` output.",
616 mul_i64_low_bits_m256i(a: m256i, b: m256i) -> m256i;
617 "Multiply the `u16` lanes and keep the high half of each 32-bit output.",
618 mul_u16_keep_high_m256i(a: m256i, b: m256i) -> m256i;
619 "Multiply the lower `u32` within each `u64` lane, `u64` output.",
620 mul_u64_low_bits_m256i(a: m256i, b: m256i) -> m256i;
621 "This is dumb and weird.",
622 mul_u8i8_add_horizontal_saturating_m256i(a: m256i, b: m256i) -> m256i;
623 "Computes eight `u16` “sum of absolute difference” values according to the
624 bytes selected.",
625 multi_packed_sum_abs_diff_u8_m256i<const IMM: i32>(a: m256i, b: m256i) -> m256i;
626 "Saturating convert `i16` to `i8`, and pack the values.",
627 pack_i16_to_i8_m256i(a: m256i, b: m256i) -> m256i;
628 "Saturating convert `i16` to `u8`, and pack the values.",
629 pack_i16_to_u8_m256i(a: m256i, b: m256i) -> m256i;
630 "Saturating convert `i32` to `i16`, and pack the values.",
631 pack_i32_to_i16_m256i(a: m256i, b: m256i) -> m256i;
632 "Saturating convert `i32` to `u16`, and pack the values.",
633 pack_i32_to_u16_m256i(a: m256i, b: m256i) -> m256i;
634 "Sets the lowest `i16` lane of an `m128i` as all lanes of an `m256i`.",
635 set_splat_i16_m128i_s_m256i(a: m128i) -> m256i;
636 "Sets the lowest `i32` lane of an `m128i` as all lanes of an `m256i`.",
637 set_splat_i32_m128i_s_m256i(a: m128i) -> m256i;
638 "Sets the lowest `i64` lane of an `m128i` as all lanes of an `m256i`.",
639 set_splat_i64_m128i_s_m256i(a: m128i) -> m256i;
640 "Sets the lowest `i8` lane of an `m128i` as all lanes of an `m256i`.",
641 set_splat_i8_m128i_s_m256i(a: m128i) -> m256i;
642 "Sets the lowest lane of an `m128` as all lanes of an `m256`.",
643 set_splat_m128_s_m256(a: m128) -> m256;
644 "Sets the lowest lane of an `m128d` as all lanes of an `m256d`.",
645 set_splat_m128d_s_m256d(a: m128d) -> m256d;
646 "Lanewise `u16` shift left by the lower `u64` lane of `count`.",
647 shl_all_u16_m256i(a: m256i, count: m128i) -> m256i;
648 "Shift all `u32` lanes left by the lower `u64` lane of `count`.",
649 shl_all_u32_m256i(a: m256i, count: m128i) -> m256i;
650 "Shift all `u64` lanes left by the lower `u64` lane of `count`.",
651 shl_all_u64_m256i(a: m256i, count: m128i) -> m256i;
652 "Shift `u32` values to the left by `count` bits.",
653 shl_each_u32_m128i(a: m128i, count: m128i) -> m128i;
654 "Lanewise `u32` shift left by the matching `i32` lane in `count`.",
655 shl_each_u32_m256i(a: m256i, count: m256i) -> m256i;
656 "Shift `u64` values to the left by `count` bits.",
657 shl_each_u64_m128i(a: m128i, count: m128i) -> m128i;
658 "Lanewise `u64` shift left by the matching `u64` lane in `count`.",
659 shl_each_u64_m256i(a: m256i, count: m256i) -> m256i;
660 "Shifts all `u16` lanes left by an immediate.",
661 shl_imm_u16_m256i<const IMM: i32>(a: m256i) -> m256i;
662 "Shifts all `u32` lanes left by an immediate.",
663 shl_imm_u32_m256i<const IMM: i32>(a: m256i) -> m256i;
664 "Shifts all `u64` lanes left by an immediate.",
665 shl_imm_u64_m256i<const IMM: i32>(a: m256i) -> m256i;
666 "Lanewise `i16` shift right by the lower `i64` lane of `count`.",
667 shr_all_i16_m256i(a: m256i, count: m128i) -> m256i;
668 "Lanewise `i32` shift right by the lower `i64` lane of `count`.",
669 shr_all_i32_m256i(a: m256i, count: m128i) -> m256i;
670 "Lanewise `u16` shift right by the lower `u64` lane of `count`.",
671 shr_all_u16_m256i(a: m256i, count: m128i) -> m256i;
672 "Lanewise `u32` shift right by the lower `u64` lane of `count`.",
673 shr_all_u32_m256i(a: m256i, count: m128i) -> m256i;
674 "Lanewise `u64` shift right by the lower `u64` lane of `count`.",
675 shr_all_u64_m256i(a: m256i, count: m128i) -> m256i;
676 "Shift `i32` values to the right by `count` bits.",
677 shr_each_i32_m128i(a: m128i, count: m128i) -> m128i;
678 "Lanewise `i32` shift right by the matching `i32` lane in `count`.",
679 shr_each_i32_m256i(a: m256i, count: m256i) -> m256i;
680 "Shift `u32` values to the left by `count` bits.",
681 shr_each_u32_m128i(a: m128i, count: m128i) -> m128i;
682 "Lanewise `u32` shift right by the matching `u32` lane in `count`.",
683 shr_each_u32_m256i(a: m256i, count: m256i) -> m256i;
684 "Shift `u64` values to the left by `count` bits.",
685 shr_each_u64_m128i(a: m128i, count: m128i) -> m128i;
686 "Lanewise `u64` shift right by the matching `i64` lane in `count`.",
687 shr_each_u64_m256i(a: m256i, count: m256i) -> m256i;
688 "Shifts all `i16` lanes left by an immediate.",
689 shr_imm_i16_m256i<const IMM: i32>(a: m256i) -> m256i;
690 "Shifts all `i32` lanes left by an immediate.",
691 shr_imm_i32_m256i<const IMM: i32>(a: m256i) -> m256i;
692 "Shifts all `u16` lanes right by an immediate.",
693 shr_imm_u16_m256i<const IMM: i32>(a: m256i) -> m256i;
694 "Shifts all `u32` lanes right by an immediate.",
695 shr_imm_u32_m256i<const IMM: i32>(a: m256i) -> m256i;
696 "Shifts all `u64` lanes right by an immediate.",
697 shr_imm_u64_m256i<const IMM: i32>(a: m256i) -> m256i;
698 "Shuffle 128 bits of integer data from `$a` and `$b` using an immediate control value.",
699 shuffle_abi_i128z_all_m256i<const MASK: i32>(a: m256i, b: m256i) -> m256i;
700 "Shuffle the `f64` lanes from `$a` using an immediate control value.",
701 shuffle_ai_f64_all_m256d<const IMM: i32>(a: m256d) -> m256d;
702 "Shuffle the high `i16` lanes in `$a` using an immediate control value.",
703 shuffle_ai_i16_h64half_m256i<const IMM: i32>(a: m256i) -> m256i;
704 "Shuffle the low `i16` lanes in `$a` using an immediate control value.",
705 shuffle_ai_i16_l64half_m256i<const IMM: i32>(a: m256i) -> m256i;
706 "Shuffle the `i32` lanes in `a` using an immediate control value.",
707 shuffle_ai_i32_half_m256i<const IMM: i32>(a: m256i) -> m256i;
708 "Shuffle the `f64` lanes in `$a` using an immediate control value.",
709 shuffle_ai_i64_all_m256i<const IMM: i32>(a: m256i) -> m256i;
710 "Shuffle `f32` lanes in `a` using `i32` values in `v`.",
711 shuffle_av_i32_all_m256(a: m256, v: m256i) -> m256;
712 "Shuffle `i32` lanes in `a` using `i32` values in `v`.",
713 shuffle_av_i32_all_m256i(a: m256i, v: m256i) -> m256i;
714 "Shuffle `i8` lanes in `a` using `i8` values in `v`.",
715 shuffle_av_i8z_half_m256i(a: m256i, v: m256i) -> m256i;
716 "Lanewise `a * signum(b)` with lanes as `i16`",
717 sign_apply_i16_m256i(a: m256i, b: m256i) -> m256i;
718 "Lanewise `a * signum(b)` with lanes as `i32`",
719 sign_apply_i32_m256i(a: m256i, b: m256i) -> m256i;
720 "Lanewise `a * signum(b)` with lanes as `i8`",
721 sign_apply_i8_m256i(a: m256i, b: m256i) -> m256i;
722 "Splat the lowest 16-bit lane across the entire 128 bits.",
723 splat_i16_m128i_s_m128i(a: m128i) -> m128i;
724 "Splat the lowest 32-bit lane across the entire 128 bits.",
725 splat_i32_m128i_s_m128i(a: m128i) -> m128i;
726 "Splat the lowest 64-bit lane across the entire 128 bits.",
727 splat_i64_m128i_s_m128i(a: m128i) -> m128i;
728 "Splat the lowest 8-bit lane across the entire 128 bits.",
729 splat_i8_m128i_s_m128i(a: m128i) -> m128i;
730 "Splat the lowest `f32` across all four lanes.",
731 splat_m128_s_m128(a: m128) -> m128;
732 "Splat the lower `f64` across both lanes of `m128d`.",
733 splat_m128d_s_m128d(a: m128d) -> m128d;
734 "Splat the 128-bits across 256-bits.",
735 splat_m128i_m256i(a: m128i) -> m256i;
736 "Stores the `i32` masked lanes given to the reference.",
737 store_masked_i32_m128i(addr: &mut m128i, mask: m128i, a: m128i);
738 "Stores the `i32` masked lanes given to the reference.",
739 store_masked_i32_m256i(addr: &mut m256i, mask: m256i, a: m256i);
740 "Stores the `i32` masked lanes given to the reference.",
741 store_masked_i64_m128i(addr: &mut m128i, mask: m128i, a: m128i);
742 "Stores the `i32` masked lanes given to the reference.",
743 store_masked_i64_m256i(addr: &mut m256i, mask: m256i, a: m256i);
744 "Horizontal `a - b` with lanes as `i16`.",
745 sub_horizontal_i16_m256i(a: m256i, b: m256i) -> m256i;
746 "Horizontal `a - b` with lanes as `i32`.",
747 sub_horizontal_i32_m256i(a: m256i, b: m256i) -> m256i;
748 "Horizontal saturating `a - b` with lanes as `i16`.",
749 sub_horizontal_saturating_i16_m256i(a: m256i, b: m256i) -> m256i;
750 "Lanewise `a - b` with lanes as `i16`.",
751 sub_i16_m256i(a: m256i, b: m256i) -> m256i;
752 "Lanewise `a - b` with lanes as `i32`.",
753 sub_i32_m256i(a: m256i, b: m256i) -> m256i;
754 "Lanewise `a - b` with lanes as `i64`.",
755 sub_i64_m256i(a: m256i, b: m256i) -> m256i;
756 "Lanewise `a - b` with lanes as `i8`.",
757 sub_i8_m256i(a: m256i, b: m256i) -> m256i;
758 "Lanewise saturating `a - b` with lanes as `i16`.",
759 sub_saturating_i16_m256i(a: m256i, b: m256i) -> m256i;
760 "Lanewise saturating `a - b` with lanes as `i8`.",
761 sub_saturating_i8_m256i(a: m256i, b: m256i) -> m256i;
762 "Lanewise saturating `a - b` with lanes as `u16`.",
763 sub_saturating_u16_m256i(a: m256i, b: m256i) -> m256i;
764 "Lanewise saturating `a - b` with lanes as `u8`.",
765 sub_saturating_u8_m256i(a: m256i, b: m256i) -> m256i;
766 "Compute “sum of `u8` absolute differences”.",
767 sum_of_u8_abs_diff_m256i(a: m256i, b: m256i) -> m256i;
768 "Unpack and interleave high `i16` lanes of `a` and `b`.",
769 unpack_high_i16_m256i(a: m256i, b: m256i) -> m256i;
770 "Unpack and interleave high `i32` lanes of `a` and `b`.",
771 unpack_high_i32_m256i(a: m256i, b: m256i) -> m256i;
772 "Unpack and interleave high `i64` lanes of `a` and `b`.",
773 unpack_high_i64_m256i(a: m256i, b: m256i) -> m256i;
774 "Unpack and interleave high `i8` lanes of `a` and `b`.",
775 unpack_high_i8_m256i(a: m256i, b: m256i) -> m256i;
776 "Unpack and interleave low `i16` lanes of `a` and `b`.",
777 unpack_low_i16_m256i(a: m256i, b: m256i) -> m256i;
778 "Unpack and interleave low `i32` lanes of `a` and `b`.",
779 unpack_low_i32_m256i(a: m256i, b: m256i) -> m256i;
780 "Unpack and interleave low `i64` lanes of `a` and `b`.",
781 unpack_low_i64_m256i(a: m256i, b: m256i) -> m256i;
782 "Unpack and interleave low `i8` lanes of `a` and `b`.",
783 unpack_low_i8_m256i(a: m256i, b: m256i) -> m256i;
784 }
785}
786impl_arch! {
787 #[doc = "# Functions requiring the `bmi1` target feature.\n\n---"]
788 #[doc = "See: <https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#BMI1_(Bit_Manipulation_Instruction_Set_1)>"]
789 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "bmi1";
790 arch_fn! {
791 "Extract a span of bits from the `u32`, control value style.",
792 bit_extract2_u32(a: u32, control: u32) -> u32;
793 "Extract a span of bits from the `u64`, control value style.",
794 bit_extract2_u64(a: u64, control: u64) -> u64;
795 "Extract a span of bits from the `u32`, start and len style.",
796 bit_extract_u32(a: u32, start: u32, len: u32) -> u32;
797 "Extract a span of bits from the `u64`, start and len style.",
798 bit_extract_u64(a: u64, start: u32, len: u32) -> u64;
799 "Gets the mask of all bits up to and including the lowest set bit in a `u32`.",
800 bit_lowest_set_mask_u32(a: u32) -> u32;
801 "Gets the mask of all bits up to and including the lowest set bit in a `u64`.",
802 bit_lowest_set_mask_u64(a: u64) -> u64;
803 "Resets (clears) the lowest set bit.",
804 bit_lowest_set_reset_u32(a: u32) -> u32;
805 "Resets (clears) the lowest set bit.",
806 bit_lowest_set_reset_u64(a: u64) -> u64;
807 "Gets the <em>value</em> of the lowest set bit in a `u32`.",
808 bit_lowest_set_value_u32(a: u32) -> u32;
809 "Gets the <em>value</em> of the lowest set bit in a `u64`.",
810 bit_lowest_set_value_u64(a: u64) -> u64;
811 "Bitwise `(!a) & b` for `u32`",
812 bitandnot_u32(a: u32, b: u32) -> u32;
813 "Bitwise `(!a) & b` for `u64`",
814 bitandnot_u64(a: u64, b: u64) -> u64;
815 "Counts the number of trailing zero bits in a `u32`.",
816 trailing_zero_count_u32(a: u32) -> u32;
817 "Counts the number of trailing zero bits in a `u64`.",
818 trailing_zero_count_u64(a: u64) -> u64;
819 }
820}
821impl_arch! {
822 #[doc = "# Functions requiring the `bmi2` target feature.\n\n---"]
823 #[doc = "See: <https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#BMI2_(Bit_Manipulation_Instruction_Set_2)>"]
824 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "bmi2";
825 arch_fn! {
826 "Zero out all high bits in a `u32` starting at the index given.",
827 bit_zero_high_index_u32(a: u32, index: u32) -> u32;
828 "Zero out all high bits in a `u64` starting at the index given.",
829 bit_zero_high_index_u64(a: u64, index: u32) -> u64;
830 "Multiply two `u32`, outputting the low bits and storing the high bits in the reference.",
831 mul_extended_u32(a: u32, b: u32, extra: &mut u32) -> u32;
832 "Multiply two `u64`, outputting the low bits and storing the high bits in the reference.",
833 mul_extended_u64(a: u64, b: u64, extra: &mut u64) -> u64;
834 "Deposit contiguous low bits from a `u32` according to a mask.",
835 population_deposit_u32(a: u32, index: u32) -> u32;
836 "Deposit contiguous low bits from a `u64` according to a mask.",
837 population_deposit_u64(a: u64, index: u64) -> u64;
838 "Extract bits from a `u32` according to a mask.",
839 population_extract_u32(a: u32, index: u32) -> u32;
840 "Extract bits from a `u64` according to a mask.",
841 population_extract_u64(a: u64, index: u64) -> u64;
842 }
843}
844impl_arch! {
845 #[doc = "# Functions requiring the `fma` target feature.\n\n---"]
846 #[doc = "See: <https://en.wikipedia.org/wiki/FMA_instruction_set>"]
847 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "fma";
848 arch_fn! {
849 "Lanewise fused `(a * b) + c`",
850 fused_mul_add_m128(a: m128, b: m128, c: m128) -> m128;
851 "Low lane fused `(a * b) + c`, other lanes unchanged",
852 fused_mul_add_m128_s(a: m128, b: m128, c: m128) -> m128;
853 "Lanewise fused `(a * b) + c`",
854 fused_mul_add_m128d(a: m128d, b: m128d, c: m128d) -> m128d;
855 "Low lane fused `(a * b) + c`, other lanes unchanged",
856 fused_mul_add_m128d_s(a: m128d, b: m128d, c: m128d) -> m128d;
857 "Lanewise fused `(a * b) + c`",
858 fused_mul_add_m256(a: m256, b: m256, c: m256) -> m256;
859 "Lanewise fused `(a * b) + c`",
860 fused_mul_add_m256d(a: m256d, b: m256d, c: m256d) -> m256d;
861 "Lanewise fused `(a * b) addsub c` (adds odd lanes and subtracts even lanes)",
862 fused_mul_addsub_m128(a: m128, b: m128, c: m128) -> m128;
863 "Lanewise fused `(a * b) addsub c` (adds odd lanes and subtracts even lanes)",
864 fused_mul_addsub_m128d(a: m128d, b: m128d, c: m128d) -> m128d;
865 "Lanewise fused `(a * b) addsub c` (adds odd lanes and subtracts even lanes)",
866 fused_mul_addsub_m256(a: m256, b: m256, c: m256) -> m256;
867 "Lanewise fused `(a * b) addsub c` (adds odd lanes and subtracts even lanes)",
868 fused_mul_addsub_m256d(a: m256d, b: m256d, c: m256d) -> m256d;
869 "Lanewise fused `-(a * b) + c`",
870 fused_mul_neg_add_m128(a: m128, b: m128, c: m128) -> m128;
871 "Low lane `-(a * b) + c`, other lanes unchanged.",
872 fused_mul_neg_add_m128_s(a: m128, b: m128, c: m128) -> m128;
873 "Lanewise fused `-(a * b) + c`",
874 fused_mul_neg_add_m128d(a: m128d, b: m128d, c: m128d) -> m128d;
875 "Low lane `-(a * b) + c`, other lanes unchanged.",
876 fused_mul_neg_add_m128d_s(a: m128d, b: m128d, c: m128d) -> m128d;
877 "Lanewise fused `-(a * b) + c`",
878 fused_mul_neg_add_m256(a: m256, b: m256, c: m256) -> m256;
879 "Lanewise fused `-(a * b) + c`",
880 fused_mul_neg_add_m256d(a: m256d, b: m256d, c: m256d) -> m256d;
881 "Lanewise fused `-(a * b) - c`",
882 fused_mul_neg_sub_m128(a: m128, b: m128, c: m128) -> m128;
883 "Low lane fused `-(a * b) - c`, other lanes unchanged.",
884 fused_mul_neg_sub_m128_s(a: m128, b: m128, c: m128) -> m128;
885 "Lanewise fused `-(a * b) - c`",
886 fused_mul_neg_sub_m128d(a: m128d, b: m128d, c: m128d) -> m128d;
887 "Low lane fused `-(a * b) - c`, other lanes unchanged.",
888 fused_mul_neg_sub_m128d_s(a: m128d, b: m128d, c: m128d) -> m128d;
889 "Lanewise fused `-(a * b) - c`",
890 fused_mul_neg_sub_m256(a: m256, b: m256, c: m256) -> m256;
891 "Lanewise fused `-(a * b) - c`",
892 fused_mul_neg_sub_m256d(a: m256d, b: m256d, c: m256d) -> m256d;
893 "Lanewise fused `(a * b) - c`",
894 fused_mul_sub_m128(a: m128, b: m128, c: m128) -> m128;
895 "Low lane fused `(a * b) - c`, other lanes unchanged.",
896 fused_mul_sub_m128_s(a: m128, b: m128, c: m128) -> m128;
897 "Lanewise fused `(a * b) - c`",
898 fused_mul_sub_m128d(a: m128d, b: m128d, c: m128d) -> m128d;
899 "Low lane fused `(a * b) - c`, other lanes unchanged.",
900 fused_mul_sub_m128d_s(a: m128d, b: m128d, c: m128d) -> m128d;
901 "Lanewise fused `(a * b) - c`",
902 fused_mul_sub_m256(a: m256, b: m256, c: m256) -> m256;
903 "Lanewise fused `(a * b) - c`",
904 fused_mul_sub_m256d(a: m256d, b: m256d, c: m256d) -> m256d;
905 "Lanewise fused `(a * b) subadd c` (subtracts odd lanes and adds even lanes)",
906 fused_mul_subadd_m128(a: m128, b: m128, c: m128) -> m128;
907 "Lanewise fused `(a * b) subadd c` (subtracts odd lanes and adds even lanes)",
908 fused_mul_subadd_m128d(a: m128d, b: m128d, c: m128d) -> m128d;
909 "Lanewise fused `(a * b) subadd c` (subtracts odd lanes and adds even lanes)",
910 fused_mul_subadd_m256(a: m256, b: m256, c: m256) -> m256;
911 "Lanewise fused `(a * b) subadd c` (subtracts odd lanes and adds even lanes)",
912 fused_mul_subadd_m256d(a: m256d, b: m256d, c: m256d) -> m256d;
913 }
914}
915impl_arch! {
916 #[doc = "# Functions requiring the `lzcnt` target feature.\n\n---"]
917 #[doc = "See: <https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#ABM_(Advanced_Bit_Manipulation)>"]
918 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "lzcnt";
919 arch_fn! {
920 "Count the leading zeroes in a `u32`.",
921 leading_zero_count_u32(a: u32) -> u32;
922 "Count the leading zeroes in a `u64`.",
923 leading_zero_count_u64(a: u64) -> u64;
924 }
925}
926impl_arch! {
927 #[doc = "# Functions requiring the `pclmulqdq` target feature.\n\n---"]
928 #[doc = "See: <https://en.wikipedia.org/wiki/CLMUL_instruction_set>"]
929 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "pclmulqdq";
930 arch_fn! {
931 "Performs a “carryless” multiplication of two `i64` values.",
932 mul_i64_carryless_m128i<const IMM: i32>(a: m128i, b: m128i) -> m128i;
933 }
934}
935impl_arch! {
936 #[doc = "# Functions requiring the `popcnt` target feature.\n\n---"]
937 #[doc = "See: <https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#ABM_(Advanced_Bit_Manipulation)>"]
938 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "popcnt";
939 arch_fn! {
940 "Count the number of bits set within an `i32`",
941 population_count_i32(a: i32) -> i32;
942 "Count the number of bits set within an `i64`",
943 population_count_i64(a: i64) -> i32;
944 }
945}
946impl_arch! {
947 #[doc = "# Functions requiring the `rdrand` target feature.\n\n---"]
948 #[doc = "See: <https://en.wikipedia.org/wiki/RDRAND>"]
949 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "rdrand";
950 arch_fn! {
951 "Try to obtain a random `u16` from the hardware RNG.",
952 rdrand_u16(out: &mut u16) -> i32;
953 "Try to obtain a random `u32` from the hardware RNG.",
954 rdrand_u32(out: &mut u32) -> i32;
955 "Try to obtain a random `u64` from the hardware RNG.",
956 rdrand_u64(out: &mut u64) -> i32;
957 }
958}
959impl_arch! {
960 #[doc = "# Functions requiring the `rdseed` target feature.\n\n---"]
961 #[doc = "See: <https://en.wikipedia.org/wiki/RDRAND>"]
962 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "rdseed";
963 arch_fn! {
964 "Try to obtain a random `u16` from the hardware RNG.",
965 rdseed_u16(out: &mut u16) -> i32;
966 "Try to obtain a random `u32` from the hardware RNG.",
967 rdseed_u32(out: &mut u32) -> i32;
968 "Try to obtain a random `u64` from the hardware RNG.",
969 rdseed_u64(out: &mut u64) -> i32;
970 }
971}
972impl_arch! {
973 #[doc = "# Functions requiring the `sse` target feature.\n\n---"]
974 #[doc = "See: <https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions>"]
975 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "sse";
976 arch_fn! {
977 "Lanewise `a + b`.",
978 add_m128(a: m128, b: m128) -> m128;
979 "Low lane `a + b`, other lanes unchanged.",
980 add_m128_s(a: m128, b: m128) -> m128;
981 "Bitwise `a & b`.",
982 bitand_m128(a: m128, b: m128) -> m128;
983 "Bitwise `(!a) & b`.",
984 bitandnot_m128(a: m128, b: m128) -> m128;
985 "Bitwise `a | b`.",
986 bitor_m128(a: m128, b: m128) -> m128;
987 "Bitwise `a ^ b`.",
988 bitxor_m128(a: m128, b: m128) -> m128;
989 "Low lane equality.",
990 cmp_eq_i32_m128_s(a: m128, b: m128) -> i32;
991 "Lanewise `a == b`.",
992 cmp_eq_mask_m128(a: m128, b: m128) -> m128;
993 "Low lane `a == b`, other lanes unchanged.",
994 cmp_eq_mask_m128_s(a: m128, b: m128) -> m128;
995 "Low lane greater than or equal to.",
996 cmp_ge_i32_m128_s(a: m128, b: m128) -> i32;
997 "Lanewise `a >= b`.",
998 cmp_ge_mask_m128(a: m128, b: m128) -> m128;
999 "Low lane `a >= b`, other lanes unchanged.",
1000 cmp_ge_mask_m128_s(a: m128, b: m128) -> m128;
1001 "Low lane greater than.",
1002 cmp_gt_i32_m128_s(a: m128, b: m128) -> i32;
1003 "Lanewise `a > b`.",
1004 cmp_gt_mask_m128(a: m128, b: m128) -> m128;
1005 "Low lane `a > b`, other lanes unchanged.",
1006 cmp_gt_mask_m128_s(a: m128, b: m128) -> m128;
1007 "Low lane less than or equal to.",
1008 cmp_le_i32_m128_s(a: m128, b: m128) -> i32;
1009 "Lanewise `a <= b`.",
1010 cmp_le_mask_m128(a: m128, b: m128) -> m128;
1011 "Low lane `a <= b`, other lanes unchanged.",
1012 cmp_le_mask_m128_s(a: m128, b: m128) -> m128;
1013 "Low lane less than.",
1014 cmp_lt_i32_m128_s(a: m128, b: m128) -> i32;
1015 "Lanewise `a < b`.",
1016 cmp_lt_mask_m128(a: m128, b: m128) -> m128;
1017 "Low lane `a < b`, other lanes unchanged.",
1018 cmp_lt_mask_m128_s(a: m128, b: m128) -> m128;
1019 "Low lane not equal to.",
1020 cmp_neq_i32_m128_s(a: m128, b: m128) -> i32;
1021 "Lanewise `a != b`.",
1022 cmp_neq_mask_m128(a: m128, b: m128) -> m128;
1023 "Low lane `a != b`, other lanes unchanged.",
1024 cmp_neq_mask_m128_s(a: m128, b: m128) -> m128;
1025 "Lanewise `!(a >= b)`.",
1026 cmp_nge_mask_m128(a: m128, b: m128) -> m128;
1027 "Low lane `!(a >= b)`, other lanes unchanged.",
1028 cmp_nge_mask_m128_s(a: m128, b: m128) -> m128;
1029 "Lanewise `!(a > b)`.",
1030 cmp_ngt_mask_m128(a: m128, b: m128) -> m128;
1031 "Low lane `!(a > b)`, other lanes unchanged.",
1032 cmp_ngt_mask_m128_s(a: m128, b: m128) -> m128;
1033 "Lanewise `!(a <= b)`.",
1034 cmp_nle_mask_m128(a: m128, b: m128) -> m128;
1035 "Low lane `!(a <= b)`, other lanes unchanged.",
1036 cmp_nle_mask_m128_s(a: m128, b: m128) -> m128;
1037 "Lanewise `!(a < b)`.",
1038 cmp_nlt_mask_m128(a: m128, b: m128) -> m128;
1039 "Low lane `!(a < b)`, other lanes unchanged.",
1040 cmp_nlt_mask_m128_s(a: m128, b: m128) -> m128;
1041 "Lanewise `(!a.is_nan()) & (!b.is_nan())`.",
1042 cmp_ordered_mask_m128(a: m128, b: m128) -> m128;
1043 "Low lane `(!a.is_nan()) & (!b.is_nan())`, other lanes unchanged.",
1044 cmp_ordered_mask_m128_s(a: m128, b: m128) -> m128;
1045 "Lanewise `a.is_nan() | b.is_nan()`.",
1046 cmp_unord_mask_m128(a: m128, b: m128) -> m128;
1047 "Low lane `a.is_nan() | b.is_nan()`, other lanes unchanged.",
1048 cmp_unord_mask_m128_s(a: m128, b: m128) -> m128;
1049 "Convert `i32` to `f32` and replace the low lane of the input.",
1050 convert_i32_replace_m128_s(a: m128, i: i32) -> m128;
1051 "Lanewise `a / b`.",
1052 div_m128(a: m128, b: m128) -> m128;
1053 "Low lane `a / b`, other lanes unchanged.",
1054 div_m128_s(a: m128, b: m128) -> m128;
1055 "Gets the low lane as an individual `f32` value.",
1056 get_f32_from_m128_s(a: m128) -> f32;
1057 "Converts the low lane to `i32` and extracts as an individual value.",
1058 get_i32_from_m128_s(a: m128) -> i32;
1059 "Loads the `f32` reference into the low lane of the register.",
1060 load_f32_m128_s(a: &f32) -> m128;
1061 "Loads the `f32` reference into all lanes of a register.",
1062 load_f32_splat_m128(a: &f32) -> m128;
1063 "Loads the reference into a register.",
1064 load_m128(a: &m128) -> m128;
1065 "Loads the reference into a register with reversed order.",
1066 load_reverse_m128(a: &m128) -> m128;
1067 "Loads the reference into a register.",
1068 load_unaligned_m128(a: &[f32; 4]) -> m128;
1069 "Lanewise `max(a, b)`.",
1070 max_m128(a: m128, b: m128) -> m128;
1071 "Low lane `max(a, b)`, other lanes unchanged.",
1072 max_m128_s(a: m128, b: m128) -> m128;
1073 "Lanewise `min(a, b)`.",
1074 min_m128(a: m128, b: m128) -> m128;
1075 "Low lane `min(a, b)`, other lanes unchanged.",
1076 min_m128_s(a: m128, b: m128) -> m128;
1077 "Move the high lanes of `b` to the low lanes of `a`, other lanes unchanged.",
1078 move_high_low_m128(a: m128, b: m128) -> m128;
1079 "Move the low lanes of `b` to the high lanes of `a`, other lanes unchanged.",
1080 move_low_high_m128(a: m128, b: m128) -> m128;
1081 "Move the low lane of `b` to `a`, other lanes unchanged.",
1082 move_m128_s(a: m128, b: m128) -> m128;
1083 "Gathers the sign bit of each lane.",
1084 move_mask_m128(a: m128) -> i32;
1085 "Lanewise `a * b`.",
1086 mul_m128(a: m128, b: m128) -> m128;
1087 "Low lane `a * b`, other lanes unchanged.",
1088 mul_m128_s(a: m128, b: m128) -> m128;
1089 "Lanewise `1.0 / a` approximation.",
1090 reciprocal_m128(a: m128) -> m128;
1091 "Low lane `1.0 / a` approximation, other lanes unchanged.",
1092 reciprocal_m128_s(a: m128) -> m128;
1093 "Lanewise `1.0 / sqrt(a)` approximation.",
1094 reciprocal_sqrt_m128(a: m128) -> m128;
1095 "Low lane `1.0 / sqrt(a)` approximation, other lanes unchanged.",
1096 reciprocal_sqrt_m128_s(a: m128) -> m128;
1097 "Sets the args into an `m128`, first arg is the high lane.",
1098 set_m128(three: f32, two: f32, one: f32, zero: f32) -> m128;
1099 "Sets the args into an `m128`, first arg is the high lane.",
1100 set_m128_s(low: f32) -> m128;
1101 "Sets the args into an `m128`, first arg is the low lane.",
1102 set_reversed_m128(zero: f32, one: f32, two: f32, three: f32) -> m128;
1103 "Splats the value to all lanes.",
1104 set_splat_m128(all: f32) -> m128;
1105 "Shuffle the `f32` lanes from `$a` and `$b` together using an immediate control value.",
1106 shuffle_abi_f32_all_m128<const MASK: i32>(a: m128, b: m128) -> m128;
1107 "Lanewise `sqrt(a)`.",
1108 sqrt_m128(a: m128) -> m128;
1109 "Low lane `sqrt(a)`, other lanes unchanged.",
1110 sqrt_m128_s(a: m128) -> m128;
1111 "Stores the value to the reference given.",
1112 store_m128(r: &mut m128, a: m128);
1113 "Stores the low lane value to the reference given.",
1114 store_m128_s(r: &mut f32, a: m128);
1115 "Stores the value to the reference given in reverse order.",
1116 store_reverse_m128(r: &mut m128, a: m128);
1117 "Stores the low lane value to all lanes of the reference given.",
1118 store_splat_m128(r: &mut m128, a: m128);
1119 "Stores the value to the reference given.",
1120 store_unaligned_m128(r: &mut [f32; 4], a: m128);
1121 "Lanewise `a - b`.",
1122 sub_m128(a: m128, b: m128) -> m128;
1123 "Low lane `a - b`, other lanes unchanged.",
1124 sub_m128_s(a: m128, b: m128) -> m128;
1125 "Transpose four `m128` as if they were a 4x4 matrix.",
1126 transpose_four_m128(a: &mut m128, b: &mut m128, c: &mut m128, d: &mut m128);
1127 "Unpack and interleave high lanes of `a` and `b`.",
1128 unpack_high_m128(a: m128, b: m128) -> m128;
1129 "Unpack and interleave low lanes of `a` and `b`.",
1130 unpack_low_m128(a: m128, b: m128) -> m128;
1131 "All lanes zero.",
1132 zeroed_m128() -> m128;
1133 }
1134}
1135impl_arch! {
1136 #[doc = "# Generic functions requiring the `sse` target feature.\n\n---"]
1137 #[doc = "See: <https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions>"]
1138 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "sse";
1139
1140 pub fn prefetch_et0<T>(addr: &T) {
1143 prefetch_et0(addr);
1144 }
1145 pub fn prefetch_et1<T>(addr: &T) {
1147 prefetch_et1(addr);
1148 }
1149 pub fn prefetch_nta<T>(addr: &T) {
1155 prefetch_nta(addr);
1156 }
1157 pub fn prefetch_t0<T>(addr: &T) {
1159 prefetch_t0(addr);
1160 }
1161 pub fn prefetch_t1<T>(addr: &T) {
1163 prefetch_t1(addr);
1164 }
1165 pub fn prefetch_t2<T>(addr: &T) {
1168 prefetch_t2(addr);
1169 }
1170}
1171impl_arch! {
1172 #[doc = "# Functions requiring the `sse2` target feature.\n\n---"]
1173 #[doc = "See: <https://en.wikipedia.org/wiki/SSE2>"]
1174 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "sse2";
1175 arch_fn! {
1176 "Lanewise `a + b` with lanes as `i16`.",
1177 add_i16_m128i(a: m128i, b: m128i) -> m128i;
1178 "Lanewise `a + b` with lanes as `i32`.",
1179 add_i32_m128i(a: m128i, b: m128i) -> m128i;
1180 "Lanewise `a + b` with lanes as `i64`.",
1181 add_i64_m128i(a: m128i, b: m128i) -> m128i;
1182 "Lanewise `a + b` with lanes as `i8`.",
1183 add_i8_m128i(a: m128i, b: m128i) -> m128i;
1184 "Lanewise `a + b`.",
1185 add_m128d(a: m128d, b: m128d) -> m128d;
1186 "Lowest lane `a + b`, high lane unchanged.",
1187 add_m128d_s(a: m128d, b: m128d) -> m128d;
1188 "Lanewise saturating `a + b` with lanes as `i16`.",
1189 add_saturating_i16_m128i(a: m128i, b: m128i) -> m128i;
1190 "Lanewise saturating `a + b` with lanes as `i8`.",
1191 add_saturating_i8_m128i(a: m128i, b: m128i) -> m128i;
1192 "Lanewise saturating `a + b` with lanes as `u16`.",
1193 add_saturating_u16_m128i(a: m128i, b: m128i) -> m128i;
1194 "Lanewise saturating `a + b` with lanes as `u8`.",
1195 add_saturating_u8_m128i(a: m128i, b: m128i) -> m128i;
1196 "Lanewise average of the `u16` values.",
1197 average_u16_m128i(a: m128i, b: m128i) -> m128i;
1198 "Lanewise average of the `u8` values.",
1199 average_u8_m128i(a: m128i, b: m128i) -> m128i;
1200 "Bitwise `a & b`.",
1201 bitand_m128d(a: m128d, b: m128d) -> m128d;
1202 "Bitwise `a & b`.",
1203 bitand_m128i(a: m128i, b: m128i) -> m128i;
1204 "Bitwise `(!a) & b`.",
1205 bitandnot_m128d(a: m128d, b: m128d) -> m128d;
1206 "Bitwise `(!a) & b`.",
1207 bitandnot_m128i(a: m128i, b: m128i) -> m128i;
1208 "Bitwise `a | b`.",
1209 bitor_m128d(a: m128d, b: m128d) -> m128d;
1210 "Bitwise `a | b`.",
1211 bitor_m128i(a: m128i, b: m128i) -> m128i;
1212 "Bitwise `a ^ b`.",
1213 bitxor_m128d(a: m128d, b: m128d) -> m128d;
1214 "Bitwise `a ^ b`.",
1215 bitxor_m128i(a: m128i, b: m128i) -> m128i;
1216 "Shifts all bits in the entire register left by a number of **bytes**.",
1217 byte_shl_imm_u128_m128i<const IMM: i32>(a: m128i) -> m128i;
1218 "Shifts all bits in the entire register right by a number of **bytes**.",
1219 byte_shr_imm_u128_m128i<const IMM: i32>(a: m128i) -> m128i;
1220 "Bit-preserving cast to `m128` from `m128d`",
1221 cast_to_m128_from_m128d(a: m128d) -> m128;
1222 "Bit-preserving cast to `m128` from `m128i`",
1223 cast_to_m128_from_m128i(a: m128i) -> m128;
1224 "Bit-preserving cast to `m128d` from `m128`",
1225 cast_to_m128d_from_m128(a: m128) -> m128d;
1226 "Bit-preserving cast to `m128d` from `m128i`",
1227 cast_to_m128d_from_m128i(a: m128i) -> m128d;
1228 "Bit-preserving cast to `m128i` from `m128`",
1229 cast_to_m128i_from_m128(a: m128) -> m128i;
1230 "Bit-preserving cast to `m128i` from `m128d`",
1231 cast_to_m128i_from_m128d(a: m128d) -> m128i;
1232 "Low lane `f64` equal to.",
1233 cmp_eq_i32_m128d_s(a: m128d, b: m128d) -> i32;
1234 "Lanewise `a == b` with lanes as `i16`.",
1235 cmp_eq_mask_i16_m128i(a: m128i, b: m128i) -> m128i;
1236 "Lanewise `a == b` with lanes as `i32`.",
1237 cmp_eq_mask_i32_m128i(a: m128i, b: m128i) -> m128i;
1238 "Lanewise `a == b` with lanes as `i8`.",
1239 cmp_eq_mask_i8_m128i(a: m128i, b: m128i) -> m128i;
1240 "Lanewise `a == b`, mask output.",
1241 cmp_eq_mask_m128d(a: m128d, b: m128d) -> m128d;
1242 "Low lane `a == b`, other lanes unchanged.",
1243 cmp_eq_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1244 "Low lane `f64` greater than or equal to.",
1245 cmp_ge_i32_m128d_s(a: m128d, b: m128d) -> i32;
1246 "Lanewise `a >= b`.",
1247 cmp_ge_mask_m128d(a: m128d, b: m128d) -> m128d;
1248 "Low lane `a >= b`, other lanes unchanged.",
1249 cmp_ge_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1250 "Low lane `f64` greater than.",
1251 cmp_gt_i32_m128d_s(a: m128d, b: m128d) -> i32;
1252 "Lanewise `a > b` with lanes as `i16`.",
1253 cmp_gt_mask_i16_m128i(a: m128i, b: m128i) -> m128i;
1254 "Lanewise `a > b` with lanes as `i32`.",
1255 cmp_gt_mask_i32_m128i(a: m128i, b: m128i) -> m128i;
1256 "Lanewise `a > b` with lanes as `i8`.",
1257 cmp_gt_mask_i8_m128i(a: m128i, b: m128i) -> m128i;
1258 "Lanewise `a > b`.",
1259 cmp_gt_mask_m128d(a: m128d, b: m128d) -> m128d;
1260 "Low lane `a > b`, other lanes unchanged.",
1261 cmp_gt_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1262 "Low lane `f64` less than or equal to.",
1263 cmp_le_i32_m128d_s(a: m128d, b: m128d) -> i32;
1264 "Lanewise `a <= b`.",
1265 cmp_le_mask_m128d(a: m128d, b: m128d) -> m128d;
1266 "Low lane `a <= b`, other lanes unchanged.",
1267 cmp_le_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1268 "Low lane `f64` less than.",
1269 cmp_lt_i32_m128d_s(a: m128d, b: m128d) -> i32;
1270 "Lanewise `a < b` with lanes as `i16`.",
1271 cmp_lt_mask_i16_m128i(a: m128i, b: m128i) -> m128i;
1272 "Lanewise `a < b` with lanes as `i32`.",
1273 cmp_lt_mask_i32_m128i(a: m128i, b: m128i) -> m128i;
1274 "Lanewise `a < b` with lanes as `i8`.",
1275 cmp_lt_mask_i8_m128i(a: m128i, b: m128i) -> m128i;
1276 "Lanewise `a < b`.",
1277 cmp_lt_mask_m128d(a: m128d, b: m128d) -> m128d;
1278 "Low lane `a < b`, other lane unchanged.",
1279 cmp_lt_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1280 "Low lane `f64` less than.",
1281 cmp_neq_i32_m128d_s(a: m128d, b: m128d) -> i32;
1282 "Lanewise `a != b`.",
1283 cmp_neq_mask_m128d(a: m128d, b: m128d) -> m128d;
1284 "Low lane `a != b`, other lane unchanged.",
1285 cmp_neq_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1286 "Lanewise `!(a >= b)`.",
1287 cmp_nge_mask_m128d(a: m128d, b: m128d) -> m128d;
1288 "Low lane `!(a >= b)`, other lane unchanged.",
1289 cmp_nge_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1290 "Lanewise `!(a > b)`.",
1291 cmp_ngt_mask_m128d(a: m128d, b: m128d) -> m128d;
1292 "Low lane `!(a > b)`, other lane unchanged.",
1293 cmp_ngt_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1294 "Lanewise `!(a <= b)`.",
1295 cmp_nle_mask_m128d(a: m128d, b: m128d) -> m128d;
1296 "Low lane `!(a <= b)`, other lane unchanged.",
1297 cmp_nle_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1298 "Lanewise `!(a < b)`.",
1299 cmp_nlt_mask_m128d(a: m128d, b: m128d) -> m128d;
1300 "Low lane `!(a < b)`, other lane unchanged.",
1301 cmp_nlt_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1302 "Lanewise `(!a.is_nan()) & (!b.is_nan())`.",
1303 cmp_ordered_mask_m128d(a: m128d, b: m128d) -> m128d;
1304 "Low lane `(!a.is_nan()) & (!b.is_nan())`, other lane unchanged.",
1305 cmp_ordered_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1306 "Lanewise `a.is_nan() | b.is_nan()`.",
1307 cmp_unord_mask_m128d(a: m128d, b: m128d) -> m128d;
1308 "Low lane `a.is_nan() | b.is_nan()`, other lane unchanged.",
1309 cmp_unord_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1310 "Convert `i32` to `f64` and replace the low lane of the input.",
1311 convert_i32_replace_m128d_s(a: m128d, i: i32) -> m128d;
1312 "Convert `i64` to `f64` and replace the low lane of the input.",
1313 convert_i64_replace_m128d_s(a: m128d, i: i64) -> m128d;
1314 "Converts the lower `f32` to `f64` and replace the low lane of the input",
1315 convert_m128_s_replace_m128d_s(a: m128d, b: m128) -> m128d;
1316 "Converts the low `f64` to `f32` and replaces the low lane of the input.",
1317 convert_m128d_s_replace_m128_s(a: m128, b: m128d) -> m128;
1318 "Rounds the `f32` lanes to `i32` lanes.",
1319 convert_to_i32_m128i_from_m128(a: m128) -> m128i;
1320 "Rounds the two `f64` lanes to the low two `i32` lanes.",
1321 convert_to_i32_m128i_from_m128d(a: m128d) -> m128i;
1322 "Rounds the four `i32` lanes to four `f32` lanes.",
1323 convert_to_m128_from_i32_m128i(a: m128i) -> m128;
1324 "Rounds the two `f64` lanes to the low two `f32` lanes.",
1325 convert_to_m128_from_m128d(a: m128d) -> m128;
1326 "Rounds the lower two `i32` lanes to two `f64` lanes.",
1327 convert_to_m128d_from_lower2_i32_m128i(a: m128i) -> m128d;
1328 "Rounds the two `f64` lanes to the low two `f32` lanes.",
1329 convert_to_m128d_from_lower2_m128(a: m128) -> m128d;
1330 "Copy the low `i64` lane to a new register, upper bits 0.",
1331 copy_i64_m128i_s(a: m128i) -> m128i;
1332 "Copies the `a` value and replaces the low lane with the low `b` value.",
1333 copy_replace_low_f64_m128d(a: m128d, b: m128d) -> m128d;
1334 "Lanewise `a / b`.",
1335 div_m128d(a: m128d, b: m128d) -> m128d;
1336 "Lowest lane `a / b`, high lane unchanged.",
1337 div_m128d_s(a: m128d, b: m128d) -> m128d;
1338 "Gets an `i16` value out of an `m128i`, returns as `i32`.",
1339 extract_i16_as_i32_m128i<const LANE: i32>(a: m128i) -> i32;
1340 "Gets the lower lane as an `f64` value.",
1341 get_f64_from_m128d_s(a: m128d) -> f64;
1342 "Converts the lower lane to an `i32` value.",
1343 get_i32_from_m128d_s(a: m128d) -> i32;
1344 "Converts the lower lane to an `i32` value.",
1345 get_i32_from_m128i_s(a: m128i) -> i32;
1346 "Converts the lower lane to an `i64` value.",
1347 get_i64_from_m128d_s(a: m128d) -> i64;
1348 "Converts the lower lane to an `i64` value.",
1349 get_i64_from_m128i_s(a: m128i) -> i64;
1350 "Inserts the low 16 bits of an `i32` value into an `m128i`.",
1351 insert_i16_from_i32_m128i<const LANE: i32>(a: m128i, i: i32) -> m128i;
1352 "Loads the reference into the low lane of the register.",
1353 load_f64_m128d_s(a: &f64) -> m128d;
1354 "Loads the `f64` reference into all lanes of a register.",
1355 load_f64_splat_m128d(a: &f64) -> m128d;
1356 "Loads the low `i64` into a register.",
1357 load_i64_m128i_s(a: &m128i) -> m128i;
1358 "Loads the reference into a register.",
1359 load_m128d(a: &m128d) -> m128d;
1360 "Loads the reference into a register.",
1361 load_m128i(a: &m128i) -> m128i;
1362 "Loads the reference into a register, replacing the high lane.",
1363 load_replace_high_m128d(a: m128d, b: &f64) -> m128d;
1364 "Loads the reference into a register, replacing the low lane.",
1365 load_replace_low_m128d(a: m128d, b: &f64) -> m128d;
1366 "Loads the reference into a register with reversed order.",
1367 load_reverse_m128d(a: &m128d) -> m128d;
1368 "Loads the reference into a register.",
1369 load_unaligned_m128d(a: &[f64; 2]) -> m128d;
1370 "Loads the reference into a register.",
1371 load_unaligned_m128i(a: &[u8; 16]) -> m128i;
1372 "Lanewise `max(a, b)` with lanes as `i16`.",
1373 max_i16_m128i(a: m128i, b: m128i) -> m128i;
1374 "Lanewise `max(a, b)`.",
1375 max_m128d(a: m128d, b: m128d) -> m128d;
1376 "Low lane `max(a, b)`, other lanes unchanged.",
1377 max_m128d_s(a: m128d, b: m128d) -> m128d;
1378 "Lanewise `max(a, b)` with lanes as `u8`.",
1379 max_u8_m128i(a: m128i, b: m128i) -> m128i;
1380 "Lanewise `min(a, b)` with lanes as `i16`.",
1381 min_i16_m128i(a: m128i, b: m128i) -> m128i;
1382 "Lanewise `min(a, b)`.",
1383 min_m128d(a: m128d, b: m128d) -> m128d;
1384 "Low lane `min(a, b)`, other lanes unchanged.",
1385 min_m128d_s(a: m128d, b: m128d) -> m128d;
1386 "Lanewise `min(a, b)` with lanes as `u8`.",
1387 min_u8_m128i(a: m128i, b: m128i) -> m128i;
1388 "Gathers the `i8` sign bit of each lane.",
1389 move_mask_i8_m128i(a: m128i) -> i32;
1390 "Gathers the sign bit of each lane.",
1391 move_mask_m128d(a: m128d) -> i32;
1392 "Multiply `i16` lanes producing `i32` values, horizontal add pairs of `i32`
1393 values to produce the final output.",
1394 mul_i16_horizontal_add_m128i(a: m128i, b: m128i) -> m128i;
1395 "Lanewise `a * b` with lanes as `i16`, keep the high bits of the `i32` intermediates.",
1396 mul_i16_keep_high_m128i(a: m128i, b: m128i) -> m128i;
1397 "Lanewise `a * b` with lanes as `i16`, keep the low bits of the `i32` intermediates.",
1398 mul_i16_keep_low_m128i(a: m128i, b: m128i) -> m128i;
1399 "Lanewise `a * b`.",
1400 mul_m128d(a: m128d, b: m128d) -> m128d;
1401 "Lowest lane `a * b`, high lane unchanged.",
1402 mul_m128d_s(a: m128d, b: m128d) -> m128d;
1403 "Lanewise `a * b` with lanes as `u16`, keep the high bits of the `u32` intermediates.",
1404 mul_u16_keep_high_m128i(a: m128i, b: m128i) -> m128i;
1405 "Multiplies the odd `u32` lanes and gives the widened (`u64`) results.",
1406 mul_widen_u32_odd_m128i(a: m128i, b: m128i) -> m128i;
1407 "Saturating convert `i16` to `i8`, and pack the values.",
1408 pack_i16_to_i8_m128i(a: m128i, b: m128i) -> m128i;
1409 "Saturating convert `i16` to `u8`, and pack the values.",
1410 pack_i16_to_u8_m128i(a: m128i, b: m128i) -> m128i;
1411 "Saturating convert `i32` to `i16`, and pack the values.",
1412 pack_i32_to_i16_m128i(a: m128i, b: m128i) -> m128i;
1413 "Sets the args into an `m128i`, first arg is the high lane.",
1414 set_i16_m128i(a: i16, b: i16, c: i16, d: i16, e: i16, f: i16, g: i16, h: i16) -> m128i;
1415 "Sets the args into an `m128i`, first arg is the high lane.",
1416 set_i32_m128i(a: i32, b: i32, c: i32, d: i32) -> m128i;
1417 "Set an `i32` as the low 32-bit lane of an `m128i`, other lanes blank.",
1418 set_i32_m128i_s(i: i32) -> m128i;
1419 "Sets the args into an `m128i`, first arg is the high lane.",
1420 set_i64_m128i(a: i64, b: i64) -> m128i;
1421 "Set an `i64` as the low 64-bit lane of an `m128i`, other lanes blank.",
1422 set_i64_m128i_s(i: i64) -> m128i;
1423 "Sets the args into an `m128i`, first arg is the high lane.",
1424 set_i8_m128i(a: i8, b: i8, c: i8, d: i8, e: i8, f: i8, g: i8, h: i8, i: i8, j: i8,
1425 k: i8, l: i8, m: i8, n: i8, o: i8, p: i8) -> m128i;
1426 "Sets the args into an `m128d`, first arg is the high lane.",
1427 set_m128d(a: f64, b: f64) -> m128d;
1428 "Sets the args into the low lane of a `m128d`.",
1429 set_m128d_s(a: f64) -> m128d;
1430 "Sets the args into an `m128i`, first arg is the low lane.",
1431 set_reversed_i16_m128i(a: i16, b: i16, c: i16, d: i16, e: i16, f: i16, g: i16,
1432 h: i16) -> m128i;
1433 "Sets the args into an `m128i`, first arg is the low lane.",
1434 set_reversed_i32_m128i(a: i32, b: i32, c: i32, d: i32) -> m128i;
1435 "Sets the args into an `m128i`, first arg is the low lane.",
1436 set_reversed_i8_m128i(a: i8, b: i8, c: i8, d: i8, e: i8, f: i8, g: i8, h: i8, i: i8,
1437 j: i8, k: i8, l: i8, m: i8, n: i8, o: i8, p: i8) -> m128i;
1438 "Sets the args into an `m128d`, first arg is the low lane.",
1439 set_reversed_m128d(a: f64, b: f64) -> m128d;
1440 "Splats the `i16` to all lanes of the `m128i`.",
1441 set_splat_i16_m128i(i: i16) -> m128i;
1442 "Splats the `i32` to all lanes of the `m128i`.",
1443 set_splat_i32_m128i(i: i32) -> m128i;
1444 "Splats the `i64` to both lanes of the `m128i`.",
1445 set_splat_i64_m128i(i: i64) -> m128i;
1446 "Splats the `i8` to all lanes of the `m128i`.",
1447 set_splat_i8_m128i(i: i8) -> m128i;
1448 "Splats the args into both lanes of the `m128d`.",
1449 set_splat_m128d(a: f64) -> m128d;
1450 "Shift all `u16` lanes to the left by the `count` in the lower `u64` lane.",
1451 shl_all_u16_m128i(a: m128i, count: m128i) -> m128i;
1452 "Shift all `u32` lanes to the left by the `count` in the lower `u64` lane.",
1453 shl_all_u32_m128i(a: m128i, count: m128i) -> m128i;
1454 "Shift all `u64` lanes to the left by the `count` in the lower `u64` lane.",
1455 shl_all_u64_m128i(a: m128i, count: m128i) -> m128i;
1456 "Shifts all `u16` lanes left by an immediate.",
1457 shl_imm_u16_m128i<const IMM: i32>(a: m128i) -> m128i;
1458 "Shifts all `u32` lanes left by an immediate.",
1459 shl_imm_u32_m128i<const IMM: i32>(a: m128i) -> m128i;
1460 "Shifts both `u64` lanes left by an immediate.",
1461 shl_imm_u64_m128i<const IMM: i32>(a: m128i) -> m128i;
1462 "Shift each `i16` lane to the right by the `count` in the lower `i64` lane.",
1463 shr_all_i16_m128i(a: m128i, count: m128i) -> m128i;
1464 "Shift each `i32` lane to the right by the `count` in the lower `i64` lane.",
1465 shr_all_i32_m128i(a: m128i, count: m128i) -> m128i;
1466 "Shift each `u16` lane to the right by the `count` in the lower `u64` lane.",
1467 shr_all_u16_m128i(a: m128i, count: m128i) -> m128i;
1468 "Shift each `u32` lane to the right by the `count` in the lower `u64` lane.",
1469 shr_all_u32_m128i(a: m128i, count: m128i) -> m128i;
1470 "Shift each `u64` lane to the right by the `count` in the lower `u64` lane.",
1471 shr_all_u64_m128i(a: m128i, count: m128i) -> m128i;
1472 "Shifts all `i16` lanes right by an immediate.",
1473 shr_imm_i16_m128i<const IMM: i32>(a: m128i) -> m128i;
1474 "Shifts all `i32` lanes right by an immediate.",
1475 shr_imm_i32_m128i<const IMM: i32>(a: m128i) -> m128i;
1476 "Shifts all `u16` lanes right by an immediate.",
1477 shr_imm_u16_m128i<const IMM: i32>(a: m128i) -> m128i;
1478 "Shifts all `u32` lanes right by an immediate.",
1479 shr_imm_u32_m128i<const IMM: i32>(a: m128i) -> m128i;
1480 "Shifts both `u64` lanes right by an immediate.",
1481 shr_imm_u64_m128i<const IMM: i32>(a: m128i) -> m128i;
1482 "Shuffle the `f64` lanes from `$a` and `$b` together using an immediate control value.",
1483 shuffle_abi_f64_all_m128d<const MASK: i32>(a: m128d, b: m128d) -> m128d;
1484 "Shuffle the `i32` lanes in `$a` using an immediate control value.",
1485 shuffle_ai_f32_all_m128i<const MASK: i32>(a: m128i) -> m128i;
1486 "Shuffle the high `i16` lanes in `$a` using an immediate control value.",
1487 shuffle_ai_i16_h64all_m128i<const MASK: i32>(a: m128i) -> m128i;
1488 "Shuffle the low `i16` lanes in `$a` using an immediate control value.",
1489 shuffle_ai_i16_l64all_m128i<const MASK: i32>(a: m128i) -> m128i;
1490 "Lanewise `sqrt(a)`.",
1491 sqrt_m128d(a: m128d) -> m128d;
1492 "Low lane `sqrt(b)`, upper lane is unchanged from `a`.",
1493 sqrt_m128d_s(a: m128d, b: m128d) -> m128d;
1494 "Stores the high lane value to the reference given.",
1495 store_high_m128d_s(r: &mut f64, a: m128d);
1496 "Stores the value to the reference given.",
1497 store_i64_m128i_s(r: &mut i64, a: m128i);
1498 "Stores the value to the reference given.",
1499 store_m128d(r: &mut m128d, a: m128d);
1500 "Stores the low lane value to the reference given.",
1501 store_m128d_s(r: &mut f64, a: m128d);
1502 "Stores the value to the reference given.",
1503 store_m128i(r: &mut m128i, a: m128i);
1504 "Stores the value to the reference given.",
1505 store_reversed_m128d(r: &mut m128d, a: m128d);
1506 "Stores the low lane value to all lanes of the reference given.",
1507 store_splat_m128d(r: &mut m128d, a: m128d);
1508 "Stores the value to the reference given.",
1509 store_unaligned_m128d(r: &mut [f64; 2], a: m128d);
1510 "Stores the value to the reference given.",
1511 store_unaligned_m128i(r: &mut [u8; 16], a: m128i);
1512 "Lanewise `a - b` with lanes as `i16`.",
1513 sub_i16_m128i(a: m128i, b: m128i) -> m128i;
1514 "Lanewise `a - b` with lanes as `i32`.",
1515 sub_i32_m128i(a: m128i, b: m128i) -> m128i;
1516 "Lanewise `a - b` with lanes as `i64`.",
1517 sub_i64_m128i(a: m128i, b: m128i) -> m128i;
1518 "Lanewise `a - b` with lanes as `i8`.",
1519 sub_i8_m128i(a: m128i, b: m128i) -> m128i;
1520 "Lanewise `a - b`.",
1521 sub_m128d(a: m128d, b: m128d) -> m128d;
1522 "Lowest lane `a - b`, high lane unchanged.",
1523 sub_m128d_s(a: m128d, b: m128d) -> m128d;
1524 "Lanewise saturating `a - b` with lanes as `i16`.",
1525 sub_saturating_i16_m128i(a: m128i, b: m128i) -> m128i;
1526 "Lanewise saturating `a - b` with lanes as `i8`.",
1527 sub_saturating_i8_m128i(a: m128i, b: m128i) -> m128i;
1528 "Lanewise saturating `a - b` with lanes as `u16`.",
1529 sub_saturating_u16_m128i(a: m128i, b: m128i) -> m128i;
1530 "Lanewise saturating `a - b` with lanes as `u8`.",
1531 sub_saturating_u8_m128i(a: m128i, b: m128i) -> m128i;
1532 "Compute “sum of `u8` absolute differences”.",
1533 sum_of_u8_abs_diff_m128i(a: m128i, b: m128i) -> m128i;
1534 "Truncate the `f32` lanes to `i32` lanes.",
1535 truncate_m128_to_m128i(a: m128) -> m128i;
1536 "Truncate the `f64` lanes to the lower `i32` lanes (upper `i32` lanes 0).",
1537 truncate_m128d_to_m128i(a: m128d) -> m128i;
1538 "Truncate the lower lane into an `i32`.",
1539 truncate_to_i32_m128d_s(a: m128d) -> i32;
1540 "Truncate the lower lane into an `i64`.",
1541 truncate_to_i64_m128d_s(a: m128d) -> i64;
1542 "Unpack and interleave high `i16` lanes of `a` and `b`.",
1543 unpack_high_i16_m128i(a: m128i, b: m128i) -> m128i;
1544 "Unpack and interleave high `i32` lanes of `a` and `b`.",
1545 unpack_high_i32_m128i(a: m128i, b: m128i) -> m128i;
1546 "Unpack and interleave high `i64` lanes of `a` and `b`.",
1547 unpack_high_i64_m128i(a: m128i, b: m128i) -> m128i;
1548 "Unpack and interleave high `i8` lanes of `a` and `b`.",
1549 unpack_high_i8_m128i(a: m128i, b: m128i) -> m128i;
1550 "Unpack and interleave high lanes of `a` and `b`.",
1551 unpack_high_m128d(a: m128d, b: m128d) -> m128d;
1552 "Unpack and interleave low `i16` lanes of `a` and `b`.",
1553 unpack_low_i16_m128i(a: m128i, b: m128i) -> m128i;
1554 "Unpack and interleave low `i32` lanes of `a` and `b`.",
1555 unpack_low_i32_m128i(a: m128i, b: m128i) -> m128i;
1556 "Unpack and interleave low `i64` lanes of `a` and `b`.",
1557 unpack_low_i64_m128i(a: m128i, b: m128i) -> m128i;
1558 "Unpack and interleave low `i8` lanes of `a` and `b`.",
1559 unpack_low_i8_m128i(a: m128i, b: m128i) -> m128i;
1560 "Unpack and interleave low lanes of `a` and `b`.",
1561 unpack_low_m128d(a: m128d, b: m128d) -> m128d;
1562 "Both lanes zero.",
1563 zeroed_m128d() -> m128d;
1564 "All lanes zero.",
1565 zeroed_m128i() -> m128i;
1566 }
1567}
1568impl_arch! {
1569 #[doc = "# Functions requiring the `sse3` target feature.\n\n---"]
1570 #[doc = "See: <https://en.wikipedia.org/wiki/SSE3>"]
1571 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "sse3";
1572 arch_fn! {
1573 "Add each lane horizontally, pack the outputs as `a` then `b`.",
1574 add_horizontal_m128(a: m128, b: m128) -> m128;
1575 "Add each lane horizontally, pack the outputs as `a` then `b`.",
1576 add_horizontal_m128d(a: m128d, b: m128d) -> m128d;
1577 "Alternately, from the top, add a lane and then subtract a lane.",
1578 addsub_m128(a: m128, b: m128) -> m128;
1579 "Add the high lane and subtract the low lane.",
1580 addsub_m128d(a: m128d, b: m128d) -> m128d;
1581 "Duplicate the odd lanes to the even lanes.",
1582 duplicate_even_lanes_m128(a: m128) -> m128;
1583 "Copy the low lane of the input to both lanes of the output.",
1584 duplicate_low_lane_m128d_s(a: m128d) -> m128d;
1585 "Duplicate the odd lanes to the even lanes.",
1586 duplicate_odd_lanes_m128(a: m128) -> m128;
1587 "Subtract each lane horizontally, pack the outputs as `a` then `b`.",
1588 sub_horizontal_m128(a: m128, b: m128) -> m128;
1589 "Subtract each lane horizontally, pack the outputs as `a` then `b`.",
1590 sub_horizontal_m128d(a: m128d, b: m128d) -> m128d;
1591 }
1592}
1593impl_arch! {
1594 #[doc = "# Functions requiring the `sse4.1` target feature.\n\n---"]
1595 #[doc = "See: <https://en.wikipedia.org/wiki/SSE4#SSE4.1>"]
1596 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "sse4.1";
1597 arch_fn! {
1598 "Blends the `i16` lanes according to the immediate mask.",
1599 blend_imm_i16_m128i<const IMM: i32>(a: m128i, b: m128i) -> m128i;
1600 "Blends the lanes according to the immediate mask.",
1601 blend_imm_m128<const IMM: i32>(a: m128, b: m128) -> m128;
1602 "Blends the `i16` lanes according to the immediate mask.",
1603 blend_imm_m128d<const IMM: i32>(a: m128d, b: m128d) -> m128d;
1604 "Blend the `i8` lanes according to a runtime varying mask.",
1605 blend_varying_i8_m128i(a: m128i, b: m128i, mask: m128i) -> m128i;
1606 "Blend the lanes according to a runtime varying mask.",
1607 blend_varying_m128(a: m128, b: m128, mask: m128) -> m128;
1608 "Blend the lanes according to a runtime varying mask.",
1609 blend_varying_m128d(a: m128d, b: m128d, mask: m128d) -> m128d;
1610 "Round each lane to a whole number, towards positive infinity.",
1611 ceil_m128(a: m128) -> m128;
1612 "Round the low lane of `b` toward positive infinity, other lanes `a`.",
1613 ceil_m128_s(a: m128, b: m128) -> m128;
1614 "Round each lane to a whole number, towards positive infinity.",
1615 ceil_m128d(a: m128d) -> m128d;
1616 "Round the low lane of `b` toward positive infinity, high lane is `a`.",
1617 ceil_m128d_s(a: m128d, b: m128d) -> m128d;
1618 "Lanewise `a == b` with lanes as `i64`.",
1619 cmp_eq_mask_i64_m128i(a: m128i, b: m128i) -> m128i;
1620 "Convert the lower two `i64` lanes to two `i32` lanes.",
1621 convert_to_i16_m128i_from_lower2_i16_m128i(a: m128i) -> m128i;
1622 "Convert the lower eight `i8` lanes to eight `i16` lanes.",
1623 convert_to_i16_m128i_from_lower8_i8_m128i(a: m128i) -> m128i;
1624 "Convert the lower four `i16` lanes to four `i32` lanes.",
1625 convert_to_i32_m128i_from_lower4_i16_m128i(a: m128i) -> m128i;
1626 "Convert the lower four `i8` lanes to four `i32` lanes.",
1627 convert_to_i32_m128i_from_lower4_i8_m128i(a: m128i) -> m128i;
1628 "Convert the lower two `i32` lanes to two `i64` lanes.",
1629 convert_to_i64_m128i_from_lower2_i32_m128i(a: m128i) -> m128i;
1630 "Convert the lower two `i8` lanes to two `i64` lanes.",
1631 convert_to_i64_m128i_from_lower2_i8_m128i(a: m128i) -> m128i;
1632 "Convert the lower eight `u8` lanes to eight `u16` lanes.",
1633 convert_to_u16_m128i_from_lower8_u8_m128i(a: m128i) -> m128i;
1634 "Convert the lower four `u16` lanes to four `u32` lanes.",
1635 convert_to_u32_m128i_from_lower4_u16_m128i(a: m128i) -> m128i;
1636 "Convert the lower four `u8` lanes to four `u32` lanes.",
1637 convert_to_u32_m128i_from_lower4_u8_m128i(a: m128i) -> m128i;
1638 "Convert the lower two `u16` lanes to two `u64` lanes.",
1639 convert_to_u64_m128i_from_lower2_u16_m128i(a: m128i) -> m128i;
1640 "Convert the lower two `u32` lanes to two `u64` lanes.",
1641 convert_to_u64_m128i_from_lower2_u32_m128i(a: m128i) -> m128i;
1642 "Convert the lower two `u8` lanes to two `u64` lanes.",
1643 convert_to_u64_m128i_from_lower2_u8_m128i(a: m128i) -> m128i;
1644 "Performs a dot product of two `m128` registers.",
1645 dot_product_m128<const IMM: i32>(a: m128, b: m128) -> m128;
1646 "Performs a dot product of two `m128d` registers.",
1647 dot_product_m128d<const IMM: i32>(a: m128d, b: m128d) -> m128d;
1648 "Gets the `f32` lane requested. Returns as an `i32` bit pattern.",
1649 extract_f32_as_i32_bits_imm_m128<const IMM: i32>(a: m128) -> i32;
1650 "Gets the `i32` lane requested. Only the lowest 2 bits are considered.",
1651 extract_i32_imm_m128i<const IMM: i32>(a: m128i) -> i32;
1652 "Gets the `i64` lane requested. Only the lowest bit is considered.",
1653 extract_i64_imm_m128i<const IMM: i32>(a: m128i) -> i64;
1654 "Gets the `i8` lane requested. Only the lowest 4 bits are considered.",
1655 extract_i8_as_i32_imm_m128i<const IMM: i32>(a: m128i) -> i32;
1656 "Round each lane to a whole number, towards negative infinity",
1657 floor_m128(a: m128) -> m128;
1658 "Round the low lane of `b` toward negative infinity, other lanes `a`.",
1659 floor_m128_s(a: m128, b: m128) -> m128;
1660 "Round each lane to a whole number, towards negative infinity",
1661 floor_m128d(a: m128d) -> m128d;
1662 "Round the low lane of `b` toward negative infinity, high lane is `a`.",
1663 floor_m128d_s(a: m128d, b: m128d) -> m128d;
1664 "Inserts a lane from `$b` into `$a`, optionally at a new position.",
1665 insert_f32_imm_m128<const IMM: i32>(a: m128, b: m128) -> m128;
1666 "Inserts a new value for the `i32` lane specified.",
1667 insert_i32_imm_m128i<const IMM: i32>(a: m128i, new: i32) -> m128i;
1668 "Inserts a new value for the `i64` lane specified.",
1669 insert_i64_imm_m128i<const IMM: i32>(a: m128i, new: i64) -> m128i;
1670 "Inserts a new value for the `i64` lane specified.",
1671 insert_i8_imm_m128i<const IMM: i32>(a: m128i, new: i32) -> m128i;
1672 "Lanewise `max(a, b)` with lanes as `i32`.",
1673 max_i32_m128i(a: m128i, b: m128i) -> m128i;
1674 "Lanewise `max(a, b)` with lanes as `i8`.",
1675 max_i8_m128i(a: m128i, b: m128i) -> m128i;
1676 "Lanewise `max(a, b)` with lanes as `u16`.",
1677 max_u16_m128i(a: m128i, b: m128i) -> m128i;
1678 "Lanewise `max(a, b)` with lanes as `u32`.",
1679 max_u32_m128i(a: m128i, b: m128i) -> m128i;
1680 "Lanewise `min(a, b)` with lanes as `i32`.",
1681 min_i32_m128i(a: m128i, b: m128i) -> m128i;
1682 "Lanewise `min(a, b)` with lanes as `i8`.",
1683 min_i8_m128i(a: m128i, b: m128i) -> m128i;
1684 "Min `u16` value, position, and other lanes zeroed.",
1685 min_position_u16_m128i(a: m128i) -> m128i;
1686 "Lanewise `min(a, b)` with lanes as `u16`.",
1687 min_u16_m128i(a: m128i, b: m128i) -> m128i;
1688 "Lanewise `min(a, b)` with lanes as `u32`.",
1689 min_u32_m128i(a: m128i, b: m128i) -> m128i;
1690 "Lanewise `a * b` with 32-bit lanes.",
1691 mul_32_m128i(a: m128i, b: m128i) -> m128i;
1692 "Multiplies the odd `i32` lanes and gives the widened (`i64`) results.",
1693 mul_widen_i32_odd_m128i(a: m128i, b: m128i) -> m128i;
1694 "Computes eight `u16` “sum of absolute difference” values according to the bytes selected.",
1695 multi_packed_sum_abs_diff_u8_m128i<const IMM: i32>(a: m128i, b: m128i) -> m128i;
1696 "Saturating convert `i32` to `u16`, and pack the values.",
1697 pack_i32_to_u16_m128i(a: m128i, b: m128i) -> m128i;
1698 "Rounds each lane in the style specified.",
1699 round_m128<const MODE: i32>(a: m128) -> m128;
1700 "Rounds `$b` low as specified, other lanes use `$a`.",
1701 round_m128_s<const MODE: i32>(a: m128, b: m128) -> m128;
1702 "Rounds each lane in the style specified.",
1703 round_m128d<const MODE: i32>(a: m128d) -> m128d;
1704 "Rounds `$b` low as specified, keeps `$a` high.",
1705 round_m128d_s<const MODE: i32>(a: m128d, b: m128d) -> m128d;
1706 "Tests if all bits are 1.",
1707 test_all_ones_m128i(a: m128i) -> i32;
1708 "Returns if all masked bits are 0, `(a & mask) as u128 == 0`",
1709 test_all_zeroes_m128i(a: m128i, mask: m128i) -> i32;
1710 "Returns if, among the masked bits, there’s both 0s and 1s",
1711 test_mixed_ones_and_zeroes_m128i(a: m128i, mask: m128i) -> i32;
1712 "Compute the bitwise NOT of `a` and then AND with `b`,
1713 returns 1 if the result is zero, otherwise 0.",
1714 testc_m128i(a: m128i, b: m128i) -> i32;
1715 "Computes the bitwise AND of 256 bits in `a` and
1716 `b`, returns 1 if the result is zero, otherwise 0.",
1717 testz_m128i(a: m128i, b: m128i) -> i32;
1718 }
1719}
1720impl_arch! {
1721 #[doc = "# Functions requiring the `sse4.2` target feature.\n\n---"]
1722 #[doc = "See: <https://en.wikipedia.org/wiki/SSE4#SSE4.2>"]
1723 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "sse4.2";
1724 arch_fn! {
1725 "Lanewise `a > b` with lanes as `i64`.",
1726 cmp_gt_mask_i64_m128i(a: m128i, b: m128i) -> m128i;
1727 "Accumulates the `u16` into a running CRC32 value.",
1728 crc32_u16(crc: u32, v: u16) -> u32;
1729 "Accumulates the `u32` into a running CRC32 value.",
1730 crc32_u32(crc: u32, v: u32) -> u32;
1731 "Accumulates the `u64` into a running CRC32 value.",
1732 crc32_u64(crc: u64, v: u64) -> u64;
1733 "Accumulates the `u8` into a running CRC32 value.",
1734 crc32_u8(crc: u32, v: u8) -> u32;
1735 "Search for `needle` in `haystack, with explicit string length.",
1736 search_explicit_str_for_index<const IMM: i32>(
1737 needle: m128i, needle_len: i32, haystack: m128i, haystack_len: i32) -> i32;
1738 "Search for `needle` in `haystack, with explicit string length.",
1739 search_explicit_str_for_mask<const IMM: i32>(
1740 needle: m128i, needle_len: i32, haystack: m128i, haystack_len: i32) -> m128i;
1741 "Search for `needle` in `haystack, with implicit string length.",
1742 search_implicit_str_for_index<const IMM: i32>(needle: m128i, haystack: m128i) -> i32;
1743 "Search for `needle` in `haystack, with implicit string length.",
1744 search_implicit_str_for_mask<const IMM: i32>(needle: m128i, haystack: m128i) -> m128i;
1745 }
1746}
1747impl_arch! {
1748 #[doc = "# Functions requiring the `ssse3` target feature.\n\n---"]
1749 #[doc = "See: <https://en.wikipedia.org/wiki/SSSE3>"]
1750 features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "ssse3";
1751 arch_fn! {
1752 "Lanewise absolute value with lanes as `i16`.",
1753 abs_i16_m128i(a: m128i) -> m128i;
1754 "Lanewise absolute value with lanes as `i32`.",
1755 abs_i32_m128i(a: m128i) -> m128i;
1756 "Lanewise absolute value with lanes as `i8`.",
1757 abs_i8_m128i(a: m128i) -> m128i;
1758 "Add horizontal pairs of `i16` values, pack the outputs as `a` then `b`.",
1759 add_horizontal_i16_m128i(a: m128i, b: m128i) -> m128i;
1760 "Add horizontal pairs of `i32` values, pack the outputs as `a` then `b`.",
1761 add_horizontal_i32_m128i(a: m128i, b: m128i) -> m128i;
1762 "Add horizontal pairs of `i16` values, saturating, pack the outputs as `a` then `b`.",
1763 add_horizontal_saturating_i16_m128i(a: m128i, b: m128i) -> m128i;
1764 "Counts `$a` as the high bytes and `$b` as the low bytes then performs a
1765 **byte** shift to the right by the immediate value.",
1766 combined_byte_shr_imm_m128i<const IMM: i32>(a: m128i, b: m128i) -> m128i;
1767 "Multiply `i16` lanes into `i32` intermediates, keep the high 18 bits,
1768 round by adding 1, right shift by 1.",
1769 mul_i16_scale_round_m128i(a: m128i, b: m128i) -> m128i;
1770 "This is dumb and weird.",
1771 mul_u8i8_add_horizontal_saturating_m128i(a: m128i, b: m128i) -> m128i;
1772 "Shuffle `i8` lanes in `a` using `i8` values in `v`.",
1773 shuffle_av_i8z_all_m128i(a: m128i, v: m128i) -> m128i;
1774 "Applies the sign of `i16` values in `b` to the values in `a`.",
1775 sign_apply_i16_m128i(a: m128i, b: m128i) -> m128i;
1776 "Applies the sign of `i32` values in `b` to the values in `a`.",
1777 sign_apply_i32_m128i(a: m128i, b: m128i) -> m128i;
1778 "Applies the sign of `i8` values in `b` to the values in `a`.",
1779 sign_apply_i8_m128i(a: m128i, b: m128i) -> m128i;
1780 "Subtract horizontal pairs of `i16` values, pack the outputs as `a` then `b`.",
1781 sub_horizontal_i16_m128i(a: m128i, b: m128i) -> m128i;
1782 "Subtract horizontal pairs of `i32` values, pack the outputs as `a` then `b`.",
1783 sub_horizontal_i32_m128i(a: m128i, b: m128i) -> m128i;
1784 "Subtract horizontal pairs of `i16` values, saturating, pack the outputs as `a` then `b`.",
1785 sub_horizontal_saturating_i16_m128i(a: m128i, b: m128i) -> m128i;
1786 }
1787}
1788
// Emits one `impl Arch { … }` block, forwarding any leading `#[doc = …]`
// attributes and translating the optional gate lists into `#[cfg]` attributes.
//
// Syntax (every gate list is optional, the `;` is mandatory):
//
//   impl_arch! {
//       #[doc = "…"]*
//       features = "a", "b", any_target_arch = "x", "y", target_features = "t";
//       <items placed inside the impl>
//   }
//
// Within a single list the values are combined with `any(…)`. Each list that
// is present becomes its own `#[cfg]`, so all present lists must hold for the
// impl to be compiled. Every `#[cfg]` is mirrored as a `doc(cfg(…))` attribute
// when the `nightly_doc` feature is enabled.
macro_rules! impl_arch {
    (
        $( #[doc = $docs:literal] )*
        $( features = $( $feat:literal ),+ $(,)? )?
        $( any_target_arch = $( $arch:literal ),+ $(,)? )?
        $( target_features = $( $tfeat:literal ),+ $(,)? )?
        ;
        $( $member:item )*
    ) => {
        $( #[doc = $docs] )*
        // Gate on crate features.
        $(
            #[cfg(any( $( feature = $feat ),+ ))]
            #[cfg_attr(feature = "nightly_doc", doc(cfg(any( $( feature = $feat ),+ ))))]
        )?
        // Gate on the target architecture.
        $(
            #[cfg(any( $( target_arch = $arch ),+ ))]
            #[cfg_attr(feature = "nightly_doc", doc(cfg(any( $( target_arch = $arch ),+ ))))]
        )?
        // Gate on enabled target features.
        $(
            #[cfg(any( $( target_feature = $tfeat ),+ ))]
            #[cfg_attr(feature = "nightly_doc", doc(cfg(any( $( target_feature = $tfeat ),+ ))))]
        )?
        impl Arch {
            $( $member )*
        }
    };
}
1817use impl_arch;
1818
// Generates public wrapper functions that forward to the function of the same
// name that is in scope at the expansion site, passing const generics and
// arguments through unchanged.
//
// Each entry is `"doc string", name<const …>(arg: Ty, …) [-> Ret]`; several
// entries are separated by `;`. The generated docs are the given string plus
// a "See:" link to `crate::_dep::safe_arch::<name>`.
//
// Arm order matters: the single-entry arms must come before the list arm,
// because the list arm re-invokes this macro once per entry.
#[allow(unused_macros, reason = "feature-gated")]
macro_rules! arch_fn {
    // Empty input: nothing to generate.
    () => {};

    // Single entry with a return type: the wrapper is marked `#[must_use]`.
    (
        $summary:literal,
        $name:ident $(< $( const $cname:ident : $cty:ty ),* >)?
        ( $( $arg:ident : $arg_ty:ty ),* $(,)? ) -> $out:ty
    ) => {
        $crate::paste! {
            #[doc = $summary]
            #[doc = "\n\nSee: [`" $name "`][crate::_dep::safe_arch::" $name "]."]
            #[must_use]
            pub fn $name $(< $( const $cname : $cty ),* >)? ( $( $arg : $arg_ty ),* ) -> $out {
                $name $(::< $( $cname ),* >)? ( $( $arg ),* )
            }
        }
    };

    // Single entry without a return type.
    (
        $summary:literal,
        $name:ident $(< $( const $cname:ident : $cty:ty ),* >)?
        ( $( $arg:ident : $arg_ty:ty ),* $(,)? )
    ) => {
        $crate::paste! {
            #[doc = $summary]
            #[doc = "\n\nSee: [`" $name "`][crate::_dep::safe_arch::" $name "]."]
            pub fn $name $(< $( const $cname : $cty ),* >)? ( $( $arg : $arg_ty ),* ) {
                $name $(::< $( $cname ),* >)? ( $( $arg ),* )
            }
        }
    };

    // `;`-separated list of entries (optional trailing `;`): expand each
    // entry through one of the single-entry arms above.
    (
        $(
            $summary:literal,
            $name:ident $(< $( const $cname:ident : $cty:ty ),* >)?
            ( $( $arg:ident : $arg_ty:ty ),* $(,)? ) $( -> $out:ty )?
        );+ $(;)?
    ) => {
        $(
            arch_fn!(
                $summary,
                $name $(< $( const $cname : $cty ),* >)? ( $( $arg : $arg_ty ),* ) $( -> $out )?
            );
        )+
    };
}
1859#[allow(unused_imports, reason = "feature-gated")]
1860use arch_fn;