devela/sys/arch/
namespace.rs

// devela::sys::arch::namespace
//
//! Defines the [`Arch`] namespace.
//
// TOC
// - impl Arch blocks
// - macro helpers
//   - impl_arch
//   - arch_fn
10
11#![allow(clippy::too_many_arguments)]
12
13#[cfg(feature = "dep_safe_arch")]
14use crate::_dep::safe_arch::*;
15
16#[doc = crate::TAG_NAMESPACE!()]
17/// Arch-related functionality.
18///
19/// ---
20/// Implementations that depend on: `dep_safe_arch`, (`x86` or `x86_64`)
21/// and the respective target feature:
22/// - [none](#functions-not-requiring-any-target-feature).
23/// - [`adx`](#functions-requiring-the-adx-target-feature).
24/// - [`aes`](#functions-requiring-the-aes-target-feature).
25/// - [`avx`](#functions-requiring-the-avx-target-feature).
26/// - [`avx2`](#functions-requiring-the-avx2-target-feature).
27/// - [`bmi1`](#functions-requiring-the-bmi1-target-feature).
28/// - [`bmi2`](#functions-requiring-the-bmi2-target-feature).
29/// - [`fma`](#functions-requiring-the-fma-target-feature).
30/// - [`lzcnt`](#functions-requiring-the-lzcnt-target-feature).
31/// - [`pclmulqdq`](#functions-requiring-the-pclmulqdq-target-feature).
32/// - [`popcnt`](#functions-requiring-the-popcnt-target-feature).
33/// - [`rdrand`](#functions-requiring-the-rdrand-target-feature).
34/// - [`rdseed`](#functions-requiring-the-rdseed-target-feature).
35/// - [`sse`](#functions-requiring-the-sse-target-feature)
36///   ([generic](#generic-functions-requiring-the-sse-target-feature)).
37/// - [`sse2`](#functions-requiring-the-sse2-target-feature).
38/// - [`sse3`](#functions-requiring-the-sse3-target-feature).
39/// - [`sse4.1`](#functions-requiring-the-sse41-target-feature).
40/// - [`sse4.2`](#functions-requiring-the-sse42-target-feature).
41/// - [`ssse3`](#functions-requiring-the-ssse3-target-feature).
42pub struct Arch;
43
44impl_arch! {
45    #[doc = "# Functions not requiring any target feature.\n\n---"]
46    features = "dep_safe_arch", any_target_arch = "x86", "x86_64";
47    arch_fn! {
48        "Swap the bytes of the given 32-bit value.",
49        byte_swap_i32(i: i32) -> i32;
50        "Swap the bytes of the given 64-bit value.",
51        byte_swap_i64(i: i64) -> i64;
52        "Reads the CPU’s timestamp counter value.",
53        read_timestamp_counter() -> u64;
54        "Reads the CPU’s timestamp counter value and store the processor signature.",
55        read_timestamp_counter_p(aux: &mut u32) -> u64;
56    }
57}
58impl_arch! {
59    #[doc = "# Functions requiring the `adx` target feature.\n\n---"]
60    #[doc = "See: <https://en.wikipedia.org/wiki/Intel_ADX>"]
61    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "adx";
62    arch_fn! {
63        "Add two `u32` with a carry value.",
64        add_carry_u32(c_in: u8, a: u32, b: u32, out: &mut u32) -> u8;
65        "Add two `u64` with a carry value.",
66        add_carry_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8;
67    }
68}
69impl_arch! {
70    #[doc = "# Functions requiring the `aes` target feature.\n\n---"]
71    #[doc = "See: <https://en.wikipedia.org/wiki/AES_instruction_set>"]
72    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "aes";
73    arch_fn! {
74        "Perform the last round of an AES decryption flow on `a` using the `round_key`.",
75        aes_decrypt_last_m128i(a: m128i, round_key: m128i) -> m128i;
76        "Perform one round of an AES decryption flow on `a` using the `round_key`.",
77        aes_decrypt_m128i(a: m128i, round_key: m128i) -> m128i;
78        "Perform the last round of an AES encryption flow on `a` using the `round_key`.",
79        aes_encrypt_last_m128i(a: m128i, round_key: m128i) -> m128i;
80        "Perform one round of an AES encryption flow on `a` using the `round_key`.",
81        aes_encrypt_m128i(a: m128i, round_key: m128i) -> m128i;
82        "Perform the InvMixColumns transform on `a`.",
83        aes_inv_mix_columns_m128i(a: m128i) -> m128i;
84        "Assist in expanding an AES cipher key.",
85        aes_key_gen_assist_m128i<const IMM: i32>(a: m128i) -> m128i;
86    }
87}
88impl_arch! {
89    #[doc = "# Functions requiring the `avx` target feature.\n\n---"]
90    #[doc = "See: <https://en.wikipedia.org/wiki/Advanced_Vector_Extensions>"]
91    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "avx";
92    arch_fn! {
93        "Add adjacent `f32` lanes.",
94        add_horizontal_m256(a: m256, b: m256) -> m256;
95        "Add adjacent `f64` lanes.",
96        add_horizontal_m256d(a: m256d, b: m256d) -> m256d;
97        "Lanewise `a + b` with `f32` lanes.",
98        add_m256(a: m256, b: m256) -> m256;
99        "Lanewise `a + b` with `f64` lanes.",
100        add_m256d(a: m256d, b: m256d) -> m256d;
101        "Alternately, from the top, add `f32` then sub `f32`.",
102        addsub_m256(a: m256, b: m256) -> m256;
103        "Alternately, from the top, add `f64` then sub `f64`.",
104        addsub_m256d(a: m256d, b: m256d) -> m256d;
105        "Bitwise `a &amp; b`.",
106        bitand_m256(a: m256, b: m256) -> m256;
107        "Bitwise `a &amp; b`.",
108        bitand_m256d(a: m256d, b: m256d) -> m256d;
109        "Bitwise `(!a) &amp; b`.",
110        bitandnot_m256(a: m256, b: m256) -> m256;
111        "Bitwise `(!a) &amp; b`.",
112        bitandnot_m256d(a: m256d, b: m256d) -> m256d;
113        "Bitwise `a | b`.",
114        bitor_m256(a: m256, b: m256) -> m256;
115        "Bitwise `a | b`.",
116        bitor_m256d(a: m256d, b: m256d) -> m256d;
117        "Bitwise `a ^ b`.",
118        bitxor_m256(a: m256, b: m256) -> m256;
119        "Bitwise `a ^ b`.",
120        bitxor_m256d(a: m256d, b: m256d) -> m256d;
121        "Blends the `f32` lanes according to the immediate mask.",
122        blend_m256<const IMM: i32>(a: m256, b: m256) -> m256;
123        "Blends the `f64` lanes according to the immediate mask.",
124        blend_m256d<const IMM: i32>(a: m256d, b: m256d) -> m256d;
125        "Blend the lanes according to a runtime varying mask.",
126        blend_varying_m256(a: m256, b: m256, mask: m256) -> m256;
127        "Blend the lanes according to a runtime varying mask.",
128        blend_varying_m256d(a: m256d, b: m256d, mask: m256d) -> m256d;
129        "Bit-preserving cast to `m128` from `m256`.",
130        cast_to_m128_from_m256(a: m256) -> m128;
131        "Bit-preserving cast to `m128d` from `m256d`.",
132        cast_to_m128d_from_m256d(a: m256d) -> m128d;
133        "Bit-preserving cast to `m128i` from `m256i`.",
134        cast_to_m128i_from_m256i(a: m256i) -> m128i;
135        "Bit-preserving cast to `m256` from `m256d`.",
136        cast_to_m256_from_m256d(a: m256d) -> m256;
137        "Bit-preserving cast to `m256` from `m256i`.",
138        cast_to_m256_from_m256i(a: m256i) -> m256;
139        "Bit-preserving cast to `m256i` from `m256`.",
140        cast_to_m256d_from_m256(a: m256) -> m256d;
141        "Bit-preserving cast to `m256d` from `m256i`.",
142        cast_to_m256d_from_m256i(a: m256i) -> m256d;
143        "Bit-preserving cast to `m256i` from `m256`.",
144        cast_to_m256i_from_m256(a: m256) -> m256i;
145        "Bit-preserving cast to `m256i` from `m256d`.",
146        cast_to_m256i_from_m256d(a: m256d) -> m256i;
147        "Round `f32` lanes towards positive infinity.",
148        ceil_m256(a: m256) -> m256;
149        "Round `f64` lanes towards positive infinity.",
150        ceil_m256d(a: m256d) -> m256d;
151        "Compare `f32` lanes according to the operation specified, mask output.",
152        cmp_op_mask_m128<const OP: i32>(a: m128, b: m128) -> m128;
153        "Compare `f32` lanes according to the operation specified, mask output.",
154        cmp_op_mask_m128_s<const OP: i32>(a: m128, b: m128) -> m128;
155        "Compare `f64` lanes according to the operation specified, mask output.",
156        cmp_op_mask_m128d<const OP: i32>(a: m128d, b: m128d) -> m128d;
157        "Compare `f64` lanes according to the operation specified, mask output.",
158        cmp_op_mask_m128d_s<const OP: i32>(a: m128d, b: m128d) -> m128d;
159        "Compare `f32` lanes according to the operation specified, mask output.",
160        cmp_op_mask_m256<const OP: i32>(a: m256, b: m256) -> m256;
161        "Compare `f64` lanes according to the operation specified, mask output.",
162        cmp_op_mask_m256d<const OP: i32>(a: m256d, b: m256d) -> m256d;
163        "Convert the lowest `f32` lane to a single `f32`.",
164        convert_to_f32_from_m256_s(a: m256) -> f32;
165        "Convert the lowest `f64` lane to a single `f64`.",
166        convert_to_f64_from_m256d_s(a: m256d) -> f64;
167        "Convert the lowest `i32` lane to a single `i32`.",
168        convert_to_i32_from_m256i_s(a: m256i) -> i32;
169        "Convert `f64` lanes to be `i32` lanes.",
170        convert_to_i32_m128i_from_m256d(a: m256d) -> m128i;
171        "Convert `f32` lanes to be `i32` lanes.",
172        convert_to_i32_m256i_from_m256(a: m256) -> m256i;
173        "Convert `f64` lanes to be `f32` lanes.",
174        convert_to_m128_from_m256d(a: m256d) -> m128;
175        "Convert `i32` lanes to be `f32` lanes.",
176        convert_to_m256_from_i32_m256i(a: m256i) -> m256;
177        "Convert `i32` lanes to be `f64` lanes.",
178        convert_to_m256d_from_i32_m128i(a: m128i) -> m256d;
179        "Convert `f32` lanes to be `f64` lanes.",
180        convert_to_m256d_from_m128(a: m128) -> m256d;
181        "Convert `f64` lanes to `i32` lanes with truncation.",
182        convert_truncate_to_i32_m128i_from_m256d(a: m256d) -> m128i;
183        "Convert `f32` lanes to `i32` lanes with truncation.",
184        convert_truncate_to_i32_m256i_from_m256(a: m256) -> m256i;
185        "Lanewise `a / b` with `f32`.",
186        div_m256(a: m256, b: m256) -> m256;
187        "Lanewise `a / b` with `f64`.",
188        div_m256d(a: m256d, b: m256d) -> m256d;
189        "This works like `dot_product_m128`, but twice as wide.",
190        dot_product_m256<const IMM: i32>(a: m256, b: m256) -> m256;
191        "Duplicate the even-indexed lanes to the odd lanes.",
192        duplicate_even_lanes_m256(a: m256) -> m256;
193        "Duplicate the odd-indexed lanes to the even lanes.",
194        duplicate_odd_lanes_m256(a: m256) -> m256;
195        "Duplicate the odd-indexed lanes to the even lanes.",
196        duplicate_odd_lanes_m256d(a: m256d) -> m256d;
197        "Extracts an `i32` lane from `m256i`",
198        extract_i32_from_m256i<const IMM: i32>(a: m256i) -> i32;
199        "Extracts an `i64` lane from `m256i`",
200        extract_i64_from_m256i<const IMM: i32>(a: m256i) -> i64;
201        "Extracts an `m128` from `m256`",
202        extract_m128_from_m256<const IMM: i32>(a: m256) -> m128;
203        "Extracts an `m128d` from `m256d`",
204        extract_m128d_from_m256d<const IMM: i32>(a: m256d) -> m128d;
205        "Extracts an `m128i` from `m256i`",
206        extract_m128i_from_m256i<const IMM: i32>(a: m256i) -> m128i;
207        "Round `f32` lanes towards negative infinity.",
208        floor_m256(a: m256) -> m256;
209        "Round `f64` lanes towards negative infinity.",
210        floor_m256d(a: m256d) -> m256d;
211        "Inserts an `i16` to `m256i`",
212        insert_i16_to_m256i<const IMM: i32>(a: m256i, i: i16) -> m256i;
213        "Inserts an `i32` to `m256i`",
214        insert_i32_to_m256i<const IMM: i32>(a: m256i, i: i32) -> m256i;
215        "Inserts an `i64` to `m256i`",
216        insert_i64_to_m256i<const IMM: i32>(a: m256i, i: i64) -> m256i;
217        "Inserts an `i8` to `m256i`",
218        insert_i8_to_m256i<const IMM: i32>(a: m256i, i: i8) -> m256i;
219        "Inserts an `m128` to `m256`",
220        insert_m128_to_m256<const IMM: i32>(a: m256, b: m128) -> m256;
221        "Inserts an `m128d` to `m256d`",
222        insert_m128d_to_m256d<const IMM: i32>(a: m256d, b: m128d) -> m256d;
223        "Slowly inserts an `m128i` to `m256i`.",
224        insert_m128i_to_m256i_slow_avx<const IMM: i32>(a: m256i, b: m128i) -> m256i;
225        "Load an `f32` and splat it to all lanes of an `m256d`",
226        load_f32_splat_m256(a: &f32) -> m256;
227        "Load an `f64` and splat it to all lanes of an `m256d`",
228        load_f64_splat_m256d(a: &f64) -> m256d;
229        "Load an `m128` and splat it to the lower and upper half of an `m256`",
230        load_m128_splat_m256(a: &m128) -> m256;
231        "Load an `m128d` and splat it to the lower and upper half of an `m256d`",
232        load_m128d_splat_m256d(a: &m128d) -> m256d;
233        "Load data from memory into a register.",
234        load_m256(a: &m256) -> m256;
235        "Load data from memory into a register.",
236        load_m256d(a: &m256d) -> m256d;
237        "Load data from memory into a register.",
238        load_m256i(a: &m256i) -> m256i;
239        "Load data from memory into a register according to a mask.",
240        load_masked_m128(a: &m128, mask: m128i) -> m128;
241        "Load data from memory into a register according to a mask.",
242        load_masked_m128d(a: &m128d, mask: m128i) -> m128d;
243        "Load data from memory into a register according to a mask.",
244        load_masked_m256(a: &m256, mask: m256i) -> m256;
245        "Load data from memory into a register according to a mask.",
246        load_masked_m256d(a: &m256d, mask: m256i) -> m256d;
247        "Load data from memory into a register.",
248        load_unaligned_hi_lo_m256(a: &[f32; 4], b: &[f32; 4]) -> m256;
249        "Load data from memory into a register.",
250        load_unaligned_hi_lo_m256d(a: &[f64; 2], b: &[f64; 2]) -> m256d;
251        "Load data from memory into a register.",
252        load_unaligned_hi_lo_m256i(a: &[i8; 16], b: &[i8; 16]) -> m256i;
253        "Load data from memory into a register.",
254        load_unaligned_m256(a: &[f32; 8]) -> m256;
255        "Load data from memory into a register.",
256        load_unaligned_m256d(a: &[f64; 4]) -> m256d;
257        "Load data from memory into a register.",
258        load_unaligned_m256i(a: &[i8; 32]) -> m256i;
259        "Lanewise `max(a, b)`.",
260        max_m256(a: m256, b: m256) -> m256;
261        "Lanewise `max(a, b)`.",
262        max_m256d(a: m256d, b: m256d) -> m256d;
263        "Lanewise `min(a, b)`.",
264        min_m256(a: m256, b: m256) -> m256;
265        "Lanewise `min(a, b)`.",
266        min_m256d(a: m256d, b: m256d) -> m256d;
267        "Collects the sign bit of each lane into a 4-bit value.",
268        move_mask_m256(a: m256) -> i32;
269        "Collects the sign bit of each lane into a 4-bit value.",
270        move_mask_m256d(a: m256d) -> i32;
271        "Lanewise `a * b` with `f32` lanes.",
272        mul_m256(a: m256, b: m256) -> m256;
273        "Lanewise `a * b` with `f64` lanes.",
274        mul_m256d(a: m256d, b: m256d) -> m256d;
275        "Shuffle 128 bits of floating point data at a time from `$a` and `$b` using
276        an immediate control value.",
277        permute2z_m256<const MASK: i32>(a: m256, b: m256) -> m256;
278        "Shuffle 128 bits of floating point data at a time from `a` and `b` using an
279        immediate control value.",
280        permute2z_m256d<const MASK: i32>(a: m256d, b: m256d) -> m256d;
281        "<em>Slowly</em> swizzle 128 bits of integer data from `a` and `b` using an
282        immediate control value.",
283        permute2z_m256i<const MASK: i32>(a: m256i, b: m256i) -> m256i;
284        "Shuffle the `f32` lanes from `a` using an immediate control value.",
285        permute_m128<const MASK: i32>(a: m128) -> m128;
286        "Shuffle the `f64` lanes in `a` using an immediate control value.",
287        permute_m128d<const MASK: i32>(a: m128d) -> m128d;
288        "Shuffle the `f32` lanes in `a` using an immediate control value.",
289        permute_m256<const MASK: i32>(a: m256) -> m256;
290        "Shuffle the `f64` lanes from `a` together using an immediate control value.",
291        permute_m256d<const MASK: i32>(a: m256d) -> m256d;
292        "Reciprocal of `f32` lanes.",
293        reciprocal_m256(a: m256) -> m256;
294        "Reciprocal of `f32` lanes.",
295        reciprocal_sqrt_m256(a: m256) -> m256;
296        "Rounds each lane in the style specified.",
297        round_m256<const OP: i32>(a: m256) -> m256;
298        "Rounds each lane in the style specified.",
299        round_m256d<const OP: i32>(a: m256d) -> m256d;
300        "Set `i16` args into an `m256i` lane.",
301        set_i16_m256i(e15: i16, e14: i16, e13: i16, e12: i16, e11: i16, e10: i16, e9: i16, e8: i16,
302            e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16, e0: i16) -> m256i;
303        "Set `i32` args into an `m256i` lane.",
304        set_i32_m256i(e7: i32, e6: i32, e5: i32, e4: i32, e3: i32, e2: i32, e1: i32,
305            e0: i32) -> m256i;
306        "Set `i64` args into an `m256i` lane.",
307        set_i64_m256i(e3: i64, e2: i64, e1: i64, e0: i64) -> m256i;
308        "Set `i8` args into an `m256i` lane.",
309        set_i8_m256i(e31: i8, e30: i8, e29: i8, e28: i8, e27: i8, e26: i8, e25: i8, e24: i8,
310            e23: i8, e22: i8, e21: i8, e20: i8, e19: i8, e18: i8, e17: i8, e16: i8, e15: i8,
311            e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8, e7: i8, e6: i8,
312            e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> m256i;
313        "Set `m128` args into an `m256`.",
314        set_m128_m256(high: m128, low: m128) -> m256;
315        "Set `m128d` args into an `m256d`.",
316        set_m128d_m256d(high: m128d, low: m128d) -> m256d;
317        "Set `m128i` args into an `m256i`.",
318        set_m128i_m256i(hi: m128i, lo: m128i) -> m256i;
319        "Set `f32` args into an `m256` lane.",
320        set_m256(e7: f32, e6: f32, e5: f32, e4: f32, e3: f32, e2: f32, e1: f32, e0: f32) -> m256;
321        "Set `f64` args into an `m256d` lane.",
322        set_m256d(e3: f64, e2: f64, e1: f64, e0: f64) -> m256d;
323        "Set `i16` args into an `m256i` lane.",
324        set_reversed_i16_m256i(e15: i16, e14: i16, e13: i16, e12: i16, e11: i16, e10: i16,
325            e9: i16, e8: i16, e7: i16, e6: i16, e5: i16, e4: i16, e3: i16, e2: i16, e1: i16,
326            e0: i16) -> m256i;
327        "Set `i32` args into an `m256i` lane.",
328        set_reversed_i32_m256i(e7: i32, e6: i32, e5: i32, e4: i32, e3: i32, e2: i32, e1: i32,
329            e0: i32) -> m256i;
330        "Set `i64` args into an `m256i` lane.",
331        set_reversed_i64_m256i(e3: i64, e2: i64, e1: i64, e0: i64) -> m256i;
332        "Set `i8` args into an `m256i` lane.",
333        set_reversed_i8_m256i(e31: i8, e30: i8, e29: i8, e28: i8, e27: i8, e26: i8, e25: i8,
334            e24: i8, e23: i8, e22: i8, e21: i8, e20: i8, e19: i8, e18: i8, e17: i8, e16: i8,
335            e15: i8, e14: i8, e13: i8, e12: i8, e11: i8, e10: i8, e9: i8, e8: i8, e7: i8,
336            e6: i8, e5: i8, e4: i8, e3: i8, e2: i8, e1: i8, e0: i8) -> m256i;
337        "Set `m128` args into an `m256`.",
338        set_reversed_m128_m256(hi: m128, lo: m128) -> m256;
339        "Set `m128d` args into an `m256d`.",
340        set_reversed_m128d_m256d(hi: m128d, lo: m128d) -> m256d;
341        "Set `m128i` args into an `m256i`.",
342        set_reversed_m128i_m256i(hi: m128i, lo: m128i) -> m256i;
343        "Set `f32` args into an `m256` lane.",
344        set_reversed_m256(e7: f32, e6: f32, e5: f32, e4: f32, e3: f32, e2: f32, e1: f32,
345            e0: f32) -> m256;
346        "Set `f64` args into an `m256d` lane.",
347        set_reversed_m256d(e3: f64, e2: f64, e1: f64, e0: f64) -> m256d;
348        "Splat an `i16` arg into an `m256i` lane.",
349        set_splat_i16_m256i(i: i16) -> m256i;
350        "Splat an `i32` arg into an `m256i` lane.",
351        set_splat_i32_m256i(i: i32) -> m256i;
352        "Splat an `i64` arg into an `m256i` lane.",
353        set_splat_i64_m256i(i: i64) -> m256i;
354        "Splat an `i8` arg into an `m256i` lane.",
355        set_splat_i8_m256i(i: i8) -> m256i;
356        "Splat an `f32` arg into an `m256` lane.",
357        set_splat_m256(f: f32) -> m256;
358        "Splat an `f64` arg into an `m256d` lane.",
359        set_splat_m256d(f: f64) -> m256d;
360        "Shuffle `f32` values in `a` using `i32` values in `v`.",
361        shuffle_av_f32_all_m128(a: m128, v: m128i) -> m128;
362        "Shuffle `f32` values in `a` using `i32` values in `v`.",
363        shuffle_av_f32_half_m256(a: m256, v: m256i) -> m256;
364        "Shuffle `f64` lanes in `a` using <strong>bit 1</strong> of the `i64` lanes in `v`",
365        shuffle_av_f64_all_m128d(a: m128d, v: m128i) -> m128d;
366        "Shuffle `f64` lanes in `a` using <strong>bit 1</strong> of the `i64` lanes in `v`.",
367        shuffle_av_f64_half_m256d(a: m256d, b: m256i) -> m256d;
368        "Shuffle the `f32` lanes from `a` and `b` together using an immediate control value.",
369        shuffle_m256<const IMM: i32>(a: m256, b: m256) -> m256;
370        "Shuffle the `f64` lanes from `a` and `b` together using an immediate control value.",
371        shuffle_m256d<const IMM: i32>(a: m256d, b: m256d) -> m256d;
372        "Lanewise `sqrt` on `f64` lanes.",
373        sqrt_m256(a: m256) -> m256;
374        "Lanewise `sqrt` on `f64` lanes.",
375        sqrt_m256d(a: m256d) -> m256d;
376        "Store data from a register into memory.",
377        store_m256(addr: &mut m256, a: m256);
378        "Store data from a register into memory.",
379        store_m256d(addr: &mut m256d, a: m256d);
380        "Store data from a register into memory.",
381        store_m256i(addr: &mut m256i, a: m256i);
382        "Store data from a register into memory according to a mask.",
383        store_masked_m128(addr: &mut m128, mask: m128i, a: m128);
384        "Store data from a register into memory according to a mask.",
385        store_masked_m128d(addr: &mut m128d, mask: m128i, a: m128d);
386        "Store data from a register into memory according to a mask.",
387        store_masked_m256(addr: &mut m256, mask: m256i, a: m256);
388        "Store data from a register into memory according to a mask.",
389        store_masked_m256d(addr: &mut m256d, mask: m256i, a: m256d);
390        "Store data from a register into memory.",
391        store_unaligned_hi_lo_m256(hi_addr: &mut [f32; 4], lo_addr: &mut [f32; 4], a: m256);
392        "Store data from a register into memory.",
393        store_unaligned_hi_lo_m256d(hi_addr: &mut [f64; 2], lo_addr: &mut [f64; 2], a: m256d);
394        "Store data from a register into memory.",
395        store_unaligned_hi_lo_m256i(hi_addr: &mut [i8; 16], lo_addr: &mut [i8; 16], a: m256i);
396        "Store data from a register into memory.",
397        store_unaligned_m256(addr: &mut [f32; 8], a: m256);
398        "Store data from a register into memory.",
399        store_unaligned_m256d(addr: &mut [f64; 4], a: m256d);
400        "Store data from a register into memory.",
401        store_unaligned_m256i(addr: &mut [i8; 32], a: m256i);
402        "Subtract adjacent `f32` lanes.",
403        sub_horizontal_m256(a: m256, b: m256) -> m256;
404        "Subtract adjacent `f64` lanes.",
405        sub_horizontal_m256d(a: m256d, b: m256d) -> m256d;
406        "Lanewise `a - b` with `f32` lanes.",
407        sub_m256(a: m256, b: m256) -> m256;
408        "Lanewise `a - b` with `f64` lanes.",
409        sub_m256d(a: m256d, b: m256d) -> m256d;
410        "Compute the bitwise of sign bit NOT of `a` and then AND with `b`,
411        returns 1 if the result is zero, otherwise 0.",
412        testc_m128(a: m128, b: m128) -> i32;
413        "Compute the bitwise of sign bit NOT of `a` and then AND with `b`,
414        returns 1 if the result is zero, otherwise 0.",
415        testc_m128d(a: m128d, b: m128d) -> i32;
416        "Compute the bitwise of sign bit NOT of `a` and then AND with `b`,
417        returns 1 if the result is zero, otherwise 0.",
418        testc_m256(a: m256, b: m256) -> i32;
419        "Compute the bitwise of sign bit NOT of `a` and then AND with `b`,
420        returns 1 if the result is zero, otherwise 0.",
421        testc_m256d(a: m256d, b: m256d) -> i32;
422        "Compute the bitwise NOT of `a` and then AND with `b`,
423        returns 1 if the result is zero, otherwise 0.",
424        testc_m256i(a: m256i, b: m256i) -> i32;
425        "Computes the bitwise AND of 256 bits in `a` and
426        `b`, returns 1 if the result is zero, otherwise 0.",
427        testz_m128(a: m128, b: m128) -> i32;
428        "Computes the bitwise of sign bitAND of 256 bits in `a` and
429        `b`, returns 1 if the result is zero, otherwise 0.",
430        testz_m128d(a: m128d, b: m128d) -> i32;
431        "Computes the bitwise AND of 256 bits in `a` and
432        `b`, returns 1 if the result is zero, otherwise 0.",
433        testz_m256(a: m256, b: m256) -> i32;
434        "Computes the bitwise of sign bit AND of 256 bits in `a` and
435        `b`, returns 1 if the result is zero, otherwise 0.",
436        testz_m256d(a: m256d, b: m256d) -> i32;
437        "Computes the bitwise of sign bit AND of 256 bits in `a` and
438        `b`, returns 1 if the result is zero, otherwise 0.",
439        testz_m256i(a: m256i, b: m256i) -> i32;
440        "Unpack and interleave the high lanes.",
441        unpack_hi_m256(a: m256, b: m256) -> m256;
442        "Unpack and interleave the high lanes.",
443        unpack_hi_m256d(a: m256d, b: m256d) -> m256d;
444        "Unpack and interleave the high lanes.",
445        unpack_lo_m256(a: m256, b: m256) -> m256;
446        "Unpack and interleave the high lanes.",
447        unpack_lo_m256d(a: m256d, b: m256d) -> m256d;
448        "Zero extend an `m128` to `m256`",
449        zero_extend_m128(a: m128) -> m256;
450        "Zero extend an `m128d` to `m256d`",
451        zero_extend_m128d(a: m128d) -> m256d;
452        "Zero extend an `m128i` to `m256i`",
453        zero_extend_m128i(a: m128i) -> m256i;
454        "A zeroed `m256`",
455        zeroed_m256() -> m256;
456        "A zeroed `m256d`",
457        zeroed_m256d() -> m256d;
458        "A zeroed `m256i`",
459        zeroed_m256i() -> m256i;
460    }
461}
462impl_arch! {
463    #[doc = "# Functions requiring the `avx2` target feature.\n\n---"]
464    #[doc = "See: <https://en.wikipedia.org/wiki/Advanced_Vector_Extensions>"]
465    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "avx2";
466    arch_fn! {
467        "Absolute value of `i16` lanes.",
468        abs_i16_m256i(a: m256i) -> m256i;
469        "Absolute value of `i32` lanes.",
470        abs_i32_m256i(a: m256i) -> m256i;
471        "Absolute value of `i8` lanes.",
472        abs_i8_m256i(a: m256i) -> m256i;
473        "Horizontal `a + b` with lanes as `i16`.",
474        add_horizontal_i16_m256i(a: m256i, b: m256i) -> m256i;
475        "Horizontal `a + b` with lanes as `i32`.",
476        add_horizontal_i32_m256i(a: m256i, b: m256i) -> m256i;
477        "Horizontal saturating `a + b` with lanes as `i16`.",
478        add_horizontal_saturating_i16_m256i(a: m256i, b: m256i) -> m256i;
479        "Lanewise `a + b` with lanes as `i16`.",
480        add_i16_m256i(a: m256i, b: m256i) -> m256i;
481        "Lanewise `a + b` with lanes as `i32`.",
482        add_i32_m256i(a: m256i, b: m256i) -> m256i;
483        "Lanewise `a + b` with lanes as `i64`.",
484        add_i64_m256i(a: m256i, b: m256i) -> m256i;
485        "Lanewise `a + b` with lanes as `i8`.",
486        add_i8_m256i(a: m256i, b: m256i) -> m256i;
487        "Lanewise saturating `a + b` with lanes as `i16`.",
488        add_saturating_i16_m256i(a: m256i, b: m256i) -> m256i;
489        "Lanewise saturating `a + b` with lanes as `i8`.",
490        add_saturating_i8_m256i(a: m256i, b: m256i) -> m256i;
491        "Lanewise saturating `a + b` with lanes as `u16`.",
492        add_saturating_u16_m256i(a: m256i, b: m256i) -> m256i;
493        "Lanewise saturating `a + b` with lanes as `u8`.",
494        add_saturating_u8_m256i(a: m256i, b: m256i) -> m256i;
495        "Average `u16` lanes.",
496        average_u16_m256i(a: m256i, b: m256i) -> m256i;
497        "Average `u8` lanes.",
498        average_u8_m256i(a: m256i, b: m256i) -> m256i;
499        "Bitwise `a &amp; b`.",
500        bitand_m256i(a: m256i, b: m256i) -> m256i;
501        "Bitwise `(!a) &amp; b`.",
502        bitandnot_m256i(a: m256i, b: m256i) -> m256i;
503        "Bitwise `a | b`",
504        bitor_m256i(a: m256i, b: m256i) -> m256i;
505        "Bitwise `a ^ b`.",
506        bitxor_m256i(a: m256i, b: m256i) -> m256i;
507        "Blends the `i16` lanes according to the immediate value.",
508        blend_imm_i16_m256i<const IMM: i32>(a: m256i, b: m256i) -> m256i;
509        "Blends the `i32` lanes in `a` and `b` into a single value.",
510        blend_imm_i32_m128i<const IMM: i32>(a: m128i, b: m128i) -> m128i;
511        "Blends the `i32` lanes according to the immediate value.",
512        blend_imm_i32_m256i<const IMM: i32>(a: m256i, b: m256i) -> m256i;
513        "Blend `i8` lanes according to a runtime varying mask.",
514        blend_varying_i8_m256i(a: m256i, b: m256i, mask: m256i) -> m256i;
515        "Shifts each `u128` lane left by a number of <strong>bytes</strong>.",
516        byte_shl_imm_u128_m256i<const IMM: i32>(a: m256i) -> m256i;
517        "Shifts each `u128` lane right by a number of <strong>bytes</strong>.",
518        byte_shr_imm_u128_m256i<const IMM: i32>(a: m256i) -> m256i;
519        "Compare `i16` lanes for equality, mask output.",
520        cmp_eq_mask_i16_m256i(a: m256i, b: m256i) -> m256i;
521        "Compare `i32` lanes for equality, mask output.",
522        cmp_eq_mask_i32_m256i(a: m256i, b: m256i) -> m256i;
523        "Compare `i64` lanes for equality, mask output.",
524        cmp_eq_mask_i64_m256i(a: m256i, b: m256i) -> m256i;
525        "Compare `i8` lanes for equality, mask output.",
526        cmp_eq_mask_i8_m256i(a: m256i, b: m256i) -> m256i;
527        "Compare `i16` lanes for `a &gt; b`, mask output.",
528        cmp_gt_mask_i16_m256i(a: m256i, b: m256i) -> m256i;
529        "Compare `i32` lanes for `a &gt; b`, mask output.",
530        cmp_gt_mask_i32_m256i(a: m256i, b: m256i) -> m256i;
531        "Compare `i64` lanes for `a &gt; b`, mask output.",
532        cmp_gt_mask_i64_m256i(a: m256i, b: m256i) -> m256i;
533        "Compare `i8` lanes for `a &gt; b`, mask output.",
534        cmp_gt_mask_i8_m256i(a: m256i, b: m256i) -> m256i;
535        "Works like `combined_byte_shr_imm_m128i`, but twice as wide.",
536        combined_byte_shr_imm_m256i<const IMM: i32>(a: m256i, b: m256i) -> m256i;
537        "Convert `i8` values to `i16` values.",
538        convert_to_i16_m256i_from_i8_m128i(a: m128i) -> m256i;
539        "Convert lower 4 `u8` values to `i16` values.",
540        convert_to_i16_m256i_from_lower4_u8_m128i(a: m128i) -> m256i;
541        "Convert lower 8 `u8` values to `i16` values.",
542        convert_to_i16_m256i_from_lower8_u8_m128i(a: m128i) -> m256i;
543        "Convert `u8` values to `i16` values.",
544        convert_to_i16_m256i_from_u8_m128i(a: m128i) -> m256i;
545        "Convert `i16` values to `i32` values.",
546        convert_to_i32_m256i_from_i16_m128i(a: m128i) -> m256i;
547        "Convert the lower 8 `i8` values to `i32` values.",
548        convert_to_i32_m256i_from_lower8_i8_m128i(a: m128i) -> m256i;
549        "Convert `u16` values to `i32` values.",
550        convert_to_i32_m256i_from_u16_m128i(a: m128i) -> m256i;
551        "Convert `i32` values to `i64` values.",
552        convert_to_i64_m256i_from_i32_m128i(a: m128i) -> m256i;
553        "Convert `i16` values to `i64` values.",
554        convert_to_i64_m256i_from_lower4_i16_m128i(a: m128i) -> m256i;
555        "Convert the lower 4 `i8` values to `i64` values.",
556        convert_to_i64_m256i_from_lower4_i8_m128i(a: m128i) -> m256i;
557        "Convert `u16` values to `i64` values.",
558        convert_to_i64_m256i_from_lower4_u16_m128i(a: m128i) -> m256i;
559        "Convert `u32` values to `i64` values.",
560        convert_to_i64_m256i_from_u32_m128i(a: m128i) -> m256i;
561        "Gets an `i16` value out of an `m256i`, returns as `i32`.",
562        extract_i16_as_i32_m256i<const LANE: i32>(a: m256i) -> i32;
563        "Gets an `i8` value out of an `m256i`, returns as `i32`.",
564        extract_i8_as_i32_m256i<const LANE: i32>(a: m256i) -> i32;
565        "Gets an `m128i` value out of an `m256i`.",
566        extract_m128i_m256i<const LANE: i32>(a: m256i) -> m128i;
567        "Inserts an `m128i` to an `m256i` at the high or low position.",
568        insert_m128i_to_m256i<const LANE: i32>(a: m256i, b: m128i) -> m256i;
569        "Loads the reference given and zeroes any `i32` lanes not in the mask.",
570        load_masked_i32_m128i(a: &m128i, mask: m128i) -> m128i;
571        "Loads the reference given and zeroes any `i32` lanes not in the mask.",
572        load_masked_i32_m256i(a: &m256i, mask: m256i) -> m256i;
573        "Loads the reference given and zeroes any `i64` lanes not in the mask.",
574        load_masked_i64_m128i(a: &m128i, mask: m128i) -> m128i;
575        "Loads the reference given and zeroes any `i64` lanes not in the mask.",
576        load_masked_i64_m256i(a: &m256i, mask: m256i) -> m256i;
577        "Lanewise `max(a, b)` with lanes as `i16`.",
578        max_i16_m256i(a: m256i, b: m256i) -> m256i;
579        "Lanewise `max(a, b)` with lanes as `i32`.",
580        max_i32_m256i(a: m256i, b: m256i) -> m256i;
581        "Lanewise `max(a, b)` with lanes as `i8`.",
582        max_i8_m256i(a: m256i, b: m256i) -> m256i;
583        "Lanewise `max(a, b)` with lanes as `u16`.",
584        max_u16_m256i(a: m256i, b: m256i) -> m256i;
585        "Lanewise `max(a, b)` with lanes as `u32`.",
586        max_u32_m256i(a: m256i, b: m256i) -> m256i;
587        "Lanewise `max(a, b)` with lanes as `u8`.",
588        max_u8_m256i(a: m256i, b: m256i) -> m256i;
589        "Lanewise `min(a, b)` with lanes as `i16`.",
590        min_i16_m256i(a: m256i, b: m256i) -> m256i;
591        "Lanewise `min(a, b)` with lanes as `i32`.",
592        min_i32_m256i(a: m256i, b: m256i) -> m256i;
593        "Lanewise `min(a, b)` with lanes as `i8`.",
594        min_i8_m256i(a: m256i, b: m256i) -> m256i;
595        "Lanewise `min(a, b)` with lanes as `u16`.",
596        min_u16_m256i(a: m256i, b: m256i) -> m256i;
597        "Lanewise `min(a, b)` with lanes as `u32`.",
598        min_u32_m256i(a: m256i, b: m256i) -> m256i;
599        "Lanewise `min(a, b)` with lanes as `u8`.",
600        min_u8_m256i(a: m256i, b: m256i) -> m256i;
601        "Create an `i32` mask of each sign bit in the `i8` lanes.",
602        move_mask_i8_m256i(a: m256i) -> i32;
603        "Multiply `i16` lanes producing `i32` values, horizontal add pairs of `i32`
604        values to produce the final output.",
605        mul_i16_horizontal_add_m256i(a: m256i, b: m256i) -> m256i;
606        "Multiply the `i16` lanes and keep the high half of each 32-bit output.",
607        mul_i16_keep_high_m256i(a: m256i, b: m256i) -> m256i;
608        "Multiply the `i16` lanes and keep the low half of each 32-bit output.",
609        mul_i16_keep_low_m256i(a: m256i, b: m256i) -> m256i;
610        "Multiply `i16` lanes into `i32` intermediates, keep the high 18 bits, round
611        by adding 1, right shift by 1.",
612        mul_i16_scale_round_m256i(a: m256i, b: m256i) -> m256i;
613        "Multiply the `i32` lanes and keep the low half of each 64-bit output.",
614        mul_i32_keep_low_m256i(a: m256i, b: m256i) -> m256i;
615        "Multiply the lower `i32` within each `i64` lane, `i64` output.",
616        mul_i64_low_bits_m256i(a: m256i, b: m256i) -> m256i;
617        "Multiply the `u16` lanes and keep the high half of each 32-bit output.",
618        mul_u16_keep_high_m256i(a: m256i, b: m256i) -> m256i;
619        "Multiply the lower `u32` within each `u64` lane, `u64` output.",
620        mul_u64_low_bits_m256i(a: m256i, b: m256i) -> m256i;
621        "This is dumb and weird.",
622        mul_u8i8_add_horizontal_saturating_m256i(a: m256i, b: m256i) -> m256i;
623        "Computes eight `u16` “sum of absolute difference” values according to the
624        bytes selected.",
625        multi_packed_sum_abs_diff_u8_m256i<const IMM: i32>(a: m256i, b: m256i) -> m256i;
626        "Saturating convert `i16` to `i8`, and pack the values.",
627        pack_i16_to_i8_m256i(a: m256i, b: m256i) -> m256i;
628        "Saturating convert `i16` to `u8`, and pack the values.",
629        pack_i16_to_u8_m256i(a: m256i, b: m256i) -> m256i;
630        "Saturating convert `i32` to `i16`, and pack the values.",
631        pack_i32_to_i16_m256i(a: m256i, b: m256i) -> m256i;
632        "Saturating convert `i32` to `u16`, and pack the values.",
633        pack_i32_to_u16_m256i(a: m256i, b: m256i) -> m256i;
634        "Sets the lowest `i16` lane of an `m128i` as all lanes of an `m256i`.",
635        set_splat_i16_m128i_s_m256i(a: m128i) -> m256i;
636        "Sets the lowest `i32` lane of an `m128i` as all lanes of an `m256i`.",
637        set_splat_i32_m128i_s_m256i(a: m128i) -> m256i;
638        "Sets the lowest `i64` lane of an `m128i` as all lanes of an `m256i`.",
639        set_splat_i64_m128i_s_m256i(a: m128i) -> m256i;
640        "Sets the lowest `i8` lane of an `m128i` as all lanes of an `m256i`.",
641        set_splat_i8_m128i_s_m256i(a: m128i) -> m256i;
642        "Sets the lowest lane of an `m128` as all lanes of an `m256`.",
643        set_splat_m128_s_m256(a: m128) -> m256;
644        "Sets the lowest lane of an `m128d` as all lanes of an `m256d`.",
645        set_splat_m128d_s_m256d(a: m128d) -> m256d;
646        "Lanewise `u16` shift left by the lower `u64` lane of `count`.",
647        shl_all_u16_m256i(a: m256i, count: m128i) -> m256i;
648        "Shift all `u32` lanes left by the lower `u64` lane of `count`.",
649        shl_all_u32_m256i(a: m256i, count: m128i) -> m256i;
650        "Shift all `u64` lanes left by the lower `u64` lane of `count`.",
651        shl_all_u64_m256i(a: m256i, count: m128i) -> m256i;
652        "Shift `u32` values to the left by `count` bits.",
653        shl_each_u32_m128i(a: m128i, count: m128i) -> m128i;
654        "Lanewise `u32` shift left by the matching `i32` lane in `count`.",
655        shl_each_u32_m256i(a: m256i, count: m256i) -> m256i;
656        "Shift `u64` values to the left by `count` bits.",
657        shl_each_u64_m128i(a: m128i, count: m128i) -> m128i;
658        "Lanewise `u64` shift left by the matching `u64` lane in `count`.",
659        shl_each_u64_m256i(a: m256i, count: m256i) -> m256i;
660        "Shifts all `u16` lanes left by an immediate.",
661        shl_imm_u16_m256i<const IMM: i32>(a: m256i) -> m256i;
662        "Shifts all `u32` lanes left by an immediate.",
663        shl_imm_u32_m256i<const IMM: i32>(a: m256i) -> m256i;
664        "Shifts all `u64` lanes left by an immediate.",
665        shl_imm_u64_m256i<const IMM: i32>(a: m256i) -> m256i;
666        "Lanewise `i16` shift right by the lower `i64` lane of `count`.",
667        shr_all_i16_m256i(a: m256i, count: m128i) -> m256i;
668        "Lanewise `i32` shift right by the lower `i64` lane of `count`.",
669        shr_all_i32_m256i(a: m256i, count: m128i) -> m256i;
670        "Lanewise `u16` shift right by the lower `u64` lane of `count`.",
671        shr_all_u16_m256i(a: m256i, count: m128i) -> m256i;
672        "Lanewise `u32` shift right by the lower `u64` lane of `count`.",
673        shr_all_u32_m256i(a: m256i, count: m128i) -> m256i;
674        "Lanewise `u64` shift right by the lower `u64` lane of `count`.",
675        shr_all_u64_m256i(a: m256i, count: m128i) -> m256i;
676        "Shift `i32` values to the right by `count` bits.",
677        shr_each_i32_m128i(a: m128i, count: m128i) -> m128i;
678        "Lanewise `i32` shift right by the matching `i32` lane in `count`.",
679        shr_each_i32_m256i(a: m256i, count: m256i) -> m256i;
680        "Shift `u32` values to the left by `count` bits.",
681        shr_each_u32_m128i(a: m128i, count: m128i) -> m128i;
682        "Lanewise `u32` shift right by the matching `u32` lane in `count`.",
683        shr_each_u32_m256i(a: m256i, count: m256i) -> m256i;
684        "Shift `u64` values to the left by `count` bits.",
685        shr_each_u64_m128i(a: m128i, count: m128i) -> m128i;
686        "Lanewise `u64` shift right by the matching `i64` lane in `count`.",
687        shr_each_u64_m256i(a: m256i, count: m256i) -> m256i;
688        "Shifts all `i16` lanes left by an immediate.",
689        shr_imm_i16_m256i<const IMM: i32>(a: m256i) -> m256i;
690        "Shifts all `i32` lanes left by an immediate.",
691        shr_imm_i32_m256i<const IMM: i32>(a: m256i) -> m256i;
692        "Shifts all `u16` lanes right by an immediate.",
693        shr_imm_u16_m256i<const IMM: i32>(a: m256i) -> m256i;
694        "Shifts all `u32` lanes right by an immediate.",
695        shr_imm_u32_m256i<const IMM: i32>(a: m256i) -> m256i;
696        "Shifts all `u64` lanes right by an immediate.",
697        shr_imm_u64_m256i<const IMM: i32>(a: m256i) -> m256i;
698        "Shuffle 128 bits of integer data from `$a` and `$b` using an immediate control value.",
699        shuffle_abi_i128z_all_m256i<const MASK: i32>(a: m256i, b: m256i) -> m256i;
700        "Shuffle the `f64` lanes from `$a` using an immediate control value.",
701        shuffle_ai_f64_all_m256d<const IMM: i32>(a: m256d) -> m256d;
702        "Shuffle the high `i16` lanes in `$a` using an immediate control value.",
703        shuffle_ai_i16_h64half_m256i<const IMM: i32>(a: m256i) -> m256i;
704        "Shuffle the low `i16` lanes in `$a` using an immediate control value.",
705        shuffle_ai_i16_l64half_m256i<const IMM: i32>(a: m256i) -> m256i;
706        "Shuffle the `i32` lanes in `a` using an immediate control value.",
707        shuffle_ai_i32_half_m256i<const IMM: i32>(a: m256i) -> m256i;
708        "Shuffle the `f64` lanes in `$a` using an immediate control value.",
709        shuffle_ai_i64_all_m256i<const IMM: i32>(a: m256i) -> m256i;
710        "Shuffle `f32` lanes in `a` using `i32` values in `v`.",
711        shuffle_av_i32_all_m256(a: m256, v: m256i) -> m256;
712        "Shuffle `i32` lanes in `a` using `i32` values in `v`.",
713        shuffle_av_i32_all_m256i(a: m256i, v: m256i) -> m256i;
714        "Shuffle `i8` lanes in `a` using `i8` values in `v`.",
715        shuffle_av_i8z_half_m256i(a: m256i, v: m256i) -> m256i;
716        "Lanewise `a * signum(b)` with lanes as `i16`",
717        sign_apply_i16_m256i(a: m256i, b: m256i) -> m256i;
718        "Lanewise `a * signum(b)` with lanes as `i32`",
719        sign_apply_i32_m256i(a: m256i, b: m256i) -> m256i;
720        "Lanewise `a * signum(b)` with lanes as `i8`",
721        sign_apply_i8_m256i(a: m256i, b: m256i) -> m256i;
722        "Splat the lowest 16-bit lane across the entire 128 bits.",
723        splat_i16_m128i_s_m128i(a: m128i) -> m128i;
724        "Splat the lowest 32-bit lane across the entire 128 bits.",
725        splat_i32_m128i_s_m128i(a: m128i) -> m128i;
726        "Splat the lowest 64-bit lane across the entire 128 bits.",
727        splat_i64_m128i_s_m128i(a: m128i) -> m128i;
728        "Splat the lowest 8-bit lane across the entire 128 bits.",
729        splat_i8_m128i_s_m128i(a: m128i) -> m128i;
730        "Splat the lowest `f32` across all four lanes.",
731        splat_m128_s_m128(a: m128) -> m128;
732        "Splat the lower `f64` across both lanes of `m128d`.",
733        splat_m128d_s_m128d(a: m128d) -> m128d;
734        "Splat the 128-bits across 256-bits.",
735        splat_m128i_m256i(a: m128i) -> m256i;
736        "Stores the `i32` masked lanes given to the reference.",
737        store_masked_i32_m128i(addr: &mut m128i, mask: m128i, a: m128i);
738        "Stores the `i32` masked lanes given to the reference.",
739        store_masked_i32_m256i(addr: &mut m256i, mask: m256i, a: m256i);
740        "Stores the `i32` masked lanes given to the reference.",
741        store_masked_i64_m128i(addr: &mut m128i, mask: m128i, a: m128i);
742        "Stores the `i32` masked lanes given to the reference.",
743        store_masked_i64_m256i(addr: &mut m256i, mask: m256i, a: m256i);
744        "Horizontal `a - b` with lanes as `i16`.",
745        sub_horizontal_i16_m256i(a: m256i, b: m256i) -> m256i;
746        "Horizontal `a - b` with lanes as `i32`.",
747        sub_horizontal_i32_m256i(a: m256i, b: m256i) -> m256i;
748        "Horizontal saturating `a - b` with lanes as `i16`.",
749        sub_horizontal_saturating_i16_m256i(a: m256i, b: m256i) -> m256i;
750        "Lanewise `a - b` with lanes as `i16`.",
751        sub_i16_m256i(a: m256i, b: m256i) -> m256i;
752        "Lanewise `a - b` with lanes as `i32`.",
753        sub_i32_m256i(a: m256i, b: m256i) -> m256i;
754        "Lanewise `a - b` with lanes as `i64`.",
755        sub_i64_m256i(a: m256i, b: m256i) -> m256i;
756        "Lanewise `a - b` with lanes as `i8`.",
757        sub_i8_m256i(a: m256i, b: m256i) -> m256i;
758        "Lanewise saturating `a - b` with lanes as `i16`.",
759        sub_saturating_i16_m256i(a: m256i, b: m256i) -> m256i;
760        "Lanewise saturating `a - b` with lanes as `i8`.",
761        sub_saturating_i8_m256i(a: m256i, b: m256i) -> m256i;
762        "Lanewise saturating `a - b` with lanes as `u16`.",
763        sub_saturating_u16_m256i(a: m256i, b: m256i) -> m256i;
764        "Lanewise saturating `a - b` with lanes as `u8`.",
765        sub_saturating_u8_m256i(a: m256i, b: m256i) -> m256i;
766        "Compute “sum of `u8` absolute differences”.",
767        sum_of_u8_abs_diff_m256i(a: m256i, b: m256i) -> m256i;
768        "Unpack and interleave high `i16` lanes of `a` and `b`.",
769        unpack_high_i16_m256i(a: m256i, b: m256i) -> m256i;
770        "Unpack and interleave high `i32` lanes of `a` and `b`.",
771        unpack_high_i32_m256i(a: m256i, b: m256i) -> m256i;
772        "Unpack and interleave high `i64` lanes of `a` and `b`.",
773        unpack_high_i64_m256i(a: m256i, b: m256i) -> m256i;
774        "Unpack and interleave high `i8` lanes of `a` and `b`.",
775        unpack_high_i8_m256i(a: m256i, b: m256i) -> m256i;
776        "Unpack and interleave low `i16` lanes of `a` and `b`.",
777        unpack_low_i16_m256i(a: m256i, b: m256i) -> m256i;
778        "Unpack and interleave low `i32` lanes of `a` and `b`.",
779        unpack_low_i32_m256i(a: m256i, b: m256i) -> m256i;
780        "Unpack and interleave low `i64` lanes of `a` and `b`.",
781        unpack_low_i64_m256i(a: m256i, b: m256i) -> m256i;
782        "Unpack and interleave low `i8` lanes of `a` and `b`.",
783        unpack_low_i8_m256i(a: m256i, b: m256i) -> m256i;
784    }
785}
786impl_arch! {
787    #[doc = "# Functions requiring the `bmi1` target feature.\n\n---"]
788    #[doc = "See: <https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#BMI1_(Bit_Manipulation_Instruction_Set_1)>"]
789    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "bmi1";
790    arch_fn! {
791        "Extract a span of bits from the `u32`, control value style.",
792        bit_extract2_u32(a: u32, control: u32) -> u32;
793        "Extract a span of bits from the `u64`, control value style.",
794        bit_extract2_u64(a: u64, control: u64) -> u64;
795        "Extract a span of bits from the `u32`, start and len style.",
796        bit_extract_u32(a: u32, start: u32, len: u32) -> u32;
797        "Extract a span of bits from the `u64`, start and len style.",
798        bit_extract_u64(a: u64, start: u32, len: u32) -> u64;
799        "Gets the mask of all bits up to and including the lowest set bit in a `u32`.",
800        bit_lowest_set_mask_u32(a: u32) -> u32;
801        "Gets the mask of all bits up to and including the lowest set bit in a `u64`.",
802        bit_lowest_set_mask_u64(a: u64) -> u64;
803        "Resets (clears) the lowest set bit.",
804        bit_lowest_set_reset_u32(a: u32) -> u32;
805        "Resets (clears) the lowest set bit.",
806        bit_lowest_set_reset_u64(a: u64) -> u64;
807        "Gets the <em>value</em> of the lowest set bit in a `u32`.",
808        bit_lowest_set_value_u32(a: u32) -> u32;
809        "Gets the <em>value</em> of the lowest set bit in a `u64`.",
810        bit_lowest_set_value_u64(a: u64) -> u64;
811        "Bitwise `(!a) &amp; b` for `u32`",
812        bitandnot_u32(a: u32, b: u32) -> u32;
813        "Bitwise `(!a) &amp; b` for `u64`",
814        bitandnot_u64(a: u64, b: u64) -> u64;
815        "Counts the number of trailing zero bits in a `u32`.",
816        trailing_zero_count_u32(a: u32) -> u32;
817        "Counts the number of trailing zero bits in a `u64`.",
818        trailing_zero_count_u64(a: u64) -> u64;
819    }
820}
821impl_arch! {
822    #[doc = "# Functions requiring the `bmi2` target feature.\n\n---"]
823    #[doc = "See: <https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#BMI2_(Bit_Manipulation_Instruction_Set_2)>"]
824    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "bmi2";
825    arch_fn! {
826        "Zero out all high bits in a `u32` starting at the index given.",
827        bit_zero_high_index_u32(a: u32, index: u32) -> u32;
828        "Zero out all high bits in a `u64` starting at the index given.",
829        bit_zero_high_index_u64(a: u64, index: u32) -> u64;
830        "Multiply two `u32`, outputting the low bits and storing the high bits in the reference.",
831        mul_extended_u32(a: u32, b: u32, extra: &mut u32) -> u32;
832        "Multiply two `u64`, outputting the low bits and storing the high bits in the reference.",
833        mul_extended_u64(a: u64, b: u64, extra: &mut u64) -> u64;
834        "Deposit contiguous low bits from a `u32` according to a mask.",
835        population_deposit_u32(a: u32, index: u32) -> u32;
836        "Deposit contiguous low bits from a `u64` according to a mask.",
837        population_deposit_u64(a: u64, index: u64) -> u64;
838        "Extract bits from a `u32` according to a mask.",
839        population_extract_u32(a: u32, index: u32) -> u32;
840        "Extract bits from a `u64` according to a mask.",
841        population_extract_u64(a: u64, index: u64) -> u64;
842    }
843}
844impl_arch! {
845    #[doc = "# Functions requiring the `fma` target feature.\n\n---"]
846    #[doc = "See: <https://en.wikipedia.org/wiki/FMA_instruction_set>"]
847    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "fma";
848    arch_fn! {
849        "Lanewise fused `(a * b) + c`",
850        fused_mul_add_m128(a: m128, b: m128, c: m128) -> m128;
851        "Low lane fused `(a * b) + c`, other lanes unchanged",
852        fused_mul_add_m128_s(a: m128, b: m128, c: m128) -> m128;
853        "Lanewise fused `(a * b) + c`",
854        fused_mul_add_m128d(a: m128d, b: m128d, c: m128d) -> m128d;
855        "Low lane fused `(a * b) + c`, other lanes unchanged",
856        fused_mul_add_m128d_s(a: m128d, b: m128d, c: m128d) -> m128d;
857        "Lanewise fused `(a * b) + c`",
858        fused_mul_add_m256(a: m256, b: m256, c: m256) -> m256;
859        "Lanewise fused `(a * b) + c`",
860        fused_mul_add_m256d(a: m256d, b: m256d, c: m256d) -> m256d;
861        "Lanewise fused `(a * b) addsub c` (adds odd lanes and subtracts even lanes)",
862        fused_mul_addsub_m128(a: m128, b: m128, c: m128) -> m128;
863        "Lanewise fused `(a * b) addsub c` (adds odd lanes and subtracts even lanes)",
864        fused_mul_addsub_m128d(a: m128d, b: m128d, c: m128d) -> m128d;
865        "Lanewise fused `(a * b) addsub c` (adds odd lanes and subtracts even lanes)",
866        fused_mul_addsub_m256(a: m256, b: m256, c: m256) -> m256;
867        "Lanewise fused `(a * b) addsub c` (adds odd lanes and subtracts even lanes)",
868        fused_mul_addsub_m256d(a: m256d, b: m256d, c: m256d) -> m256d;
869        "Lanewise fused `-(a * b) + c`",
870        fused_mul_neg_add_m128(a: m128, b: m128, c: m128) -> m128;
871        "Low lane `-(a * b) + c`, other lanes unchanged.",
872        fused_mul_neg_add_m128_s(a: m128, b: m128, c: m128) -> m128;
873        "Lanewise fused `-(a * b) + c`",
874        fused_mul_neg_add_m128d(a: m128d, b: m128d, c: m128d) -> m128d;
875        "Low lane `-(a * b) + c`, other lanes unchanged.",
876        fused_mul_neg_add_m128d_s(a: m128d, b: m128d, c: m128d) -> m128d;
877        "Lanewise fused `-(a * b) + c`",
878        fused_mul_neg_add_m256(a: m256, b: m256, c: m256) -> m256;
879        "Lanewise fused `-(a * b) + c`",
880        fused_mul_neg_add_m256d(a: m256d, b: m256d, c: m256d) -> m256d;
881        "Lanewise fused `-(a * b) - c`",
882        fused_mul_neg_sub_m128(a: m128, b: m128, c: m128) -> m128;
883        "Low lane fused `-(a * b) - c`, other lanes unchanged.",
884        fused_mul_neg_sub_m128_s(a: m128, b: m128, c: m128) -> m128;
885        "Lanewise fused `-(a * b) - c`",
886        fused_mul_neg_sub_m128d(a: m128d, b: m128d, c: m128d) -> m128d;
887        "Low lane fused `-(a * b) - c`, other lanes unchanged.",
888        fused_mul_neg_sub_m128d_s(a: m128d, b: m128d, c: m128d) -> m128d;
889        "Lanewise fused `-(a * b) - c`",
890        fused_mul_neg_sub_m256(a: m256, b: m256, c: m256) -> m256;
891        "Lanewise fused `-(a * b) - c`",
892        fused_mul_neg_sub_m256d(a: m256d, b: m256d, c: m256d) -> m256d;
893        "Lanewise fused `(a * b) - c`",
894        fused_mul_sub_m128(a: m128, b: m128, c: m128) -> m128;
895        "Low lane fused `(a * b) - c`, other lanes unchanged.",
896        fused_mul_sub_m128_s(a: m128, b: m128, c: m128) -> m128;
897        "Lanewise fused `(a * b) - c`",
898        fused_mul_sub_m128d(a: m128d, b: m128d, c: m128d) -> m128d;
899        "Low lane fused `(a * b) - c`, other lanes unchanged.",
900        fused_mul_sub_m128d_s(a: m128d, b: m128d, c: m128d) -> m128d;
901        "Lanewise fused `(a * b) - c`",
902        fused_mul_sub_m256(a: m256, b: m256, c: m256) -> m256;
903        "Lanewise fused `(a * b) - c`",
904        fused_mul_sub_m256d(a: m256d, b: m256d, c: m256d) -> m256d;
905        "Lanewise fused `(a * b) subadd c` (subtracts odd lanes and adds even lanes)",
906        fused_mul_subadd_m128(a: m128, b: m128, c: m128) -> m128;
907        "Lanewise fused `(a * b) subadd c` (subtracts odd lanes and adds even lanes)",
908        fused_mul_subadd_m128d(a: m128d, b: m128d, c: m128d) -> m128d;
909        "Lanewise fused `(a * b) subadd c` (subtracts odd lanes and adds even lanes)",
910        fused_mul_subadd_m256(a: m256, b: m256, c: m256) -> m256;
911        "Lanewise fused `(a * b) subadd c` (subtracts odd lanes and adds even lanes)",
912        fused_mul_subadd_m256d(a: m256d, b: m256d, c: m256d) -> m256d;
913    }
914}
915impl_arch! {
916    #[doc = "# Functions requiring the `lzcnt` target feature.\n\n---"]
917    #[doc = "See: <https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#ABM_(Advanced_Bit_Manipulation)>"]
918    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "lzcnt";
919    arch_fn! {
920        "Count the leading zeroes in a `u32`.",
921        leading_zero_count_u32(a: u32) -> u32;
922        "Count the leading zeroes in a `u64`.",
923        leading_zero_count_u64(a: u64) -> u64;
924    }
925}
926impl_arch! {
927    #[doc = "# Functions requiring the `pclmulqdq` target feature.\n\n---"]
928    #[doc = "See: <https://en.wikipedia.org/wiki/CLMUL_instruction_set>"]
929    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "pclmulqdq";
930    arch_fn! {
931        "Performs a “carryless” multiplication of two `i64` values.",
932        mul_i64_carryless_m128i<const IMM: i32>(a: m128i, b: m128i) -> m128i;
933    }
934}
935impl_arch! {
936    #[doc = "# Functions requiring the `popcnt` target feature.\n\n---"]
937    #[doc = "See: <https://en.wikipedia.org/wiki/X86_Bit_manipulation_instruction_set#ABM_(Advanced_Bit_Manipulation)>"]
938    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "popcnt";
939    arch_fn! {
940        "Count the number of bits set within an `i32`",
941        population_count_i32(a: i32) -> i32;
942        "Count the number of bits set within an `i64`",
943        population_count_i64(a: i64) -> i32;
944    }
945}
946impl_arch! {
947    #[doc = "# Functions requiring the `rdrand` target feature.\n\n---"]
948    #[doc = "See: <https://en.wikipedia.org/wiki/RDRAND>"]
949    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "rdrand";
950    arch_fn! {
951        "Try to obtain a random `u16` from the hardware RNG.",
952        rdrand_u16(out: &mut u16) -> i32;
953        "Try to obtain a random `u32` from the hardware RNG.",
954        rdrand_u32(out: &mut u32) -> i32;
955        "Try to obtain a random `u64` from the hardware RNG.",
956        rdrand_u64(out: &mut u64) -> i32;
957    }
958}
959impl_arch! {
960    #[doc = "# Functions requiring the `rdseed` target feature.\n\n---"]
961    #[doc = "See: <https://en.wikipedia.org/wiki/RDRAND>"]
962    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "rdseed";
963    arch_fn! {
964        "Try to obtain a random `u16` from the hardware RNG.",
965        rdseed_u16(out: &mut u16) -> i32;
966        "Try to obtain a random `u32` from the hardware RNG.",
967        rdseed_u32(out: &mut u32) -> i32;
968        "Try to obtain a random `u64` from the hardware RNG.",
969        rdseed_u64(out: &mut u64) -> i32;
970    }
971}
972impl_arch! {
973    #[doc = "# Functions requiring the `sse` target feature.\n\n---"]
974    #[doc = "See: <https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions>"]
975    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "sse";
976    arch_fn! {
977        "Lanewise `a + b`.",
978        add_m128(a: m128, b: m128) -> m128;
979        "Low lane `a + b`, other lanes unchanged.",
980        add_m128_s(a: m128, b: m128) -> m128;
981        "Bitwise `a &amp; b`.",
982        bitand_m128(a: m128, b: m128) -> m128;
983        "Bitwise `(!a) &amp; b`.",
984        bitandnot_m128(a: m128, b: m128) -> m128;
985        "Bitwise `a | b`.",
986        bitor_m128(a: m128, b: m128) -> m128;
987        "Bitwise `a ^ b`.",
988        bitxor_m128(a: m128, b: m128) -> m128;
989        "Low lane equality.",
990        cmp_eq_i32_m128_s(a: m128, b: m128) -> i32;
991        "Lanewise `a == b`.",
992        cmp_eq_mask_m128(a: m128, b: m128) -> m128;
993        "Low lane `a == b`, other lanes unchanged.",
994        cmp_eq_mask_m128_s(a: m128, b: m128) -> m128;
995        "Low lane greater than or equal to.",
996        cmp_ge_i32_m128_s(a: m128, b: m128) -> i32;
997        "Lanewise `a &gt;= b`.",
998        cmp_ge_mask_m128(a: m128, b: m128) -> m128;
999        "Low lane `a &gt;= b`, other lanes unchanged.",
1000        cmp_ge_mask_m128_s(a: m128, b: m128) -> m128;
1001        "Low lane greater than.",
1002        cmp_gt_i32_m128_s(a: m128, b: m128) -> i32;
1003        "Lanewise `a &gt; b`.",
1004        cmp_gt_mask_m128(a: m128, b: m128) -> m128;
1005        "Low lane `a &gt; b`, other lanes unchanged.",
1006        cmp_gt_mask_m128_s(a: m128, b: m128) -> m128;
1007        "Low lane less than or equal to.",
1008        cmp_le_i32_m128_s(a: m128, b: m128) -> i32;
1009        "Lanewise `a &lt;= b`.",
1010        cmp_le_mask_m128(a: m128, b: m128) -> m128;
1011        "Low lane `a &lt;= b`, other lanes unchanged.",
1012        cmp_le_mask_m128_s(a: m128, b: m128) -> m128;
1013        "Low lane less than.",
1014        cmp_lt_i32_m128_s(a: m128, b: m128) -> i32;
1015        "Lanewise `a &lt; b`.",
1016        cmp_lt_mask_m128(a: m128, b: m128) -> m128;
1017        "Low lane `a &lt; b`, other lanes unchanged.",
1018        cmp_lt_mask_m128_s(a: m128, b: m128) -> m128;
1019        "Low lane not equal to.",
1020        cmp_neq_i32_m128_s(a: m128, b: m128) -> i32;
1021        "Lanewise `a != b`.",
1022        cmp_neq_mask_m128(a: m128, b: m128) -> m128;
1023        "Low lane `a != b`, other lanes unchanged.",
1024        cmp_neq_mask_m128_s(a: m128, b: m128) -> m128;
1025        "Lanewise `!(a &gt;= b)`.",
1026        cmp_nge_mask_m128(a: m128, b: m128) -> m128;
1027        "Low lane `!(a &gt;= b)`, other lanes unchanged.",
1028        cmp_nge_mask_m128_s(a: m128, b: m128) -> m128;
1029        "Lanewise `!(a &gt; b)`.",
1030        cmp_ngt_mask_m128(a: m128, b: m128) -> m128;
1031        "Low lane `!(a &gt; b)`, other lanes unchanged.",
1032        cmp_ngt_mask_m128_s(a: m128, b: m128) -> m128;
1033        "Lanewise `!(a &lt;= b)`.",
1034        cmp_nle_mask_m128(a: m128, b: m128) -> m128;
1035        "Low lane `!(a &lt;= b)`, other lanes unchanged.",
1036        cmp_nle_mask_m128_s(a: m128, b: m128) -> m128;
1037        "Lanewise `!(a &lt; b)`.",
1038        cmp_nlt_mask_m128(a: m128, b: m128) -> m128;
1039        "Low lane `!(a &lt; b)`, other lanes unchanged.",
1040        cmp_nlt_mask_m128_s(a: m128, b: m128) -> m128;
1041        "Lanewise `(!a.is_nan()) &amp; (!b.is_nan())`.",
1042        cmp_ordered_mask_m128(a: m128, b: m128) -> m128;
1043        "Low lane `(!a.is_nan()) &amp; (!b.is_nan())`, other lanes unchanged.",
1044        cmp_ordered_mask_m128_s(a: m128, b: m128) -> m128;
1045        "Lanewise `a.is_nan() | b.is_nan()`.",
1046        cmp_unord_mask_m128(a: m128, b: m128) -> m128;
1047        "Low lane `a.is_nan() | b.is_nan()`, other lanes unchanged.",
1048        cmp_unord_mask_m128_s(a: m128, b: m128) -> m128;
1049        "Convert `i32` to `f32` and replace the low lane of the input.",
1050        convert_i32_replace_m128_s(a: m128, i: i32) -> m128;
1051        "Lanewise `a / b`.",
1052        div_m128(a: m128, b: m128) -> m128;
1053        "Low lane `a / b`, other lanes unchanged.",
1054        div_m128_s(a: m128, b: m128) -> m128;
1055        "Gets the low lane as an individual `f32` value.",
1056        get_f32_from_m128_s(a: m128) -> f32;
1057        "Converts the low lane to `i32` and extracts as an individual value.",
1058        get_i32_from_m128_s(a: m128) -> i32;
1059        "Loads the `f32` reference into the low lane of the register.",
1060        load_f32_m128_s(a: &f32) -> m128;
1061        "Loads the `f32` reference into all lanes of a register.",
1062        load_f32_splat_m128(a: &f32) -> m128;
1063        "Loads the reference into a register.",
1064        load_m128(a: &m128) -> m128;
1065        "Loads the reference into a register with reversed order.",
1066        load_reverse_m128(a: &m128) -> m128;
1067        "Loads the reference into a register.",
1068        load_unaligned_m128(a: &[f32; 4]) -> m128;
1069        "Lanewise `max(a, b)`.",
1070        max_m128(a: m128, b: m128) -> m128;
1071        "Low lane `max(a, b)`, other lanes unchanged.",
1072        max_m128_s(a: m128, b: m128) -> m128;
1073        "Lanewise `min(a, b)`.",
1074        min_m128(a: m128, b: m128) -> m128;
1075        "Low lane `min(a, b)`, other lanes unchanged.",
1076        min_m128_s(a: m128, b: m128) -> m128;
1077        "Move the high lanes of `b` to the low lanes of `a`, other lanes unchanged.",
1078        move_high_low_m128(a: m128, b: m128) -> m128;
1079        "Move the low lanes of `b` to the high lanes of `a`, other lanes unchanged.",
1080        move_low_high_m128(a: m128, b: m128) -> m128;
1081        "Move the low lane of `b` to `a`, other lanes unchanged.",
1082        move_m128_s(a: m128, b: m128) -> m128;
1083        "Gathers the sign bit of each lane.",
1084        move_mask_m128(a: m128) -> i32;
1085        "Lanewise `a * b`.",
1086        mul_m128(a: m128, b: m128) -> m128;
1087        "Low lane `a * b`, other lanes unchanged.",
1088        mul_m128_s(a: m128, b: m128) -> m128;
1089        "Lanewise `1.0 / a` approximation.",
1090        reciprocal_m128(a: m128) -> m128;
1091        "Low lane `1.0 / a` approximation, other lanes unchanged.",
1092        reciprocal_m128_s(a: m128) -> m128;
1093        "Lanewise `1.0 / sqrt(a)` approximation.",
1094        reciprocal_sqrt_m128(a: m128) -> m128;
1095        "Low lane `1.0 / sqrt(a)` approximation, other lanes unchanged.",
1096        reciprocal_sqrt_m128_s(a: m128) -> m128;
1097        "Sets the args into an `m128`, first arg is the high lane.",
1098        set_m128(three: f32, two: f32, one: f32, zero: f32) -> m128;
1099        "Sets the args into an `m128`, first arg is the high lane.",
1100        set_m128_s(low: f32) -> m128;
1101        "Sets the args into an `m128`, first arg is the low lane.",
1102        set_reversed_m128(zero: f32, one: f32, two: f32, three: f32) -> m128;
1103        "Splats the value to all lanes.",
1104        set_splat_m128(all: f32) -> m128;
1105        "Shuffle the `f32` lanes from `$a` and `$b` together using an immediate control value.",
1106        shuffle_abi_f32_all_m128<const MASK: i32>(a: m128, b: m128) -> m128;
1107        "Lanewise `sqrt(a)`.",
1108        sqrt_m128(a: m128) -> m128;
1109        "Low lane `sqrt(a)`, other lanes unchanged.",
1110        sqrt_m128_s(a: m128) -> m128;
1111        "Stores the value to the reference given.",
1112        store_m128(r: &mut m128, a: m128);
1113        "Stores the low lane value to the reference given.",
1114        store_m128_s(r: &mut f32, a: m128);
1115        "Stores the value to the reference given in reverse order.",
1116        store_reverse_m128(r: &mut m128, a: m128);
1117        "Stores the low lane value to all lanes of the reference given.",
1118        store_splat_m128(r: &mut m128, a: m128);
1119        "Stores the value to the reference given.",
1120        store_unaligned_m128(r: &mut [f32; 4], a: m128);
1121        "Lanewise `a - b`.",
1122        sub_m128(a: m128, b: m128) -> m128;
1123        "Low lane `a - b`, other lanes unchanged.",
1124        sub_m128_s(a: m128, b: m128) -> m128;
1125        "Transpose four `m128` as if they were a 4x4 matrix.",
1126        transpose_four_m128(a: &mut m128, b: &mut m128, c: &mut m128, d: &mut m128);
1127        "Unpack and interleave high lanes of `a` and `b`.",
1128        unpack_high_m128(a: m128, b: m128) -> m128;
1129        "Unpack and interleave low lanes of `a` and `b`.",
1130        unpack_low_m128(a: m128, b: m128) -> m128;
1131        "All lanes zero.",
1132        zeroed_m128() -> m128;
1133    }
1134}
impl_arch! {
    #[doc = "# Generic functions requiring the `sse` target feature.\n\n---"]
    #[doc = "See: <https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions>"]
    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "sse";

    // Cache-prefetch hints, generic over the pointee type `T`.
    //
    // These are written out as full function items instead of `arch_fn!`
    // entries. Every wrapper forwards its `addr` argument unchanged to the
    // identically named free function and returns nothing.
    // NOTE(review): the callee is presumably the `safe_arch` function brought
    // in by the glob import at the top of this file — confirm.

    /// Fetches the cache line containing `addr` into all levels of the cache hierarchy,
    /// anticipating write.
    pub fn prefetch_et0<T>(addr: &T) {
        prefetch_et0(addr);
    }
    /// Fetches into L2 and higher, anticipating write.
    pub fn prefetch_et1<T>(addr: &T) {
        prefetch_et1(addr);
    }
    /// Fetches data using the non-temporal access (NTA) hint.
    ///
    /// It may be a place closer than main memory but outside of the cache hierarchy.
    ///
    /// This is used to reduce access latency without polluting the cache.
    pub fn prefetch_nta<T>(addr: &T) {
        prefetch_nta(addr);
    }
    /// Fetches the cache line containing `addr` into all levels of the cache hierarchy.
    pub fn prefetch_t0<T>(addr: &T) {
        prefetch_t0(addr);
    }
    /// Fetches into L2 and higher.
    pub fn prefetch_t1<T>(addr: &T) {
        prefetch_t1(addr);
    }
    /// Fetches into L3 and higher or an implementation-specific choice
    /// (e.g., L2 if there is no L3).
    pub fn prefetch_t2<T>(addr: &T) {
        prefetch_t2(addr);
    }
}
1171impl_arch! {
1172    #[doc = "# Functions requiring the `sse2` target feature.\n\n---"]
1173    #[doc = "See: <https://en.wikipedia.org/wiki/SSE2>"]
1174    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "sse2";
1175    arch_fn! {
1176        "Lanewise `a + b` with lanes as `i16`.",
1177        add_i16_m128i(a: m128i, b: m128i) -> m128i;
1178        "Lanewise `a + b` with lanes as `i32`.",
1179        add_i32_m128i(a: m128i, b: m128i) -> m128i;
1180        "Lanewise `a + b` with lanes as `i64`.",
1181        add_i64_m128i(a: m128i, b: m128i) -> m128i;
1182        "Lanewise `a + b` with lanes as `i8`.",
1183        add_i8_m128i(a: m128i, b: m128i) -> m128i;
1184        "Lanewise `a + b`.",
1185        add_m128d(a: m128d, b: m128d) -> m128d;
1186        "Lowest lane `a + b`, high lane unchanged.",
1187        add_m128d_s(a: m128d, b: m128d) -> m128d;
1188        "Lanewise saturating `a + b` with lanes as `i16`.",
1189        add_saturating_i16_m128i(a: m128i, b: m128i) -> m128i;
1190        "Lanewise saturating `a + b` with lanes as `i8`.",
1191        add_saturating_i8_m128i(a: m128i, b: m128i) -> m128i;
1192        "Lanewise saturating `a + b` with lanes as `u16`.",
1193        add_saturating_u16_m128i(a: m128i, b: m128i) -> m128i;
1194        "Lanewise saturating `a + b` with lanes as `u8`.",
1195        add_saturating_u8_m128i(a: m128i, b: m128i) -> m128i;
1196        "Lanewise average of the `u16` values.",
1197        average_u16_m128i(a: m128i, b: m128i) -> m128i;
1198        "Lanewise average of the `u8` values.",
1199        average_u8_m128i(a: m128i, b: m128i) -> m128i;
1200        "Bitwise `a &amp; b`.",
1201        bitand_m128d(a: m128d, b: m128d) -> m128d;
1202        "Bitwise `a &amp; b`.",
1203        bitand_m128i(a: m128i, b: m128i) -> m128i;
1204        "Bitwise `(!a) &amp; b`.",
1205        bitandnot_m128d(a: m128d, b: m128d) -> m128d;
1206        "Bitwise `(!a) &amp; b`.",
1207        bitandnot_m128i(a: m128i, b: m128i) -> m128i;
1208        "Bitwise `a | b`.",
1209        bitor_m128d(a: m128d, b: m128d) -> m128d;
1210        "Bitwise `a | b`.",
1211        bitor_m128i(a: m128i, b: m128i) -> m128i;
1212        "Bitwise `a ^ b`.",
1213        bitxor_m128d(a: m128d, b: m128d) -> m128d;
1214        "Bitwise `a ^ b`.",
1215        bitxor_m128i(a: m128i, b: m128i) -> m128i;
1216        "Shifts all bits in the entire register left by a number of **bytes**.",
1217        byte_shl_imm_u128_m128i<const IMM: i32>(a: m128i) -> m128i;
1218        "Shifts all bits in the entire register right by a number of **bytes**.",
1219        byte_shr_imm_u128_m128i<const IMM: i32>(a: m128i) -> m128i;
1220        "Bit-preserving cast to `m128` from `m128d`",
1221        cast_to_m128_from_m128d(a: m128d) -> m128;
1222        "Bit-preserving cast to `m128` from `m128i`",
1223        cast_to_m128_from_m128i(a: m128i) -> m128;
1224        "Bit-preserving cast to `m128d` from `m128`",
1225        cast_to_m128d_from_m128(a: m128) -> m128d;
1226        "Bit-preserving cast to `m128d` from `m128i`",
1227        cast_to_m128d_from_m128i(a: m128i) -> m128d;
1228        "Bit-preserving cast to `m128i` from `m128`",
1229        cast_to_m128i_from_m128(a: m128) -> m128i;
1230        "Bit-preserving cast to `m128i` from `m128d`",
1231        cast_to_m128i_from_m128d(a: m128d) -> m128i;
1232        "Low lane `f64` equal to.",
1233        cmp_eq_i32_m128d_s(a: m128d, b: m128d) -> i32;
1234        "Lanewise `a == b` with lanes as `i16`.",
1235        cmp_eq_mask_i16_m128i(a: m128i, b: m128i) -> m128i;
1236        "Lanewise `a == b` with lanes as `i32`.",
1237        cmp_eq_mask_i32_m128i(a: m128i, b: m128i) -> m128i;
1238        "Lanewise `a == b` with lanes as `i8`.",
1239        cmp_eq_mask_i8_m128i(a: m128i, b: m128i) -> m128i;
1240        "Lanewise `a == b`, mask output.",
1241        cmp_eq_mask_m128d(a: m128d, b: m128d) -> m128d;
1242        "Low lane `a == b`, other lanes unchanged.",
1243        cmp_eq_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1244        "Low lane `f64` greater than or equal to.",
1245        cmp_ge_i32_m128d_s(a: m128d, b: m128d) -> i32;
1246        "Lanewise `a &gt;= b`.",
1247        cmp_ge_mask_m128d(a: m128d, b: m128d) -> m128d;
1248        "Low lane `a &gt;= b`, other lanes unchanged.",
1249        cmp_ge_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1250        "Low lane `f64` greater than.",
1251        cmp_gt_i32_m128d_s(a: m128d, b: m128d) -> i32;
1252        "Lanewise `a &gt; b` with lanes as `i16`.",
1253        cmp_gt_mask_i16_m128i(a: m128i, b: m128i) -> m128i;
1254        "Lanewise `a &gt; b` with lanes as `i32`.",
1255        cmp_gt_mask_i32_m128i(a: m128i, b: m128i) -> m128i;
1256        "Lanewise `a &gt; b` with lanes as `i8`.",
1257        cmp_gt_mask_i8_m128i(a: m128i, b: m128i) -> m128i;
1258        "Lanewise `a &gt; b`.",
1259        cmp_gt_mask_m128d(a: m128d, b: m128d) -> m128d;
1260        "Low lane `a &gt; b`, other lanes unchanged.",
1261        cmp_gt_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1262        "Low lane `f64` less than or equal to.",
1263        cmp_le_i32_m128d_s(a: m128d, b: m128d) -> i32;
1264        "Lanewise `a &lt;= b`.",
1265        cmp_le_mask_m128d(a: m128d, b: m128d) -> m128d;
1266        "Low lane `a &lt;= b`, other lanes unchanged.",
1267        cmp_le_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1268        "Low lane `f64` less than.",
1269        cmp_lt_i32_m128d_s(a: m128d, b: m128d) -> i32;
1270        "Lanewise `a &lt; b` with lanes as `i16`.",
1271        cmp_lt_mask_i16_m128i(a: m128i, b: m128i) -> m128i;
1272        "Lanewise `a &lt; b` with lanes as `i32`.",
1273        cmp_lt_mask_i32_m128i(a: m128i, b: m128i) -> m128i;
1274        "Lanewise `a &lt; b` with lanes as `i8`.",
1275        cmp_lt_mask_i8_m128i(a: m128i, b: m128i) -> m128i;
1276        "Lanewise `a &lt; b`.",
1277        cmp_lt_mask_m128d(a: m128d, b: m128d) -> m128d;
1278        "Low lane `a &lt; b`, other lane unchanged.",
1279        cmp_lt_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1280        "Low lane `f64` less than.",
1281        cmp_neq_i32_m128d_s(a: m128d, b: m128d) -> i32;
1282        "Lanewise `a != b`.",
1283        cmp_neq_mask_m128d(a: m128d, b: m128d) -> m128d;
1284        "Low lane `a != b`, other lane unchanged.",
1285        cmp_neq_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1286        "Lanewise `!(a &gt;= b)`.",
1287        cmp_nge_mask_m128d(a: m128d, b: m128d) -> m128d;
1288        "Low lane `!(a &gt;= b)`, other lane unchanged.",
1289        cmp_nge_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1290        "Lanewise `!(a &gt; b)`.",
1291        cmp_ngt_mask_m128d(a: m128d, b: m128d) -> m128d;
1292        "Low lane `!(a &gt; b)`, other lane unchanged.",
1293        cmp_ngt_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1294        "Lanewise `!(a &lt;= b)`.",
1295        cmp_nle_mask_m128d(a: m128d, b: m128d) -> m128d;
1296        "Low lane `!(a &lt;= b)`, other lane unchanged.",
1297        cmp_nle_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1298        "Lanewise `!(a &lt; b)`.",
1299        cmp_nlt_mask_m128d(a: m128d, b: m128d) -> m128d;
1300        "Low lane `!(a &lt; b)`, other lane unchanged.",
1301        cmp_nlt_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1302        "Lanewise `(!a.is_nan()) &amp; (!b.is_nan())`.",
1303        cmp_ordered_mask_m128d(a: m128d, b: m128d) -> m128d;
1304        "Low lane `(!a.is_nan()) &amp; (!b.is_nan())`, other lane unchanged.",
1305        cmp_ordered_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1306        "Lanewise `a.is_nan() | b.is_nan()`.",
1307        cmp_unord_mask_m128d(a: m128d, b: m128d) -> m128d;
1308        "Low lane `a.is_nan() | b.is_nan()`, other lane unchanged.",
1309        cmp_unord_mask_m128d_s(a: m128d, b: m128d) -> m128d;
1310        "Convert `i32` to `f64` and replace the low lane of the input.",
1311        convert_i32_replace_m128d_s(a: m128d, i: i32) -> m128d;
1312        "Convert `i64` to `f64` and replace the low lane of the input.",
1313        convert_i64_replace_m128d_s(a: m128d, i: i64) -> m128d;
1314        "Converts the lower `f32` to `f64` and replace the low lane of the input",
1315        convert_m128_s_replace_m128d_s(a: m128d, b: m128) -> m128d;
1316        "Converts the low `f64` to `f32` and replaces the low lane of the input.",
1317        convert_m128d_s_replace_m128_s(a: m128, b: m128d) -> m128;
1318        "Rounds the `f32` lanes to `i32` lanes.",
1319        convert_to_i32_m128i_from_m128(a: m128) -> m128i;
1320        "Rounds the two `f64` lanes to the low two `i32` lanes.",
1321        convert_to_i32_m128i_from_m128d(a: m128d) -> m128i;
1322        "Rounds the four `i32` lanes to four `f32` lanes.",
1323        convert_to_m128_from_i32_m128i(a: m128i) -> m128;
1324        "Rounds the two `f64` lanes to the low two `f32` lanes.",
1325        convert_to_m128_from_m128d(a: m128d) -> m128;
1326        "Rounds the lower two `i32` lanes to two `f64` lanes.",
1327        convert_to_m128d_from_lower2_i32_m128i(a: m128i) -> m128d;
1328        "Rounds the two `f64` lanes to the low two `f32` lanes.",
1329        convert_to_m128d_from_lower2_m128(a: m128) -> m128d;
1330        "Copy the low `i64` lane to a new register, upper bits 0.",
1331        copy_i64_m128i_s(a: m128i) -> m128i;
1332        "Copies the `a` value and replaces the low lane with the low `b` value.",
1333        copy_replace_low_f64_m128d(a: m128d, b: m128d) -> m128d;
1334        "Lanewise `a / b`.",
1335        div_m128d(a: m128d, b: m128d) -> m128d;
1336        "Lowest lane `a / b`, high lane unchanged.",
1337        div_m128d_s(a: m128d, b: m128d) -> m128d;
1338        "Gets an `i16` value out of an `m128i`, returns as `i32`.",
1339        extract_i16_as_i32_m128i<const LANE: i32>(a: m128i) -> i32;
1340        "Gets the lower lane as an `f64` value.",
1341        get_f64_from_m128d_s(a: m128d) -> f64;
1342        "Converts the lower lane to an `i32` value.",
1343        get_i32_from_m128d_s(a: m128d) -> i32;
1344        "Converts the lower lane to an `i32` value.",
1345        get_i32_from_m128i_s(a: m128i) -> i32;
1346        "Converts the lower lane to an `i64` value.",
1347        get_i64_from_m128d_s(a: m128d) -> i64;
1348        "Converts the lower lane to an `i64` value.",
1349        get_i64_from_m128i_s(a: m128i) -> i64;
1350        "Inserts the low 16 bits of an `i32` value into an `m128i`.",
1351        insert_i16_from_i32_m128i<const LANE: i32>(a: m128i, i: i32) -> m128i;
1352        "Loads the reference into the low lane of the register.",
1353        load_f64_m128d_s(a: &f64) -> m128d;
1354        "Loads the `f64` reference into all lanes of a register.",
1355        load_f64_splat_m128d(a: &f64) -> m128d;
1356        "Loads the low `i64` into a register.",
1357        load_i64_m128i_s(a: &m128i) -> m128i;
1358        "Loads the reference into a register.",
1359        load_m128d(a: &m128d) -> m128d;
1360        "Loads the reference into a register.",
1361        load_m128i(a: &m128i) -> m128i;
1362        "Loads the reference into a register, replacing the high lane.",
1363        load_replace_high_m128d(a: m128d, b: &f64) -> m128d;
1364        "Loads the reference into a register, replacing the low lane.",
1365        load_replace_low_m128d(a: m128d, b: &f64) -> m128d;
1366        "Loads the reference into a register with reversed order.",
1367        load_reverse_m128d(a: &m128d) -> m128d;
1368        "Loads the reference into a register.",
1369        load_unaligned_m128d(a: &[f64; 2]) -> m128d;
1370        "Loads the reference into a register.",
1371        load_unaligned_m128i(a: &[u8; 16]) -> m128i;
1372        "Lanewise `max(a, b)` with lanes as `i16`.",
1373        max_i16_m128i(a: m128i, b: m128i) -> m128i;
1374        "Lanewise `max(a, b)`.",
1375        max_m128d(a: m128d, b: m128d) -> m128d;
1376        "Low lane `max(a, b)`, other lanes unchanged.",
1377        max_m128d_s(a: m128d, b: m128d) -> m128d;
1378        "Lanewise `max(a, b)` with lanes as `u8`.",
1379        max_u8_m128i(a: m128i, b: m128i) -> m128i;
1380        "Lanewise `min(a, b)` with lanes as `i16`.",
1381        min_i16_m128i(a: m128i, b: m128i) -> m128i;
1382        "Lanewise `min(a, b)`.",
1383        min_m128d(a: m128d, b: m128d) -> m128d;
1384        "Low lane `min(a, b)`, other lanes unchanged.",
1385        min_m128d_s(a: m128d, b: m128d) -> m128d;
1386        "Lanewise `min(a, b)` with lanes as `u8`.",
1387        min_u8_m128i(a: m128i, b: m128i) -> m128i;
1388        "Gathers the `i8` sign bit of each lane.",
1389        move_mask_i8_m128i(a: m128i) -> i32;
1390        "Gathers the sign bit of each lane.",
1391        move_mask_m128d(a: m128d) -> i32;
1392        "Multiply `i16` lanes producing `i32` values, horizontal add pairs of `i32`
1393        values to produce the final output.",
1394        mul_i16_horizontal_add_m128i(a: m128i, b: m128i) -> m128i;
1395        "Lanewise `a * b` with lanes as `i16`, keep the high bits of the `i32` intermediates.",
1396        mul_i16_keep_high_m128i(a: m128i, b: m128i) -> m128i;
1397        "Lanewise `a * b` with lanes as `i16`, keep the low bits of the `i32` intermediates.",
1398        mul_i16_keep_low_m128i(a: m128i, b: m128i) -> m128i;
1399        "Lanewise `a * b`.",
1400        mul_m128d(a: m128d, b: m128d) -> m128d;
1401        "Lowest lane `a * b`, high lane unchanged.",
1402        mul_m128d_s(a: m128d, b: m128d) -> m128d;
1403        "Lanewise `a * b` with lanes as `u16`, keep the high bits of the `u32` intermediates.",
1404        mul_u16_keep_high_m128i(a: m128i, b: m128i) -> m128i;
1405        "Multiplies the odd `u32` lanes and gives the widened (`u64`) results.",
1406        mul_widen_u32_odd_m128i(a: m128i, b: m128i) -> m128i;
1407        "Saturating convert `i16` to `i8`, and pack the values.",
1408        pack_i16_to_i8_m128i(a: m128i, b: m128i) -> m128i;
1409        "Saturating convert `i16` to `u8`, and pack the values.",
1410        pack_i16_to_u8_m128i(a: m128i, b: m128i) -> m128i;
1411        "Saturating convert `i32` to `i16`, and pack the values.",
1412        pack_i32_to_i16_m128i(a: m128i, b: m128i) -> m128i;
1413        "Sets the args into an `m128i`, first arg is the high lane.",
1414        set_i16_m128i(a: i16, b: i16, c: i16, d: i16, e: i16, f: i16, g: i16, h: i16) -> m128i;
1415        "Sets the args into an `m128i`, first arg is the high lane.",
1416        set_i32_m128i(a: i32, b: i32, c: i32, d: i32) -> m128i;
1417        "Set an `i32` as the low 32-bit lane of an `m128i`, other lanes blank.",
1418        set_i32_m128i_s(i: i32) -> m128i;
1419        "Sets the args into an `m128i`, first arg is the high lane.",
1420        set_i64_m128i(a: i64, b: i64) -> m128i;
1421        "Set an `i64` as the low 64-bit lane of an `m128i`, other lanes blank.",
1422        set_i64_m128i_s(i: i64) -> m128i;
1423        "Sets the args into an `m128i`, first arg is the high lane.",
1424        set_i8_m128i(a: i8, b: i8, c: i8, d: i8, e: i8, f: i8, g: i8, h: i8, i: i8, j: i8,
1425            k: i8, l: i8, m: i8, n: i8, o: i8, p: i8) -> m128i;
1426        "Sets the args into an `m128d`, first arg is the high lane.",
1427        set_m128d(a: f64, b: f64) -> m128d;
1428        "Sets the args into the low lane of a `m128d`.",
1429        set_m128d_s(a: f64) -> m128d;
1430        "Sets the args into an `m128i`, first arg is the low lane.",
1431        set_reversed_i16_m128i(a: i16, b: i16, c: i16, d: i16, e: i16, f: i16, g: i16,
1432            h: i16) -> m128i;
1433        "Sets the args into an `m128i`, first arg is the low lane.",
1434        set_reversed_i32_m128i(a: i32, b: i32, c: i32, d: i32) -> m128i;
1435        "Sets the args into an `m128i`, first arg is the low lane.",
1436        set_reversed_i8_m128i(a: i8, b: i8, c: i8, d: i8, e: i8, f: i8, g: i8, h: i8, i: i8,
1437            j: i8, k: i8, l: i8, m: i8, n: i8, o: i8, p: i8) -> m128i;
1438        "Sets the args into an `m128d`, first arg is the low lane.",
1439        set_reversed_m128d(a: f64, b: f64) -> m128d;
1440        "Splats the `i16` to all lanes of the `m128i`.",
1441        set_splat_i16_m128i(i: i16) -> m128i;
1442        "Splats the `i32` to all lanes of the `m128i`.",
1443        set_splat_i32_m128i(i: i32) -> m128i;
1444        "Splats the `i64` to both lanes of the `m128i`.",
1445        set_splat_i64_m128i(i: i64) -> m128i;
1446        "Splats the `i8` to all lanes of the `m128i`.",
1447        set_splat_i8_m128i(i: i8) -> m128i;
1448        "Splats the args into both lanes of the `m128d`.",
1449        set_splat_m128d(a: f64) -> m128d;
1450        "Shift all `u16` lanes to the left by the `count` in the lower `u64` lane.",
1451        shl_all_u16_m128i(a: m128i, count: m128i) -> m128i;
1452        "Shift all `u32` lanes to the left by the `count` in the lower `u64` lane.",
1453        shl_all_u32_m128i(a: m128i, count: m128i) -> m128i;
1454        "Shift all `u64` lanes to the left by the `count` in the lower `u64` lane.",
1455        shl_all_u64_m128i(a: m128i, count: m128i) -> m128i;
1456        "Shifts all `u16` lanes left by an immediate.",
1457        shl_imm_u16_m128i<const IMM: i32>(a: m128i) -> m128i;
1458        "Shifts all `u32` lanes left by an immediate.",
1459        shl_imm_u32_m128i<const IMM: i32>(a: m128i) -> m128i;
1460        "Shifts both `u64` lanes left by an immediate.",
1461        shl_imm_u64_m128i<const IMM: i32>(a: m128i) -> m128i;
1462        "Shift each `i16` lane to the right by the `count` in the lower `i64` lane.",
1463        shr_all_i16_m128i(a: m128i, count: m128i) -> m128i;
1464        "Shift each `i32` lane to the right by the `count` in the lower `i64` lane.",
1465        shr_all_i32_m128i(a: m128i, count: m128i) -> m128i;
1466        "Shift each `u16` lane to the right by the `count` in the lower `u64` lane.",
1467        shr_all_u16_m128i(a: m128i, count: m128i) -> m128i;
1468        "Shift each `u32` lane to the right by the `count` in the lower `u64` lane.",
1469        shr_all_u32_m128i(a: m128i, count: m128i) -> m128i;
1470        "Shift each `u64` lane to the right by the `count` in the lower `u64` lane.",
1471        shr_all_u64_m128i(a: m128i, count: m128i) -> m128i;
1472        "Shifts all `i16` lanes right by an immediate.",
1473        shr_imm_i16_m128i<const IMM: i32>(a: m128i) -> m128i;
1474        "Shifts all `i32` lanes right by an immediate.",
1475        shr_imm_i32_m128i<const IMM: i32>(a: m128i) -> m128i;
1476        "Shifts all `u16` lanes right by an immediate.",
1477        shr_imm_u16_m128i<const IMM: i32>(a: m128i) -> m128i;
1478        "Shifts all `u32` lanes right by an immediate.",
1479        shr_imm_u32_m128i<const IMM: i32>(a: m128i) -> m128i;
1480        "Shifts both `u64` lanes right by an immediate.",
1481        shr_imm_u64_m128i<const IMM: i32>(a: m128i) -> m128i;
1482        "Shuffle the `f64` lanes from `$a` and `$b` together using an immediate control value.",
1483        shuffle_abi_f64_all_m128d<const MASK: i32>(a: m128d, b: m128d) -> m128d;
1484        "Shuffle the `i32` lanes in `$a` using an immediate control value.",
1485        shuffle_ai_f32_all_m128i<const MASK: i32>(a: m128i) -> m128i;
1486        "Shuffle the high `i16` lanes in `$a` using an immediate control value.",
1487        shuffle_ai_i16_h64all_m128i<const MASK: i32>(a: m128i) -> m128i;
1488        "Shuffle the low `i16` lanes in `$a` using an immediate control value.",
1489        shuffle_ai_i16_l64all_m128i<const MASK: i32>(a: m128i) -> m128i;
1490        "Lanewise `sqrt(a)`.",
1491        sqrt_m128d(a: m128d) -> m128d;
1492        "Low lane `sqrt(b)`, upper lane is unchanged from `a`.",
1493        sqrt_m128d_s(a: m128d, b: m128d) -> m128d;
1494        "Stores the high lane value to the reference given.",
1495        store_high_m128d_s(r: &mut f64, a: m128d);
1496        "Stores the value to the reference given.",
1497        store_i64_m128i_s(r: &mut i64, a: m128i);
1498        "Stores the value to the reference given.",
1499        store_m128d(r: &mut m128d, a: m128d);
1500        "Stores the low lane value to the reference given.",
1501        store_m128d_s(r: &mut f64, a: m128d);
1502        "Stores the value to the reference given.",
1503        store_m128i(r: &mut m128i, a: m128i);
1504        "Stores the value to the reference given.",
1505        store_reversed_m128d(r: &mut m128d, a: m128d);
1506        "Stores the low lane value to all lanes of the reference given.",
1507        store_splat_m128d(r: &mut m128d, a: m128d);
1508        "Stores the value to the reference given.",
1509        store_unaligned_m128d(r: &mut [f64; 2], a: m128d);
1510        "Stores the value to the reference given.",
1511        store_unaligned_m128i(r: &mut [u8; 16], a: m128i);
1512        "Lanewise `a - b` with lanes as `i16`.",
1513        sub_i16_m128i(a: m128i, b: m128i) -> m128i;
1514        "Lanewise `a - b` with lanes as `i32`.",
1515        sub_i32_m128i(a: m128i, b: m128i) -> m128i;
1516        "Lanewise `a - b` with lanes as `i64`.",
1517        sub_i64_m128i(a: m128i, b: m128i) -> m128i;
1518        "Lanewise `a - b` with lanes as `i8`.",
1519        sub_i8_m128i(a: m128i, b: m128i) -> m128i;
1520        "Lanewise `a - b`.",
1521        sub_m128d(a: m128d, b: m128d) -> m128d;
1522        "Lowest lane `a - b`, high lane unchanged.",
1523        sub_m128d_s(a: m128d, b: m128d) -> m128d;
1524        "Lanewise saturating `a - b` with lanes as `i16`.",
1525        sub_saturating_i16_m128i(a: m128i, b: m128i) -> m128i;
1526        "Lanewise saturating `a - b` with lanes as `i8`.",
1527        sub_saturating_i8_m128i(a: m128i, b: m128i) -> m128i;
1528        "Lanewise saturating `a - b` with lanes as `u16`.",
1529        sub_saturating_u16_m128i(a: m128i, b: m128i) -> m128i;
1530        "Lanewise saturating `a - b` with lanes as `u8`.",
1531        sub_saturating_u8_m128i(a: m128i, b: m128i) -> m128i;
1532        "Compute “sum of `u8` absolute differences”.",
1533        sum_of_u8_abs_diff_m128i(a: m128i, b: m128i) -> m128i;
1534        "Truncate the `f32` lanes to `i32` lanes.",
1535        truncate_m128_to_m128i(a: m128) -> m128i;
1536        "Truncate the `f64` lanes to the lower `i32` lanes (upper `i32` lanes 0).",
1537        truncate_m128d_to_m128i(a: m128d) -> m128i;
1538        "Truncate the lower lane into an `i32`.",
1539        truncate_to_i32_m128d_s(a: m128d) -> i32;
1540        "Truncate the lower lane into an `i64`.",
1541        truncate_to_i64_m128d_s(a: m128d) -> i64;
1542        "Unpack and interleave high `i16` lanes of `a` and `b`.",
1543        unpack_high_i16_m128i(a: m128i, b: m128i) -> m128i;
1544        "Unpack and interleave high `i32` lanes of `a` and `b`.",
1545        unpack_high_i32_m128i(a: m128i, b: m128i) -> m128i;
1546        "Unpack and interleave high `i64` lanes of `a` and `b`.",
1547        unpack_high_i64_m128i(a: m128i, b: m128i) -> m128i;
1548        "Unpack and interleave high `i8` lanes of `a` and `b`.",
1549        unpack_high_i8_m128i(a: m128i, b: m128i) -> m128i;
1550        "Unpack and interleave high lanes of `a` and `b`.",
1551        unpack_high_m128d(a: m128d, b: m128d) -> m128d;
1552        "Unpack and interleave low `i16` lanes of `a` and `b`.",
1553        unpack_low_i16_m128i(a: m128i, b: m128i) -> m128i;
1554        "Unpack and interleave low `i32` lanes of `a` and `b`.",
1555        unpack_low_i32_m128i(a: m128i, b: m128i) -> m128i;
1556        "Unpack and interleave low `i64` lanes of `a` and `b`.",
1557        unpack_low_i64_m128i(a: m128i, b: m128i) -> m128i;
1558        "Unpack and interleave low `i8` lanes of `a` and `b`.",
1559        unpack_low_i8_m128i(a: m128i, b: m128i) -> m128i;
1560        "Unpack and interleave low lanes of `a` and `b`.",
1561        unpack_low_m128d(a: m128d, b: m128d) -> m128d;
1562        "Both lanes zero.",
1563        zeroed_m128d() -> m128d;
1564        "All lanes zero.",
1565        zeroed_m128i() -> m128i;
1566    }
1567}
1568impl_arch! {
1569    #[doc = "# Functions requiring the `sse3` target feature.\n\n---"]
1570    #[doc = "See: <https://en.wikipedia.org/wiki/SSE3>"]
1571    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "sse3";
1572    arch_fn! {
1573        "Add each lane horizontally, pack the outputs as `a` then `b`.",
1574        add_horizontal_m128(a: m128, b: m128) -> m128;
1575        "Add each lane horizontally, pack the outputs as `a` then `b`.",
1576        add_horizontal_m128d(a: m128d, b: m128d) -> m128d;
1577        "Alternately, from the top, add a lane and then subtract a lane.",
1578        addsub_m128(a: m128, b: m128) -> m128;
1579        "Add the high lane and subtract the low lane.",
1580        addsub_m128d(a: m128d, b: m128d) -> m128d;
1581        "Duplicate the odd lanes to the even lanes.",
1582        duplicate_even_lanes_m128(a: m128) -> m128;
1583        "Copy the low lane of the input to both lanes of the output.",
1584        duplicate_low_lane_m128d_s(a: m128d) -> m128d;
1585        "Duplicate the odd lanes to the even lanes.",
1586        duplicate_odd_lanes_m128(a: m128) -> m128;
1587        "Subtract each lane horizontally, pack the outputs as `a` then `b`.",
1588        sub_horizontal_m128(a: m128, b: m128) -> m128;
1589        "Subtract each lane horizontally, pack the outputs as `a` then `b`.",
1590        sub_horizontal_m128d(a: m128d, b: m128d) -> m128d;
1591    }
1592}
1593impl_arch! {
1594    #[doc = "# Functions requiring the `sse4.1` target feature.\n\n---"]
1595    #[doc = "See: <https://en.wikipedia.org/wiki/SSE4#SSE4.1>"]
1596    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "sse4.1";
1597    arch_fn! {
1598        "Blends the `i16` lanes according to the immediate mask.",
1599        blend_imm_i16_m128i<const IMM: i32>(a: m128i, b: m128i) -> m128i;
1600        "Blends the lanes according to the immediate mask.",
1601        blend_imm_m128<const IMM: i32>(a: m128, b: m128) -> m128;
1602        "Blends the `i16` lanes according to the immediate mask.",
1603        blend_imm_m128d<const IMM: i32>(a: m128d, b: m128d) -> m128d;
1604        "Blend the `i8` lanes according to a runtime varying mask.",
1605        blend_varying_i8_m128i(a: m128i, b: m128i, mask: m128i) -> m128i;
1606        "Blend the lanes according to a runtime varying mask.",
1607        blend_varying_m128(a: m128, b: m128, mask: m128) -> m128;
1608        "Blend the lanes according to a runtime varying mask.",
1609        blend_varying_m128d(a: m128d, b: m128d, mask: m128d) -> m128d;
1610        "Round each lane to a whole number, towards positive infinity.",
1611        ceil_m128(a: m128) -> m128;
1612        "Round the low lane of `b` toward positive infinity, other lanes `a`.",
1613        ceil_m128_s(a: m128, b: m128) -> m128;
1614        "Round each lane to a whole number, towards positive infinity.",
1615        ceil_m128d(a: m128d) -> m128d;
1616        "Round the low lane of `b` toward positive infinity, high lane is `a`.",
1617        ceil_m128d_s(a: m128d, b: m128d) -> m128d;
1618        "Lanewise `a == b` with lanes as `i64`.",
1619        cmp_eq_mask_i64_m128i(a: m128i, b: m128i) -> m128i;
1620        "Convert the lower two `i64` lanes to two `i32` lanes.",
1621        convert_to_i16_m128i_from_lower2_i16_m128i(a: m128i) -> m128i;
1622        "Convert the lower eight `i8` lanes to eight `i16` lanes.",
1623        convert_to_i16_m128i_from_lower8_i8_m128i(a: m128i) -> m128i;
1624        "Convert the lower four `i16` lanes to four `i32` lanes.",
1625        convert_to_i32_m128i_from_lower4_i16_m128i(a: m128i) -> m128i;
1626        "Convert the lower four `i8` lanes to four `i32` lanes.",
1627        convert_to_i32_m128i_from_lower4_i8_m128i(a: m128i) -> m128i;
1628        "Convert the lower two `i32` lanes to two `i64` lanes.",
1629        convert_to_i64_m128i_from_lower2_i32_m128i(a: m128i) -> m128i;
1630        "Convert the lower two `i8` lanes to two `i64` lanes.",
1631        convert_to_i64_m128i_from_lower2_i8_m128i(a: m128i) -> m128i;
1632        "Convert the lower eight `u8` lanes to eight `u16` lanes.",
1633        convert_to_u16_m128i_from_lower8_u8_m128i(a: m128i) -> m128i;
1634        "Convert the lower four `u16` lanes to four `u32` lanes.",
1635        convert_to_u32_m128i_from_lower4_u16_m128i(a: m128i) -> m128i;
1636        "Convert the lower four `u8` lanes to four `u32` lanes.",
1637        convert_to_u32_m128i_from_lower4_u8_m128i(a: m128i) -> m128i;
1638        "Convert the lower two `u16` lanes to two `u64` lanes.",
1639        convert_to_u64_m128i_from_lower2_u16_m128i(a: m128i) -> m128i;
1640        "Convert the lower two `u32` lanes to two `u64` lanes.",
1641        convert_to_u64_m128i_from_lower2_u32_m128i(a: m128i) -> m128i;
1642        "Convert the lower two `u8` lanes to two `u64` lanes.",
1643        convert_to_u64_m128i_from_lower2_u8_m128i(a: m128i) -> m128i;
1644        "Performs a dot product of two `m128` registers.",
1645        dot_product_m128<const IMM: i32>(a: m128, b: m128) -> m128;
1646        "Performs a dot product of two `m128d` registers.",
1647        dot_product_m128d<const IMM: i32>(a: m128d, b: m128d) -> m128d;
1648        "Gets the `f32` lane requested. Returns as an `i32` bit pattern.",
1649        extract_f32_as_i32_bits_imm_m128<const IMM: i32>(a: m128) -> i32;
1650        "Gets the `i32` lane requested. Only the lowest 2 bits are considered.",
1651        extract_i32_imm_m128i<const IMM: i32>(a: m128i) -> i32;
1652        "Gets the `i64` lane requested. Only the lowest bit is considered.",
1653        extract_i64_imm_m128i<const IMM: i32>(a: m128i) -> i64;
1654        "Gets the `i8` lane requested. Only the lowest 4 bits are considered.",
1655        extract_i8_as_i32_imm_m128i<const IMM: i32>(a: m128i) -> i32;
1656        "Round each lane to a whole number, towards negative infinity",
1657        floor_m128(a: m128) -> m128;
1658        "Round the low lane of `b` toward negative infinity, other lanes `a`.",
1659        floor_m128_s(a: m128, b: m128) -> m128;
1660        "Round each lane to a whole number, towards negative infinity",
1661        floor_m128d(a: m128d) -> m128d;
1662        "Round the low lane of `b` toward negative infinity, high lane is `a`.",
1663        floor_m128d_s(a: m128d, b: m128d) -> m128d;
1664        "Inserts a lane from `$b` into `$a`, optionally at a new position.",
1665        insert_f32_imm_m128<const IMM: i32>(a: m128, b: m128) -> m128;
1666        "Inserts a new value for the `i32` lane specified.",
1667        insert_i32_imm_m128i<const IMM: i32>(a: m128i, new: i32) -> m128i;
1668        "Inserts a new value for the `i64` lane specified.",
1669        insert_i64_imm_m128i<const IMM: i32>(a: m128i, new: i64) -> m128i;
1670        "Inserts a new value for the `i64` lane specified.",
1671        insert_i8_imm_m128i<const IMM: i32>(a: m128i, new: i32) -> m128i;
1672        "Lanewise `max(a, b)` with lanes as `i32`.",
1673        max_i32_m128i(a: m128i, b: m128i) -> m128i;
1674        "Lanewise `max(a, b)` with lanes as `i8`.",
1675        max_i8_m128i(a: m128i, b: m128i) -> m128i;
1676        "Lanewise `max(a, b)` with lanes as `u16`.",
1677        max_u16_m128i(a: m128i, b: m128i) -> m128i;
1678        "Lanewise `max(a, b)` with lanes as `u32`.",
1679        max_u32_m128i(a: m128i, b: m128i) -> m128i;
1680        "Lanewise `min(a, b)` with lanes as `i32`.",
1681        min_i32_m128i(a: m128i, b: m128i) -> m128i;
1682        "Lanewise `min(a, b)` with lanes as `i8`.",
1683        min_i8_m128i(a: m128i, b: m128i) -> m128i;
1684        "Min `u16` value, position, and other lanes zeroed.",
1685        min_position_u16_m128i(a: m128i) -> m128i;
1686        "Lanewise `min(a, b)` with lanes as `u16`.",
1687        min_u16_m128i(a: m128i, b: m128i) -> m128i;
1688        "Lanewise `min(a, b)` with lanes as `u32`.",
1689        min_u32_m128i(a: m128i, b: m128i) -> m128i;
1690        "Lanewise `a * b` with 32-bit lanes.",
1691        mul_32_m128i(a: m128i, b: m128i) -> m128i;
1692        "Multiplies the odd `i32` lanes and gives the widened (`i64`) results.",
1693        mul_widen_i32_odd_m128i(a: m128i, b: m128i) -> m128i;
1694        "Computes eight `u16` “sum of absolute difference” values according to the bytes selected.",
1695        multi_packed_sum_abs_diff_u8_m128i<const IMM: i32>(a: m128i, b: m128i) -> m128i;
1696        "Saturating convert `i32` to `u16`, and pack the values.",
1697        pack_i32_to_u16_m128i(a: m128i, b: m128i) -> m128i;
1698        "Rounds each lane in the style specified.",
1699        round_m128<const MODE: i32>(a: m128) -> m128;
1700        "Rounds `$b` low as specified, other lanes use `$a`.",
1701        round_m128_s<const MODE: i32>(a: m128, b: m128) -> m128;
1702        "Rounds each lane in the style specified.",
1703        round_m128d<const MODE: i32>(a: m128d) -> m128d;
1704        "Rounds `$b` low as specified, keeps `$a` high.",
1705        round_m128d_s<const MODE: i32>(a: m128d, b: m128d) -> m128d;
1706        "Tests if all bits are 1.",
1707        test_all_ones_m128i(a: m128i) -> i32;
1708        "Returns if all masked bits are 0, `(a &amp; mask) as u128 == 0`",
1709        test_all_zeroes_m128i(a: m128i, mask: m128i) -> i32;
1710        "Returns if, among the masked bits, there’s both 0s and 1s",
1711        test_mixed_ones_and_zeroes_m128i(a: m128i, mask: m128i) -> i32;
1712        "Compute the bitwise NOT of `a` and then AND with `b`,
1713        returns 1 if the result is zero, otherwise 0.",
1714        testc_m128i(a: m128i, b: m128i) -> i32;
1715        "Computes the bitwise AND of 256 bits in `a` and
1716        `b`, returns 1 if the result is zero, otherwise 0.",
1717        testz_m128i(a: m128i, b: m128i) -> i32;
1718    }
1719}
1720impl_arch! {
1721    #[doc = "# Functions requiring the `sse4.2` target feature.\n\n---"]
1722    #[doc = "See: <https://en.wikipedia.org/wiki/SSE4#SSE4.2>"]
1723    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "sse4.2";
1724    arch_fn! {
1725        "Lanewise `a &gt; b` with lanes as `i64`.",
1726        cmp_gt_mask_i64_m128i(a: m128i, b: m128i) -> m128i;
1727        "Accumulates the `u16` into a running CRC32 value.",
1728        crc32_u16(crc: u32, v: u16) -> u32;
1729        "Accumulates the `u32` into a running CRC32 value.",
1730        crc32_u32(crc: u32, v: u32) -> u32;
1731        "Accumulates the `u64` into a running CRC32 value.",
1732        crc32_u64(crc: u64, v: u64) -> u64;
1733        "Accumulates the `u8` into a running CRC32 value.",
1734        crc32_u8(crc: u32, v: u8) -> u32;
1735        "Search for `needle` in `haystack, with explicit string length.",
1736        search_explicit_str_for_index<const IMM: i32>(
1737        needle: m128i, needle_len: i32, haystack: m128i, haystack_len: i32) -> i32;
1738        "Search for `needle` in `haystack, with explicit string length.",
1739        search_explicit_str_for_mask<const IMM: i32>(
1740        needle: m128i, needle_len: i32, haystack: m128i, haystack_len: i32) -> m128i;
1741        "Search for `needle` in `haystack, with implicit string length.",
1742        search_implicit_str_for_index<const IMM: i32>(needle: m128i, haystack: m128i) -> i32;
1743        "Search for `needle` in `haystack, with implicit string length.",
1744        search_implicit_str_for_mask<const IMM: i32>(needle: m128i, haystack: m128i) -> m128i;
1745    }
1746}
1747impl_arch! {
1748    #[doc = "# Functions requiring the `ssse3` target feature.\n\n---"]
1749    #[doc = "See: <https://en.wikipedia.org/wiki/SSSE3>"]
1750    features = "dep_safe_arch", any_target_arch = "x86", "x86_64", target_features = "ssse3";
1751    arch_fn! {
1752        "Lanewise absolute value with lanes as `i16`.",
1753        abs_i16_m128i(a: m128i) -> m128i;
1754        "Lanewise absolute value with lanes as `i32`.",
1755        abs_i32_m128i(a: m128i) -> m128i;
1756        "Lanewise absolute value with lanes as `i8`.",
1757        abs_i8_m128i(a: m128i) -> m128i;
1758        "Add horizontal pairs of `i16` values, pack the outputs as `a` then `b`.",
1759        add_horizontal_i16_m128i(a: m128i, b: m128i) -> m128i;
1760        "Add horizontal pairs of `i32` values, pack the outputs as `a` then `b`.",
1761        add_horizontal_i32_m128i(a: m128i, b: m128i) -> m128i;
1762        "Add horizontal pairs of `i16` values, saturating, pack the outputs as `a` then `b`.",
1763        add_horizontal_saturating_i16_m128i(a: m128i, b: m128i) -> m128i;
1764        "Counts `$a` as the high bytes and `$b` as the low bytes then performs a
1765        **byte** shift to the right by the immediate value.",
1766        combined_byte_shr_imm_m128i<const IMM: i32>(a: m128i, b: m128i) -> m128i;
1767        "Multiply `i16` lanes into `i32` intermediates, keep the high 18 bits,
1768        round by adding 1, right shift by 1.",
1769        mul_i16_scale_round_m128i(a: m128i, b: m128i) -> m128i;
1770        "This is dumb and weird.",
1771        mul_u8i8_add_horizontal_saturating_m128i(a: m128i, b: m128i) -> m128i;
1772        "Shuffle `i8` lanes in `a` using `i8` values in `v`.",
1773        shuffle_av_i8z_all_m128i(a: m128i, v: m128i) -> m128i;
1774        "Applies the sign of `i16` values in `b` to the values in `a`.",
1775        sign_apply_i16_m128i(a: m128i, b: m128i) -> m128i;
1776        "Applies the sign of `i32` values in `b` to the values in `a`.",
1777        sign_apply_i32_m128i(a: m128i, b: m128i) -> m128i;
1778        "Applies the sign of `i8` values in `b` to the values in `a`.",
1779        sign_apply_i8_m128i(a: m128i, b: m128i) -> m128i;
1780        "Subtract horizontal pairs of `i16` values, pack the outputs as `a` then `b`.",
1781        sub_horizontal_i16_m128i(a: m128i, b: m128i) -> m128i;
1782        "Subtract horizontal pairs of `i32` values, pack the outputs as `a` then `b`.",
1783        sub_horizontal_i32_m128i(a: m128i, b: m128i) -> m128i;
1784        "Subtract horizontal pairs of `i16` values, saturating, pack the outputs as `a` then `b`.",
1785        sub_horizontal_saturating_i16_m128i(a: m128i, b: m128i) -> m128i;
1786    }
1787}
1788
1789/* macro helpers */
1790
/// Generates an `impl Arch` block with optional conditional configurations and documentation.
///
/// The invocation is made of, in this order:
/// - zero or more `#[doc = "…"]` attributes, forwarded to the `impl` block;
/// - optionally `features = "a", "b"`: crate features that must **all** be enabled;
/// - optionally `any_target_arch = "x", "y"`: target architectures, **any** of which matches;
/// - optionally `target_features = "f", "g"`: target features that must **all** be enabled;
/// - a mandatory `;`;
/// - the items to place inside the `impl Arch` block.
///
/// Every condition that is present is also mirrored as a `doc(cfg(…))`
/// annotation when the `nightly_doc` feature is enabled.
macro_rules! impl_arch {
    (
        $( #[doc = $doc:literal] )*
        $( features = $( $feature:literal ),+ $(,)? )? // all
        $( any_target_arch = $( $target_arch:literal ),+ $(,)? )? // any
        $( target_features = $( $target_feature:literal ),+ $(,)? )? // all
        ;
        $($item:item)*
    ) => {
        $( #[doc = $doc] )*
        // Every listed crate feature is required.
        $(
        #[cfg(all($(feature = $feature),+))]
        #[cfg_attr(feature = "nightly_doc", doc(cfg(all($(feature = $feature),+))))]
        )?
        // One matching architecture is enough.
        $(
        #[cfg(any($(target_arch = $target_arch),+))]
        #[cfg_attr(feature = "nightly_doc", doc(cfg(any($(target_arch = $target_arch),+))))]
        )?
        // Every listed target feature is required: the items may rely on each of them.
        $(
        #[cfg(all($(target_feature = $target_feature),+))]
        #[cfg_attr(feature = "nightly_doc", doc(cfg(all($(target_feature = $target_feature),+))))]
        )?
        impl Arch { $($item)* }
    };
}
1817use impl_arch;
1818
/// Re-exports standalone functions as associated functions of the enclosing `impl` block.
///
/// Every entry is a doc summary literal, a comma, and a function signature
/// (optionally with `const` generics and a return type). For each entry a
/// `pub fn` with that signature is generated, whose body forwards all the
/// arguments and const generics to the function of the same name in scope.
///
/// The generated docs are the summary plus a `See:` link to the function of
/// the same name under `crate::_dep::safe_arch`.
///
/// Several entries can be given at once, separated by `;`.
#[allow(unused_macros, reason = "feature-gated")]
macro_rules! arch_fn {
    // Nothing to generate.
    () => {};

    // A single function that returns a value: the result is marked `#[must_use]`.
    (
        $summary:literal,
        $name:ident$(<$(const $cname:ident: $cty:ty),*>)?
        ($($arg:ident: $arg_ty:ty),* $(,)?) -> $out:ty
    ) => { $crate::paste! {
        // NOTE: the doc string is built with paste! because it compiles faster
        // than the equivalent concat! + stringify! form.
        #[doc = $summary]
        #[doc = "\n\nSee: [`" $name "`][crate::_dep::safe_arch::" $name "]."]
        #[must_use]
        pub fn $name$(<$(const $cname: $cty),*>)?($($arg: $arg_ty),*) -> $out {
            $name$(::<$($cname),*>)?($($arg),*)
        }
    }};

    // A single function that returns nothing.
    (
        $summary:literal,
        $name:ident$(<$(const $cname:ident: $cty:ty),*>)?
        ($($arg:ident: $arg_ty:ty),* $(,)?)
    ) => { $crate::paste! {
        #[doc = $summary]
        #[doc = "\n\nSee: [`" $name "`][crate::_dep::safe_arch::" $name "]."]
        pub fn $name$(<$(const $cname: $cty),*>)?($($arg: $arg_ty),*) {
            $name$(::<$($cname),*>)?($($arg),*)
        }
    }};

    // A `;`-separated list: each entry is handed back to one of the arms above.
    // This arm must stay last, otherwise a single entry would recurse forever.
    (
        $($summary:literal,
        $name:ident$(<$(const $cname:ident: $cty:ty),*>)?
        ($($arg:ident: $arg_ty:ty),* $(,)?) $(-> $out:ty)?);+ $(;)?
    ) => {
        $(
            arch_fn! {
                $summary,
                $name$(<$(const $cname: $cty),*>)?($($arg: $arg_ty),*) $(-> $out)?
            }
        )+
    };
}
1859#[allow(unused_imports, reason = "feature-gated")]
1860use arch_fn;