// devela/text/ascii/char.rs

// devela::text::ascii::char
//
//! An enum of the 128 ASCII characters.
//
// Ported from:
// - https://doc.rust-lang.org/stable/core/ascii/enum.Char.html
// - WAIT: [ascii::Char](https://github.com/rust-lang/rust/issues/110998)
8
9#[cfg(feature = "unsafe_str")]
10use crate::transmute;
11use crate::{ConstDefault, _core::fmt};
12
/// One of the 128 Unicode characters from `U+0000` through `U+007F`,
/// often known as the [ASCII] subset.
///
/// Officially, this is the first [block] in Unicode, _Basic Latin_.
/// For details, see the [*C0 Controls and Basic Latin*][chart] code chart.
///
/// This block was based on older 7-bit character code standards such as
/// ANSI X3.4-1977, ISO 646-1973, and [NIST FIPS 1-2].
///
/// # When to use this
/// The main advantage of this subset is that it's always valid UTF-8. As such,
/// the `&[AsciiChar]` -> `&str` conversion function (as well as other related
/// ones) are O(1): *no* runtime checks are needed.
///
/// If you're consuming strings, you should usually handle Unicode and thus
/// accept `str`s, not limit yourself to `AsciiChar`s.
///
/// However, certain formats are intentionally designed to produce ASCII-only
/// output in order to be 8-bit-clean. In those cases, it can be simpler and
/// faster to generate `AsciiChar`s instead of dealing with the variable width
/// properties of general UTF-8 encoded strings, while still allowing the result
/// to be used freely with other Rust things that deal in general `str`s.
///
/// For example, a UUID library might offer a way to produce the string
/// representation of a UUID as an `[AsciiChar; 36]` to avoid memory
/// allocation yet still allow it to be used as UTF-8 via
/// `AsciiChar::slice_as_str` without paying for validation the way it would
/// if it were provided as a `[u8; 36]`.
///
/// # Layout
/// This type is guaranteed to have a size and alignment of 1 byte.
/// The discriminant of every variant equals its code point.
///
/// # Names
/// The variants on this type are [Unicode names][NamesList] of the characters
/// in upper camel case, with a few tweaks:
/// - For `<control>` characters, the primary alias name is used.
/// - `LATIN` is dropped, as this block has no non-latin letters.
/// - `LETTER` is dropped, as `CAPITAL`/`SMALL` suffices in this block.
/// - `DIGIT`s use a single digit rather than writing out `ZERO`, `ONE`, etc.
///
/// [ASCII]: https://www.unicode.org/glossary/index.html#ASCII
/// [block]: https://www.unicode.org/glossary/index.html#block
/// [chart]: https://www.unicode.org/charts/PDF/U0000.pdf
/// [NIST FIPS 1-2]: https://nvlpubs.nist.gov/nistpubs/Legacy/FIPS/fipspub1-2-1977.pdf
/// [NamesList]: https://www.unicode.org/Public/15.0.0/ucd/NamesList.txt
#[derive(Debug, Copy, Clone, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[repr(u8)]
pub enum AsciiChar {
    /// U+0000 `NUL` (the default variant)
    #[default]
    Null = 0,
    /// U+0001 `SOH`
    StartOfHeading = 1,
    /// U+0002 `STX`
    StartOfText = 2,
    /// U+0003 `ETX`
    EndOfText = 3,
    /// U+0004 `EOT`
    EndOfTransmission = 4,
    /// U+0005 `ENQ`
    Enquiry = 5,
    /// U+0006 `ACK`
    Acknowledge = 6,
    /// U+0007 `BEL`
    Bell = 7,
    /// U+0008 `BS`
    Backspace = 8,
    /// U+0009 `HT` (horizontal tab)
    ///
    /// The upstream `core::ascii::Char` names this variant
    /// `CharacterTabulation`; the `Ascii` prefix is specific to this type.
    AsciiCharacterTabulation = 9,
    /// U+000A `LF`
    LineFeed = 10,
    /// U+000B `VT`
    LineTabulation = 11,
    /// U+000C `FF`
    FormFeed = 12,
    /// U+000D `CR`
    CarriageReturn = 13,
    /// U+000E `SO`
    ShiftOut = 14,
    /// U+000F `SI`
    ShiftIn = 15,
    /// U+0010 `DLE`
    DataLinkEscape = 16,
    /// U+0011 `DC1`
    DeviceControlOne = 17,
    /// U+0012 `DC2`
    DeviceControlTwo = 18,
    /// U+0013 `DC3`
    DeviceControlThree = 19,
    /// U+0014 `DC4`
    DeviceControlFour = 20,
    /// U+0015 `NAK`
    NegativeAcknowledge = 21,
    /// U+0016 `SYN`
    SynchronousIdle = 22,
    /// U+0017 `ETB`
    EndOfTransmissionBlock = 23,
    /// U+0018 `CAN`
    Cancel = 24,
    /// U+0019 `EM`
    EndOfMedium = 25,
    /// U+001A `SUB`
    Substitute = 26,
    /// U+001B `ESC`
    Escape = 27,
    /// U+001C `FS`
    InformationSeparatorFour = 28,
    /// U+001D `GS`
    InformationSeparatorThree = 29,
    /// U+001E `RS`
    InformationSeparatorTwo = 30,
    /// U+001F `US`
    InformationSeparatorOne = 31,
    /// U+0020 (space)
    Space = 32,
    /// U+0021 `!`
    ExclamationMark = 33,
    /// U+0022 `"`
    QuotationMark = 34,
    /// U+0023 `#`
    NumberSign = 35,
    /// U+0024 `$`
    DollarSign = 36,
    /// U+0025 `%`
    PercentSign = 37,
    /// U+0026 `&`
    Ampersand = 38,
    /// U+0027 `'`
    Apostrophe = 39,
    /// U+0028 `(`
    LeftParenthesis = 40,
    /// U+0029 `)`
    RightParenthesis = 41,
    /// U+002A `*`
    Asterisk = 42,
    /// U+002B `+`
    PlusSign = 43,
    /// U+002C `,`
    Comma = 44,
    /// U+002D `-`
    HyphenMinus = 45,
    /// U+002E `.`
    FullStop = 46,
    /// U+002F `/`
    Solidus = 47,
    /// U+0030 `0`
    Digit0 = 48,
    /// U+0031 `1`
    Digit1 = 49,
    /// U+0032 `2`
    Digit2 = 50,
    /// U+0033 `3`
    Digit3 = 51,
    /// U+0034 `4`
    Digit4 = 52,
    /// U+0035 `5`
    Digit5 = 53,
    /// U+0036 `6`
    Digit6 = 54,
    /// U+0037 `7`
    Digit7 = 55,
    /// U+0038 `8`
    Digit8 = 56,
    /// U+0039 `9`
    Digit9 = 57,
    /// U+003A `:`
    Colon = 58,
    /// U+003B `;`
    Semicolon = 59,
    /// U+003C `<`
    LessThanSign = 60,
    /// U+003D `=`
    EqualsSign = 61,
    /// U+003E `>`
    GreaterThanSign = 62,
    /// U+003F `?`
    QuestionMark = 63,
    /// U+0040 `@`
    CommercialAt = 64,
    /// U+0041 `A`
    CapitalA = 65,
    /// U+0042 `B`
    CapitalB = 66,
    /// U+0043 `C`
    CapitalC = 67,
    /// U+0044 `D`
    CapitalD = 68,
    /// U+0045 `E`
    CapitalE = 69,
    /// U+0046 `F`
    CapitalF = 70,
    /// U+0047 `G`
    CapitalG = 71,
    /// U+0048 `H`
    CapitalH = 72,
    /// U+0049 `I`
    CapitalI = 73,
    /// U+004A `J`
    CapitalJ = 74,
    /// U+004B `K`
    CapitalK = 75,
    /// U+004C `L`
    CapitalL = 76,
    /// U+004D `M`
    CapitalM = 77,
    /// U+004E `N`
    CapitalN = 78,
    /// U+004F `O`
    CapitalO = 79,
    /// U+0050 `P`
    CapitalP = 80,
    /// U+0051 `Q`
    CapitalQ = 81,
    /// U+0052 `R`
    CapitalR = 82,
    /// U+0053 `S`
    CapitalS = 83,
    /// U+0054 `T`
    CapitalT = 84,
    /// U+0055 `U`
    CapitalU = 85,
    /// U+0056 `V`
    CapitalV = 86,
    /// U+0057 `W`
    CapitalW = 87,
    /// U+0058 `X`
    CapitalX = 88,
    /// U+0059 `Y`
    CapitalY = 89,
    /// U+005A `Z`
    CapitalZ = 90,
    /// U+005B `[`
    LeftSquareBracket = 91,
    /// U+005C `\`
    ReverseSolidus = 92,
    /// U+005D `]`
    RightSquareBracket = 93,
    /// U+005E `^`
    CircumflexAccent = 94,
    /// U+005F `_`
    LowLine = 95,
    /// U+0060 (grave accent / backtick)
    GraveAccent = 96,
    /// U+0061 `a`
    SmallA = 97,
    /// U+0062 `b`
    SmallB = 98,
    /// U+0063 `c`
    SmallC = 99,
    /// U+0064 `d`
    SmallD = 100,
    /// U+0065 `e`
    SmallE = 101,
    /// U+0066 `f`
    SmallF = 102,
    /// U+0067 `g`
    SmallG = 103,
    /// U+0068 `h`
    SmallH = 104,
    /// U+0069 `i`
    SmallI = 105,
    /// U+006A `j`
    SmallJ = 106,
    /// U+006B `k`
    SmallK = 107,
    /// U+006C `l`
    SmallL = 108,
    /// U+006D `m`
    SmallM = 109,
    /// U+006E `n`
    SmallN = 110,
    /// U+006F `o`
    SmallO = 111,
    /// U+0070 `p`
    SmallP = 112,
    /// U+0071 `q`
    SmallQ = 113,
    /// U+0072 `r`
    SmallR = 114,
    /// U+0073 `s`
    SmallS = 115,
    /// U+0074 `t`
    SmallT = 116,
    /// U+0075 `u`
    SmallU = 117,
    /// U+0076 `v`
    SmallV = 118,
    /// U+0077 `w`
    SmallW = 119,
    /// U+0078 `x`
    SmallX = 120,
    /// U+0079 `y`
    SmallY = 121,
    /// U+007A `z`
    SmallZ = 122,
    /// U+007B `{`
    LeftCurlyBracket = 123,
    /// U+007C `|`
    VerticalLine = 124,
    /// U+007D `}`
    RightCurlyBracket = 125,
    /// U+007E `~`
    Tilde = 126,
    /// U+007F `DEL`
    Delete = 127,
}
318
319impl AsciiChar {
320    /// Creates an ascii character from the byte `b`,
321    /// or returns `None` if it's too large.
322    #[must_use]
323    pub const fn from_u8(b: u8) -> Option<Self> {
324        match b {
325            0 => Some(Self::Null),
326            1 => Some(Self::StartOfHeading),
327            2 => Some(Self::StartOfText),
328            3 => Some(Self::EndOfText),
329            4 => Some(Self::EndOfTransmission),
330            5 => Some(Self::Enquiry),
331            6 => Some(Self::Acknowledge),
332            7 => Some(Self::Bell),
333            8 => Some(Self::Backspace),
334            9 => Some(Self::AsciiCharacterTabulation),
335            10 => Some(Self::LineFeed),
336            11 => Some(Self::LineTabulation),
337            12 => Some(Self::FormFeed),
338            13 => Some(Self::CarriageReturn),
339            14 => Some(Self::ShiftOut),
340            15 => Some(Self::ShiftIn),
341            16 => Some(Self::DataLinkEscape),
342            17 => Some(Self::DeviceControlOne),
343            18 => Some(Self::DeviceControlTwo),
344            19 => Some(Self::DeviceControlThree),
345            20 => Some(Self::DeviceControlFour),
346            21 => Some(Self::NegativeAcknowledge),
347            22 => Some(Self::SynchronousIdle),
348            23 => Some(Self::EndOfTransmissionBlock),
349            24 => Some(Self::Cancel),
350            25 => Some(Self::EndOfMedium),
351            26 => Some(Self::Substitute),
352            27 => Some(Self::Escape),
353            28 => Some(Self::InformationSeparatorFour),
354            29 => Some(Self::InformationSeparatorThree),
355            30 => Some(Self::InformationSeparatorTwo),
356            31 => Some(Self::InformationSeparatorOne),
357            32 => Some(Self::Space),
358            33 => Some(Self::ExclamationMark),
359            34 => Some(Self::QuotationMark),
360            35 => Some(Self::NumberSign),
361            36 => Some(Self::DollarSign),
362            37 => Some(Self::PercentSign),
363            38 => Some(Self::Ampersand),
364            39 => Some(Self::Apostrophe),
365            40 => Some(Self::LeftParenthesis),
366            41 => Some(Self::RightParenthesis),
367            42 => Some(Self::Asterisk),
368            43 => Some(Self::PlusSign),
369            44 => Some(Self::Comma),
370            45 => Some(Self::HyphenMinus),
371            46 => Some(Self::FullStop),
372            47 => Some(Self::Solidus),
373            48 => Some(Self::Digit0),
374            49 => Some(Self::Digit1),
375            50 => Some(Self::Digit2),
376            51 => Some(Self::Digit3),
377            52 => Some(Self::Digit4),
378            53 => Some(Self::Digit5),
379            54 => Some(Self::Digit6),
380            55 => Some(Self::Digit7),
381            56 => Some(Self::Digit8),
382            57 => Some(Self::Digit9),
383            58 => Some(Self::Colon),
384            59 => Some(Self::Semicolon),
385            60 => Some(Self::LessThanSign),
386            61 => Some(Self::EqualsSign),
387            62 => Some(Self::GreaterThanSign),
388            63 => Some(Self::QuestionMark),
389            64 => Some(Self::CommercialAt),
390            65 => Some(Self::CapitalA),
391            66 => Some(Self::CapitalB),
392            67 => Some(Self::CapitalC),
393            68 => Some(Self::CapitalD),
394            69 => Some(Self::CapitalE),
395            70 => Some(Self::CapitalF),
396            71 => Some(Self::CapitalG),
397            72 => Some(Self::CapitalH),
398            73 => Some(Self::CapitalI),
399            74 => Some(Self::CapitalJ),
400            75 => Some(Self::CapitalK),
401            76 => Some(Self::CapitalL),
402            77 => Some(Self::CapitalM),
403            78 => Some(Self::CapitalN),
404            79 => Some(Self::CapitalO),
405            80 => Some(Self::CapitalP),
406            81 => Some(Self::CapitalQ),
407            82 => Some(Self::CapitalR),
408            83 => Some(Self::CapitalS),
409            84 => Some(Self::CapitalT),
410            85 => Some(Self::CapitalU),
411            86 => Some(Self::CapitalV),
412            87 => Some(Self::CapitalW),
413            88 => Some(Self::CapitalX),
414            89 => Some(Self::CapitalY),
415            90 => Some(Self::CapitalZ),
416            91 => Some(Self::LeftSquareBracket),
417            92 => Some(Self::ReverseSolidus),
418            93 => Some(Self::RightSquareBracket),
419            94 => Some(Self::CircumflexAccent),
420            95 => Some(Self::LowLine),
421            96 => Some(Self::GraveAccent),
422            97 => Some(Self::SmallA),
423            98 => Some(Self::SmallB),
424            99 => Some(Self::SmallC),
425            100 => Some(Self::SmallD),
426            101 => Some(Self::SmallE),
427            102 => Some(Self::SmallF),
428            103 => Some(Self::SmallG),
429            104 => Some(Self::SmallH),
430            105 => Some(Self::SmallI),
431            106 => Some(Self::SmallJ),
432            107 => Some(Self::SmallK),
433            108 => Some(Self::SmallL),
434            109 => Some(Self::SmallM),
435            110 => Some(Self::SmallN),
436            111 => Some(Self::SmallO),
437            112 => Some(Self::SmallP),
438            113 => Some(Self::SmallQ),
439            114 => Some(Self::SmallR),
440            115 => Some(Self::SmallS),
441            116 => Some(Self::SmallT),
442            117 => Some(Self::SmallU),
443            118 => Some(Self::SmallV),
444            119 => Some(Self::SmallW),
445            120 => Some(Self::SmallX),
446            121 => Some(Self::SmallY),
447            122 => Some(Self::SmallZ),
448            123 => Some(Self::LeftCurlyBracket),
449            124 => Some(Self::VerticalLine),
450            125 => Some(Self::RightCurlyBracket),
451            126 => Some(Self::Tilde),
452            127 => Some(Self::Delete),
453            _ => None,
454        }
455    }
456
457    /// Creates an ASCII character from the byte `b`,
458    /// without checking whether it's valid.
459    /// # Safety
460    /// `b` must be in `0..=127`, or else this is UB.
461    #[must_use]
462    #[cfg(all(not(feature = "safe_text"), feature = "unsafe_str"))]
463    #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "unsafe_str")))]
464    pub const unsafe fn from_u8_unchecked(b: u8) -> Self {
465        // SAFETY: Our safety precondition is that `b` is in-range.
466        unsafe { transmute(b) }
467    }
468
469    /// When passed the *number* `0`, `1`, …, `9`, returns the *character*
470    /// `'0'`, `'1'`, …, `'9'` respectively.
471    ///
472    /// If `d >= 10`, returns `None`.
473    ///
474    /// # Features
475    /// Uses `unsafe_hint` for performance optimizations.
476    #[must_use]
477    pub const fn digit(d: u8) -> Option<Self> {
478        if d < 10 {
479            let sum = {
480                #[cfg(any(feature = "safe_text", not(feature = "unsafe_hint")))]
481                {
482                    b'0' + d
483                }
484                #[cfg(all(not(feature = "safe_text"), feature = "unsafe_hint"))]
485                // SAFETY: we've checked d < 10
486                unsafe {
487                    b'0'.unchecked_add(d)
488                }
489            };
490            Self::from_u8(sum)
491        } else {
492            None
493        }
494    }
495
496    /// When passed the *number* `0`, `1`, …, `9`, returns the *character*
497    /// `'0'`, `'1'`, …, `'9'` respectively, without checking that it's in-range.
498    ///
499    /// # Safety
500    /// This is immediate UB if called with `d > 64`.
501    ///
502    /// If `d >= 10` and `d <= 64`, this is allowed to return any value or panic.
503    /// Notably, it should not be expected to return hex digits, or any other
504    /// reasonable extension of the decimal digits.
505    ///
506    /// (This lose safety condition is intended to simplify soundness proofs
507    /// when writing code using this method, since the implementation doesn't
508    /// need something really specific, not to make those other arguments do
509    /// something useful. It might be tightened before stabilization.)
510    #[must_use]
511    #[cfg(all(not(feature = "safe_text"), feature = "unsafe_str"))]
512    #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "unsafe_str")))]
513    pub const unsafe fn digit_unchecked(d: u8) -> Self {
514        debug_assert!(d < 10);
515
516        // SAFETY: `'0'` through `'9'` are U+00030 through U+0039,
517        // so because `d` must be 64 or less the addition can return at most
518        // 112 (0x70), which doesn't overflow and is within the ASCII range.
519        unsafe {
520            let byte = b'0'.unchecked_add(d);
521            Self::from_u8_unchecked(byte)
522        }
523    }
524
525    /// Gets this ASCII character as a byte.
526    #[must_use]
527    pub const fn as_u8(self) -> u8 {
528        self as u8
529    }
530
531    /// Gets this ASCII character as a `char` Unicode Scalar Value.
532    #[must_use]
533    pub const fn as_char(self) -> char {
534        self as u8 as char
535    }
536
537    /// Views this ASCII character as a one-code-unit UTF-8 `str`.
538    #[must_use]
539    #[cfg(all(not(feature = "safe_text"), feature = "unsafe_str"))]
540    #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "unsafe_str")))]
541    pub const fn as_str(&self) -> &str {
542        Self::slice_as_str(core::slice::from_ref(self))
543    }
544}
545
546impl AsciiChar {
547    /// Views a slice of ASCII characters as a UTF-8 `str`.
548    #[must_use]
549    #[cfg(all(not(feature = "safe_text"), feature = "unsafe_str"))]
550    #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "unsafe_str")))]
551    pub const fn slice_as_str(slice: &[AsciiChar]) -> &str {
552        let ascii_ptr: *const [AsciiChar] = slice;
553        let str_ptr = ascii_ptr as *const str;
554        // SAFETY: Each ASCII codepoint in UTF-8 is encoded as one single-byte
555        // code unit having the same value as the ASCII byte.
556        unsafe { &*str_ptr }
557    }
558
559    /// Views a slice of ASCII characters as a slice of `u8` bytes.
560    #[must_use]
561    #[cfg(all(not(feature = "safe_text"), feature = "unsafe_str"))]
562    #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "unsafe_str")))]
563    pub const fn slice_as_bytes(slice: &[AsciiChar]) -> &[u8] {
564        AsciiChar::slice_as_str(slice).as_bytes()
565    }
566}
// NOTE: the block below mirrors the upstream `core` implementation and is kept
// for reference only. Inherent impls on the slice type `[AsciiChar]` cannot be
// written outside of `core`, so the same functionality is exposed through the
// associated functions `AsciiChar::slice_as_str` and `AsciiChar::slice_as_bytes`.
//
// impl [AsciiChar] {
//     /// Views this slice of ASCII characters as a UTF-8 `str`.
//     #[must_use]
//     pub const fn as_str(&self) -> &str {
//         let ascii_ptr: *const Self = self;
//         let str_ptr = ascii_ptr as *const str;
//         // SAFETY: Each ASCII codepoint in UTF-8 is encoded as one single-byte
//         // code unit having the same value as the ASCII byte.
//         unsafe { &*str_ptr }
//     }
//
//     /// Views this slice of ASCII characters as a slice of `u8` bytes.
//     #[must_use]
//     pub const fn as_bytes(&self) -> &[u8] {
//         self.as_str().as_bytes()
//     }
// }
584
585impl fmt::Display for AsciiChar {
586    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
587        fmt::Display::fmt(&self.as_char(), f)
588    }
589}
590
591impl ConstDefault for AsciiChar {
592    const DEFAULT: Self = AsciiChar::Null;
593}
594
// NOTE(review): presumably declares that `AsciiChar` occupies exactly 7 bits
// (its 128 variants span `0..=127`) — confirm against the `bit_sized!` macro.
#[cfg(feature = "bit")]
crate::bit_sized![= 7; for AsciiChar];