// Source path: devela/text/ascii/char.rs
// devela::text::ascii::char
//
//!
//
// Ported from:
// - https://doc.rust-lang.org/stable/core/ascii/enum.Char.html
// - WAIT: [ascii::Char](https://github.com/rust-lang/rust/issues/110998)

#[cfg(feature = "unsafe_str")]
use crate::transmute;
use crate::{ConstDefault, _core::fmt};

/// One of the 128 Unicode characters from U+0000 through U+007F,
/// often known as the [ASCII] subset.
///
/// Officially, this is the first [block] in Unicode, _Basic Latin_.
/// For details, see the [*C0 Controls and Basic Latin*][chart] code chart.
///
/// This block was based on older 7-bit character code standards such as
/// ANSI X3.4-1977, ISO 646-1973, and [NIST FIPS 1-2].
///
/// # When to use this
/// The main advantage of this subset is that it's always valid UTF-8. As such,
/// the `&[AsciiChar]` -> `&str` conversion function (`AsciiChar::slice_as_str`),
/// as well as the other related ones, is O(1): *no* runtime checks are needed.
///
/// If you're consuming strings, you should usually handle Unicode and thus
/// accept `str`s, not limit yourself to `AsciiChar`s.
///
/// However, certain formats are intentionally designed to produce ASCII-only
/// output in order to be 8-bit-clean. In those cases, it can be simpler and
/// faster to generate `AsciiChar`s instead of dealing with the variable width
/// properties of general UTF-8 encoded strings, while still allowing the result
/// to be used freely with other Rust things that deal in general `str`s.
///
/// For example, a UUID library might offer a way to produce the string
/// representation of a UUID as an `[AsciiChar; 36]` to avoid memory
/// allocation yet still allow it to be used as UTF-8 via
/// `AsciiChar::slice_as_str` without paying for validation (or needing
/// `unsafe` code) the way it would if it were provided as a `[u8; 36]`.
///
/// Note that the `str`/byte views (`as_str`, `slice_as_str`, `slice_as_bytes`)
/// are only compiled when the `unsafe_str` feature is enabled and `safe_text`
/// is disabled.
///
/// # Layout
/// This type is `#[repr(u8)]`: it is guaranteed to have a size and alignment
/// of 1 byte, and each variant's discriminant equals its code point.
///
/// # Names
/// The variants on this type are [Unicode names][NamesList] of the characters
/// in upper camel case, with a few tweaks:
/// - For `<control>` characters, the primary alias name is used.
/// - `LATIN` is dropped, as this block has no non-latin letters.
/// - `LETTER` is dropped, as `CAPITAL`/`SMALL` suffices in this block.
/// - `DIGIT`s use a single digit rather than writing out `ZERO`, `ONE`, etc.
///
/// [ASCII]: https://www.unicode.org/glossary/index.html#ASCII
/// [block]: https://www.unicode.org/glossary/index.html#block
/// [chart]: https://www.unicode.org/charts/PDF/U0000.pdf
/// [NIST FIPS 1-2]: https://nvlpubs.nist.gov/nistpubs/Legacy/FIPS/fipspub1-2-1977.pdf
/// [NamesList]: https://www.unicode.org/Public/15.0.0/ucd/NamesList.txt
#[derive(Debug, Copy, Clone, Default, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[repr(u8)]
pub enum AsciiChar {
    /// U+0000 (`'\0'`). This is the default variant.
    #[default]
    Null = 0,
    /// U+0001
    StartOfHeading = 1,
    /// U+0002
    StartOfText = 2,
    /// U+0003
    EndOfText = 3,
    /// U+0004
    EndOfTransmission = 4,
    /// U+0005
    Enquiry = 5,
    /// U+0006
    Acknowledge = 6,
    /// U+0007
    Bell = 7,
    /// U+0008
    Backspace = 8,
    /// U+0009 (`'\t'`)
    ///
    // NOTE(review): upstream `core::ascii::Char` names this variant
    // `CharacterTabulation`; the `Ascii` prefix looks like a rename artifact.
    // It is public API, so confirm with callers before changing it.
    AsciiCharacterTabulation = 9,
    /// U+000A (`'\n'`)
    LineFeed = 10,
    /// U+000B
    LineTabulation = 11,
    /// U+000C
    FormFeed = 12,
    /// U+000D (`'\r'`)
    CarriageReturn = 13,
    /// U+000E
    ShiftOut = 14,
    /// U+000F
    ShiftIn = 15,
    /// U+0010
    DataLinkEscape = 16,
    /// U+0011
    DeviceControlOne = 17,
    /// U+0012
    DeviceControlTwo = 18,
    /// U+0013
    DeviceControlThree = 19,
    /// U+0014
    DeviceControlFour = 20,
    /// U+0015
    NegativeAcknowledge = 21,
    /// U+0016
    SynchronousIdle = 22,
    /// U+0017
    EndOfTransmissionBlock = 23,
    /// U+0018
    Cancel = 24,
    /// U+0019
    EndOfMedium = 25,
    /// U+001A
    Substitute = 26,
    /// U+001B
    Escape = 27,
    /// U+001C
    InformationSeparatorFour = 28,
    /// U+001D
    InformationSeparatorThree = 29,
    /// U+001E
    InformationSeparatorTwo = 30,
    /// U+001F
    InformationSeparatorOne = 31,
    /// U+0020 (`' '`)
    Space = 32,
    /// U+0021 (`!`)
    ExclamationMark = 33,
    /// U+0022 (`"`)
    QuotationMark = 34,
    /// U+0023 (`#`)
    NumberSign = 35,
    /// U+0024 (`$`)
    DollarSign = 36,
    /// U+0025 (`%`)
    PercentSign = 37,
    /// U+0026 (`&`)
    Ampersand = 38,
    /// U+0027 (`'`)
    Apostrophe = 39,
    /// U+0028 (`(`)
    LeftParenthesis = 40,
    /// U+0029 (`)`)
    RightParenthesis = 41,
    /// U+002A (`*`)
    Asterisk = 42,
    /// U+002B (`+`)
    PlusSign = 43,
    /// U+002C (`,`)
    Comma = 44,
    /// U+002D (`-`)
    HyphenMinus = 45,
    /// U+002E (`.`)
    FullStop = 46,
    /// U+002F (`/`)
    Solidus = 47,
    /// U+0030 (`0`)
    Digit0 = 48,
    /// U+0031 (`1`)
    Digit1 = 49,
    /// U+0032 (`2`)
    Digit2 = 50,
    /// U+0033 (`3`)
    Digit3 = 51,
    /// U+0034 (`4`)
    Digit4 = 52,
    /// U+0035 (`5`)
    Digit5 = 53,
    /// U+0036 (`6`)
    Digit6 = 54,
    /// U+0037 (`7`)
    Digit7 = 55,
    /// U+0038 (`8`)
    Digit8 = 56,
    /// U+0039 (`9`)
    Digit9 = 57,
    /// U+003A (`:`)
    Colon = 58,
    /// U+003B (`;`)
    Semicolon = 59,
    /// U+003C (`<`)
    LessThanSign = 60,
    /// U+003D (`=`)
    EqualsSign = 61,
    /// U+003E (`>`)
    GreaterThanSign = 62,
    /// U+003F (`?`)
    QuestionMark = 63,
    /// U+0040 (`@`)
    CommercialAt = 64,
    /// U+0041 (`A`)
    CapitalA = 65,
    /// U+0042 (`B`)
    CapitalB = 66,
    /// U+0043 (`C`)
    CapitalC = 67,
    /// U+0044 (`D`)
    CapitalD = 68,
    /// U+0045 (`E`)
    CapitalE = 69,
    /// U+0046 (`F`)
    CapitalF = 70,
    /// U+0047 (`G`)
    CapitalG = 71,
    /// U+0048 (`H`)
    CapitalH = 72,
    /// U+0049 (`I`)
    CapitalI = 73,
    /// U+004A (`J`)
    CapitalJ = 74,
    /// U+004B (`K`)
    CapitalK = 75,
    /// U+004C (`L`)
    CapitalL = 76,
    /// U+004D (`M`)
    CapitalM = 77,
    /// U+004E (`N`)
    CapitalN = 78,
    /// U+004F (`O`)
    CapitalO = 79,
    /// U+0050 (`P`)
    CapitalP = 80,
    /// U+0051 (`Q`)
    CapitalQ = 81,
    /// U+0052 (`R`)
    CapitalR = 82,
    /// U+0053 (`S`)
    CapitalS = 83,
    /// U+0054 (`T`)
    CapitalT = 84,
    /// U+0055 (`U`)
    CapitalU = 85,
    /// U+0056 (`V`)
    CapitalV = 86,
    /// U+0057 (`W`)
    CapitalW = 87,
    /// U+0058 (`X`)
    CapitalX = 88,
    /// U+0059 (`Y`)
    CapitalY = 89,
    /// U+005A (`Z`)
    CapitalZ = 90,
    /// U+005B (`[`)
    LeftSquareBracket = 91,
    /// U+005C (`\`)
    ReverseSolidus = 92,
    /// U+005D (`]`)
    RightSquareBracket = 93,
    /// U+005E (`^`)
    CircumflexAccent = 94,
    /// U+005F (`_`)
    LowLine = 95,
    /// U+0060 (`` ` ``)
    GraveAccent = 96,
    /// U+0061 (`a`)
    SmallA = 97,
    /// U+0062 (`b`)
    SmallB = 98,
    /// U+0063 (`c`)
    SmallC = 99,
    /// U+0064 (`d`)
    SmallD = 100,
    /// U+0065 (`e`)
    SmallE = 101,
    /// U+0066 (`f`)
    SmallF = 102,
    /// U+0067 (`g`)
    SmallG = 103,
    /// U+0068 (`h`)
    SmallH = 104,
    /// U+0069 (`i`)
    SmallI = 105,
    /// U+006A (`j`)
    SmallJ = 106,
    /// U+006B (`k`)
    SmallK = 107,
    /// U+006C (`l`)
    SmallL = 108,
    /// U+006D (`m`)
    SmallM = 109,
    /// U+006E (`n`)
    SmallN = 110,
    /// U+006F (`o`)
    SmallO = 111,
    /// U+0070 (`p`)
    SmallP = 112,
    /// U+0071 (`q`)
    SmallQ = 113,
    /// U+0072 (`r`)
    SmallR = 114,
    /// U+0073 (`s`)
    SmallS = 115,
    /// U+0074 (`t`)
    SmallT = 116,
    /// U+0075 (`u`)
    SmallU = 117,
    /// U+0076 (`v`)
    SmallV = 118,
    /// U+0077 (`w`)
    SmallW = 119,
    /// U+0078 (`x`)
    SmallX = 120,
    /// U+0079 (`y`)
    SmallY = 121,
    /// U+007A (`z`)
    SmallZ = 122,
    /// U+007B (`{`)
    LeftCurlyBracket = 123,
    /// U+007C (`|`)
    VerticalLine = 124,
    /// U+007D (`}`)
    RightCurlyBracket = 125,
    /// U+007E (`~`)
    Tilde = 126,
    /// U+007F
    Delete = 127,
}

impl AsciiChar {
    /// Converts the byte `b` into the ASCII character with that code point.
    ///
    /// Returns `None` when `b` is outside `0..=127`.
    #[must_use]
    pub const fn from_u8(b: u8) -> Option<Self> {
        // An explicit `match` keeps this conversion free of `unsafe`;
        // the single fallthrough arm rejects every non-ASCII byte.
        Some(match b {
            0 => Self::Null,
            1 => Self::StartOfHeading,
            2 => Self::StartOfText,
            3 => Self::EndOfText,
            4 => Self::EndOfTransmission,
            5 => Self::Enquiry,
            6 => Self::Acknowledge,
            7 => Self::Bell,
            8 => Self::Backspace,
            9 => Self::AsciiCharacterTabulation,
            10 => Self::LineFeed,
            11 => Self::LineTabulation,
            12 => Self::FormFeed,
            13 => Self::CarriageReturn,
            14 => Self::ShiftOut,
            15 => Self::ShiftIn,
            16 => Self::DataLinkEscape,
            17 => Self::DeviceControlOne,
            18 => Self::DeviceControlTwo,
            19 => Self::DeviceControlThree,
            20 => Self::DeviceControlFour,
            21 => Self::NegativeAcknowledge,
            22 => Self::SynchronousIdle,
            23 => Self::EndOfTransmissionBlock,
            24 => Self::Cancel,
            25 => Self::EndOfMedium,
            26 => Self::Substitute,
            27 => Self::Escape,
            28 => Self::InformationSeparatorFour,
            29 => Self::InformationSeparatorThree,
            30 => Self::InformationSeparatorTwo,
            31 => Self::InformationSeparatorOne,
            32 => Self::Space,
            33 => Self::ExclamationMark,
            34 => Self::QuotationMark,
            35 => Self::NumberSign,
            36 => Self::DollarSign,
            37 => Self::PercentSign,
            38 => Self::Ampersand,
            39 => Self::Apostrophe,
            40 => Self::LeftParenthesis,
            41 => Self::RightParenthesis,
            42 => Self::Asterisk,
            43 => Self::PlusSign,
            44 => Self::Comma,
            45 => Self::HyphenMinus,
            46 => Self::FullStop,
            47 => Self::Solidus,
            48 => Self::Digit0,
            49 => Self::Digit1,
            50 => Self::Digit2,
            51 => Self::Digit3,
            52 => Self::Digit4,
            53 => Self::Digit5,
            54 => Self::Digit6,
            55 => Self::Digit7,
            56 => Self::Digit8,
            57 => Self::Digit9,
            58 => Self::Colon,
            59 => Self::Semicolon,
            60 => Self::LessThanSign,
            61 => Self::EqualsSign,
            62 => Self::GreaterThanSign,
            63 => Self::QuestionMark,
            64 => Self::CommercialAt,
            65 => Self::CapitalA,
            66 => Self::CapitalB,
            67 => Self::CapitalC,
            68 => Self::CapitalD,
            69 => Self::CapitalE,
            70 => Self::CapitalF,
            71 => Self::CapitalG,
            72 => Self::CapitalH,
            73 => Self::CapitalI,
            74 => Self::CapitalJ,
            75 => Self::CapitalK,
            76 => Self::CapitalL,
            77 => Self::CapitalM,
            78 => Self::CapitalN,
            79 => Self::CapitalO,
            80 => Self::CapitalP,
            81 => Self::CapitalQ,
            82 => Self::CapitalR,
            83 => Self::CapitalS,
            84 => Self::CapitalT,
            85 => Self::CapitalU,
            86 => Self::CapitalV,
            87 => Self::CapitalW,
            88 => Self::CapitalX,
            89 => Self::CapitalY,
            90 => Self::CapitalZ,
            91 => Self::LeftSquareBracket,
            92 => Self::ReverseSolidus,
            93 => Self::RightSquareBracket,
            94 => Self::CircumflexAccent,
            95 => Self::LowLine,
            96 => Self::GraveAccent,
            97 => Self::SmallA,
            98 => Self::SmallB,
            99 => Self::SmallC,
            100 => Self::SmallD,
            101 => Self::SmallE,
            102 => Self::SmallF,
            103 => Self::SmallG,
            104 => Self::SmallH,
            105 => Self::SmallI,
            106 => Self::SmallJ,
            107 => Self::SmallK,
            108 => Self::SmallL,
            109 => Self::SmallM,
            110 => Self::SmallN,
            111 => Self::SmallO,
            112 => Self::SmallP,
            113 => Self::SmallQ,
            114 => Self::SmallR,
            115 => Self::SmallS,
            116 => Self::SmallT,
            117 => Self::SmallU,
            118 => Self::SmallV,
            119 => Self::SmallW,
            120 => Self::SmallX,
            121 => Self::SmallY,
            122 => Self::SmallZ,
            123 => Self::LeftCurlyBracket,
            124 => Self::VerticalLine,
            125 => Self::RightCurlyBracket,
            126 => Self::Tilde,
            127 => Self::Delete,
            _ => return None,
        })
    }

    /// Converts the byte `b` into an ASCII character without validating it.
    ///
    /// # Safety
    /// `b` must be in `0..=127`, or else this is UB.
    #[must_use]
    #[cfg(all(not(feature = "safe_text"), feature = "unsafe_str"))]
    #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "unsafe_str")))]
    pub const unsafe fn from_u8_unchecked(b: u8) -> Self {
        // SAFETY: the caller promises `b <= 127`, and every such value is a
        // declared discriminant of this `repr(u8)` enum.
        unsafe { transmute(b) }
    }

    /// Maps the *number* `0`, `1`, …, `9` to the *character*
    /// `'0'`, `'1'`, …, `'9'` respectively.
    ///
    /// Returns `None` if `d >= 10`.
    ///
    /// # Features
    /// With `unsafe_hint` enabled (and `safe_text` disabled) the offset is
    /// computed with `unchecked_add` instead of a checked addition.
    #[must_use]
    pub const fn digit(d: u8) -> Option<Self> {
        if d >= 10 {
            return None;
        }

        #[cfg(any(feature = "safe_text", not(feature = "unsafe_hint")))]
        let byte = b'0' + d;

        #[cfg(all(not(feature = "safe_text"), feature = "unsafe_hint"))]
        // SAFETY: `d < 10` was checked above, so `b'0' + d` is at most `b'9'`
        // and cannot overflow a `u8`.
        let byte = unsafe { b'0'.unchecked_add(d) };

        Self::from_u8(byte)
    }

    /// Maps the *number* `0`, `1`, …, `9` to the *character*
    /// `'0'`, `'1'`, …, `'9'` respectively, without checking that it's in-range.
    ///
    /// # Safety
    /// This is immediate UB if called with `d > 64`.
    ///
    /// If `d >= 10` and `d <= 64`, this is allowed to return any value or panic.
    /// Notably, it should not be expected to return hex digits, or any other
    /// reasonable extension of the decimal digits.
    ///
    /// (This loose safety condition is intended to simplify soundness proofs
    /// when writing code using this method, since the implementation doesn't
    /// need something really specific, not to make those other arguments do
    /// something useful. It might be tightened before stabilization.)
    #[must_use]
    #[cfg(all(not(feature = "safe_text"), feature = "unsafe_str"))]
    #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "unsafe_str")))]
    pub const unsafe fn digit_unchecked(d: u8) -> Self {
        // Catches out-of-contract arguments in builds with debug assertions.
        debug_assert!(d < 10);

        // SAFETY: `'0'` through `'9'` are U+0030 through U+0039, and the caller
        // guarantees `d <= 64`, so the sum is at most 112 (0x70): it neither
        // overflows a `u8` nor leaves the ASCII range.
        unsafe { Self::from_u8_unchecked(b'0'.unchecked_add(d)) }
    }

    /// Returns the code point of this ASCII character as a byte.
    #[must_use]
    pub const fn as_u8(self) -> u8 {
        self as u8
    }

    /// Returns this ASCII character as a `char` Unicode Scalar Value.
    #[must_use]
    pub const fn as_char(self) -> char {
        self.as_u8() as char
    }

    /// Borrows this ASCII character as a one-code-unit UTF-8 `str`.
    #[must_use]
    #[cfg(all(not(feature = "safe_text"), feature = "unsafe_str"))]
    #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "unsafe_str")))]
    pub const fn as_str(&self) -> &str {
        // View `self` as a one-element slice, then reuse the slice conversion.
        let single: &[Self] = core::slice::from_ref(self);
        Self::slice_as_str(single)
    }
}

impl AsciiChar {
    /// Reinterprets a slice of ASCII characters as a UTF-8 `str`.
    ///
    /// No validation or copying is performed.
    #[must_use]
    #[cfg(all(not(feature = "safe_text"), feature = "unsafe_str"))]
    #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "unsafe_str")))]
    pub const fn slice_as_str(slice: &[AsciiChar]) -> &str {
        // SAFETY: Each ASCII codepoint in UTF-8 is encoded as one single-byte
        // code unit having the same value as the ASCII byte, so the slice's
        // bytes already form a valid `str` of the same length.
        unsafe { &*(slice as *const [AsciiChar] as *const str) }
    }

    /// Reinterprets a slice of ASCII characters as a slice of `u8` bytes.
    #[must_use]
    #[cfg(all(not(feature = "safe_text"), feature = "unsafe_str"))]
    #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "unsafe_str")))]
    pub const fn slice_as_bytes(slice: &[AsciiChar]) -> &[u8] {
        let text: &str = Self::slice_as_str(slice);
        text.as_bytes()
    }
}
// impl [AsciiChar] {
//     /// Views this slice of ASCII characters as a UTF-8 `str`.
//     #[must_use]
//     pub const fn as_str(&self) -> &str {
//         let ascii_ptr: *const Self = self;
//         let str_ptr = ascii_ptr as *const str;
//         // SAFETY: Each ASCII codepoint in UTF-8 is encoded as one single-byte
//         // code unit having the same value as the ASCII byte.
//         unsafe { &*str_ptr }
//     }
//
//     /// Views this slice of ASCII characters as a slice of `u8` bytes.
//     #[must_use]
//     pub const fn as_bytes(&self) -> &[u8] {
//         self.as_str().as_bytes()
//     }
// }

impl fmt::Display for AsciiChar {
    /// Formats this ASCII character by delegating to `char`'s `Display` impl.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let ch: char = self.as_char();
        <char as fmt::Display>::fmt(&ch, f)
    }
}

impl ConstDefault for AsciiChar {
    /// The compile-time default is the null character (U+0000),
    /// the same variant that is marked `#[default]` on the enum.
    const DEFAULT: Self = Self::Null;
}

// NOTE(review): presumably declares that an `AsciiChar` occupies exactly 7 bits
// (its discriminants span 0..=127) — confirm against the `bit_sized!` macro.
#[cfg(feature = "bit")]
crate::bit_sized![= 7; for AsciiChar];