devela/text/char/impls/
c8.rs

1// devela::text::char::impls::char8
2
3use super::*;
4#[cfg(feature = "ascii")]
5use crate::AsciiChar;
6use crate::{Char, DataOverflow};
7
8impl char8 {
9    /* private helper fns */
10
11    // SAFETY: this is not marked as unsafe because it's only used privately
12    // by this module for a few selected operations.
13    #[must_use]
14    const fn from_char_unchecked(c: char) -> char8 {
15        char8(c as u32 as u8)
16    }
17
18    /* constants */
19
20    /// The lowest unicode scalar a `char8` can represent, `'\u{00}'`.
21    pub const MIN: char8 = char8(0x00);
22
23    /// The highest unicode scalar a `char8` can represent, `'\u{FF}'`.
24    pub const MAX: char8 = char8(0xFF);
25
26    /* conversions */
27
28    /// Converts an `AsciiChar` to `char8`.
29    #[must_use]
30    #[cfg(feature = "ascii")]
31    #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "ascii")))]
32    pub const fn from_ascii_char(c: AsciiChar) -> char8 {
33        char8(c as u8)
34    }
35
36    /// Converts a `char7` to `char8`.
37    #[must_use]
38    #[cfg(feature = "_char7")]
39    #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "_char7")))]
40    pub const fn from_char7(c: char7) -> char8 {
41        char8(c.0.get())
42    }
43    /// Tries to convert a `char16` to `char8`.
44    ///
45    /// # Errors
46    /// Returns [`DataOverflow`] if the character can't fit in 8 bits.
47    #[cfg(feature = "_char16")]
48    #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "_char16")))]
49    pub const fn try_from_char16(c: char16) -> Result<char8, DataOverflow> {
50        if Char::byte_len(c.to_u32()) == 1 {
51            Ok(char8(c.to_u32() as u8))
52        } else {
53            Err(DataOverflow(Some(c.to_u32() as usize)))
54        }
55    }
56    /// Tries to convert a `char` to `char8`.
57    ///
58    /// # Errors
59    /// Returns [`DataOverflow`] if the character can't fit in 8 bits.
60    pub const fn try_from_char(c: char) -> Result<char8, DataOverflow> {
61        if Char::byte_len(c as u32) == 1 {
62            Ok(char8(c as u32 as u8))
63        } else {
64            Err(DataOverflow(Some(c as u32 as usize)))
65        }
66    }
67
68    //
69
70    /// Tries to convert this `char8` to `AsciiChar`.
71    ///
72    /// # Errors
73    /// Returns [`DataOverflow`] if `self` can't fit in 7 bits.
74    ///
75    /// # Features
76    /// Makes use of the `unsafe_str` feature if enabled.
77    #[cfg(feature = "ascii")]
78    #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "ascii")))]
79    pub const fn try_to_ascii_char(self) -> Result<AsciiChar, DataOverflow> {
80        if Char::is_7bit(self.to_u32()) {
81            #[cfg(any(feature = "safe_text", not(feature = "unsafe_str")))]
82            if let Some(c) = AsciiChar::from_u8(self.0) {
83                return Ok(c);
84            } else {
85                unreachable![]
86            }
87
88            #[cfg(all(not(feature = "safe_text"), feature = "unsafe_str"))]
89            // SAFETY: we've already checked it's in range.
90            return Ok(unsafe { AsciiChar::from_u8_unchecked(self.0) });
91        }
92        Err(DataOverflow(Some(self.to_u32() as usize)))
93    }
94
95    /// Tries to convert this `char8` to `char7`.
96    ///
97    /// # Errors
98    /// Returns [`DataOverflow`] if `self` can't fit in 7 bits.
99    #[cfg(feature = "_char7")]
100    #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "_char7")))]
101    pub const fn try_to_char7(self) -> Result<char7, DataOverflow> {
102        char7::try_from_char8(self)
103    }
104    /// Converts this `char8` to `char16`.
105    #[must_use]
106    #[cfg(feature = "_char16")]
107    #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "_char16")))]
108    pub const fn to_char16(self) -> char16 {
109        char16::from_char8(self)
110    }
111    /// Converts this `char8` to `char`.
112    #[must_use]
113    pub const fn to_char(self) -> char {
114        self.0 as char
115    }
116    /// Converts this `char8` to `u32`.
117    #[must_use]
118    pub const fn to_u32(self) -> u32 {
119        self.0 as u32
120    }
121
122    /// Converts this `char8` to an UTF-8 encoded sequence of bytes.
123    ///
124    /// Note that this function always returns a 2-byte array, but the actual
125    /// UTF-8 sequence may be shorter. The unused bytes are set to 0.
126    //
127    // https://en.wikipedia.org/wiki/UTF-8#Encoding
128    #[must_use]
129    #[allow(clippy::unusual_byte_groupings, clippy::single_match_else)]
130    pub const fn to_utf8_bytes(self) -> [u8; 2] {
131        let c = self.0;
132        match c {
133            // From 0x0000 to 0x007F:
134            // the UTF-8 encoding is the same as the scalar value.
135            0x0000..=0x007F => [c, 0],
136
137            // from 0x0080 to 0x00FF:
138            // the UTF-8 encoding is 110xxxxx 10xxxxxx,
139            // where xxxxx and xxxxxx are the bits of the scalar value.
140            0x0080.. => {
141                let y = 0b10_000000 | (0b0011_1111 & c);
142                let x = 0b110_00000 | (c >> 6);
143                [x, y]
144            }
145        }
146    }
147
148    //
149
150    /* queries */
151
152    /// Returns `true` if this unicode scalar is a [noncharacter][0].
153    ///
154    /// [0]: https://www.unicode.org/glossary/#noncharacter
155    #[must_use]
156    pub const fn is_noncharacter(self) -> bool {
157        Char::is_noncharacter(self.0 as u32)
158    }
159
160    /// Returns `true` if this unicode scalar is an [abstract character][0].
161    ///
162    /// [0]: https://www.unicode.org/glossary/#abstract_character
163    #[must_use]
164    pub const fn is_character(self) -> bool {
165        !self.is_noncharacter()
166    }
167
168    /// Checks if the value is within the ASCII range.
169    #[must_use]
170    pub const fn is_ascii(self) -> bool {
171        self.0 <= 0x7F
172    }
173
174    /// Makes a copy of the value in its ASCII upper case equivalent.
175    ///
176    /// ASCII letters ‘a’ to ‘z’ are mapped to ‘A’ to ‘Z’, but non-ASCII letters
177    /// are unchanged.
178    #[must_use]
179    pub const fn to_ascii_uppercase(self) -> char8 {
180        Self::from_char_unchecked(char::to_ascii_uppercase(&self.to_char()))
181    }
182
183    /// Makes a copy of the value in its ASCII lower case equivalent.
184    ///
185    /// ASCII letters ‘A’ to ‘Z’ are mapped to ‘a’ to ‘z’, but non-ASCII letters
186    /// are unchanged.
187    #[must_use]
188    pub const fn to_ascii_lowercase(self) -> char8 {
189        Self::from_char_unchecked(char::to_ascii_lowercase(&self.to_char()))
190    }
191}
192
// Marks `char8` as `MemPod`; only compiled when `unsafe_layout` is enabled
// and `safe_text` is not.
//
// NOTE(review): being an `unsafe impl`, this presumably relies on `char8`
// being a plain wrapper over a single `u8` where every bit pattern is valid —
// confirm against the type definition and the `MemPod` contract.
#[cfg(all(not(feature = "safe_text"), feature = "unsafe_layout"))]
#[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "_char8")))]
unsafe impl crate::MemPod for char8 {}