devela/text/char/impls/c16.rs
1// devela::text::char::impls::char16
2//
//! Constants, conversions and queries implemented for `char16`.
4//
5
6use super::*;
7#[cfg(feature = "ascii")]
8use crate::AsciiChar;
9use crate::{text::char::NonSurrogateU16, Char, DataOverflow};
10
11impl char16 {
12 /* private helper fns */
13
14 // SAFETY: this is not marked as unsafe because it's only used privately
15 // by this module for a few selected operations.
16 #[must_use]
17 const fn from_char_unchecked(c: char) -> char16 {
18 char16::new_unchecked(c as u32 as u16)
19 }
20
21 // useful because Option::<T>::unwrap is not yet stable as const fn
22 #[must_use]
23 const fn new_unchecked(value: u16) -> char16 {
24 #[cfg(any(feature = "safe_text", not(feature = "unsafe_niche")))]
25 if let Some(c) = NonSurrogateU16::new(value) {
26 char16(c)
27 } else {
28 unreachable![]
29 }
30 #[cfg(all(not(feature = "safe_text"), feature = "unsafe_niche"))]
31 unsafe {
32 char16(NonSurrogateU16::new_unchecked(value))
33 }
34 }
35
36 /* constants */
37
38 /// The lowest unicode scalar a `char16` can represent, `'\u{00}'`.
39 pub const MIN: char16 = char16::new_unchecked(0x0000);
40
41 /// The highest unicode scalar a `char16` can represent, `'\u{FFFF}'`.
42 ///
43 /// Note that `'\u{FFFF}'` is a *noncharacter*.
44 pub const MAX: char16 = char16::new_unchecked(0xFFFF);
45
46 /// `U+FFFD REPLACEMENT CHARACTER (�)` is used in Unicode to represent a decoding error.
47 pub const REPLACEMENT_CHARACTER: char16 =
48 char16::new_unchecked(char::REPLACEMENT_CHARACTER as u32 as u16);
49
50 /* conversions */
51
52 /// Converts an `AsciiChar` to `char16`.
53 #[must_use]
54 #[cfg(feature = "ascii")]
55 #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "ascii")))]
56 pub const fn from_ascii_char(c: AsciiChar) -> char16 {
57 char16::new_unchecked(c as u8 as u16)
58 }
59
60 /// Converts a `char7` to `char16`.
61 #[must_use]
62 #[cfg(feature = "_char7")]
63 #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "_char7")))]
64 pub const fn from_char7(c: char7) -> char16 {
65 char16::new_unchecked(c.0.get() as u16)
66 }
67 /// Converts a `char8` to `char16`.
68 #[must_use]
69 #[cfg(feature = "_char8")]
70 #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "_char8")))]
71 pub const fn from_char8(c: char8) -> char16 {
72 char16::new_unchecked(c.0 as u16)
73 }
74 /// Tries to convert a `char` to `char16`.
75 ///
76 /// # Errors
77 /// Returns [`DataOverflow`] if the character can't fit in 16 bits.
78 pub const fn try_from_char(c: char) -> Result<char16, DataOverflow> {
79 if Char::byte_len(c as u32) <= 2 {
80 Ok(char16::new_unchecked(c as u32 as u16))
81 } else {
82 Err(DataOverflow(Some(c as u32 as usize)))
83 }
84 }
85
86 //
87
88 /// Tries to convert this `char16` to `AsciiChar`.
89 ///
90 /// # Errors
91 /// Returns [`DataOverflow`] if `self` can't fit in 7 bits.
92 ///
93 /// # Features
94 /// Makes use of the `unsafe_niche` feature if enabled.
95 #[cfg(feature = "ascii")]
96 #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "ascii")))]
97 pub const fn try_to_ascii_char(self) -> Result<AsciiChar, DataOverflow> {
98 if Char::is_7bit(self.to_u32()) {
99 #[cfg(any(feature = "safe_text", not(feature = "unsafe_niche")))]
100 if let Some(c) = AsciiChar::from_u8(self.0.get() as u8) {
101 return Ok(c);
102 } else {
103 unreachable![]
104 }
105
106 #[cfg(all(not(feature = "safe_text"), feature = "unsafe_niche"))]
107 // SAFETY: we've already checked it's in range.
108 return Ok(unsafe { AsciiChar::from_u8_unchecked(self.0.get() as u8) });
109 }
110 Err(DataOverflow(Some(self.to_u32() as usize)))
111 }
112
113 /// Tries to convert this `char16` to `char7`.
114 ///
115 /// # Errors
116 /// Returns [`DataOverflow`] if `self` can't fit in 7 bits.
117 #[cfg(feature = "_char7")]
118 #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "_char7")))]
119 pub const fn try_to_char7(self) -> Result<char7, DataOverflow> {
120 char7::try_from_char16(self)
121 }
122 /// Tries to convert this `char16` to `char8`.
123 ///
124 /// # Errors
125 /// Returns [`DataOverflow`] if `self` can't fit in 8 bits.
126 #[cfg(feature = "_char8")]
127 #[cfg_attr(feature = "nightly_doc", doc(cfg(feature = "_char8")))]
128 pub const fn try_to_char8(self) -> Result<char8, DataOverflow> {
129 char8::try_from_char16(self)
130 }
131 /// Converts this `char16` to `char`.
132 #[must_use]
133 #[rustfmt::skip]
134 pub const fn to_char(self) -> char {
135 // #[cfg(any(feature = "safe_text", not(feature = "unsafe_niche")))]
136 if let Some(c) = char::from_u32(self.0.get() as u32) { c } else { unreachable![] }
137
138 // WAIT: [stable const](https://github.com/rust-lang/rust/issues/89259)
139 // #[cfg(all(not(feature = "safe_text"), feature = "unsafe_niche"))]
140 // SAFETY: we've already checked we contain a valid char.
141 // return unsafe { char::from_u32_unchecked(self.0 as u32) };
142 }
143 /// Converts this `char16` to `u32`.
144 #[must_use]
145 pub const fn to_u32(self) -> u32 {
146 self.0.get() as u32
147 }
148
149 /// Converts this `char16` to an UTF-8 encoded sequence of bytes.
150 ///
151 /// Note that this function always returns a 3-byte array, but the actual
152 /// UTF-8 sequence may be shorter. The unused bytes are set to 0.
153 //
154 // https://en.wikipedia.org/wiki/UTF-8#Encoding
155 #[must_use]
156 #[allow(clippy::unusual_byte_groupings)]
157 pub const fn to_utf8_bytes(self) -> [u8; 3] {
158 let c = self.0.get();
159 match c {
160 // From 0x0000 to 0x007F:
161 // the UTF-8 encoding is the same as the scalar value.
162 0x0000..=0x007F => [c as u8, 0, 0],
163
164 // from 0x0080 to 0x07FF:
165 // the UTF-8 encoding is 110xxxxx 10xxxxxx,
166 // where xxxxx and xxxxxx are the bits of the scalar value.
167 0x0080..=0x07FF => {
168 let y = 0b10_000000 | (0b0011_1111 & (c as u8));
169 let x = 0b110_00000 | ((c >> 6) as u8);
170 [x, y, 0]
171 }
172
173 // From from 0x0800 to 0xFFFF:
174 // the UTF-8 encoding is 1110xxxx 10xxxxxx 10xxxxxx.
175 0x0800..=0xFFFF => {
176 let z = 0b10_000000 | (0b0011_1111 & (c as u8));
177 let y = 0b10_000000 | ((c >> 6) & 0b0011_1111) as u8;
178 let x = 0b1110_0000 | ((c >> 12) as u8);
179 [x, y, z]
180 }
181 }
182 }
183
184 //
185
186 /// Makes a copy of the value in its ASCII upper case equivalent.
187 ///
188 /// ASCII letters ‘a’ to ‘z’ are mapped to ‘A’ to ‘Z’, but non-ASCII letters
189 /// are unchanged.
190 #[must_use]
191 pub const fn to_ascii_uppercase(self) -> char16 {
192 Self::from_char_unchecked(char::to_ascii_uppercase(&self.to_char()))
193 }
194
195 /// Makes a copy of the value in its ASCII lower case equivalent.
196 ///
197 /// ASCII letters ‘A’ to ‘Z’ are mapped to ‘a’ to ‘z’, but non-ASCII letters
198 /// are unchanged.
199 #[must_use]
200 pub const fn to_ascii_lowercase(self) -> char16 {
201 Self::from_char_unchecked(char::to_ascii_lowercase(&self.to_char()))
202 }
203
204 /* queries */
205
206 /// Returns `true` if this unicode scalar is a [noncharacter][0].
207 ///
208 /// [0]: https://www.unicode.org/glossary/#noncharacter
209 #[must_use]
210 pub const fn is_noncharacter(self) -> bool {
211 Char::is_noncharacter(self.0.get() as u32)
212 }
213
214 /// Returns `true` if this unicode scalar is an [abstract character][0].
215 ///
216 /// [0]: https://www.unicode.org/glossary/#abstract_character
217 #[must_use]
218 pub const fn is_character(self) -> bool {
219 !self.is_noncharacter()
220 }
221
222 /// Checks if the value is within the ASCII range.
223 #[must_use]
224 pub const fn is_ascii(self) -> bool {
225 self.0.get() <= 0x7F
226 }
227}