devela/text/str/namespace.rs
1// devela::text::str::namespace
2//
3//! [`Str`] namespace.
4//
5
6#[cfg(doc)]
7use crate::ExtStr;
8use crate::{iif, Ascii, InvalidUtf8, Slice};
9#[allow(unused_imports, reason = "unsafe")]
10#[cfg(feature = "alloc")]
11use crate::{Box, _dep::_alloc::str::from_boxed_utf8_unchecked};
12#[allow(unused_imports, reason = "unsafe")]
13use crate::{
14 _core::str::{from_utf8_unchecked, from_utf8_unchecked_mut},
15 sf, unwrap,
16};
// TODO: IMPROVE:
// - one default version (SIMD, if it can offer the same API)
// - another, faster SIMD version if possible (not caring about API or errors)
// can't import either/or; both have to be imported for this module…
21use ::core::str::from_utf8_mut;
22// crate::_use! {basic::from_utf8} // MAYBE not needed
23
24#[doc = crate::TAG_NAMESPACE!()]
25/// A string slice namespace.
26///
27/// See also the [`std::str`] module.
28pub struct Str;
29
30impl Str {
31 /// Converts a slice of bytes to a string slice.
32 ///
33 /// See `core::str::`[`from_utf8`].
34 //
35 // WAIT:[const_methods](https://github.com/rusticstuff/simdutf8/pull/111)
36 // /// # Features
37 // /// if the `dep_simdutf8` is enabled
38 // /// then `simdutf8::compat::`[`from_utf8`] is called instead.
39 pub const fn from_utf8(v: &[u8]) -> Result<&str, InvalidUtf8> {
40 // #[cfg(not(feature = "dep_simdutf8"))]
41 match ::core::str::from_utf8(v) {
42 Ok(v) => Ok(v),
43 Err(e) => Err(InvalidUtf8::from_utf8_error(e)),
44 }
45 // #[cfg(feature = "dep_simdutf8")]
46 // match ::simdutf8::compat::from_utf8(v) {
47 // Ok(v) => Ok(v),
48 // Err(e) => Err(InvalidUtf8::from_compat_utf8_error(e)),
49 // }
50 }
51
52 /// Converts a mutable slice of bytes to a mutable string slice.
53 ///
54 /// See [`from_utf8_mut`].
55 // WAIT: [const_str_from_utf8](https://github.com/rust-lang/rust/pull/136668)
56 pub fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, InvalidUtf8> {
57 match from_utf8_mut(v) {
58 Ok(v) => Ok(v),
59 Err(e) => Err(InvalidUtf8::from_utf8_error(e)),
60 }
61 }
62
63 /// Converts a slice of bytes to a string slice without checking valid UTF-8.
64 ///
65 /// See [`from_utf8_unchecked`].
66 ///
67 /// # Safety
68 /// The bytes passed in must be valid UTF-8.
69 #[must_use]
70 #[cfg(all(not(feature = "safe_text"), unsafe··))]
71 #[cfg_attr(feature = "nightly_doc", doc(cfg(unsafe··)))]
72 pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
73 // SAFETY: Caller must uphold the safety contract.
74 unsafe { from_utf8_unchecked(v) }
75 }
76
77 /// Converts a mutable slice of bytes to a mutable string slice without checking valid UTF-8.
78 ///
79 /// See [`from_utf8_unchecked_mut`].
80 ///
81 /// # Safety
82 /// The bytes passed in must be valid UTF-8.
83 #[must_use]
84 #[cfg(all(not(feature = "safe_text"), unsafe··))]
85 #[cfg_attr(feature = "nightly_doc", doc(cfg(unsafe··)))]
86 pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
87 // SAFETY: Caller must uphold the safety contract.
88 unsafe { from_utf8_unchecked_mut(v) }
89 }
90
91 /// Converts a boxed slice of bytes to a boxed string slice without checking valid UTF-8.
92 ///
93 /// See [`from_boxed_utf8_unchecked`].
94 ///
95 /// # Safety
96 /// The bytes passed in must be valid UTF-8.
97 #[must_use]
98 #[cfg(feature = "alloc")]
99 #[cfg(all(not(feature = "safe_text"), unsafe··))]
100 #[cfg_attr(feature = "nightly_doc", doc(cfg(all(feature = "alloc", unsafe··))))]
101 pub unsafe fn from_boxed_utf8_unchecked(v: Box<[u8]>) -> Box<str> {
102 // SAFETY: Caller must uphold the safety contract.
103 unsafe { from_boxed_utf8_unchecked(v) }
104 }
105
106 /// Repeats a `string` a given number of times into the provided `buffer`.
107 /// and returns a reference to the new `&str`.
108 /// # Examples
109 /// ```
110 /// # use devela::Str;
111 /// let mut buf = [0_u8; 12];
112 /// let repeated = Str::repeat_into("ay", 3, &mut buf);
113 /// assert_eq![repeated, "ayayay"];
114 /// ```
115 /// # Features
116 /// Makes use of the `unsafe_str` feature if enabled.
117 ///
118 /// See also [`ExtStr::new_counter`], which should be faster,
119 /// because it uses `copy_from_slice`.
120 #[must_use]
121 pub const fn repeat_into<'input, const CAP: usize>(
122 string: &str,
123 n: usize,
124 buffer: &'input mut [u8; CAP],
125 ) -> &'input str {
126 let s_bytes = string.as_bytes();
127 let mut index = 0;
128 // for _ in 0..n {
129 // for &b in s_bytes {
130 // iif![index == CAP; break];
131 // buffer[index] = b;
132 // index += 1;
133 // }
134 // } // const loop:
135 let mut outer_count = 0;
136 while outer_count < n {
137 let mut inner_index = 0;
138 while inner_index < s_bytes.len() {
139 iif![index == CAP; break];
140 buffer[index] = s_bytes[inner_index];
141 index += 1;
142 inner_index += 1;
143 }
144 outer_count += 1;
145 }
146
147 #[cfg(any(feature = "safe_text", not(feature = "unsafe_str")))]
148 return unwrap![ok Str::from_utf8(Slice::range_to(buffer, index))];
149 #[cfg(all(not(feature = "safe_text"), feature = "unsafe_str"))]
150 // SAFETY: since `string` is a valid &str, checks are unneeded.
151 sf! { unsafe { Str::from_utf8_unchecked(Slice::range_to(buffer, index)) }}
152 }
153
154 /// Returns a [`&str`] backed by a `buffer`, where you always know each
155 /// character's position.
156 ///
157 /// A [*counter string*][0] is a graduated string of arbitrary `length`,
158 /// with a `separator` positioned after the immediately preceding number.
159 /// # Examples
160 /// ```
161 /// # use devela::Str;
162 /// let mut buf = [0; 15];
163 /// assert_eq!("2*4*6*8*11*14*", Str::new_counter(&mut buf, 14, '*'));
164 /// assert_eq!("_3_5_7_9_12_15_", Str::new_counter(&mut buf, 15, '_'));
165 /// ```
166 /// # Panics
167 /// Panics if `buffer.len() < length`, or if `!char.is_ascii()`.
168 ///
169 /// # Features
170 /// Makes use of the `unsafe_str` feature if enabled.
171 ///
172 /// See also [`ExtStr::new_counter`].
173 ///
174 /// [0]: https://www.satisfice.com/blog/archives/22
175 pub const fn new_counter(buffer: &mut [u8], length: usize, separator: char) -> &str {
176 assert![buffer.len() >= length];
177 assert![separator.is_ascii()];
178 if length == 0 {
179 Str::new_cold_empty()
180 } else {
181 let separator = separator as u8;
182 let mut index = length - 1; // start writing from the end
183 let mut num = length; // the first number to write is the length
184 let mut separator_turn = true; // start writing the separator
185
186 let mut num_buf = Ascii(num).digits();
187 let mut num_bytes = Slice::trim_leading_bytes(&num_buf, b'0');
188 // IMPROVE:BENCH use NumToStr
189 // let mut num_buf = [0u8; 22];
190 // let mut num_bytes = num.to_bytes_base(10, &mut num_buf);
191
192 let mut num_len = num_bytes.len();
193
194 loop {
195 if separator_turn {
196 buffer[index] = separator;
197 } else {
198 iif![index > 0; index -= num_len - 1];
199 // buffer[index..(num_len + index)].copy_from_slice(&num_bytes[..num_len]);
200 // Slice::range_mut(buffer, index, num_len + index)
201 // .copy_from_slice(Slice::range_to(num_bytes, num_len));
202 let mut i = 0;
203 while i < num_len {
204 buffer[index + i] = num_bytes[i];
205 i += 1;
206 }
207
208 num = index;
209
210 num_buf = Ascii(num).digits();
211 num_bytes = Slice::trim_leading_bytes(&num_buf, b'0');
212 // IMPROVE: use NumToStr
213 // num_bytes = num.to_bytes_base(10, &mut num_buf);
214
215 num_len = num_bytes.len();
216 }
217 iif![index == 0; break; index -= 1];
218 separator_turn = !separator_turn;
219 }
220
221 #[cfg(any(feature = "safe_text", not(feature = "unsafe_str")))]
222 return unwrap![ok Str::from_utf8(Slice::range_to(buffer, length))];
223 #[cfg(all(not(feature = "safe_text"), feature = "unsafe_str"))]
224 // SAFETY: TODO: since `string` is a valid &str, checks are unneeded.
225 sf! { unsafe { Str::from_utf8_unchecked(Slice::range_to(buffer, length)) }}
226 }
227 }
228
229 /* private utilities */
230
231 /// The cold path that returns an empty string slice.
232 #[cold] #[rustfmt::skip]
233 pub(crate) const fn new_cold_empty() -> &'static str { "" }
234}