|
| 1 | +//! Arena String. |
| 2 | +//! |
| 3 | +//! See [`String`] for more details. |
| 4 | +
|
| 5 | +// All methods which just delegate to `bumpalo::collections::String` methods marked `#[inline(always)]` |
| 6 | +#![expect(clippy::inline_always)] |
| 7 | + |
| 8 | +use std::{ |
| 9 | + fmt::{self, Debug, Display}, |
| 10 | + hash::{Hash, Hasher}, |
| 11 | + mem::ManuallyDrop, |
| 12 | + ops::{Deref, DerefMut}, |
| 13 | +}; |
| 14 | + |
| 15 | +use bumpalo::collections::String as BumpaloString; |
| 16 | +use simdutf8::basic::from_utf8; |
| 17 | +pub use simdutf8::basic::Utf8Error; |
| 18 | + |
| 19 | +use crate::{Allocator, Vec}; |
| 20 | + |
| 21 | +/// Arena String. |
| 22 | +/// |
| 23 | +/// UTF-8 encoded, growable string. Identical to [`std::string::String`] except that it stores |
| 24 | +/// string contents in arena allocator. |
| 25 | +#[derive(PartialOrd, Eq, Ord)] |
| 26 | +pub struct String<'alloc>(BumpaloString<'alloc>); |
| 27 | + |
| 28 | +impl<'alloc> String<'alloc> { |
| 29 | + /// Creates a new empty [`String`]. |
| 30 | + /// |
| 31 | + /// Given that the `String` is empty, this will not allocate any initial |
| 32 | + /// buffer. While that means that this initial operation is very |
| 33 | + /// inexpensive, it may cause excessive allocation later when you add |
| 34 | + /// data. If you have an idea of how much data the `String` will hold, |
| 35 | + /// consider the [`with_capacity_in`] method to prevent excessive |
| 36 | + /// re-allocation. |
| 37 | + /// |
| 38 | + /// [`with_capacity_in`]: String::with_capacity_in |
| 39 | + #[inline(always)] |
| 40 | + pub fn new_in(allocator: &'alloc Allocator) -> String<'alloc> { |
| 41 | + Self(BumpaloString::new_in(allocator)) |
| 42 | + } |
| 43 | + |
| 44 | + /// Creates a new empty [`String`] with specified capacity. |
| 45 | + /// |
| 46 | + /// `String`s have an internal buffer to hold their data. The capacity is |
| 47 | + /// the length of that buffer, and can be queried with the `capacity` |
| 48 | + /// method. This method creates an empty `String`, but one with an initial |
| 49 | + /// buffer that can hold `capacity` bytes. This is useful when you may be |
| 50 | + /// appending a bunch of data to the `String`, reducing the number of |
| 51 | + /// reallocations it needs to do. |
| 52 | + /// |
| 53 | + /// If the given capacity is `0`, no allocation will occur, and this method |
| 54 | + /// is identical to the [`new_in`] method. |
| 55 | + /// |
| 56 | + /// [`capacity`]: String::capacity |
| 57 | + /// [`new_in`]: String::new_in |
| 58 | + #[inline(always)] |
| 59 | + pub fn with_capacity_in(capacity: usize, allocator: &'alloc Allocator) -> String<'alloc> { |
| 60 | + Self(BumpaloString::with_capacity_in(capacity, allocator)) |
| 61 | + } |
| 62 | + |
| 63 | + /// Construct a new [`String`] from a string slice. |
| 64 | + /// |
| 65 | + /// # Examples |
| 66 | + /// |
| 67 | + /// ``` |
| 68 | + /// use oxc_allocator::{Allocator, String}; |
| 69 | + /// |
| 70 | + /// let allocator = Allocator::default(); |
| 71 | + /// |
| 72 | + /// let s = String::from_str_in("hello", &allocator); |
| 73 | + /// assert_eq!(s, "hello"); |
| 74 | + /// ``` |
| 75 | + #[inline(always)] |
| 76 | + pub fn from_str_in(s: &str, allocator: &'alloc Allocator) -> String<'alloc> { |
| 77 | + Self(BumpaloString::from_str_in(s, allocator)) |
| 78 | + } |
| 79 | + |
| 80 | + /// Convert `Vec<u8>` into [`String`]. |
| 81 | + /// |
| 82 | + /// # Errors |
| 83 | + /// Returns [`Err`] if the `Vec` does not comprise a valid UTF-8 string. |
| 84 | + pub fn from_utf8(bytes: Vec<'alloc, u8>) -> Result<String<'alloc>, Utf8Error> { |
| 85 | + // Check vec comprises a valid UTF-8 string. |
| 86 | + from_utf8(&bytes)?; |
| 87 | + // SAFETY: We just checked it's a valid UTF-8 string |
| 88 | + let s = unsafe { Self::from_utf8_unchecked(bytes) }; |
| 89 | + Ok(s) |
| 90 | + } |
| 91 | + |
| 92 | + /// Convert `Vec<u8>` into [`String`], without checking bytes comprise a valid UTF-8 string. |
| 93 | + /// |
| 94 | + /// Does not copy the contents of the `Vec`, converts in place. This is a zero-cost operation. |
| 95 | + /// |
| 96 | + /// # SAFETY |
| 97 | + /// Caller must ensure this `Vec<u8>` comprises a valid UTF-8 string. |
| 98 | + // |
| 99 | + // `#[inline(always)]` because this is a no-op at runtime |
| 100 | + #[expect(clippy::missing_safety_doc, clippy::unnecessary_safety_comment)] |
| 101 | + #[inline(always)] |
| 102 | + pub unsafe fn from_utf8_unchecked(bytes: Vec<'alloc, u8>) -> String<'alloc> { |
| 103 | + // Cannot use `bumpalo::String::from_utf8_unchecked` because it takes a `bumpalo::collections::Vec`, |
| 104 | + // and our inner `Vec` type is `allocator_api2::vec::Vec`. |
| 105 | + // SAFETY: Conversion is safe because both types store data in arena in same way. |
| 106 | + // Lifetime of returned `String` is same as lifetime of original `Vec<u8>`. |
| 107 | + let inner = ManuallyDrop::into_inner(bytes.0); |
| 108 | + let (ptr, len, capacity, bump) = inner.into_raw_parts_with_alloc(); |
| 109 | + Self(BumpaloString::from_raw_parts_in(ptr, len, capacity, bump)) |
| 110 | + } |
| 111 | + |
| 112 | + /// Creates a new [`String`] from a length, capacity, and pointer. |
| 113 | + /// |
| 114 | + /// # SAFETY |
| 115 | + /// |
| 116 | + /// This is highly unsafe, due to the number of invariants that aren't checked: |
| 117 | + /// |
| 118 | + /// * The memory at `ptr` needs to have been previously allocated by the same [`Allocator`]. |
| 119 | + /// * `length` needs to be less than or equal to `capacity`. |
| 120 | + /// * `capacity` needs to be the correct value. |
| 121 | + /// |
| 122 | + /// Violating these may cause problems like corrupting the allocator's internal data structures. |
| 123 | + /// |
| 124 | + /// The ownership of `ptr` is effectively transferred to the `String` which may then deallocate, |
| 125 | + /// reallocate or change the contents of memory pointed to by the pointer at will. Ensure that |
| 126 | + /// nothing else uses the pointer after calling this function. |
| 127 | + /// |
| 128 | + /// # Examples |
| 129 | + /// |
| 130 | + /// Basic usage: |
| 131 | + /// |
| 132 | + /// ``` |
| 133 | + /// use std::mem; |
| 134 | + /// use oxc_allocator::{Allocator, String}; |
| 135 | + /// |
| 136 | + /// let allocator = Allocator::default(); |
| 137 | + /// |
| 138 | + /// unsafe { |
| 139 | + /// let mut s = String::from_str_in("hello", &allocator); |
| 140 | + /// let ptr = s.as_mut_ptr(); |
| 141 | + /// let len = s.len(); |
| 142 | + /// let capacity = s.capacity(); |
| 143 | + /// |
| 144 | + /// mem::forget(s); |
| 145 | + /// |
| 146 | + /// let s = String::from_raw_parts_in(ptr, len, capacity, &allocator); |
| 147 | + /// |
| 148 | + /// assert_eq!(s, "hello"); |
| 149 | + /// } |
| 150 | + /// ``` |
| 151 | + #[expect(clippy::missing_safety_doc, clippy::unnecessary_safety_comment)] |
| 152 | + #[inline(always)] |
| 153 | + pub unsafe fn from_raw_parts_in( |
| 154 | + buf: *mut u8, |
| 155 | + length: usize, |
| 156 | + capacity: usize, |
| 157 | + allocator: &'alloc Allocator, |
| 158 | + ) -> String<'alloc> { |
| 159 | + // SAFETY: Safety conditions of this method are the same as `BumpaloString`'s method |
| 160 | + Self(BumpaloString::from_raw_parts_in(buf, length, capacity, allocator)) |
| 161 | + } |
| 162 | + |
| 163 | + /// Convert this `String<'alloc>` into an `&'alloc str`. This is analogous to |
| 164 | + /// [`std::string::String::into_boxed_str`]. |
| 165 | + /// |
| 166 | + /// # Example |
| 167 | + /// |
| 168 | + /// ``` |
| 169 | + /// use oxc_allocator::{Allocator, String}; |
| 170 | + /// |
| 171 | + /// let allocator = Allocator::default(); |
| 172 | + /// |
| 173 | + /// let s = String::from_str_in("foo", &allocator); |
| 174 | + /// assert_eq!(s.into_bump_str(), "foo"); |
| 175 | + /// ``` |
| 176 | + #[inline(always)] |
| 177 | + pub fn into_bump_str(self) -> &'alloc str { |
| 178 | + self.0.into_bump_str() |
| 179 | + } |
| 180 | +} |
| 181 | + |
| 182 | +// Provide access to all `bumpalo::String`'s methods via deref |
| 183 | +impl<'alloc> Deref for String<'alloc> { |
| 184 | + type Target = BumpaloString<'alloc>; |
| 185 | + |
| 186 | + #[inline] |
| 187 | + fn deref(&self) -> &Self::Target { |
| 188 | + &self.0 |
| 189 | + } |
| 190 | +} |
| 191 | + |
| 192 | +impl<'alloc> DerefMut for String<'alloc> { |
| 193 | + #[inline] |
| 194 | + fn deref_mut(&mut self) -> &mut BumpaloString<'alloc> { |
| 195 | + &mut self.0 |
| 196 | + } |
| 197 | +} |
| 198 | + |
| 199 | +impl PartialEq for String<'_> { |
| 200 | + #[inline] |
| 201 | + fn eq(&self, other: &String) -> bool { |
| 202 | + PartialEq::eq(&self[..], &other[..]) |
| 203 | + } |
| 204 | +} |
| 205 | + |
| 206 | +// `impl_eq!` macro copied from `bumpalo` |
| 207 | +macro_rules! impl_eq { |
| 208 | + ($lhs:ty, $rhs: ty) => { |
| 209 | + impl<'a, 'alloc> PartialEq<$rhs> for $lhs { |
| 210 | + #[inline] |
| 211 | + fn eq(&self, other: &$rhs) -> bool { |
| 212 | + PartialEq::eq(&self[..], &other[..]) |
| 213 | + } |
| 214 | + } |
| 215 | + |
| 216 | + impl<'a, 'alloc> PartialEq<$lhs> for $rhs { |
| 217 | + #[inline] |
| 218 | + fn eq(&self, other: &$lhs) -> bool { |
| 219 | + PartialEq::eq(&self[..], &other[..]) |
| 220 | + } |
| 221 | + } |
| 222 | + }; |
| 223 | +} |
| 224 | + |
| 225 | +impl_eq! { String<'alloc>, str } |
| 226 | +impl_eq! { String<'alloc>, &'a str } |
| 227 | +impl_eq! { std::borrow::Cow<'a, str>, String<'alloc> } |
| 228 | +impl_eq! { std::string::String, String<'alloc> } |
| 229 | + |
| 230 | +impl Display for String<'_> { |
| 231 | + #[inline] |
| 232 | + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 233 | + Display::fmt(self.as_str(), f) |
| 234 | + } |
| 235 | +} |
| 236 | + |
| 237 | +impl Debug for String<'_> { |
| 238 | + #[inline] |
| 239 | + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 240 | + Debug::fmt(self.as_str(), f) |
| 241 | + } |
| 242 | +} |
| 243 | + |
| 244 | +impl Hash for String<'_> { |
| 245 | + #[inline] |
| 246 | + fn hash<H: Hasher>(&self, hasher: &mut H) { |
| 247 | + self.as_str().hash(hasher); |
| 248 | + } |
| 249 | +} |
0 commit comments