Skip to content

Commit f4e39e8

Browse files
committed
refactor(allocator): String type
1 parent 93df57f commit f4e39e8

File tree

7 files changed

+262
-21
lines changed

7 files changed

+262
-21
lines changed

crates/oxc_allocator/src/convert.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ impl<'a> FromIn<'a, String> for crate::String<'a> {
4949
impl<'a> FromIn<'a, String> for &'a str {
5050
#[inline(always)]
5151
fn from_in(value: String, allocator: &'a Allocator) -> Self {
52-
crate::String::from_str_in(value.as_str(), allocator).into_bump_str()
52+
allocator.alloc_str(value.as_str())
5353
}
5454
}
5555

crates/oxc_allocator/src/lib.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ use std::{
4444
ops::{Deref, DerefMut},
4545
};
4646

47-
pub use bumpalo::collections::String;
4847
use bumpalo::Bump;
4948

5049
mod address;
@@ -53,13 +52,15 @@ mod boxed;
5352
mod clone_in;
5453
mod convert;
5554
pub mod hash_map;
55+
pub mod string;
5656
mod vec;
5757

5858
pub use address::{Address, GetAddress};
5959
pub use boxed::Box;
6060
pub use clone_in::CloneIn;
6161
pub use convert::{FromIn, IntoIn};
6262
pub use hash_map::HashMap;
63+
pub use string::String;
6364
pub use vec::Vec;
6465

6566
/// A bump-allocated memory arena based on [bumpalo].

crates/oxc_allocator/src/string.rs

+249
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
//! Arena String.
2+
//!
3+
//! See [`String`] for more details.
4+
5+
// All methods which just delegate to `bumpalo::collections::String` methods are marked `#[inline(always)]`
6+
#![expect(clippy::inline_always)]
7+
8+
use std::{
9+
fmt::{self, Debug, Display},
10+
hash::{Hash, Hasher},
11+
mem::ManuallyDrop,
12+
ops::{Deref, DerefMut},
13+
};
14+
15+
use bumpalo::collections::String as BumpaloString;
16+
use simdutf8::basic::from_utf8;
17+
pub use simdutf8::basic::Utf8Error;
18+
19+
use crate::{Allocator, Vec};
20+
21+
/// Arena String.
22+
///
23+
/// UTF-8 encoded, growable string. Identical to [`std::string::String`] except that it stores
24+
/// string contents in an arena allocator.
25+
#[derive(PartialOrd, Eq, Ord)]
26+
pub struct String<'alloc>(BumpaloString<'alloc>);
27+
28+
impl<'alloc> String<'alloc> {
29+
/// Creates a new empty [`String`].
30+
///
31+
/// Given that the `String` is empty, this will not allocate any initial
32+
/// buffer. While that means that this initial operation is very
33+
/// inexpensive, it may cause excessive allocation later when you add
34+
/// data. If you have an idea of how much data the `String` will hold,
35+
/// consider the [`with_capacity_in`] method to prevent excessive
36+
/// re-allocation.
37+
///
38+
/// [`with_capacity_in`]: String::with_capacity_in
39+
#[inline(always)]
40+
pub fn new_in(allocator: &'alloc Allocator) -> String<'alloc> {
41+
Self(BumpaloString::new_in(allocator))
42+
}
43+
44+
/// Creates a new empty [`String`] with specified capacity.
45+
///
46+
/// `String`s have an internal buffer to hold their data. The capacity is
47+
/// the length of that buffer, and can be queried with the `capacity`
48+
/// method. This method creates an empty `String`, but one with an initial
49+
/// buffer that can hold `capacity` bytes. This is useful when you may be
50+
/// appending a bunch of data to the `String`, reducing the number of
51+
/// reallocations it needs to do.
52+
///
53+
/// If the given capacity is `0`, no allocation will occur, and this method
54+
/// is identical to the [`new_in`] method.
55+
///
56+
/// [`capacity`]: String::capacity
57+
/// [`new_in`]: String::new_in
58+
#[inline(always)]
59+
pub fn with_capacity_in(capacity: usize, allocator: &'alloc Allocator) -> String<'alloc> {
60+
Self(BumpaloString::with_capacity_in(capacity, allocator))
61+
}
62+
63+
/// Construct a new [`String`] from a string slice.
64+
///
65+
/// # Examples
66+
///
67+
/// ```
68+
/// use oxc_allocator::{Allocator, String};
69+
///
70+
/// let allocator = Allocator::default();
71+
///
72+
/// let s = String::from_str_in("hello", &allocator);
73+
/// assert_eq!(s, "hello");
74+
/// ```
75+
#[inline(always)]
76+
pub fn from_str_in(s: &str, allocator: &'alloc Allocator) -> String<'alloc> {
77+
Self(BumpaloString::from_str_in(s, allocator))
78+
}
79+
80+
/// Convert `Vec<u8>` into [`String`].
81+
///
82+
/// # Errors
83+
/// Returns [`Err`] if the `Vec` does not comprise a valid UTF-8 string.
84+
pub fn from_utf8(bytes: Vec<'alloc, u8>) -> Result<String<'alloc>, Utf8Error> {
85+
// Check vec comprises a valid UTF-8 string.
86+
from_utf8(&bytes)?;
87+
// SAFETY: We just checked it's a valid UTF-8 string
88+
let s = unsafe { Self::from_utf8_unchecked(bytes) };
89+
Ok(s)
90+
}
91+
92+
/// Convert `Vec<u8>` into [`String`], without checking bytes comprise a valid UTF-8 string.
93+
///
94+
/// Does not copy the contents of the `Vec`, converts in place. This is a zero-cost operation.
95+
///
96+
/// # SAFETY
97+
/// Caller must ensure this `Vec<u8>` comprises a valid UTF-8 string.
98+
//
99+
// `#[inline(always)]` because this is a no-op at runtime
100+
#[expect(clippy::missing_safety_doc, clippy::unnecessary_safety_comment)]
101+
#[inline(always)]
102+
pub unsafe fn from_utf8_unchecked(bytes: Vec<'alloc, u8>) -> String<'alloc> {
103+
// Cannot use `bumpalo::String::from_utf8_unchecked` because it takes a `bumpalo::collections::Vec`,
104+
// and our inner `Vec` type is `allocator_api2::vec::Vec`.
105+
// SAFETY: Conversion is safe because both types store data in the arena in the same way.
106+
// Lifetime of returned `String` is same as lifetime of original `Vec<u8>`.
107+
let inner = ManuallyDrop::into_inner(bytes.0);
108+
let (ptr, len, capacity, bump) = inner.into_raw_parts_with_alloc();
109+
Self(BumpaloString::from_raw_parts_in(ptr, len, capacity, bump))
110+
}
111+
112+
/// Creates a new [`String`] from a length, capacity, and pointer.
113+
///
114+
/// # SAFETY
115+
///
116+
/// This is highly unsafe, due to the number of invariants that aren't checked:
117+
///
118+
/// * The memory at `ptr` needs to have been previously allocated by the same [`Allocator`].
119+
/// * `length` needs to be less than or equal to `capacity`.
120+
/// * `capacity` needs to be the correct value.
121+
///
122+
/// Violating these may cause problems like corrupting the allocator's internal data structures.
123+
///
124+
/// The ownership of `ptr` is effectively transferred to the `String` which may then deallocate,
125+
/// reallocate or change the contents of memory pointed to by the pointer at will. Ensure that
126+
/// nothing else uses the pointer after calling this function.
127+
///
128+
/// # Examples
129+
///
130+
/// Basic usage:
131+
///
132+
/// ```
133+
/// use std::mem;
134+
/// use oxc_allocator::{Allocator, String};
135+
///
136+
/// let allocator = Allocator::default();
137+
///
138+
/// unsafe {
139+
/// let mut s = String::from_str_in("hello", &allocator);
140+
/// let ptr = s.as_mut_ptr();
141+
/// let len = s.len();
142+
/// let capacity = s.capacity();
143+
///
144+
/// mem::forget(s);
145+
///
146+
/// let s = String::from_raw_parts_in(ptr, len, capacity, &allocator);
147+
///
148+
/// assert_eq!(s, "hello");
149+
/// }
150+
/// ```
151+
#[expect(clippy::missing_safety_doc, clippy::unnecessary_safety_comment)]
152+
#[inline(always)]
153+
pub unsafe fn from_raw_parts_in(
154+
buf: *mut u8,
155+
length: usize,
156+
capacity: usize,
157+
allocator: &'alloc Allocator,
158+
) -> String<'alloc> {
159+
// SAFETY: Safety conditions of this method are the same as `BumpaloString`'s method
160+
Self(BumpaloString::from_raw_parts_in(buf, length, capacity, allocator))
161+
}
162+
163+
/// Convert this `String<'alloc>` into an `&'alloc str`. This is analogous to
164+
/// [`std::string::String::into_boxed_str`].
165+
///
166+
/// # Example
167+
///
168+
/// ```
169+
/// use oxc_allocator::{Allocator, String};
170+
///
171+
/// let allocator = Allocator::default();
172+
///
173+
/// let s = String::from_str_in("foo", &allocator);
174+
/// assert_eq!(s.into_bump_str(), "foo");
175+
/// ```
176+
#[inline(always)]
177+
pub fn into_bump_str(self) -> &'alloc str {
178+
self.0.into_bump_str()
179+
}
180+
}
181+
182+
// Provide access to all `bumpalo::String`'s methods via deref
183+
impl<'alloc> Deref for String<'alloc> {
184+
type Target = BumpaloString<'alloc>;
185+
186+
#[inline]
187+
fn deref(&self) -> &Self::Target {
188+
&self.0
189+
}
190+
}
191+
192+
impl<'alloc> DerefMut for String<'alloc> {
193+
#[inline]
194+
fn deref_mut(&mut self) -> &mut BumpaloString<'alloc> {
195+
&mut self.0
196+
}
197+
}
198+
199+
impl PartialEq for String<'_> {
200+
#[inline]
201+
fn eq(&self, other: &String) -> bool {
202+
PartialEq::eq(&self[..], &other[..])
203+
}
204+
}
205+
206+
// `impl_eq!` macro copied from `bumpalo`
207+
macro_rules! impl_eq {
208+
($lhs:ty, $rhs: ty) => {
209+
impl<'a, 'alloc> PartialEq<$rhs> for $lhs {
210+
#[inline]
211+
fn eq(&self, other: &$rhs) -> bool {
212+
PartialEq::eq(&self[..], &other[..])
213+
}
214+
}
215+
216+
impl<'a, 'alloc> PartialEq<$lhs> for $rhs {
217+
#[inline]
218+
fn eq(&self, other: &$lhs) -> bool {
219+
PartialEq::eq(&self[..], &other[..])
220+
}
221+
}
222+
};
223+
}
224+
225+
impl_eq! { String<'alloc>, str }
226+
impl_eq! { String<'alloc>, &'a str }
227+
impl_eq! { std::borrow::Cow<'a, str>, String<'alloc> }
228+
impl_eq! { std::string::String, String<'alloc> }
229+
230+
impl Display for String<'_> {
231+
#[inline]
232+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
233+
Display::fmt(self.as_str(), f)
234+
}
235+
}
236+
237+
impl Debug for String<'_> {
238+
#[inline]
239+
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
240+
Debug::fmt(self.as_str(), f)
241+
}
242+
}
243+
244+
impl Hash for String<'_> {
245+
#[inline]
246+
fn hash<H: Hasher>(&self, hasher: &mut H) {
247+
self.as_str().hash(hasher);
248+
}
249+
}

crates/oxc_allocator/src/vec.rs

+6-15
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use allocator_api2::vec::Vec as InnerVec;
1919
use bumpalo::Bump;
2020
#[cfg(any(feature = "serialize", test))]
2121
use serde::{ser::SerializeSeq, Serialize, Serializer};
22-
use simdutf8::basic::{from_utf8, Utf8Error};
22+
use simdutf8::basic::Utf8Error;
2323

2424
use crate::{Allocator, Box, String};
2525

@@ -32,7 +32,7 @@ use crate::{Allocator, Box, String};
3232
/// Note: This is not a soundness issue, as Rust does not support relying on `drop`
3333
/// being called to guarantee soundness.
3434
#[derive(PartialEq, Eq)]
35-
pub struct Vec<'alloc, T>(ManuallyDrop<InnerVec<T, &'alloc Bump>>);
35+
pub struct Vec<'alloc, T>(pub(crate) ManuallyDrop<InnerVec<T, &'alloc Bump>>);
3636

3737
/// SAFETY: Not actually safe, but for enabling `Send` for downstream crates.
3838
unsafe impl<T> Send for Vec<'_, T> {}
@@ -190,16 +190,12 @@ impl<'alloc, T> Vec<'alloc, T> {
190190
}
191191

192192
impl<'alloc> Vec<'alloc, u8> {
193-
/// Convert `Vec<u8>` into `String`.
193+
/// Convert `Vec<u8>` into [`String`].
194194
///
195195
/// # Errors
196196
/// Returns [`Err`] if the `Vec` does not comprise a valid UTF-8 string.
197197
pub fn into_string(self) -> Result<String<'alloc>, Utf8Error> {
198-
// Check vec comprises a valid UTF-8 string.
199-
from_utf8(&self.0)?;
200-
// SAFETY: We just checked it's a valid UTF-8 string
201-
let s = unsafe { self.into_string_unchecked() };
202-
Ok(s)
198+
String::from_utf8(self)
203199
}
204200

205201
/// Convert `Vec<u8>` into [`String`], without checking bytes comprise a valid UTF-8 string.
@@ -211,13 +207,8 @@ impl<'alloc> Vec<'alloc, u8> {
211207
#[expect(clippy::missing_safety_doc, clippy::unnecessary_safety_comment)]
212208
#[inline(always)] // `#[inline(always)]` because this is a no-op at runtime
213209
pub unsafe fn into_string_unchecked(self) -> String<'alloc> {
214-
// Cannot use `bumpalo::String::from_utf8_unchecked` because it takes a `bumpalo::collections::Vec`,
215-
// and our inner `Vec` type is `allocator_api2::vec::Vec`.
216-
// SAFETY: Conversion is safe because both types store data in arena in same way.
217-
// Lifetime of returned `String` is same as lifetime of original `Vec<u8>`.
218-
let inner = ManuallyDrop::into_inner(self.0);
219-
let (ptr, len, cap, bump) = inner.into_raw_parts_with_alloc();
220-
String::from_raw_parts_in(ptr, len, cap, bump)
210+
// SAFETY: Caller guarantees vec comprises a valid UTF-8 string.
211+
String::from_utf8_unchecked(self)
221212
}
222213
}
223214

crates/oxc_ast/src/ast_builder_impl.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
use std::{borrow::Cow, mem};
1010

11-
use oxc_allocator::{Allocator, Box, FromIn, String, Vec};
11+
use oxc_allocator::{Allocator, Box, FromIn, Vec};
1212
use oxc_span::{Atom, Span, SPAN};
1313
use oxc_syntax::{number::NumberBase, operator::UnaryOperator, scope::ScopeId};
1414

@@ -78,7 +78,7 @@ impl<'a> AstBuilder<'a> {
7878
/// in the heap.
7979
#[inline]
8080
pub fn str(self, value: &str) -> &'a str {
81-
String::from_str_in(value, self.allocator).into_bump_str()
81+
self.allocator.alloc_str(value)
8282
}
8383

8484
/// Allocate an [`Atom`] from a string slice.

crates/oxc_prettier/src/macros.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ macro_rules! text {
4747
#[macro_export]
4848
macro_rules! dynamic_text {
4949
($p:ident, $str:expr) => {{
50-
let s = oxc_allocator::String::from_str_in($str, $p.allocator).into_bump_str();
50+
let s = $p.allocator.alloc_str($str);
5151
$crate::ir::Doc::Str(s)
5252
}};
5353
}

crates/oxc_span/src/atom.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ impl<'alloc> FromIn<'alloc, &Atom<'alloc>> for Atom<'alloc> {
7979

8080
impl<'alloc> FromIn<'alloc, &str> for Atom<'alloc> {
8181
fn from_in(s: &str, allocator: &'alloc Allocator) -> Self {
82-
Self::from(oxc_allocator::String::from_str_in(s, allocator))
82+
Self::from(&*allocator.alloc_str(s))
8383
}
8484
}
8585

0 commit comments

Comments
 (0)