Skip to content

Commit 6589c3b

Browse files
sapphi-redBoshenautofix-ci[bot]
authored
feat(mangler): reuse variable names (#8562)
Changed the mangler to reuse variable names where possible. This reduces the code size, as shorter variable names can be used in more places. However, it requires global information, limits parallelism within a single file, and requires more memory. --------- Co-authored-by: Boshen <boshenc@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
1 parent 32e0e47 commit 6589c3b

File tree

11 files changed

+317
-99
lines changed

11 files changed

+317
-99
lines changed

Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ encoding_rs = "0.8.35"
149149
encoding_rs_io = "0.1.7"
150150
env_logger = { version = "0.11.5", default-features = false }
151151
fast-glob = "0.4.0"
152+
fixedbitset = "0.5.7"
152153
flate2 = "1.0.35"
153154
futures = "0.3.31"
154155
globset = "0.4.15"

crates/oxc_mangler/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,6 @@ oxc_index = { workspace = true }
2727
oxc_semantic = { workspace = true }
2828
oxc_span = { workspace = true }
2929

30+
fixedbitset = { workspace = true }
3031
itertools = { workspace = true }
3132
rustc-hash = { workspace = true }

crates/oxc_mangler/src/lib.rs

+140-64
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1+
use std::iter;
12
use std::ops::Deref;
23

4+
use fixedbitset::FixedBitSet;
35
use itertools::Itertools;
46
use rustc_hash::FxHashSet;
57

68
use oxc_allocator::{Allocator, Vec};
79
use oxc_ast::ast::{Declaration, Program, Statement};
810
use oxc_index::Idx;
9-
use oxc_semantic::{ReferenceId, ScopeTree, SemanticBuilder, SymbolId, SymbolTable};
11+
use oxc_semantic::{ReferenceId, ScopeTree, Semantic, SemanticBuilder, SymbolId, SymbolTable};
1012
use oxc_span::Atom;
1113

1214
#[derive(Default, Debug, Clone, Copy)]
@@ -22,42 +24,93 @@ type Slot = usize;
2224
/// See:
2325
/// * [esbuild](https://github.com/evanw/esbuild/blob/v0.24.0/docs/architecture.md#symbol-minification)
2426
///
25-
/// This algorithm is targeted for better gzip compression.
27+
/// This algorithm is based on the implementation of esbuild and additionally implements improved name reuse functionality.
28+
/// It targets better gzip compression.
2629
///
27-
/// Visually, a slot is the index position for binding identifiers:
30+
/// A slot is a placeholder for binding identifiers that share the same name.
31+
/// Visually, it is the index position for binding identifiers:
2832
///
2933
/// ```javascript
30-
/// function slot0(slot2, slot3, slot4) {
34+
/// function slot0(slot1, slot2, slot3) {
3135
/// slot2 = 1;
3236
/// }
33-
/// function slot1(slot2, slot3) {
34-
/// function slot4() {
35-
/// slot2 = 1;
37+
/// function slot1(slot0) {
38+
/// function slot2() {
39+
/// slot0 = 1;
3640
/// }
3741
/// }
3842
/// ```
3943
///
4044
/// The slot number for a new scope starts after the maximum slot of the parent scope.
4145
///
4246
/// Occurrences of slots and their corresponding newly assigned short identifiers are:
43-
/// - slot2: 4 - a
44-
/// - slot3: 2 - b
45-
/// - slot4: 2 - c
46-
/// - slot0: 1 - d
47-
/// - slot1: 1 - e
47+
/// - slot2: 3 - a
48+
/// - slot0: 2 - b
49+
/// - slot1: 2 - c
50+
/// - slot3: 1 - d
4851
///
4952
/// After swapping out the mangled names:
5053
///
5154
/// ```javascript
52-
/// function d(a, b, c) {
55+
/// function b(c, a, d) {
5356
/// a = 1;
5457
/// }
55-
/// function e(a, b) {
56-
/// function c() {
57-
/// a = 1;
58+
/// function c(b) {
59+
/// function a() {
60+
/// b = 1;
5861
/// }
5962
/// }
6063
/// ```
64+
///
65+
/// ## Name Reuse Calculation
66+
///
67+
/// This improvement was inspired by [evanw/esbuild#2614](https://github.com/evanw/esbuild/pull/2614).
68+
///
69+
/// For better compression, we shadow the variables where possible to reuse the same name.
70+
/// For example, the following code:
71+
/// ```javascript
72+
/// var top_level_a = 0;
73+
/// var top_level_b = 1;
74+
/// function foo() {
75+
/// var foo_a = 1;
76+
/// console.log(top_level_b, foo_a);
77+
/// }
78+
/// function bar() {
79+
/// var bar_a = 1;
80+
/// console.log(top_level_b, bar_a);
81+
/// }
82+
/// console.log(top_level_a, foo(), bar())
83+
/// ```
84+
/// `top_level_a` is declared in the root scope, but is not used in function `foo` and function `bar`.
85+
/// Therefore, we can reuse the same name for `top_level_a` and `foo_a` and `bar_a`.
86+
///
87+
/// To calculate whether the variable name can be reused in the descendant scopes,
88+
/// this mangler introduces a concept of symbol liveness and slot liveness.
89+
/// Symbol liveness is a subtree of the scope tree that contains the declared scope of the symbol and
90+
/// all the scopes that the symbol is used in. It is a subtree, so any scopes that are between the declared scope and the used scope
91+
/// are also included. This is to ensure that the symbol is not shadowed by a different symbol before the use in the descendant scope.
92+
///
93+
/// For the example above, the liveness of each symbol is:
94+
/// - `top_level_a`: root_scope
95+
/// - `top_level_b`: root_scope -> foo, root_scope -> bar
96+
/// - `foo_a`: root_scope -> foo
97+
/// - `bar_a`: root_scope -> bar
98+
/// - `foo`: root_scope
99+
/// - `bar`: root_scope
100+
///
101+
/// Slot liveness is the same as symbol liveness, but it is a subforest (multiple subtrees) of the scope tree that can contain
102+
/// multiple symbol liveness.
103+
///
104+
/// Now that we have the liveness of each symbol, we want to assign symbols to a minimal number of slots.
105+
/// This is a graph coloring problem where the node of the graph is the symbol and the edge of the graph indicates whether
106+
/// the symbols have a common live scope, and the color of the node is the slot.
107+
/// This mangler uses a greedy algorithm to assign symbols to slots to achieve that.
108+
/// In other words, it assigns symbols to the first slot that does not live in the liveness of the symbol.
109+
/// For the example above, each symbol is assigned to the following slots:
110+
/// - slot 0: `top_level_a`, `foo_a`, `bar_a`
111+
/// - slot 1: `top_level_b`
112+
/// - slot 2: `foo`
113+
/// - slot 3: `bar`
61114
#[derive(Default)]
62115
pub struct Mangler {
63116
symbol_table: SymbolTable,
@@ -88,22 +141,20 @@ impl Mangler {
88141

89142
#[must_use]
90143
pub fn build(self, program: &Program<'_>) -> Mangler {
91-
let semantic = SemanticBuilder::new().build(program).semantic;
92-
let (symbol_table, scope_tree) = semantic.into_symbol_table_and_scope_tree();
93-
self.build_with_symbols_and_scopes(symbol_table, &scope_tree, program)
144+
let semantic =
145+
SemanticBuilder::new().with_scope_tree_child_ids(true).build(program).semantic;
146+
self.build_with_semantic(semantic, program)
94147
}
95148

149+
/// # Panics
150+
///
151+
/// Panics if the child ids have not been generated in `scope_tree`.
96152
#[must_use]
97-
pub fn build_with_symbols_and_scopes(
98-
self,
99-
symbol_table: SymbolTable,
100-
scope_tree: &ScopeTree,
101-
program: &Program<'_>,
102-
) -> Mangler {
153+
pub fn build_with_semantic(self, semantic: Semantic<'_>, program: &Program<'_>) -> Mangler {
103154
if self.options.debug {
104-
self.build_with_symbols_and_scopes_impl(symbol_table, scope_tree, program, debug_name)
155+
self.build_with_symbols_and_scopes_impl(semantic, program, debug_name)
105156
} else {
106-
self.build_with_symbols_and_scopes_impl(symbol_table, scope_tree, program, base54)
157+
self.build_with_symbols_and_scopes_impl(semantic, program, base54)
107158
}
108159
}
109160

@@ -112,11 +163,14 @@ impl Mangler {
112163
G: Fn(usize) -> InlineString<CAPACITY>,
113164
>(
114165
mut self,
115-
symbol_table: SymbolTable,
116-
scope_tree: &ScopeTree,
166+
semantic: Semantic<'_>,
117167
program: &Program<'_>,
118168
generate_name: G,
119169
) -> Mangler {
170+
let (mut symbol_table, scope_tree, ast_nodes) = semantic.into_symbols_scopes_nodes();
171+
172+
assert!(scope_tree.has_child_ids(), "child_id needs to be generated");
173+
120174
let (exported_names, exported_symbols) = if self.options.top_level {
121175
Mangler::collect_exported_symbols(program)
122176
} else {
@@ -125,59 +179,81 @@ impl Mangler {
125179

126180
let allocator = Allocator::default();
127181

128-
// Mangle the symbol table by computing slots from the scope tree.
129-
// A slot is the occurrence index of a binding identifier inside a scope.
130-
let mut symbol_table = symbol_table;
131-
132-
// Total number of slots for all scopes
133-
let mut total_number_of_slots: Slot = 0;
134-
135182
// All symbols with their assigned slots. Keyed by symbol id.
136183
let mut slots: Vec<'_, Slot> = Vec::with_capacity_in(symbol_table.len(), &allocator);
137184
for _ in 0..symbol_table.len() {
138185
slots.push(0);
139186
}
140187

141-
// Keep track of the maximum slot number for each scope
142-
let mut max_slot_for_scope = Vec::with_capacity_in(scope_tree.len(), &allocator);
143-
for _ in 0..scope_tree.len() {
144-
max_slot_for_scope.push(0);
145-
}
188+
// Stores the lived scope ids for each slot. Keyed by slot number.
189+
let mut slot_liveness: std::vec::Vec<FixedBitSet> = vec![];
146190

147-
// Walk the scope tree and compute the slot number for each scope
148191
let mut tmp_bindings = std::vec::Vec::with_capacity(100);
149-
for scope_id in scope_tree.descendants_from_root() {
192+
let mut reusable_slots = std::vec::Vec::new();
193+
// Walk down the scope tree and assign a slot number for each symbol.
194+
// It is possible to do this in a loop over the symbol list,
195+
// but walking down the scope tree seems to generate better code.
196+
for scope_id in iter::once(scope_tree.root_scope_id())
197+
.chain(scope_tree.iter_all_child_ids(scope_tree.root_scope_id()))
198+
{
150199
let bindings = scope_tree.get_bindings(scope_id);
151-
152-
// The current slot number is continued by the maximum slot from the parent scope
153-
let parent_max_slot = scope_tree
154-
.get_parent_id(scope_id)
155-
.map_or(0, |parent_scope_id| max_slot_for_scope[parent_scope_id.index()]);
156-
157-
let mut slot = parent_max_slot;
158-
159-
if !bindings.is_empty() {
160-
// Sort `bindings` in declaration order.
161-
tmp_bindings.clear();
162-
tmp_bindings.extend(bindings.values().copied());
163-
tmp_bindings.sort_unstable();
164-
for symbol_id in &tmp_bindings {
165-
slots[symbol_id.index()] = slot;
166-
slot += 1;
167-
}
200+
if bindings.is_empty() {
201+
continue;
168202
}
169203

170-
max_slot_for_scope[scope_id.index()] = slot;
204+
let mut slot = slot_liveness.len();
205+
206+
reusable_slots.clear();
207+
reusable_slots.extend(
208+
// Slots that are already assigned to other symbols, but do not live in the current scope.
209+
slot_liveness
210+
.iter()
211+
.enumerate()
212+
.filter(|(_, slot_liveness)| !slot_liveness.contains(scope_id.index()))
213+
.map(|(slot, _)| slot)
214+
.take(bindings.len()),
215+
);
216+
217+
// The number of new slots that need to be allocated.
218+
let remaining_count = bindings.len() - reusable_slots.len();
219+
reusable_slots.extend(slot..slot + remaining_count);
220+
221+
slot += remaining_count;
222+
if slot_liveness.len() < slot {
223+
slot_liveness.resize_with(slot, || FixedBitSet::with_capacity(scope_tree.len()));
224+
}
171225

172-
if slot > total_number_of_slots {
173-
total_number_of_slots = slot;
226+
// Sort `bindings` in declaration order.
227+
tmp_bindings.clear();
228+
tmp_bindings.extend(bindings.values().copied());
229+
tmp_bindings.sort_unstable();
230+
for (symbol_id, assigned_slot) in
231+
tmp_bindings.iter().zip(reusable_slots.iter().copied())
232+
{
233+
slots[symbol_id.index()] = assigned_slot;
234+
235+
// Calculate the scope ids that this symbol is alive in.
236+
let lived_scope_ids = symbol_table
237+
.get_resolved_references(*symbol_id)
238+
.flat_map(|reference| {
239+
let used_scope_id = ast_nodes.get_node(reference.node_id()).scope_id();
240+
scope_tree.ancestors(used_scope_id).take_while(|s_id| *s_id != scope_id)
241+
})
242+
.chain(iter::once(scope_id));
243+
244+
// Since the slot is now assigned to this symbol, it is alive in all the scopes that this symbol is alive in.
245+
for scope_id in lived_scope_ids {
246+
slot_liveness[assigned_slot].insert(scope_id.index());
247+
}
174248
}
175249
}
176250

251+
let total_number_of_slots = slot_liveness.len();
252+
177253
let frequencies = self.tally_slot_frequencies(
178254
&symbol_table,
179255
&exported_symbols,
180-
scope_tree,
256+
&scope_tree,
181257
total_number_of_slots,
182258
&slots,
183259
&allocator,

crates/oxc_minifier/src/lib.rs

+6-5
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,12 @@ impl Minifier {
5555
Stats::default()
5656
};
5757
let mangler = self.options.mangle.map(|options| {
58-
let semantic = SemanticBuilder::new().with_stats(stats).build(program).semantic;
59-
let (symbols, scopes) = semantic.into_symbol_table_and_scope_tree();
60-
Mangler::default()
61-
.with_options(options)
62-
.build_with_symbols_and_scopes(symbols, &scopes, program)
58+
let semantic = SemanticBuilder::new()
59+
.with_stats(stats)
60+
.with_scope_tree_child_ids(true)
61+
.build(program)
62+
.semantic;
63+
Mangler::default().with_options(options).build_with_semantic(semantic, program)
6364
});
6465
MinifierReturn { mangler }
6566
}

crates/oxc_minifier/tests/mangler/mod.rs

+15
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,21 @@ fn mangler() {
2525
"var x; function foo(a) { ({ x } = y) }",
2626
"import { x } from 's'; export { x }",
2727
"function _ (exports) { Object.defineProperty(exports, '__esModule', { value: true }) }",
28+
"function foo(foo_a, foo_b, foo_c) {}; function bar(bar_a, bar_b, bar_c) {}", // foo_a and bar_a can be reused
29+
"function _() { function foo() { var x; foo; } }", // x should not use the same name with foo
30+
"function _() { var x; function foo() { var y; function bar() { x } } }", // y should not shadow x
31+
"function _() { function x(a) {} }", // a can shadow x
32+
"function _() { function x(a) { x } }", // a should not shadow x
33+
"function _() { var x; { var y }}", // y should not shadow x
34+
"function _() { var x; { let y }}", // y can shadow x
35+
"function _() { let x; { let y }}", // y can shadow x
36+
"function _() { var x; { const y }}", // y can shadow x
37+
"function _() { let x; { const y }}", // y can shadow x
38+
"function _() { var x; { class Y{} }}", // Y can shadow x
39+
"function _() { let x; { class Y{} }}", // Y can shadow x
40+
"function _() { var x; try { throw 0 } catch (e) { e } }", // e can shadow x
41+
"function _() { var x; try { throw 0 } catch (e) { var e } }", // e can shadow x (not implemented)
42+
"function _() { var x; try { throw 0 } catch { var e } }", // e should not shadow x
2843
];
2944
let top_level_cases = [
3045
"function foo(a) {a}",

0 commit comments

Comments
 (0)