1
+ use std:: iter;
1
2
use std:: ops:: Deref ;
2
3
4
+ use fixedbitset:: FixedBitSet ;
3
5
use itertools:: Itertools ;
4
6
use rustc_hash:: FxHashSet ;
5
7
6
8
use oxc_allocator:: { Allocator , Vec } ;
7
9
use oxc_ast:: ast:: { Declaration , Program , Statement } ;
8
10
use oxc_index:: Idx ;
9
- use oxc_semantic:: { ReferenceId , ScopeTree , SemanticBuilder , SymbolId , SymbolTable } ;
11
+ use oxc_semantic:: { ReferenceId , ScopeTree , Semantic , SemanticBuilder , SymbolId , SymbolTable } ;
10
12
use oxc_span:: Atom ;
11
13
12
14
#[ derive( Default , Debug , Clone , Copy ) ]
@@ -22,42 +24,93 @@ type Slot = usize;
22
24
/// See:
23
25
/// * [esbuild](https://github.com/evanw/esbuild/blob/v0.24.0/docs/architecture.md#symbol-minification)
24
26
///
25
- /// This algorithm is targeted for better gzip compression.
27
+ /// This algorithm is based on the implementation of esbuild and additionally implements improved name reuse functionality.
28
+ /// It targets better gzip compression.
26
29
///
27
- /// Visually, a slot is the index position for binding identifiers:
30
+ /// A slot is a placeholder for binding identifiers that share the same name.
31
+ /// Visually, it is the index position for binding identifiers:
28
32
///
29
33
/// ```javascript
30
- /// function slot0(slot2, slot3, slot4 ) {
34
+ /// function slot0(slot1, slot2, slot3 ) {
31
35
/// slot2 = 1;
32
36
/// }
33
- /// function slot1(slot2, slot3 ) {
34
- /// function slot4 () {
35
- /// slot2 = 1;
37
+ /// function slot1(slot0 ) {
38
+ /// function slot2 () {
39
+ /// slot0 = 1;
36
40
/// }
37
41
/// }
38
42
/// ```
39
43
///
40
44
/// The slot number for a new scope starts after the maximum slot of the parent scope.
41
45
///
42
46
/// Occurrences of slots and their corresponding newly assigned short identifiers are:
43
- /// - slot2: 4 - a
44
- /// - slot3: 2 - b
45
- /// - slot4: 2 - c
46
- /// - slot0: 1 - d
47
- /// - slot1: 1 - e
47
+ /// - slot2: 3 - a
48
+ /// - slot0: 2 - b
49
+ /// - slot1: 2 - c
50
+ /// - slot3: 1 - d
48
51
///
49
52
/// After swapping out the mangled names:
50
53
///
51
54
/// ```javascript
52
- /// function d(a, b, c ) {
55
+ /// function b(c, a, d ) {
53
56
/// a = 1;
54
57
/// }
55
- /// function e(a, b) {
56
- /// function c () {
57
- /// a = 1;
58
+ /// function c( b) {
59
+ /// function a () {
60
+ /// b = 1;
58
61
/// }
59
62
/// }
60
63
/// ```
64
+ ///
65
+ /// ## Name Reuse Calculation
66
+ ///
67
+ /// This improvement was inspired by [evanw/esbuild#2614](https://github.com/evanw/esbuild/pull/2614).
68
+ ///
69
+ /// For better compression, we shadow the variables where possible to reuse the same name.
70
+ /// For example, the following code:
71
+ /// ```javascript
72
+ /// var top_level_a = 0;
73
+ /// var top_level_b = 1;
74
+ /// function foo() {
75
+ /// var foo_a = 1;
76
+ /// console.log(top_level_b, foo_a);
77
+ /// }
78
+ /// function bar() {
79
+ /// var bar_a = 1;
80
+ /// console.log(top_level_b, bar_a);
81
+ /// }
82
+ /// console.log(top_level_a, foo(), bar())
83
+ /// ```
84
+ /// `top_level_a` is declared in the root scope, but is not used in function `foo` and function `bar`.
85
+ /// Therefore, we can reuse the same name for `top_level_a` and `foo_a` and `bar_a`.
86
+ ///
87
+ /// To calculate whether the variable name can be reused in the descendant scopes,
88
+ /// this mangler introduces a concept of symbol liveness and slot liveness.
89
+ /// Symbol liveness is a subtree of the scope tree that contains the declared scope of the symbol and
90
+ /// all the scopes that the symbol is used in. It is a subtree, so any scopes that are between the declared scope and the used scope
91
+ /// are also included. This is to ensure that the symbol is not shadowed by a different symbol before the use in the descendant scope.
92
+ ///
93
+ /// For the example above, the liveness of each symbol is:
94
+ /// - `top_level_a`: root_scope
95
+ /// - `top_level_b`: root_scope -> foo, root_scope -> bar
96
+ /// - `foo_a`: root_scope -> foo
97
+ /// - `bar_a`: root_scope -> bar
98
+ /// - `foo`: root_scope
99
+ /// - `bar`: root_scope
100
+ ///
101
+ /// Slot liveness is the same as symbol liveness, but it is a subforest (multiple subtrees) of the scope tree that can contain
102
+ /// multiple symbol liveness.
103
+ ///
104
+ /// Now that we have the liveness of each symbol, we want to assign symbols to a minimal number of slots.
105
+ /// This is a graph coloring problem where each node of the graph is a symbol, an edge of the graph indicates that
106
+ /// two symbols have a common alive scope, and the color of a node is its slot.
107
+ /// This mangler uses a greedy algorithm to assign symbols to slots to achieve that.
108
+ /// In other words, it assigns each symbol to the first slot that is not alive within the liveness of the symbol.
109
+ /// For the example above, each symbol is assigned to the following slots:
110
+ /// - slot 0: `top_level_a`, `foo_a`, `bar_a`
111
+ /// - slot 1: `top_level_b`
112
+ /// - slot 2: `foo`
113
+ /// - slot 3: `bar`
61
114
#[ derive( Default ) ]
62
115
pub struct Mangler {
63
116
symbol_table : SymbolTable ,
@@ -88,22 +141,20 @@ impl Mangler {
88
141
89
142
/// Builds the mangler for `program`.
///
/// Runs `SemanticBuilder` on `program` with scope-tree child ids enabled and
/// passes the resulting semantic data to `build_with_semantic`.
#[ must_use]
90
143
pub fn build ( self , program : & Program < ' _ > ) -> Mangler {
91
- let semantic = SemanticBuilder :: new ( ) . build ( program ) . semantic ;
92
- let ( symbol_table , scope_tree ) = semantic . into_symbol_table_and_scope_tree ( ) ;
93
- self . build_with_symbols_and_scopes ( symbol_table , & scope_tree , program)
144
// Child ids are requested here because the slot assignment asserts that the scope tree has them.
+ let semantic =
145
+ SemanticBuilder :: new ( ) . with_scope_tree_child_ids ( true ) . build ( program ) . semantic ;
146
+ self . build_with_semantic ( semantic , program)
94
147
}
95
148
149
/// Builds the mangler from an existing `Semantic` for `program`.
///
/// Delegates to `build_with_symbols_and_scopes_impl`, passing `debug_name` as the
/// name generator when `options.debug` is set and `base54` otherwise.
///
+ /// # Panics
150
+ ///
151
+ /// Panics if the scope tree of `semantic` was built without child ids.
96
152
#[ must_use]
97
- pub fn build_with_symbols_and_scopes (
98
- self ,
99
- symbol_table : SymbolTable ,
100
- scope_tree : & ScopeTree ,
101
- program : & Program < ' _ > ,
102
- ) -> Mangler {
153
+ pub fn build_with_semantic ( self , semantic : Semantic < ' _ > , program : & Program < ' _ > ) -> Mangler {
103
154
// The two branches differ only in the name generator handed to the shared implementation.
if self . options . debug {
104
- self . build_with_symbols_and_scopes_impl ( symbol_table , scope_tree , program, debug_name)
155
+ self . build_with_symbols_and_scopes_impl ( semantic , program, debug_name)
105
156
} else {
106
- self . build_with_symbols_and_scopes_impl ( symbol_table , scope_tree , program, base54)
157
+ self . build_with_symbols_and_scopes_impl ( semantic , program, base54)
107
158
}
108
159
}
109
160
@@ -112,11 +163,14 @@ impl Mangler {
112
163
G : Fn ( usize ) -> InlineString < CAPACITY > ,
113
164
> (
114
165
mut self ,
115
- symbol_table : SymbolTable ,
116
- scope_tree : & ScopeTree ,
166
+ semantic : Semantic < ' _ > ,
117
167
program : & Program < ' _ > ,
118
168
generate_name : G ,
119
169
) -> Mangler {
170
+ let ( mut symbol_table, scope_tree, ast_nodes) = semantic. into_symbols_scopes_nodes ( ) ;
171
+
172
+ assert ! ( scope_tree. has_child_ids( ) , "child_id needs to be generated" ) ;
173
+
120
174
let ( exported_names, exported_symbols) = if self . options . top_level {
121
175
Mangler :: collect_exported_symbols ( program)
122
176
} else {
@@ -125,59 +179,81 @@ impl Mangler {
125
179
126
180
let allocator = Allocator :: default ( ) ;
127
181
128
- // Mangle the symbol table by computing slots from the scope tree.
129
- // A slot is the occurrence index of a binding identifier inside a scope.
130
- let mut symbol_table = symbol_table;
131
-
132
- // Total number of slots for all scopes
133
- let mut total_number_of_slots: Slot = 0 ;
134
-
135
182
// All symbols with their assigned slots. Keyed by symbol id.
136
183
let mut slots: Vec < ' _ , Slot > = Vec :: with_capacity_in ( symbol_table. len ( ) , & allocator) ;
137
184
for _ in 0 ..symbol_table. len ( ) {
138
185
slots. push ( 0 ) ;
139
186
}
140
187
141
- // Keep track of the maximum slot number for each scope
142
- let mut max_slot_for_scope = Vec :: with_capacity_in ( scope_tree. len ( ) , & allocator) ;
143
- for _ in 0 ..scope_tree. len ( ) {
144
- max_slot_for_scope. push ( 0 ) ;
145
- }
188
+ // Stores the lived scope ids for each slot. Keyed by slot number.
189
+ let mut slot_liveness: std:: vec:: Vec < FixedBitSet > = vec ! [ ] ;
146
190
147
- // Walk the scope tree and compute the slot number for each scope
148
191
let mut tmp_bindings = std:: vec:: Vec :: with_capacity ( 100 ) ;
149
- for scope_id in scope_tree. descendants_from_root ( ) {
192
+ let mut reusable_slots = std:: vec:: Vec :: new ( ) ;
193
+ // Walk down the scope tree and assign a slot number for each symbol.
194
+ // It is possible to do this in a loop over the symbol list,
195
+ // but walking down the scope tree seems to generate better code.
196
+ for scope_id in iter:: once ( scope_tree. root_scope_id ( ) )
197
+ . chain ( scope_tree. iter_all_child_ids ( scope_tree. root_scope_id ( ) ) )
198
+ {
150
199
let bindings = scope_tree. get_bindings ( scope_id) ;
151
-
152
- // The current slot number is continued by the maximum slot from the parent scope
153
- let parent_max_slot = scope_tree
154
- . get_parent_id ( scope_id)
155
- . map_or ( 0 , |parent_scope_id| max_slot_for_scope[ parent_scope_id. index ( ) ] ) ;
156
-
157
- let mut slot = parent_max_slot;
158
-
159
- if !bindings. is_empty ( ) {
160
- // Sort `bindings` in declaration order.
161
- tmp_bindings. clear ( ) ;
162
- tmp_bindings. extend ( bindings. values ( ) . copied ( ) ) ;
163
- tmp_bindings. sort_unstable ( ) ;
164
- for symbol_id in & tmp_bindings {
165
- slots[ symbol_id. index ( ) ] = slot;
166
- slot += 1 ;
167
- }
200
+ if bindings. is_empty ( ) {
201
+ continue ;
168
202
}
169
203
170
- max_slot_for_scope[ scope_id. index ( ) ] = slot;
204
+ let mut slot = slot_liveness. len ( ) ;
205
+
206
+ reusable_slots. clear ( ) ;
207
+ reusable_slots. extend (
208
+ // Slots that are already assigned to other symbols, but are not alive in the current scope.
209
+ slot_liveness
210
+ . iter ( )
211
+ . enumerate ( )
212
+ . filter ( |( _, slot_liveness) | !slot_liveness. contains ( scope_id. index ( ) ) )
213
+ . map ( |( slot, _) | slot)
214
+ . take ( bindings. len ( ) ) ,
215
+ ) ;
216
+
217
+ // The number of new slots that need to be allocated.
218
+ let remaining_count = bindings. len ( ) - reusable_slots. len ( ) ;
219
+ reusable_slots. extend ( slot..slot + remaining_count) ;
220
+
221
+ slot += remaining_count;
222
+ if slot_liveness. len ( ) < slot {
223
+ slot_liveness. resize_with ( slot, || FixedBitSet :: with_capacity ( scope_tree. len ( ) ) ) ;
224
+ }
171
225
172
- if slot > total_number_of_slots {
173
- total_number_of_slots = slot;
226
+ // Sort `bindings` in declaration order.
227
+ tmp_bindings. clear ( ) ;
228
+ tmp_bindings. extend ( bindings. values ( ) . copied ( ) ) ;
229
+ tmp_bindings. sort_unstable ( ) ;
230
+ for ( symbol_id, assigned_slot) in
231
+ tmp_bindings. iter ( ) . zip ( reusable_slots. iter ( ) . copied ( ) )
232
+ {
233
+ slots[ symbol_id. index ( ) ] = assigned_slot;
234
+
235
+ // Calculate the scope ids that this symbol is alive in.
236
+ let lived_scope_ids = symbol_table
237
+ . get_resolved_references ( * symbol_id)
238
+ . flat_map ( |reference| {
239
+ let used_scope_id = ast_nodes. get_node ( reference. node_id ( ) ) . scope_id ( ) ;
240
+ scope_tree. ancestors ( used_scope_id) . take_while ( |s_id| * s_id != scope_id)
241
+ } )
242
+ . chain ( iter:: once ( scope_id) ) ;
243
+
244
+ // Since the slot is now assigned to this symbol, it is alive in all the scopes that this symbol is alive in.
245
+ for scope_id in lived_scope_ids {
246
+ slot_liveness[ assigned_slot] . insert ( scope_id. index ( ) ) ;
247
+ }
174
248
}
175
249
}
176
250
251
+ let total_number_of_slots = slot_liveness. len ( ) ;
252
+
177
253
let frequencies = self . tally_slot_frequencies (
178
254
& symbol_table,
179
255
& exported_symbols,
180
- scope_tree,
256
+ & scope_tree,
181
257
total_number_of_slots,
182
258
& slots,
183
259
& allocator,
0 commit comments