Skip to content

Commit 7ea99f4

Browse files
committed
feat(minifier): compress array of string literals to 'str1,str2'.split(',') (#8786)
Ported the `["str1", "str2", ...]` => `"str1 str2".split(" ")` compression from Closure Compiler, with some tweaks.
1 parent 2eac9c0 commit 7ea99f4

File tree

2 files changed

+104
-18
lines changed

2 files changed

+104
-18
lines changed

crates/oxc_minifier/src/peephole/substitute_alternate_syntax.rs

+97-11
Original file line numberDiff line numberDiff line change
@@ -907,6 +907,7 @@ impl<'a> LatePeepholeOptimizations {
907907

908908
if let Some(folded_expr) = match expr {
909909
Expression::BooleanLiteral(_) => Self::try_compress_boolean(expr, ctx),
910+
Expression::ArrayExpression(_) => Self::try_compress_array_expression(expr, ctx),
910911
_ => None,
911912
} {
912913
*expr = folded_expr;
@@ -939,6 +940,78 @@ impl<'a> LatePeepholeOptimizations {
939940
Some(ctx.ast.expression_unary(lit.span, UnaryOperator::LogicalNot, num))
940941
}
941942

943+
/// Transforms long array expression with string literals to `"str1,str2".split(',')`
944+
fn try_compress_array_expression(
945+
expr: &mut Expression<'a>,
946+
ctx: Ctx<'a, '_>,
947+
) -> Option<Expression<'a>> {
948+
// this threshold is chosen by hand by checking the minsize output
949+
const THRESHOLD: usize = 40;
950+
951+
let Expression::ArrayExpression(array) = expr else { unreachable!() };
952+
953+
let is_all_string = array
954+
.elements
955+
.iter()
956+
.all(|element| element.as_expression().is_some_and(Expression::is_string_literal));
957+
if !is_all_string {
958+
return None;
959+
}
960+
961+
let element_count = array.elements.len();
962+
// replace with `.split` only when the saved size is great enough
963+
// because using `.split` in some places and not in others may cause gzipped size to be bigger
964+
let can_save = element_count * 2 > ".split('.')".len() + THRESHOLD;
965+
if !can_save {
966+
return None;
967+
}
968+
969+
let strings = array.elements.iter().map(|element| {
970+
let Expression::StringLiteral(str) = element.to_expression() else { unreachable!() };
971+
str.value.as_str()
972+
});
973+
let delimiter = Self::pick_delimiter(&strings)?;
974+
975+
let concatenated_string = strings.collect::<std::vec::Vec<_>>().join(delimiter);
976+
977+
// "str1,str2".split(',')
978+
Some(ctx.ast.expression_call(
979+
expr.span(),
980+
Expression::StaticMemberExpression(ctx.ast.alloc_static_member_expression(
981+
expr.span(),
982+
ctx.ast.expression_string_literal(
983+
expr.span(),
984+
ctx.ast.atom(&concatenated_string),
985+
None,
986+
),
987+
ctx.ast.identifier_name(expr.span(), "split"),
988+
false,
989+
)),
990+
Option::<TSTypeParameterInstantiation>::None,
991+
ctx.ast.vec1(Argument::from(ctx.ast.expression_string_literal(
992+
expr.span(),
993+
ctx.ast.atom(delimiter),
994+
None,
995+
))),
996+
false,
997+
))
998+
}
999+
1000+
/// Chooses the separator used to join `strings` before emitting `.split(separator)`.
///
/// * Returns `Some("")` when every string is exactly one UTF-16 code unit long: JavaScript's
///   `split("")` cuts a string into individual UTF-16 code units, so no separator is needed.
///   This also holds vacuously for an empty iterator.
/// * Otherwise returns the first entry of `DELIMITERS` that occurs in none of the strings.
/// * Returns `None` when every candidate delimiter occurs in at least one string.
///
/// The iterator is cloned for each pass, so it should be cheap to clone.
fn pick_delimiter<'s>(
    strings: &(impl Iterator<Item = &'s str> + Clone),
) -> Option<&'static str> {
    // These delimiters are chars that appear a lot in the program
    // therefore probably have a small Huffman encoding.
    const DELIMITERS: [&str; 5] = [".", ",", "(", ")", " "];

    /// Whether `s` is a single character that occupies one UTF-16 code unit.
    /// Characters outside the BMP take two units and would be torn apart by `split("")`.
    fn is_single_utf16_unit(s: &str) -> bool {
        let mut chars = s.chars();
        matches!((chars.next(), chars.next()), (Some(c), None) if c.len_utf16() == 1)
    }

    if strings.clone().all(is_single_utf16_unit) {
        return Some("");
    }

    DELIMITERS
        .iter()
        .copied()
        .find(|delimiter| strings.clone().all(|s| !s.contains(*delimiter)))
}
1014+
9421015
pub fn substitute_catch_clause(&mut self, catch: &mut CatchClause<'a>, ctx: Ctx<'a, '_>) {
9431016
if self.target >= ESTarget::ES2019 {
9441017
if let Some(param) = &catch.param {
@@ -1233,20 +1306,33 @@ mod test {
12331306
}
12341307

12351308
#[test]
fn test_string_array_splitting() {
    // The `.split` rewrite only applies when `element_count * 2` exceeds
    // `".split('.')".len() + 40`, i.e. from 26 elements upwards. Each case is therefore
    // padded with `REPEAT` additional `'1'` elements.
    const REPEAT: usize = 20;
    let additional_args = ",'1'".repeat(REPEAT);
    // Expects `[<partial>,'1',...]` to become `'<expected><d>1<d>1...'.split('<d>')`.
    let test_with_longer_args =
        |source_text_partial: &str, expected_partial: &str, delimiter: &str| {
            let expected = &format!(
                "var x='{expected_partial}{}'.split('{delimiter}')",
                format!("{delimiter}1").repeat(REPEAT)
            );
            test(&format!("var x=[{source_text_partial}{additional_args}]"), expected);
        };
    // Expects the padded array to be left untouched.
    let test_same_with_longer_args = |source_text_partial: &str| {
        test_same(&format!("var x=[{source_text_partial}{additional_args}]"));
    };

    // 24 and 25 elements: below the size threshold
    test_same_with_longer_args("'1','2','3','4'");
    test_same_with_longer_args("'1','2','3','4','5'");
    // 26 elements and more: compressed
    test_with_longer_args("'1','2','3','4','5','6'", "123456", "");
    test_with_longer_args("'1','2','3','4','5','00'", "1.2.3.4.5.00", ".");
    test_with_longer_args("'1','2','3','4','5','6','7'", "1234567", "");
    test_with_longer_args("'1','2','3','4','5','6','00'", "1.2.3.4.5.6.00", ".");
    test_with_longer_args("'.,',',',',',',',',',','", ".,(,(,(,(,(,", "(");
    test_with_longer_args("',,','.',',',',',',',','", ",,(.(,(,(,(,", "(");
    test_with_longer_args("'a,','.',',',',',',',','", "a,(.(,(,(,(,", "(");

    // all possible delimiters used, leave it alone
    // (26 elements so the size threshold is passed, and multi-character strings so the
    // empty-delimiter shortcut for single-character arrays does not apply)
    test_same_with_longer_args("'.a',',a','(a',')a',' a','aa'");
}
12511337

12521338
#[test]

tasks/minsize/minsize.snap

+7-7
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,19 @@ Original | minified | minified | gzip | gzip | Fixture
99

1010
342.15 kB | 117.69 kB | 118.14 kB | 43.55 kB | 44.37 kB | vue.js
1111

12-
544.10 kB | 71.49 kB | 72.48 kB | 25.89 kB | 26.20 kB | lodash.js
12+
544.10 kB | 71.44 kB | 72.48 kB | 25.87 kB | 26.20 kB | lodash.js
1313

1414
555.77 kB | 271.48 kB | 270.13 kB | 88.38 kB | 90.80 kB | d3.js
1515

16-
1.01 MB | 457.63 kB | 458.89 kB | 123.53 kB | 126.71 kB | bundle.min.js
16+
1.01 MB | 441.51 kB | 458.89 kB | 122.54 kB | 126.71 kB | bundle.min.js
1717

18-
1.25 MB | 650.59 kB | 646.76 kB | 161.11 kB | 163.73 kB | three.js
18+
1.25 MB | 650.46 kB | 646.76 kB | 161 kB | 163.73 kB | three.js
1919

20-
2.14 MB | 718.83 kB | 724.14 kB | 162.15 kB | 181.07 kB | victory.js
20+
2.14 MB | 718.76 kB | 724.14 kB | 162.15 kB | 181.07 kB | victory.js
2121

22-
3.20 MB | 1.01 MB | 1.01 MB | 324.36 kB | 331.56 kB | echarts.js
22+
3.20 MB | 1.01 MB | 1.01 MB | 324.35 kB | 331.56 kB | echarts.js
2323

24-
6.69 MB | 2.30 MB | 2.31 MB | 469.42 kB | 488.28 kB | antd.js
24+
6.69 MB | 2.30 MB | 2.31 MB | 469.30 kB | 488.28 kB | antd.js
2525

26-
10.95 MB | 3.37 MB | 3.49 MB | 864.74 kB | 915.50 kB | typescript.js
26+
10.95 MB | 3.37 MB | 3.49 MB | 864.70 kB | 915.50 kB | typescript.js
2727

0 commit comments

Comments
 (0)