From 40f93564ac0ef4fb7760f300bbf416fc2c31a296 Mon Sep 17 00:00:00 2001
From: Dunqing <29533304+Dunqing@users.noreply.github.com>
Date: Mon, 22 Jul 2024 10:47:54 +0000
Subject: [PATCH] perf(semantic): calculate number of nodes, scopes, symbols, references before visiting AST (#4367)

context: #4328
---
 crates/oxc_semantic/src/builder.rs | 31 ++++++++++
 crates/oxc_semantic/src/counter.rs | 90 ++++++++++++++++++++++++++++++
 crates/oxc_semantic/src/lib.rs     |  1 +
 crates/oxc_semantic/src/node.rs    |  5 ++
 crates/oxc_semantic/src/scope.rs   |  8 +++
 crates/oxc_semantic/src/symbol.rs  | 12 ++++
 6 files changed, 147 insertions(+)
 create mode 100644 crates/oxc_semantic/src/counter.rs

diff --git a/crates/oxc_semantic/src/builder.rs b/crates/oxc_semantic/src/builder.rs
index 836f33d4aa353..825dfdc7ede35 100644
--- a/crates/oxc_semantic/src/builder.rs
+++ b/crates/oxc_semantic/src/builder.rs
@@ -20,6 +20,7 @@ use crate::{
     binder::Binder,
     checker,
     class::ClassTableBuilder,
+    counter::Counter,
     diagnostics::redeclaration,
     jsdoc::JSDocBuilder,
     label::LabelBuilder,
@@ -178,8 +179,38 @@ impl<'a> SemanticBuilder<'a> {
             let scope_id = self.scope.add_root_scope(AstNodeId::DUMMY, ScopeFlags::Top);
             program.scope_id.set(Some(scope_id));
         } else {
+            // Count the number of nodes, scopes, symbols, and references.
+            // Use these counts to reserve sufficient capacity in `AstNodes`, `ScopeTree`
+            // and `SymbolTable` to store them.
+            // This means that as we traverse the AST and fill up these structures with data,
+            // they never need to grow and reallocate - which is an expensive operation as it
+            // involves copying all the memory from the old allocation to the new one.
+            // For large source files, these structures are very large, so growth is very costly
+            // as it involves copying massive chunks of memory.
+            // Avoiding this growth produces up to 30% perf boost on our benchmarks.
+            // TODO: It would be even more efficient to calculate counts in parser to avoid
+            // this extra AST traversal.
+            let mut counter = Counter::default();
+            counter.visit_program(program);
+            self.nodes.reserve(counter.nodes_count);
+            self.scope.reserve(counter.scopes_count);
+            self.symbols.reserve(counter.symbols_count, counter.references_count);
+
+            // Visit AST to generate scopes tree etc
             self.visit_program(program);
 
+            // Check that `Counter` got accurate counts
+            debug_assert_eq!(self.nodes.len(), counter.nodes_count);
+            debug_assert_eq!(self.scope.len(), counter.scopes_count);
+            debug_assert_eq!(self.symbols.references.len(), counter.references_count);
+            // `Counter` may overestimate number of symbols, because multiple `BindingIdentifier`s
+            // can result in only a single symbol.
+            // e.g. `var x; var x;` = 2 x `BindingIdentifier` but 1 x symbol.
+            // This is not a big problem - allocating a `Vec` with excess capacity is cheap.
+            // It's allocating with *not enough* capacity which is costly, as then the `Vec`
+            // will grow and reallocate.
+            debug_assert!(self.symbols.len() <= counter.symbols_count);
+
             // Checking syntax error on module record requires scope information from the previous AST pass
             if self.check_syntax_error {
                 checker::check_module_record(&self);
diff --git a/crates/oxc_semantic/src/counter.rs b/crates/oxc_semantic/src/counter.rs
new file mode 100644
index 0000000000000..38e84989af929
--- /dev/null
+++ b/crates/oxc_semantic/src/counter.rs
@@ -0,0 +1,90 @@
+//! Visitor to count nodes, scopes, symbols and references in AST.
+//! These counts can be used to pre-allocate sufficient capacity in `AstNodes`,
+//! `ScopeTree`, and `SymbolTable` to store info for all these items.
+
+use std::cell::Cell;
+
+use oxc_ast::{
+    ast::{
+        BindingIdentifier, IdentifierReference, JSXElementName, JSXMemberExpressionObject,
+        TSEnumMemberName, TSModuleDeclarationName,
+    },
+    visit::walk::{walk_ts_enum_member_name, walk_ts_module_declaration_name},
+    AstKind, Visit,
+};
+use oxc_syntax::scope::{ScopeFlags, ScopeId};
+
+#[allow(clippy::struct_field_names)]
+#[derive(Default, Debug)]
+pub struct Counter {
+    pub nodes_count: usize,
+    pub scopes_count: usize,
+    pub symbols_count: usize,
+    pub references_count: usize,
+}
+
+impl<'a> Visit<'a> for Counter {
+    #[inline]
+    fn enter_node(&mut self, _: AstKind<'a>) {
+        self.nodes_count += 1;
+    }
+    #[inline]
+    fn enter_scope(&mut self, _: ScopeFlags, _: &Cell<Option<ScopeId>>) {
+        self.scopes_count += 1;
+    }
+
+    #[inline]
+    fn visit_binding_identifier(&mut self, _: &BindingIdentifier<'a>) {
+        self.nodes_count += 1;
+        self.symbols_count += 1;
+    }
+
+    #[inline]
+    fn visit_identifier_reference(&mut self, _: &IdentifierReference<'a>) {
+        self.nodes_count += 1;
+        self.references_count += 1;
+    }
+
+    #[inline]
+    fn visit_jsx_member_expression_object(&mut self, it: &JSXMemberExpressionObject<'a>) {
+        self.nodes_count += 1;
+        match it {
+            JSXMemberExpressionObject::MemberExpression(expr) => {
+                self.visit_jsx_member_expression(expr);
+            }
+            JSXMemberExpressionObject::Identifier(_) => {
+                self.nodes_count += 1;
+                self.references_count += 1;
+            }
+        }
+    }
+
+    #[inline]
+    fn visit_jsx_element_name(&mut self, it: &JSXElementName<'a>) {
+        self.nodes_count += 1;
+        match it {
+            JSXElementName::Identifier(ident) => {
+                self.nodes_count += 1;
+                if ident.name.chars().next().is_some_and(char::is_uppercase) {
+                    self.references_count += 1;
+                }
+            }
+            JSXElementName::NamespacedName(name) => self.visit_jsx_namespaced_name(name),
+            JSXElementName::MemberExpression(expr) => self.visit_jsx_member_expression(expr),
+        }
+    }
+
+    #[inline]
+    fn visit_ts_enum_member_name(&mut self, it: &TSEnumMemberName<'a>) {
+        if !it.is_expression() {
+            self.symbols_count += 1;
+        }
+        walk_ts_enum_member_name(self, it);
+    }
+
+    #[inline]
+    fn visit_ts_module_declaration_name(&mut self, it: &TSModuleDeclarationName<'a>) {
+        self.symbols_count += 1;
+        walk_ts_module_declaration_name(self, it);
+    }
+}
diff --git a/crates/oxc_semantic/src/lib.rs b/crates/oxc_semantic/src/lib.rs
index 22d111d53ead4..08ca5b2902e9f 100644
--- a/crates/oxc_semantic/src/lib.rs
+++ b/crates/oxc_semantic/src/lib.rs
@@ -2,6 +2,7 @@ mod binder;
 mod builder;
 mod checker;
 mod class;
+mod counter;
 mod diagnostics;
 mod jsdoc;
 mod label;
diff --git a/crates/oxc_semantic/src/node.rs b/crates/oxc_semantic/src/node.rs
index fd6ddfcf86524..0588f0196803c 100644
--- a/crates/oxc_semantic/src/node.rs
+++ b/crates/oxc_semantic/src/node.rs
@@ -171,6 +171,11 @@ impl<'a> AstNodes<'a> {
         self.nodes.push(node);
         ast_node_id
     }
+
+    pub fn reserve(&mut self, additional: usize) {
+        self.nodes.reserve(additional);
+        self.parent_ids.reserve(additional);
+    }
 }
 
 #[derive(Debug)]
diff --git a/crates/oxc_semantic/src/scope.rs b/crates/oxc_semantic/src/scope.rs
index c5e9cc77649fa..73d334ae41dd6 100644
--- a/crates/oxc_semantic/src/scope.rs
+++ b/crates/oxc_semantic/src/scope.rs
@@ -236,4 +236,12 @@ impl ScopeTree {
     pub fn remove_binding(&mut self, scope_id: ScopeId, name: &CompactStr) {
         self.bindings[scope_id].shift_remove(name);
     }
+
+    pub fn reserve(&mut self, additional: usize) {
+        self.parent_ids.reserve(additional);
+        self.child_ids.reserve(additional);
+        self.flags.reserve(additional);
+        self.bindings.reserve(additional);
+        self.node_ids.reserve(additional);
+    }
 }
diff --git a/crates/oxc_semantic/src/symbol.rs b/crates/oxc_semantic/src/symbol.rs
index d485439250b1c..1a6e50858334a 100644
--- a/crates/oxc_semantic/src/symbol.rs
+++ b/crates/oxc_semantic/src/symbol.rs
@@ -193,4 +193,16 @@ impl SymbolTable {
             _ => false,
         }
     }
+
+    pub fn reserve(&mut self, additional_symbols: usize, additional_references: usize) {
+        self.spans.reserve(additional_symbols);
+        self.names.reserve(additional_symbols);
+        self.flags.reserve(additional_symbols);
+        self.scope_ids.reserve(additional_symbols);
+        self.declarations.reserve(additional_symbols);
+        self.resolved_references.reserve(additional_symbols);
+        self.redeclare_variables.reserve(additional_symbols);
+
+        self.references.reserve(additional_references);
+    }
 }