Adding new API that accepts reusable context. (#196)

Sadly, due to how the code was structured, I needed to change the `Env` fields, so basically everything that used them had to change as well. I have not benchmarked anything yet (work in progress). Context -> https://bytecodealliance.zulipchat.com/#narrow/stream/217117-cranelift/topic/Using.20context.20for.20.60TargetIsa.3A.3Acompile_function.60
bytecodealliance · Oct 3, 2024 · f2b9533 · f2b9533
1 parent 0130fee
commit f2b9533
Show file tree

Hide file tree

Showing 22 changed files with 801 additions and 564 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -17,8 +17,7 @@ repository = "https://github.com/bytecodealliance/regalloc2"
 log = { version = "0.4.8", default-features = false }
 smallvec = { version = "1.6.1", features = ["union"] }
 rustc-hash = { version = "2.0.0", default-features = false }
-slice-group-by = { version = "0.3.0", default-features = false }
-hashbrown = { version = "0.14", features = ["ahash"], default-features = false }
+hashbrown = { version = "0.14", default-features = false, features = [] }
 
 # Optional serde support, enabled by feature below.
 serde = { version = "1.0.136", features = [
@@ -28,6 +27,8 @@ serde = { version = "1.0.136", features = [
 
 # The below are only needed for fuzzing.
 libfuzzer-sys = { version = "0.4.2", optional = true }
+bumpalo = { version = "3.16.0", features = ["allocator-api2"] }
+allocator-api2 = { version = "0.2.18", default-features = false, features = ["alloc"] }
 
 # When testing regalloc2 by itself, enable debug assertions and overflow checks
 [profile.release]

diff --git a/fuzz/fuzz_targets/domtree.rs b/fuzz/fuzz_targets/domtree.rs
@@ -116,13 +116,21 @@ impl Arbitrary<'_> for TestCase {
 }
 
 fuzz_target!(|testcase: TestCase| {
-    let postord = postorder::calculate(testcase.cfg.num_blocks, Block::new(0), |block| {
-        &testcase.cfg.succs[block.index()]
-    });
-    let idom = domtree::calculate(
+    let mut postorder = vec![];
+    postorder::calculate(
+        testcase.cfg.num_blocks,
+        Block::new(0),
+        &mut vec![],
+        &mut postorder,
+        |block| &testcase.cfg.succs[block.index()],
+    );
+    let mut idom = vec![];
+    domtree::calculate(
         testcase.cfg.num_blocks,
         |block| &testcase.cfg.preds[block.index()],
-        &postord[..],
+        &postorder[..],
+        &mut vec![],
+        &mut idom,
         Block::new(0),
     );
     check_idom_violations(&idom[..], &testcase.path);

diff --git a/fuzz/fuzz_targets/ion.rs b/fuzz/fuzz_targets/ion.rs
@@ -11,6 +11,14 @@ fuzz_target!(|func: Func| {
     let _ = env_logger::try_init();
     log::trace!("func:\n{:?}", func);
     let env = regalloc2::fuzzing::func::machine_env();
-    let _out =
-        regalloc2::fuzzing::ion::run(&func, &env, false, false).expect("regalloc did not succeed");
+
+    thread_local! {
+        // We test that ctx is cleared properly between runs.
+        static CTX: std::cell::RefCell<regalloc2::fuzzing::ion::Ctx> = std::cell::RefCell::default();
+    }
+
+    CTX.with(|ctx| {
+        let _out = regalloc2::fuzzing::ion::run(&func, &env, &mut *ctx.borrow_mut(), false, false)
+            .expect("regalloc did not succeed");
+    });
 });
diff --git a/fuzz/fuzz_targets/ion_checker.rs b/fuzz/fuzz_targets/ion_checker.rs
@@ -36,10 +36,18 @@ fuzz_target!(|testcase: TestCase| {
     let _ = env_logger::try_init();
     log::trace!("func:\n{:?}", func);
     let env = regalloc2::fuzzing::func::machine_env();
-    let out =
-        regalloc2::fuzzing::ion::run(&func, &env, true, false).expect("regalloc did not succeed");
 
-    let mut checker = Checker::new(&func, &env);
-    checker.prepare(&out);
-    checker.run().expect("checker failed");
+    thread_local! {
+        // We test that ctx is cleared properly between runs.
+        static CTX: std::cell::RefCell<regalloc2::fuzzing::ion::Ctx> = std::cell::RefCell::default();
+    }
+
+    CTX.with(|ctx| {
+        regalloc2::fuzzing::ion::run(&func, &env, &mut *ctx.borrow_mut(), true, false)
+            .expect("regalloc did not succeed");
+
+        let mut checker = Checker::new(&func, &env);
+        checker.prepare(&ctx.borrow().output);
+        checker.run().expect("checker failed");
+    });
 });
diff --git a/fuzz/fuzz_targets/ssagen.rs b/fuzz/fuzz_targets/ssagen.rs
@@ -5,7 +5,7 @@
 
 #![no_main]
 use regalloc2::fuzzing::arbitrary::{Arbitrary, Result, Unstructured};
-use regalloc2::fuzzing::cfg::CFGInfo;
+use regalloc2::fuzzing::cfg::{CFGInfo, CFGInfoCtx};
 use regalloc2::fuzzing::func::{Func, Options};
 use regalloc2::fuzzing::fuzz_target;
 use regalloc2::ssa::validate_ssa;
@@ -33,6 +33,13 @@ impl Arbitrary<'_> for TestCase {
 }
 
 fuzz_target!(|t: TestCase| {
-    let cfginfo = CFGInfo::new(&t.f).expect("could not create CFG info");
-    validate_ssa(&t.f, &cfginfo).expect("invalid SSA");
+    thread_local! {
+        // We test that ctx is cleared properly between runs.
+        static CFG_INFO: std::cell::RefCell<(CFGInfo, CFGInfoCtx)> = std::cell::RefCell::default();
+    }
+
+    CFG_INFO.with_borrow_mut(|(cfginfo, ctx)| {
+        cfginfo.init(&t.f, ctx).expect("could not create CFG info");
+        validate_ssa(&t.f, &cfginfo).expect("invalid SSA");
+    });
 });
diff --git a/regalloc2-tool/src/main.rs b/regalloc2-tool/src/main.rs
@@ -71,7 +71,7 @@ fn main() {
 }
 
 fn print_output(func: &SerializableFunction, output: &Output) {
-    print!("Register allocation result: {{\n");
+    println!("Register allocation result: {{");
     for i in 0..func.num_blocks() {
         let block = Block::new(i);
         let succs = func
@@ -84,7 +84,7 @@ fn print_output(func: &SerializableFunction, output: &Output) {
             .iter()
             .map(|b| b.index())
             .collect::<Vec<_>>();
-        print!("  block{}: # succs:{:?} preds:{:?}\n", i, succs, preds);
+        println!("  block{}: # succs:{:?} preds:{:?}", i, succs, preds);
         for inst_or_edit in output.block_insts_and_edits(func, block) {
             match inst_or_edit {
                 InstOrEdit::Inst(inst) => {
@@ -102,13 +102,13 @@ fn print_output(func: &SerializableFunction, output: &Output) {
                         .map(|(op, alloc)| format!("{op} => {alloc}"))
                         .collect();
                     let ops = ops.join(", ");
-                    print!("    inst{}: {op} {ops}\n", inst.index(),);
+                    println!("    inst{}: {op} {ops}", inst.index(),);
                 }
                 InstOrEdit::Edit(Edit::Move { from, to }) => {
-                    print!("    edit: move {to} <- {from}\n");
+                    println!("    edit: move {to} <- {from}");
                 }
             }
         }
     }
-    print!("}}\n");
+    println!("}}");
 }
diff --git a/src/cfg.rs b/src/cfg.rs
@@ -5,12 +5,19 @@
 
 //! Lightweight CFG analyses.
 
-use crate::{domtree, postorder, Block, Function, Inst, ProgPoint, RegAllocError};
-use alloc::vec;
-use alloc::vec::Vec;
+use crate::alloc::vec::Vec;
+
+use crate::{domtree, postorder, Block, Function, Inst, ProgPoint, RegAllocError, VecExt};
 use smallvec::{smallvec, SmallVec};
 
-#[derive(Clone, Debug)]
+#[derive(Debug, Default)]
+pub struct CFGInfoCtx {
+    visited: Vec<bool>,
+    block_to_rpo: Vec<Option<u32>>,
+    backedge: Vec<u32>,
+}
+
+#[derive(Debug, Default)]
 pub struct CFGInfo {
     /// Postorder traversal of blocks.
     pub postorder: Vec<Block>,
@@ -33,21 +40,41 @@ pub struct CFGInfo {
 }
 
 impl CFGInfo {
-    pub fn new<F: Function>(f: &F) -> Result<CFGInfo, RegAllocError> {
-        let postorder = postorder::calculate(f.num_blocks(), f.entry_block(), |block| {
-            f.block_succs(block)
-        });
-        let domtree = domtree::calculate(
-            f.num_blocks(),
+    pub fn new<F: Function>(f: &F) -> Result<Self, RegAllocError> {
+        let mut ctx = CFGInfoCtx::default();
+        let mut this = Self::default();
+        this.init(f, &mut ctx)?;
+        Ok(this)
+    }
+
+    pub fn init<F: Function>(&mut self, f: &F, ctx: &mut CFGInfoCtx) -> Result<(), RegAllocError> {
+        let nb = f.num_blocks();
+
+        postorder::calculate(
+            nb,
+            f.entry_block(),
+            &mut ctx.visited,
+            &mut self.postorder,
+            |block| f.block_succs(block),
+        );
+
+        domtree::calculate(
+            nb,
             |block| f.block_preds(block),
-            &postorder[..],
+            &self.postorder,
+            &mut ctx.block_to_rpo,
+            &mut self.domtree,
             f.entry_block(),
         );
-        let mut insn_block = vec![Block::invalid(); f.num_insts()];
-        let mut block_entry = vec![ProgPoint::before(Inst::invalid()); f.num_blocks()];
-        let mut block_exit = vec![ProgPoint::before(Inst::invalid()); f.num_blocks()];
-        let mut backedge_in = vec![0; f.num_blocks()];
-        let mut backedge_out = vec![0; f.num_blocks()];
+
+        let insn_block = self.insn_block.repopulate(f.num_insts(), Block::invalid());
+        let block_entry = self
+            .block_entry
+            .repopulate(nb, ProgPoint::before(Inst::invalid()));
+        let block_exit = self
+            .block_exit
+            .repopulate(nb, ProgPoint::before(Inst::invalid()));
+        let (backedge_in, backedge_out) = ctx.backedge.repopulate(nb * 2, 0).split_at_mut(nb);
 
         for block in 0..f.num_blocks() {
             let block = Block::new(block);
@@ -98,10 +125,10 @@ impl CFGInfo {
             }
         }
 
-        let mut approx_loop_depth = vec![];
-        let mut backedge_stack: SmallVec<[usize; 4]> = smallvec![];
+        let approx_loop_depth = self.approx_loop_depth.cleared();
+        let mut backedge_stack: SmallVec<[u32; 4]> = smallvec![];
         let mut cur_depth = 0;
-        for block in 0..f.num_blocks() {
+        for block in 0..nb {
             if backedge_in[block] > 0 {
                 cur_depth += 1;
                 backedge_stack.push(backedge_in[block]);
@@ -119,14 +146,7 @@ impl CFGInfo {
             }
         }
 
-        Ok(CFGInfo {
-            postorder,
-            domtree,
-            insn_block,
-            block_entry,
-            block_exit,
-            approx_loop_depth,
-        })
+        Ok(())
     }
 
     pub fn dominates(&self, a: Block, b: Block) -> bool {

diff --git a/src/domtree.rs b/src/domtree.rs
@@ -12,10 +12,11 @@
 //   TR-06-33870
 //   https://www.cs.rice.edu/~keith/EMBED/dom.pdf
 
-use alloc::vec;
+use core::u32;
+
 use alloc::vec::Vec;
 
-use crate::Block;
+use crate::{Block, VecExt};
 
 // Helper
 fn merge_sets(
@@ -44,19 +45,18 @@ pub fn calculate<'a, PredFn: Fn(Block) -> &'a [Block]>(
     num_blocks: usize,
     preds: PredFn,
     post_ord: &[Block],
+    block_to_rpo_scratch: &mut Vec<Option<u32>>,
+    out: &mut Vec<Block>,
     start: Block,
-) -> Vec<Block> {
+) {
     // We have post_ord, which is the postorder sequence.
-
     // Compute maps from RPO to block number and vice-versa.
-    let mut block_to_rpo = vec![None; num_blocks];
-    block_to_rpo.resize(num_blocks, None);
+    let block_to_rpo = block_to_rpo_scratch.repopulate(num_blocks, None);
     for (i, rpo_block) in post_ord.iter().rev().enumerate() {
         block_to_rpo[rpo_block.index()] = Some(i as u32);
     }
 
-    let mut idom = vec![Block::invalid(); num_blocks];
-
+    let idom = out.repopulate(num_blocks, Block::invalid());
     // The start node must have itself as a parent.
     idom[start.index()] = start;
 
@@ -70,11 +70,11 @@ pub fn calculate<'a, PredFn: Fn(Block) -> &'a [Block]>(
             let mut parent = Block::invalid();
             for &pred in preds(node).iter() {
                 let pred_rpo = match block_to_rpo[pred.index()] {
-                    Some(r) => r,
                     None => {
                         // Skip unreachable preds.
                         continue;
                     }
+                    Some(r) => r,
                 };
                 if pred_rpo < rponum {
                     parent = pred;
@@ -104,8 +104,6 @@ pub fn calculate<'a, PredFn: Fn(Block) -> &'a [Block]>(
     // Now set the start node's dominator-tree parent to "invalid";
     // this allows the loop in `dominates` to terminate.
     idom[start.index()] = Block::invalid();
-
-    idom
 }
 
 pub fn dominates(idom: &[Block], a: Block, mut b: Block) -> bool {

diff --git a/src/fastalloc/lru.rs b/src/fastalloc/lru.rs
@@ -1,11 +1,10 @@
-use crate::{PReg, PRegSet, RegClass};
+use crate::{FxHashSet, PReg, PRegSet, RegClass};
 use alloc::vec;
 use alloc::vec::Vec;
 use core::{
     fmt,
     ops::{Index, IndexMut},
 };
-use hashbrown::HashSet;
 
 /// A least-recently-used cache organized as a linked list based on a vector.
 pub struct Lru {
@@ -193,7 +192,7 @@ impl Lru {
         );
         if self.head != u8::MAX {
             let mut node = self.data[self.head as usize].next;
-            let mut seen = HashSet::new();
+            let mut seen = FxHashSet::default();
             while node != self.head {
                 if seen.contains(&node) {
                     panic!(
@@ -245,7 +244,7 @@ impl fmt::Debug for Lru {
         } else {
             let mut data_str = format!("p{}", self.head);
             let mut node = self.data[self.head as usize].next;
-            let mut seen = HashSet::new();
+            let mut seen = FxHashSet::default();
             while node != self.head {
                 if seen.contains(&node) {
                     panic!(

diff --git a/src/fastalloc/mod.rs b/src/fastalloc/mod.rs
@@ -2,8 +2,8 @@ use crate::moves::{MoveAndScratchResolver, ParallelMoves};
 use crate::{cfg::CFGInfo, ion::Stats, Allocation, RegAllocError};
 use crate::{ssa::validate_ssa, Edit, Function, MachineEnv, Output, ProgPoint};
 use crate::{
-    AllocationKind, Block, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos,
-    PReg, PRegSet, RegClass, SpillSlot, VReg,
+    AllocationKind, Block, FxHashMap, Inst, InstPosition, Operand, OperandConstraint, OperandKind,
+    OperandPos, PReg, PRegSet, RegClass, SpillSlot, VReg,
 };
 use alloc::vec::Vec;
 use core::convert::TryInto;
@@ -1150,17 +1150,16 @@ impl<'a, F: Function> Env<'a, F> {
 
     fn log_post_reload_at_begin_state(&self, block: Block) {
         use alloc::format;
-        use hashbrown::HashMap;
         trace!("");
         trace!("State after instruction reload_at_begin of {:?}", block);
-        let mut map = HashMap::new();
+        let mut map = FxHashMap::default();
         for (vreg_idx, alloc) in self.vreg_allocs.iter().enumerate() {
             if *alloc != Allocation::none() {
                 map.insert(format!("vreg{vreg_idx}"), alloc);
             }
         }
         trace!("vreg_allocs: {:?}", map);
-        let mut map = HashMap::new();
+        let mut map = FxHashMap::default();
         for i in 0..self.vreg_in_preg.len() {
             if self.vreg_in_preg[i] != VReg::invalid() {
                 map.insert(PReg::from_index(i), self.vreg_in_preg[i]);
@@ -1174,10 +1173,9 @@ impl<'a, F: Function> Env<'a, F> {
 
     fn log_post_inst_processing_state(&self, inst: Inst) {
         use alloc::format;
-        use hashbrown::HashMap;
         trace!("");
         trace!("State after instruction {:?}", inst);
-        let mut map = HashMap::new();
+        let mut map = FxHashMap::default();
         for (vreg_idx, alloc) in self.vreg_allocs.iter().enumerate() {
             if *alloc != Allocation::none() {
                 map.insert(format!("vreg{vreg_idx}"), alloc);
@@ -1289,8 +1287,7 @@ pub fn run<F: Function>(
     enable_ssa_checker: bool,
 ) -> Result<Output, RegAllocError> {
     if enable_ssa_checker {
-        let cfginfo = CFGInfo::new(func)?;
-        validate_ssa(func, &cfginfo)?;
+        validate_ssa(func, &CFGInfo::new(func)?)?;
     }
 
     if trace_enabled!() || verbose_log {