From 1ab7eaac8a86058c8d4ec43ee91b15812010d8ca Mon Sep 17 00:00:00 2001 From: Laurence Tratt Date: Thu, 27 Feb 2025 09:50:48 +0000 Subject: [PATCH 1/2] Further optimise `PtrAdd`. In essence this follows chains of `PtrAdd`s, collapsing them down where possible. This allows some `PtrAdd`s to be optimised away completely, as CSE then recognises them as being equivalent to previous instructions. I've implemented this optimisation many times, but because it keeps variables alive for longer, it played badly with the register allocator. With the various changes we've made to that, it now works much better, shaving more time off big_loop and friends. --- ykrt/src/compile/jitc_yk/opt/mod.rs | 50 +++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/ykrt/src/compile/jitc_yk/opt/mod.rs b/ykrt/src/compile/jitc_yk/opt/mod.rs index f782159e8..22e0a5d34 100644 --- a/ykrt/src/compile/jitc_yk/opt/mod.rs +++ b/ykrt/src/compile/jitc_yk/opt/mod.rs @@ -751,12 +751,30 @@ impl Opt { Ok(()) } - fn opt_ptradd(&mut self, iidx: InstIdx, inst: PtrAddInst) -> Result<(), CompilationError> { - match self.an.op_map(&self.m, inst.ptr(&self.m)) { - Operand::Const(_) => todo!(), - Operand::Var(op_iidx) => { - if inst.off() == 0 { - self.m.replace(iidx, Inst::Copy(op_iidx)); + fn opt_ptradd( + &mut self, + iidx: InstIdx, + mut pa_inst: PtrAddInst, + ) -> Result<(), CompilationError> { + let mut off = 0; + loop { + off += pa_inst.off(); + match self.an.op_map(&self.m, pa_inst.ptr(&self.m)) { + Operand::Const(_) => todo!(), + Operand::Var(op_iidx) => { + if let Inst::PtrAdd(x) = self.m.inst(op_iidx) { + pa_inst = x; + } else { + if off == 0 { + self.m.replace(iidx, Inst::Copy(op_iidx)); + } else { + self.m.replace( + iidx, + Inst::PtrAdd(PtrAddInst::new(Operand::Var(op_iidx), off)), + ); + } + break; + } } } } @@ -1721,7 +1739,7 @@ mod test { } #[test] - fn opt_ptradd_zero() { + fn opt_ptradd() { Module::assert_ir_transform_eq( " entry: @@ -1737,6 +1755,24 @@ mod test { black_box %0 ", ); + + Module::assert_ir_transform_eq( + " + entry: + %0: ptr = param reg + %1: ptr = ptr_add %0, 4 + %2: ptr = ptr_add %1, 4 + %3: ptr = ptr_add %2, -8 + black_box %3 + ", + |m| opt(m).unwrap(), + " + ... + entry: + %0: ptr = param ... + black_box %0 + ", + ); } #[test] From 4c1d86cd3f071275321cb4b99b0bc1101d54b2f1 Mon Sep 17 00:00:00 2001 From: Laurence Tratt Date: Thu, 27 Feb 2025 09:54:34 +0000 Subject: [PATCH 2/2] Optimise `PtrAdd`s that were optimised from `DynPtrAdd`. In essence, when we've been able to convert a `DynPtrAdd` to a `PtrAdd`, we can then run our existing `PtrAdd` optimisation on the result. This can then show up further optimisation opportunities to CSE. --- ykrt/src/compile/jitc_yk/opt/mod.rs | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/ykrt/src/compile/jitc_yk/opt/mod.rs b/ykrt/src/compile/jitc_yk/opt/mod.rs index 22e0a5d34..4ff634388 100644 --- a/ykrt/src/compile/jitc_yk/opt/mod.rs +++ b/ykrt/src/compile/jitc_yk/opt/mod.rs @@ -580,8 +580,9 @@ impl Opt { Operand::Const(cidx) => self.m.replace(iidx, Inst::Const(cidx)), } } else { - self.m - .replace(iidx, Inst::PtrAdd(PtrAddInst::new(inst.ptr(&self.m), off))); + let pa_inst = PtrAddInst::new(inst.ptr(&self.m), off); + self.m.replace(iidx, Inst::PtrAdd(pa_inst)); + self.opt_ptradd(iidx, pa_inst)?; } } @@ -1776,7 +1777,7 @@ mod test { } #[test] - fn opt_dynptradd_const() { + fn opt_dynptradd() { Module::assert_ir_transform_eq( " entry: @@ -1807,6 +1808,23 @@ mod test { black_box 0x1234 ", ); + + Module::assert_ir_transform_eq( + " + entry: + %0: ptr = param reg + %1: ptr = ptr_add %0, -4 + %2: ptr = dyn_ptr_add %1, 1i64, 4 + black_box %2 + ", + |m| opt(m).unwrap(), + " + ... + entry: + %0: ptr = param ... + black_box %0 + ", + ); } #[test]